From 2f1a787dcb9403d61226717c3cd209cc5c538080 Mon Sep 17 00:00:00 2001 From: kbche Date: Sun, 17 Jan 2021 11:40:58 +0000 Subject: [PATCH 1/5] Eporner extractor --- extractors/eporner/eporner.go | 151 +++++++++++++++++++++++++++++ extractors/eporner/eporner_test.go | 65 +++++++++++++ extractors/extractors.go | 2 + 3 files changed, 218 insertions(+) create mode 100644 extractors/eporner/eporner.go create mode 100644 extractors/eporner/eporner_test.go diff --git a/extractors/eporner/eporner.go b/extractors/eporner/eporner.go new file mode 100644 index 000000000..3fb6236a7 --- /dev/null +++ b/extractors/eporner/eporner.go @@ -0,0 +1,151 @@ +package eporner + +import ( + "net/url" + "strconv" + "strings" + + "github.com/PuerkitoBio/goquery" + "github.com/iawia002/annie/extractors/types" + "github.com/iawia002/annie/parser" + "github.com/iawia002/annie/request" + "github.com/iawia002/annie/utils" +) + +const ( + downloadclass = ".dloaddivcol" +) + +var () + +type src struct { + url string + quality string + sizestr string + size int64 +} + +func getSrcMeta(text string) *src { + sti := strings.Index(text, "(") + ste := strings.Index(text, ")") + itext := text[sti+1 : ste] + strs := strings.Split(itext, ",") + s := &src{} + + if len(strs) == 2 { + s.quality = strings.Trim(strs[0], " ") + s.sizestr = strings.Trim(strs[1], " ") + } + + if s.sizestr == "" { + s.size = 0 + return s + } + + valunit := strings.Split(s.sizestr, " ") + val, err := strconv.ParseFloat(valunit[0], 64) + if err != nil { + s.size = 0 + return s + } + unit := valunit[1] + switch unit { + case "KB": + s.size = int64(val * 1024.0) + case "MB": + s.size = int64(val * 1024 * 1024) + case "GB": + s.size = int64(val * 1024 * 1024 * 1024) + default: + s.size = int64(val) + } + return s +} + +func getSrc(html string) []*src { + srcs := []*src{} + d, err := parser.GetDoc(html) + if err != nil { + return nil + } + + d.Find(downloadclass).Each(func(i int, s *goquery.Selection) { + s.Contents().Each(func(i int, s 
*goquery.Selection) { + for ns := range s.Nodes { + n := s.Get(ns) + if n.Data == "a" { + var sr *src + if n.FirstChild != nil { + // fmt.Printf("data: %d %+v", i, *n.FirstChild) + sr = getSrcMeta(n.FirstChild.Data) + } + for _, a := range n.Attr { + if a.Key == "href" { + sr.url = a.Val + } + } + srcs = append(srcs, sr) + } + // fmt.Printf("%d %+v\n", i, n) + } + // fmt.Printf("%d %+v\n", i, s.Get(1)) + }) + }) + + return srcs +} + +type extractor struct{} + +// New returns a youtube extractor. +func New() types.Extractor { + return &extractor{} +} + +// Extract is the main function to extract the data. +func (e *extractor) Extract(u string, option types.Options) ([]*types.Data, error) { + html, err := request.Get(u, u, nil) + if err != nil { + return nil, err + } + var title string + desc := utils.MatchOneOf(html, `(.+?)`) + if len(desc) > 1 { + title = desc[1] + } else { + title = "eporner" + } + uu, err := url.Parse(u) + if err != nil { + return nil, err + } + + streams := make(map[string]*types.Stream, len(getSrc(html))) + for _, src := range getSrc(html) { + // fmt.Printf("src: %+v\n", src) + srcurl := uu.Scheme + "://" + uu.Host + src.url + // size, err := request.Size(srcurl, u) + if err != nil { + return nil, err + } + urlData := &types.Part{ + URL: srcurl, + Size: src.size, + Ext: "mp4", + } + streams[src.quality] = &types.Stream{ + Parts: []*types.Part{urlData}, + Size: src.size, + Quality: src.quality, + } + } + return []*types.Data{ + { + Site: "EPORNER eporner.com", + Title: title, + Type: types.DataTypeVideo, + Streams: streams, + URL: u, + }, + }, nil +} diff --git a/extractors/eporner/eporner_test.go b/extractors/eporner/eporner_test.go new file mode 100644 index 000000000..d35741fcd --- /dev/null +++ b/extractors/eporner/eporner_test.go @@ -0,0 +1,65 @@ +package eporner + +import ( + "reflect" + "testing" +) + +func Test_getSrc(t *testing.T) { + type args struct { + html string + } + tests := []struct { + name string + args args + want []*src + }{ 
+ { + name: "T1", + args: args{html: `
+ `}, + want: []*src{ + {url: "/dload/baNFPbuIABZ/240/4307932-240p.mp4", quality: "240p", sizestr: "131.79 MB"}, + {url: "/dload/baNFPbuIABZ/360/4307932-360p.mp4", quality: "360p", sizestr: "235.5 MB"}, + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := getSrc(tt.args.html); !reflect.DeepEqual(got, tt.want) { + t.Errorf("getSrc() = %v, want %v", got, tt.want) + } + }) + } +} + +func Test_getSrcMeta(t *testing.T) { + type args struct { + text string + } + tests := []struct { + name string + args args + want *src + }{ + { + name: "T2", + args: args{text: "Download MP4 (240p, 131.79 MB)"}, + want: &src{quality: "240p", sizestr: "131.79 MB"}, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := getSrcMeta(tt.args.text); !reflect.DeepEqual(got, tt.want) { + t.Errorf("getSrcMeta() = %v, want %v", got, tt.want) + } + }) + } +} diff --git a/extractors/extractors.go b/extractors/extractors.go index 3e4334ad2..dd12bc48f 100644 --- a/extractors/extractors.go +++ b/extractors/extractors.go @@ -9,6 +9,7 @@ import ( "github.com/iawia002/annie/extractors/bilibili" "github.com/iawia002/annie/extractors/douyin" "github.com/iawia002/annie/extractors/douyu" + "github.com/iawia002/annie/extractors/eporner" "github.com/iawia002/annie/extractors/facebook" "github.com/iawia002/annie/extractors/geekbang" "github.com/iawia002/annie/extractors/haokan" @@ -74,6 +75,7 @@ func init() { "tiktok": tiktok.New(), "haokan": haokan.New(), "acfun": acfun.New(), + "eporner": eporner.New(), } } From b8a1a2ada592eac90583c3f01a4e1908681f0f2f Mon Sep 17 00:00:00 2001 From: kbche Date: Sun, 17 Jan 2021 15:31:10 +0000 Subject: [PATCH 2/5] Modified testcase for extractor eporner --- extractors/eporner/eporner_test.go | 61 +++++++----------------------- 1 file changed, 14 insertions(+), 47 deletions(-) diff --git a/extractors/eporner/eporner_test.go b/extractors/eporner/eporner_test.go index d35741fcd..891e65b5c 100644 --- 
a/extractors/eporner/eporner_test.go +++ b/extractors/eporner/eporner_test.go @@ -1,65 +1,32 @@ package eporner import ( - "reflect" "testing" + + "github.com/iawia002/annie/extractors/types" + "github.com/iawia002/annie/test" ) -func Test_getSrc(t *testing.T) { - type args struct { - html string - } +func TestDownload(t *testing.T) { tests := []struct { name string - args args - want []*src + args test.Args }{ { - name: "T1", - args: args{html: `
- `}, - want: []*src{ - {url: "/dload/baNFPbuIABZ/240/4307932-240p.mp4", quality: "240p", sizestr: "131.79 MB"}, - {url: "/dload/baNFPbuIABZ/360/4307932-360p.mp4", quality: "360p", sizestr: "235.5 MB"}, + name: "normal test", + args: test.Args{ + URL: "https://www.eporner.com/video-mbubfvXYFip/dirtywivesclub-becky-bandini/", + Quality: "1080p", + Size: 1525510307, + Title: "DirtyWivesClub - Becky Bandini - EPORNER", }, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - if got := getSrc(tt.args.html); !reflect.DeepEqual(got, tt.want) { - t.Errorf("getSrc() = %v, want %v", got, tt.want) - } - }) - } -} - -func Test_getSrcMeta(t *testing.T) { - type args struct { - text string - } - tests := []struct { - name string - args args - want *src - }{ - { - name: "T2", - args: args{text: "Download MP4 (240p, 131.79 MB)"}, - want: &src{quality: "240p", sizestr: "131.79 MB"}, - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - if got := getSrcMeta(tt.args.text); !reflect.DeepEqual(got, tt.want) { - t.Errorf("getSrcMeta() = %v, want %v", got, tt.want) - } + data, err := New().Extract(tt.args.URL, types.Options{}) + test.CheckError(t, err) + test.Check(t, tt.args, data[0]) }) } } From 11452249820c95953681c782e3078a9a35c45533 Mon Sep 17 00:00:00 2001 From: kbche Date: Sun, 17 Jan 2021 15:33:27 +0000 Subject: [PATCH 3/5] Updated README.md --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 1bb8bd536..3d9f21d23 100644 --- a/README.md +++ b/README.md @@ -627,6 +627,7 @@ XVIDEOS | | ✓ | | | | TikTok | | ✓ | | | | 好看视频 | | ✓ | | | | AcFun | | ✓ | | ✓ | | +Eporner | | ✓ | | | | ## Known issues From a87b200c2e0dfa57d91f348a33fbf8659f9dd0b5 Mon Sep 17 00:00:00 2001 From: kbche Date: Sun, 17 Jan 2021 15:41:21 +0000 Subject: [PATCH 4/5] Minor modifications in size calculations and removed commented code --- extractors/eporner/eporner.go | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) 
diff --git a/extractors/eporner/eporner.go b/extractors/eporner/eporner.go index 3fb6236a7..de852dc99 100644 --- a/extractors/eporner/eporner.go +++ b/extractors/eporner/eporner.go @@ -51,7 +51,7 @@ func getSrcMeta(text string) *src { unit := valunit[1] switch unit { case "KB": - s.size = int64(val * 1024.0) + s.size = int64(val * 1024) case "MB": s.size = int64(val * 1024 * 1024) case "GB": @@ -76,7 +76,6 @@ func getSrc(html string) []*src { if n.Data == "a" { var sr *src if n.FirstChild != nil { - // fmt.Printf("data: %d %+v", i, *n.FirstChild) sr = getSrcMeta(n.FirstChild.Data) } for _, a := range n.Attr { @@ -86,9 +85,7 @@ func getSrc(html string) []*src { } srcs = append(srcs, sr) } - // fmt.Printf("%d %+v\n", i, n) } - // fmt.Printf("%d %+v\n", i, s.Get(1)) }) }) @@ -97,7 +94,7 @@ func getSrc(html string) []*src { type extractor struct{} -// New returns a youtube extractor. +// New returns an eporner extractor. func New() types.Extractor { return &extractor{} } @@ -119,11 +116,11 @@ func (e *extractor) Extract(u string, option types.Options) ([]*types.Data, erro if err != nil { return nil, err } - - streams := make(map[string]*types.Stream, len(getSrc(html))) - for _, src := range getSrc(html) { - // fmt.Printf("src: %+v\n", src) + srcs := getSrc(html) + streams := make(map[string]*types.Stream, len(srcs)) + for _, src := range srcs { srcurl := uu.Scheme + "://" + uu.Host + src.url + // skipping an extra HEAD request to the URL. 
// size, err := request.Size(srcurl, u) if err != nil { return nil, err From c419830208a51d3ac66bf5974a606859ebfdec9d Mon Sep 17 00:00:00 2001 From: kbche Date: Sun, 17 Jan 2021 16:08:24 +0000 Subject: [PATCH 5/5] Added some fixes from ci warnings --- extractors/eporner/eporner.go | 2 -- 1 file changed, 2 deletions(-) diff --git a/extractors/eporner/eporner.go b/extractors/eporner/eporner.go index de852dc99..ac9a5bdaa 100644 --- a/extractors/eporner/eporner.go +++ b/extractors/eporner/eporner.go @@ -16,8 +16,6 @@ const ( downloadclass = ".dloaddivcol" ) -var () - type src struct { url string quality string