summary | refs | log | tree | commit | diff
path: root/modules/followlinks
diff options
context:
space:
mode:
author Philipp Tanlak <philipp.tanlak@gmail.com> 2023-10-30 19:02:50 +0100
committer Philipp Tanlak <philipp.tanlak@gmail.com> 2023-10-30 19:02:50 +0100
commit 2d3cd6584dedce45ea709d1757a28ce7537f3472 (patch)
tree c21ce25cd66731c56b3fd13c86734bd13ebd7d25 /modules/followlinks
parent 2bfae5b426bf4a0b99d3979ed12d63cb50c39b17 (diff)
Refactor to prepare for builtin JS functions
Diffstat (limited to 'modules/followlinks')
-rw-r--r-- modules/followlinks/followlinks_test.go 124
1 file changed, 66 insertions, 58 deletions
diff --git a/modules/followlinks/followlinks_test.go b/modules/followlinks/followlinks_test.go
index f3eb4fe..af186f9 100644
--- a/modules/followlinks/followlinks_test.go
+++ b/modules/followlinks/followlinks_test.go
@@ -20,24 +20,26 @@ func TestFollowLinks(t *testing.T) {
var urls []string
var mu sync.Mutex
- scraper := flyscrape.NewScraper()
- scraper.LoadModule(&starturl.Module{URL: "http://www.example.com/foo/bar"})
- scraper.LoadModule(&followlinks.Module{})
-
- scraper.LoadModule(hook.Module{
- AdaptTransportFn: func(rt http.RoundTripper) http.RoundTripper {
- return flyscrape.MockTransport(200, `
+ mods := []flyscrape.Module{
+ &starturl.Module{URL: "http://www.example.com/foo/bar"},
+ &followlinks.Module{},
+ hook.Module{
+ AdaptTransportFn: func(rt http.RoundTripper) http.RoundTripper {
+ return flyscrape.MockTransport(200, `
<a href="/baz">Baz</a>
<a href="baz">Baz</a>
<a href="http://www.google.com">Google</a>`)
+ },
+ ReceiveResponseFn: func(r *flyscrape.Response) {
+ mu.Lock()
+ urls = append(urls, r.Request.URL)
+ mu.Unlock()
+ },
},
- ReceiveResponseFn: func(r *flyscrape.Response) {
- mu.Lock()
- urls = append(urls, r.Request.URL)
- mu.Unlock()
- },
- })
+ }
+ scraper := flyscrape.NewScraper()
+ scraper.Modules = mods
scraper.Run()
require.Len(t, urls, 5)
@@ -52,28 +54,30 @@ func TestFollowSelector(t *testing.T) {
var urls []string
var mu sync.Mutex
- scraper := flyscrape.NewScraper()
- scraper.LoadModule(&starturl.Module{URL: "http://www.example.com/foo/bar"})
- scraper.LoadModule(&followlinks.Module{
- Follow: []string{".next a[href]"},
- })
-
- scraper.LoadModule(hook.Module{
- AdaptTransportFn: func(rt http.RoundTripper) http.RoundTripper {
- return flyscrape.MockTransport(200, `
+ mods := []flyscrape.Module{
+ &starturl.Module{URL: "http://www.example.com/foo/bar"},
+ &followlinks.Module{
+ Follow: []string{".next a[href]"},
+ },
+ hook.Module{
+ AdaptTransportFn: func(rt http.RoundTripper) http.RoundTripper {
+ return flyscrape.MockTransport(200, `
<a href="/baz">Baz</a>
<a href="baz">Baz</a>
<div class="next">
<a href="http://www.google.com">Google</a>
</div>`)
+ },
+ ReceiveResponseFn: func(r *flyscrape.Response) {
+ mu.Lock()
+ urls = append(urls, r.Request.URL)
+ mu.Unlock()
+ },
},
- ReceiveResponseFn: func(r *flyscrape.Response) {
- mu.Lock()
- urls = append(urls, r.Request.URL)
- mu.Unlock()
- },
- })
+ }
+ scraper := flyscrape.NewScraper()
+ scraper.Modules = mods
scraper.Run()
require.Len(t, urls, 2)
@@ -85,26 +89,28 @@ func TestFollowDataAttr(t *testing.T) {
var urls []string
var mu sync.Mutex
- scraper := flyscrape.NewScraper()
- scraper.LoadModule(&starturl.Module{URL: "http://www.example.com/foo/bar"})
- scraper.LoadModule(&followlinks.Module{
- Follow: []string{"[data-url]"},
- })
-
- scraper.LoadModule(hook.Module{
- AdaptTransportFn: func(rt http.RoundTripper) http.RoundTripper {
- return flyscrape.MockTransport(200, `
+ mods := []flyscrape.Module{
+ &starturl.Module{URL: "http://www.example.com/foo/bar"},
+ &followlinks.Module{
+ Follow: []string{"[data-url]"},
+ },
+ hook.Module{
+ AdaptTransportFn: func(rt http.RoundTripper) http.RoundTripper {
+ return flyscrape.MockTransport(200, `
<a href="/baz">Baz</a>
<a href="baz">Baz</a>
<div data-url="http://www.google.com">Google</div>`)
+ },
+ ReceiveResponseFn: func(r *flyscrape.Response) {
+ mu.Lock()
+ urls = append(urls, r.Request.URL)
+ mu.Unlock()
+ },
},
- ReceiveResponseFn: func(r *flyscrape.Response) {
- mu.Lock()
- urls = append(urls, r.Request.URL)
- mu.Unlock()
- },
- })
+ }
+ scraper := flyscrape.NewScraper()
+ scraper.Modules = mods
scraper.Run()
require.Len(t, urls, 2)
@@ -116,26 +122,28 @@ func TestFollowMultiple(t *testing.T) {
var urls []string
var mu sync.Mutex
- scraper := flyscrape.NewScraper()
- scraper.LoadModule(&starturl.Module{URL: "http://www.example.com/foo/bar"})
- scraper.LoadModule(&followlinks.Module{
- Follow: []string{"a.prev", "a.next"},
- })
-
- scraper.LoadModule(hook.Module{
- AdaptTransportFn: func(rt http.RoundTripper) http.RoundTripper {
- return flyscrape.MockTransport(200, `
+ mods := []flyscrape.Module{
+ &starturl.Module{URL: "http://www.example.com/foo/bar"},
+ &followlinks.Module{
+ Follow: []string{"a.prev", "a.next"},
+ },
+ hook.Module{
+ AdaptTransportFn: func(rt http.RoundTripper) http.RoundTripper {
+ return flyscrape.MockTransport(200, `
<a href="/baz">Baz</a>
<a class="prev" href="a">a</a>
<a class="next" href="b">b</a>`)
+ },
+ ReceiveResponseFn: func(r *flyscrape.Response) {
+ mu.Lock()
+ urls = append(urls, r.Request.URL)
+ mu.Unlock()
+ },
},
- ReceiveResponseFn: func(r *flyscrape.Response) {
- mu.Lock()
- urls = append(urls, r.Request.URL)
- mu.Unlock()
- },
- })
+ }
+ scraper := flyscrape.NewScraper()
+ scraper.Modules = mods
scraper.Run()
require.Len(t, urls, 3)