diff options
Diffstat (limited to 'modules/depth')
| -rw-r--r-- | modules/depth/depth.go | 15 | ||||
| -rw-r--r-- | modules/depth/depth_test.go | 40 |
2 files changed, 35 insertions, 20 deletions
diff --git a/modules/depth/depth.go b/modules/depth/depth.go index 0cfbc71..866f5ae 100644 --- a/modules/depth/depth.go +++ b/modules/depth/depth.go @@ -9,15 +9,22 @@ import ( ) func init() { - flyscrape.RegisterModule(new(Module)) + flyscrape.RegisterModule(Module{}) } type Module struct { Depth int `json:"depth"` } -func (m *Module) CanRequest(url string, depth int) bool { - return depth <= m.Depth +func (Module) ModuleInfo() flyscrape.ModuleInfo { + return flyscrape.ModuleInfo{ + ID: "depth", + New: func() flyscrape.Module { return new(Module) }, + } } -var _ flyscrape.CanRequest = (*Module)(nil) +func (m *Module) ValidateRequest(r *flyscrape.Request) bool { + return r.Depth <= m.Depth +} + +var _ flyscrape.RequestValidator = (*Module)(nil) diff --git a/modules/depth/depth_test.go b/modules/depth/depth_test.go index c9afd6f..10b67e9 100644 --- a/modules/depth/depth_test.go +++ b/modules/depth/depth_test.go @@ -6,36 +6,44 @@ package depth_test import ( "net/http" + "sync" "testing" "github.com/philippta/flyscrape" "github.com/philippta/flyscrape/modules/depth" "github.com/philippta/flyscrape/modules/followlinks" + "github.com/philippta/flyscrape/modules/hook" "github.com/philippta/flyscrape/modules/starturl" "github.com/stretchr/testify/require" ) func TestDepth(t *testing.T) { + var urls []string + var mu sync.Mutex + scraper := flyscrape.NewScraper() scraper.LoadModule(&starturl.Module{URL: "http://www.example.com"}) scraper.LoadModule(&followlinks.Module{}) scraper.LoadModule(&depth.Module{Depth: 2}) - - scraper.SetTransport(func(r *http.Request) (*http.Response, error) { - switch r.URL.String() { - case "http://www.example.com": - return flyscrape.MockResponse(200, `<a href="http://www.google.com">Google</a>`) - case "http://www.google.com": - return flyscrape.MockResponse(200, `<a href="http://www.duckduckgo.com">DuckDuckGo</a>`) - case "http://www.duckduckgo.com": - return flyscrape.MockResponse(200, `<a href="http://www.example.com">Example</a>`) - } - return flyscrape.MockResponse(200, "") - }) - - var urls []string - scraper.OnRequest(func(req *flyscrape.Request) { - urls = append(urls, req.URL) + scraper.LoadModule(hook.Module{ + AdaptTransportFn: func(rt http.RoundTripper) http.RoundTripper { + return flyscrape.RoundTripFunc(func(r *http.Request) (*http.Response, error) { + switch r.URL.String() { + case "http://www.example.com": + return flyscrape.MockResponse(200, `<a href="http://www.google.com">Google</a>`) + case "http://www.google.com": + return flyscrape.MockResponse(200, `<a href="http://www.duckduckgo.com">DuckDuckGo</a>`) + case "http://www.duckduckgo.com": + return flyscrape.MockResponse(200, `<a href="http://www.example.com">Example</a>`) + } + return flyscrape.MockResponse(200, "") + }) + }, + ReceiveResponseFn: func(r *flyscrape.Response) { + mu.Lock() + urls = append(urls, r.Request.URL) + mu.Unlock() + }, }) scraper.Run() |