summary | refs | log | tree | commit | diff
path: root/modules/depth
diff options
context:
space:
mode:
author Philipp Tanlak <philipp.tanlak@gmail.com> 2023-10-05 14:53:37 +0200
committer Philipp Tanlak <philipp.tanlak@gmail.com> 2023-10-05 14:53:37 +0200
commit 1fc497fbdc79a43c62ac2e8eaf4827752dbeef8e (patch)
tree 67738e213ef97f249bdfa0f1bddda0839192cb77 /modules/depth
parent bd9e7f7acfd855d4685aa4544169c0e29cdbf205 (diff)
Refactor codebase into modules
Diffstat (limited to 'modules/depth')
-rw-r--r-- modules/depth/depth.go 15
-rw-r--r-- modules/depth/depth_test.go 40
2 files changed, 35 insertions, 20 deletions
diff --git a/modules/depth/depth.go b/modules/depth/depth.go
index 0cfbc71..866f5ae 100644
--- a/modules/depth/depth.go
+++ b/modules/depth/depth.go
@@ -9,15 +9,22 @@ import (
)
func init() {
- flyscrape.RegisterModule(new(Module))
+ flyscrape.RegisterModule(Module{})
}
type Module struct {
Depth int `json:"depth"`
}
-func (m *Module) CanRequest(url string, depth int) bool {
- return depth <= m.Depth
+func (Module) ModuleInfo() flyscrape.ModuleInfo {
+ return flyscrape.ModuleInfo{
+ ID: "depth",
+ New: func() flyscrape.Module { return new(Module) },
+ }
}
-var _ flyscrape.CanRequest = (*Module)(nil)
+func (m *Module) ValidateRequest(r *flyscrape.Request) bool {
+ return r.Depth <= m.Depth
+}
+
+var _ flyscrape.RequestValidator = (*Module)(nil)
diff --git a/modules/depth/depth_test.go b/modules/depth/depth_test.go
index c9afd6f..10b67e9 100644
--- a/modules/depth/depth_test.go
+++ b/modules/depth/depth_test.go
@@ -6,36 +6,44 @@ package depth_test
import (
"net/http"
+ "sync"
"testing"
"github.com/philippta/flyscrape"
"github.com/philippta/flyscrape/modules/depth"
"github.com/philippta/flyscrape/modules/followlinks"
+ "github.com/philippta/flyscrape/modules/hook"
"github.com/philippta/flyscrape/modules/starturl"
"github.com/stretchr/testify/require"
)
func TestDepth(t *testing.T) {
+ var urls []string
+ var mu sync.Mutex
+
scraper := flyscrape.NewScraper()
scraper.LoadModule(&starturl.Module{URL: "http://www.example.com"})
scraper.LoadModule(&followlinks.Module{})
scraper.LoadModule(&depth.Module{Depth: 2})
-
- scraper.SetTransport(func(r *http.Request) (*http.Response, error) {
- switch r.URL.String() {
- case "http://www.example.com":
- return flyscrape.MockResponse(200, `<a href="http://www.google.com">Google</a>`)
- case "http://www.google.com":
- return flyscrape.MockResponse(200, `<a href="http://www.duckduckgo.com">DuckDuckGo</a>`)
- case "http://www.duckduckgo.com":
- return flyscrape.MockResponse(200, `<a href="http://www.example.com">Example</a>`)
- }
- return flyscrape.MockResponse(200, "")
- })
-
- var urls []string
- scraper.OnRequest(func(req *flyscrape.Request) {
- urls = append(urls, req.URL)
+ scraper.LoadModule(hook.Module{
+ AdaptTransportFn: func(rt http.RoundTripper) http.RoundTripper {
+ return flyscrape.RoundTripFunc(func(r *http.Request) (*http.Response, error) {
+ switch r.URL.String() {
+ case "http://www.example.com":
+ return flyscrape.MockResponse(200, `<a href="http://www.google.com">Google</a>`)
+ case "http://www.google.com":
+ return flyscrape.MockResponse(200, `<a href="http://www.duckduckgo.com">DuckDuckGo</a>`)
+ case "http://www.duckduckgo.com":
+ return flyscrape.MockResponse(200, `<a href="http://www.example.com">Example</a>`)
+ }
+ return flyscrape.MockResponse(200, "")
+ })
+ },
+ ReceiveResponseFn: func(r *flyscrape.Response) {
+ mu.Lock()
+ urls = append(urls, r.Request.URL)
+ mu.Unlock()
+ },
})
scraper.Run()