summaryrefslogtreecommitdiff
path: root/modules/jsonprinter
diff options
context:
space:
mode:
Diffstat (limited to 'modules/jsonprinter')
-rw-r--r--modules/jsonprinter/jsonprinter.go22
-rw-r--r--modules/jsonprinter/jsonprinter_test.go47
2 files changed, 16 insertions, 53 deletions
diff --git a/modules/jsonprinter/jsonprinter.go b/modules/jsonprinter/jsonprinter.go
index 3936277..3026f29 100644
--- a/modules/jsonprinter/jsonprinter.go
+++ b/modules/jsonprinter/jsonprinter.go
@@ -6,6 +6,7 @@ package jsonprinter
import (
"fmt"
+ "time"
"github.com/philippta/flyscrape"
)
@@ -18,10 +19,6 @@ type Module struct {
first bool
}
-func (m *Module) ID() string {
- return "jsonprinter"
-}
-
func (m *Module) OnResponse(resp *flyscrape.Response) {
if resp.Error == nil && resp.Data == nil {
return
@@ -33,15 +30,28 @@ func (m *Module) OnResponse(resp *flyscrape.Response) {
fmt.Println(",")
}
- fmt.Print(flyscrape.PrettyPrint(resp.ScrapeResult, " "))
+ o := output{
+ URL: resp.Request.URL,
+ Data: resp.Data,
+ Error: resp.Error,
+ Timestamp: time.Now(),
+ }
+
+ fmt.Print(flyscrape.PrettyPrint(o, " "))
}
func (m *Module) OnComplete() {
fmt.Println("\n]")
}
+type output struct {
+ URL string `json:"url,omitempty"`
+ Data any `json:"data,omitempty"`
+ Error error `json:"error,omitempty"`
+ Timestamp time.Time `json:"timestamp,omitempty"`
+}
+
var (
- _ flyscrape.Module = (*Module)(nil)
_ flyscrape.OnResponse = (*Module)(nil)
_ flyscrape.OnComplete = (*Module)(nil)
)
diff --git a/modules/jsonprinter/jsonprinter_test.go b/modules/jsonprinter/jsonprinter_test.go
deleted file mode 100644
index 29cc438..0000000
--- a/modules/jsonprinter/jsonprinter_test.go
+++ /dev/null
@@ -1,47 +0,0 @@
-// This Source Code Form is subject to the terms of the Mozilla Public
-// License, v. 2.0. If a copy of the MPL was not distributed with this
-// file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-package jsonprinter_test
-
-import (
- "net/http"
- "testing"
-
- "github.com/philippta/flyscrape"
- "github.com/philippta/flyscrape/modules/depth"
- "github.com/philippta/flyscrape/modules/followlinks"
- "github.com/philippta/flyscrape/modules/starturl"
- "github.com/stretchr/testify/require"
-)
-
-func TestDepth(t *testing.T) {
- scraper := flyscrape.NewScraper()
- scraper.LoadModule(&starturl.Module{URL: "http://www.example.com/"})
- scraper.LoadModule(&followlinks.Module{})
- scraper.LoadModule(&depth.Module{Depth: 2})
-
- scraper.SetTransport(func(r *http.Request) (*http.Response, error) {
- switch r.URL.String() {
- case "http://www.example.com/":
- return flyscrape.MockResponse(200, `<a href="http://www.google.com">Google</a>`)
- case "http://www.google.com/":
- return flyscrape.MockResponse(200, `<a href="http://www.duckduckgo.com">DuckDuckGo</a>`)
- case "http://www.duckduckgo.com/":
- return flyscrape.MockResponse(200, `<a href="http://www.example.com">Example</a>`)
- }
- return flyscrape.MockResponse(200, "")
- })
-
- var urls []string
- scraper.OnRequest(func(req *flyscrape.Request) {
- urls = append(urls, req.URL)
- })
-
- scraper.Run()
-
- require.Len(t, urls, 3)
- require.Contains(t, urls, "http://www.example.com/")
- require.Contains(t, urls, "http://www.google.com/")
- require.Contains(t, urls, "http://www.duckduckgo.com/")
-}