From 1fc497fbdc79a43c62ac2e8eaf4827752dbeef8e Mon Sep 17 00:00:00 2001 From: Philipp Tanlak Date: Thu, 5 Oct 2023 14:53:37 +0200 Subject: Refactor codebase into modules --- cmd/flyscrape/dev.go | 49 ++++++++++++++++++------------------------------- cmd/flyscrape/main.go | 5 +++-- cmd/flyscrape/run.go | 9 +++++++-- 3 files changed, 28 insertions(+), 35 deletions(-) (limited to 'cmd') diff --git a/cmd/flyscrape/dev.go b/cmd/flyscrape/dev.go index 95c627e..fba3fba 100644 --- a/cmd/flyscrape/dev.go +++ b/cmd/flyscrape/dev.go @@ -17,7 +17,6 @@ type DevCommand struct{} func (c *DevCommand) Run(args []string) error { fs := flag.NewFlagSet("flyscrape-dev", flag.ContinueOnError) - proxy := fs.String("proxy", "", "proxy") fs.Usage = c.Usage if err := fs.Parse(args); err != nil { @@ -28,50 +27,25 @@ func (c *DevCommand) Run(args []string) error { return fmt.Errorf("too many arguments") } - var fetch flyscrape.FetchFunc - if *proxy != "" { - fetch = flyscrape.ProxiedFetch(*proxy) - } else { - fetch = flyscrape.Fetch() - } - - fetch = flyscrape.CachedFetch(fetch) script := fs.Arg(0) err := flyscrape.Watch(script, func(s string) error { cfg, scrape, err := flyscrape.Compile(s) if err != nil { - screen.Clear() - screen.MoveTopLeft() - - if errs, ok := err.(interface{ Unwrap() []error }); ok { - for _, err := range errs.Unwrap() { - log.Printf("%s:%v\n", script, err) - } - } else { - log.Println(err) - } - - // ignore compilation errors + printCompileErr(script, err) return nil } scraper := flyscrape.NewScraper() scraper.ScrapeFunc = scrape + flyscrape.LoadModules(scraper, cfg) + scraper.DisableModule("followlinks") + screen.Clear() + screen.MoveTopLeft() scraper.Run() - scraper.OnResponse(func(resp *flyscrape.Response) { - screen.Clear() - screen.MoveTopLeft() - if resp.Error != nil { - log.Println(resp.Error) - return - } - fmt.Println(flyscrape.PrettyPrint(resp.Data, "")) - }) - return nil }) if err != nil && err != flyscrape.StopWatch { @@ -97,3 +71,16 @@ Examples: $ flyscrape dev example.js `[1:]) } + +func printCompileErr(script string, err error) { + screen.Clear() + screen.MoveTopLeft() + + if errs, ok := err.(interface{ Unwrap() []error }); ok { + for _, err := range errs.Unwrap() { + log.Printf("%s:%v\n", script, err) + } + } else { + log.Println(err) + } +} diff --git a/cmd/flyscrape/main.go b/cmd/flyscrape/main.go index bac411e..81d0e2b 100644 --- a/cmd/flyscrape/main.go +++ b/cmd/flyscrape/main.go @@ -12,10 +12,11 @@ import ( "os" "strings" + _ "github.com/philippta/flyscrape/modules/cache" _ "github.com/philippta/flyscrape/modules/depth" _ "github.com/philippta/flyscrape/modules/domainfilter" _ "github.com/philippta/flyscrape/modules/followlinks" - _ "github.com/philippta/flyscrape/modules/jsonprinter" + _ "github.com/philippta/flyscrape/modules/jsonprint" _ "github.com/philippta/flyscrape/modules/ratelimit" _ "github.com/philippta/flyscrape/modules/starturl" _ "github.com/philippta/flyscrape/modules/urlfilter" @@ -66,7 +67,7 @@ Usage: flyscrape [arguments] Commands: - + new creates a sample scraping script run runs a scraping script dev watches and re-runs a scraping script diff --git a/cmd/flyscrape/run.go b/cmd/flyscrape/run.go index 4580e6d..b467abe 100644 --- a/cmd/flyscrape/run.go +++ b/cmd/flyscrape/run.go @@ -12,6 +12,7 @@ import ( "time" "github.com/philippta/flyscrape" + "github.com/philippta/flyscrape/modules/hook" ) type RunCommand struct{} @@ -41,14 +42,18 @@ func (c *RunCommand) Run(args []string) error { scraper := flyscrape.NewScraper() scraper.ScrapeFunc = scrape + flyscrape.LoadModules(scraper, cfg) count := 0 start := time.Now() - scraper.OnResponse(func(resp *flyscrape.Response) { - count++ + scraper.LoadModule(hook.Module{ + ReceiveResponseFn: func(r *flyscrape.Response) { + count++ + }, }) + scraper.Run() log.Printf("Scraped %d websites in %v\n", count, time.Since(start)) -- cgit v1.2.3