From 08df9258a532b653c243e077e82491dbe62ad854 Mon Sep 17 00:00:00 2001 From: Philipp Tanlak Date: Sat, 23 Sep 2023 17:41:57 +0200 Subject: refactor scraper into modules --- cmd/flyscrape/dev.go | 27 +++++++++++++-------------- cmd/flyscrape/main.go | 8 ++++++++ cmd/flyscrape/run.go | 37 ++++++------------------------------- 3 files changed, 27 insertions(+), 45 deletions(-) (limited to 'cmd') diff --git a/cmd/flyscrape/dev.go b/cmd/flyscrape/dev.go index 85ac1a1..169e6d3 100644 --- a/cmd/flyscrape/dev.go +++ b/cmd/flyscrape/dev.go @@ -56,23 +56,22 @@ func (c *DevCommand) Run(args []string) error { return nil } - opts.Depth = 0 - scr := flyscrape.Scraper{ - ScrapeOptions: opts, - ScrapeFunc: scrape, - FetchFunc: fetch, - } + scraper := flyscrape.NewScraper() + scraper.ScrapeFunc = scrape + flyscrape.LoadModules(scraper, opts) - result := <-scr.Scrape() - screen.Clear() - screen.MoveTopLeft() + scraper.Run() - if result.Error != nil { - log.Println(result.Error) - return nil - } + scraper.OnResponse(func(resp *flyscrape.Response) { + screen.Clear() + screen.MoveTopLeft() + if resp.Error != nil { + log.Println(resp.Error) + return + } + fmt.Println(flyscrape.PrettyPrint(resp.ScrapeResult, "")) + }) - fmt.Println(flyscrape.PrettyPrint(result, "")) return nil }) if err != nil && err != flyscrape.StopWatch { diff --git a/cmd/flyscrape/main.go b/cmd/flyscrape/main.go index 4e448bb..bac411e 100644 --- a/cmd/flyscrape/main.go +++ b/cmd/flyscrape/main.go @@ -11,6 +11,14 @@ import ( "log" "os" "strings" + + _ "github.com/philippta/flyscrape/modules/depth" + _ "github.com/philippta/flyscrape/modules/domainfilter" + _ "github.com/philippta/flyscrape/modules/followlinks" + _ "github.com/philippta/flyscrape/modules/jsonprinter" + _ "github.com/philippta/flyscrape/modules/ratelimit" + _ "github.com/philippta/flyscrape/modules/starturl" + _ "github.com/philippta/flyscrape/modules/urlfilter" ) func main() { diff --git a/cmd/flyscrape/run.go b/cmd/flyscrape/run.go index 987d0e0..22f41fd 100644 --- a/cmd/flyscrape/run.go +++ b/cmd/flyscrape/run.go @@ -18,8 +18,6 @@ type RunCommand struct{} func (c *RunCommand) Run(args []string) error { fs := flag.NewFlagSet("flyscrape-run", flag.ContinueOnError) - noPrettyPrint := fs.Bool("no-pretty-print", false, "no-pretty-print") - proxy := fs.String("proxy", "", "proxy") fs.Usage = c.Usage if err := fs.Parse(args); err != nil { @@ -41,32 +39,17 @@ func (c *RunCommand) Run(args []string) error { return fmt.Errorf("failed to compile script: %w", err) } - svc := flyscrape.Scraper{ - ScrapeOptions: opts, - ScrapeFunc: scrape, - } - if *proxy != "" { - svc.FetchFunc = flyscrape.ProxiedFetch(*proxy) - } + scraper := flyscrape.NewScraper() + scraper.ScrapeFunc = scrape + flyscrape.LoadModules(scraper, opts) count := 0 start := time.Now() - for result := range svc.Scrape() { - if count > 0 { - fmt.Println(",") - } - if count == 0 { - fmt.Println("[") - } - if *noPrettyPrint { - fmt.Print(flyscrape.Print(result, " ")) - } else { - fmt.Print(flyscrape.PrettyPrint(result, " ")) - } + scraper.OnResponse(func(resp *flyscrape.Response) { count++ - } - fmt.Println("\n]") + }) + scraper.Run() log.Printf("Scraped %d websites in %v\n", count, time.Since(start)) return nil @@ -80,18 +63,10 @@ Usage: flyscrape run SCRIPT -Arguments: - - -no-pretty-print - Disables pretty printing of scrape results. - Examples: # Run the script. $ flyscrape run example.js - - # Run the script with pretty printing disabled. - $ flyscrape run -no-pretty-print example.js `[1:]) } -- cgit v1.2.3