summary | refs | log | tree | commit | diff
path: root/cmd
diff options
context:
space:
mode:
author: Philipp Tanlak <philipp.tanlak@gmail.com> 2023-09-23 17:41:57 +0200
committer: Philipp Tanlak <philipp.tanlak@gmail.com> 2023-09-23 17:41:57 +0200
commit: 08df9258a532b653c243e077e82491dbe62ad854 (patch)
tree: e72b04dba61e65d3bfb9cdb0ad3a87f5caa95eb3 /cmd
parent: c6950bcd5cd8fe9e7cc63fde7216a5a9b93b8aa0 (diff)
refactor scraper into modules
Diffstat (limited to 'cmd')
-rw-r--r-- cmd/flyscrape/dev.go 27
-rw-r--r-- cmd/flyscrape/main.go 8
-rw-r--r-- cmd/flyscrape/run.go 37
3 files changed, 27 insertions, 45 deletions
diff --git a/cmd/flyscrape/dev.go b/cmd/flyscrape/dev.go
index 85ac1a1..169e6d3 100644
--- a/cmd/flyscrape/dev.go
+++ b/cmd/flyscrape/dev.go
@@ -56,23 +56,22 @@ func (c *DevCommand) Run(args []string) error {
return nil
}
- opts.Depth = 0
- scr := flyscrape.Scraper{
- ScrapeOptions: opts,
- ScrapeFunc: scrape,
- FetchFunc: fetch,
- }
+ scraper := flyscrape.NewScraper()
+ scraper.ScrapeFunc = scrape
+ flyscrape.LoadModules(scraper, opts)
- result := <-scr.Scrape()
- screen.Clear()
- screen.MoveTopLeft()
+ scraper.Run()
- if result.Error != nil {
- log.Println(result.Error)
- return nil
- }
+ scraper.OnResponse(func(resp *flyscrape.Response) {
+ screen.Clear()
+ screen.MoveTopLeft()
+ if resp.Error != nil {
+ log.Println(resp.Error)
+ return
+ }
+ fmt.Println(flyscrape.PrettyPrint(resp.ScrapeResult, ""))
+ })
- fmt.Println(flyscrape.PrettyPrint(result, ""))
return nil
})
if err != nil && err != flyscrape.StopWatch {
diff --git a/cmd/flyscrape/main.go b/cmd/flyscrape/main.go
index 4e448bb..bac411e 100644
--- a/cmd/flyscrape/main.go
+++ b/cmd/flyscrape/main.go
@@ -11,6 +11,14 @@ import (
"log"
"os"
"strings"
+
+ _ "github.com/philippta/flyscrape/modules/depth"
+ _ "github.com/philippta/flyscrape/modules/domainfilter"
+ _ "github.com/philippta/flyscrape/modules/followlinks"
+ _ "github.com/philippta/flyscrape/modules/jsonprinter"
+ _ "github.com/philippta/flyscrape/modules/ratelimit"
+ _ "github.com/philippta/flyscrape/modules/starturl"
+ _ "github.com/philippta/flyscrape/modules/urlfilter"
)
func main() {
diff --git a/cmd/flyscrape/run.go b/cmd/flyscrape/run.go
index 987d0e0..22f41fd 100644
--- a/cmd/flyscrape/run.go
+++ b/cmd/flyscrape/run.go
@@ -18,8 +18,6 @@ type RunCommand struct{}
func (c *RunCommand) Run(args []string) error {
fs := flag.NewFlagSet("flyscrape-run", flag.ContinueOnError)
- noPrettyPrint := fs.Bool("no-pretty-print", false, "no-pretty-print")
- proxy := fs.String("proxy", "", "proxy")
fs.Usage = c.Usage
if err := fs.Parse(args); err != nil {
@@ -41,32 +39,17 @@ func (c *RunCommand) Run(args []string) error {
return fmt.Errorf("failed to compile script: %w", err)
}
- svc := flyscrape.Scraper{
- ScrapeOptions: opts,
- ScrapeFunc: scrape,
- }
- if *proxy != "" {
- svc.FetchFunc = flyscrape.ProxiedFetch(*proxy)
- }
+ scraper := flyscrape.NewScraper()
+ scraper.ScrapeFunc = scrape
+ flyscrape.LoadModules(scraper, opts)
count := 0
start := time.Now()
- for result := range svc.Scrape() {
- if count > 0 {
- fmt.Println(",")
- }
- if count == 0 {
- fmt.Println("[")
- }
- if *noPrettyPrint {
- fmt.Print(flyscrape.Print(result, " "))
- } else {
- fmt.Print(flyscrape.PrettyPrint(result, " "))
- }
+ scraper.OnResponse(func(resp *flyscrape.Response) {
count++
- }
- fmt.Println("\n]")
+ })
+ scraper.Run()
log.Printf("Scraped %d websites in %v\n", count, time.Since(start))
return nil
@@ -80,18 +63,10 @@ Usage:
flyscrape run SCRIPT
-Arguments:
-
- -no-pretty-print
- Disables pretty printing of scrape results.
-
Examples:
# Run the script.
$ flyscrape run example.js
-
- # Run the script with pretty printing disabled.
- $ flyscrape run -no-pretty-print example.js
`[1:])
}