summary refs log tree commit diff
path: root/cmd
diff options
context:
space:
mode:
Diffstat (limited to 'cmd')
-rw-r--r-- cmd/flyscrape/dev.go 49
-rw-r--r-- cmd/flyscrape/main.go 5
-rw-r--r-- cmd/flyscrape/run.go 9
3 files changed, 28 insertions, 35 deletions
diff --git a/cmd/flyscrape/dev.go b/cmd/flyscrape/dev.go
index 95c627e..fba3fba 100644
--- a/cmd/flyscrape/dev.go
+++ b/cmd/flyscrape/dev.go
@@ -17,7 +17,6 @@ type DevCommand struct{}
func (c *DevCommand) Run(args []string) error {
fs := flag.NewFlagSet("flyscrape-dev", flag.ContinueOnError)
- proxy := fs.String("proxy", "", "proxy")
fs.Usage = c.Usage
if err := fs.Parse(args); err != nil {
@@ -28,50 +27,25 @@ func (c *DevCommand) Run(args []string) error {
return fmt.Errorf("too many arguments")
}
- var fetch flyscrape.FetchFunc
- if *proxy != "" {
- fetch = flyscrape.ProxiedFetch(*proxy)
- } else {
- fetch = flyscrape.Fetch()
- }
-
- fetch = flyscrape.CachedFetch(fetch)
script := fs.Arg(0)
err := flyscrape.Watch(script, func(s string) error {
cfg, scrape, err := flyscrape.Compile(s)
if err != nil {
- screen.Clear()
- screen.MoveTopLeft()
-
- if errs, ok := err.(interface{ Unwrap() []error }); ok {
- for _, err := range errs.Unwrap() {
- log.Printf("%s:%v\n", script, err)
- }
- } else {
- log.Println(err)
- }
-
- // ignore compilation errors
+ printCompileErr(script, err)
return nil
}
scraper := flyscrape.NewScraper()
scraper.ScrapeFunc = scrape
+
flyscrape.LoadModules(scraper, cfg)
+ scraper.DisableModule("followlinks")
+ screen.Clear()
+ screen.MoveTopLeft()
scraper.Run()
- scraper.OnResponse(func(resp *flyscrape.Response) {
- screen.Clear()
- screen.MoveTopLeft()
- if resp.Error != nil {
- log.Println(resp.Error)
- return
- }
- fmt.Println(flyscrape.PrettyPrint(resp.Data, ""))
- })
-
return nil
})
if err != nil && err != flyscrape.StopWatch {
@@ -97,3 +71,16 @@ Examples:
$ flyscrape dev example.js
`[1:])
}
+
+func printCompileErr(script string, err error) {
+ screen.Clear()
+ screen.MoveTopLeft()
+
+ if errs, ok := err.(interface{ Unwrap() []error }); ok {
+ for _, err := range errs.Unwrap() {
+ log.Printf("%s:%v\n", script, err)
+ }
+ } else {
+ log.Println(err)
+ }
+}
diff --git a/cmd/flyscrape/main.go b/cmd/flyscrape/main.go
index bac411e..81d0e2b 100644
--- a/cmd/flyscrape/main.go
+++ b/cmd/flyscrape/main.go
@@ -12,10 +12,11 @@ import (
"os"
"strings"
+ _ "github.com/philippta/flyscrape/modules/cache"
_ "github.com/philippta/flyscrape/modules/depth"
_ "github.com/philippta/flyscrape/modules/domainfilter"
_ "github.com/philippta/flyscrape/modules/followlinks"
- _ "github.com/philippta/flyscrape/modules/jsonprinter"
+ _ "github.com/philippta/flyscrape/modules/jsonprint"
_ "github.com/philippta/flyscrape/modules/ratelimit"
_ "github.com/philippta/flyscrape/modules/starturl"
_ "github.com/philippta/flyscrape/modules/urlfilter"
@@ -66,7 +67,7 @@ Usage:
flyscrape <command> [arguments]
Commands:
-
+
new creates a sample scraping script
run runs a scraping script
dev watches and re-runs a scraping script
diff --git a/cmd/flyscrape/run.go b/cmd/flyscrape/run.go
index 4580e6d..b467abe 100644
--- a/cmd/flyscrape/run.go
+++ b/cmd/flyscrape/run.go
@@ -12,6 +12,7 @@ import (
"time"
"github.com/philippta/flyscrape"
+ "github.com/philippta/flyscrape/modules/hook"
)
type RunCommand struct{}
@@ -41,14 +42,18 @@ func (c *RunCommand) Run(args []string) error {
scraper := flyscrape.NewScraper()
scraper.ScrapeFunc = scrape
+
flyscrape.LoadModules(scraper, cfg)
count := 0
start := time.Now()
- scraper.OnResponse(func(resp *flyscrape.Response) {
- count++
+ scraper.LoadModule(hook.Module{
+ ReceiveResponseFn: func(r *flyscrape.Response) {
+ count++
+ },
})
+
scraper.Run()
log.Printf("Scraped %d websites in %v\n", count, time.Since(start))