summary refs log tree commit diff
path: root/cmd
diff options
context:
space:
mode:
author Philipp Tanlak <philipp.tanlak@gmail.com> 2023-08-11 18:31:20 +0200
committer Philipp Tanlak <philipp.tanlak@gmail.com> 2023-08-11 18:31:20 +0200
commit 062b36fe5725d1267c66db2e506b4131d78ce772 (patch)
tree 998e5260feb1babac8dae512b56d67d8f20f7266 /cmd
parent 7e4cf39a0ba6ccbd5cc036700a8b1ff9358ecc3d (diff)
simplify project structure
Diffstat (limited to 'cmd')
-rw-r--r-- cmd/flyscrape/main.go 31
-rw-r--r-- cmd/watch/main.go 83
2 files changed, 102 insertions, 12 deletions
diff --git a/cmd/flyscrape/main.go b/cmd/flyscrape/main.go
index fb31056..85e40a9 100644
--- a/cmd/flyscrape/main.go
+++ b/cmd/flyscrape/main.go
@@ -7,9 +7,9 @@ import (
"io"
"net/http"
"os"
+ "time"
- "flyscrape/flyscrape"
- "flyscrape/js"
+ "flyscrape"
)
func main() {
@@ -17,14 +17,20 @@ func main() {
exit("Please provide a file to run.")
}
- opts, scrape, err := js.Compile(os.Args[1])
+ src, err := os.ReadFile(os.Args[1])
+ if err != nil {
+ exit(fmt.Sprintf("Error reading file: %v", err))
+ }
+
+ opts, scrape, err := flyscrape.Compile(string(src))
if err != nil {
exit(fmt.Sprintf("Error compiling JavaScript file: %v", err))
}
- svc := flyscrape.Service{
- ScrapeOptions: *opts,
+ svc := flyscrape.Scraper{
+ ScrapeOptions: opts,
ScrapeFunc: scrape,
+ Concurrency: 5,
FetchFunc: func(url string) (string, error) {
resp, err := http.Get(url)
if err != nil {
@@ -39,14 +45,15 @@ func main() {
return string(data), nil
},
}
- results := svc.Scrape()
- if err != nil {
- }
- fmt.Printf("%T\n", results[0])
- data, _ := json.MarshalIndent(results, "", " ")
- fmt.Println(string(data))
- return
+ count := 0
+ start := time.Now()
+ for result := range svc.Scrape() {
+ data, _ := json.MarshalIndent(result, "", " ")
+ fmt.Println(string(data))
+ count++
+ }
+ fmt.Printf("Scraped %d websites in %v\n", count, time.Since(start))
}
func exit(msg string) {
diff --git a/cmd/watch/main.go b/cmd/watch/main.go
new file mode 100644
index 0000000..5065d8b
--- /dev/null
+++ b/cmd/watch/main.go
@@ -0,0 +1,83 @@
+package main
+
+import (
+ "encoding/json"
+ "fmt"
+ "io"
+ "log"
+ "net/http"
+ "os"
+
+ "flyscrape"
+
+ "github.com/cornelk/hashmap"
+ "github.com/inancgumus/screen"
+)
+
+func main() {
+ if len(os.Args) != 2 {
+ fmt.Println("Please provide a file to run.")
+ os.Exit(1)
+ }
+
+ cache := hashmap.New[string, string]()
+
+ err := flyscrape.Watch(os.Args[1], func(s string) error {
+ opts, scrape, err := flyscrape.Compile(s)
+ if err == nil {
+ run(cache, opts, scrape)
+ }
+ return nil
+ })
+ if err != nil {
+ log.Fatal(err)
+ }
+}
+
+func run(cache *hashmap.Map[string, string], opts flyscrape.ScrapeOptions, fn flyscrape.ScrapeFunc) {
+ opts.Depth = 0
+
+ svc := flyscrape.Scraper{
+ Concurrency: 20,
+ ScrapeOptions: opts,
+ ScrapeFunc: fn,
+ FetchFunc: func(url string) (string, error) {
+ if html, ok := cache.Get(url); ok {
+ return html, nil
+ }
+ html, err := fetch(url)
+ if err != nil {
+ return "", err
+ }
+ cache.Set(url, html)
+ return html, nil
+ },
+ }
+
+ result := <-svc.Scrape()
+ if result.Error != nil {
+ fmt.Println(result.Error)
+ }
+
+ screen.Clear()
+ screen.MoveTopLeft()
+
+ enc := json.NewEncoder(os.Stdout)
+ enc.SetEscapeHTML(false)
+ enc.SetIndent("", " ")
+ enc.Encode(result)
+}
+
+func fetch(url string) (string, error) {
+ resp, err := http.Get(url)
+ if err != nil {
+ return "", err
+ }
+ defer resp.Body.Close()
+ body, err := io.ReadAll(resp.Body)
+ if err != nil {
+ return "", err
+ }
+
+ return string(body), nil
+}