1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
|
package main
import (
"encoding/json"
"fmt"
"io"
"log"
"net/http"
"os"
"flyscrape"
"github.com/cornelk/hashmap"
"github.com/inancgumus/screen"
)
// main expects exactly one command-line argument, the path of the script to
// run. It hands that path to flyscrape.Watch together with a callback that
// compiles the script source it receives and, when compilation succeeds,
// executes a scrape via run.
//
// A single page cache is created up front and passed to every run so that
// pages fetched by an earlier run are served from memory afterwards.
func main() {
	if len(os.Args) != 2 {
		// Usage problems are diagnostics, so they go to stderr.
		fmt.Fprintln(os.Stderr, "Please provide a file to run.")
		os.Exit(1)
	}

	cache := hashmap.New[string, string]()

	err := flyscrape.Watch(os.Args[1], func(s string) error {
		opts, scrape, err := flyscrape.Compile(s)
		if err != nil {
			// Surface the compile error instead of dropping it, but still
			// return nil as before.
			// NOTE(review): presumably a non-nil return would make Watch
			// stop; confirm against flyscrape.Watch.
			fmt.Fprintln(os.Stderr, err)
			return nil
		}
		run(cache, opts, scrape)
		return nil
	})
	if err != nil {
		log.Fatal(err)
	}
}
// run executes one scrape with the given options and scrape function, then
// redraws the terminal with the first result rendered as indented JSON on
// stdout.
//
// cache memoises fetched pages by URL: a URL already present is served from
// the cache, anything else is downloaded with fetch and stored on success.
// Failed fetches are not cached.
func run(cache *hashmap.Map[string, string], opts flyscrape.ScrapeOptions, fn flyscrape.ScrapeFunc) {
	// The depth from the script is always overridden with 0.
	// NOTE(review): presumably this limits the scrape to the start URL;
	// confirm against flyscrape.ScrapeOptions.
	opts.Depth = 0

	svc := flyscrape.Scraper{
		Concurrency:   20,
		ScrapeOptions: opts,
		ScrapeFunc:    fn,
		FetchFunc: func(url string) (string, error) {
			if html, ok := cache.Get(url); ok {
				return html, nil
			}
			html, err := fetch(url)
			if err != nil {
				return "", err
			}
			cache.Set(url, html)
			return html, nil
		},
	}

	// Only the first value delivered on the result channel is consumed.
	result := <-svc.Scrape()

	// Clear the screen BEFORE printing anything: previously the error was
	// printed first and then immediately wiped by the clear.
	screen.Clear()
	screen.MoveTopLeft()

	if result.Error != nil {
		fmt.Println(result.Error)
	}

	enc := json.NewEncoder(os.Stdout)
	enc.SetEscapeHTML(false) // keep <, > and & readable in scraped text
	enc.SetIndent("", " ")
	if err := enc.Encode(result); err != nil {
		// An unencodable result would otherwise leave a blank screen with no
		// explanation.
		fmt.Fprintln(os.Stderr, err)
	}
}
// fetch downloads url with an HTTP GET and returns the response body as a
// string.
//
// It returns an error when the request cannot be made, when the server
// answers with a status outside the 2xx range, or when the body cannot be
// read. Rejecting non-2xx responses keeps server error pages from being
// handed to the caller as if they were the requested document.
//
// NOTE: http.Get uses http.DefaultClient, which has no timeout, so a stalled
// server can block this call indefinitely.
func fetch(url string) (string, error) {
	resp, err := http.Get(url)
	if err != nil {
		// The error from http.Get already names the method and URL.
		return "", err
	}
	defer resp.Body.Close()

	if resp.StatusCode < http.StatusOK || resp.StatusCode >= http.StatusMultipleChoices {
		return "", fmt.Errorf("fetching %q: unexpected status %s", url, resp.Status)
	}

	body, err := io.ReadAll(resp.Body)
	if err != nil {
		return "", fmt.Errorf("reading response body of %q: %w", url, err)
	}
	return string(body), nil
}
|