summaryrefslogblamecommitdiff
path: root/cmd/watch/main.go
blob: 5065d8b4621e08644dad180d3fb65e8109a41ab3 (plain) (tree)


















































































                                                                                                     
package main

import (
	"encoding/json"
	"fmt"
	"io"
	"log"
	"net/http"
	"os"

	"flyscrape"

	"github.com/cornelk/hashmap"
	"github.com/inancgumus/screen"
)

func main() {
	if len(os.Args) != 2 {
		fmt.Println("Please provide a file to run.")
		os.Exit(1)
	}

	cache := hashmap.New[string, string]()

	err := flyscrape.Watch(os.Args[1], func(s string) error {
		opts, scrape, err := flyscrape.Compile(s)
		if err == nil {
			run(cache, opts, scrape)
		}
		return nil
	})
	if err != nil {
		log.Fatal(err)
	}
}

func run(cache *hashmap.Map[string, string], opts flyscrape.ScrapeOptions, fn flyscrape.ScrapeFunc) {
	opts.Depth = 0

	svc := flyscrape.Scraper{
		Concurrency:   20,
		ScrapeOptions: opts,
		ScrapeFunc:    fn,
		FetchFunc: func(url string) (string, error) {
			if html, ok := cache.Get(url); ok {
				return html, nil
			}
			html, err := fetch(url)
			if err != nil {
				return "", err
			}
			cache.Set(url, html)
			return html, nil
		},
	}

	result := <-svc.Scrape()
	if result.Error != nil {
		fmt.Println(result.Error)
	}

	screen.Clear()
	screen.MoveTopLeft()

	enc := json.NewEncoder(os.Stdout)
	enc.SetEscapeHTML(false)
	enc.SetIndent("", "   ")
	enc.Encode(result)
}

func fetch(url string) (string, error) {
	resp, err := http.Get(url)
	if err != nil {
		return "", err
	}
	defer resp.Body.Close()
	body, err := io.ReadAll(resp.Body)
	if err != nil {
		return "", err
	}

	return string(body), nil
}