create new command

author: Philipp Tanlak <philipp.tanlak@gmail.com> 2023-08-16 19:05:24 +0200
committer: Philipp Tanlak <philipp.tanlak@gmail.com> 2023-08-16 19:05:24 +0200
commit: d82e66800478219dd924c6969bd91dbfe004fc9d (patch)
tree: 2c8211096fa38d619c7f5260b0193aea3c1a56ff
parent: 807796ad35b48c58f61f6c058e12ec10078fd0e3 (diff)
6 files changed, 91 insertions, 3 deletions
diff --git a/cmd/flyscrape/main.go b/cmd/flyscrape/main.go
index ab57d02..299e7e5 100644
--- a/cmd/flyscrape/main.go
+++ b/cmd/flyscrape/main.go
@@ -30,6 +30,8 @@ func (m *Main) Run(args []string) error {
 	}
 
 	switch cmd {
+	case "new":
+		return (&NewCommand{}).Run(args)
 	case "run":
 		return (&RunCommand{}).Run(args)
 	case "watch":
@@ -53,6 +55,7 @@ Usage:
 
 Commands:
     
+    new    creates a sample scraping script
     run    runs a scraping script
     watch  watches and re-runs a scraping script
 `[1:])
diff --git a/cmd/flyscrape/new.go b/cmd/flyscrape/new.go
new file mode 100644
index 0000000..7a4c662
--- /dev/null
+++ b/cmd/flyscrape/new.go
@@ -0,0 +1,52 @@
+package main
+
+import (
+	"flag"
+	"fmt"
+	"os"
+
+	"flyscrape/js"
+)
+
+type NewCommand struct{}
+
+func (c *NewCommand) Run(args []string) error {
+	fs := flag.NewFlagSet("flyscrape-new", flag.ContinueOnError)
+	fs.Usage = c.Usage
+
+	if err := fs.Parse(args); err != nil {
+		return err
+	} else if fs.NArg() == 0 || fs.Arg(0) == "" {
+		return fmt.Errorf("script path required")
+	} else if fs.NArg() > 1 {
+		return fmt.Errorf("too many arguments")
+	}
+
+	script := fs.Arg(0)
+	if _, err := os.Stat(script); err == nil {
+		return fmt.Errorf("script already exists")
+	}
+
+	if err := os.WriteFile(script, js.Template, 0o644); err != nil {
+		return fmt.Errorf("failed to create script %q: %w", script, err)
+	}
+
+	fmt.Printf("Scraping script %v created.\n", script)
+	return nil
+}
+
+func (c *NewCommand) Usage() {
+	fmt.Println(`
+The new command creates a new scraping script.
+
+Usage:
+
+    flyscrape new SCRIPT
+
+
+Examples:
+
+    # Create a new scraping script.
+    $ flyscrape new example.js
+`[1:])
+}
diff --git a/cmd/flyscrape/run.go b/cmd/flyscrape/run.go
index ca037d2..8ec9390 100644
--- a/cmd/flyscrape/run.go
+++ b/cmd/flyscrape/run.go
@@ -28,12 +28,12 @@ func (c *RunCommand) Run(args []string) error {
 	script := fs.Arg(0)
 	src, err := os.ReadFile(script)
 	if err != nil {
-		return fmt.Errorf("failed to read script %q: %v", script, err)
+		return fmt.Errorf("failed to read script %q: %w", script, err)
 	}
 
 	opts, scrape, err := flyscrape.Compile(string(src))
 	if err != nil {
-		return fmt.Errorf("failed to compile script: %v", err)
+		return fmt.Errorf("failed to compile script: %w", err)
 	}
 
 	svc := flyscrape.Scraper{
diff --git a/cmd/flyscrape/watch.go b/cmd/flyscrape/watch.go
index ca006db..99fac4e 100644
--- a/cmd/flyscrape/watch.go
+++ b/cmd/flyscrape/watch.go
@@ -27,7 +27,7 @@ func (c *WatchCommand) Run(args []string) error {
 	fetch := flyscrape.CachedFetch()
 	script := fs.Arg(0)
 
-	flyscrape.Watch(script, func(s string) error {
+	err := flyscrape.Watch(script, func(s string) error {
 		opts, scrape, err := flyscrape.Compile(s)
 		if err != nil {
 			log.Println(err)
@@ -53,6 +53,9 @@ func (c *WatchCommand) Run(args []string) error {
 		flyscrape.PrettyPrint(result)
 		return nil
 	})
+	if err != nil && err != flyscrape.StopWatch {
+		return fmt.Errorf("failed to watch script %q: %w", script, err)
+	}
 
 	return nil
 }
diff --git a/js/embed.go b/js/embed.go
index 5413e77..dcc8d93 100644
--- a/js/embed.go
+++ b/js/embed.go
@@ -6,3 +6,6 @@ import _ "embed"
 
 //go:embed flyscrape_bundle.js
 var Flyscrape string
+
+//go:embed template.js
+var Template []byte
diff --git a/js/template.js b/js/template.js
new file mode 100644
index 0000000..d33adc5
--- /dev/null
+++ b/js/template.js
@@ -0,0 +1,27 @@
+import { parse } from "flyscrape";
+
+export const options = {
+    url: "https://news.ycombinator.com/",     // Specify the URL to start scraping from.
+    depth: 1,                                 // Specify how deep links should be followed (0 = no follow).
+    allowedDomains: ["news.ycombinator.com"], // Specify the allowed domains to follow.
+}
+
+export default function({ html, url }) {
+    const $ = parse(html);
+
+    return {
+        title: $('title').text(),
+        entries: $('.athing').toArray().map(entry => {
+            const link = $(entry).find('.titleline > a');
+            const rank = $(entry).find('.rank');
+            const points = $(entry).next().find('.score');
+
+            return {
+                title: link.text(),                                     // Extract the title text.
+                url: link.attr('href'),                                 // Extract the link href.
+                rank: parseInt(rank.text().slice(0, -1)),               // Extract and cleanup the rank.
+                points: parseInt(points.text().replace(' points', '')), // Extract and cleanup the points.
+            }
+        }),
+    };
+}
author	Philipp Tanlak <philipp.tanlak@gmail.com>	2023-08-16 19:05:24 +0200
committer	Philipp Tanlak <philipp.tanlak@gmail.com>	2023-08-16 19:05:24 +0200
commit	d82e66800478219dd924c6969bd91dbfe004fc9d (patch)
tree	2c8211096fa38d619c7f5260b0193aea3c1a56ff
parent	807796ad35b48c58f61f6c058e12ec10078fd0e3 (diff)