-rw-r--r--  cmd/flyscrape/run.go  |   5
-rw-r--r--  js/template.js        |   8
-rw-r--r--  js_test.go            |  23
-rw-r--r--  scrape.go             | 168
-rw-r--r--  scrape_test.go        |  22
5 files changed, 105 insertions(+), 121 deletions(-)
diff --git a/cmd/flyscrape/run.go b/cmd/flyscrape/run.go
index 9a2a7bb..2d76a35 100644
--- a/cmd/flyscrape/run.go
+++ b/cmd/flyscrape/run.go
@@ -14,7 +14,6 @@ type RunCommand struct{}
func (c *RunCommand) Run(args []string) error {
fs := flag.NewFlagSet("flyscrape-run", flag.ContinueOnError)
- concurrent := fs.Int("concurrent", 0, "concurrency")
noPrettyPrint := fs.Bool("no-pretty-print", false, "no-pretty-print")
fs.Usage = c.Usage
@@ -40,7 +39,6 @@ func (c *RunCommand) Run(args []string) error {
svc := flyscrape.Scraper{
ScrapeOptions: opts,
ScrapeFunc: scrape,
- Concurrency: *concurrent,
}
count := 0
@@ -76,9 +74,6 @@ Usage:
Arguments:
- -concurrent NUM
- Determines the number of concurrent requests.
-
-no-pretty-print
Disables pretty printing of scrape results.
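Note: with the -concurrent flag gone, throughput is governed solely by the rate option, and the Scraper is built without a Concurrency field. A minimal sketch of the resulting construction in run.go, assuming opts and scrape still come from compiling the user script as before:

    svc := flyscrape.Scraper{
        ScrapeOptions: opts,   // options parsed from the user script
        ScrapeFunc:    scrape, // scrape function compiled from the user script
    }

    for res := range svc.Scrape() {
        // print or persist each ScrapeResult as it arrives
    }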
diff --git a/js/template.js b/js/template.js
index f75df28..56fffa0 100644
--- a/js/template.js
+++ b/js/template.js
@@ -1,9 +1,9 @@
-import { parse } from "flyscrape";
+import { parse } from 'flyscrape';
export const options = {
- url: "https://news.ycombinator.com/", // Specify the URL to start scraping from.
+ url: 'https://news.ycombinator.com/', // Specify the URL to start scraping from.
depth: 1, // Specify how deep links should be followed. (default = 0, no follow)
- allowedDomains: ["news.ycombinator.com"], // Specify the allowed domains. (default = domain from url)
+ allowedDomains: ['news.ycombinator.com'], // Specify the allowed domains. (default = domain from url)
blockedDomains: [], // Specify the blocked domains. (default = none)
rate: 100, // Specify the rate in requests per second. (default = 100)
}
@@ -13,7 +13,7 @@ export default function({ html, url }) {
const title = $('title');
const entries = $('.athing').toArray();
- if (entries.length == 0) {
+ if (!entries.length) {
return null; // Omits scraped pages without entries.
}
diff --git a/js_test.go b/js_test.go
index 34c4183..bf7bc46 100644
--- a/js_test.go
+++ b/js_test.go
@@ -1,7 +1,6 @@
package flyscrape_test
import (
- "os"
"testing"
"flyscrape"
@@ -19,11 +18,25 @@ var html = `
</body>
</html>`
-func TestV8(t *testing.T) {
- data, err := os.ReadFile("examples/esbuild.github.io.js")
- require.NoError(t, err)
+var script = `
+import { parse } from "flyscrape";
+
+export const options = {
+ url: "https://localhost/",
+}
- opts, run, err := flyscrape.Compile(string(data))
+export default function({ html, url }) {
+ const $ = parse(html);
+
+ return {
+ headline: $("h1").text(),
+ body: $("p").text()
+ }
+}
+`
+
+func TestV8(t *testing.T) {
+ opts, run, err := flyscrape.Compile(script)
require.NoError(t, err)
require.NotNil(t, opts)
require.NotNil(t, run)
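Note: inlining the script keeps the test self-contained instead of depending on examples/esbuild.github.io.js. A possible follow-up assertion, assuming the opts returned by Compile are the ScrapeOptions parsed from the script's exported options:

    require.Equal(t, "https://localhost/", opts.URL)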
diff --git a/scrape.go b/scrape.go
index ac75c73..be26e3c 100644
--- a/scrape.go
+++ b/scrape.go
@@ -32,133 +32,125 @@ type ScrapeResult struct {
Timestamp time.Time `json:"timestamp"`
}
-type (
- ScrapeFunc func(ScrapeParams) (any, error)
- FetchFunc func(url string) (string, error)
-)
+func (s *ScrapeResult) omit() bool {
+ return s.Error == nil && s.Data == nil
+}
+
+type ScrapeFunc func(ScrapeParams) (any, error)
+
+type FetchFunc func(url string) (string, error)
+
+type target struct {
+ url string
+ depth int
+}
type Scraper struct {
ScrapeOptions ScrapeOptions
ScrapeFunc ScrapeFunc
FetchFunc FetchFunc
- Concurrency int
visited *hashmap.Map[string, struct{}]
wg *sync.WaitGroup
+ jobs chan target
+ results chan ScrapeResult
}
-type target struct {
- url string
- depth int
-}
-
-type result struct {
- url string
- data any
- links []string
- err error
-}
+func (s *Scraper) init() {
+ s.visited = hashmap.New[string, struct{}]()
+ s.wg = &sync.WaitGroup{}
+ s.jobs = make(chan target, 1024)
+ s.results = make(chan ScrapeResult)
-func (s *Scraper) Scrape() <-chan ScrapeResult {
- if s.Concurrency == 0 {
- s.Concurrency = 1
- }
if s.FetchFunc == nil {
s.FetchFunc = Fetch()
}
+
if s.ScrapeOptions.Rate == 0 {
s.ScrapeOptions.Rate = 100
}
+
if len(s.ScrapeOptions.AllowedDomains) == 0 {
u, err := url.Parse(s.ScrapeOptions.URL)
if err == nil {
s.ScrapeOptions.AllowedDomains = []string{u.Host}
}
}
+}
- jobs := make(chan target, 1024)
- results := make(chan result)
- scraperesults := make(chan ScrapeResult)
- s.visited = hashmap.New[string, struct{}]()
- s.wg = &sync.WaitGroup{}
+func (s *Scraper) Scrape() <-chan ScrapeResult {
+ s.init()
+ s.enqueueJob(s.ScrapeOptions.URL, s.ScrapeOptions.Depth)
- go s.worker(jobs, results)
+ go s.worker()
+ go s.waitClose()
- s.wg.Add(1)
- s.visited.Set(s.ScrapeOptions.URL, struct{}{})
- jobs <- target{url: s.ScrapeOptions.URL, depth: s.ScrapeOptions.Depth}
+ return s.results
+}
- go func() {
- s.wg.Wait()
- close(jobs)
- close(results)
- }()
+func (s *Scraper) worker() {
+ var (
+ rate = time.Duration(float64(time.Second) / s.ScrapeOptions.Rate)
+ leakyjobs = leakychan(s.jobs, rate)
+ )
- go func() {
- for res := range results {
- scraperesults <- ScrapeResult{
- URL: res.url,
- Data: res.data,
- Links: res.links,
- Error: res.err,
- Timestamp: time.Now().UTC(),
+ for job := range leakyjobs {
+ go func(job target) {
+ defer s.wg.Done()
+
+ res := s.process(job)
+ if !res.omit() {
+ s.results <- res
}
- }
- close(scraperesults)
- }()
- return scraperesults
-}
-
-func (s *Scraper) worker(jobs chan target, results chan<- result) {
- rate := time.Duration(float64(time.Second) / s.ScrapeOptions.Rate)
- for j := range leakychan(jobs, rate) {
- j := j
- go func() {
- res := s.process(j)
-
- if j.depth > 0 {
- for _, l := range res.links {
- if _, ok := s.visited.Get(l); ok {
- continue
- }
-
- if !s.isURLAllowed(l) {
- continue
- }
-
- s.wg.Add(1)
- select {
- case jobs <- target{url: l, depth: j.depth - 1}:
- s.visited.Set(l, struct{}{})
- default:
- log.Println("queue is full, can't add url:", l)
- s.wg.Done()
- }
- }
+ if job.depth <= 0 {
+ return
}
- if res.err != nil || res.data != nil {
- results <- res
+ for _, l := range res.Links {
+ if _, ok := s.visited.Get(l); ok {
+ continue
+ }
+
+ if !s.isURLAllowed(l) {
+ continue
+ }
+
+ s.enqueueJob(l, job.depth-1)
}
- s.wg.Done()
- }()
+ }(job)
}
}
-func (s *Scraper) process(job target) result {
+func (s *Scraper) process(job target) (res ScrapeResult) {
+ res.URL = job.url
+ res.Timestamp = time.Now()
+
html, err := s.FetchFunc(job.url)
if err != nil {
- return result{url: job.url, err: err}
+ res.Error = err
+ return
}
- links := Links(html, job.url)
- data, err := s.ScrapeFunc(ScrapeParams{HTML: html, URL: job.url})
+ res.Links = links(html, job.url)
+ res.Data, err = s.ScrapeFunc(ScrapeParams{HTML: html, URL: job.url})
if err != nil {
- return result{url: job.url, links: links, err: err}
+ res.Error = err
+ return
}
- return result{url: job.url, data: data, links: links}
+ return
+}
+
+func (s *Scraper) enqueueJob(url string, depth int) {
+ s.wg.Add(1)
+ select {
+ case s.jobs <- target{url: url, depth: depth}:
+ s.visited.Set(url, struct{}{})
+ default:
+ log.Println("queue is full, can't add url:", url)
+ s.wg.Done()
+ }
}
func (s *Scraper) isURLAllowed(rawurl string) bool {
@@ -187,7 +179,13 @@ func (s *Scraper) isURLAllowed(rawurl string) bool {
return ok
}
-func Links(html string, origin string) []string {
+func (s *Scraper) waitClose() {
+ s.wg.Wait()
+ close(s.jobs)
+ close(s.results)
+}
+
+func links(html string, origin string) []string {
var links []string
doc, err := goquery.NewDocumentFromReader(strings.NewReader(html))
if err != nil {
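Note: the refactor moves the jobs and results channels into the Scraper itself, replaces the internal result struct with ScrapeResult, and throttles the worker with a leaky channel derived from the rate option instead of a fixed Concurrency. leakychan itself is not part of this diff; a minimal sketch of what such a rate-limiting forwarder could look like (name and signature here are hypothetical, and "time" is assumed to be imported):

    // leakychanSketch releases at most one queued job per interval.
    // Hypothetical stand-in for the leakychan used by worker().
    func leakychanSketch[T any](in <-chan T, interval time.Duration) <-chan T {
        out := make(chan T)
        ticker := time.NewTicker(interval)
        go func() {
            defer ticker.Stop()
            defer close(out)
            for v := range in {
                <-ticker.C // wait for the next tick before forwarding
                out <- v
            }
        }()
        return out
    }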
diff --git a/scrape_test.go b/scrape_test.go
index 643b10d..ffd8b70 100644
--- a/scrape_test.go
+++ b/scrape_test.go
@@ -11,7 +11,6 @@ import (
func TestScrape(t *testing.T) {
svc := flyscrape.Scraper{
- Concurrency: 10,
ScrapeOptions: flyscrape.ScrapeOptions{
URL: "http://example.com/foo/bar",
Depth: 1,
@@ -45,24 +44,3 @@ func TestScrape(t *testing.T) {
require.Equal(t, "http://example.com/foo/baz", urls[2])
require.Equal(t, "http://www.google.com/", urls[3])
}
-
-func TestFindLinks(t *testing.T) {
- origin := "http://example.com/foo/bar"
- html := `
- <html>
- <body>
- <a href="/baz">Baz</a>
- <a href="baz">Baz</a>
- <a href="http://www.google.com">Google</a>
- <a href="javascript:void(0)">Google</a>
- <a href="/foo#hello">Anchor</a>
- </body>
- </html>`
-
- links := flyscrape.Links(html, origin)
- require.Len(t, links, 4)
- require.Equal(t, "http://example.com/baz", links[0])
- require.Equal(t, "http://example.com/foo/baz", links[1])
- require.Equal(t, "http://www.google.com/", links[2])
- require.Equal(t, "http://example.com/foo", links[3])
-}
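Note: with Links unexported, link extraction is now covered through TestScrape. A rough sketch of how such a test can drive the scraper end to end with a stubbed fetcher, assuming only the fields shown in scrape.go above (the HTML fixture is illustrative):

    svc := flyscrape.Scraper{
        ScrapeOptions: flyscrape.ScrapeOptions{
            URL:   "http://example.com/foo/bar",
            Depth: 1,
        },
        ScrapeFunc: func(p flyscrape.ScrapeParams) (any, error) {
            return map[string]any{"url": p.URL}, nil
        },
        FetchFunc: func(url string) (string, error) {
            return `<html><body><a href="/baz">Baz</a></body></html>`, nil
        },
    }

    var urls []string
    for res := range svc.Scrape() {
        urls = append(urls, res.URL)
    }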