diff options
| -rw-r--r-- | examples/urls_from_file.js | 2 |
| -rw-r--r-- | scrape.go | 6 |
2 files changed, 7 insertions, 1 deletion
diff --git a/examples/urls_from_file.js b/examples/urls_from_file.js
index 0231032..4633c9c 100644
--- a/examples/urls_from_file.js
+++ b/examples/urls_from_file.js
@@ -1,7 +1,7 @@
 import urls from "./urls.txt"

 export const config = {
-    urls: urls.split("\n").filter(Boolean)
+    urls: urls.split("\n")
 };

 export default function({ doc }) {
diff --git a/scrape.go b/scrape.go
--- a/scrape.go
+++ b/scrape.go
@@ -10,6 +10,7 @@ import (
 	"log"
 	"net/http"
 	"net/http/cookiejar"
+	"strings"
 	"sync"

 	"github.com/cornelk/hashmap"
@@ -210,6 +211,11 @@ func (s *Scraper) process(url string, depth int) {
 }

 func (s *Scraper) enqueueJob(url string, depth int) {
+	url = strings.TrimSpace(url)
+	if url == "" {
+		return
+	}
+
 	if _, ok := s.visited.Get(url); ok {
 		return
 	}