summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Philipp Tanlak <philipp.tanlak@gmail.com> 2024-02-17 23:42:55 +0100
committer Philipp Tanlak <philipp.tanlak@gmail.com> 2024-02-17 23:42:55 +0100
commit c796f4164c13e30135246c08304acd7142673f60 (patch)
tree 739f498d883df995313da1aefc68d5a9bd7b78af
parent f4a69b75da6d29680c0ebcded88f67016cf6fdc4 (diff)
Make urls more fault tolerant (tag: v0.7.2)
-rw-r--r-- examples/urls_from_file.js 2
-rw-r--r-- scrape.go 6
2 files changed, 7 insertions, 1 deletions
diff --git a/examples/urls_from_file.js b/examples/urls_from_file.js
index 0231032..4633c9c 100644
--- a/examples/urls_from_file.js
+++ b/examples/urls_from_file.js
@@ -1,7 +1,7 @@
import urls from "./urls.txt"
export const config = {
- urls: urls.split("\n").filter(Boolean)
+ urls: urls.split("\n")
};
export default function({ doc }) {
diff --git a/scrape.go b/scrape.go
index 019849d..f09cba6 100644
--- a/scrape.go
+++ b/scrape.go
@@ -10,6 +10,7 @@ import (
"log"
"net/http"
"net/http/cookiejar"
+ "strings"
"sync"
"github.com/cornelk/hashmap"
@@ -210,6 +211,11 @@ func (s *Scraper) process(url string, depth int) {
}
func (s *Scraper) enqueueJob(url string, depth int) {
+ url = strings.TrimSpace(url)
+ if url == "" {
+ return
+ }
+
if _, ok := s.visited.Get(url); ok {
return
}