From c796f4164c13e30135246c08304acd7142673f60 Mon Sep 17 00:00:00 2001 From: Philipp Tanlak Date: Sat, 17 Feb 2024 23:42:55 +0100 Subject: Make urls more fault tolerant --- scrape.go | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'scrape.go') diff --git a/scrape.go b/scrape.go index 019849d..f09cba6 100644 --- a/scrape.go +++ b/scrape.go @@ -10,6 +10,7 @@ import ( "log" "net/http" "net/http/cookiejar" + "strings" "sync" "github.com/cornelk/hashmap" @@ -210,6 +211,11 @@ func (s *Scraper) process(url string, depth int) { } func (s *Scraper) enqueueJob(url string, depth int) { + url = strings.TrimSpace(url) + if url == "" { + return + } + if _, ok := s.visited.Get(url); ok { return } -- cgit v1.2.3