diff options
| author | Philipp Tanlak <philipp.tanlak@gmail.com> | 2024-02-17 23:42:55 +0100 |
|---|---|---|
| committer | Philipp Tanlak <philipp.tanlak@gmail.com> | 2024-02-17 23:42:55 +0100 |
| commit | c796f4164c13e30135246c08304acd7142673f60 (patch) | |
| tree | 739f498d883df995313da1aefc68d5a9bd7b78af /scrape.go | |
| parent | f4a69b75da6d29680c0ebcded88f67016cf6fdc4 (diff) | |
Make urls more fault tolerant (tag: v0.7.2)
Diffstat (limited to 'scrape.go')
| -rw-r--r-- | scrape.go | 6 |
1 files changed, 6 insertions, 0 deletions
```diff
@@ -10,6 +10,7 @@ import (
 	"log"
 	"net/http"
 	"net/http/cookiejar"
+	"strings"
 	"sync"
 
 	"github.com/cornelk/hashmap"
@@ -210,6 +211,11 @@ func (s *Scraper) process(url string, depth int) {
 }
 
 func (s *Scraper) enqueueJob(url string, depth int) {
+	url = strings.TrimSpace(url)
+	if url == "" {
+		return
+	}
+
 	if _, ok := s.visited.Get(url); ok {
 		return
 	}
```