diff options
| author | Philipp Tanlak <philipp.tanlak@gmail.com> | 2023-08-17 20:13:29 +0200 |
|---|---|---|
| committer | Philipp Tanlak <philipp.tanlak@gmail.com> | 2023-08-17 20:13:29 +0200 |
| commit | 8a44dc0856d7bf2cdc2eafa8594f4a47d488c3fd (patch) | |
| tree | 3943b4b5b17fce915727f89e038b0ae6199420d6 /scrape.go | |
| parent | 8812c84f32c74ac2f44af1abdb7e4e6f7cbf10b0 (diff) | |
omit empty scrape results
Diffstat (limited to 'scrape.go')
| -rw-r--r-- | scrape.go | 19 |
1 file changed, 14 insertions, 5 deletions
```diff
@@ -19,6 +19,7 @@ type ScrapeParams struct {
 type ScrapeOptions struct {
 	URL            string   `json:"url"`
 	AllowedDomains []string `json:"allowedDomains"`
+	BlockedDomains []string `json:"blockedDomains"`
 	Depth          int      `json:"depth"`
 	Rate           float64  `json:"rate"`
 }
@@ -135,7 +136,9 @@ func (s *Scraper) worker(jobs chan target, results chan<- result) {
 				}
 			}
 
-			results <- res
+			if res.err != nil || res.data != nil {
+				results <- res
+			}
 			s.wg.Done()
 		}()
 	}
@@ -163,17 +166,23 @@ func (s *Scraper) isURLAllowed(rawurl string) bool {
 	}
 
 	host := u.Host()
+	ok := false
 
 	for _, domain := range s.ScrapeOptions.AllowedDomains {
-		if domain == "*" {
-			return true
+		if domain == "*" || host == domain {
+			ok = true
+			break
 		}
+	}
+
+	for _, domain := range s.ScrapeOptions.BlockedDomains {
 		if host == domain {
-			return true
+			ok = false
+			break
 		}
 	}
 
-	return false
+	return ok
 }
 
 func Links(html string, origin string) []string {
```