summary | refs | log | tree | commit | diff
path: root/scrape.go
diff options
context:
space:
mode:
author Philipp Tanlak <philipp.tanlak@gmail.com> 2023-08-17 20:13:29 +0200
committer Philipp Tanlak <philipp.tanlak@gmail.com> 2023-08-17 20:13:29 +0200
commit 8a44dc0856d7bf2cdc2eafa8594f4a47d488c3fd (patch)
tree 3943b4b5b17fce915727f89e038b0ae6199420d6 /scrape.go
parent 8812c84f32c74ac2f44af1abdb7e4e6f7cbf10b0 (diff)
omit empty scrape results
Diffstat (limited to 'scrape.go')
-rw-r--r-- scrape.go 19
1 files changed, 14 insertions, 5 deletions
diff --git a/scrape.go b/scrape.go
index 6ff92dc..793e418 100644
--- a/scrape.go
+++ b/scrape.go
@@ -19,6 +19,7 @@ type ScrapeParams struct {
type ScrapeOptions struct {
URL string `json:"url"`
AllowedDomains []string `json:"allowedDomains"`
+ BlockedDomains []string `json:"blockedDomains"`
Depth int `json:"depth"`
Rate float64 `json:"rate"`
}
@@ -135,7 +136,9 @@ func (s *Scraper) worker(jobs chan target, results chan<- result) {
}
}
- results <- res
+ if res.err != nil || res.data != nil {
+ results <- res
+ }
s.wg.Done()
}()
}
@@ -163,17 +166,23 @@ func (s *Scraper) isURLAllowed(rawurl string) bool {
}
host := u.Host()
+ ok := false
for _, domain := range s.ScrapeOptions.AllowedDomains {
- if domain == "*" {
- return true
+ if domain == "*" || host == domain {
+ ok = true
+ break
}
+ }
+
+ for _, domain := range s.ScrapeOptions.BlockedDomains {
if host == domain {
- return true
+ ok = false
+ break
}
}
- return false
+ return ok
}
func Links(html string, origin string) []string {