From 80e24f0c780725bc854362def00211e88cb673bd Mon Sep 17 00:00:00 2001
From: Philipp Tanlak
Date: Sat, 17 Feb 2024 21:49:05 +0100
Subject: Respect multiple urls in domain- and urlfilter

---
 modules/domainfilter/domainfilter.go | 13 +++++++++----
 modules/urlfilter/urlfilter.go       |  6 ++++++
 2 files changed, 15 insertions(+), 4 deletions(-)

(limited to 'modules')

diff --git a/modules/domainfilter/domainfilter.go b/modules/domainfilter/domainfilter.go
index e8691d3..ec95d68 100644
--- a/modules/domainfilter/domainfilter.go
+++ b/modules/domainfilter/domainfilter.go
@@ -15,6 +15,7 @@ func init() {
 
 type Module struct {
 	URL            string   `json:"url"`
+	URLs           []string `json:"urls"`
 	AllowedDomains []string `json:"allowedDomains"`
 	BlockedDomains []string `json:"blockedDomains"`
 
@@ -29,11 +30,15 @@ func (Module) ModuleInfo() flyscrape.ModuleInfo {
 }
 
 func (m *Module) Provision(v flyscrape.Context) {
-	if m.URL == "" {
-		return
+	if m.URL != "" {
+		if u, err := url.Parse(m.URL); err == nil {
+			m.AllowedDomains = append(m.AllowedDomains, u.Host())
+		}
 	}
-	if u, err := url.Parse(m.URL); err == nil {
-		m.AllowedDomains = append(m.AllowedDomains, u.Host())
+	for _, u := range m.URLs {
+		if u, err := url.Parse(u); err == nil {
+			m.AllowedDomains = append(m.AllowedDomains, u.Host())
+		}
 	}
 }
 
diff --git a/modules/urlfilter/urlfilter.go b/modules/urlfilter/urlfilter.go
index 1297c35..58675e8 100644
--- a/modules/urlfilter/urlfilter.go
+++ b/modules/urlfilter/urlfilter.go
@@ -16,6 +16,7 @@ func init() {
 
 type Module struct {
 	URL         string   `json:"url"`
+	URLs        []string `json:"urls"`
 	AllowedURLs []string `json:"allowedURLs"`
 	BlockedURLs []string `json:"blockedURLs"`
 
@@ -61,6 +62,11 @@ func (m *Module) ValidateRequest(r *flyscrape.Request) bool {
 	if r.URL == m.URL {
 		return true
 	}
+	for _, u := range m.URLs {
+		if r.URL == u {
+			return true
+		}
+	}
 
 	// allow if no filter is set
 	if len(m.allowedURLsRE) == 0 && len(m.blockedURLsRE) == 0 {
-- 
cgit v1.2.3