summary | refs | log | tree | commit | diff
path: root/modules
diff options
context:
space:
mode:
author: Philipp Tanlak <philipp.tanlak@gmail.com> 2024-02-17 21:49:05 +0100
committer: Philipp Tanlak <philipp.tanlak@gmail.com> 2024-02-17 21:49:05 +0100
commit: 80e24f0c780725bc854362def00211e88cb673bd (patch)
tree: 357e16dbeb5409c77480000edf4f8bb9cd24518f /modules
parent: c460e48cc920f39722bb9e404a399dbaa8d89c89 (diff)
Respect multiple urls in domain- and urlfilter
Diffstat (limited to 'modules')
-rw-r--r-- modules/domainfilter/domainfilter.go 13
-rw-r--r-- modules/urlfilter/urlfilter.go 6
2 files changed, 15 insertions, 4 deletions
diff --git a/modules/domainfilter/domainfilter.go b/modules/domainfilter/domainfilter.go
index e8691d3..ec95d68 100644
--- a/modules/domainfilter/domainfilter.go
+++ b/modules/domainfilter/domainfilter.go
@@ -15,6 +15,7 @@ func init() {
type Module struct {
URL string `json:"url"`
+ URLs []string `json:"urls"`
AllowedDomains []string `json:"allowedDomains"`
BlockedDomains []string `json:"blockedDomains"`
@@ -29,11 +30,15 @@ func (Module) ModuleInfo() flyscrape.ModuleInfo {
}
func (m *Module) Provision(v flyscrape.Context) {
- if m.URL == "" {
- return
+ if m.URL != "" {
+ if u, err := url.Parse(m.URL); err == nil {
+ m.AllowedDomains = append(m.AllowedDomains, u.Host())
+ }
}
- if u, err := url.Parse(m.URL); err == nil {
- m.AllowedDomains = append(m.AllowedDomains, u.Host())
+ for _, u := range m.URLs {
+ if u, err := url.Parse(u); err == nil {
+ m.AllowedDomains = append(m.AllowedDomains, u.Host())
+ }
}
}
diff --git a/modules/urlfilter/urlfilter.go b/modules/urlfilter/urlfilter.go
index 1297c35..58675e8 100644
--- a/modules/urlfilter/urlfilter.go
+++ b/modules/urlfilter/urlfilter.go
@@ -16,6 +16,7 @@ func init() {
type Module struct {
URL string `json:"url"`
+ URLs []string `json:"urls"`
AllowedURLs []string `json:"allowedURLs"`
BlockedURLs []string `json:"blockedURLs"`
@@ -61,6 +62,11 @@ func (m *Module) ValidateRequest(r *flyscrape.Request) bool {
if r.URL == m.URL {
return true
}
+ for _, u := range m.URLs {
+ if r.URL == u {
+ return true
+ }
+ }
// allow if no filter is set
if len(m.allowedURLsRE) == 0 && len(m.blockedURLsRE) == 0 {