summary | refs | log | tree | commit | diff
path: root/module.go
diff options
context:
space:
mode:
author Philipp Tanlak <philipp.tanlak@gmail.com> 2023-10-05 14:53:37 +0200
committer Philipp Tanlak <philipp.tanlak@gmail.com> 2023-10-05 14:53:37 +0200
commit 1fc497fbdc79a43c62ac2e8eaf4827752dbeef8e (patch)
tree 67738e213ef97f249bdfa0f1bddda0839192cb77 /module.go
parent bd9e7f7acfd855d4685aa4544169c0e29cdbf205 (diff)
Refactor codebase into modules
Diffstat (limited to 'module.go')
-rw-r--r-- module.go 91
1 files changed, 74 insertions, 17 deletions
diff --git a/module.go b/module.go
index 1839b76..0465808 100644
--- a/module.go
+++ b/module.go
@@ -1,44 +1,101 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
package flyscrape
import (
"encoding/json"
"net/http"
+ "sync"
)
-type Module any
+type Module interface {
+ ModuleInfo() ModuleInfo
+}
+
+type ModuleInfo struct {
+ ID string
+ New func() Module
+}
-type Transport interface {
- Transport(*http.Request) (*http.Response, error)
+type TransportAdapter interface {
+ AdaptTransport(http.RoundTripper) http.RoundTripper
}
-type CanRequest interface {
- CanRequest(url string, depth int) bool
+type RequestValidator interface {
+ ValidateRequest(*Request) bool
}
-type OnRequest interface {
- OnRequest(*Request)
+type RequestBuilder interface {
+ BuildRequest(*Request)
}
-type OnResponse interface {
- OnResponse(*Response)
+
+type ResponseReceiver interface {
+ ReceiveResponse(*Response)
}
-type OnLoad interface {
- OnLoad(Visitor)
+type Provisioner interface {
+ Provision(Context)
}
-type OnComplete interface {
- OnComplete()
+type Finalizer interface {
+ Finalize()
}
func RegisterModule(mod Module) {
- globalModules = append(globalModules, mod)
+ modulesMu.Lock()
+ defer modulesMu.Unlock()
+
+ id := mod.ModuleInfo().ID
+ if _, ok := modules[id]; ok {
+ panic("module with id: " + id + " already registered")
+ }
+ modules[mod.ModuleInfo().ID] = mod
}
func LoadModules(s *Scraper, cfg Config) {
- for _, mod := range globalModules {
- json.Unmarshal(cfg, mod)
+ modulesMu.RLock()
+ defer modulesMu.RUnlock()
+
+ loaded := map[string]struct{}{}
+
+ // load standard modules in order
+ for _, id := range moduleOrder {
+ mod := modules[id].ModuleInfo().New()
+ if err := json.Unmarshal(cfg, mod); err != nil {
+ panic("failed to decode config: " + err.Error())
+ }
+ s.LoadModule(mod)
+ loaded[id] = struct{}{}
+ }
+
+ // load custom modules
+ for id := range modules {
+ if _, ok := loaded[id]; ok {
+ continue
+ }
+ mod := modules[id].ModuleInfo().New()
+ if err := json.Unmarshal(cfg, mod); err != nil {
+ panic("failed to decode config: " + err.Error())
+ }
s.LoadModule(mod)
+ loaded[id] = struct{}{}
}
}
-var globalModules = []Module{}
+var (
+ modules = map[string]Module{}
+ modulesMu sync.RWMutex
+
+ moduleOrder = []string{
+ "cache",
+ "starturl",
+ "followlinks",
+ "depth",
+ "domainfilter",
+ "urlfilter",
+ "ratelimit",
+ "jsonprint",
+ }
+)