From 1fc497fbdc79a43c62ac2e8eaf4827752dbeef8e Mon Sep 17 00:00:00 2001 From: Philipp Tanlak Date: Thu, 5 Oct 2023 14:53:37 +0200 Subject: Refactor codebase into modules --- module.go | 91 +++++++++++++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 74 insertions(+), 17 deletions(-) (limited to 'module.go') diff --git a/module.go b/module.go index 1839b76..0465808 100644 --- a/module.go +++ b/module.go @@ -1,44 +1,101 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at http://mozilla.org/MPL/2.0/. + package flyscrape import ( "encoding/json" "net/http" + "sync" ) -type Module any +type Module interface { + ModuleInfo() ModuleInfo +} + +type ModuleInfo struct { + ID string + New func() Module +} -type Transport interface { - Transport(*http.Request) (*http.Response, error) +type TransportAdapter interface { + AdaptTransport(http.RoundTripper) http.RoundTripper } -type CanRequest interface { - CanRequest(url string, depth int) bool +type RequestValidator interface { + ValidateRequest(*Request) bool } -type OnRequest interface { - OnRequest(*Request) +type RequestBuilder interface { + BuildRequest(*Request) } -type OnResponse interface { - OnResponse(*Response) + +type ResponseReceiver interface { + ReceiveResponse(*Response) } -type OnLoad interface { - OnLoad(Visitor) +type Provisioner interface { + Provision(Context) } -type OnComplete interface { - OnComplete() +type Finalizer interface { + Finalize() } func RegisterModule(mod Module) { - globalModules = append(globalModules, mod) + modulesMu.Lock() + defer modulesMu.Unlock() + + id := mod.ModuleInfo().ID + if _, ok := modules[id]; ok { + panic("module with id: " + id + " already registered") + } + modules[mod.ModuleInfo().ID] = mod } func LoadModules(s *Scraper, cfg Config) { - for _, mod := range globalModules { - json.Unmarshal(cfg, mod) + modulesMu.RLock() + defer modulesMu.RUnlock() + + loaded := map[string]struct{}{} + + // load standard modules in order + for _, id := range moduleOrder { + mod := modules[id].ModuleInfo().New() + if err := json.Unmarshal(cfg, mod); err != nil { + panic("failed to decode config: " + err.Error()) + } + s.LoadModule(mod) + loaded[id] = struct{}{} + } + + // load custom modules + for id := range modules { + if _, ok := loaded[id]; ok { + continue + } + mod := modules[id].ModuleInfo().New() + if err := json.Unmarshal(cfg, mod); err != nil { + panic("failed to decode config: " + err.Error()) + } s.LoadModule(mod) + loaded[id] = struct{}{} } } -var globalModules = []Module{} +var ( + modules = map[string]Module{} + modulesMu sync.RWMutex + + moduleOrder = []string{ + "cache", + "starturl", + "followlinks", + "depth", + "domainfilter", + "urlfilter", + "ratelimit", + "jsonprint", + } +) -- cgit v1.2.3