diff options
| author | Philipp Tanlak <philipp.tanlak@gmail.com> | 2023-09-23 17:41:57 +0200 |
|---|---|---|
| committer | Philipp Tanlak <philipp.tanlak@gmail.com> | 2023-09-23 17:41:57 +0200 |
| commit | 08df9258a532b653c243e077e82491dbe62ad854 (patch) | |
| tree | e72b04dba61e65d3bfb9cdb0ad3a87f5caa95eb3 /module.go | |
| parent | c6950bcd5cd8fe9e7cc63fde7216a5a9b93b8aa0 (diff) | |
refactor scraper into modules
Diffstat (limited to 'module.go')
| -rw-r--r-- | module.go | 65 |
1 files changed, 65 insertions, 0 deletions
diff --git a/module.go b/module.go new file mode 100644 index 0000000..bc90c02 --- /dev/null +++ b/module.go @@ -0,0 +1,65 @@ +package flyscrape + +import ( + "encoding/json" + "fmt" + "net/http" + "sync" +) + +type Module interface { + ID() string +} + +type Transport interface { + Transport(*http.Request) (*http.Response, error) +} + +type CanRequest interface { + CanRequest(url string, depth int) bool +} + +type OnRequest interface { + OnRequest(*Request) +} +type OnResponse interface { + OnResponse(*Response) +} + +type OnLoad interface { + OnLoad(Visitor) +} + +type OnComplete interface { + OnComplete() +} + +func RegisterModule(m Module) { + id := m.ID() + if id == "" { + panic("module id is missing") + } + + globalModulesMu.Lock() + defer globalModulesMu.Unlock() + + if _, ok := globalModules[id]; ok { + panic(fmt.Sprintf("module %s already registered", id)) + } + globalModules[id] = m +} + +func LoadModules(s *Scraper, opts Options) { + globalModulesMu.RLock() + defer globalModulesMu.RUnlock() + + for _, mod := range globalModules { + json.Unmarshal(opts, mod) + s.LoadModule(mod) + } +} + +var ( + globalModules = map[string]Module{} + globalModulesMu sync.RWMutex +) |