summary | refs | log | tree | commit | diff
path: root/module.go
diff options
context:
space:
mode:
author Philipp Tanlak <philipp.tanlak@gmail.com> 2023-09-23 17:41:57 +0200
committer Philipp Tanlak <philipp.tanlak@gmail.com> 2023-09-23 17:41:57 +0200
commit 08df9258a532b653c243e077e82491dbe62ad854 (patch)
tree e72b04dba61e65d3bfb9cdb0ad3a87f5caa95eb3 /module.go
parent c6950bcd5cd8fe9e7cc63fde7216a5a9b93b8aa0 (diff)
refactor scraper into modules
Diffstat (limited to 'module.go')
-rw-r--r-- module.go 65
1 files changed, 65 insertions, 0 deletions
diff --git a/module.go b/module.go
new file mode 100644
index 0000000..bc90c02
--- /dev/null
+++ b/module.go
@@ -0,0 +1,65 @@
+package flyscrape
+
+import (
+ "encoding/json"
+ "fmt"
+ "net/http"
+ "sync"
+)
+
+type Module interface { // Module is the minimal contract a value must satisfy to be passed to RegisterModule.
+ ID() string // ID returns the key RegisterModule stores the module under; an empty string makes RegisterModule panic.
+}
+
+type Transport interface { // Transport is an optional capability interface; NOTE(review): how the scraper consumes it is not visible here - confirm in Scraper.LoadModule.
+ Transport(*http.Request) (*http.Response, error) // takes an outgoing *http.Request and yields the *http.Response or an error
+}
+
+type CanRequest interface { // CanRequest is an optional capability interface for modules that judge a URL before it is requested.
+ CanRequest(url string, depth int) bool // presumably false vetoes the request and depth is the link distance from the start URL - TODO confirm against the caller
+}
+
+type OnRequest interface { // OnRequest is an optional hook interface that receives a *Request.
+ OnRequest(*Request) // presumably invoked before the request is sent, allowing the module to inspect or modify it - TODO confirm
+}
+type OnResponse interface { // OnResponse is an optional hook interface that receives a *Response.
+ OnResponse(*Response) // presumably invoked once a response has been received - TODO confirm
+}
+
+type OnLoad interface { // OnLoad is an optional hook interface that receives a Visitor.
+ OnLoad(Visitor) // presumably called once when the module is loaded into a scraper - TODO confirm when and how often
+}
+
+type OnComplete interface { // OnComplete is an optional hook interface with a single no-argument callback.
+ OnComplete() // presumably called after scraping has finished - TODO confirm
+}
+
+func RegisterModule(m Module) { // RegisterModule stores m in the package-level registry under m.ID(); it panics on an empty or already-used id.
+ id := m.ID()
+ if id == "" {
+ panic("module id is missing") // an empty id cannot serve as a registry key
+ }
+
+ globalModulesMu.Lock() // write lock: the registry map is mutated below
+ defer globalModulesMu.Unlock()
+
+ if _, ok := globalModules[id]; ok {
+ panic(fmt.Sprintf("module %s already registered", id)) // duplicate ids are rejected rather than silently overwritten
+ }
+ globalModules[id] = m
+}
+
+func LoadModules(s *Scraper, opts Options) { // LoadModules decodes opts into every registered module and then hands each one to s.LoadModule.
+ globalModulesMu.RLock() // read lock: the registry is only iterated here, never modified
+ defer globalModulesMu.RUnlock()
+
+ for _, mod := range globalModules { // Go map iteration order is unspecified, so modules are loaded in no fixed order
+ json.Unmarshal(opts, mod) // NOTE(review): the error is discarded and the same opts is decoded into each registered value; looks like best-effort config - confirm
+ s.LoadModule(mod)
+ }
+}
+
+var ( // package-level module registry shared by RegisterModule and LoadModules
+ globalModules = map[string]Module{} // registered modules keyed by their ID()
+ globalModulesMu sync.RWMutex // guards globalModules: Lock in RegisterModule, RLock in LoadModules
+)