summary | refs | log | tree | commit | diff
path: root/modules/cache
diff options
context:
space:
mode:
author: Philipp Tanlak <philipp.tanlak@gmail.com> 2023-10-05 14:53:37 +0200
committer: Philipp Tanlak <philipp.tanlak@gmail.com> 2023-10-05 14:53:37 +0200
commit: 1fc497fbdc79a43c62ac2e8eaf4827752dbeef8e (patch)
tree: 67738e213ef97f249bdfa0f1bddda0839192cb77 /modules/cache
parent: bd9e7f7acfd855d4685aa4544169c0e29cdbf205 (diff)
Refactor codebase into modules
Diffstat (limited to 'modules/cache')
-rw-r--r-- modules/cache/cache.go 78
-rw-r--r-- modules/cache/cache_test.go 38
2 files changed, 116 insertions, 0 deletions
diff --git a/modules/cache/cache.go b/modules/cache/cache.go
new file mode 100644
index 0000000..1a321be
--- /dev/null
+++ b/modules/cache/cache.go
@@ -0,0 +1,78 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+package cache
+
+import (
+ "bufio"
+ "bytes"
+ "net/http"
+ "net/http/httputil"
+
+ "github.com/cornelk/hashmap"
+ "github.com/philippta/flyscrape"
+)
+
+func init() {
+ flyscrape.RegisterModule(Module{})
+}
+
+// Module caches HTTP responses in memory, keyed by request method and URL.
+type Module struct {
+ // Cache is read from the "cache" JSON key. An empty string disables the
+ // module; the code in this file does not interpret the value beyond
+ // checking whether it is empty.
+ Cache string `json:"cache"`
+
+ // cache maps a cache key to the wire-format dump of a response. It is
+ // created by Provision when the module is enabled.
+ cache *hashmap.Map[string, []byte]
+}
+
+// ModuleInfo describes this module to flyscrape: its ID is "cache" and its
+// constructor returns a pointer to a fresh zero-value Module.
+func (Module) ModuleInfo() flyscrape.ModuleInfo {
+	newModule := func() flyscrape.Module { return &Module{} }
+	return flyscrape.ModuleInfo{ID: "cache", New: newModule}
+}
+
+// Provision allocates the response store the first time it is called on an
+// enabled module. It does nothing when the module is disabled or when a
+// store already exists, so an existing store is kept across calls.
+func (m *Module) Provision(flyscrape.Context) {
+	if m.disabled() || m.cache != nil {
+		return
+	}
+	m.cache = hashmap.New[string, []byte]()
+}
+
+// AdaptTransport wraps t with a caching layer. When the module is disabled
+// (Cache is empty) t is returned unchanged.
+//
+// The returned transport looks up each request under cacheKey(r). On a hit
+// the stored bytes are parsed back into a response and returned without
+// calling t. On a miss the request is forwarded to t and the full response,
+// including its body, is dumped and stored before being returned.
+//
+// m.cache is used without a nil check; it is created by Provision, which is
+// presumably called before any request is made (verify against the caller).
+func (m *Module) AdaptTransport(t http.RoundTripper) http.RoundTripper {
+	if m.disabled() {
+		return t
+	}
+
+	return flyscrape.RoundTripFunc(func(r *http.Request) (*http.Response, error) {
+		key := cacheKey(r)
+
+		if b, ok := m.cache.Get(key); ok {
+			// A stored entry that fails to parse is ignored: the request
+			// falls through to t and the entry is overwritten below.
+			if resp, err := http.ReadResponse(bufio.NewReader(bytes.NewReader(b)), r); err == nil {
+				return resp, nil
+			}
+		}
+
+		resp, err := t.RoundTrip(r)
+		if err != nil {
+			// Forward the wrapped transport's result untouched.
+			return resp, err
+		}
+
+		encoded, err := httputil.DumpResponse(resp, true)
+		if err != nil {
+			// The body may be partially consumed after a failed dump, so
+			// the response is not usable. A RoundTripper must not return
+			// a response together with an error: release the body and
+			// report only the error.
+			if resp.Body != nil {
+				// The close error is dropped on purpose; the dump
+				// error is the one worth reporting.
+				_ = resp.Body.Close()
+			}
+			return nil, err
+		}
+
+		m.cache.Set(key, encoded)
+		return resp, nil
+	})
+}
+
+// disabled reports whether the Cache setting is empty.
+func (m *Module) disabled() bool {
+	return len(m.Cache) == 0
+}
+
+// cacheKey builds the lookup key for r: the request method, a single space,
+// and the string form of the request URL.
+func cacheKey(r *http.Request) string {
+	method := r.Method
+	target := r.URL.String()
+	return method + " " + target
+}
diff --git a/modules/cache/cache_test.go b/modules/cache/cache_test.go
new file mode 100644
index 0000000..4565e00
--- /dev/null
+++ b/modules/cache/cache_test.go
@@ -0,0 +1,38 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+package cache_test
+
+import (
+ "net/http"
+ "testing"
+
+ "github.com/philippta/flyscrape"
+ "github.com/philippta/flyscrape/modules/cache"
+ "github.com/philippta/flyscrape/modules/hook"
+ "github.com/philippta/flyscrape/modules/starturl"
+ "github.com/stretchr/testify/require"
+)
+
+func TestCache(t *testing.T) {
+ cachemod := &cache.Module{Cache: "memory"}
+ calls := 0
+
+ for i := 0; i < 2; i++ {
+ scraper := flyscrape.NewScraper()
+ scraper.LoadModule(&starturl.Module{URL: "http://www.example.com"})
+ scraper.LoadModule(hook.Module{
+ AdaptTransportFn: func(rt http.RoundTripper) http.RoundTripper {
+ return flyscrape.RoundTripFunc(func(r *http.Request) (*http.Response, error) {
+ calls++
+ return flyscrape.MockResponse(200, "foo")
+ })
+ },
+ })
+ scraper.LoadModule(cachemod)
+ scraper.Run()
+ }
+
+ require.Equal(t, 1, calls)
+}