summary refs log tree commit diff
path: root/modules
diff options
context:
space:
mode:
author Rafi Ramadhana <42462215+rafiramadhana@users.noreply.github.com> 2023-11-23 18:58:41 +0700
committer GitHub <noreply@github.com> 2023-11-23 12:58:41 +0100
commit cbdbbd249239345f88bea031beb55e84c2f47688 (patch)
tree f3b001d36c0da83a1827eb8008615f5f2577a638 /modules
parent 13322edf37510b6d3bb68a853368fd1a0a67a105 (diff)
Add custom request header (#18)
Diffstat (limited to 'modules')
-rw-r--r-- modules/headers/headers.go 42
-rw-r--r-- modules/headers/headers_test.go 94
2 files changed, 136 insertions, 0 deletions
diff --git a/modules/headers/headers.go b/modules/headers/headers.go
new file mode 100644
index 0000000..877b370
--- /dev/null
+++ b/modules/headers/headers.go
@@ -0,0 +1,42 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+package headers
+
+import (
+ "net/http"
+
+ "github.com/philippta/flyscrape"
+)
+
+func init() {
+ flyscrape.RegisterModule(Module{})
+}
+
+// Module carries the custom request headers that AdaptTransport applies to
+// outgoing requests. It is (de)serialized under the JSON key "headers".
+type Module struct {
+	// Headers maps a header name to the single value set for it.
+	Headers map[string]string `json:"headers"`
+}
+
+// ModuleInfo returns the module's ID ("headers") together with a constructor
+// that yields a pointer to a fresh, zero-valued Module.
+func (Module) ModuleInfo() flyscrape.ModuleInfo {
+	newModule := func() flyscrape.Module {
+		return &Module{}
+	}
+
+	return flyscrape.ModuleInfo{
+		ID:  "headers",
+		New: newModule,
+	}
+}
+
+// AdaptTransport wraps t so that every request sent through the returned
+// round tripper carries the configured headers. A configured header replaces
+// any value the request already holds under the same name. When no headers
+// are configured, t is returned unchanged.
+func (m Module) AdaptTransport(t http.RoundTripper) http.RoundTripper {
+	if len(m.Headers) == 0 {
+		return t
+	}
+
+	return flyscrape.RoundTripFunc(func(r *http.Request) (*http.Response, error) {
+		// The http.RoundTripper contract forbids modifying the caller's
+		// request, so the headers are applied to a copy instead.
+		req := r.Clone(r.Context())
+		if req.Header == nil {
+			// Clone keeps a nil header map nil, and Set would panic on it.
+			req.Header = make(http.Header, len(m.Headers))
+		}
+
+		for k, v := range m.Headers {
+			req.Header.Set(k, v)
+		}
+
+		return t.RoundTrip(req)
+	})
+}
+
+var _ flyscrape.TransportAdapter = Module{}
diff --git a/modules/headers/headers_test.go b/modules/headers/headers_test.go
new file mode 100644
index 0000000..72b9001
--- /dev/null
+++ b/modules/headers/headers_test.go
@@ -0,0 +1,94 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+package headers_test
+
+import (
+ "fmt"
+ "net/http"
+ "reflect"
+ "testing"
+
+ "github.com/philippta/flyscrape"
+ "github.com/philippta/flyscrape/modules/headers"
+ "github.com/philippta/flyscrape/modules/hook"
+ "github.com/philippta/flyscrape/modules/starturl"
+ "github.com/stretchr/testify/require"
+)
+
+// TestHeaders runs a scraper with the headers module for each case and checks
+// the header set that a transport hook observes on the outgoing request.
+func TestHeaders(t *testing.T) {
+	testCases := []struct {
+		name        string
+		headersFn   func() headers.Module
+		wantHeaders map[string][]string
+	}{
+		{
+			name: "empty custom headers",
+			headersFn: func() headers.Module {
+				return headers.Module{
+					Headers: map[string]string{},
+				}
+			},
+			wantHeaders: map[string][]string{"User-Agent": {"flyscrape/0.1"}},
+		},
+		{
+			name: "no duplicate headers between default and custom",
+			headersFn: func() headers.Module {
+				return headers.Module{
+					Headers: map[string]string{
+						"Authorization": "Basic ZGVtbzpwQDU1dzByZA==",
+					},
+				}
+			},
+			wantHeaders: map[string][]string{
+				"Authorization": {"Basic ZGVtbzpwQDU1dzByZA=="},
+				"User-Agent":    {"flyscrape/0.1"},
+			},
+		},
+		{
+			name: "duplicate headers between default and custom",
+			headersFn: func() headers.Module {
+				return headers.Module{
+					Headers: map[string]string{
+						"Authorization": "Basic ZGVtbzpwQDU1dzByZA==",
+						"User-Agent":    "Gecko/1.0",
+					},
+				}
+			},
+			wantHeaders: map[string][]string{
+				"Authorization": {"Basic ZGVtbzpwQDU1dzByZA=="},
+				"User-Agent":    {"Gecko/1.0"},
+			},
+		},
+	}
+
+	for _, tc := range testCases {
+		t.Run(tc.name, func(t *testing.T) {
+			// Named gotHeaders so the imported headers package is not
+			// shadowed inside the subtest.
+			var gotHeaders map[string][]string
+
+			mods := []flyscrape.Module{
+				&starturl.Module{URL: "http://www.example.com"},
+				hook.Module{
+					AdaptTransportFn: func(rt http.RoundTripper) http.RoundTripper {
+						return flyscrape.RoundTripFunc(func(r *http.Request) (*http.Response, error) {
+							// Record the headers of the request as seen by
+							// this hook, then forward it unchanged.
+							gotHeaders = r.Header
+							return rt.RoundTrip(r)
+						})
+					},
+				},
+				tc.headersFn(),
+			}
+
+			scraper := flyscrape.NewScraper()
+			scraper.Modules = mods
+			scraper.Run()
+
+			// The failure message is already rendered, so it is passed to
+			// True as a plain message rather than to Truef as a format.
+			require.True(
+				t,
+				reflect.DeepEqual(tc.wantHeaders, gotHeaders),
+				fmt.Sprintf("expected: %v; actual: %v", tc.wantHeaders, gotHeaders),
+			)
+		})
+	}
+}