From 03b3be0c3bbc70584e8988e1810dc28eacf4521f Mon Sep 17 00:00:00 2001 From: Philipp Tanlak Date: Tue, 17 Oct 2023 19:19:38 +0200 Subject: Add HTTP(S) Proxy support --- modules/proxy/proxy.go | 61 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) create mode 100644 modules/proxy/proxy.go (limited to 'modules/proxy/proxy.go') diff --git a/modules/proxy/proxy.go b/modules/proxy/proxy.go new file mode 100644 index 0000000..120a856 --- /dev/null +++ b/modules/proxy/proxy.go @@ -0,0 +1,61 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at http://mozilla.org/MPL/2.0/. + +package proxy + +import ( + "crypto/tls" + "math/rand" + "net/http" + "net/url" + + "github.com/philippta/flyscrape" +) + +func init() { + flyscrape.RegisterModule(Module{}) +} + +type Module struct { + Proxies []string `json:"proxies"` + + transports []*http.Transport +} + +func (Module) ModuleInfo() flyscrape.ModuleInfo { + return flyscrape.ModuleInfo{ + ID: "proxy", + New: func() flyscrape.Module { return new(Module) }, + } +} + +func (m *Module) Provision(ctx flyscrape.Context) { + if m.disabled() { + return + } + + for _, purl := range m.Proxies { + if parsed, err := url.Parse(purl); err == nil { + m.transports = append(m.transports, &http.Transport{ + Proxy: http.ProxyURL(parsed), + TLSClientConfig: &tls.Config{InsecureSkipVerify: true}, + }) + } + } +} + +func (m *Module) AdaptTransport(t http.RoundTripper) http.RoundTripper { + if m.disabled() { + return t + } + + return flyscrape.RoundTripFunc(func(r *http.Request) (*http.Response, error) { + transport := m.transports[rand.Intn(len(m.transports))] + return transport.RoundTrip(r) + }) +} + +func (m *Module) disabled() bool { + return len(m.Proxies) == 0 +} -- cgit v1.2.3