summary | refs | log | tree | commit | diff
path: root/fetch.go
diff options
context:
space:
mode:
author Philipp Tanlak <philipp.tanlak@gmail.com> 2023-08-27 19:10:49 +0200
committer Philipp Tanlak <philipp.tanlak@gmail.com> 2023-08-27 19:10:49 +0200
commit 5c16435e2218344a6e232ebb48cf022a32ba85d5 (patch)
tree 3cfa1dbc1f489ba4509fc408a8c0afccca7f9c7c /fetch.go
parent 52107c13b4c2c4efa9269b187916f3195be5a10d (diff)
add tests and allow urls
Diffstat (limited to 'fetch.go')
-rw-r--r-- fetch.go 39
1 files changed, 32 insertions, 7 deletions
diff --git a/fetch.go b/fetch.go
index 8303a76..f9d49d7 100644
--- a/fetch.go
+++ b/fetch.go
@@ -5,21 +5,29 @@
package flyscrape
import (
+ "crypto/tls"
"io"
"net/http"
+ "net/url"
"github.com/cornelk/hashmap"
)
-func CachedFetch() FetchFunc {
- cache := hashmap.New[string, string]()
+func ProxiedFetch(proxyURL string) FetchFunc {
+ pu, err := url.Parse(proxyURL)
+ if err != nil {
+ panic("invalid proxy url")
+ }
- return func(url string) (string, error) {
- if html, ok := cache.Get(url); ok {
- return html, nil
- }
+ client := http.Client{
+ Transport: &http.Transport{
+ Proxy: http.ProxyURL(pu),
+ TLSClientConfig: &tls.Config{InsecureSkipVerify: true},
+ },
+ }
- resp, err := http.Get(url)
+ return func(url string) (string, error) {
+ resp, err := client.Get(url)
if err != nil {
return "", err
}
@@ -31,6 +39,23 @@ func CachedFetch() FetchFunc {
}
html := string(body)
+ return html, nil
+ }
+}
+
+func CachedFetch(fetch FetchFunc) FetchFunc {
+ cache := hashmap.New[string, string]()
+
+ return func(url string) (string, error) {
+ if html, ok := cache.Get(url); ok {
+ return html, nil
+ }
+
+ html, err := fetch(url)
+ if err != nil {
+ return "", err
+ }
+
cache.Set(url, html)
return html, nil
}