summary | refs | log | tree | commit | diff
path: root/scrape.go
diff options
context:
space:
mode:
author Philipp Tanlak <philipp.tanlak@gmail.com> 2023-10-11 20:20:30 +0200
committer Philipp Tanlak <philipp.tanlak@gmail.com> 2023-10-11 20:20:30 +0200
commit fb84ca746e92e371161f1e1de3b01a048a9ae979 (patch)
tree 5bb8fbb7fd654b241b389697cc46bad00ce2f8b7 /scrape.go
parent cd40ab75f44e9f6ac86beca576a934fd790fc9fb (diff)
Implement file based caching
Diffstat (limited to 'scrape.go')
-rw-r--r-- scrape.go 8
1 files changed, 7 insertions, 1 deletions
diff --git a/scrape.go b/scrape.go
index c1257a9..00a74bf 100644
--- a/scrape.go
+++ b/scrape.go
@@ -18,6 +18,7 @@ import (
type FetchFunc func(url string) (string, error)
type Context interface {
+ ScriptName() string
Visit(url string)
MarkVisited(url string)
MarkUnvisited(url string)
@@ -57,6 +58,7 @@ func NewScraper() *Scraper {
type Scraper struct {
ScrapeFunc ScrapeFunc
+ Script string
wg sync.WaitGroup
jobs chan target
@@ -95,6 +97,10 @@ func (s *Scraper) MarkUnvisited(url string) {
s.visited.Del(url)
}
+func (s *Scraper) ScriptName() string {
+ return s.Script
+}
+
func (s *Scraper) Run() {
for _, mod := range s.modules {
if v, ok := mod.(Provisioner); ok {
@@ -116,7 +122,7 @@ func (s *Scraper) Run() {
func (s *Scraper) initClient() {
jar, _ := cookiejar.New(nil)
- s.client = &http.Client{Jar: jar}
+ s.client = &http.Client{Jar: jar, Transport: http.DefaultTransport}
for _, mod := range s.modules {
if v, ok := mod.(TransportAdapter); ok {