diff options
| author | Philipp Tanlak <philipp.tanlak@gmail.com> | 2023-10-11 20:20:30 +0200 |
|---|---|---|
| committer | Philipp Tanlak <philipp.tanlak@gmail.com> | 2023-10-11 20:20:30 +0200 |
| commit | fb84ca746e92e371161f1e1de3b01a048a9ae979 (patch) | |
| tree | 5bb8fbb7fd654b241b389697cc46bad00ce2f8b7 /scrape.go | |
| parent | cd40ab75f44e9f6ac86beca576a934fd790fc9fb (diff) | |
Implement file based caching
Diffstat (limited to 'scrape.go')
| -rw-r--r-- | scrape.go | 8 |
1 files changed, 7 insertions, 1 deletions
@@ -18,6 +18,7 @@ import (
 type FetchFunc func(url string) (string, error)
 
 type Context interface {
+	ScriptName() string
 	Visit(url string)
 	MarkVisited(url string)
 	MarkUnvisited(url string)
@@ -57,6 +58,7 @@ func NewScraper() *Scraper {
 
 type Scraper struct {
 	ScrapeFunc ScrapeFunc
+	Script string
 
 	wg sync.WaitGroup
 	jobs chan target
@@ -95,6 +97,10 @@ func (s *Scraper) MarkUnvisited(url string) {
 	s.visited.Del(url)
 }
 
+func (s *Scraper) ScriptName() string {
+	return s.Script
+}
+
 func (s *Scraper) Run() {
 	for _, mod := range s.modules {
 		if v, ok := mod.(Provisioner); ok {
@@ -116,7 +122,7 @@ func (s *Scraper) Run() {
 
 func (s *Scraper) initClient() {
 	jar, _ := cookiejar.New(nil)
-	s.client = &http.Client{Jar: jar}
+	s.client = &http.Client{Jar: jar, Transport: http.DefaultTransport}
 
 	for _, mod := range s.modules {
 		if v, ok := mod.(TransportAdapter); ok {