summaryrefslogtreecommitdiff
path: root/scrape_test.go
blob: 602be9f56cb66e5a06af55e4740a929da055bcd8 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at http://mozilla.org/MPL/2.0/.

package flyscrape_test

import (
	"sort"
	"testing"

	"flyscrape"

	"github.com/stretchr/testify/require"
)

// TestScrape drives a flyscrape.Scraper with stubbed fetch and scrape
// callbacks and verifies which URLs show up in the emitted results.
//
// Every fetch returns the same HTML page, which contains a root-relative
// link, a path-relative link and an absolute link to a second allowed
// domain. The test expects the start URL plus the three links, each in
// absolute form, to be reported.
func TestScrape(t *testing.T) {
	// Fixture page served for every URL the scraper asks for.
	const page = `<html>
                <body>
                    <a href="/baz">Baz</a>
                    <a href="baz">Baz</a>
                    <a href="http://www.google.com">Google</a>
                </body>
            </html>`

	// The fetch stub ignores the requested URL and never fails.
	fetch := func(string) (string, error) {
		return page, nil
	}

	// The scrape stub just echoes back the URL it was invoked for.
	scrape := func(p flyscrape.ScrapeParams) (any, error) {
		data := map[string]any{"url": p.URL}
		return data, nil
	}

	scraper := flyscrape.Scraper{
		ScrapeOptions: flyscrape.ScrapeOptions{
			URL:            "http://example.com/foo/bar",
			Depth:          1,
			AllowedDomains: []string{"example.com", "www.google.com"},
		},
		ScrapeFunc: scrape,
		FetchFunc:  fetch,
	}

	// Drain the result stream, keeping only the URL of each result.
	var got []string
	for result := range scraper.Scrape() {
		got = append(got, result.URL)
	}

	// Results are sorted before comparing, so the assertions below do not
	// depend on the order in which they were received.
	sort.Strings(got)

	// Expected URLs, already in sorted order.
	want := []string{
		"http://example.com/baz",
		"http://example.com/foo/bar",
		"http://example.com/foo/baz",
		"http://www.google.com/",
	}

	require.Len(t, got, 4)
	for i, expected := range want {
		require.Equal(t, expected, got[i])
	}
}