summaryrefslogtreecommitdiff
path: root/modules/starturl/starturl_test.go
blob: 54f899a8dbfaa04fc45b8a329412932948a31cdf (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at http://mozilla.org/MPL/2.0/.

package starturl_test

import (
	"net/http"
	"testing"

	"github.com/philippta/flyscrape"
	"github.com/philippta/flyscrape/modules/hook"
	"github.com/philippta/flyscrape/modules/starturl"
	"github.com/stretchr/testify/require"
)

// TestStartURL runs a scraper configured with a starturl.Module holding a
// single URL and asserts that the request observed by the BuildRequestFn hook
// carries that URL and a depth of 0.
func TestStartURL(t *testing.T) {
	var url string
	var depth int

	mods := []flyscrape.Module{
		&starturl.Module{URL: "http://www.example.com/foo/bar"},
		hook.Module{
			// Replace the transport with flyscrape's mock, configured with a
			// 200 status and an empty body. The named constant matches the
			// usage in TestStartURL_MultipleStartingURLs.
			AdaptTransportFn: func(rt http.RoundTripper) http.RoundTripper {
				return flyscrape.MockTransport(http.StatusOK, "")
			},
			// Capture the URL and depth of the request handed to the hook so
			// they can be asserted after the run.
			BuildRequestFn: func(r *flyscrape.Request) {
				url = r.URL
				depth = r.Depth
			},
		},
	}

	scraper := flyscrape.NewScraper()
	scraper.Modules = mods
	scraper.Run()

	require.Equal(t, "http://www.example.com/foo/bar", url)
	require.Equal(t, 0, depth)
}

// TestStartURL_MultipleStartingURLs is a table-driven test covering the
// combinations of the URL and URLs fields of starturl.Module. For each case
// it runs a scraper and compares the request URLs seen by the BuildRequestFn
// hook against the expected list, ignoring order.
func TestStartURL_MultipleStartingURLs(t *testing.T) {
	const (
		fooURL = "http://www.example.com/foo"
		barURL = "http://www.example.com/bar"
		bazURL = "http://www.example.com/baz"
	)

	tests := []struct {
		name      string
		newModule func() *starturl.Module
		want      []string
	}{
		{
			name: ".URL and .URLs",
			newModule: func() *starturl.Module {
				return &starturl.Module{
					URL:  fooURL,
					URLs: []string{barURL, bazURL},
				}
			},
			want: []string{fooURL, barURL, bazURL},
		},
		{
			name: "only .URL",
			newModule: func() *starturl.Module {
				return &starturl.Module{URL: fooURL}
			},
			want: []string{fooURL},
		},
		{
			name: "only .URLs",
			newModule: func() *starturl.Module {
				return &starturl.Module{URLs: []string{barURL, bazURL}}
			},
			want: []string{barURL, bazURL},
		},
		{
			name: "empty",
			newModule: func() *starturl.Module {
				return &starturl.Module{}
			},
			want: []string{},
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			// Collects every request URL passed to the hook during the run.
			// NOTE(review): got is appended to without synchronization; if
			// the scraper invokes BuildRequestFn from more than one goroutine
			// this is a data race — confirm against the scraper
			// implementation and guard with a mutex if so.
			got := []string{}

			recorder := hook.Module{
				// Use flyscrape's mock transport with a 200 status and an
				// empty body.
				AdaptTransportFn: func(http.RoundTripper) http.RoundTripper {
					return flyscrape.MockTransport(http.StatusOK, "")
				},
				BuildRequestFn: func(req *flyscrape.Request) {
					got = append(got, req.URL)
				},
			}

			scraper := flyscrape.NewScraper()
			scraper.Modules = []flyscrape.Module{tt.newModule(), recorder}
			scraper.Run()

			// Order of requests is not asserted, only the collection of URLs.
			require.ElementsMatch(t, tt.want, got)
		})
	}
}