summaryrefslogblamecommitdiff
path: root/modules/starturl/starturl_test.go
blob: 4b6a5549a6bc8a63e47d46d2494966774695eda3 (plain) (tree)
1
2
3
4
5
6
7
8
9






                                                                      
                  
              


                                        
                                                     



                                                         
                                 

                      
 









                                                                                        
                  
         
 

                                         




                                                               





























































                                                                             
                                          







                                                                                                        
                                                         
                                                                          
                                                           











                                                               
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at http://mozilla.org/MPL/2.0/.

package starturl_test

import (
	"net/http"
	"sync"
	"testing"

	"github.com/philippta/flyscrape"
	"github.com/philippta/flyscrape/modules/hook"
	"github.com/philippta/flyscrape/modules/starturl"
	"github.com/stretchr/testify/require"
)

func TestStartURL(t *testing.T) {
	var url string
	var depth int

	mods := []flyscrape.Module{
		&starturl.Module{URL: "http://www.example.com/foo/bar"},
		hook.Module{
			AdaptTransportFn: func(rt http.RoundTripper) http.RoundTripper {
				return flyscrape.MockTransport(200, "")
			},
			BuildRequestFn: func(r *flyscrape.Request) {
				url = r.URL
				depth = r.Depth
			},
		},
	}

	scraper := flyscrape.NewScraper()
	scraper.Modules = mods
	scraper.Run()

	require.Equal(t, "http://www.example.com/foo/bar", url)
	require.Equal(t, 0, depth)
}

// TestStartURL_MultipleStartingURLs runs one subtest per combination of the
// starturl.Module URL and URLs fields and asserts that the request URLs seen
// by the hook's BuildRequestFn match the expected set, ignoring order.
func TestStartURL_MultipleStartingURLs(t *testing.T) {
	const (
		fooURL = "http://www.example.com/foo"
		barURL = "http://www.example.com/bar"
		bazURL = "http://www.example.com/baz"
	)

	type scenario struct {
		name   string
		module func() *starturl.Module
		want   []string
	}

	scenarios := []scenario{
		{
			name: ".URL and .URLs",
			module: func() *starturl.Module {
				return &starturl.Module{URL: fooURL, URLs: []string{barURL, bazURL}}
			},
			want: []string{fooURL, barURL, bazURL},
		},
		{
			name: "only .URL",
			module: func() *starturl.Module {
				return &starturl.Module{URL: fooURL}
			},
			want: []string{fooURL},
		},
		{
			name: "only .URLs",
			module: func() *starturl.Module {
				return &starturl.Module{URLs: []string{barURL, bazURL}}
			},
			want: []string{barURL, bazURL},
		},
		{
			name: "empty",
			module: func() *starturl.Module {
				return &starturl.Module{}
			},
			want: []string{},
		},
	}

	for i := range scenarios {
		sc := scenarios[i]

		t.Run(sc.name, func(t *testing.T) {
			// BuildRequestFn appends to seen under mu, so the slice is safe
			// even if the callback is invoked from several goroutines.
			var mu sync.Mutex
			seen := make([]string, 0, len(sc.want))

			record := func(req *flyscrape.Request) {
				mu.Lock()
				defer mu.Unlock()
				seen = append(seen, req.URL)
			}

			scraper := flyscrape.NewScraper()
			scraper.Modules = []flyscrape.Module{
				sc.module(),
				hook.Module{
					AdaptTransportFn: func(http.RoundTripper) http.RoundTripper {
						return flyscrape.MockTransport(http.StatusOK, "")
					},
					BuildRequestFn: record,
				},
			}
			scraper.Run()

			require.ElementsMatch(t, sc.want, seen)
		})
	}
}