summaryrefslogtreecommitdiff
path: root/scrape/parser_test.go
blob: 4eb515d1790ab5e47686d5d1a4568141c4b5e38f (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
package scrape

import (
	"encoding/json"
	"fmt"
	"testing"

	"github.com/stretchr/testify/require"
)

// TestParser feeds the shared html fixture to ParseFromJSON together with a
// nested selector configuration (top-level selectors plus objects that use
// the "#each" and "#element" keys), then prints the result as indented JSON.
func TestParser(t *testing.T) {
	o := ParseFromJSON(html, `{
		"title": "head > title",
		"headline": "body h1",
		"sections": {
			"#each": ".container",
			"head": "h2",
			"text": "p",
			"inner": {
				"#each": ".inner",
				"headline": "h3"
			},
			"one": {
				"#element": ".one",
				"value": ".val"
			}
		}
	}`)
	// testify expects (expected, actual); the order only affects how a
	// failure is labelled, not whether the comparison passes.
	// NOTE(review): expecting nil here looks like a placeholder assertion —
	// confirm the intended expectation against ParseFromJSON's contract.
	require.Equal(t, nil, o)

	// Surface marshalling problems instead of printing an empty string.
	b, err := json.MarshalIndent(o, "", "  ")
	require.NoError(t, err)
	fmt.Println(string(b))
}

// TestParser2 feeds the shared html fixture to ParseFromJSON with a
// configuration whose root object carries the "#each" key, then prints the
// result as indented JSON. It makes no assertion on the parsed value itself.
func TestParser2(t *testing.T) {
	o := ParseFromJSON(html, `{
		"#each": ".container",
		"head": "h2",
		"text": "p"
	}`)

	// Fail loudly if the result cannot be serialised rather than silently
	// printing an empty string.
	b, err := json.MarshalIndent(o, "", "  ")
	if err != nil {
		t.Fatalf("marshalling parse result: %v", err)
	}
	fmt.Println(string(b))
}

// html is the HTML document used as input by the tests in this file. It
// contains a <title>, a single <h1>, and two ".container" divs; each
// container holds an <h2>, a <p>, a ".one" div wrapping a ".val" div, and two
// ".inner" divs with an <h3> each.
var html = `
<html>
	<head>
		<title>Title</title>
	</head>
	<body>
		<h1>Headline</h1>
		<div class="container">
			<h2>Section 1</h2>
			<p>
				Paragraph 1
			</p>
			<div class="one">
				<div class="val">One</div>
			</div>
			<div class="inner">
				<h3>Inner H3</h3>
			</div>
			<div class="inner">
				<h3>Inner H3 next</h3>
			</div>
		</div>
		<div class="container">
			<h2>Section 2</h2>
			<p>
				Paragraph 2
			</p>
			<div class="one"><div class="val">Two</div></div>
			<div class="inner">
				<h3>Inner H3 2</h3>
			</div>
			<div class="inner">
				<h3>Inner H3 2 next</h3>
			</div>
		</div>
	</body>
</html>
`