summaryrefslogtreecommitdiff
path: root/template.js
blob: a7b438401e15dcb4eca7fac86b35658fe2826ccd (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
/**
 * Scraper configuration.
 *
 * Only `url` is active. Every other option is shown commented out together
 * with its documented default; uncomment an option to set it.
 */
export const config = {
    // Specify the URL to start scraping from.
    url: "https://example.com/",

    // Specify multiple URLs to start scraping from.       (default = [])
    // urls: [
    //     "https://anothersite.com/",
    //     "https://yetanother.com/",
    // ],

    // Specify how deep links should be followed.          (default = 0, no follow)
    // depth: 5,

    // Specify the CSS selectors to follow.                (default = ["a[href]"])
    // follow: [".next > a", ".related a"],

    // Specify the allowed domains. ['*'] for all.         (default = domain from url)
    // allowedDomains: ["example.com", "anothersite.com"],

    // Specify the blocked domains.                        (default = none)
    // blockedDomains: ["somesite.com"],

    // Specify the allowed URLs as regex.                  (default = all allowed)
    // Backslashes must be doubled inside a JavaScript string literal;
    // a single "\d" would collapse to a plain "d".
    // allowedURLs: ["/posts", "/articles/\\d+"],

    // Specify the blocked URLs as regex.                  (default = none)
    // blockedURLs: ["/admin"],

    // Specify the rate in requests per minute.            (default = no rate limit)
    // rate: 60,

    // Specify the number of concurrent requests.          (default = no limit)
    // concurrency: 1,

    // Specify a single HTTP(S) proxy URL.                 (default = no proxy)
    // proxy: "http://someproxy.com:8043",

    // Specify multiple HTTP(S) proxy URLs.                (default = no proxy)
    // proxies: [
    //   "http://someproxy.com:8043",
    //   "http://someotherproxy.com:8043",
    // ],

    // Enable file-based request caching.                  (default = no cache)
    // cache: "file",

    // Specify the HTTP request headers.                   (default = none)
    // headers: {
    //     "Authorization": "Bearer ...",
    //     "User-Agent": "Mozilla ...",
    // },

    // Use the cookie store of your local browser.         (default = off)
    // Options: "chrome" | "edge" | "firefox"
    // cookies: "chrome",

    // Specify the output options.
    // output: {
    //     // Specify the output file.                        (default = stdout)
    //     file: "results.json",
    //
    //     // Specify the output format.                      (default = json)
    //     // Options: "json" | "ndjson"
    //     format: "json",
    // },
};

/**
 * Builds the result record for a document.
 *
 * Looks up the `h1` and `a` selections on `doc` and returns the heading text
 * together with the link's text and its `href` passed through `absoluteURL`.
 *
 * NOTE(review): presumably called by the scraper once per fetched page, with
 * `absoluteURL` resolving relative links against the page URL; confirm
 * against the tool's documentation.
 *
 * @param {object} context
 * @param {object} context.doc - Queryable document exposing `find(selector)`;
 *   the returned selections are used via `text()` and `attr(name)`.
 * @param {Function} context.absoluteURL - Called with the raw `href` value.
 * @returns {{title: *, link: {text: *, url: *}}} Extracted record.
 */
export default function({ doc, absoluteURL }) {
  // Query in the same order as the record is laid out: heading, then link.
  const [heading, anchor] = ["h1", "a"].map((selector) => doc.find(selector));

  const title = heading.text();
  const text = anchor.text();
  const url = absoluteURL(anchor.attr("href"));

  return { title, link: { text, url } };
}