From 2bfae5b426bf4a0b99d3979ed12d63cb50c39b17 Mon Sep 17 00:00:00 2001
From: Philipp Tanlak
Date: Thu, 19 Oct 2023 20:43:04 +0200
Subject: Add examples

---
 README.md                 |  2 ++
 examples/coinmarketcap.js | 33 +++++++++++++++++++++++++++++++++
 examples/hackernews.js    | 29 +++++++++++++++++++++++++++++
 examples/reddit.js        | 36 ++++++++++++++++++++++++++++++++++++
 4 files changed, 100 insertions(+)
 create mode 100644 examples/coinmarketcap.js
 create mode 100644 examples/hackernews.js
 create mode 100644 examples/reddit.js

diff --git a/README.md b/README.md
index d2cb12a..19ad92c 100644
--- a/README.md
+++ b/README.md
@@ -76,6 +76,8 @@ $ flyscrape run hackernews.js
 ]
 ```
 
+Check out the [examples folder](examples) for more detailed examples.
+
 ## Installation
 
 ### Pre-compiled binary
diff --git a/examples/coinmarketcap.js b/examples/coinmarketcap.js
new file mode 100644
index 0000000..7693fd8
--- /dev/null
+++ b/examples/coinmarketcap.js
@@ -0,0 +1,33 @@
+// Scrapes the rows matched by `.cmc-table tbody tr` on the CoinMarketCap front page.
+export const config = {
+  url: "https://coinmarketcap.com/",
+};
+
+// Returns `{ currencies: [...] }`, keeping at most the first ten mapped rows.
+export default function({ doc }) {
+  const rows = doc.find(".cmc-table tbody tr");
+
+  return {
+    currencies: rows
+      .map((row) => {
+        // Cells are addressed by their position within the row.
+        const cols = row.find("td");
+
+        return {
+          position: cols.get(1).text(),
+          currency: cols.get(2).find("p").get(0).text(),
+          symbol: cols.get(2).find("p").get(1).text(),
+          price: cols.get(3).text(),
+          change: {
+            "1h": cols.get(4).text(),
+            "24h": cols.get(5).text(),
+            "7d": cols.get(6).text(),
+          },
+          marketcap: cols.get(7).find("span").get(1).text(),
+          volume: cols.get(8).find("p").get(0).text(),
+          supply: cols.get(9).text(),
+        };
+      })
+      .slice(0, 10),
+  };
+}
diff --git a/examples/hackernews.js b/examples/hackernews.js
new file mode 100644
index 0000000..71dc52f
--- /dev/null
+++ b/examples/hackernews.js
@@ -0,0 +1,29 @@
+// Scrapes posts from Hacker News, following links matched by `a.morelink[href]`.
+export const config = {
+  url: "https://news.ycombinator.com/",
+  depth: 9,
+  cache: "file",
+  follow: ["a.morelink[href]"],
+};
+
+// Returns `{ posts: [...] }` with one entry per `.athing` element on the page.
+export default function({ doc, absoluteURL }) {
+  const posts = doc.find(".athing");
+
+  return {
+    posts: posts.map((post) => {
+      const link = post.find(".titleline > a");
+      // User, score and age are looked up in the element following the post.
+      const meta = post.next();
+
+      return {
+        url: absoluteURL(link.attr("href")),
+        user: meta.find(".hnuser").text(),
+        title: link.text(),
+        // Strip the unit so both "1 point" and "42 points" yield the bare number.
+        points: meta.find(".score").text().replace(/ points?/, ""),
+        created: meta.find(".age").attr("title"),
+      };
+    }),
+  };
+}
diff --git a/examples/reddit.js b/examples/reddit.js
new file mode 100644
index 0000000..bd4e9c3
--- /dev/null
+++ b/examples/reddit.js
@@ -0,0 +1,36 @@
+// Scrapes the non-promoted posts listed on the old.reddit.com front page.
+export const config = {
+  url: "https://old.reddit.com/",
+};
+
+// Returns `{ posts: [...] }` with one entry per matched `.thing` element.
+export default function({ doc, absoluteURL }) {
+  const posts = doc.find("#siteTable .thing:not(.promoted)");
+
+  return {
+    posts: posts.map((post) => {
+      const rank = post.find(".rank");
+      const user = post.find(".author");
+      const created = post.find("time");
+      const title = post.find("a.title");
+      const comments = post.find(".comments");
+      const subreddit = post.find(".subreddit");
+      const upvotes = post.find(".score.unvoted");
+      const thumbnail = post.find("a.thumbnail img");
+
+      return {
+        rank: rank.text(),
+        user: user.text(),
+        created: created.attr("datetime"),
+        title: title.text(),
+        link: absoluteURL(title.attr("href")),
+        // Strip the unit so both "1 comment" and "42 comments" yield the bare number.
+        comments: comments.text().replace(/ comments?/, ""),
+        comments_link: comments.attr("href"),
+        subreddit: subreddit.text(),
+        upvotes: upvotes.text(),
+        thumbnail: absoluteURL(thumbnail.attr("src")),
+      };
+    }),
+  };
+}
-- 
cgit v1.2.3