summaryrefslogtreecommitdiff
path: root/examples/useragents/chrome.js
diff options
context:
space:
mode:
authorPhilipp Tanlak <philipp.tanlak@gmail.com>2024-11-24 13:08:25 +0100
committerGitHub <noreply@github.com>2024-11-24 13:08:25 +0100
commit26a033b3ec1269d3927831bda1749a484ed83733 (patch)
treec3ff3641baea401f066ad1b80ac239faaa5b2605 /examples/useragents/chrome.js
parentd2aec146ac5a2aef4a87813be47e5e1dc7404c51 (diff)
Generate random user agent when none is provided (#76)
Diffstat (limited to 'examples/useragents/chrome.js')
-rw-r--r--examples/useragents/chrome.js50
1 files changed, 50 insertions, 0 deletions
diff --git a/examples/useragents/chrome.js b/examples/useragents/chrome.js
new file mode 100644
index 0000000..c3a6fcd
--- /dev/null
+++ b/examples/useragents/chrome.js
@@ -0,0 +1,50 @@
+import { parse } from "flyscrape";
+
+export const config = { // Scraper settings exported alongside the default scrape function.
+ url: "https://chromereleases.googleblog.com/search/label/Stable%20updates", // Chrome Releases blog, posts labelled "Stable updates"
+ follow: [".blog-pager-older-link"], // selector of the "older posts" pager link; presumably followed for pagination (confirm in flyscrape docs)
+ depth: 30, // presumably the maximum number of followed hops from `url` (confirm in flyscrape docs)
+ cache: "file", // presumably selects flyscrape's file-backed response cache (confirm in flyscrape docs)
+};
+
+/**
+ * Extracts the Chrome version numbers announced in each `.post` element of the page.
+ *
+ * @param {object} context - `doc` is queried for `.post` elements; `absoluteURL` is accepted but unused.
+ * @returns {string[]} Flat list of version strings, deduplicated within each post.
+ */
+export default function ({ doc, absoluteURL }) {
+  // One pattern per platform wording. Group 1 is the version, optionally followed by a
+  // "/.NN" shorthand for a second build, e.g. "1.2.3.4/.5". No g/y flags, so reuse is stateless.
+  const regexes = [
+    /(\d+\.\d+\.\d+\.\d+(\/\.\d+)?)\)? for (Mac)/,
+    /(\d+\.\d+\.\d+\.\d+(\/\.\d+)?)\)? for (Windows)/,
+    /(\d+\.\d+\.\d+\.\d+(\/\.\d+)?)\)? for (Linux)/,
+    /(\d+\.\d+\.\d+\.\d+(\/\.\d+)?)\)? for (iOS)/,
+    /(\d+\.\d+\.\d+\.\d+(\/\.\d+)?)\)? for (Android)/,
+    /(\d+\.\d+\.\d+\.\d+(\/\.\d+)?)\)? for (ChromeOS)/,
+    /(\d+\.\d+\.\d+\.\d+(\/\.\d+)?)\)? for (Mac,Linux)/,
+    /(\d+\.\d+\.\d+\.\d+(\/\.\d+)?)\)? for (Mac and Linux)/,
+    /(\d+\.\d+\.\d+\.\d+(\/\.\d+)?)\)?\s\(Platform version:\s[\d\.]+\)\sfor\smost\s(ChromeOS)/,
+  ];
+
+  return doc.find(".post").map((post) => {
+    // Run the content text through parse() again and read the text of its first paragraph.
+    const body = parse(post.find(".post-content").text()).find("p:nth-child(1)").text().trim();
+    const versions = new Set();
+    for (const regex of regexes) {
+      const matches = body.match(regex);
+      if (!matches) {
+        continue;
+      }
+      const [first, suffix] = matches[1].split("/");
+      versions.add(first);
+      if (suffix !== undefined) {
+        // Expand the shorthand: "1.2.3.4/.5" also announces "1.2.3.5".
+        versions.add(first.substring(0, first.lastIndexOf(".")) + suffix);
+      }
+    }
+    // A Set is always truthy and flat() does not unpack it, so hand back a plain array.
+    return [...versions];
+  }).flat();
+}