summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPhilipp Tanlak <philipp.tanlak@gmail.com>2023-08-17 20:18:55 +0200
committerPhilipp Tanlak <philipp.tanlak@gmail.com>2023-08-17 20:18:55 +0200
commit8f55226a53682f9f4a0b63778b3338451605a6c3 (patch)
treed435e924de369cebbbe66449f2cd82f79b6c0300
parent5e2b1d1dc902ba53fc537b31e835d82c0e55dfb6 (diff)
add timestamp
-rw-r--r--examples/esbuild.github.io.js20
-rw-r--r--examples/pkg.go.dev.js22
-rw-r--r--result.json79
-rw-r--r--scrape.go18
4 files changed, 10 insertions, 129 deletions
diff --git a/examples/esbuild.github.io.js b/examples/esbuild.github.io.js
deleted file mode 100644
index 7839791..0000000
--- a/examples/esbuild.github.io.js
+++ /dev/null
@@ -1,20 +0,0 @@
-import { parse } from "flyscrape";
-
-export const options = {
- url: "https://esbuild.github.io/plugins/",
- depth: 1,
- rate: 100,
- allowedDomains: [
- "esbuild.github.io",
- "nodejs.org",
- ],
-}
-
-export default function({ html }) {
- const doc = parse(html);
-
- return {
- headline: doc('h1').text().trim(),
- body: doc('main > p:nth-of-type(1)').text().trim(),
- };
-}
diff --git a/examples/pkg.go.dev.js b/examples/pkg.go.dev.js
deleted file mode 100644
index 12aa308..0000000
--- a/examples/pkg.go.dev.js
+++ /dev/null
@@ -1,22 +0,0 @@
-import { parse } from "flyscrape";
-
-export const options = {
- url: "https://pkg.go.dev/github.com/stretchr/testify/require",
-}
-
-export default function({ html }) {
- const $ = parse(html);
-
- return {
- package: $('h1').text().trim(),
- meta: {
- version: $('[data-test-id=UnitHeader-version] > a').text().replace("Version: ", "").trim(),
- license: $('[data-test-id=UnitHeader-licenses] > a').text().trim(),
- published: $('[data-test-id=UnitHeader-commitTime]').text().replace("Published: ", "").trim(),
- imports: $('[data-test-id=UnitHeader-imports] > a').text().replace("Imports: ", "").trim(),
- importedBy: $('[data-test-id=UnitHeader-importedby] > a').text().replace("Imported by: ", "").replace(/,/g,"").trim(),
- },
- functions: $('.Documentation-indexList .Documentation-indexFunction > a').toArray().map(el => $(el).text()),
- types: $('.Documentation-indexList .Documentation-indexType > a').toArray().map(el => $(el).text()),
- };
-}
diff --git a/result.json b/result.json
deleted file mode 100644
index 88bdc55..0000000
--- a/result.json
+++ /dev/null
@@ -1,79 +0,0 @@
-[
- {
- "url": "https://esbuild.github.io/plugins/",
- "data": {
- "body": "The plugin API allows you to inject code into various parts of the build process. Unlike the rest of the API, it's not available from the command line. You must write either JavaScript or Go code to use the plugin API. Plugins can also only be used with the build API, not with the transform API.",
- "headline": "Plugins"
- }
- },
- {
- "url": "https://esbuild.github.io/try/",
- "data": {
- "body": "",
- "headline": "esbuild"
- }
- },
- {
- "url": "https://esbuild.github.io/getting-started/",
- "data": {
- "body": "First, download and install the esbuild command locally. A prebuilt native executable can be installed using npm (which is automatically installed when you install the node JavaScript runtime):",
- "headline": "Getting Started"
- }
- },
- {
- "url": "https://esbuild.github.io/",
- "data": {
- "body": "Our current build tools for the web are 10-100x slower than they could be. The main goal of the esbuild bundler project is to bring about a new era of build tool performance, and create an easy-to-use modern bundler along the way.",
- "headline": "esbuild"
- }
- },
- {
- "url": "https://esbuild.github.io/analyze/",
- "data": {
- "body": "",
- "headline": "esbuild"
- }
- },
- {
- "url": "https://esbuild.github.io/api/",
- "data": {
- "body": "The API can be accessed in one of three languages: on the command line, in JavaScript, and in Go. The concepts and parameters are largely identical between the three languages so they will be presented together here instead of having separate documentation for each language. You can switch between languages using the CLI, JS, and Go tabs in the top-right corner of each code example. Some specifics for each language:",
- "headline": "API"
- }
- },
- {
- "url": "https://esbuild.github.io/faq/",
- "data": {
- "body": "This is a collection of common questions about esbuild. You can also ask questions on the GitHub issue tracker.",
- "headline": "FAQ"
- }
- },
- {
- "url": "https://esbuild.github.io/content-types/",
- "data": {
- "body": "All of the built-in content types are listed below. Each content type has an associated \"loader\" which tells esbuild how to interpret the file contents. Some file extensions already have a loader configured for them by default, although the defaults can be overridden.",
- "headline": "Content Types"
- }
- },
- {
- "url": "https://nodejs.org/api/packages.html",
- "data": {
- "body": "",
- "headline": "Node.js v20.5.1 documentation"
- }
- },
- {
- "url": "https://nodejs.org/api/modules.html",
- "data": {
- "body": "",
- "headline": "Node.js v20.5.1 documentation"
- }
- },
- {
- "url": "https://nodejs.org/api/crypto.html",
- "data": {
- "body": "",
- "headline": "Node.js v20.5.1 documentation"
- }
- }
-]
diff --git a/scrape.go b/scrape.go
index 793e418..ac75c73 100644
--- a/scrape.go
+++ b/scrape.go
@@ -25,10 +25,11 @@ type ScrapeOptions struct {
}
type ScrapeResult struct {
- URL string `json:"url"`
- Data any `json:"data,omitempty"`
- Links []string `json:"-"`
- Error error `json:"error,omitempty"`
+ URL string `json:"url"`
+ Data any `json:"data,omitempty"`
+ Links []string `json:"-"`
+ Error error `json:"error,omitempty"`
+ Timestamp time.Time `json:"timestamp"`
}
type (
@@ -96,10 +97,11 @@ func (s *Scraper) Scrape() <-chan ScrapeResult {
go func() {
for res := range results {
scraperesults <- ScrapeResult{
- URL: res.url,
- Data: res.data,
- Links: res.links,
- Error: res.err,
+ URL: res.url,
+ Data: res.data,
+ Links: res.links,
+ Error: res.err,
+ Timestamp: time.Now().UTC(),
}
}
close(scraperesults)