summaryrefslogtreecommitdiff
path: root/modules/output/ndjson/ndjson.go
diff options
context:
space:
mode:
author: Philipp Tanlak <philipp.tanlak@gmail.com> 2023-12-04 17:35:06 +0100
committer: GitHub <noreply@github.com> 2023-12-04 17:35:06 +0100
commit: 8c68e0ed414bfb323d6e94db55c95db13797ef8e (patch)
tree: 379fd505aec7e54d9f7dcc7b7d6ae5264f4b956a /modules/output/ndjson/ndjson.go
parent: 807b9a1693645270609d4d795aa2b8eeacaae40e (diff)
Make output file and format configurable (#24)
Diffstat (limited to 'modules/output/ndjson/ndjson.go')
-rw-r--r-- modules/output/ndjson/ndjson.go 112
1 files changed, 112 insertions, 0 deletions
diff --git a/modules/output/ndjson/ndjson.go b/modules/output/ndjson/ndjson.go
new file mode 100644
index 0000000..956b2ed
--- /dev/null
+++ b/modules/output/ndjson/ndjson.go
@@ -0,0 +1,112 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+package ndjson
+
+import (
+ "encoding/json"
+ "io"
+ "log"
+ "os"
+ "time"
+
+ "github.com/philippta/flyscrape"
+)
+
+func init() {
+ flyscrape.RegisterModule(Module{})
+}
+
+// Module is the flyscrape output module that emits newline-delimited JSON.
+// It stays inactive unless Output.Format is "ndjson".
+type Module struct {
+ // Output maps to the "output" JSON key.
+ Output struct {
+ // Format selects the output format; only "ndjson" enables this module.
+ Format string `json:"format"`
+ // File is the path to write to; when empty, records go to stdout.
+ File string `json:"file"`
+ } `json:"output"`
+
+ // w is the destination for encoded records; Provision assigns it.
+ w io.WriteCloser
+}
+
+// ModuleInfo returns the module's ID, "output.ndjson", together with a
+// constructor that yields a fresh *Module.
+func (Module) ModuleInfo() flyscrape.ModuleInfo {
+	newModule := func() flyscrape.Module { return &Module{} }
+
+	return flyscrape.ModuleInfo{
+		ID:  "output.ndjson",
+		New: newModule,
+	}
+}
+
+// Provision selects where records are written.
+//
+// A disabled module is left untouched. With no Output.File configured the
+// module writes to stdout through a closer that does nothing; otherwise the
+// file is created (truncating any existing one) and used as the destination.
+// If the file cannot be created, the error is logged and the process exits
+// with status 1.
+func (m *Module) Provision(ctx flyscrape.Context) {
+	switch {
+	case m.disabled():
+		// Not the selected format: leave the writer unset.
+	case m.Output.File == "":
+		m.w = nopCloser{os.Stdout}
+	default:
+		file, err := os.Create(m.Output.File)
+		if err != nil {
+			// log.Fatalf prints the message and then calls os.Exit(1).
+			log.Fatalf("failed to create file %q: %v", m.Output.File, err)
+		}
+		m.w = file
+	}
+}
+
+// ReceiveResponse writes one JSON line describing resp to the module's
+// writer.
+//
+// Nothing is written when the module is disabled or when resp carries
+// neither data nor an error. Each record holds the request URL, the data,
+// the error text (if any) and the current time. A failure to encode or write
+// the record is logged rather than silently dropped; processing continues.
+func (m *Module) ReceiveResponse(resp *flyscrape.Response) {
+	if m.disabled() {
+		return
+	}
+
+	if resp.Error == nil && resp.Data == nil {
+		return
+	}
+
+	o := output{
+		URL:       resp.Request.URL,
+		Data:      resp.Data,
+		Timestamp: time.Now(),
+	}
+	if resp.Error != nil {
+		o.Error = resp.Error.Error()
+	}
+
+	enc := json.NewEncoder(m.w)
+	// Emit <, > and & verbatim instead of as \u003c-style escapes.
+	enc.SetEscapeHTML(false)
+	if err := enc.Encode(o); err != nil {
+		log.Printf("failed to write ndjson record for %q: %v", o.URL, err)
+	}
+}
+
+// Finalize closes the module's writer. It does nothing when the module is
+// disabled. A failing Close is logged, since for a file it can mean that
+// buffered data never reached disk.
+func (m *Module) Finalize() {
+	if m.disabled() {
+		return
+	}
+	if err := m.w.Close(); err != nil {
+		log.Printf("failed to close ndjson output: %v", err)
+	}
+}
+
+// disabled reports whether the configured output format is anything other
+// than "ndjson".
+func (m *Module) disabled() bool {
+	enabled := m.Output.Format == "ndjson"
+	return !enabled
+}
+
+// output is the JSON shape of a single emitted record.
+type output struct {
+ URL string `json:"url,omitempty"`
+ Data any `json:"data,omitempty"`
+ Error string `json:"error,omitempty"`
+ // NOTE(review): encoding/json's omitempty never omits struct values such
+ // as time.Time, so "timestamp" is always written.
+ Timestamp time.Time `json:"timestamp,omitempty"`
+}
+
+// nopCloser adapts an io.Writer to io.WriteCloser. Write is promoted from
+// the embedded io.Writer, so no explicit forwarding method is required.
+type nopCloser struct {
+	io.Writer
+}
+
+// Close does nothing and always returns nil; the wrapped writer stays open.
+func (nopCloser) Close() error {
+	return nil
+}
+
+// Compile-time assertions that *Module satisfies the flyscrape interfaces
+// named below.
+var (
+ _ flyscrape.Provisioner = (*Module)(nil)
+ _ flyscrape.ResponseReceiver = (*Module)(nil)
+ _ flyscrape.Finalizer = (*Module)(nil)
+)