diff options
Diffstat (limited to 'modules')
| -rw-r--r-- | modules/output/json/json.go | 130 | ||||
| -rw-r--r-- | modules/output/ndjson/ndjson.go (renamed from modules/jsonprint/jsonprint.go) | 69 |
2 files changed, 186 insertions, 13 deletions
diff --git a/modules/output/json/json.go b/modules/output/json/json.go new file mode 100644 index 0000000..5b4e9d0 --- /dev/null +++ b/modules/output/json/json.go @@ -0,0 +1,130 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at http://mozilla.org/MPL/2.0/. + +package json + +import ( + "bytes" + "encoding/json" + "fmt" + "io" + "log" + "os" + "time" + + "github.com/philippta/flyscrape" +) + +func init() { + flyscrape.RegisterModule(Module{}) +} + +type Module struct { + Output struct { + Format string `json:"format"` + File string `json:"file"` + } `json:"output"` + + once bool + w io.WriteCloser +} + +func (Module) ModuleInfo() flyscrape.ModuleInfo { + return flyscrape.ModuleInfo{ + ID: "output.json", + New: func() flyscrape.Module { return new(Module) }, + } +} + +func (m *Module) Provision(ctx flyscrape.Context) { + if m.disabled() { + return + } + + if m.Output.File == "" { + m.w = nopCloser{os.Stdout} + return + } + + f, err := os.Create(m.Output.File) + if err != nil { + log.Printf("failed to create file %q: %v", m.Output.File, err) + os.Exit(1) + } + m.w = f +} + +func (m *Module) ReceiveResponse(resp *flyscrape.Response) { + if m.disabled() { + return + } + + if resp.Error == nil && resp.Data == nil { + return + } + + if !m.once { + fmt.Fprintln(m.w, "[") + m.once = true + } else { + fmt.Fprintln(m.w, ",") + } + + o := output{ + URL: resp.Request.URL, + Data: resp.Data, + Timestamp: time.Now(), + } + if resp.Error != nil { + o.Error = resp.Error.Error() + } + + var buf bytes.Buffer + enc := json.NewEncoder(&buf) + enc.SetEscapeHTML(false) + enc.SetIndent(" ", " ") + enc.Encode(o) + + fmt.Fprint(m.w, " ") + fmt.Fprint(m.w, buf.String()[:buf.Len()-1]) +} + +func (m *Module) Finalize() { + if m.disabled() { + return + } + if m.once { + fmt.Fprintln(m.w, "\n]") + } + m.w.Close() +} + +func (m *Module) disabled() bool { + return m.Output.Format != "json" && m.Output.Format != "" +} + +type output struct { + URL string `json:"url,omitempty"` + Data any `json:"data,omitempty"` + Error string `json:"error,omitempty"` + Timestamp time.Time `json:"timestamp,omitempty"` +} + +type nopCloser struct { + io.Writer +} + +func (c nopCloser) Write(p []byte) (n int, err error) { + return c.Writer.Write(p) +} + +func (c nopCloser) Close() error { + return nil +} + +var ( + _ flyscrape.Provisioner = (*Module)(nil) + _ flyscrape.ResponseReceiver = (*Module)(nil) + _ flyscrape.Finalizer = (*Module)(nil) +) diff --git a/modules/jsonprint/jsonprint.go b/modules/output/ndjson/ndjson.go index c40a8b9..956b2ed 100644 --- a/modules/jsonprint/jsonprint.go +++ b/modules/output/ndjson/ndjson.go @@ -2,10 +2,13 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at http://mozilla.org/MPL/2.0/. -package jsonprint +package ndjson import ( - "fmt" + "encoding/json" + "io" + "log" + "os" "time" "github.com/philippta/flyscrape" @@ -16,26 +19,46 @@ func init() { } type Module struct { - once bool + Output struct { + Format string `json:"format"` + File string `json:"file"` + } `json:"output"` + + w io.WriteCloser } func (Module) ModuleInfo() flyscrape.ModuleInfo { return flyscrape.ModuleInfo{ - ID: "jsonprint", + ID: "output.ndjson", New: func() flyscrape.Module { return new(Module) }, } } +func (m *Module) Provision(ctx flyscrape.Context) { + if m.disabled() { + return + } + + if m.Output.File == "" { + m.w = nopCloser{os.Stdout} + return + } + + f, err := os.Create(m.Output.File) + if err != nil { + log.Printf("failed to create file %q: %v", m.Output.File, err) + os.Exit(1) + } + m.w = f +} + func (m *Module) ReceiveResponse(resp *flyscrape.Response) { - if resp.Error == nil && resp.Data == nil { + if m.disabled() { return } - if !m.once { - fmt.Println("[") - m.once = true - } else { - fmt.Println(",") + if resp.Error == nil && resp.Data == nil { + return } o := output{ @@ -47,13 +70,20 @@ func (m *Module) ReceiveResponse(resp *flyscrape.Response) { o.Error = resp.Error.Error() } - fmt.Print(flyscrape.Prettify(o, " ")) + enc := json.NewEncoder(m.w) + enc.SetEscapeHTML(false) + enc.Encode(o) } func (m *Module) Finalize() { - if m.once { - fmt.Println("\n]") + if m.disabled() { + return } + m.w.Close() +} + +func (m *Module) disabled() bool { + return m.Output.Format != "ndjson" } type output struct { @@ -63,7 +93,20 @@ type output struct { Timestamp time.Time `json:"timestamp,omitempty"` } +type nopCloser struct { + io.Writer +} + +func (c nopCloser) Write(p []byte) (n int, err error) { + return c.Writer.Write(p) +} + +func (c nopCloser) Close() error { + return nil +} + var ( + _ flyscrape.Provisioner = (*Module)(nil) _ flyscrape.ResponseReceiver = (*Module)(nil) _ flyscrape.Finalizer = (*Module)(nil) ) |