diff options
-rw-r--r-- | skate/zippy.go | 20 |
1 files changed, 17 insertions, 3 deletions
diff --git a/skate/zippy.go b/skate/zippy.go index 37fca4b..5767b34 100644 --- a/skate/zippy.go +++ b/skate/zippy.go @@ -1,7 +1,13 @@ // This file contains various "reducers", e.g. working on two data streams and // applying a function on groups of documents with a shared key. // -// Note: A bit repetitive, but not want to introduce any other abstraction. +// Note: This is a bit repetitive, but we do not want to introduce any other +// abstraction for now. Since most of the logic is in the grouper functions, we +// could make them top level and then assemble the zipkey runner on the fly. +// +// The most confusing aspect currently is the variety of schemas hidden within +// the readers (and string groups): release, ref, ref-as-release, open library, +// wikipedia, ... package skate import ( @@ -14,10 +20,10 @@ import ( json "github.com/segmentio/encoding/json" ) -// groupLogf logs a message alongsize a serialized group. +// groupLogf logs a message alongside a serialized group for debugging. func groupLogf(g *zipkey.Group, s string, vs ...interface{}) { log.Printf(s, vs...) - b, _ := json.Marshal(g) + b, _ := json.MarshalIndent(g, "", " ") log.Println(string(b)) } @@ -305,6 +311,14 @@ func ZippyVerifyRefsOpenLibrary(olr, refs io.Reader, w io.Writer) error { return zipper.Run() } +// ZippyBrefAugment takes all matched elements from bref and adds docs from raw +// refs, which have not been matched. It also gets rid of duplicate matches. +// +// We can identify which docs have been matched by checking the ref key and index. +func ZippyBrefAugment(bref, raw io.Reader, w io.Writer) error { + return nil +} + // Cut returns a specific column (1-indexed, like CutSep) from a tabular // file, returns empty string if column is invalid. func Cut(line string, column int) string { |