aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--skate/zippy.go20
1 files changed, 17 insertions, 3 deletions
diff --git a/skate/zippy.go b/skate/zippy.go
index 37fca4b..5767b34 100644
--- a/skate/zippy.go
+++ b/skate/zippy.go
@@ -1,7 +1,13 @@
// This file contains various "reducers", e.g. working on two data streams and
// applying a function on groups of documents with a shared key.
//
-// Note: A bit repetitive, but not want to introduce any other abstraction.
+// Note: This is a bit repetitive, but we do not want to introduce any other
+// abstraction for now. Since most of the logic is in the grouper functions, we
+// could make them top level and then assemble the zipkey runner on the fly.
+//
+// The most confusing aspect currently is the variety of schemas hidden within
+// the readers (and string groups): release, ref, ref-as-release, open library,
+// wikipedia, ...
package skate
import (
@@ -14,10 +20,10 @@ import (
json "github.com/segmentio/encoding/json"
)
-// groupLogf logs a message alongsize a serialized group.
+// groupLogf logs a message alongside a serialized group for debugging.
func groupLogf(g *zipkey.Group, s string, vs ...interface{}) {
log.Printf(s, vs...)
- b, _ := json.Marshal(g)
+ b, _ := json.MarshalIndent(g, "", " ")
log.Println(string(b))
}
@@ -305,6 +311,14 @@ func ZippyVerifyRefsOpenLibrary(olr, refs io.Reader, w io.Writer) error {
return zipper.Run()
}
+// ZippyBrefAugment takes all matched elements from bref and adds docs from raw
+// refs that have not been matched. It also gets rid of duplicate matches.
+//
+// We can identify which docs have been matched by checking the ref key and index.
+func ZippyBrefAugment(bref, raw io.Reader, w io.Writer) error {
+ return nil
+}
+
// Cut returns a specific column (1-indexed, like CutSep) from a tabular
// file, returns empty string if column is invalid.
func Cut(line string, column int) string {