about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- skate/cmd/skate-reduce/main.go 17
-rw-r--r-- skate/zippy.go 50
2 files changed, 67 insertions, 0 deletions
diff --git a/skate/cmd/skate-reduce/main.go b/skate/cmd/skate-reduce/main.go
index c0ef9fc..098240a 100644
--- a/skate/cmd/skate-reduce/main.go
+++ b/skate/cmd/skate-reduce/main.go
@@ -44,6 +44,11 @@
// * oled | zippy mode for releases and OL inputs, emit biblioref.
// |
// | $ skate-reduce -m oled -F a.ndj -O b.ndj
+// |
+// |
+// * rere | zippy mode for OL release and ref (as release) inputs, emit biblioref.
+// |
+// | $ skate-reduce -m rere -O a.ndj -F b.ndj
//
package main
@@ -159,6 +164,18 @@ func main() {
if err := skate.ZippyVerifyRefsOpenLibrary(o, f, bw); err != nil {
log.Fatal(err)
}
+ case "rere":
+ o, f, err := xio.OpenTwo(*releases, *refs)
+ if err != nil {
+ log.Fatal(err)
+ }
+ r, ok := reasonMap[*reason]
+ if !ok {
+ log.Fatalf("unknown reason: %v", *reason)
+ }
+ if err := skate.ZippyExactReleases(o, f, r, bw); err != nil {
+ log.Fatal(err)
+ }
default:
log.Fatalf("invalid mode")
}
diff --git a/skate/zippy.go b/skate/zippy.go
index 7ee12cf..793fe4a 100644
--- a/skate/zippy.go
+++ b/skate/zippy.go
@@ -73,6 +73,56 @@ func ZippyExact(releases, refs io.Reader, matchResult MatchResult, w io.Writer)
return zipper.Run()
}
+// ZippyExactReleases reads two (key, doc) release streams and, for each group
+// that has lines on both sides, writes one BiblioRef per parseable line of the
+// second stream to w as JSON, stamped with the fixed matchResult.
+func ZippyExactReleases(olReader, reReader io.Reader, matchResult MatchResult, w io.Writer) error {
+	enc := json.NewEncoder(w)
+	keyer := makeKeyFunc("\t", 1) // presumably keys on the first tab-separated column
+	var seen int                  // groups handed to the callback so far
+	emit := func(g *zipkey.Group) error {
+		seen++
+		if seen%10000 == 0 {
+			log.Printf("processed %v groups", seen)
+		}
+		// A group missing either side cannot produce a match.
+		if len(g.G0) == 0 || len(g.G1) == 0 {
+			return nil
+		}
+		// Only the first G0 line (presumably the olReader side) is the target.
+		target, err := stringToRelease(Cut(g.G0[0], 2))
+		if err != nil {
+			groupLogf(g, "[skip] failed to parse release: %v", err)
+			return nil
+		}
+		for _, line := range g.G1 {
+			re, err := stringToRelease(Cut(line, 2))
+			if err != nil {
+				groupLogf(g, "[skip] failed to parse release: %v", err)
+				continue
+			}
+			bref := BiblioRef{
+				SourceReleaseIdent:    re.Ident,
+				SourceWorkIdent:       re.WorkID,
+				SourceReleaseStage:    re.ReleaseStage,
+				SourceYear:            fmt.Sprintf("%d", re.ReleaseYear()),
+				RefIndex:              re.Extra.Skate.Ref.Index + 1, // 1-based, so omitempty keeps the first ref
+				RefKey:                re.Extra.Skate.Ref.Key,
+				TargetOpenLibraryWork: target.WorkID,
+				MatchProvenance:       re.Extra.Skate.Ref.Source,
+				MatchStatus:           matchResult.Status.Short(),
+				MatchReason:           matchResult.Reason.Short(),
+			}
+			if err := enc.Encode(bref); err != nil {
+				return err
+			}
+		}
+		return nil
+	}
+	run := zipkey.New(olReader, reReader, keyer, emit)
+	return run.Run()
+}
+
// ZippyExactWiki takes a release and wiki reader (key, doc) and assigns a
// fixed match result.
func ZippyExactWiki(releases, wiki io.Reader, mr MatchResult, w io.Writer) error {