From 25c10e412df5775d101c0c07752cf826e0850934 Mon Sep 17 00:00:00 2001 From: Martin Czygan Date: Tue, 8 Jun 2021 22:27:03 +0200 Subject: reduce: add rere mode --- skate/cmd/skate-reduce/main.go | 17 ++++++++++++++ skate/zippy.go | 50 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 67 insertions(+) diff --git a/skate/cmd/skate-reduce/main.go b/skate/cmd/skate-reduce/main.go index c0ef9fc..098240a 100644 --- a/skate/cmd/skate-reduce/main.go +++ b/skate/cmd/skate-reduce/main.go @@ -44,6 +44,11 @@ // * oled | zippy mode for releases and OL inputs, emit biblioref. // | // | $ skate-reduce -m oled -F a.ndj -O b.ndj +// | +// | +// * rere | zippy mode for OL release and ref (as release) inputs, emit biblioref. +// | +// | $ skate-reduce -m rere -O a.ndj -F b.ndj // package main @@ -159,6 +164,18 @@ func main() { if err := skate.ZippyVerifyRefsOpenLibrary(o, f, bw); err != nil { log.Fatal(err) } + case "rere": + o, f, err := xio.OpenTwo(*releases, *refs) + if err != nil { + log.Fatal(err) + } + r, ok := reasonMap[*reason] + if !ok { + log.Fatalf("unknown reason: %v", *reason) + } + if err := skate.ZippyExactReleases(o, f, r, bw); err != nil { + log.Fatal(err) + } default: log.Fatalf("invalid mode") } diff --git a/skate/zippy.go b/skate/zippy.go index 7ee12cf..793fe4a 100644 --- a/skate/zippy.go +++ b/skate/zippy.go @@ -73,6 +73,56 @@ func ZippyExact(releases, refs io.Reader, matchResult MatchResult, w io.Writer) return zipper.Run() } +// ZippyExactReleases takes two release readers (key, doc) and assigns a fixed +// match result. +func ZippyExactReleases(olReader, reReader io.Reader, matchResult MatchResult, w io.Writer) error { + var ( + enc = json.NewEncoder(w) + keyer = makeKeyFunc("\t", 1) + i = 0 + grouper = func(g *zipkey.Group) error { + i++ + if i%10000 == 0 { + log.Printf("processed %v groups", i) + } + var ( + target, re *Release + err error + ) + if len(g.G0) == 0 || len(g.G1) == 0 { + return nil + } + if target, err = stringToRelease(Cut(g.G0[0], 2)); err != nil { + groupLogf(g, "[skip] failed to parse release: %v", err) + return nil + } + for _, line := range g.G1 { + if re, err = stringToRelease(Cut(line, 2)); err != nil { + groupLogf(g, "[skip] failed to parse release: %v", err) + continue + } + var bref BiblioRef + bref.SourceReleaseIdent = re.Ident + bref.SourceWorkIdent = re.WorkID + bref.SourceReleaseStage = re.ReleaseStage + bref.SourceYear = fmt.Sprintf("%d", re.ReleaseYear()) + bref.RefIndex = re.Extra.Skate.Ref.Index + 1 // we want 1-index (also helps with omitempty) + bref.RefKey = re.Extra.Skate.Ref.Key + bref.TargetOpenLibraryWork = target.WorkID + bref.MatchProvenance = re.Extra.Skate.Ref.Source + bref.MatchStatus = matchResult.Status.Short() + bref.MatchReason = matchResult.Reason.Short() + if err := enc.Encode(bref); err != nil { + return err + } + } + return nil + } + ) + zipper := zipkey.New(olReader, reReader, keyer, grouper) + return zipper.Run() +} + // ZippyExactWiki takes a release and wiki reader (key, doc) and assigns a // fixed match result. func ZippyExactWiki(releases, wiki io.Reader, mr MatchResult, w io.Writer) error { -- cgit v1.2.3