diff options
Diffstat (limited to 'skate/zippy.go')
-rw-r--r-- | skate/zippy.go | 48 |
1 files changed, 45 insertions, 3 deletions
diff --git a/skate/zippy.go b/skate/zippy.go index c949069..7aff7a6 100644 --- a/skate/zippy.go +++ b/skate/zippy.go @@ -319,7 +319,7 @@ func ZippyVerifyRefsOpenLibrary(olr, refs io.Reader, w io.Writer) error { // We can identify, which docs have been matched by checking the ref key and index. func ZippyBrefAugment(bref, raw io.Reader, w io.Writer) error { var ( - _ = json.NewEncoder(w) + enc = json.NewEncoder(w) keyer = makeKeyFunc("\t", 1) grouper = func(g *zipkey.Group) error { // g.G0 contains a matched docs for a given work id, g.G1 all raw @@ -327,13 +327,26 @@ func ZippyBrefAugment(bref, raw io.Reader, w io.Writer) error { // First, iterate over all matches and sort out duplicates, e.g. // docs that have the same source and target id. - unique, err := uniqueMatches(g.G0) + matched, err := uniqueMatches(g.G0) if err != nil { return err } + var refs = make([]*Ref, len(g.G1)) + for i := 0; i < len(refs); i++ { + var ref Ref + if err := json.Unmarshal([]byte(g.G1[i]), &ref); err != nil { + return err + } + refs[i] = &ref + } + matchedRefsExtend(matched, refs) + for _, bref := range matched { + if err := enc.Encode(bref); err != nil { + return err + } + } // We want to find all items in g.G1, which are not in unique. This // is a set like operation, but we want a custom comparator. - log.Println(unique) return nil } @@ -342,6 +355,35 @@ func ZippyBrefAugment(bref, raw io.Reader, w io.Writer) error { return zipper.Run() } +// matchedRefsExtend takes a set of (unique) biblioref docs and will emit that +// set of biblioref docs (unchanged) plus raw references as biblioref, which +// did not result in a match (determined by ref key and index). +func matchedRefsExtend(matched []*BiblioRef, refs []*Ref) { + s := set.New() // store key + index of matched items + for _, m := range matched { + s.Add(m.Key + fmt.Sprintf("%d", m.RefIndex)) + } + for _, r := range refs { + if s.Contains(r.Key + fmt.Sprintf("%d", r.Index)) { + continue + } + var bref BiblioRef + bref.Key = fmt.Sprintf("%s_%d", r.ReleaseIdent, r.Index) + bref.RefIndex = r.Index + bref.RefKey = r.Key + bref.SourceReleaseIdent = r.ReleaseIdent + bref.SourceReleaseStage = r.ReleaseStage + bref.SourceWorkIdent = r.WorkIdent + bref.SourceYear = fmt.Sprintf("%d", r.ReleaseYear) + bref.TargetUnstructured = r.Biblio.Unstructured + // Reuse fields for debugging, for now. + bref.MatchStatus = StatusUnmatched.Short() + bref.MatchReason = ReasonUnknown.Short() + matched = append(matched, &bref) + } + return +} + // uniqueMatches takes a list of bref docs (unserialized) and will return a // list of serialized bref docs, containing unique matches. func uniqueMatches(docs []string) (result []*BiblioRef, err error) { |