about summary refs log tree commit diff stats
path: root/skate/zippy.go
diff options
context:
space:
mode:
Diffstat (limited to 'skate/zippy.go')
-rw-r--r-- skate/zippy.go 48
1 files changed, 45 insertions, 3 deletions
diff --git a/skate/zippy.go b/skate/zippy.go
index c949069..7aff7a6 100644
--- a/skate/zippy.go
+++ b/skate/zippy.go
@@ -319,7 +319,7 @@ func ZippyVerifyRefsOpenLibrary(olr, refs io.Reader, w io.Writer) error {
// We can identify which docs have been matched by checking the ref key and index.
func ZippyBrefAugment(bref, raw io.Reader, w io.Writer) error {
var (
- _ = json.NewEncoder(w)
+ enc = json.NewEncoder(w)
keyer = makeKeyFunc("\t", 1)
grouper = func(g *zipkey.Group) error {
// g.G0 contains the matched docs for a given work id, g.G1 all raw
@@ -327,13 +327,26 @@ func ZippyBrefAugment(bref, raw io.Reader, w io.Writer) error {
// First, iterate over all matches and sort out duplicates, e.g.
// docs that have the same source and target id.
- unique, err := uniqueMatches(g.G0)
+ matched, err := uniqueMatches(g.G0)
if err != nil {
return err
}
+ var refs = make([]*Ref, len(g.G1))
+ for i := 0; i < len(refs); i++ {
+ var ref Ref
+ if err := json.Unmarshal([]byte(g.G1[i]), &ref); err != nil {
+ return err
+ }
+ refs[i] = &ref
+ }
+ matchedRefsExtend(matched, refs)
+ for _, bref := range matched {
+ if err := enc.Encode(bref); err != nil {
+ return err
+ }
+ }
// We want to find all items in g.G1 that are not among the unique matches.
// This is a set-like operation, but we want a custom comparator.
- log.Println(unique)
return nil
}
@@ -342,6 +355,35 @@ func ZippyBrefAugment(bref, raw io.Reader, w io.Writer) error {
return zipper.Run()
}
+// matchedRefsExtend takes a set of (unique) biblioref docs and appends to it,
+// as biblioref, all raw references which did not result in a match (determined
+// by ref key and index). NOTE(review): nothing is returned, so the caller does not see the appended docs.
+func matchedRefsExtend(matched []*BiblioRef, refs []*Ref) {
+ s := set.New() // store key + index of matched items
+ for _, m := range matched {
+ s.Add(m.Key + fmt.Sprintf("%d", m.RefIndex))
+ }
+ for _, r := range refs {
+ if s.Contains(r.Key + fmt.Sprintf("%d", r.Index)) {
+ continue
+ }
+ var bref BiblioRef
+ bref.Key = fmt.Sprintf("%s_%d", r.ReleaseIdent, r.Index)
+ bref.RefIndex = r.Index
+ bref.RefKey = r.Key
+ bref.SourceReleaseIdent = r.ReleaseIdent
+ bref.SourceReleaseStage = r.ReleaseStage
+ bref.SourceWorkIdent = r.WorkIdent
+ bref.SourceYear = fmt.Sprintf("%d", r.ReleaseYear)
+ bref.TargetUnstructured = r.Biblio.Unstructured
+ // Reuse fields for debugging, for now.
+ bref.MatchStatus = StatusUnmatched.Short()
+ bref.MatchReason = ReasonUnknown.Short()
+ matched = append(matched, &bref)
+ }
+ return
+}
+
// uniqueMatches takes a list of bref docs as strings (not yet decoded) and
// will return a list of decoded bref docs, containing unique matches.
func uniqueMatches(docs []string) (result []*BiblioRef, err error) {