about | summary | refs | log | tree | commit | diff | stats
path: root/skate
diff options
context:
space:
mode:
Diffstat (limited to 'skate')
-rw-r--r-- skate/zippy.go 59
1 files changed, 58 insertions, 1 deletions
diff --git a/skate/zippy.go b/skate/zippy.go
index 5767b34..bd7bf70 100644
--- a/skate/zippy.go
+++ b/skate/zippy.go
@@ -14,8 +14,10 @@ import (
"fmt"
"io"
"log"
+ "sort"
"strings"
+ "git.archive.org/martin/cgraph/skate/set"
"git.archive.org/martin/cgraph/skate/zipkey"
json "github.com/segmentio/encoding/json"
)
@@ -316,7 +318,57 @@ func ZippyVerifyRefsOpenLibrary(olr, refs io.Reader, w io.Writer) error {
//
// We can identify, which docs have been matched by checking the ref key and index.
func ZippyBrefAugment(bref, raw io.Reader, w io.Writer) error {
-	return nil
+	var (
+		// enc writes the resulting documents to w.
+		enc = json.NewEncoder(w)
+		// keyer extracts the grouping key from column 1 of a tab-separated line.
+		keyer = makeKeyFunc("\t", 1)
+		// grouper is invoked once per key with the lines of both inputs that
+		// share that key.
+		grouper = func(g *zipkey.Group) error {
+			// g.G0 contains the matched docs for a given work id, g.G1 all
+			// raw refs with the same work id.
+			//
+			// NOTE(review): both inputs are keyed on tab-separated column 1;
+			// if the JSON document lives in a later column, the entries of
+			// g.G0 need to be cut down to that column before uniqueMatches
+			// can unmarshal them -- confirm against the input format.
+
+			// First, iterate over all matches and sort out duplicates, e.g.
+			// docs that have the same source and target id.
+			uniqueBref, err := uniqueMatches(g.G0)
+			if err != nil {
+				return err
+			}
+			// Emit the deduplicated matches to w. Previously the result was
+			// computed and then dropped (and neither enc nor uniqueBref was
+			// used, which does not compile).
+			//
+			// TODO: augment the output with the raw refs from g.G1 that have
+			// not been matched.
+			for _, doc := range uniqueBref {
+				if err := enc.Encode(doc); err != nil {
+					return err
+				}
+			}
+			return nil
+		}
+	)
+	zipper := zipkey.New(bref, raw, keyer, grouper)
+	return zipper.Run()
+}
+
+// uniqueMatches takes a list of serialized (JSON) bref docs and returns the
+// deserialized docs with duplicates removed. Two docs are considered
+// duplicates if they share both source and target release ident; of a group
+// of duplicates, an exact match is preferred, otherwise the first doc in
+// input order is kept. An error is returned if any doc fails to unmarshal.
+func uniqueMatches(docs []string) (result []*BiblioRef, err error) {
+	brefs := make([]*BiblioRef, 0, len(docs))
+	for _, doc := range docs {
+		// Decode into a fresh value per doc. Reusing a single variable and
+		// appending its address would make every slice element point at the
+		// same struct (holding only the last doc), and fields missing from a
+		// later doc would silently carry over from an earlier one.
+		var bref BiblioRef
+		if err := json.Unmarshal([]byte(doc), &bref); err != nil {
+			return nil, err
+		}
+		brefs = append(brefs, &bref)
+	}
+	// Make sure exact matches come first, so they win during deduplication.
+	// The comparator must be a proper ordering (it has to look at both
+	// elements); the stable sort keeps input order within each class.
+	exact := StatusExact.Short()
+	sort.SliceStable(brefs, func(i, j int) bool {
+		return brefs[i].MatchStatus == exact && brefs[j].MatchStatus != exact
+	})
+	// We consider a match unique, if source and target match. The separator
+	// keeps different (source, target) pairs from producing the same key.
+	hash := func(bref *BiblioRef) string {
+		return bref.SourceReleaseIdent + "\x00" + bref.TargetReleaseIdent
+	}
+	seen := set.New()
+	for _, doc := range brefs {
+		v := hash(doc)
+		if seen.Contains(v) {
+			continue
+		}
+		seen.Add(v)
+		result = append(result, doc)
+	}
+	return result, nil
}
// Cut returns a specific column (1-indexed, like CutSep) from a tabular
@@ -371,3 +423,8 @@ func stringToWiki(s string) (r *MinimalCitations, err error) {
err = json.Unmarshal([]byte(s), &r)
return
}
+
+// stringToBiblioref unmarshals the JSON document s into a BiblioRef and
+// returns it together with any decoding error.
+func stringToBiblioref(s string) (r *BiblioRef, err error) {
+	data := []byte(s)
+	err = json.Unmarshal(data, &r)
+	return r, err
+}