zippy: add test

author: Martin Czygan <martin.czygan@gmail.com> 2021-06-30 21:42:48 +0200
committer: Martin Czygan <martin.czygan@gmail.com> 2021-06-30 21:42:48 +0200
commit: 4e85737c1268a62dd40bc39a3f5016462f591d2a (patch)
tree: 2a0d62f353655815d1ecd082f3ca806516a9695c /skate/zippy.go
parent: eb71aa4b05c1e02d2e125b9a5d16adc23ee71560 (diff)
download: refcat-4e85737c1268a62dd40bc39a3f5016462f591d2a.tar.gz refcat-4e85737c1268a62dd40bc39a3f5016462f591d2a.zip
1 file changed, 13 insertions, 13 deletions
diff --git a/skate/zippy.go b/skate/zippy.go
index 69f4473..febd4c5 100644
--- a/skate/zippy.go
+++ b/skate/zippy.go
@@ -349,12 +349,12 @@ func ZippyBrefAugment(bref, raw io.Reader, w io.Writer) error {
 				}
 				refs[i] = &ref
 			}
+			// TODO: this slows down this process; be a bit smarter about slices.
 			matched = matchedRefsExtend(matched, refs, &stats)
 			// At this point, we may have duplicates by "_id", e.g. source
 			// release ident and ref index (example:
 			// 4kg2dejsgzaf3cszs2lt5hz4by_9, which appears three times, one
 			// exact match, and twice unmatched).
-			// TODO: remove duplicates
 			matched = deduplicateBrefs(matched)
 			matched = removeSelfLinks(matched)
 			for _, bref := range matched {
@@ -372,8 +372,8 @@ func ZippyBrefAugment(bref, raw io.Reader, w io.Writer) error {
 	return err
 }
 
-// removeSelfLinks removes self-referential links. Those should be caught
-// earlier at the root cause later.
+// removeSelfLinks removes self-referential links. TODO: Those should be caught
+// at the root cause.
 func removeSelfLinks(brefs []*BiblioRef) (result []*BiblioRef) {
 	for _, bref := range brefs {
 		if bref.SourceReleaseIdent == bref.TargetReleaseIdent {
@@ -418,15 +418,17 @@ func deduplicateBrefs(brefs []*BiblioRef) []*BiblioRef {
 // did not result in a match (determined by e.g. ref key and index).  XXX: We
 // may have duplicate refs as well - how to distinguish them?
 func matchedRefsExtend(matched []*BiblioRef, refs []*Ref, stats *statsAugment) []*BiblioRef {
-	s := set.New() // store key + index of matched items
+	seen := set.New() // store "key + index" of matched items
 	for _, m := range matched {
-		s.Add(m.RefKey + fmt.Sprintf("%d", m.RefIndex))
+		s := m.RefKey + fmt.Sprintf("%d", m.RefIndex)
+		seen.Add(s)
 	}
 	for _, r := range refs {
-		if s.Contains(r.Key + fmt.Sprintf("%d", r.Index)) {
+		s := r.Key + fmt.Sprintf("%d", r.Index)
+		if seen.Contains(s) {
 			stats.skipMatchedRef++
-			log.Printf("skip-matched-ref [%d]: from %d matches; ident=%v, title=%s, key=%v, index=%v",
-				stats.skipMatchedRef, len(matched), r.ReleaseIdent, r.Biblio.Title, r.Key, fmt.Sprintf("%d", r.Index))
+			log.Printf("skip-matched-ref [%d]: from %d matches; ident=%v, title=%s, key=%v, index=%d",
+				stats.skipMatchedRef, len(matched), r.ReleaseIdent, r.Biblio.Title, r.Key, r.Index)
 			continue
 		}
 		var bref BiblioRef
@@ -442,7 +444,6 @@ func matchedRefsExtend(matched []*BiblioRef, refs []*Ref, stats *statsAugment) [
 		// Reuse fields for debugging, for now.
 		bref.MatchStatus = StatusUnmatched.Short()
 		bref.MatchReason = ReasonUnknown.Short()
-		// bref.Extra.Ref = *r
 		matched = append(matched, &bref)
 	}
 	return matched
@@ -450,7 +451,7 @@ func matchedRefsExtend(matched []*BiblioRef, refs []*Ref, stats *statsAugment) [
 
 // uniqueMatches takes a list of bref docs (unserialized) and will return a
 // list of deserialized bref docs, containing unique matches only (e.g. filter
-// out things duplicate matches, e.g. from exact and fuzzy). We are including
+// out duplicate matches, e.g. from exact and fuzzy). We are including
 // "skate-bref-id" post-processing here as well (but there is surely a better
 // place for that).
 func uniqueMatches(docs []string, stats *statsAugment) (result []*BiblioRef, err error) {
@@ -535,11 +536,10 @@ func FindByPrefix(ss []string, prefix string) string {
 // column from fields separated by sep; column is 1-indexed.
 func makeKeyFunc(sep string, column int) func(string) (string, error) {
 	return func(s string) (string, error) {
-		if k := CutSep(s, sep, column); k == "" {
-			return k, fmt.Errorf("cannot get key from column %d in line (len=%d): %s", column, len(s), s)
-		} else {
+		if k := CutSep(s, sep, column); k != "" {
 			return k, nil
 		}
+		return "", fmt.Errorf("cannot get key from column %d in line (len=%d): %s", column, len(s), s)
 	}
 }
author	Martin Czygan <martin.czygan@gmail.com>	2021-06-30 21:42:48 +0200
committer	Martin Czygan <martin.czygan@gmail.com>	2021-06-30 21:42:48 +0200
commit	4e85737c1268a62dd40bc39a3f5016462f591d2a (patch)
tree	2a0d62f353655815d1ecd082f3ca806516a9695c /skate/zippy.go
parent	eb71aa4b05c1e02d2e125b9a5d16adc23ee71560 (diff)
download	refcat-4e85737c1268a62dd40bc39a3f5016462f591d2a.tar.gz refcat-4e85737c1268a62dd40bc39a3f5016462f591d2a.zip