aboutsummaryrefslogtreecommitdiffstats
path: root/skate/zippy.go
diff options
context:
space:
mode:
authorMartin Czygan <martin.czygan@gmail.com>2021-06-30 21:42:48 +0200
committerMartin Czygan <martin.czygan@gmail.com>2021-06-30 21:42:48 +0200
commit4e85737c1268a62dd40bc39a3f5016462f591d2a (patch)
tree2a0d62f353655815d1ecd082f3ca806516a9695c /skate/zippy.go
parenteb71aa4b05c1e02d2e125b9a5d16adc23ee71560 (diff)
downloadrefcat-4e85737c1268a62dd40bc39a3f5016462f591d2a.tar.gz
refcat-4e85737c1268a62dd40bc39a3f5016462f591d2a.zip
zippy: add test
Diffstat (limited to 'skate/zippy.go')
-rw-r--r--skate/zippy.go26
1 files changed, 13 insertions, 13 deletions
diff --git a/skate/zippy.go b/skate/zippy.go
index 69f4473..febd4c5 100644
--- a/skate/zippy.go
+++ b/skate/zippy.go
@@ -349,12 +349,12 @@ func ZippyBrefAugment(bref, raw io.Reader, w io.Writer) error {
}
refs[i] = &ref
}
+ // TODO: this slows down this process; be a bit smarter about slices.
matched = matchedRefsExtend(matched, refs, &stats)
// At this point, we may have duplicates by "_id", e.g. source
// release ident and ref index (example:
// 4kg2dejsgzaf3cszs2lt5hz4by_9, which appears three times, one
// exact match, and twice unmatched).
- // TODO: remove duplicates
matched = deduplicateBrefs(matched)
matched = removeSelfLinks(matched)
for _, bref := range matched {
@@ -372,8 +372,8 @@ func ZippyBrefAugment(bref, raw io.Reader, w io.Writer) error {
return err
}
-// removeSelfLinks removes self-referential links. Those should be caught
-// earlier at the root cause later.
+// removeSelfLinks removes self-referential links. TODO: Those should be caught
+// at the root cause.
func removeSelfLinks(brefs []*BiblioRef) (result []*BiblioRef) {
for _, bref := range brefs {
if bref.SourceReleaseIdent == bref.TargetReleaseIdent {
@@ -418,15 +418,17 @@ func deduplicateBrefs(brefs []*BiblioRef) []*BiblioRef {
// did not result in a match (determined by e.g. ref key and index). XXX: We
// may have duplicate refs as well - how to distinguish them?
func matchedRefsExtend(matched []*BiblioRef, refs []*Ref, stats *statsAugment) []*BiblioRef {
- s := set.New() // store key + index of matched items
+ seen := set.New() // store "key + index" of matched items
for _, m := range matched {
- s.Add(m.RefKey + fmt.Sprintf("%d", m.RefIndex))
+ s := m.RefKey + fmt.Sprintf("%d", m.RefIndex)
+ seen.Add(s)
}
for _, r := range refs {
- if s.Contains(r.Key + fmt.Sprintf("%d", r.Index)) {
+ s := r.Key + fmt.Sprintf("%d", r.Index)
+ if seen.Contains(s) {
stats.skipMatchedRef++
- log.Printf("skip-matched-ref [%d]: from %d matches; ident=%v, title=%s, key=%v, index=%v",
- stats.skipMatchedRef, len(matched), r.ReleaseIdent, r.Biblio.Title, r.Key, fmt.Sprintf("%d", r.Index))
+ log.Printf("skip-matched-ref [%d]: from %d matches; ident=%v, title=%s, key=%v, index=%d",
+ stats.skipMatchedRef, len(matched), r.ReleaseIdent, r.Biblio.Title, r.Key, r.Index)
continue
}
var bref BiblioRef
@@ -442,7 +444,6 @@ func matchedRefsExtend(matched []*BiblioRef, refs []*Ref, stats *statsAugment) [
// Reuse fields for debugging, for now.
bref.MatchStatus = StatusUnmatched.Short()
bref.MatchReason = ReasonUnknown.Short()
- // bref.Extra.Ref = *r
matched = append(matched, &bref)
}
return matched
@@ -450,7 +451,7 @@ func matchedRefsExtend(matched []*BiblioRef, refs []*Ref, stats *statsAugment) [
// uniqueMatches takes a list of bref docs (still serialized) and will return a
// list of deserialized bref docs, containing unique matches only (e.g. filter
-// out things duplicate matches, e.g. from exact and fuzzy). We are including
+// out duplicate matches, e.g. from exact and fuzzy). We are including
// "skate-bref-id" post-processing here as well (but there is surely a better
// place for that).
func uniqueMatches(docs []string, stats *statsAugment) (result []*BiblioRef, err error) {
@@ -535,11 +536,10 @@ func FindByPrefix(ss []string, prefix string) string {
// column from fields separated by sep; column is 1-indexed.
func makeKeyFunc(sep string, column int) func(string) (string, error) {
return func(s string) (string, error) {
- if k := CutSep(s, sep, column); k == "" {
- return k, fmt.Errorf("cannot get key from column %d in line (len=%d): %s", column, len(s), s)
- } else {
+ if k := CutSep(s, sep, column); k != "" {
return k, nil
}
+ return "", fmt.Errorf("cannot get key from column %d in line (len=%d): %s", column, len(s), s)
}
}