diff options
Diffstat (limited to 'skate')
-rw-r--r-- | skate/zippy.go | 26 | ||||
-rw-r--r-- | skate/zippy_test.go | 35 |
2 files changed, 36 insertions, 25 deletions
diff --git a/skate/zippy.go b/skate/zippy.go index 69f4473..febd4c5 100644 --- a/skate/zippy.go +++ b/skate/zippy.go @@ -349,12 +349,12 @@ func ZippyBrefAugment(bref, raw io.Reader, w io.Writer) error { } refs[i] = &ref } + // TODO: this slows down this process; be a bit smarter about slices. matched = matchedRefsExtend(matched, refs, &stats) // At this point, we may have duplicates by "_id", e.g. source // release ident and ref index (example: // 4kg2dejsgzaf3cszs2lt5hz4by_9, which appears three times, one // exact match, and twice unmatched). - // TODO: remove duplicates matched = deduplicateBrefs(matched) matched = removeSelfLinks(matched) for _, bref := range matched { @@ -372,8 +372,8 @@ func ZippyBrefAugment(bref, raw io.Reader, w io.Writer) error { return err } -// removeSelfLinks removes self-referential links. Those should be caught -// earlier at the root cause later. +// removeSelfLinks removes self-referential links. TODO: Those should be caught +// at the root cause. func removeSelfLinks(brefs []*BiblioRef) (result []*BiblioRef) { for _, bref := range brefs { if bref.SourceReleaseIdent == bref.TargetReleaseIdent { @@ -418,15 +418,17 @@ func deduplicateBrefs(brefs []*BiblioRef) []*BiblioRef { // did not result in a match (determined by e.g. ref key and index). XXX: We // may have duplicate refs as well - how to distinguish them? func matchedRefsExtend(matched []*BiblioRef, refs []*Ref, stats *statsAugment) []*BiblioRef { - s := set.New() // store key + index of matched items + seen := set.New() // store "key + index" of matched items for _, m := range matched { - s.Add(m.RefKey + fmt.Sprintf("%d", m.RefIndex)) + s := m.RefKey + fmt.Sprintf("%d", m.RefIndex) + seen.Add(s) } for _, r := range refs { - if s.Contains(r.Key + fmt.Sprintf("%d", r.Index)) { + s := r.Key + fmt.Sprintf("%d", r.Index) + if seen.Contains(s) { stats.skipMatchedRef++ - log.Printf("skip-matched-ref [%d]: from %d matches; ident=%v, title=%s, key=%v, index=%v", - stats.skipMatchedRef, len(matched), r.ReleaseIdent, r.Biblio.Title, r.Key, fmt.Sprintf("%d", r.Index)) + log.Printf("skip-matched-ref [%d]: from %d matches; ident=%v, title=%s, key=%v, index=%d", + stats.skipMatchedRef, len(matched), r.ReleaseIdent, r.Biblio.Title, r.Key, r.Index) continue } var bref BiblioRef @@ -442,7 +444,6 @@ func matchedRefsExtend(matched []*BiblioRef, refs []*Ref, stats *statsAugment) [ // Reuse fields for debugging, for now. bref.MatchStatus = StatusUnmatched.Short() bref.MatchReason = ReasonUnknown.Short() - // bref.Extra.Ref = *r matched = append(matched, &bref) } return matched @@ -450,7 +451,7 @@ func matchedRefsExtend(matched []*BiblioRef, refs []*Ref, stats *statsAugment) [ // uniqueMatches takes a list of bref docs (unserialized) and will return a // list of deserialized bref docs, containing unique matches only (e.g. filter -// out things duplicate matches, e.g. from exact and fuzzy). We are including +// out duplicate matches, e.g. from exact and fuzzy). We are including // "skate-bref-id" post-processing here as well (but there is surely a better // place for that). func uniqueMatches(docs []string, stats *statsAugment) (result []*BiblioRef, err error) { @@ -535,11 +536,10 @@ func FindByPrefix(ss []string, prefix string) string { // column from fields separated by sep; column is 1-indexed. func makeKeyFunc(sep string, column int) func(string) (string, error) { return func(s string) (string, error) { - if k := CutSep(s, sep, column); k == "" { - return k, fmt.Errorf("cannot get key from column %d in line (len=%d): %s", column, len(s), s) - } else { + if k := CutSep(s, sep, column); k != "" { return k, nil } + return "", fmt.Errorf("cannot get key from column %d in line (len=%d): %s", column, len(s), s) } } diff --git a/skate/zippy_test.go b/skate/zippy_test.go index f1e8822..7290e94 100644 --- a/skate/zippy_test.go +++ b/skate/zippy_test.go @@ -224,18 +224,6 @@ func TestMatchedRefsExtend(t *testing.T) { MatchStatus: StatusUnmatched.Short(), MatchReason: ReasonUnknown.Short(), SourceYear: "0", - // Extra: struct { - // Ref Ref `json:"ref"` - // }{ - // Ref: Ref{ - // ReleaseIdent: "0000", - // Biblio: Biblio{ - // Title: "Title", - // }, - // Index: 3, - // Key: "K3", - // }, - // }, }, }, }, @@ -277,3 +265,26 @@ func TestMatchedRefsExtend(t *testing.T) { } } } + +func TestRemoveSelfLinks(t *testing.T) { + var cases = []struct { + brefs []*BiblioRef + result []*BiblioRef + }{ + { + brefs: nil, + result: nil, + }, + { + brefs: []*BiblioRef{}, + result: nil, + }, + } + for i, c := range cases { + result := removeSelfLinks(c.brefs) + if !reflect.DeepEqual(result, c.result) { + t.Fatalf("[%d]: got %v, want %v (%v)", + i, result, c.result, pretty.Diff(result, c.result)) + } + } +} |