diff options
author | Martin Czygan <martin.czygan@gmail.com> | 2021-07-27 23:21:07 +0200 |
---|---|---|
committer | Martin Czygan <martin.czygan@gmail.com> | 2021-07-27 23:21:10 +0200 |
commit | 759aaa63dcb98709a53da8cfe54279524fec55ce (patch) | |
tree | 094e663c71176165d6d5d9ace3f92d1cd7ed4731 /skate | |
parent | ae08433f9edaec0cab34abc700d5eb75a4497287 (diff) | |
download | refcat-759aaa63dcb98709a53da8cfe54279524fec55ce.tar.gz refcat-759aaa63dcb98709a53da8cfe54279524fec55ce.zip |
reduce: add more logging, temporarily
Diffstat (limited to 'skate')
-rw-r--r-- | skate/reduce.go | 7 |
1 file changed, 6 insertions, 1 deletion
```diff
diff --git a/skate/reduce.go b/skate/reduce.go
index 37de218..992aaec 100644
--- a/skate/reduce.go
+++ b/skate/reduce.go
@@ -451,11 +451,12 @@ func ZippyBrefAugment(bref, raw io.Reader, w io.Writer) error {
 
 			// First, iterate over all matches and sort out duplicates, e.g.
 			// docs that have the same source and target id.
-			log.Printf("group K=%s, G0=%d, G1=%d", g.Key, len(g.G0), len(g.G1))
+			log.Printf("group K=%s, G0=%d (bref), G1=%d (ref)", g.Key, len(g.G0), len(g.G1))
 			matched, err := uniqueMatches(CutBatch(g.G0, 2), &stats)
 			if err != nil {
 				return err
 			}
+			log.Printf("found %v unique matches", len(matched))
 			var refs = make([]*Ref, len(g.G1))
 			for i := 0; i < len(refs); i++ {
 				var (
@@ -469,12 +470,15 @@ func ZippyBrefAugment(bref, raw io.Reader, w io.Writer) error {
 			}
 			// TODO: this slows down this process; be a bit smarter about slices.
 			matched = matchedRefsExtend(matched, refs, &stats)
+			log.Printf("matchedRefsExtend results in %d docs", len(matched))
 			// At this point, we may have duplicates by "_id", e.g. source
 			// release ident and ref index (example:
 			// 4kg2dejsgzaf3cszs2lt5hz4by_9, which appears three times, one
 			// exact match, and twice unmatched).
 			matched = deduplicateBrefs(matched)
+			log.Printf("deduplicateBrefs results in %d docs", len(matched))
 			matched = removeSelfLinks(matched)
+			log.Printf("removeSelfLinks results in %d docs", len(matched))
 			for _, bref := range matched {
 				stats.total++
 				if err := enc.Encode(bref); err != nil {
@@ -533,6 +537,7 @@ func deduplicateBrefs(brefs []*BiblioRef) []*BiblioRef {
 		i int
 	)
 	for _, v := range brefs {
+		// XXX: Is this enough, do we get rid of too many things here?
 		if seen.Contains(v.Key) {
 			continue
 		}
```