about | summary | refs | log | tree | commit | diff | stats
path: root/skate
diff options
context:
space:
mode:
author Martin Czygan <martin.czygan@gmail.com> 2021-07-27 23:21:07 +0200
committer Martin Czygan <martin.czygan@gmail.com> 2021-07-27 23:21:10 +0200
commit 759aaa63dcb98709a53da8cfe54279524fec55ce (patch)
tree 094e663c71176165d6d5d9ace3f92d1cd7ed4731 /skate
parent ae08433f9edaec0cab34abc700d5eb75a4497287 (diff)
download refcat-759aaa63dcb98709a53da8cfe54279524fec55ce.tar.gz
refcat-759aaa63dcb98709a53da8cfe54279524fec55ce.zip
reduce: add more logging, temporarily
Diffstat (limited to 'skate')
-rw-r--r-- skate/reduce.go 7
1 files changed, 6 insertions, 1 deletions
diff --git a/skate/reduce.go b/skate/reduce.go
index 37de218..992aaec 100644
--- a/skate/reduce.go
+++ b/skate/reduce.go
@@ -451,11 +451,12 @@ func ZippyBrefAugment(bref, raw io.Reader, w io.Writer) error {
// First, iterate over all matches and sort out duplicates, e.g.
// docs that have the same source and target id.
- log.Printf("group K=%s, G0=%d, G1=%d", g.Key, len(g.G0), len(g.G1))
+ log.Printf("group K=%s, G0=%d (bref), G1=%d (ref)", g.Key, len(g.G0), len(g.G1))
matched, err := uniqueMatches(CutBatch(g.G0, 2), &stats)
if err != nil {
return err
}
+ log.Printf("found %v unique matches", len(matched))
var refs = make([]*Ref, len(g.G1))
for i := 0; i < len(refs); i++ {
var (
@@ -469,12 +470,15 @@ func ZippyBrefAugment(bref, raw io.Reader, w io.Writer) error {
}
// TODO: this slows down this process; be a bit smarter about slices.
matched = matchedRefsExtend(matched, refs, &stats)
+ log.Printf("matchedRefsExtend results in %d docs", len(matched))
// At this point, we may have duplicates by "_id", e.g. source
// release ident and ref index (example:
// 4kg2dejsgzaf3cszs2lt5hz4by_9, which appears three times, one
// exact match, and twice unmatched).
matched = deduplicateBrefs(matched)
+ log.Printf("deduplicateBrefs results in %d docs", len(matched))
matched = removeSelfLinks(matched)
+ log.Printf("removeSelfLinks results in %d docs", len(matched))
for _, bref := range matched {
stats.total++
if err := enc.Encode(bref); err != nil {
@@ -533,6 +537,7 @@ func deduplicateBrefs(brefs []*BiblioRef) []*BiblioRef {
i int
)
for _, v := range brefs {
+ // XXX: Is this enough, do we get rid of too many things here?
if seen.Contains(v.Key) {
continue
}