diff options
-rw-r--r-- | skate/cmd/skate-reduce/main.go | 2 | ||||
-rw-r--r-- | skate/zippy.go | 6 |
2 files changed, 5 insertions, 3 deletions
diff --git a/skate/cmd/skate-reduce/main.go b/skate/cmd/skate-reduce/main.go index fa3d5e9..74da648 100644 --- a/skate/cmd/skate-reduce/main.go +++ b/skate/cmd/skate-reduce/main.go @@ -75,7 +75,7 @@ var ( batchSize = flag.Int("b", 10000, "batch size") mode = flag.String("m", "ref", "mode, e.g. exact, fuzzy, ref, bref, wiki, oled, oledt, unmatched") quite = flag.Bool("q", false, "be quite") - logFile = flag.String("log", "", "log filename") + logFile = flag.String("log", "", "log filename (stderr, if empty)") // Possible inputs -- we could switch to a subcommand cli parser? bref = flag.String("B", "", "path to bref file") diff --git a/skate/zippy.go b/skate/zippy.go index beae483..1e660da 100644 --- a/skate/zippy.go +++ b/skate/zippy.go @@ -390,7 +390,8 @@ func matchedRefsExtend(matched []*BiblioRef, refs []*Ref) []*BiblioRef { } // uniqueMatches takes a list of bref docs (unserialized) and will return a -// list of deserialized bref docs, containing unique matches only. +// list of deserialized bref docs, containing unique matches only (e.g. filter +// out things duplicate matches, e.g. from exact and fuzzy). func uniqueMatches(docs []string) (result []*BiblioRef, err error) { var ( brefs []*BiblioRef @@ -402,7 +403,7 @@ func uniqueMatches(docs []string) (result []*BiblioRef, err error) { } brefs = append(brefs, &bref) } - // Make sure we exact matches come first. + // Make sure exact matches come first. sort.Slice(brefs, func(i, j int) bool { return brefs[i].MatchStatus != StatusExact.Short() }) @@ -414,6 +415,7 @@ func uniqueMatches(docs []string) (result []*BiblioRef, err error) { for _, doc := range brefs { v := hash(doc) if seen.Contains(v) { + log.Printf("skip: %v %v %v", doc.SourceReleaseIdent, doc.MatchStatus, doc.MatchReason) continue } seen.Add(v) |