aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--skate/cmd/skate-reduce/main.go2
-rw-r--r--skate/zippy.go6
2 files changed, 5 insertions, 3 deletions
diff --git a/skate/cmd/skate-reduce/main.go b/skate/cmd/skate-reduce/main.go
index fa3d5e9..74da648 100644
--- a/skate/cmd/skate-reduce/main.go
+++ b/skate/cmd/skate-reduce/main.go
@@ -75,7 +75,7 @@ var (
batchSize = flag.Int("b", 10000, "batch size")
mode = flag.String("m", "ref", "mode, e.g. exact, fuzzy, ref, bref, wiki, oled, oledt, unmatched")
quite = flag.Bool("q", false, "be quite")
- logFile = flag.String("log", "", "log filename")
+ logFile = flag.String("log", "", "log filename (stderr, if empty)")
// Possible inputs -- we could switch to a subcommand cli parser?
bref = flag.String("B", "", "path to bref file")
diff --git a/skate/zippy.go b/skate/zippy.go
index beae483..1e660da 100644
--- a/skate/zippy.go
+++ b/skate/zippy.go
@@ -390,7 +390,8 @@ func matchedRefsExtend(matched []*BiblioRef, refs []*Ref) []*BiblioRef {
}
// uniqueMatches takes a list of bref docs (unserialized) and will return a
-// list of deserialized bref docs, containing unique matches only.
+// list of deserialized bref docs, containing unique matches only (that is,
+// it filters out duplicate matches, e.g. from exact and fuzzy).
func uniqueMatches(docs []string) (result []*BiblioRef, err error) {
var (
brefs []*BiblioRef
@@ -402,7 +403,7 @@ func uniqueMatches(docs []string) (result []*BiblioRef, err error) {
}
brefs = append(brefs, &bref)
}
- // Make sure we exact matches come first.
+ // Make sure exact matches come first.
sort.Slice(brefs, func(i, j int) bool {
return brefs[i].MatchStatus != StatusExact.Short()
})
@@ -414,6 +415,7 @@ func uniqueMatches(docs []string) (result []*BiblioRef, err error) {
for _, doc := range brefs {
v := hash(doc)
if seen.Contains(v) {
+ log.Printf("skip: %v %v %v", doc.SourceReleaseIdent, doc.MatchStatus, doc.MatchReason)
continue
}
seen.Add(v)