aboutsummaryrefslogtreecommitdiffstats
path: root/skate/reduce.go
diff options
context:
space:
mode:
Diffstat (limited to 'skate/reduce.go')
-rw-r--r--skate/reduce.go15
1 files changed, 7 insertions, 8 deletions
diff --git a/skate/reduce.go b/skate/reduce.go
index d093f5a..df96076 100644
--- a/skate/reduce.go
+++ b/skate/reduce.go
@@ -1,5 +1,5 @@
-// This file contains various "reducers", e.g. merging data from two streams and
-// applying a function on groups of documents with a shared key.
+// This file contains various "reducers", which, for example, merge data from
+// two streams and apply a function to groups of documents with a shared key.
//
// Note: This is a bit repetitive, but we do not want to introduce any other
// abstraction for now. Since most of the logic is in the "grouper" functions,
@@ -7,10 +7,10 @@
// the fly.
//
// The most confusing aspect currently is the variety of schemas hidden within
-// the readers (and string groups): release, ref, ref-as-release, open library,
-// wikipedia, ...
+// the readers (and string groups): release, ref, biblioref, csl,
+// ref-as-release, open library, wikipedia, ...
//
-// We call the biblioref schema sometimes just bref.
+// We sometimes call the biblioref schema just bref, for short.
//
// TODO:
// * [ ] pass release stage through all match types
@@ -89,7 +89,7 @@ func ZippyExact(releases, refs io.Reader, matchResult MatchResult, w io.Writer)
}
return nil
}
- batcher = zipkey.NewBatcher(grouper) // hard-code for now; on 24 cores 10K take up over 8G of RAM
+ batcher = zipkey.NewBatcher(grouper)
)
defer batcher.Close()
zipper := zipkey.New(releases, refs, keyer, batcher.GroupFunc)
@@ -180,7 +180,7 @@ func ZippyExactWiki(releases, wiki io.Reader, mr MatchResult, w io.Writer) error
// We use lowercase base32 w/o padding of the original
// PageTitle as component for the id. XXX: ok for now?
if wiki.Language == "" {
- lang = "en"
+ lang = "en" // XXX: We currently only use "en" subset.
} else {
lang = wiki.Language
}
@@ -191,7 +191,6 @@ func ZippyExactWiki(releases, wiki io.Reader, mr MatchResult, w io.Writer) error
}
seen.Add(key)
bref.Key = key
- // XXX: We currently only use "en" subset.
bref.SourceWikipediaArticle = fmt.Sprintf("%s:%s", lang, wiki.PageTitle)
bref.TargetReleaseIdent = target.Ident
bref.TargetWorkIdent = target.WorkID