diff options
Diffstat (limited to 'skate/reduce.go')
-rw-r--r-- | skate/reduce.go | 15 |
1 files changed, 7 insertions, 8 deletions
diff --git a/skate/reduce.go b/skate/reduce.go
index d093f5a..df96076 100644
--- a/skate/reduce.go
+++ b/skate/reduce.go
@@ -1,5 +1,5 @@
-// This file contains various "reducers", e.g. merging data from two streams and
-// applying a function on groups of documents with a shared key.
+// This file contains various "reducers", which e.g. merge data from two
+// streams and apply a function on groups of documents with a shared key.
 //
 // Note: This is a bit repetitive, but we do not want to introduce any other
 // abstraction for now. Since most of the logic is in the "grouper" functions,
@@ -7,10 +7,10 @@
 // the fly.
 //
 // The most confusing aspect currently is the variety of schemas hidden within
-// the readers (and string groups): release, ref, ref-as-release, open library,
-// wikipedia, ...
+// the readers (and string groups): release, ref, biblioref, csl,
+// ref-as-release, open library, wikipedia, ...
 //
-// We call the biblioref schema sometimes just bref.
+// We call the biblioref schema sometimes just bref, for short.
 //
 // TODO:
 // * [ ] pass release stage through all match types
@@ -89,7 +89,7 @@ func ZippyExact(releases, refs io.Reader, matchResult MatchResult, w io.Writer)
 			}
 			return nil
 		}
-		batcher = zipkey.NewBatcher(grouper) // hard-code for now; on 24 cores 10K take up over 8G of RAM
+		batcher = zipkey.NewBatcher(grouper)
 	)
 	defer batcher.Close()
 	zipper := zipkey.New(releases, refs, keyer, batcher.GroupFunc)
@@ -180,7 +180,7 @@ func ZippyExactWiki(releases, wiki io.Reader, mr MatchResult, w io.Writer) error
 				// We use lowercase base32 w/o padding of the original
 				// PageTitle as component for the id. XXX: ok for now?
 				if wiki.Language == "" {
-					lang = "en"
+					lang = "en" // XXX: We currently only use "en" subset.
 				} else {
 					lang = wiki.Language
 				}
@@ -191,7 +191,6 @@ func ZippyExactWiki(releases, wiki io.Reader, mr MatchResult, w io.Writer) error
 				}
 				seen.Add(key)
 				bref.Key = key
-				// XXX: We currently only use "en" subset.
 				bref.SourceWikipediaArticle = fmt.Sprintf("%s:%s", lang, wiki.PageTitle)
 				bref.TargetReleaseIdent = target.Ident
 				bref.TargetWorkIdent = target.WorkID