diff options
Diffstat (limited to 'skate/reduce.go')
| -rw-r--r-- | skate/reduce.go | 15 | 
1 files changed, 7 insertions, 8 deletions
diff --git a/skate/reduce.go b/skate/reduce.go
index d093f5a..df96076 100644
--- a/skate/reduce.go
+++ b/skate/reduce.go
@@ -1,5 +1,5 @@
-// This file contains various "reducers", e.g. merging data from two streams and
-// applying a function on groups of documents with a shared key.
+// This file contains various "reducers", which e.g. merge data from two
+// streams and apply a function on groups of documents with a shared key.
 //
 // Note: This is a bit repetitive, but we do not want to introduce any other
 // abstraction for now. Since most of the logic is in the "grouper" functions,
@@ -7,10 +7,10 @@
 // the fly.
 //
 // The most confusing aspect currently is the variety of schemas hidden within
-// the readers (and string groups): release, ref, ref-as-release, open library,
-// wikipedia, ...
+// the readers (and string groups): release, ref, biblioref, csl,
+// ref-as-release, open library, wikipedia, ...
 //
-// We call the biblioref schema sometimes just bref.
+// We call the biblioref schema sometimes just bref, for short.
 //
 // TODO:
 // * [ ] pass release stage through all match types
@@ -89,7 +89,7 @@ func ZippyExact(releases, refs io.Reader, matchResult MatchResult, w io.Writer)
 			}
 			return nil
 		}
-		batcher = zipkey.NewBatcher(grouper) // hard-code for now; on 24 cores 10K take up over 8G of RAM
+		batcher = zipkey.NewBatcher(grouper)
 	)
 	defer batcher.Close()
 	zipper := zipkey.New(releases, refs, keyer, batcher.GroupFunc)
@@ -180,7 +180,7 @@ func ZippyExactWiki(releases, wiki io.Reader, mr MatchResult, w io.Writer) error
 				// We use lowercase base32 w/o padding of the original
 				// PageTitle as component for the id. XXX: ok for now?
 				if wiki.Language == "" {
-					lang = "en"
+					lang = "en" // XXX: We currently only use "en" subset.
 				} else {
 					lang = wiki.Language
 				}
@@ -191,7 +191,6 @@ func ZippyExactWiki(releases, wiki io.Reader, mr MatchResult, w io.Writer) error
 				}
 				seen.Add(key)
 				bref.Key = key
-				// XXX: We currently only use "en" subset.
 				bref.SourceWikipediaArticle = fmt.Sprintf("%s:%s", lang, wiki.PageTitle)
 				bref.TargetReleaseIdent = target.Ident
 				bref.TargetWorkIdent = target.WorkID
