update docs

author: Martin Czygan <martin.czygan@gmail.com> 2021-07-14 22:59:28 +0200
committer: Martin Czygan <martin.czygan@gmail.com> 2021-07-14 22:59:28 +0200
commit: 902ae78beda424f203db8b8f59cd5e54516475af (patch)
tree: b16eb99cb7b05fda875598d47dde03fcd3f8e59a
parent: e99f54c2e653f0662ba0823fa3ac4bccc475dce1 (diff)
download: refcat-902ae78beda424f203db8b8f59cd5e54516475af.tar.gz
refcat-902ae78beda424f203db8b8f59cd5e54516475af.zip
2 files changed, 11 insertions, 11 deletions
diff --git a/skate/reduce.go b/skate/reduce.go
index d093f5a..df96076 100644
--- a/skate/reduce.go
+++ b/skate/reduce.go
@@ -1,5 +1,5 @@
-// This file contains various "reducers", e.g. merging data from two streams and
-// applying a function on groups of documents with a shared key.
+// This file contains various "reducers", which e.g. merge data from two
+// streams and apply a function on groups of documents with a shared key.
 //
 // Note: This is a bit repetitive, but we do not want to introduce any other
 // abstraction for now. Since most of the logic is in the "grouper" functions,
@@ -7,10 +7,10 @@
 // the fly.
 //
 // The most confusing aspect currently is the variety of schemas hidden within
-// the readers (and string groups): release, ref, ref-as-release, open library,
-// wikipedia, ...
+// the readers (and string groups): release, ref, biblioref, csl,
+// ref-as-release, open library, wikipedia, ...
 //
-// We call the biblioref schema sometimes just bref.
+// We sometimes call the biblioref schema just bref, for short.
 //
 // TODO:
 // * [ ] pass release stage through all match types
@@ -89,7 +89,7 @@ func ZippyExact(releases, refs io.Reader, matchResult MatchResult, w io.Writer)
 			}
 			return nil
 		}
-		batcher = zipkey.NewBatcher(grouper) // hard-code for now; on 24 cores 10K take up over 8G of RAM
+		batcher = zipkey.NewBatcher(grouper)
 	)
 	defer batcher.Close()
 	zipper := zipkey.New(releases, refs, keyer, batcher.GroupFunc)
@@ -180,7 +180,7 @@ func ZippyExactWiki(releases, wiki io.Reader, mr MatchResult, w io.Writer) error
 				// We use lowercase base32 w/o padding of the original
 				// PageTitle as component for the id. XXX: ok for now?
 				if wiki.Language == "" {
-					lang = "en"
+					lang = "en" // XXX: We currently only use "en" subset.
 				} else {
 					lang = wiki.Language
 				}
@@ -191,7 +191,6 @@ func ZippyExactWiki(releases, wiki io.Reader, mr MatchResult, w io.Writer) error
 				}
 				seen.Add(key)
 				bref.Key = key
-				// XXX: We currently only use "en" subset.
 				bref.SourceWikipediaArticle = fmt.Sprintf("%s:%s", lang, wiki.PageTitle)
 				bref.TargetReleaseIdent = target.Ident
 				bref.TargetWorkIdent = target.WorkID
diff --git a/skate/zipkey/batch.go b/skate/zipkey/batch.go
index 6ab7eee..56f6f0d 100644
--- a/skate/zipkey/batch.go
+++ b/skate/zipkey/batch.go
@@ -22,7 +22,7 @@ type Batcher struct {
 	closing    bool // https://stackoverflow.com/q/16105325/89391
 }
 
-// NewBatcher set ups a new Batcher with a batch size of 1000.
+// NewBatcher sets up a new Batcher with a default batch size.
 func NewBatcher(gf groupFunc) *Batcher {
 	return NewBatcherSize(gf, defaultBatchSize)
 }
@@ -43,7 +43,8 @@ func NewBatcherSize(gf groupFunc, size int) *Batcher {
 }
 
 // Close tears down the batcher. If this is not called, you get goroutine leaks
-// and will miss the data from the last uncommitted batch.
+// and will miss the data from the last uncommitted batch. Calling this
+// function more than once will result in a panic.
 func (b *Batcher) Close() error {
 	b.closing = true
 	g := make([]*Group, len(b.batch))
@@ -56,7 +57,7 @@ func (b *Batcher) Close() error {
 }
 
 // GroupFunc is a drop-in for a groupFunc. Use this function, where you used
-// grouper before. Not thread safe. Panics if called after Close.
+// groupFunc before. Not thread safe. Panics if called after Close.
 func (b *Batcher) GroupFunc(g *Group) error {
 	if b.closing {
 		panic("cannot call GroupFunc after Close")
author	Martin Czygan <martin.czygan@gmail.com>	2021-07-14 22:59:28 +0200
committer	Martin Czygan <martin.czygan@gmail.com>	2021-07-14 22:59:28 +0200
commit	902ae78beda424f203db8b8f59cd5e54516475af (patch)
tree	b16eb99cb7b05fda875598d47dde03fcd3f8e59a
parent	e99f54c2e653f0662ba0823fa3ac4bccc475dce1 (diff)
download	refcat-902ae78beda424f203db8b8f59cd5e54516475af.tar.gz refcat-902ae78beda424f203db8b8f59cd5e54516475af.zip