about | summary | refs | log | tree | commit | diff | stats
path: root/skate/cmd/skate-cluster
diff options
context:
space:
mode:
author Martin Czygan <martin.czygan@gmail.com> 2021-05-04 23:59:53 +0200
committer Martin Czygan <martin.czygan@gmail.com> 2021-05-04 23:59:53 +0200
commit a63d76e3fc3c59c2eec2de4e538b45e41e1f8aa9 (patch)
tree dbfeb167e56b3d581877a0224e56be8423852aa0 /skate/cmd/skate-cluster
parent 6462e64ce8e61f54e1c3b1247c2039a2eddd5875 (diff)
download refcat-a63d76e3fc3c59c2eec2de4e538b45e41e1f8aa9.tar.gz
refcat-a63d76e3fc3c59c2eec2de4e538b45e41e1f8aa9.zip
tweaks; move parsing out of command
Diffstat (limited to 'skate/cmd/skate-cluster')
-rw-r--r-- skate/cmd/skate-cluster/main.go 26
1 files changed, 14 insertions, 12 deletions
diff --git a/skate/cmd/skate-cluster/main.go b/skate/cmd/skate-cluster/main.go
index 754eab8..de11de1 100644
--- a/skate/cmd/skate-cluster/main.go
+++ b/skate/cmd/skate-cluster/main.go
@@ -1,5 +1,5 @@
-// skate-cluster takes the (tab) output of skate-sorted-keys and generates a
-// "cluster" document, grouping docs by key. Can do some pre-filtering (e.g.
+// skate-cluster takes the (tab) output of skate-map (plus sort) and generates
+// a "cluster" document, grouping docs by key. Can do some pre-filtering (e.g.
// require refs and release docs in a single cluster).
//
// For example, this:
@@ -44,10 +44,12 @@ func main() {
batch, fields []string
keyIndex = *keyField - 1
docIndex = *docField - 1
+ line string
+ err error
)
defer bw.Flush()
for {
- line, err := br.ReadString('\n')
+ line, err = br.ReadString('\n')
if err == io.EOF {
break
}
@@ -79,16 +81,16 @@ func main() {
// containsBoth return true, if we have a ref and a non-ref item in the batch.
func containsBoth(batch []string) bool {
- var isRef int
+ var numRef int
for _, doc := range batch {
- // This is brittle. Most JSON should be in compact form, and there the
- // following chars are by convention added to distinguish a release
- // coming from a reference doc from other releases.
+ // This is brittle (but faster). Most JSON should be in compact form,
+ // and there the following chars are by convention added to distinguish
+ // a release coming from a reference doc from other releases.
if strings.Contains(doc, `"status":"ref"`) {
- isRef++
+ numRef++
}
}
- return isRef > 0 && isRef < len(batch)
+ return numRef > 0 && numRef < len(batch)
}
// writeBatch writes out a single line containing the key and the cluster values.
@@ -102,9 +104,9 @@ func writeBatch(w io.Writer, key string, batch []string) (err error) {
if *requireBoth && !containsBoth(batch) {
return nil
}
- // This is brittle, but all items in a batch are valid JSON objects, hence,
- // the following will be valid JSON as well, or will it? The key should not
- // contain a quote.
+ // This is brittle (and fast), but all items in a batch are valid JSON
+ // objects, hence, the following will be valid JSON as well, or will it?
+ // The key should not contain a quote.
_, err = fmt.Fprintf(w, "{\"k\": \"%s\", \"v\": [%s]}\n", key, strings.Join(batch, ","))
return
}