diff options
Diffstat (limited to 'skate/cmd/skate-cluster')
| -rw-r--r-- | skate/cmd/skate-cluster/main.go | 26 | 
1 files changed, 14 insertions, 12 deletions
diff --git a/skate/cmd/skate-cluster/main.go b/skate/cmd/skate-cluster/main.go index 754eab8..de11de1 100644 --- a/skate/cmd/skate-cluster/main.go +++ b/skate/cmd/skate-cluster/main.go @@ -1,5 +1,5 @@ -// skate-cluster takes the (tab) output of skate-sorted-keys and generates a -// "cluster" document, grouping docs by key. Can do some pre-filtering (e.g. +// skate-cluster takes the (tab) output of skate-map (plus sort) and generates +// a "cluster" document, grouping docs by key. Can do some pre-filtering (e.g.  // require refs and release docs in a single cluster).  //  // For example, this: @@ -44,10 +44,12 @@ func main() {  		batch, fields  []string  		keyIndex       = *keyField - 1  		docIndex       = *docField - 1 +		line           string +		err            error  	)  	defer bw.Flush()  	for { -		line, err := br.ReadString('\n') +		line, err = br.ReadString('\n')  		if err == io.EOF {  			break  		} @@ -79,16 +81,16 @@ func main() {  // containsBoth return true, if we have a ref and a non-ref item in the batch.  func containsBoth(batch []string) bool { -	var isRef int +	var numRef int  	for _, doc := range batch { -		// This is brittle. Most JSON should be in compact form, and there the -		// following chars are by convention added to distinguish a release -		// coming from a reference doc from other releases. +		// This is brittle (but faster). Most JSON should be in compact form, +		// and there the following chars are by convention added to distinguish +		// a release coming from a reference doc from other releases.  		if strings.Contains(doc, `"status":"ref"`) { -			isRef++ +			numRef++  		}  	} -	return isRef > 0 && isRef < len(batch) +	return numRef > 0 && numRef < len(batch)  }  // writeBatch writes out a single line containing the key and the cluster values. @@ -102,9 +104,9 @@ func writeBatch(w io.Writer, key string, batch []string) (err error) {  	if *requireBoth && !containsBoth(batch) {  		return nil  	} -	// This is brittle, but all items in a batch are valid JSON objects, hence, -	// the following will be valid JSON as well, or will it? The key should not -	// contain a quote. +	// This is brittle (and fast), but all items in a batch are valid JSON +	// objects, hence, the following will be valid JSON as well, or will it? +	// The key should not contain a quote.  	_, err = fmt.Fprintf(w, "{\"k\": \"%s\", \"v\": [%s]}\n", key, strings.Join(batch, ","))  	return  }  | 
