aboutsummaryrefslogtreecommitdiffstats
path: root/skate/zipkey/batch.go
blob: d81897d03dcd7ee0a9f179481bcc53abf9140d6b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
package zipkey

import (
	"runtime"
	"sync"
)

// Batcher runs reducers in parallel on batches of groups.
//
// Groups are handed in one at a time through GroupFunc, collected into a
// batch and passed over a channel to worker goroutines, which apply the
// wrapped groupFunc to every group of the batch. Close submits the last,
// possibly partial batch, waits for the workers and returns the recorded
// error, if any.
type Batcher struct {
	// Size is the batch length at which GroupFunc hands the accumulated
	// groups over to the workers.
	Size       int
	// NumWorkers is the number of worker goroutines started by NewBatcher.
	// Within this file it is only read there, so changing it later does not
	// start or stop any worker.
	NumWorkers int
	// gf is the function the workers apply to each group.
	gf         groupFunc
	// batch holds the groups collected since the last hand-over.
	batch      []*Group
	// queue carries batches from GroupFunc and Close to the workers.
	queue      chan []*Group
	// wg tracks the worker goroutines; Close waits on it.
	wg         sync.WaitGroup
	// err is set by a worker when gf fails and is returned by Close.
	err        error
}

// NewBatcher sets up a new Batcher that applies gf to every group it is
// given. The batch size defaults to 1000 and one worker goroutine is started
// per CPU reported by runtime.NumCPU. The workers run until the queue is
// closed, which Close takes care of.
func NewBatcher(gf groupFunc) *Batcher {
	b := &Batcher{
		Size:       1000,
		NumWorkers: runtime.NumCPU(),
		gf:         gf,
		queue:      make(chan []*Group),
	}
	// Register all workers up front, then launch them.
	b.wg.Add(b.NumWorkers)
	for started := 0; started < b.NumWorkers; started++ {
		go b.worker(b.queue, &b.wg)
	}
	return b
}

// Close submits the groups that are still pending, closes the queue so the
// workers terminate once it is empty, waits for all of them and returns the
// error stored in b.err, or nil if none was recorded.
//
// Close is meant to be called once, after the last call to GroupFunc:
// closing the queue a second time panics, and so does a GroupFunc call that
// tries to send on the closed queue. Close does no locking of its own, so it
// must not overlap with GroupFunc.
func (b *Batcher) Close() error {
	if len(b.batch) > 0 {
		// Only send when there is something to process: the queue is
		// unbuffered, so every send has to wait for a receiving worker, and
		// an empty batch would be a no-op for the worker anyway. The slice is
		// handed over as is; it is dropped here right after, so the worker
		// becomes its sole owner and no copy is required.
		b.queue <- b.batch
		b.batch = nil
	}
	close(b.queue)
	// Reading b.err is safe after Wait: every worker has called Done by then.
	b.wg.Wait()
	return b.err
}

// GroupFunc implements the groupFunc type. It appends g to the pending batch
// and, once the batch holds at least Size groups, hands the batch to the
// workers and starts a new one.
//
// The threshold is checked with >= rather than ==, so that a Size of zero or
// less, or a Size lowered while groups were already buffered, still triggers
// a hand-over instead of letting the batch grow until Close.
//
// The send blocks until a worker receives the batch. GroupFunc does no
// locking of its own, so calls must not overlap with each other or with
// Close. The returned error is always nil; an error from the wrapped
// function is returned by Close.
func (b *Batcher) GroupFunc(g *Group) error {
	b.batch = append(b.batch, g)
	if len(b.batch) >= b.Size {
		// The receiving worker becomes the sole owner of the slice: b.batch
		// is reset to nil, so the next append allocates fresh storage and a
		// defensive copy is not needed.
		b.queue <- b.batch
		b.batch = nil
	}
	return nil
}

// batcherErrMu guards Batcher.err while worker goroutines are running. It
// lives at package level so that the layout of Batcher stays untouched; it
// is taken once per batch and once per failure, so sharing it between
// Batcher values is cheap.
var batcherErrMu sync.Mutex

// worker receives batches from queue until the queue is closed and applies
// b.gf to every group of a batch. It calls wg.Done when it returns.
//
// Once any worker has seen an error, the first such error is kept in b.err
// and no worker processes further groups. The workers nevertheless keep
// receiving from queue and discard what they get: the queue is unbuffered,
// so a worker that simply returned would eventually leave GroupFunc or Close
// blocked on a send that nobody receives.
func (b *Batcher) worker(queue chan []*Group, wg *sync.WaitGroup) {
	defer wg.Done()
	for batch := range queue {
		batcherErrMu.Lock()
		failed := b.err != nil
		batcherErrMu.Unlock()
		if failed {
			// Drain only; the run has already failed.
			continue
		}
		for _, g := range batch {
			err := b.gf(g)
			if err == nil {
				continue
			}
			// Several workers may fail at the same time; serialize the write
			// and keep the first error only.
			batcherErrMu.Lock()
			if b.err == nil {
				b.err = err
			}
			batcherErrMu.Unlock()
			break
		}
	}
}