package zipkey

import (
	"runtime"
	"sync"
)
// Batcher runs reducers in parallel on batches of groups.
type Batcher struct {
	Size       int
	NumWorkers int
	gf         groupFunc
	batch      []*Group
	queue      chan []*Group
	wg         sync.WaitGroup
	mu         sync.Mutex // guards err, which workers may set concurrently
	err        error
	closing    bool
}
// NewBatcher sets up a new Batcher with a default batch size and one worker
// per CPU. Workers start immediately and wait for batches on the queue.
func NewBatcher(gf groupFunc) *Batcher {
	batcher := Batcher{
		gf:         gf,
		Size:       1000,
		NumWorkers: runtime.NumCPU(),
		queue:      make(chan []*Group),
	}
	for i := 0; i < batcher.NumWorkers; i++ {
		batcher.wg.Add(1)
		go batcher.worker(batcher.queue, &batcher.wg)
	}
	return &batcher
}
// Close sends the final partial batch, closes the queue and waits for all
// workers to finish. It returns the first error encountered by any worker.
func (b *Batcher) Close() error {
	b.closing = true
	g := make([]*Group, len(b.batch))
	copy(g, b.batch)
	b.queue <- g
	b.batch = nil
	close(b.queue)
	b.wg.Wait()
	return b.err
}
// GroupFunc implements the groupFunc type. Not thread safe; panics if called
// after Close.
func (b *Batcher) GroupFunc(g *Group) error {
	if b.closing {
		panic("cannot call GroupFunc after Close")
	}
	b.batch = append(b.batch, g)
	if len(b.batch) == b.Size {
		g := make([]*Group, len(b.batch))
		copy(g, b.batch)
		b.queue <- g
		b.batch = nil
	}
	return nil
}
// worker applies the group function to each group in a batch and winds down
// after the first error encountered.
func (b *Batcher) worker(queue chan []*Group, wg *sync.WaitGroup) {
	defer wg.Done()
OUTER:
	for batch := range queue {
		for _, g := range batch {
			if err := b.gf(g); err != nil {
				// Record the first error only; other workers may fail concurrently.
				b.mu.Lock()
				if b.err == nil {
					b.err = err
				}
				b.mu.Unlock()
				break OUTER
			}
		}
	}
}
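
// Usage sketch: how a caller might feed groups into a Batcher. This assumes
// groupFunc is func(*Group) error and that groups are produced elsewhere in
// the package (e.g. by a zip-style iteration); the names process and groups
// below are hypothetical.
//
//	batcher := NewBatcher(func(g *Group) error {
//		return process(g) // process is a hypothetical reducer for one group
//	})
//
//	for _, g := range groups {
//		if err := batcher.GroupFunc(g); err != nil {
//			// GroupFunc itself currently always returns nil
//		}
//	}
//
//	// Close flushes the last partial batch and reports the first worker error.
//	if err := batcher.Close(); err != nil {
//		// handle err
//	}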