diff options
Diffstat (limited to 'skate/zipkey')
-rw-r--r-- | skate/zipkey/testdata/c0a | 4 | ||||
-rw-r--r-- | skate/zipkey/testdata/c0b | 2 | ||||
-rw-r--r-- | skate/zipkey/testdata/c0c | 2 | ||||
-rw-r--r-- | skate/zipkey/testdata/c1a | 13 | ||||
-rw-r--r-- | skate/zipkey/testdata/c1b | 4 | ||||
-rw-r--r-- | skate/zipkey/testdata/c1c | 2 | ||||
-rw-r--r-- | skate/zipkey/testdata/c2a | 5 | ||||
-rw-r--r-- | skate/zipkey/testdata/c2b | 5 | ||||
-rw-r--r-- | skate/zipkey/testdata/c2c | 0 | ||||
-rw-r--r-- | skate/zipkey/testdata/c3a | 6 | ||||
-rw-r--r-- | skate/zipkey/testdata/c3b | 6 | ||||
-rw-r--r-- | skate/zipkey/testdata/c3c | 1 | ||||
-rw-r--r-- | skate/zipkey/testdata/c4a | 4 | ||||
-rw-r--r-- | skate/zipkey/testdata/c4b | 4 | ||||
-rw-r--r-- | skate/zipkey/testdata/c4c | 1 | ||||
-rw-r--r-- | skate/zipkey/testdata/c5a | 0 | ||||
-rw-r--r-- | skate/zipkey/testdata/c5b | 0 | ||||
-rw-r--r-- | skate/zipkey/testdata/c5c | 0 | ||||
-rw-r--r-- | skate/zipkey/zipkey.go | 138 | ||||
-rw-r--r-- | skate/zipkey/zipkey_test.go | 83 |
20 files changed, 280 insertions, 0 deletions
diff --git a/skate/zipkey/testdata/c0a b/skate/zipkey/testdata/c0a new file mode 100644 index 0000000..4685b39 --- /dev/null +++ b/skate/zipkey/testdata/c0a @@ -0,0 +1,4 @@ +k0 a +k1 b +k2 c +k3 d diff --git a/skate/zipkey/testdata/c0b b/skate/zipkey/testdata/c0b new file mode 100644 index 0000000..293bb00 --- /dev/null +++ b/skate/zipkey/testdata/c0b @@ -0,0 +1,2 @@ +k1 B +k3 D diff --git a/skate/zipkey/testdata/c0c b/skate/zipkey/testdata/c0c new file mode 100644 index 0000000..3a3ea49 --- /dev/null +++ b/skate/zipkey/testdata/c0c @@ -0,0 +1,2 @@ +{"Key":"","G0":["k1 b\n"],"G1":["k1 B\n"]} +{"Key":"","G0":["k3 d\n"],"G1":["k3 D\n"]}
\ No newline at end of file diff --git a/skate/zipkey/testdata/c1a b/skate/zipkey/testdata/c1a new file mode 100644 index 0000000..7741741 --- /dev/null +++ b/skate/zipkey/testdata/c1a @@ -0,0 +1,13 @@ +1 a +1 a +1 a +1 a +1 a +1 a +1 a +1 a +2 b +2 b +2 b +2 b +2 b diff --git a/skate/zipkey/testdata/c1b b/skate/zipkey/testdata/c1b new file mode 100644 index 0000000..ef55241 --- /dev/null +++ b/skate/zipkey/testdata/c1b @@ -0,0 +1,4 @@ +1 A +2 B +3 C +4 D diff --git a/skate/zipkey/testdata/c1c b/skate/zipkey/testdata/c1c new file mode 100644 index 0000000..d108667 --- /dev/null +++ b/skate/zipkey/testdata/c1c @@ -0,0 +1,2 @@ +{"Key":"","G0":["1 a\n","1 a\n","1 a\n","1 a\n","1 a\n","1 a\n","1 a\n","1 a\n"],"G1":["1 A\n"]} +{"Key":"","G0":["2 b\n","2 b\n","2 b\n","2 b\n","2 b\n"],"G1":["2 B\n"]}
\ No newline at end of file diff --git a/skate/zipkey/testdata/c2a b/skate/zipkey/testdata/c2a new file mode 100644 index 0000000..8036bb1 --- /dev/null +++ b/skate/zipkey/testdata/c2a @@ -0,0 +1,5 @@ +a 1 +a 1 +b 2 +b 2 + diff --git a/skate/zipkey/testdata/c2b b/skate/zipkey/testdata/c2b new file mode 100644 index 0000000..665ef57 --- /dev/null +++ b/skate/zipkey/testdata/c2b @@ -0,0 +1,5 @@ +c 3 +c 3 +d 4 +d 4 + diff --git a/skate/zipkey/testdata/c2c b/skate/zipkey/testdata/c2c new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/skate/zipkey/testdata/c2c diff --git a/skate/zipkey/testdata/c3a b/skate/zipkey/testdata/c3a new file mode 100644 index 0000000..b0ec63f --- /dev/null +++ b/skate/zipkey/testdata/c3a @@ -0,0 +1,6 @@ +2 1 +2 2 +2 3 +2 4 +2 5 + diff --git a/skate/zipkey/testdata/c3b b/skate/zipkey/testdata/c3b new file mode 100644 index 0000000..779b0fb --- /dev/null +++ b/skate/zipkey/testdata/c3b @@ -0,0 +1,6 @@ +2 a +2 b +2 c +2 d +2 e + diff --git a/skate/zipkey/testdata/c3c b/skate/zipkey/testdata/c3c new file mode 100644 index 0000000..eaabdad --- /dev/null +++ b/skate/zipkey/testdata/c3c @@ -0,0 +1 @@ +{"Key":"","G0":["2 1\n","2 2\n","2 3\n","2 4\n","2 5\n"],"G1":["2 a\n","2 b\n","2 c\n","2 d\n","2 e\n"]}
\ No newline at end of file diff --git a/skate/zipkey/testdata/c4a b/skate/zipkey/testdata/c4a new file mode 100644 index 0000000..b61ee9d --- /dev/null +++ b/skate/zipkey/testdata/c4a @@ -0,0 +1,4 @@ +a k0 a +a k0 a +a k0 a +a k0 a diff --git a/skate/zipkey/testdata/c4b b/skate/zipkey/testdata/c4b new file mode 100644 index 0000000..29e0c16 --- /dev/null +++ b/skate/zipkey/testdata/c4b @@ -0,0 +1,4 @@ +b k0 b +b k0 b +b k0 b +b k0 b diff --git a/skate/zipkey/testdata/c4c b/skate/zipkey/testdata/c4c new file mode 100644 index 0000000..4cf2f92 --- /dev/null +++ b/skate/zipkey/testdata/c4c @@ -0,0 +1 @@ +{"Key":"","G0":["a k0 a\n","a k0 a\n","a k0 a\n","a k0 a\n"],"G1":["b k0 b\n","b k0 b\n","b k0 b\n","b k0 b\n"]} diff --git a/skate/zipkey/testdata/c5a b/skate/zipkey/testdata/c5a new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/skate/zipkey/testdata/c5a diff --git a/skate/zipkey/testdata/c5b b/skate/zipkey/testdata/c5b new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/skate/zipkey/testdata/c5b diff --git a/skate/zipkey/testdata/c5c b/skate/zipkey/testdata/c5c new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/skate/zipkey/testdata/c5c diff --git a/skate/zipkey/zipkey.go b/skate/zipkey/zipkey.go new file mode 100644 index 0000000..a03bb28 --- /dev/null +++ b/skate/zipkey/zipkey.go @@ -0,0 +1,138 @@ +package zipkey + +import ( + "bufio" + "io" +) + +// Group groups items by key and will contain the complete records (e.g. line) +// for further processing. +type Group struct { + Key string + G0 []string + G1 []string +} + +type ( + keyFunc func(string) (string, error) + groupFunc func(*Group) error +) + +// ZipRun reads records (separated by sep) from two readers, extracts a key +// from each record with a keyFunc and collects records from the two streams +// into a Group. A callback can be registered, which allows to customize the +// processing of the group. +type ZipRun struct { + r0, r1 *bufio.Reader + kf keyFunc + gf groupFunc + sep byte +} + +// New create a new ready to run ZipRun value. +func New(r0, r1 io.Reader, kf keyFunc, gf groupFunc) *ZipRun { + return &ZipRun{ + r0: bufio.NewReader(r0), + r1: bufio.NewReader(r1), + kf: kf, + gf: gf, + sep: '\n', + } +} + +// Run starts reading from both readers. The process stops, if one reader is +// exhausted or reads from any reader fail. +func (c *ZipRun) Run() error { + var ( + k0, k1, c0, c1 string // key: k0, k1; current line: c0, c1 + done bool + err error + lineKey = func(r *bufio.Reader) (line, key string, err error) { + if line, err = r.ReadString(c.sep); err != nil { + return + } + key, err = c.kf(line) + return + } + ) + for { + if done { + break + } + switch { + case k0 == "" || k0 < k1: + for k0 == "" || k0 < k1 { + c0, k0, err = lineKey(c.r0) + if err == io.EOF { + return nil + } + if err != nil { + return err + } + } + case k1 == "" || k0 > k1: + for k1 == "" || k0 > k1 { + c1, k1, err = lineKey(c.r1) + if err == io.EOF { + return nil + } + if err != nil { + return err + } + } + case k0 == k1: + g := &Group{ + G0: []string{c0}, + G1: []string{c1}, + } + for { + c0, err = c.r0.ReadString(c.sep) + if err == io.EOF { + done = true + break + } + if err != nil { + return err + } + k, err := c.kf(c0) + if err != nil { + return err + } + if k == k0 { + g.G0 = append(g.G0, c0) + k0 = k + } else { + k0 = k + break + } + } + for { + c1, err = c.r1.ReadString(c.sep) + if err == io.EOF { + done = true + break + } + if err != nil { + return err + } + k, err := c.kf(c1) + if err != nil { + return err + } + if k == k1 { + g.G1 = append(g.G1, c1) + k1 = k + } else { + k1 = k + break + } + } + if c.gf != nil { + if err := c.gf(g); err != nil { + return err + } + } + } + } + return nil +} diff --git a/skate/zipkey/zipkey_test.go b/skate/zipkey/zipkey_test.go new file mode 100644 index 0000000..0a49a45 --- /dev/null +++ b/skate/zipkey/zipkey_test.go @@ -0,0 +1,83 @@ +package zipkey + +import ( + "bytes" + "encoding/json" + "strings" + "testing" + + "git.archive.org/martin/cgraph/skate/must" +) + +func TestZipRun(t *testing.T) { + makeKeyFunc := func(index int) func(string) (string, error) { + return func(s string) (string, error) { + parts := strings.Fields(s) + if index >= len(parts) { + return "", nil + } + return parts[index], nil + } + } + var cases = []struct { + a string + b string + c string + kf func(string) (string, error) + }{ + { + "testdata/c0a", + "testdata/c0b", + "testdata/c0c", + makeKeyFunc(0), + }, + { + "testdata/c1a", + "testdata/c1b", + "testdata/c1c", + makeKeyFunc(0), + }, + { + "testdata/c2a", + "testdata/c2b", + "testdata/c2c", + makeKeyFunc(0), + }, + { + "testdata/c3a", + "testdata/c3b", + "testdata/c3c", + makeKeyFunc(0), + }, + { + "testdata/c4a", + "testdata/c4b", + "testdata/c4c", + makeKeyFunc(1), + }, + { + "testdata/c5a", + "testdata/c5b", + "testdata/c5c", + makeKeyFunc(0), + }, + } + for _, c := range cases { + var ( + ar = must.Open(c.a) + br = must.Open(c.b) + cr = strings.TrimSpace(string(must.ReadFile(c.c))) + buf bytes.Buffer + groupFunc = func(g *Group) error { + return json.NewEncoder(&buf).Encode(g) + } + cm = New(ar, br, c.kf, groupFunc) + ) + if err := cm.Run(); err != nil { + t.Errorf("[%s] failed: %v", c.a, err) + } + if got := strings.TrimSpace(buf.String()); cr != got { + t.Errorf("[%s ...] got %v, want %v", c.a, got, cr) + } + } +} |