aboutsummaryrefslogtreecommitdiffstats
path: root/skate/zipkey
diff options
context:
space:
mode:
author Martin Czygan <martin.czygan@gmail.com> 2021-03-21 01:17:38 +0100
committer Martin Czygan <martin.czygan@gmail.com> 2021-03-21 01:17:38 +0100
commit 09a7e8c9d013f13a1aa1ef4e9b7f397647b79967 (patch)
tree 122b474e27afbc66cba1182e983ef5c8555ed12f /skate/zipkey
parent a7e0cf191ebf8fb499e0ab9a3b6cae45727f1286 (diff)
download refcat-09a7e8c9d013f13a1aa1ef4e9b7f397647b79967.tar.gz
refcat-09a7e8c9d013f13a1aa1ef4e9b7f397647b79967.zip
initial import of skate
Diffstat (limited to 'skate/zipkey')
-rw-r--r-- skate/zipkey/testdata/c0a 4
-rw-r--r-- skate/zipkey/testdata/c0b 2
-rw-r--r-- skate/zipkey/testdata/c0c 2
-rw-r--r-- skate/zipkey/testdata/c1a 13
-rw-r--r-- skate/zipkey/testdata/c1b 4
-rw-r--r-- skate/zipkey/testdata/c1c 2
-rw-r--r-- skate/zipkey/testdata/c2a 5
-rw-r--r-- skate/zipkey/testdata/c2b 5
-rw-r--r-- skate/zipkey/testdata/c2c 0
-rw-r--r-- skate/zipkey/testdata/c3a 6
-rw-r--r-- skate/zipkey/testdata/c3b 6
-rw-r--r-- skate/zipkey/testdata/c3c 1
-rw-r--r-- skate/zipkey/testdata/c4a 4
-rw-r--r-- skate/zipkey/testdata/c4b 4
-rw-r--r-- skate/zipkey/testdata/c4c 1
-rw-r--r-- skate/zipkey/testdata/c5a 0
-rw-r--r-- skate/zipkey/testdata/c5b 0
-rw-r--r-- skate/zipkey/testdata/c5c 0
-rw-r--r-- skate/zipkey/zipkey.go 138
-rw-r--r-- skate/zipkey/zipkey_test.go 83
20 files changed, 280 insertions, 0 deletions
diff --git a/skate/zipkey/testdata/c0a b/skate/zipkey/testdata/c0a
new file mode 100644
index 0000000..4685b39
--- /dev/null
+++ b/skate/zipkey/testdata/c0a
@@ -0,0 +1,4 @@
+k0 a
+k1 b
+k2 c
+k3 d
diff --git a/skate/zipkey/testdata/c0b b/skate/zipkey/testdata/c0b
new file mode 100644
index 0000000..293bb00
--- /dev/null
+++ b/skate/zipkey/testdata/c0b
@@ -0,0 +1,2 @@
+k1 B
+k3 D
diff --git a/skate/zipkey/testdata/c0c b/skate/zipkey/testdata/c0c
new file mode 100644
index 0000000..3a3ea49
--- /dev/null
+++ b/skate/zipkey/testdata/c0c
@@ -0,0 +1,2 @@
+{"Key":"","G0":["k1 b\n"],"G1":["k1 B\n"]}
+{"Key":"","G0":["k3 d\n"],"G1":["k3 D\n"]} \ No newline at end of file
diff --git a/skate/zipkey/testdata/c1a b/skate/zipkey/testdata/c1a
new file mode 100644
index 0000000..7741741
--- /dev/null
+++ b/skate/zipkey/testdata/c1a
@@ -0,0 +1,13 @@
+1 a
+1 a
+1 a
+1 a
+1 a
+1 a
+1 a
+1 a
+2 b
+2 b
+2 b
+2 b
+2 b
diff --git a/skate/zipkey/testdata/c1b b/skate/zipkey/testdata/c1b
new file mode 100644
index 0000000..ef55241
--- /dev/null
+++ b/skate/zipkey/testdata/c1b
@@ -0,0 +1,4 @@
+1 A
+2 B
+3 C
+4 D
diff --git a/skate/zipkey/testdata/c1c b/skate/zipkey/testdata/c1c
new file mode 100644
index 0000000..d108667
--- /dev/null
+++ b/skate/zipkey/testdata/c1c
@@ -0,0 +1,2 @@
+{"Key":"","G0":["1 a\n","1 a\n","1 a\n","1 a\n","1 a\n","1 a\n","1 a\n","1 a\n"],"G1":["1 A\n"]}
+{"Key":"","G0":["2 b\n","2 b\n","2 b\n","2 b\n","2 b\n"],"G1":["2 B\n"]} \ No newline at end of file
diff --git a/skate/zipkey/testdata/c2a b/skate/zipkey/testdata/c2a
new file mode 100644
index 0000000..8036bb1
--- /dev/null
+++ b/skate/zipkey/testdata/c2a
@@ -0,0 +1,5 @@
+a 1
+a 1
+b 2
+b 2
+
diff --git a/skate/zipkey/testdata/c2b b/skate/zipkey/testdata/c2b
new file mode 100644
index 0000000..665ef57
--- /dev/null
+++ b/skate/zipkey/testdata/c2b
@@ -0,0 +1,5 @@
+c 3
+c 3
+d 4
+d 4
+
diff --git a/skate/zipkey/testdata/c2c b/skate/zipkey/testdata/c2c
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/skate/zipkey/testdata/c2c
diff --git a/skate/zipkey/testdata/c3a b/skate/zipkey/testdata/c3a
new file mode 100644
index 0000000..b0ec63f
--- /dev/null
+++ b/skate/zipkey/testdata/c3a
@@ -0,0 +1,6 @@
+2 1
+2 2
+2 3
+2 4
+2 5
+
diff --git a/skate/zipkey/testdata/c3b b/skate/zipkey/testdata/c3b
new file mode 100644
index 0000000..779b0fb
--- /dev/null
+++ b/skate/zipkey/testdata/c3b
@@ -0,0 +1,6 @@
+2 a
+2 b
+2 c
+2 d
+2 e
+
diff --git a/skate/zipkey/testdata/c3c b/skate/zipkey/testdata/c3c
new file mode 100644
index 0000000..eaabdad
--- /dev/null
+++ b/skate/zipkey/testdata/c3c
@@ -0,0 +1 @@
+{"Key":"","G0":["2 1\n","2 2\n","2 3\n","2 4\n","2 5\n"],"G1":["2 a\n","2 b\n","2 c\n","2 d\n","2 e\n"]} \ No newline at end of file
diff --git a/skate/zipkey/testdata/c4a b/skate/zipkey/testdata/c4a
new file mode 100644
index 0000000..b61ee9d
--- /dev/null
+++ b/skate/zipkey/testdata/c4a
@@ -0,0 +1,4 @@
+a k0 a
+a k0 a
+a k0 a
+a k0 a
diff --git a/skate/zipkey/testdata/c4b b/skate/zipkey/testdata/c4b
new file mode 100644
index 0000000..29e0c16
--- /dev/null
+++ b/skate/zipkey/testdata/c4b
@@ -0,0 +1,4 @@
+b k0 b
+b k0 b
+b k0 b
+b k0 b
diff --git a/skate/zipkey/testdata/c4c b/skate/zipkey/testdata/c4c
new file mode 100644
index 0000000..4cf2f92
--- /dev/null
+++ b/skate/zipkey/testdata/c4c
@@ -0,0 +1 @@
+{"Key":"","G0":["a k0 a\n","a k0 a\n","a k0 a\n","a k0 a\n"],"G1":["b k0 b\n","b k0 b\n","b k0 b\n","b k0 b\n"]}
diff --git a/skate/zipkey/testdata/c5a b/skate/zipkey/testdata/c5a
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/skate/zipkey/testdata/c5a
diff --git a/skate/zipkey/testdata/c5b b/skate/zipkey/testdata/c5b
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/skate/zipkey/testdata/c5b
diff --git a/skate/zipkey/testdata/c5c b/skate/zipkey/testdata/c5c
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/skate/zipkey/testdata/c5c
diff --git a/skate/zipkey/zipkey.go b/skate/zipkey/zipkey.go
new file mode 100644
index 0000000..a03bb28
--- /dev/null
+++ b/skate/zipkey/zipkey.go
@@ -0,0 +1,138 @@
+package zipkey
+
+import (
+ "bufio"
+ "io"
+)
+
+// Group holds the complete records (e.g. lines) from both streams that share
+// one key, ready for further processing by a groupFunc.
+type Group struct {
+ Key string // NOTE(review): Run in this file never assigns Key, so it is always "" — confirm whether it should carry the shared key
+ G0 []string // records read from the first reader (r0), as returned by ReadString
+ G1 []string // records read from the second reader (r1), as returned by ReadString
+}
+
+type (
+ keyFunc func(string) (string, error) // maps one raw record to its key; an error aborts Run
+ groupFunc func(*Group) error // callback Run invokes once per collected Group; an error aborts Run
+)
+
+// ZipRun reads records (separated by sep) from two readers, extracts a key
+// from each record with a keyFunc and collects the records that share a key
+// into a Group. A callback (groupFunc) can be registered to customize how each
+// Group is processed.
+type ZipRun struct {
+ r0, r1 *bufio.Reader // buffered wrappers around the two input streams
+ kf keyFunc // derives the comparison key from a record
+ gf groupFunc // called for every Group; Run skips the call when this is nil
+ sep byte // record delimiter handed to ReadString; New sets it to '\n'
+}
+
+// New creates a ready-to-run ZipRun over r0 and r1 that keys records with kf and reports groups to gf (gf may be nil).
+func New(r0, r1 io.Reader, kf keyFunc, gf groupFunc) *ZipRun {
+ return &ZipRun{
+ r0: bufio.NewReader(r0), // both inputs are wrapped so Run can use ReadString
+ r1: bufio.NewReader(r1),
+ kf: kf,
+ gf: gf,
+ sep: '\n', // records are newline-delimited; there is no exported way to change this here
+ }
+}
+
+// Run reads from both readers, skips ahead on whichever side has the smaller key and hands a Group to gf for each key found on both sides.
+// It returns nil once either reader reports io.EOF; any other read error, or an error from kf or gf, is returned as is.
+func (c *ZipRun) Run() error {
+ var (
+ k0, k1, c0, c1 string // key: k0, k1; current record: c0, c1; an empty key is treated as "nothing usable read yet"
+ done bool // set when a reader hits io.EOF while a group is being collected
+ err error
+ lineKey = func(r *bufio.Reader) (line, key string, err error) { // reads one record from r and derives its key
+ if line, err = r.ReadString(c.sep); err != nil {
+ return // NOTE(review): ReadString may return data together with io.EOF; a last record lacking sep is dropped by callers
+ }
+ key, err = c.kf(line)
+ return
+ }
+ )
+ for {
+ if done {
+ break
+ }
+ switch {
+ case k0 == "" || k0 < k1: // r0 side is unset or behind (plain string comparison): advance r0
+ for k0 == "" || k0 < k1 { // NOTE(review): only meaningful if both inputs are sorted by key — not checked here
+ c0, k0, err = lineKey(c.r0)
+ if err == io.EOF {
+ return nil // r0 exhausted: stop without error
+ }
+ if err != nil {
+ return err
+ }
+ }
+ case k1 == "" || k0 > k1: // r1 side is unset or behind: advance r1
+ for k1 == "" || k0 > k1 { // records whose key is empty are read past, like on the r0 side
+ c1, k1, err = lineKey(c.r1)
+ if err == io.EOF {
+ return nil // r1 exhausted: stop without error
+ }
+ if err != nil {
+ return err
+ }
+ }
+ case k0 == k1: // same non-empty key on both sides: collect the run of records from each reader
+ g := &Group{ // Key is not set here and stays at its zero value
+ G0: []string{c0},
+ G1: []string{c1},
+ }
+ for { // extend G0 while r0 keeps yielding the current key
+ c0, err = c.r0.ReadString(c.sep)
+ if err == io.EOF {
+ done = true // still emit this group below, then leave the outer loop
+ break
+ }
+ if err != nil {
+ return err
+ }
+ k, err := c.kf(c0) // this err shadows the outer one for the rest of the iteration
+ if err != nil {
+ return err
+ }
+ if k == k0 {
+ g.G0 = append(g.G0, c0)
+ k0 = k
+ } else {
+ k0 = k // c0 already holds the first record of the next key
+ break
+ }
+ }
+ for { // same procedure for G1 and r1
+ c1, err = c.r1.ReadString(c.sep)
+ if err == io.EOF {
+ done = true // still emit this group below, then leave the outer loop
+ break
+ }
+ if err != nil {
+ return err
+ }
+ k, err := c.kf(c1) // this err shadows the outer one for the rest of the iteration
+ if err != nil {
+ return err
+ }
+ if k == k1 {
+ g.G1 = append(g.G1, c1)
+ k1 = k
+ } else {
+ k1 = k // c1 already holds the first record of the next key
+ break
+ }
+ }
+ if c.gf != nil { // the callback is optional
+ if err := c.gf(g); err != nil {
+ return err
+ }
+ }
+ }
+ }
+ return nil
+}
diff --git a/skate/zipkey/zipkey_test.go b/skate/zipkey/zipkey_test.go
new file mode 100644
index 0000000..0a49a45
--- /dev/null
+++ b/skate/zipkey/zipkey_test.go
@@ -0,0 +1,83 @@
+package zipkey
+
+import (
+ "bytes"
+ "encoding/json"
+ "strings"
+ "testing"
+
+ "git.archive.org/martin/cgraph/skate/must"
+)
+
+func TestZipRun(t *testing.T) { // runs ZipRun over fixture pairs and compares the JSON-encoded groups with an expected file
+ makeKeyFunc := func(index int) func(string) (string, error) { // key is the whitespace-separated field at position index
+ return func(s string) (string, error) {
+ parts := strings.Fields(s)
+ if index >= len(parts) {
+ return "", nil // too few fields (e.g. a blank line): empty key, no error
+ }
+ return parts[index], nil
+ }
+ }
+ var cases = []struct {
+ a string // path of the first input fixture
+ b string // path of the second input fixture
+ c string // path of the file with the expected output
+ kf func(string) (string, error) // key function used for both inputs
+ }{
+ {
+ "testdata/c0a",
+ "testdata/c0b",
+ "testdata/c0c",
+ makeKeyFunc(0),
+ },
+ {
+ "testdata/c1a",
+ "testdata/c1b",
+ "testdata/c1c",
+ makeKeyFunc(0),
+ },
+ {
+ "testdata/c2a",
+ "testdata/c2b",
+ "testdata/c2c",
+ makeKeyFunc(0),
+ },
+ {
+ "testdata/c3a",
+ "testdata/c3b",
+ "testdata/c3c",
+ makeKeyFunc(0),
+ },
+ {
+ "testdata/c4a",
+ "testdata/c4b",
+ "testdata/c4c",
+ makeKeyFunc(1), // this case keys on the second field
+ },
+ {
+ "testdata/c5a",
+ "testdata/c5b",
+ "testdata/c5c",
+ makeKeyFunc(0),
+ },
+ }
+ for _, c := range cases {
+ var (
+ ar = must.Open(c.a) // NOTE(review): must is an external helper; presumably panics on error — the result is never closed here
+ br = must.Open(c.b)
+ cr = strings.TrimSpace(string(must.ReadFile(c.c))) // expected output with surrounding whitespace removed
+ buf bytes.Buffer
+ groupFunc = func(g *Group) error { // appends each group to buf as one JSON line
+ return json.NewEncoder(&buf).Encode(g)
+ }
+ cm = New(ar, br, c.kf, groupFunc)
+ )
+ if err := cm.Run(); err != nil {
+ t.Errorf("[%s] failed: %v", c.a, err) // Errorf does not stop the case; the comparison below still runs
+ }
+ if got := strings.TrimSpace(buf.String()); cr != got {
+ t.Errorf("[%s ...] got %v, want %v", c.a, got, cr)
+ }
+ }
+}