From 23910bdad2983a4b79aa2b5a1d36d3d0822cb06d Mon Sep 17 00:00:00 2001 From: Martin Czygan Date: Mon, 14 Jun 2021 22:42:47 +0200 Subject: add cut batch helper --- skate/zippy.go | 25 ++++++++++++++++++------- skate/zippy_test.go | 31 +++++++++++++++++++++++++++++++ 2 files changed, 49 insertions(+), 7 deletions(-) diff --git a/skate/zippy.go b/skate/zippy.go index 70d42fc..b4f2804 100644 --- a/skate/zippy.go +++ b/skate/zippy.go @@ -328,19 +328,22 @@ func ZippyBrefAugment(bref, raw io.Reader, w io.Writer) error { // First, iterate over all matches and sort out duplicates, e.g. // docs that have the same source and target id. - matched, err := uniqueMatches(g.G0) + matched, err := uniqueMatches(CutBatch(g.G0, 2)) if err != nil { return err } var refs = make([]*Ref, len(g.G1)) for i := 0; i < len(refs); i++ { - var ref Ref - if err := json.Unmarshal([]byte(g.G1[i]), &ref); err != nil { + var ( + data []byte = []byte(Cut(g.G1[i], 2)) + ref Ref + ) + if err := json.Unmarshal(data, &ref); err != nil { return err } refs[i] = &ref } - matchedRefsExtend(matched, refs) // XXX: Still duplicates (?) + matchedRefsExtend(matched, refs) for _, bref := range matched { if err := enc.Encode(bref); err != nil { return err @@ -419,13 +422,21 @@ func uniqueMatches(docs []string) (result []*BiblioRef, err error) { return result, nil } -// Cut returns a specific column (1-indexed, like CutSep) from a tabular -// file, returns empty string if column is invalid. +// CutBatch runs Cut over a list of lines. +func CutBatch(lines []string, column int) (result []string) { + for _, line := range lines { + result = append(result, Cut(line, column)) + } + return result +} + +// Cut returns a specific column (1-indexed) from a line, returns empty string +// if column is invalid. func Cut(line string, column int) string { return CutSep(line, "\t", column) } -// CutSep allows to specify a separator. +// CutSep allows to specify a separator, column is 1-indexed. func CutSep(line, sep string, column int) string { parts := strings.Split(strings.TrimSpace(line), sep) if len(parts) < column { diff --git a/skate/zippy_test.go b/skate/zippy_test.go index ed982b7..d0529c2 100644 --- a/skate/zippy_test.go +++ b/skate/zippy_test.go @@ -29,6 +29,37 @@ func TestLineColumn(t *testing.T) { } } +func TestCutBatch(t *testing.T) { + var cases = []struct { + lines []string + column int + result []string + }{ + { + []string{}, + 1, + nil, + }, + { + []string{}, + 9, + nil, + }, + { + []string{"1\t2\n", "3\t4\n"}, + 2, + []string{"2", "4"}, + }, + } + for _, c := range cases { + result := CutBatch(c.lines, c.column) + if !reflect.DeepEqual(result, c.result) { + t.Fatalf("got %v (%d), want %v (%d)", + result, len(result), c.result, len(c.result)) + } + } +} + func TestUniqueMatches(t *testing.T) { var cases = []struct { about string -- cgit v1.2.3