diff options
author | Martin Czygan <martin.czygan@gmail.com> | 2021-07-13 19:52:02 +0200 |
---|---|---|
committer | Martin Czygan <martin.czygan@gmail.com> | 2021-07-13 19:52:02 +0200 |
commit | 5eb527be3f34dbadae698f3ece164e34c031cb91 (patch) | |
tree | 68a848af0e684819223445ddc7aa2fbe1f98ea98 | |
parent | eb370abba6e8928e88d536880acac9b9fa40835b (diff) | |
download | refcat-5eb527be3f34dbadae698f3ece164e34c031cb91.tar.gz refcat-5eb527be3f34dbadae698f3ece164e34c031cb91.zip |
reduce: temp bug fix for line cutter
At one point we trimmed whitespace from the whole line before splitting,
because values contained the separator character. However, this breaks with
empty values: a leading or trailing separator is trimmed away, so the
columns shift. Move back to not trimming values, except for stripping the
trailing newline when the last value is requested. Moving forward, we need
to clean or reject dirty values, or use a different delimiter.
-rw-r--r-- | skate/reduce.go | 6 | ||||
-rw-r--r-- | skate/reduce_test.go | 87 |
2 files changed, 61 insertions, 32 deletions
```diff
diff --git a/skate/reduce.go b/skate/reduce.go
index 9986152..eac2e83 100644
--- a/skate/reduce.go
+++ b/skate/reduce.go
@@ -598,10 +598,14 @@ func Cut(line string, column int) string {
 
 // CutSep allows to specify a separator, column is 1-indexed.
 func CutSep(line, sep string, column int) string {
-	parts := strings.Split(strings.TrimSpace(line), sep)
+	// XXX: This will cut the tab separator, if there is no other value.
+	parts := strings.Split(line, sep)
 	if len(parts) < column {
 		return ""
 	} else {
+		if len(parts) == column {
+			return strings.TrimSuffix(parts[column-1], "\n")
+		}
 		return parts[column-1]
 	}
 }
diff --git a/skate/reduce_test.go b/skate/reduce_test.go
index 4db0687..99c0ed7 100644
--- a/skate/reduce_test.go
+++ b/skate/reduce_test.go
@@ -34,37 +34,6 @@ func TestLineColumn(t *testing.T) {
 	}
 }
 
-func TestCutBatch(t *testing.T) {
-	var cases = []struct {
-		lines  []string
-		column int
-		result []string
-	}{
-		{
-			[]string{},
-			1,
-			nil,
-		},
-		{
-			[]string{},
-			9,
-			nil,
-		},
-		{
-			[]string{"1\t2\n", "3\t4\n"},
-			2,
-			[]string{"2", "4"},
-		},
-	}
-	for _, c := range cases {
-		result := CutBatch(c.lines, c.column)
-		if !reflect.DeepEqual(result, c.result) {
-			t.Fatalf("got %v (%d), want %v (%d)",
-				result, len(result), c.result, len(c.result))
-		}
-	}
-}
-
 func TestUniqueMatches(t *testing.T) {
 	var cases = []struct {
 		about string
@@ -453,3 +422,59 @@ func tempWriteFile(buf *bytes.Buffer) (string, error) {
 	}
 	return f.Name(), nil
 }
+
+func TestCutBatch(t *testing.T) {
+	var cases = []struct {
+		lines  []string
+		column int
+		result []string
+	}{
+		{
+			[]string{},
+			1,
+			nil,
+		},
+		{
+			[]string{},
+			9,
+			nil,
+		},
+		{
+			[]string{"1\t2\n", "3\t4\n"},
+			2,
+			[]string{"2", "4"},
+		},
+	}
+	for _, c := range cases {
+		result := CutBatch(c.lines, c.column)
+		if !reflect.DeepEqual(result, c.result) {
+			t.Fatalf("got %v (%d), want %v (%d)",
+				result, len(result), c.result, len(c.result))
+		}
+	}
+}
+
+func TestCutSep(t *testing.T) {
+	var cases = []struct {
+		line   string
+		sep    string
+		column int
+		result string
+	}{
+		{"", "\t", 1, ""},
+		{"", "\t", 2, ""},
+		{"a\tb", "\t", 1, "a"},
+		{"a\tb", "\t", 2, "b"},
+		{"a\tb", "\t", 3, ""},
+		{"a\t\tb", "\t", 1, "a"},
+		{"a\t\tb", "\t", 2, ""},
+		{"a\t\tb", "\t", 3, "b"},
+		{"\tb", "\t", 1, ""},
+	}
+	for _, c := range cases {
+		result := CutSep(c.line, c.sep, c.column)
+		if !reflect.DeepEqual(result, c.result) {
+			t.Fatalf("got %v, want %v", result, c.result)
+		}
+	}
+}
```