about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Martin Czygan <martin.czygan@gmail.com> 2021-04-30 03:29:27 +0200
committer Martin Czygan <martin.czygan@gmail.com> 2021-04-30 03:29:33 +0200
commit 0c7485486eaa62e8b7673949e09d546b78649ab8 (patch)
tree 32b246b9789a8da7d1499dc8dde1001198f2caca
parent 5e7543c3f4612dacd7f1d4e04c75cd48ecb67b0c (diff)
download refcat-0c7485486eaa62e8b7673949e09d546b78649ab8.tar.gz
refcat-0c7485486eaa62e8b7673949e09d546b78649ab8.zip
fix newline handling
-rw-r--r-- skate/map.go 21
-rw-r--r-- skate/map_test.go 40
2 files changed, 56 insertions, 5 deletions
diff --git a/skate/map.go b/skate/map.go
index 9d3c98d..094d3e2 100644
--- a/skate/map.go
+++ b/skate/map.go
@@ -40,14 +40,25 @@ type PartialDoc struct {
// doc). We want fields, but we do not want to bake in TSV into each function.
type Mapper func([]byte) ([][]byte, error)
-// AsTSV serializes the result of a field mapper as TSV. This is a slim adapter,
-// e.g. to parallel.Processor, which expects this function signature.
+// AsTSV serializes the result of a field mapper as TSV. This is a slim
+// adapter, e.g. to parallel.Processor, which expects this function signature.
+// A newline is appended unless the joined output is empty or already ends in one.
func (f Mapper) AsTSV(p []byte) ([]byte, error) {
- fields, err := f(p)
- if err != nil {
+ var (
+ fields [][]byte
+ err error
+ )
+ if fields, err = f(p); err != nil {
return nil, err
}
- return bytes.Join(fields, bTab), nil
+ if len(fields) == 0 {
+ return nil, nil
+ }
+ b := bytes.Join(fields, bTab)
+ if len(b) > 0 && !bytes.HasSuffix(b, bNewline) {
+ b = append(b, bNewline...)
+ }
+ return b, nil
}
// WithPrefix adds a given prefix to the first element.
diff --git a/skate/map_test.go b/skate/map_test.go
index a439d33..a81cb3d 100644
--- a/skate/map_test.go
+++ b/skate/map_test.go
@@ -149,6 +149,46 @@ func TestMapperTitleSandcrawler(t *testing.T) {
}
}
+func TestAsTSV(t *testing.T) {
+ var cases = []struct {
+ f Mapper
+ err error
+ want string
+ }{
+ {
+ f: Mapper(func(_ []byte) ([][]byte, error) {
+ return [][]byte{
+ []byte("a"),
+ []byte("b"),
+ []byte("c"),
+ }, nil
+ }),
+ err: nil,
+ want: "a\tb\tc\n",
+ },
+ {
+ f: Mapper(func(_ []byte) ([][]byte, error) {
+ return [][]byte{
+ []byte("a"),
+ []byte("b"),
+ []byte("c\n"),
+ }, nil
+ }),
+ err: nil,
+ want: "a\tb\tc\n",
+ },
+ }
+ for _, c := range cases {
+ got, err := c.f.AsTSV([]byte{})
+ if err != c.err {
+ t.Fatalf("got %v, want nil", got)
+ }
+ if string(got) != c.want {
+ t.Fatalf("got %v, want %v", string(got), c.want)
+ }
+ }
+}
+
func prettySlice(p [][]byte) (result []string) {
result = make([]string, len(p))
for i, v := range p {