From 581a044fcc49144afe04e11d1d4bb662ad595f6b Mon Sep 17 00:00:00 2001 From: Martin Czygan Date: Tue, 11 May 2021 00:00:46 +0200 Subject: skip-on-empty: switch to one indexed column --- python/refcat/tasks.py | 4 ++-- skate/cmd/skate-map/main.go | 6 +++--- skate/map.go | 1 + 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/python/refcat/tasks.py b/python/refcat/tasks.py index fbe7b1a..77690c5 100644 --- a/python/refcat/tasks.py +++ b/python/refcat/tasks.py @@ -305,8 +305,8 @@ class RefsDOI(Refcat): def run(self): output = shellout(""" zstdcat -T0 {input} | - skate-map -m ff -x biblio.doi | - LC_ALL=C sort -T {tmpdir} -k2,2 -S25% --parallel 4 | + skate-map -m ff -x biblio.doi -skip-on-empty 1 | + LC_ALL=C sort -T {tmpdir} -k1,1 -S25% --parallel 4 | zstd -T0 -c > {output} """, n=self.n, diff --git a/skate/cmd/skate-map/main.go b/skate/cmd/skate-map/main.go index 4b30927..3682e7b 100644 --- a/skate/cmd/skate-map/main.go +++ b/skate/cmd/skate-map/main.go @@ -54,7 +54,7 @@ var ( extraValue = flag.String("x", "", "extra value to pass to configurable mappers") bestEffort = flag.Bool("B", false, "best effort") logFile = flag.String("log", "", "log filename") - skipOnEmpty = flag.Int("skip-on-empty", -1, "omit docs with empty value in given column (zero indexed)") + skipOnEmpty = flag.Int("skip-on-empty", 0, "omit docs with empty value in given column (one indexed)") help = `skate-map available mappers @@ -87,8 +87,8 @@ func main() { if mapf, ok := availableMappers[*mapperName]; !ok { log.Fatalf("unknown mapper name: %v", *mapperName) } else { - if *skipOnEmpty >= 0 { - mapf = skate.WithSkipOnEmpty(mapf, *skipOnEmpty) + if *skipOnEmpty > 0 { + mapf = skate.WithSkipOnEmpty(mapf, *skipOnEmpty-1) } if *keyPrefix != "" { mapf = skate.WithPrefix(mapf, *keyPrefix) diff --git a/skate/map.go b/skate/map.go index cf2933e..4a79c1c 100644 --- a/skate/map.go +++ b/skate/map.go @@ -90,6 +90,7 @@ func WithBestEffort(f Mapper) Mapper { } // WithSkipOnEmpty ignores results 
where the value at a given field is empty. +// The index is zero-based; the one-based -skip-on-empty flag of skate-map is decremented by the caller. func WithSkipOnEmpty(f Mapper, index int) Mapper { return func(p []byte) ([][]byte, error) { fields, err := f(p) -- cgit v1.2.3