aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Martin Czygan <martin.czygan@gmail.com> 2021-05-11 00:00:46 +0200
committer Martin Czygan <martin.czygan@gmail.com> 2021-05-11 00:00:46 +0200
commit 581a044fcc49144afe04e11d1d4bb662ad595f6b (patch)
tree 37020c5d4372ea212f4d5ec8dc39a4d399bd08cc
parent 4fda7f72a87a863641138126c0b0e117a9b93d7b (diff)
download refcat-581a044fcc49144afe04e11d1d4bb662ad595f6b.tar.gz
refcat-581a044fcc49144afe04e11d1d4bb662ad595f6b.zip
skip-on-empty: switch to one indexed column
-rw-r--r-- python/refcat/tasks.py 4
-rw-r--r-- skate/cmd/skate-map/main.go 6
-rw-r--r-- skate/map.go 1
3 files changed, 6 insertions, 5 deletions
diff --git a/python/refcat/tasks.py b/python/refcat/tasks.py
index fbe7b1a..77690c5 100644
--- a/python/refcat/tasks.py
+++ b/python/refcat/tasks.py
@@ -305,8 +305,8 @@ class RefsDOI(Refcat):
def run(self):
output = shellout("""
zstdcat -T0 {input} |
- skate-map -m ff -x biblio.doi |
- LC_ALL=C sort -T {tmpdir} -k2,2 -S25% --parallel 4 |
+ skate-map -m ff -x biblio.doi -skip-on-empty 1 |
+ LC_ALL=C sort -T {tmpdir} -k1,1 -S25% --parallel 4 |
zstd -T0 -c > {output}
""",
n=self.n,
diff --git a/skate/cmd/skate-map/main.go b/skate/cmd/skate-map/main.go
index 4b30927..3682e7b 100644
--- a/skate/cmd/skate-map/main.go
+++ b/skate/cmd/skate-map/main.go
@@ -54,7 +54,7 @@ var (
extraValue = flag.String("x", "", "extra value to pass to configurable mappers")
bestEffort = flag.Bool("B", false, "best effort")
logFile = flag.String("log", "", "log filename")
- skipOnEmpty = flag.Int("skip-on-empty", -1, "omit docs with empty value in given column (zero indexed)")
+ skipOnEmpty = flag.Int("skip-on-empty", 0, "omit docs with empty value in given column (one indexed)")
help = `skate-map available mappers
@@ -87,8 +87,8 @@ func main() {
if mapf, ok := availableMappers[*mapperName]; !ok {
log.Fatalf("unknown mapper name: %v", *mapperName)
} else {
- if *skipOnEmpty >= 0 {
- mapf = skate.WithSkipOnEmpty(mapf, *skipOnEmpty)
+ if *skipOnEmpty > 0 {
+ mapf = skate.WithSkipOnEmpty(mapf, *skipOnEmpty-1)
}
if *keyPrefix != "" {
mapf = skate.WithPrefix(mapf, *keyPrefix)
diff --git a/skate/map.go b/skate/map.go
index cf2933e..4a79c1c 100644
--- a/skate/map.go
+++ b/skate/map.go
@@ -90,6 +90,7 @@ func WithBestEffort(f Mapper) Mapper {
}
// WithSkipOnEmpty ignores results where the value at a given field is empty.
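+// NOTE: index is expected to be zero-based here; skate-map converts its one-indexed -skip-on-empty flag by passing index-1.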
func WithSkipOnEmpty(f Mapper, index int) Mapper {
return func(p []byte) ([][]byte, error) {
fields, err := f(p)