From 7c8d01e9e9afd5845c4334111d064e8b246a0d8f Mon Sep 17 00:00:00 2001
From: Martin Czygan
Date: Wed, 4 Aug 2021 01:02:23 +0200
Subject: tasks: select may be slow

---
 python/refcat/tasks.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'python')

diff --git a/python/refcat/tasks.py b/python/refcat/tasks.py
index 5e53f98..ef5138d 100644
--- a/python/refcat/tasks.py
+++ b/python/refcat/tasks.py
@@ -375,6 +375,7 @@ class ReleaseExportReduced(Refcat):
     def output(self):
         return luigi.LocalTarget(path=self.path(ext="json.zst"), format=Zstd)
 
+
 class ReleaseIdentDOIList(Refcat):
     """
     Create TSV (ident, doi).
@@ -386,7 +387,7 @@ class ReleaseIdentDOIList(Refcat):
         output = shellout("""
                           zstdcat -T0 {input} |
                           parallel --block 10M -j 20 --pipe
-                          "jq -rc 'select(.ext_ids.doi != null) | [.ident, .ext_ids.doi] | @tsv'" > {output}
+                          "jq -rc '[.ident, .ext_ids.doi] | @tsv' | LC_ALL=C grep -F '.'" > {output}
                           """,
                           input=self.input().path)
         luigi.LocalTarget(output).move(self.output().path)
-- 
cgit v1.2.3