diff options
author | Martin Czygan <martin.czygan@gmail.com> | 2021-08-04 01:02:23 +0200 |
---|---|---|
committer | Martin Czygan <martin.czygan@gmail.com> | 2021-08-04 01:02:23 +0200 |
commit | 7c8d01e9e9afd5845c4334111d064e8b246a0d8f (patch) | |
tree | 9d3881f915ab4927c1c0407df11b628f5d894f77 /python | |
parent | 65668624e1a792d479bfdbc35cd465348c224684 (diff) | |
download | refcat-7c8d01e9e9afd5845c4334111d064e8b246a0d8f.tar.gz refcat-7c8d01e9e9afd5845c4334111d064e8b246a0d8f.zip |
tasks: select may be slow
Diffstat (limited to 'python')
-rw-r--r-- | python/refcat/tasks.py | 3 |
1 file changed, 2 insertions, 1 deletion
diff --git a/python/refcat/tasks.py b/python/refcat/tasks.py
index 5e53f98..ef5138d 100644
--- a/python/refcat/tasks.py
+++ b/python/refcat/tasks.py
@@ -375,6 +375,7 @@ class ReleaseExportReduced(Refcat):
     def output(self):
         return luigi.LocalTarget(path=self.path(ext="json.zst"), format=Zstd)
 
+
 class ReleaseIdentDOIList(Refcat):
     """
     Create TSV (ident, doi).
@@ -386,7 +387,7 @@ class ReleaseIdentDOIList(Refcat):
         output = shellout("""
         zstdcat -T0 {input} |
         parallel --block 10M -j 20 --pipe
-        "jq -rc 'select(.ext_ids.doi != null) | [.ident, .ext_ids.doi] | @tsv'" > {output}
+        "jq -rc '[.ident, .ext_ids.doi] | @tsv' | LC_ALL=C grep -F '.'" > {output}
         """,
                           input=self.input().path)
         luigi.LocalTarget(output).move(self.output().path)