aboutsummaryrefslogtreecommitdiffstats
path: root/python
diff options
context:
space:
mode:
author: Martin Czygan <martin.czygan@gmail.com> 2021-08-04 01:02:23 +0200
committer: Martin Czygan <martin.czygan@gmail.com> 2021-08-04 01:02:23 +0200
commit: 7c8d01e9e9afd5845c4334111d064e8b246a0d8f (patch)
tree: 9d3881f915ab4927c1c0407df11b628f5d894f77 /python
parent: 65668624e1a792d479bfdbc35cd465348c224684 (diff)
download: refcat-7c8d01e9e9afd5845c4334111d064e8b246a0d8f.tar.gz
download: refcat-7c8d01e9e9afd5845c4334111d064e8b246a0d8f.zip
tasks: select may be slow
Diffstat (limited to 'python')
-rw-r--r-- python/refcat/tasks.py 3
1 files changed, 2 insertions, 1 deletions
diff --git a/python/refcat/tasks.py b/python/refcat/tasks.py
index 5e53f98..ef5138d 100644
--- a/python/refcat/tasks.py
+++ b/python/refcat/tasks.py
@@ -375,6 +375,7 @@ class ReleaseExportReduced(Refcat):
def output(self):
return luigi.LocalTarget(path=self.path(ext="json.zst"), format=Zstd)
+
class ReleaseIdentDOIList(Refcat):
"""
Create TSV (ident, doi).
@@ -386,7 +387,7 @@ class ReleaseIdentDOIList(Refcat):
output = shellout("""
zstdcat -T0 {input} |
parallel --block 10M -j 20 --pipe
- "jq -rc 'select(.ext_ids.doi != null) | [.ident, .ext_ids.doi] | @tsv'" > {output}
+ "jq -rc '[.ident, .ext_ids.doi] | @tsv' | LC_ALL=C grep -F '.'" > {output}
""",
input=self.input().path)
luigi.LocalTarget(output).move(self.output().path)