aboutsummaryrefslogtreecommitdiffstats
path: root/python
diff options
context:
space:
mode:
author Martin Czygan <martin.czygan@gmail.com> 2021-09-08 12:23:39 +0200
committer Martin Czygan <martin.czygan@gmail.com> 2021-09-08 12:23:39 +0200
commit fb6676de0fdc592bd067bd38a34fe4799f7dbb4f (patch)
tree b84898f8b140e187248283fe7152053fb4e4e90c /python
parent 24caa791fd5f831dc8c5fccfa0709c68146533c7 (diff)
download refcat-fb6676de0fdc592bd067bd38a34fe4799f7dbb4f.tar.gz
refcat-fb6676de0fdc592bd067bd38a34fe4799f7dbb4f.zip
tasks: coci doi only; COCIDOIOnly
Diffstat (limited to 'python')
-rw-r--r-- python/refcat/techreport.py 20
1 files changed, 18 insertions, 2 deletions
diff --git a/python/refcat/techreport.py b/python/refcat/techreport.py
index 904f9c9..064e357 100644
--- a/python/refcat/techreport.py
+++ b/python/refcat/techreport.py
@@ -4,11 +4,27 @@ Tasks for techreport.
"""
from refcat.tasks import Refcat, OpenCitations
+from refcat.base import shellout, Zstd
-class COCIStats(Refcat):
+
+class COCIDOIOnly(Refcat):
+ """
+ Extract DOI-DOI pair, order dois lexicographically.
+ """
def requires(self):
return OpenCitations()
def run(self):
- print(self.input().path)
+ """ https://unix.stackexchange.com/a/37470/376 """
+ output = shellout("""
+ zstdcat -T0 {input} |
+ tail -n +2 |
+ cut -d , -f2,3 |
+ perl -F, -lane 'printf qq[%s\n], join ",", sort @F' |
+ zstd -c -T0 > {output}
+ """, input=self.input().path)
+ luigi.LocalTarget(output).move(self.output().path)
+
+ def output(self):
+ return luigi.LocalTarget(path=self.path(ext="tsv.zst"), format=Zstd)