aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--python/refcat/techreport.py20
1 files changed, 18 insertions, 2 deletions
diff --git a/python/refcat/techreport.py b/python/refcat/techreport.py
index 904f9c9..064e357 100644
--- a/python/refcat/techreport.py
+++ b/python/refcat/techreport.py
@@ -4,11 +4,27 @@ Tasks for techreport.
"""
import luigi

from refcat.base import shellout, Zstd
from refcat.tasks import Refcat, OpenCitations
class COCIDOIOnly(Refcat):
    """
    Extract DOI-DOI pairs from the OpenCitations dump.

    Only fields 2 and 3 of every data row are kept, and the two values of each
    row are written in lexicographic order, so (a, b) and (b, a) produce the
    same output line.
    """

    def requires(self):
        """Depend on the OpenCitations task; its output is the input here."""
        return OpenCitations()

    def run(self):
        """
        Stream the compressed input through a shell pipeline and move the
        result into place.

        Pipeline steps: decompress, drop the first line (presumably the CSV
        header -- confirm against the dump), keep comma-separated fields 2 and
        3, sort the two fields within each row, recompress.

        The perl one-liner follows https://unix.stackexchange.com/a/37470/376
        """
        # Raw string on purpose: the "\n" has to reach perl as a backslash
        # escape. In a regular string Python turns it into a real newline
        # inside the one-liner; a whitespace-normalizing shellout would then
        # flatten it to a space (NOTE(review): confirm shellout's whitespace
        # handling) and, since printf does not honour -l, every record would
        # end up on a single line.
        output = shellout(r"""
            zstdcat -T0 {input} |
            tail -n +2 |
            cut -d , -f2,3 |
            perl -F, -lane 'printf qq[%s\n], join ",", sort @F' |
            zstd -c -T0 > {output}
            """, input=self.input().path)
        # shellout hands back the path it wrote to; move it to the final target.
        luigi.LocalTarget(output).move(self.output().path)

    def output(self):
        """Return the zstd-compressed local target holding the sorted pairs."""
        # NOTE(review): rows are comma-separated although the extension says
        # "tsv"; left unchanged so the output path stays the same.
        return luigi.LocalTarget(path=self.path(ext="tsv.zst"), format=Zstd)