aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Martin Czygan <martin.czygan@gmail.com> 2021-09-08 12:37:58 +0200
committer Martin Czygan <martin.czygan@gmail.com> 2021-09-08 12:37:58 +0200
commit 6043cb86de54b4fceaabbf1a561b600d2f3d244f (patch)
tree d316e7ba45555e8fe0da6dbe2a01929bc73c8b26
parent 70d6297e064a06a4df0a6bb2ebc4b6029283efb8 (diff)
download refcat-6043cb86de54b4fceaabbf1a561b600d2f3d244f.tar.gz
refcat-6043cb86de54b4fceaabbf1a561b600d2f3d244f.zip
tasks: add BrefDOIOnly
-rw-r--r-- python/refcat/techreport.py 22
1 files changed, 21 insertions, 1 deletions
diff --git a/python/refcat/techreport.py b/python/refcat/techreport.py
index e23e151..0b1faa5 100644
--- a/python/refcat/techreport.py
+++ b/python/refcat/techreport.py
@@ -2,7 +2,7 @@
Tasks for techreport.
"""
import luigi
-from refcat.tasks import Refcat, OpenCitations
+from refcat.tasks import Refcat, OpenCitations, BrefCombined
from refcat.base import shellout, Zstd
@@ -27,3 +27,23 @@ class COCIDOIOnly(Refcat):
def output(self):
return luigi.LocalTarget(path=self.path(ext="tsv.zst"), format=Zstd)
+
+
+class BrefDOIOnly(Refcat):
+    """
+    Bref, reduced to doi, so we can compare with others.
+
+    Decompresses the input of the required task, extracts the `source_doi`
+    and `target_doi` fields of every JSON record with jq (`@csv`), sorts the
+    two values within each line with perl, and writes the zstd-compressed
+    result to this task's output target.
+
+    NOTE: the lines are comma-separated (jq `@csv`, perl `join ","`), even
+    though the output extension is `tsv.zst`.
+    """
+    def requires(self):
+        # NOTE(review): this commit adds an import of BrefCombined, while
+        # this task depends on BrefWithDOI, which is not imported in the
+        # visible part of the file -- confirm which upstream task is meant.
+        return BrefWithDOI()
+
+    def run(self):
+        # No `output` argument is passed, so shellout is expected to fill in
+        # {output} with a temporary file and return its path (the usual
+        # shellout contract -- verify against refcat.base).
+        output = shellout("""
+            zstdcat -T0 {input} |
+            parallel --pipe -j 16 --block 10M "jq -rc '[.source_doi, .target_doi] | @csv'" |
+            perl -F, -lane 'printf qq[%s\n], join ",", sort @F' |
+            zstd -c -T0 > {output}
+            """,
+            input=self.input().path)
+        # Move the temporary result into place; without this the declared
+        # output target is never created and luigi never considers the task
+        # complete.
+        luigi.LocalTarget(output).move(self.output().path)
+
+    def output(self):
+        return luigi.LocalTarget(path=self.path(ext="tsv.zst"), format=Zstd)