aboutsummaryrefslogtreecommitdiffstats
path: root/python/refcat/report.py
diff options
context:
space:
mode:
Diffstat (limited to 'python/refcat/report.py')
-rw-r--r-- python/refcat/report.py 59
1 files changed, 59 insertions, 0 deletions
diff --git a/python/refcat/report.py b/python/refcat/report.py
new file mode 100644
index 0000000..8060aa3
--- /dev/null
+++ b/python/refcat/report.py
@@ -0,0 +1,59 @@
+"""
+Tasks for techreport.
+"""
+import luigi
+
+from refcat.base import Zstd, shellout
+from refcat.tasks import BrefWithDOI, OpenCitations, Refcat
+
+
+class COCIDOIOnly(Refcat):
+ """
+ Reduce the OpenCitations input to DOI-DOI pairs: skip the first line, keep comma-separated columns 2 and 3, sort the two DOIs within each line lexicographically, then sort all lines (LC_ALL=C) and zstd-compress. NOTE: the output is comma-separated although the file extension is "tsv.zst".
+ """
+ def requires(self):
+ return OpenCitations()
+
+ def run(self):
+ """ Run the shell pipeline and move the result to the output path; reference (presumably for the perl per-line field sort, TODO confirm): https://unix.stackexchange.com/a/37470/376 """
+ output = shellout(r"""
+ zstdcat -T0 {input} |
+ tail -n +2 |
+ cut -d , -f2,3 |
+ perl -F, -lane 'printf qq[%s\n], join ",", sort @F' |
+ LC_ALL=C sort -T {tmpdir} -S25% |
+ zstd -c -T0 > {output}
+ """,
+ input=self.input().path,
+ tmpdir=self.tmpdir)
+ luigi.LocalTarget(output).move(self.output().path)
+
+ def output(self):
+ return luigi.LocalTarget(path=self.path(ext="tsv.zst"), format=Zstd)
+
+
+class BrefDOIOnly(Refcat):
+ """
+ Bref (BrefWithDOI), reduced to DOI pairs so we can compare with others: jq extracts source_doi and target_doi from each JSON line (lines that fail to parse are silently skipped via "fromjson?"), tabs are turned into commas, the two DOIs are sorted within each line, then all lines are sorted (LC_ALL=C) and zstd-compressed. NOTE: the output is comma-separated although the file extension is "tsv.zst".
+ """
+ def requires(self):
+ return BrefWithDOI()
+
+ def run(self):
+ output = shellout(r"""
+ zstdcat -T0 {input} |
+ parallel --pipe -j 24 --block 10M "jq -R -rc 'fromjson? | [.source_doi, .target_doi] | @tsv'" |
+ tr $'\t' ',' |
+ perl -F, -lane 'printf qq[%s\n], join ",", sort @F' |
+ LC_ALL=C sort -T {tmpdir} -S25% |
+ zstd -c -T0 > {output}
+ """,
+ input=self.input().path,
+ tmpdir=self.tmpdir)
+ luigi.LocalTarget(output).move(self.output().path)
+
+ def output(self):
+ return luigi.LocalTarget(path=self.path(ext="tsv.zst"), format=Zstd)
+
+
+# TODO: Mag DOI