about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Martin Czygan <martin.czygan@gmail.com> 2021-08-04 03:07:29 +0200
committer: Martin Czygan <martin.czygan@gmail.com> 2021-08-04 03:07:29 +0200
commit: 902cac30f940fe5ea909d7fea5713d08d307d35c (patch)
tree: 4f72ecb96fbc0b67fe76cdeb4a8dccb181a8d07d
parent: f745a1e3c26690edf26e4b65d1229382beed42d5 (diff)
download: refcat-902cac30f940fe5ea909d7fea5713d08d307d35c.tar.gz
refcat-902cac30f940fe5ea909d7fea5713d08d307d35c.zip
tasks: add BrefWithDOI
-rw-r--r-- python/refcat/tasks.py 28
1 files changed, 27 insertions, 1 deletions
diff --git a/python/refcat/tasks.py b/python/refcat/tasks.py
index f4aacb1..8dcc676 100644
--- a/python/refcat/tasks.py
+++ b/python/refcat/tasks.py
@@ -398,7 +398,7 @@ class ReleaseIdentDOIList(Refcat):
class ReleaseIdentDOIMapping(Refcat):
"""
- Create a mapping database from release ident to DOI.
+ Create a mapping database from release ident to DOI. 21min.
"""
def requires(self):
return ReleaseIdentDOIList()
@@ -411,6 +411,32 @@ class ReleaseIdentDOIMapping(Refcat):
return luigi.LocalTarget(path=self.path(ext="db"))
+class BrefWithDOI(Refcat):
+ """
+ Take the paper matches and add source and target DOI.
+ """
+ def requires(self):
+ return {
+ "bref": Bref(),
+ "mapping": ReleaseIdentDOIMapping(),
+ }
+
+ def run(self):
+ output = shellout("""
+ zstdcat {bref} |
+ tabby -A -db {mapping}
+ -m source_release_ident:source_doi
+ -m target_release_ident:target_doi
+ | zstd -c -T0 > {output}
+ """,
+ bref=self.input().get("bref"),
+ mapping=self.input().get("mapping"))
+ luigi.LocalTarget(output).move(self.output().path)
+
+ def output(self):
+ return luigi.LocalTarget(path=self.path(ext="json.zst"), format=Zstd)
+
+
class UnmatchedRefs(Refcat):
"""
File with not yet considered refs (e.g. no title, doi, ...); around