# Provenance: class added to python/refcat/tasks.py by commit
# 902cac30f940fe5ea909d7fea5713d08d307d35c ("tasks: add BrefWithDOI",
# Martin Czygan, 2021-08-04). The same commit appended the runtime note
# "21min." to the ReleaseIdentDOIMapping docstring.
class BrefWithDOI(Refcat):
    """
    Take the paper matches and add source and target DOI.

    Streams the zstd-compressed output of Bref through ``tabby`` together
    with the database produced by ReleaseIdentDOIMapping and writes the
    result as a zstd-compressed file.
    """
    def requires(self):
        """
        Return the upstream tasks, keyed by the role they play in run():
        "bref" (the matches to enrich) and "mapping" (the release ident to
        DOI database).
        """
        return {
            "bref": Bref(),
            "mapping": ReleaseIdentDOIMapping(),
        }

    def run(self):
        """
        Run the zstdcat | tabby | zstd pipeline and move the result into
        place.

        The two -m flags pair an identifier field with a DOI field
        (source_release_ident:source_doi, target_release_ident:target_doi).
        NOTE(review): presumably tabby looks up the value of the first field
        in the mapping database and stores the result under the second
        field; confirm against the tabby documentation.
        """
        inputs = self.input()
        # Pass explicit filesystem paths into the shell template instead of
        # the target objects; this matches the self.output().path usage
        # below and does not depend on how a target renders as a string.
        # Every pipeline stage ends in "|" and the tabby arguments stay on
        # one line, so the command is a single pipeline whether or not
        # shellout collapses newlines.
        output = shellout("""
                          zstdcat {bref} |
                          tabby -A -db {mapping} -m source_release_ident:source_doi -m target_release_ident:target_doi |
                          zstd -c -T0 > {output}
                          """,
                          bref=inputs.get("bref").path,
                          mapping=inputs.get("mapping").path)
        # shellout hands back the path it wrote to; move that file to the
        # final task output location.
        luigi.LocalTarget(output).move(self.output().path)

    def output(self):
        """
        Return the local target for the result, a file with the extension
        "json.zst" that uses the Zstd format.
        """
        return luigi.LocalTarget(path=self.path(ext="json.zst"), format=Zstd)