diff options
Diffstat (limited to 'python')
| -rw-r--r-- | python/refcat/tasks.py | 23 | 
1 files changed, 21 insertions, 2 deletions
| diff --git a/python/refcat/tasks.py b/python/refcat/tasks.py index f131fbc..7c1fabc 100644 --- a/python/refcat/tasks.py +++ b/python/refcat/tasks.py @@ -863,18 +863,37 @@ class UnmatchedMapped(Refcat):          return luigi.LocalTarget(path=self.path(ext="tsv.zst"), format=Zstd) +class UnmatchedRefsToRelease(Refcat): +    """ +    Convert unmatched refs to releases. +    """ +    def requires(self): +        return UnmatchedRefs() + +    def run(self): +        output = shellout(""" +                          zstdcat -T0 {input} | +                          skate-conv -f ref | +                          zstd -T0 -c > {output} +                          """, +                          input=self.input().path) +        luigi.LocalTarget(output).move(self.output().path) + +    def output(self): +        return luigi.LocalTarget(path=self.path(ext="tsv.zst"), format=Zstd) + +  class UnmatchedResolveJournalNames(Refcat):      """      Try to resolve journal names so we can match against both abbreviations.      Keep only the resolved docs (for now).      """      def requires(self): -        return UnmatchedRefs() +        return UnmatchedRefsToRelease()      def run(self):          output = shellout("""                            zstdcat -T0 {input} | -                          skate-conv -f ref |                            skate-resolve-journal-name -R -f 1 -B -A {abbrev} |                            zstd -T0 -c > {output}                            """, | 
