about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- python/refcat/tasks.py 22
1 files changed, 22 insertions, 0 deletions
diff --git a/python/refcat/tasks.py b/python/refcat/tasks.py
index 17caf96..4193867 100644
--- a/python/refcat/tasks.py
+++ b/python/refcat/tasks.py
@@ -864,6 +864,28 @@ class UnmatchedMapped(Refcat):
return luigi.LocalTarget(path=self.path(ext="tsv.zst"), format=Zstd)
+class UnmatchedResolveJournalNames(Refcat):
+    """
+    Resolve journal names (abbreviations file: settings.JOURNAL_ABBREVIATIONS), so
+    we can match against both abbreviations and full names; zstd TSV in and out.
+    """
+    def requires(self):
+        return UnmatchedMapped()
+
+    def run(self):
+        output = shellout("""
+            zstdcat -T0 {input} |
+            skate-resolve-journal-name -f 2 -B -A {abbrev} |
+            zstd -T0 -c > {output}
+            """,
+            abbrev=settings.JOURNAL_ABBREVIATIONS,
+            input=self.input().path)
+        luigi.LocalTarget(output).move(self.output().path)
+
+    def output(self):
+        return luigi.LocalTarget(path=self.path(ext="tsv.zst"), format=Zstd)
+
+
class UnmatchedOpenLibraryMatchTable(Refcat):
"""
Run matching and write tabular results to file. About 50M rows.