aboutsummaryrefslogtreecommitdiffstats
path: root/python
diff options
context:
space:
mode:
author Martin Czygan <martin.czygan@gmail.com> 2021-06-01 16:16:59 +0200
committer Martin Czygan <martin.czygan@gmail.com> 2021-06-01 16:16:59 +0200
commit 61105115ce24706a0e0c16c0f29b0f49e07ee2f7 (patch)
tree c837952ebcc79834a53860835b2c01dea4171d97 /python
parent 6833ddc7ac15a17961264ccb8df433e8d4fa1f07 (diff)
download refcat-61105115ce24706a0e0c16c0f29b0f49e07ee2f7.tar.gz
refcat-61105115ce24706a0e0c16c0f29b0f49e07ee2f7.zip
add UnmatchedResolveJournalNames
Diffstat (limited to 'python')
-rw-r--r-- python/refcat/tasks.py 22
1 files changed, 22 insertions, 0 deletions
diff --git a/python/refcat/tasks.py b/python/refcat/tasks.py
index 17caf96..4193867 100644
--- a/python/refcat/tasks.py
+++ b/python/refcat/tasks.py
@@ -864,6 +864,28 @@ class UnmatchedMapped(Refcat):
return luigi.LocalTarget(path=self.path(ext="tsv.zst"), format=Zstd)
+class UnmatchedResolveJournalNames(Refcat):
+    """
+    Pipe UnmatchedMapped output through skate-resolve-journal-name, so we can
+    match against both abbreviations and full names; result is a zstd TSV.
+    """
+    def requires(self):
+        return UnmatchedMapped()  # upstream task; its output path feeds zstdcat
+
+    def run(self):
+        output = shellout("""
+                          zstdcat -T0 {input} |
+                          skate-resolve-journal-name -f 2 -B -A {abbrev} |
+                          zstd -T0 -c > {output}
+                          """,
+                          abbrev=settings.JOURNAL_ABBREVIATIONS,
+                          input=self.input().path)
+        luigi.LocalTarget(output).move(self.output().path)  # move result to final path
+
+    def output(self):
+        return luigi.LocalTarget(path=self.path(ext="tsv.zst"), format=Zstd)
+
+
class UnmatchedOpenLibraryMatchTable(Refcat):
"""
Run matching and write tabular results to file. About 50M rows.