aboutsummaryrefslogtreecommitdiffstats
path: root/python
diff options
context:
space:
mode:
author Martin Czygan <martin.czygan@gmail.com> 2021-05-26 23:36:09 +0200
committer Martin Czygan <martin.czygan@gmail.com> 2021-05-26 23:36:09 +0200
commit 75f987f897ea7dc4a8c2f3047d547ce4997220ad (patch)
tree e646e0b9e41b684a9567536192961d36dd11bed8 /python
parent ab072dda62b2b184666fbce0ed845ba17e54ff05 (diff)
download refcat-75f987f897ea7dc4a8c2f3047d547ce4997220ad.tar.gz
refcat-75f987f897ea7dc4a8c2f3047d547ce4997220ad.zip
tasks: OpenLibraryRelease
Diffstat (limited to 'python')
-rw-r--r-- python/refcat/tasks.py 18
1 files changed, 18 insertions, 0 deletions
diff --git a/python/refcat/tasks.py b/python/refcat/tasks.py
index 4880f43..c45b6ed 100644
--- a/python/refcat/tasks.py
+++ b/python/refcat/tasks.py
@@ -748,6 +748,23 @@ class OpenLibraryWorksSorted(Refcat):
def output(self):
return luigi.LocalTarget(path=self.path(ext="tsv.zst"), format=Zstd)
+class OpenLibraryRelease(Refcat):
+    """
+    Turn Open Library into Release Entities with author mapping.
+
+    Streams the (currently hardcoded) editions dump through ``cut -f5`` and
+    ``skate-conv -f oled``, handing the author mapping file to skate-conv via
+    ``-Xa``. The converted stream is zstd-compressed and moved to the path
+    returned by ``output()``.
+    """
+    def run(self):
+        # TODO: remove hardcoded values.
+        #
+        # The pipeline must write into {output} itself: shellout (assumed to
+        # be gluish's) only substitutes a temporary file name for {output} and
+        # returns that name, it does not capture stdout. Compressing with zstd
+        # keeps the file content consistent with the "json.zst" extension and
+        # the Zstd format declared in output().
+        output = shellout("""
+                          zstdcat -T0 /magna/data/ol_dump_editions_latest.txt.zst |
+                          cut -f5 |
+                          skate-conv -f oled -Xa <(zstdcat -T0 /magna/data/ol_author_mapping.tsv.zst) |
+                          zstd -c -T0 > {output}
+                          """)
+        # Move the finished temporary file into place in a single step.
+        luigi.LocalTarget(output).move(self.output().path)
+
+    def output(self):
+        return luigi.LocalTarget(path=self.path(ext="json.zst"), format=Zstd)
+
+
class UnmatchedMapped(Refcat):
"""
Map unmatched refs (in release schema) to titles to do approximate title
@@ -770,3 +787,4 @@ class UnmatchedMapped(Refcat):
def output(self):
return luigi.LocalTarget(path=self.path(ext="tsv.zst"), format=Zstd)
+