about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- python/refcat/tasks.py 18
1 files changed, 18 insertions, 0 deletions
diff --git a/python/refcat/tasks.py b/python/refcat/tasks.py
index 4880f43..c45b6ed 100644
--- a/python/refcat/tasks.py
+++ b/python/refcat/tasks.py
@@ -748,6 +748,23 @@ class OpenLibraryWorksSorted(Refcat):
def output(self):
return luigi.LocalTarget(path=self.path(ext="tsv.zst"), format=Zstd)
+class OpenLibraryRelease(Refcat):
+    """
+    Turn Open Library editions (field 5 of the dump) into release entities with author mapping; output is zstd-compressed.
+    """
+    def run(self):
+        # TODO: remove hardcoded values. NOTE: <(...) needs bash; result must be written to shellout's {output} temp file.
+        output = shellout("""
+                          zstdcat -T0 /magna/data/ol_dump_editions_latest.txt.zst | cut -f5 |
+                          skate-conv -f oled -Xa <(zstdcat -T0 /magna/data/ol_author_mapping.tsv.zst) |
+                          zstd -c -T0 > {output}
+                          """)
+        luigi.LocalTarget(output).move(self.output().path)
+
+    def output(self):
+        return luigi.LocalTarget(path=self.path(ext="json.zst"), format=Zstd)
+
+
class UnmatchedMapped(Refcat):
"""
Map unmatched refs (in release schema) to titles to do approximate title
@@ -770,3 +787,4 @@ class UnmatchedMapped(Refcat):
def output(self):
return luigi.LocalTarget(path=self.path(ext="tsv.zst"), format=Zstd)
+