From 8b18e4311aceae98f730dc655c24ba72494dc9ae Mon Sep 17 00:00:00 2001
From: Martin Czygan <martin.czygan@gmail.com>
Date: Mon, 24 May 2021 20:32:42 +0200
Subject: tasks: add UnmatchedMapped, for OL

---
 python/refcat/tasks.py | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

(limited to 'python')

diff --git a/python/refcat/tasks.py b/python/refcat/tasks.py
index 54a5890..fa72062 100644
--- a/python/refcat/tasks.py
+++ b/python/refcat/tasks.py
@@ -747,3 +747,24 @@ class OpenLibraryWorksSorted(Refcat):
 
     def output(self):
         return luigi.LocalTarget(path=self.path(ext="tsv.zst"), format=Zstd)
+
+class UnmatchedMapped(Refcat):
+    """
+    Map unmatched refs (in release schema) to titles to do approximate title matches with OL.
+    """
+
+    def requires(self):
+        return UnmatchedRefs()
+
+    def run(self):
+        output = shellout("""
+                          zstdcat -T0 {input} |
+                          skate-map -m ts |
+                          LC_ALL=C sort -T {tmpdir} -S25% -k1,1 --parallel 4 |
+                          zstd -T0 -c > {output}
+                          """, tmpdir=self.tmpdir, input=self.input().path)
+        # The pipeline wrote to the path returned by shellout; move it to the task target.
+        luigi.LocalTarget(output).move(self.output().path)
+
+    def output(self):
+        return luigi.LocalTarget(path=self.path(ext="tsv.zst"), format=Zstd)
-- 
cgit v1.2.3