about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Martin Czygan <martin.czygan@gmail.com> 2021-06-03 23:53:52 +0200
committer Martin Czygan <martin.czygan@gmail.com> 2021-06-03 23:53:52 +0200
commit 93d16238ed576ba61c2b0131274dcf9fc077afbf (patch)
tree 8a4cfec759fc702283587f000e1d78406984a66e
parent 7171710338709aba58393558f4c647a59422942e (diff)
download refcat-93d16238ed576ba61c2b0131274dcf9fc077afbf.tar.gz
refcat-93d16238ed576ba61c2b0131274dcf9fc077afbf.zip
update notes
-rw-r--r-- python/refcat/tasks.py 7
1 files changed, 5 insertions, 2 deletions
diff --git a/python/refcat/tasks.py b/python/refcat/tasks.py
index e2ff102..7364df1 100644
--- a/python/refcat/tasks.py
+++ b/python/refcat/tasks.py
@@ -102,6 +102,7 @@ TODO
----
* [ ] partial (hold)
+* [ ] OL fuzzy
* [ ] unmatched (in a final pass)
We can match by id and key, e.g. extract id and key, sort and merge (id, key)
@@ -937,7 +938,7 @@ class OpenLibraryEditionsMapped(Refcat):
class UnmatchedMapped(Refcat):
"""
Map unmatched refs (converted to release schema on the fly) to container
- names to do approximate title matches with OL; 35m14.801s.
+ names to do approximate matches with OL. 217m53.989s.
"""
def requires(self):
return RefsWithoutIdentifiers()
@@ -960,7 +961,9 @@ class UnmatchedMapped(Refcat):
class UnmatchedOpenLibraryMatchTable(Refcat):
"""
- Run matching and write tabular results to file. About 50M rows.
+ Run matching and write tabular results to file.
+
+ Total rows: 139507963, exact/strong matches: 11777185.
"""
def requires(self):
return {