diff options
author | Martin Czygan <martin.czygan@gmail.com> | 2021-06-03 23:53:52 +0200 |
---|---|---|
committer | Martin Czygan <martin.czygan@gmail.com> | 2021-06-03 23:53:52 +0200 |
commit | 93d16238ed576ba61c2b0131274dcf9fc077afbf (patch) | |
tree | 8a4cfec759fc702283587f000e1d78406984a66e | |
parent | 7171710338709aba58393558f4c647a59422942e (diff) | |
download | refcat-93d16238ed576ba61c2b0131274dcf9fc077afbf.tar.gz refcat-93d16238ed576ba61c2b0131274dcf9fc077afbf.zip |
update notes
-rw-r--r-- | python/refcat/tasks.py | 7 |
1 file changed, 5 insertions, 2 deletions
```diff
diff --git a/python/refcat/tasks.py b/python/refcat/tasks.py
index e2ff102..7364df1 100644
--- a/python/refcat/tasks.py
+++ b/python/refcat/tasks.py
@@ -102,6 +102,7 @@ TODO
 ----
 
 * [ ] partial (hold)
+* [ ] OL fuzzy
 * [ ] unmatched (in a final pass)
 
 We can match by id and key, e.g. extract id and key, sort and merge (id, key)
@@ -937,7 +938,7 @@ class OpenLibraryEditionsMapped(Refcat):
 class UnmatchedMapped(Refcat):
     """
     Map unmatched refs (converted to release schema on the fly) to container
-    names to do approximate title matches with OL; 35m14.801s.
+    names to do approximate matches with OL. 217m53.989s.
     """
     def requires(self):
         return RefsWithoutIdentifiers()
@@ -960,7 +961,9 @@ class UnmatchedMapped(Refcat):
 
 class UnmatchedOpenLibraryMatchTable(Refcat):
     """
-    Run matching and write tabular results to file. About 50M rows.
+    Run matching and write tabular results to file.
+
+    Total rows: 139507963, exact/strong matches: 11777185.
     """
     def requires(self):
         return {
```