diff options
-rw-r--r-- | python/notes/version_4.md | 8 | ||||
-rw-r--r-- | python/refcat/tasks.py | 3 |
2 files changed, 10 insertions, 1 deletions
diff --git a/python/notes/version_4.md b/python/notes/version_4.md
index aa758c5..97fb4e1 100644
--- a/python/notes/version_4.md
+++ b/python/notes/version_4.md
@@ -39,3 +39,11 @@ If nothing else defined, and unstructured contains a URL, we may extract that.
 ```
 
 Also, these may say: "accessed at ..."
+
+# URL
+
+* url cleanup in place
+
+# Partial Data Mapping
+
+* how to map partial docs onto a key
diff --git a/python/refcat/tasks.py b/python/refcat/tasks.py
index 8d00b55..a82c535 100644
--- a/python/refcat/tasks.py
+++ b/python/refcat/tasks.py
@@ -663,7 +663,8 @@ class BrefZipArxiv(Refcat):
 
 class BrefZipFuzzy(Refcat):
     """
-    Run skate-reduce from two files, fuzzy mode.
+    Run skate-reduce from two files, fuzzy mode; 1039m55.350s, skate-reduce not
+    parallelized yet.
     """
 
     mapper = luigi.Parameter(default="ts", description="mapper short name")