diff options
author | Martin Czygan <martin.czygan@gmail.com> | 2021-05-21 23:21:30 +0200 |
---|---|---|
committer | Martin Czygan <martin.czygan@gmail.com> | 2021-05-21 23:21:30 +0200 |
commit | 8e3ff8d9f68f1664de1b2f346f7253942ee6c719 (patch) | |
tree | 2c930062f021c92d098ad49ca84fc657af731cf1 | |
parent | f11aa899c7a4bb0ffca72dacdafff422c435c1f1 (diff) | |
download | refcat-8e3ff8d9f68f1664de1b2f346f7253942ee6c719.tar.gz refcat-8e3ff8d9f68f1664de1b2f346f7253942ee6c719.zip |
add notes
-rw-r--r-- | python/notes/version_4.md | 8 | ||||
-rw-r--r-- | python/refcat/tasks.py | 3 |
2 files changed, 10 insertions, 1 deletions
diff --git a/python/notes/version_4.md b/python/notes/version_4.md
index aa758c5..97fb4e1 100644
--- a/python/notes/version_4.md
+++ b/python/notes/version_4.md
@@ -39,3 +39,11 @@ If nothing else defined, and unstructured contains a URL, we may extract that.
 ```
 
 Also, these may say: "accessed at ..."
+
+# URL
+
+* url cleanup in place
+
+# Partial Data Mapping
+
+* how to map partial docs onto a key
diff --git a/python/refcat/tasks.py b/python/refcat/tasks.py
index 8d00b55..a82c535 100644
--- a/python/refcat/tasks.py
+++ b/python/refcat/tasks.py
@@ -663,7 +663,8 @@ class BrefZipArxiv(Refcat):
 
 class BrefZipFuzzy(Refcat):
     """
-    Run skate-reduce from two files, fuzzy mode.
+    Run skate-reduce from two files, fuzzy mode; 1039m55.350s, skate-reduce not
+    parallelized yet.
     """
 
     mapper = luigi.Parameter(default="ts", description="mapper short name") |