aboutsummaryrefslogtreecommitdiffstats
path: root/python
diff options
context:
space:
mode:
author: Martin Czygan <martin.czygan@gmail.com> 2021-05-21 23:21:30 +0200
committer: Martin Czygan <martin.czygan@gmail.com> 2021-05-21 23:21:30 +0200
commit: 8e3ff8d9f68f1664de1b2f346f7253942ee6c719 (patch)
tree: 2c930062f021c92d098ad49ca84fc657af731cf1 /python
parent: f11aa899c7a4bb0ffca72dacdafff422c435c1f1 (diff)
download: refcat-8e3ff8d9f68f1664de1b2f346f7253942ee6c719.tar.gz
refcat-8e3ff8d9f68f1664de1b2f346f7253942ee6c719.zip
add notes
Diffstat (limited to 'python')
-rw-r--r-- python/notes/version_4.md | 8
-rw-r--r-- python/refcat/tasks.py | 3
2 files changed, 10 insertions, 1 deletions
diff --git a/python/notes/version_4.md b/python/notes/version_4.md
index aa758c5..97fb4e1 100644
--- a/python/notes/version_4.md
+++ b/python/notes/version_4.md
@@ -39,3 +39,11 @@ If nothing else defined, and unstructured contains a URL, we may extract that.
```
Also, these may say: "accessed at ..."
+
+# URL
+
+* url cleanup in place
+
+# Partial Data Mapping
+
+* how to map partial docs onto a key
diff --git a/python/refcat/tasks.py b/python/refcat/tasks.py
index 8d00b55..a82c535 100644
--- a/python/refcat/tasks.py
+++ b/python/refcat/tasks.py
@@ -663,7 +663,8 @@ class BrefZipArxiv(Refcat):
class BrefZipFuzzy(Refcat):
"""
- Run skate-reduce from two files, fuzzy mode.
+ Run skate-reduce from two files, fuzzy mode; 1039m55.350s, skate-reduce not
+ parallelized yet.
"""
mapper = luigi.Parameter(default="ts", description="mapper short name")