From 8e3ff8d9f68f1664de1b2f346f7253942ee6c719 Mon Sep 17 00:00:00 2001 From: Martin Czygan Date: Fri, 21 May 2021 23:21:30 +0200 Subject: add notes --- python/notes/version_4.md | 8 ++++++++ python/refcat/tasks.py | 3 ++- 2 files changed, 10 insertions(+), 1 deletion(-) (limited to 'python') diff --git a/python/notes/version_4.md b/python/notes/version_4.md index aa758c5..97fb4e1 100644 --- a/python/notes/version_4.md +++ b/python/notes/version_4.md @@ -39,3 +39,11 @@ If nothing else defined, and unstructured contains a URL, we may extract that. ``` Also, these may say: "accessed at ..." + +# URL + +* url cleanup in place + +# Partial Data Mapping + +* how to map partial docs onto a key diff --git a/python/refcat/tasks.py b/python/refcat/tasks.py index 8d00b55..a82c535 100644 --- a/python/refcat/tasks.py +++ b/python/refcat/tasks.py @@ -663,7 +663,8 @@ class BrefZipArxiv(Refcat): class BrefZipFuzzy(Refcat): """ - Run skate-reduce from two files, fuzzy mode. + Run skate-reduce from two files, fuzzy mode; 1039m55.350s, skate-reduce not + parallelized yet. """ mapper = luigi.Parameter(default="ts", description="mapper short name") -- cgit v1.2.3