diff options
author | Martin Czygan <martin.czygan@gmail.com> | 2021-07-23 11:12:03 +0200 |
---|---|---|
committer | Martin Czygan <martin.czygan@gmail.com> | 2021-07-23 11:12:03 +0200 |
commit | 05de691fbc8ab72741b61d310c90dbc770fd4b3a (patch) | |
tree | 5a43008a3e5c1efc1c0889299f27b1df9e26dff4 | |
parent | af405a9ae8bc4a5530dce6e17e6fc41bab4c7403 (diff) | |
download | refcat-05de691fbc8ab72741b61d310c90dbc770fd4b3a.tar.gz refcat-05de691fbc8ab72741b61d310c90dbc770fd4b3a.zip |
tasks: simplify url list task
-rw-r--r-- | python/refcat/tasks.py | 5 |
1 file changed, 1 insertion, 4 deletions
```diff
diff --git a/python/refcat/tasks.py b/python/refcat/tasks.py
index 42fa924..792a9c3 100644
--- a/python/refcat/tasks.py
+++ b/python/refcat/tasks.py
@@ -467,15 +467,12 @@ class URLList(Refcat):
     List of mostly cleaned, unique URLs from refs.
     """
     def requires(self):
-        return URLTabs()
+        return URLTabsCleaned()
 
     def run(self):
         output = shellout("""
                           zstdcat -T0 {input} |
                           cut -f 3 |
-                          skate-cleanup -X -c url -B -S -f 1 |
-                          LC_ALL=C sort -u -T {tmpdir} -k1,1 -S25% |
-                          LC_ALL=C grep -E '^https?://' |
                           zstd -T0 -c > {output}
                           """,
                           n=self.n,
```