From 05de691fbc8ab72741b61d310c90dbc770fd4b3a Mon Sep 17 00:00:00 2001 From: Martin Czygan Date: Fri, 23 Jul 2021 11:12:03 +0200 Subject: tasks: simplify url list task --- python/refcat/tasks.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'python') diff --git a/python/refcat/tasks.py b/python/refcat/tasks.py index 42fa924..792a9c3 100644 --- a/python/refcat/tasks.py +++ b/python/refcat/tasks.py @@ -467,15 +467,12 @@ class URLList(Refcat): List of mostly cleaned, unique URLs from refs. """ def requires(self): - return URLTabs() + return URLTabsCleaned() def run(self): output = shellout(""" zstdcat -T0 {input} | cut -f 3 | - skate-cleanup -X -c url -B -S -f 1 | - LC_ALL=C sort -u -T {tmpdir} -k1,1 -S25% | - LC_ALL=C grep -E '^https?://' | zstd -T0 -c > {output} """, n=self.n, -- cgit v1.2.3