about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Martin Czygan <martin.czygan@gmail.com> 2021-07-23 11:12:03 +0200
committer: Martin Czygan <martin.czygan@gmail.com> 2021-07-23 11:12:03 +0200
commit: 05de691fbc8ab72741b61d310c90dbc770fd4b3a (patch)
tree: 5a43008a3e5c1efc1c0889299f27b1df9e26dff4
parent: af405a9ae8bc4a5530dce6e17e6fc41bab4c7403 (diff)
download: refcat-05de691fbc8ab72741b61d310c90dbc770fd4b3a.tar.gz
download: refcat-05de691fbc8ab72741b61d310c90dbc770fd4b3a.zip
tasks: simplify url list task
-rw-r--r-- python/refcat/tasks.py | 5
1 file changed, 1 insertion, 4 deletions
diff --git a/python/refcat/tasks.py b/python/refcat/tasks.py
index 42fa924..792a9c3 100644
--- a/python/refcat/tasks.py
+++ b/python/refcat/tasks.py
@@ -467,15 +467,12 @@ class URLList(Refcat):
List of mostly cleaned, unique URLs from refs.
"""
def requires(self):
- return URLTabs()
+ return URLTabsCleaned()
def run(self):
output = shellout("""
zstdcat -T0 {input} |
cut -f 3 |
- skate-cleanup -X -c url -B -S -f 1 |
- LC_ALL=C sort -u -T {tmpdir} -k1,1 -S25% |
- LC_ALL=C grep -E '^https?://' |
zstd -T0 -c > {output}
""",
n=self.n,