about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- python/refcat/tasks.py 8
1 files changed, 5 insertions, 3 deletions
diff --git a/python/refcat/tasks.py b/python/refcat/tasks.py
index 4a4a357..a7a3ad9 100644
--- a/python/refcat/tasks.py
+++ b/python/refcat/tasks.py
@@ -1500,15 +1500,17 @@ class CDXURL(Refcat):
limit = luigi.IntParameter(default=10000, significant=False)
def requires(self):
- return URLList()
+ return RefsURL()
def run(self):
output = shellout("""
zstdcat -T0 {input} |
- head -n {limit} |
+ LC_ALL=C cut -f 1 |
+ LC_ALL=C head -n {limit} |
skate-cdx-lookup -q -s 50ms -c {cache} -j -B |
skate-map -m cdxu |
- LC_ALL=C sort -T {tmpdir} -k1,1 -S25% | zstd -c -T0 > {output}
+ LC_ALL=C sort -T {tmpdir} -k1,1 -S25% |
+ zstd -c -T0 > {output}
""",
limit=self.limit,
input=self.input().path,