about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- python/refcat/tasks.py 4
1 files changed, 2 insertions, 2 deletions
diff --git a/python/refcat/tasks.py b/python/refcat/tasks.py
index 269c4f8..1dfaccb 100644
--- a/python/refcat/tasks.py
+++ b/python/refcat/tasks.py
@@ -1497,7 +1497,7 @@ class CDXURL(Refcat):
Stub implementation of ad-hoc CDX. We only consider a subset of documents.
"""
cache = luigi.Parameter(default="/magna/.cache/skate/cdx", significant=False)
- limit = luigi.IntParameter(default=10000, significant=False)
+ limit = luigi.IntParameter(default=50000, significant=False)
def requires(self):
return RefsURL()
@@ -1509,7 +1509,7 @@ class CDXURL(Refcat):
LC_ALL=C head -n {limit} |
skate-cdx-lookup -q -s 50ms -c {cache} -j -B |
skate-map -m cdxu |
- LC_ALL=C sort -T {tmpdir} -k1,1 -S25% |
+ LC_ALL=C sort -u -T {tmpdir} -k1,1 -S25% |
zstd -c -T0 > {output}
""",
tmpdir=self.tmpdir,