about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- python/refcat/tasks.py 6
1 files changed, 2 insertions, 4 deletions
diff --git a/python/refcat/tasks.py b/python/refcat/tasks.py
index 21335f3..6ab8d06 100644
--- a/python/refcat/tasks.py
+++ b/python/refcat/tasks.py
@@ -1258,9 +1258,7 @@ class Bref(Refcat):
class BrefSortedByWorkID(Refcat):
"""
- Sort by work id. 237m45.094s.
-
- Final file currently has: 915168340 docs.
+ Sort by work id. Keep only docs that actually have a work id. 237m45.094s.
"""
def requires(self):
return Bref()
@@ -1268,7 +1266,7 @@ class BrefSortedByWorkID(Refcat):
def run(self):
output = shellout("""
zstdcat -T0 {bref} |
- skate-map -B -m ff -x source_work_ident |
+ skate-map -skip-on-empty 1 -B -m ff -x source_work_ident |
LC_ALL=C sort -T {tmpdir} -S25% -k1,1 | zstd -c -T0 > {output}
""",
tmpdir=self.tmpdir,