about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- python/refcat/tasks.py 5
1 files changed, 3 insertions, 2 deletions
diff --git a/python/refcat/tasks.py b/python/refcat/tasks.py
index 9a11653..aa473f6 100644
--- a/python/refcat/tasks.py
+++ b/python/refcat/tasks.py
@@ -1310,8 +1310,9 @@ class BrefCombined(Refcat):
This is basically a reduce step, where we group by work id (since the raw
refs were already sorted by work id).
- Data point: version 2021-05-06 results in 1,323,614,061 docs (77G
- compressed; about 285G when indexed in ES7).
+ Data points: version 2021-05-06 results in 1,323,614,061 docs (77G
+ compressed; about 285G when indexed in ES7); version 2021-07-06 contained
+ 1,865,637,767 docs (116G).
Data point: 72G matches, 170G unmatched (compressed); about 3.8B docs
(close to 300k docs/s):