aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Martin Czygan <martin.czygan@gmail.com> 2021-07-16 21:55:04 +0200
committer Martin Czygan <martin.czygan@gmail.com> 2021-07-16 21:55:04 +0200
commit e79c7dd585651a57c2550d55623921a0e2fa0fac (patch)
tree 79e16d2a1405c63ab5f1204426fdda9b1ed7c5cb
parent ede2f4e7258255f949be0489e558fe5d008f97c0 (diff)
download refcat-e79c7dd585651a57c2550d55623921a0e2fa0fac.tar.gz
refcat-e79c7dd585651a57c2550d55623921a0e2fa0fac.zip
tasks: add data point
-rw-r--r-- python/refcat/tasks.py 5
1 files changed, 3 insertions, 2 deletions
diff --git a/python/refcat/tasks.py b/python/refcat/tasks.py
index 9a11653..aa473f6 100644
--- a/python/refcat/tasks.py
+++ b/python/refcat/tasks.py
@@ -1310,8 +1310,9 @@ class BrefCombined(Refcat):
This is basically a reduce step, where we group by work id (since the raw
refs were already sorted by work id).
- Data point: version 2021-05-06 results in 1,323,614,061 docs (77G
- compressed; about 285G when indexed in ES7).
+ Data points: version 2021-05-06 results in 1,323,614,061 docs (77G
+ compressed; about 285G when indexed in ES7); version 2021-07-06 contained
+ 1,865,637,767 docs (116G).
Data point: 72G matches, 170G unmatched (compressed); about 3.8B docs
(close to 300k docs/s):