From 0ab3f66664fd4cc63cf9040e351d725c6a5c22b9 Mon Sep 17 00:00:00 2001
From: Bryan Newbold
Date: Wed, 26 Feb 2020 12:27:28 -0800
Subject: update ES transform README

- smaller batch sizes to prevent esbulk errors
- file transform/index
---
 extra/elasticsearch/README.md | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/extra/elasticsearch/README.md b/extra/elasticsearch/README.md
index 3e0857b4..df4cb918 100644
--- a/extra/elasticsearch/README.md
+++ b/extra/elasticsearch/README.md
@@ -59,8 +59,9 @@ Bulk insert from a file on disk:
 Or, in a bulk production live-stream conversion:
 
     export LC_ALL=C.UTF-8
-    time zcat /srv/fatcat/snapshots/release_export_expanded.json.gz | pv -l | parallel -j20 --linebuffer --round-robin --pipe ./fatcat_transform.py elasticsearch-releases - - | esbulk -verbose -size 20000 -id ident -w 8 -index fatcat_release -type release
-    time zcat /srv/fatcat/snapshots/container_export.json.gz | pv -l | ./fatcat_transform.py elasticsearch-containers - - | esbulk -verbose -size 20000 -id ident -w 8 -index fatcat_container -type container
+    time zcat /srv/fatcat/snapshots/release_export_expanded.json.gz | pv -l | parallel -j20 --linebuffer --round-robin --pipe ./fatcat_transform.py elasticsearch-releases - - | esbulk -verbose -size 2000 -id ident -w 8 -index fatcat_release -type release
+    time zcat /srv/fatcat/snapshots/container_export.json.gz | pv -l | ./fatcat_transform.py elasticsearch-containers - - | esbulk -verbose -size 2000 -id ident -w 8 -index fatcat_container -type container
+    time zcat /srv/fatcat/snapshots/file_export.json.gz | pv -l | parallel -j20 --linebuffer --round-robin --pipe ./fatcat_transform.py elasticsearch-files - - | esbulk -verbose -size 2000 -id ident -w 8 -index fatcat_file -type file
 
 ## Index Aliases
 
--
cgit v1.2.3