From 15c7c9ea0f09b2e30dffa85cd79a9f761ea29607 Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Tue, 26 Jan 2021 00:55:05 -0800 Subject: sim indexing: new parallel fetch structure --- notes/indexing_pipeline.md | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'notes/indexing_pipeline.md') diff --git a/notes/indexing_pipeline.md b/notes/indexing_pipeline.md index f891d27..ce4d687 100644 --- a/notes/indexing_pipeline.md +++ b/notes/indexing_pipeline.md @@ -46,3 +46,11 @@ Transform and index both into local elasticsearch: => 132635 docs in 2m18.787824205s at 955.667 docs/s with 4 workers +## Iterated + + # in pipenv shell + python -m fatcat_scholar.sim_pipeline run_print_issues \ + | parallel -j8 --colsep "\t" python -m fatcat_scholar.sim_pipeline run_fetch_issue {1} {2} \ + | pv -l \ + | gzip \ + > data/sim_intermediate.json.gz -- cgit v1.2.3