From ed78736b5d96a294c89b201c1d992bd30d809434 Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Mon, 24 Sep 2018 18:04:54 -0700 Subject: updated import timing --- notes/import_timing_20180923.txt | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) create mode 100644 notes/import_timing_20180923.txt (limited to 'notes/import_timing_20180923.txt') diff --git a/notes/import_timing_20180923.txt b/notes/import_timing_20180923.txt new file mode 100644 index 00000000..f8814f3d --- /dev/null +++ b/notes/import_timing_20180923.txt @@ -0,0 +1,39 @@ + + 105595.18user 3903.65system 15:59:39elapsed 190%CPU (0avgtext+0avgdata 458836maxresident)k + 71022792inputs+327828472outputs (176major+31149593minor)pagefaults 0swaps + + real 959m39.521s + user 1845m10.392s + sys 70m33.780s + +Did I get the same error again? I'm confused: + + HTTP response body: {"message":"number of parameters must be between 0 and 65535\n"} + (but not in all threads) + +Yes, ugh, because 50*2500 = 125000 parameters can be over the 65535 limit (it's not just individual large releases, +they come in big batches). + +But: + + select count(id) from release_ident; => 70006121 + +A lot, though not 72 million like last time, hrm. I'm... going to move ahead I +guess. + +"Processed 4440850 lines, inserted 3509600, updated 0." + => implies 79029915 records + + time zcat /srv/fatcat/datasets/ia_papers_manifest_2018-01-25.matched.json.gz | pv -l | time parallel -j12 --round-robin --pipe ./fatcat_import.py import-matched --no-file-update - + Processed 530750 lines, inserted 435239, updated 0. 
(etc) + Command exited with non-zero status 1 + 15121.47user 676.49system 2:23:52elapsed 183%CPU (0avgtext+0avgdata 70076maxresident)k + 127760inputs+3477184outputs (116major+475489minor)pagefaults 0swaps + + real 143m52.681s + user 252m31.620s + sys 11m21.608s + + zcat /srv/fatcat/datasets/2018-08-27-2352.17-matchcrossref.insertable.json.gz | pv -l | time parallel -j12 --round-robin --pipe ./fatcat_import.py import-matched - + + (running...) -- cgit v1.2.3