aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@robocracy.org> 2019-01-24 13:18:28 -0800
committer Bryan Newbold <bnewbold@robocracy.org> 2019-01-24 13:18:28 -0800
commit f4c5bf54eef06b45907c12c40dd0ac46a0467a5b (patch)
tree 6d531c12715fe50c77f0b34d7122bee57bc0ed9a
parent 2cadc1f1bca6fb5c3b7eb940b838023322fc0eeb (diff)
download fatcat-f4c5bf54eef06b45907c12c40dd0ac46a0467a5b.tar.gz
fatcat-f4c5bf54eef06b45907c12c40dd0ac46a0467a5b.zip
more 2019-01-16 import timing
-rw-r--r-- notes/bootstrap/import_timing_20190116.txt 70
1 files changed, 70 insertions, 0 deletions
diff --git a/notes/bootstrap/import_timing_20190116.txt b/notes/bootstrap/import_timing_20190116.txt
index 1b4821c0..96723ca8 100644
--- a/notes/bootstrap/import_timing_20190116.txt
+++ b/notes/bootstrap/import_timing_20190116.txt
@@ -420,3 +420,73 @@ Huh. Expected this to basically double size... what happened? Doing fetches?
So... it was doing fetches (the 'no_release_updates' flag was not passed), but still
inserted 5 million? Also not good.
+ time xzcat /srv/fatcat/datasets/crossref-works.2018-09-05.json.xz | time parallel -j20 --round-robin --pipe ./fatcat_import.py crossref - /srv/fatcat/datasets/20180216.ISSN-to-ISSN-L.txt --extid-map-file /srv/fatcat/datasets/release_ids.ia_munge_20180908.sqlite3 --no-release-updates
+
+ Processed 5001450 lines, inserted 4811233, updated 0.
+ 138166.58user 5605.61system 43:47:54elapsed 91%CPU (0avgtext+0avgdata 426964maxresident)k
+ 208379288inputs+488675440outputs (267864major+49077696minor)pagefaults 0swaps
+
+ real 2627m55.003s
+ user 2386m11.476s
+ sys 99m0.408s
+
+ table_name | table_size | indexes_size | total_size
+ --------------------------------------------------------------+------------+--------------+------------
+ "public"."release_rev" | 93 GB | 53 GB | 146 GB
+ "public"."release_contrib" | 52 GB | 45 GB | 97 GB
+ "public"."refs_blob" | 72 GB | 1898 MB | 73 GB
+ "public"."release_edit" | 18 GB | 20 GB | 39 GB
+ "public"."work_edit" | 18 GB | 20 GB | 39 GB
+ "public"."work_ident" | 12 GB | 20 GB | 32 GB
+ "public"."release_ident" | 12 GB | 20 GB | 32 GB
+ "public"."work_rev" | 8185 MB | 7596 MB | 15 GB
+ "public"."file_rev_url" | 6151 MB | 2346 MB | 8496 MB
+ "public"."abstracts" | 4134 MB | 303 MB | 4437 MB
+ "public"."file_rev" | 1403 MB | 2309 MB | 3712 MB
+ "public"."file_edit" | 1181 MB | 1236 MB | 2417 MB
+ "public"."file_rev_release" | 721 MB | 1266 MB | 1987 MB
+ "public"."file_ident" | 691 MB | 1163 MB | 1854 MB
+ "public"."release_rev_abstract" | 700 MB | 919 MB | 1619 MB
+ "public"."editgroup" | 486 MB | 788 MB | 1275 MB
+ "public"."creator_rev" | 371 MB | 456 MB | 827 MB
+ "public"."creator_edit" | 377 MB | 421 MB | 798 MB
+ "public"."changelog" | 365 MB | 381 MB | 746 MB
+ "public"."creator_ident" | 255 MB | 412 MB | 667 MB
+ "public"."container_rev" | 20 MB | 9272 kB | 29 MB
+ "public"."container_edit" | 9472 kB | 8880 kB | 18 MB
+ "public"."container_ident" | 7592 kB | 9136 kB | 16 MB
+
+ relname | too_much_seq | case | rel_size | seq_scan | idx_scan
+ ----------------------+--------------+------+--------------+----------+------------
+ release_edit | -487544 | OK | 19594010624 | 26 | 487570
+ work_edit | -487615 | OK | 19594043392 | 26 | 487641
+ file_edit | -488168 | OK | 1237671936 | 19 | 488187
+ creator_edit | -488173 | OK | 395321344 | 26 | 488199
+ container_edit | -488306 | OK | 9666560 | 49 | 488355
+ file_rev_url | -2166808 | OK | 6448095232 | 2 | 2166810
+ file_rev_release | -2166881 | OK | 756015104 | 7 | 2166888
+ container_rev | -2264841 | OK | 21364736 | 3 | 2264844
+ changelog | -11338986 | OK | 382525440 | 2 | 11338988
+ creator_rev | -12726261 | OK | 388710400 | 3 | 12726264
+ creator_ident | -14563891 | OK | 267010048 | 6 | 14563897
+ abstracts | -15594992 | OK | 4052975616 | 1 | 15594993
+ file_ident | -23532116 | OK | 724213760 | 60366 | 23592482
+ file_rev | -84478438 | OK | 1470947328 | 10 | 84478448
+ release_contrib | -97501069 | OK | 55310950400 | 3 | 97501072
+ release_rev_abstract | -97505413 | OK | 734248960 | 3 | 97505416
+ refs_blob | -108179066 | OK | 15747162112 | 11 | 108179077
+ container_ident | -152392399 | OK | 7749632 | 5 | 152392404
+ release_ident | -307197678 | OK | 13256884224 | 3557 | 307201235
+ work_rev | -387420683 | OK | 8580505600 | 1 | 387420684
+ work_ident | -390871805 | OK | 13256515584 | 4074 | 390875879
+ editgroup | -409831715 | OK | 509853696 | 3 | 409831718
+ release_rev | -1112440989 | OK | 100107378688 | 9 | 1112440998
+
+ Size: 501.37G
+
+ select count(*) from refs_blob;
+ 22,322,742
+
+ select count(*) from release_ident;
+ 193,709,943
+