diff options
author | Bryan Newbold <bnewbold@robocracy.org> | 2018-09-25 15:44:39 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@robocracy.org> | 2018-09-25 15:44:39 -0700 |
commit | 186c03c2b9e1b08e4298383fc6593d1b2c3a5dd8 (patch) | |
tree | b116678710d9a69df023f7dc90525d7563506891 /notes/postgres_performance.txt | |
parent | c1391060cc4575365f81fc674e35d8c182ccde83 (diff) | |
download | fatcat-186c03c2b9e1b08e4298383fc6593d1b2c3a5dd8.tar.gz fatcat-186c03c2b9e1b08e4298383fc6593d1b2c3a5dd8.zip |
start organizing notes into subdirectories
Diffstat (limited to 'notes/postgres_performance.txt')
-rw-r--r-- | notes/postgres_performance.txt | 203 |
1 files changed, 0 insertions, 203 deletions
diff --git a/notes/postgres_performance.txt b/notes/postgres_performance.txt deleted file mode 100644 index cd2a5162..00000000 --- a/notes/postgres_performance.txt +++ /dev/null @@ -1,203 +0,0 @@ - -## Setup - -Add to postgres.conf: - - shared_preload_libraries = 'auto_explain,pg_stat_statements' - - # Increase the max size of the query strings Postgres records - track_activity_query_size = 2048 - - # Track statements generated by stored procedures as well - pg_stat_statements.track = all - -Also: - - track_counts (already default) - autovacuum (already default?) - log_min_error = warning - log_min_duration_statement = 5000 - -Then from shell: - - create extension pg_stat_statements; - -Regularly want to run: - - VACUUM ANALYZE - -## Tuning Values - -postgres config: - - max_connections = 100 (default) - shared_buffers = 128MB -> 10GB (while elastic on same machine; later 16 or more) - effective_cache_size = 4GB -> 24GB (while elastic on same machine) - work_mem = 4MB -> 128MB # relatively few connections/box - fsync = on - commit_delay = ??? (and siblings) - random_page_cost = 1 (for SSD) - default_statistics_target = 100 -> 200 - maintenance_work_mem = 64MB -> 8GB - synchronous_commit = off (during dev only! switch to on for production!) - wal_sync_method (keep default) - max_wal_size = 64 -> 128 (based on above HINT message) - # didn't mess with commit_delay/commit_siblings - -system: - - sysctl -w vm.overcommit_memory=2 - TODO: ulimit -n 65536 - TODO: ulimit -p 800 - LimitNOFILE - /lib/systemd/system/postgresql.service - -## Resources - -https://www.geekytidbits.com/performance-tuning-postgres/ - -Could try pgbadger to handle auto_explain type output. - -https://www.postgresql.org/docs/10/static/runtime-config-wal.html - -IA-specific resources: - https://git.archive.org/ia/mint/blob/master/postgres/postgres_9_2.yml - https://git.archive.org/ia/mint/blob/master/postgres/put_datadir_on_ssd.sh - https://git.archive.org/ia/mint/blob/master/postgres/templates/postgresql.conf.j2 - -For bulk inserts: -- make write-ahead-log larger (eg, 16MB. done.) -- transactions of ~1000+ inserts -- https://www.postgresql.org/docs/current/static/populate.html -- https://www.depesz.com/2007/07/05/how-to-insert-data-to-database-as-fast-as-possible/ -- https://stackoverflow.com/questions/12206600/how-to-speed-up-insertion-performance-in-postgresql - -## 2018-06-27 Measurements (pre-tuning) - -fatcat_prod=# select count(*) from release_ident; 20983019 -fatcat_prod=# select count(*) from work_ident; 20988140 -fatcat_prod=# select count(*) from file_ident; 1482335 -fatcat_prod=# select count(*) from creator_ident; 4167419 -fatcat_prod=# select count(*) from container_ident; 61793 - -select count(*) from release_contrib; 59798133 - -bnewbold@wbgrp-svc500$ sudo du -sh /var/lib/postgresql/ -43G - -running import-crossref with 20 threads, and manifest importer with one (at 33% -complete). had already imported ~7million works+releases previously. - - - PostgreSQL 10.4 - wbgrp-svc500.us.archive.org - postgres@localhost:5432/postgre - Size: 41.38G - 323.40K/s | TPS: 885 - Mem.: 50.80% - 23.86G/49.14G | IO Max: 79539/s - Swap: 0.80% - 408.89M/50.00G | Read : 67.04K/s - 16/s - Load: 6.69 7.41 7.69 | Write: 1.93M/s - 493/s - - -----total-cpu-usage---- -dsk/total- -net/total- ---paging-- ---system-- -usr sys idl wai hiq siq| read writ| recv send| in out | int csw - 32 6 62 0 0 0| 296k 3880k| 334k 3144B| 0 0 | 21k 65k - 31 6 62 0 0 0| 0 3072k| 391k 318B| 0 0 | 51k 141k - 31 6 63 0 0 0| 16k 1080k| 344k 1988B| 0 0 | 35k 104k - 29 6 65 0 0 0| 136k 2608k| 175k 332B| 0 0 |9835 15k - 28 5 67 0 0 0| 408k 4368k| 285k 832B| 0 0 | 14k 17k - 33 5 62 0 0 0| 56k 3256k| 219k 99B| 0 0 | 22k 49k - 31 6 63 0 0 0| 188k 5120k| 158k 318B| 0 0 | 17k 29k - 30 6 64 0 0 0| 200k 6984k| 239k 988B| 0 0 | 16k 24k - 30 6 64 0 0 0| 168k 5504k| 159k 152B| 0 0 | 14k 20k - 28 7 65 0 0 0| 440k 12M| 236k 420B| 0 0 | 15k 18k - 29 6 65 0 0 0| 428k 6968k| 352k 310B| 0 0 | 19k 31k - 32 6 62 0 0 0| 64k 3480k| 288k 318B| 0 0 | 18k 55k - 32 6 62 0 0 0| 32k 2080k| 155k 318B| 0 0 | 20k 52k - - -bnewbold@wbgrp-svc500$ uptime - 22:00:42 up 28 days, 22:31, 6 users, load average: 7.94, 7.56, 7.72 - - -2018-06-27 21:57:36.102 UTC [401] LOG: checkpoints are occurring too frequently (13 seconds apart) -2018-06-27 21:57:36.102 UTC [401] HINT: Consider increasing the configuration parameter "max_wal_size". - - - relname | too_much_seq | case | rel_size | seq_scan | idx_scan ------------------+--------------+----------------+-------------+----------+----------- - changelog | 1274670 | Missing Index? | 39411712 | 1274670 | 0 - file_edit | 612386 | Missing Index? | 108298240 | 612386 | 0 - creator_edit | 612386 | Missing Index? | 285540352 | 612386 | 0 - container_edit | 612386 | Missing Index? | 4784128 | 612386 | 0 - release_edit | 612386 | Missing Index? | 1454489600 | 612386 | 0 - work_edit | 612386 | Missing Index? | 1454415872 | 612386 | 0 - release_contrib | 296675 | Missing Index? | 4725645312 | 296675 | 0 - release_ref | 296663 | Missing Index? | 8999837696 | 296663 | 0 - file_release | -113 | OK | 13918208 | 110 | 223 - container_rev | -979326 | OK | 16285696 | 63 | 979389 - file_ident | -3671516 | OK | 109002752 | 362 | 3671878 - file_rev | -3944155 | OK | 302940160 | 95 | 3944250 - creator_rev | -8420205 | OK | 318283776 | 1226 | 8421431 - creator_ident | -9525338 | OK | 309141504 | 52330 | 9577668 - container_ident | -20581876 | OK | 4833280 | 272457 | 20854333 - release_ident | -40548858 | OK | 1440948224 | 4160919 | 44709777 - work_rev | -42534913 | OK | 1124671488 | 1161 | 42536074 - editgroup | -48864662 | OK | 34136064 | 1 | 48864663 - work_ident | -65008911 | OK | 1503313920 | 1239 | 65010150 - release_rev | -185735794 | OK | 13649428480 | 128 | 185735922 - -## 2018-06-28 (after basic tuning + indexes) - -Early loading (manifest and 20x release): - - PostgreSQL 10.4 - wbgrp-svc500.us.archive.org - postgres@localhost:5432/postgres - Ref.: 2s - Size: 4.57G - 6.45M/s | TPS: 18812 - Mem.: 59.70% - 23.62G/49.14G | IO Max: 3601/s - Swap: 1.30% - 675.05M/50.00G | Read : 0.00B/s - 0/s - Load: 12.98 10.58 5.25 | Write: 2.65M/s - 677/s - - - PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ COMMAND - 24045 webcrawl 20 0 908872 204948 11756 S 153.3 0.4 16:13.03 fatcatd - 24328 webcrawl 20 0 78148 45220 4324 R 87.1 0.1 8:44.16 perl - 24056 postgres 20 0 10.441g 3.906g 3.886g R 69.9 7.9 6:57.47 postgres - 24063 postgres 20 0 10.447g 3.899g 3.873g S 67.9 7.9 6:55.89 postgres - 24059 postgres 20 0 10.426g 3.888g 3.883g R 67.5 7.9 6:59.15 postgres - 24057 postgres 20 0 10.430g 3.883g 3.874g S 67.2 7.9 6:58.68 postgres - 24061 postgres 20 0 10.448g 3.909g 3.881g R 66.2 8.0 6:54.30 postgres - 24058 postgres 20 0 10.428g 3.883g 3.876g R 65.9 7.9 6:59.35 postgres - 24062 postgres 20 0 10.426g 5.516g 5.511g R 64.9 11.2 6:58.29 postgres - 24055 postgres 20 0 10.426g 3.878g 3.873g R 64.2 7.9 6:59.38 postgres - 24054 postgres 20 0 10.430g 5.499g 5.491g R 63.6 11.2 6:57.27 postgres - 24060 postgres 20 0 10.448g 3.900g 3.873g R 61.9 7.9 6:55.45 postgres - 21711 postgres 20 0 10.419g 5.762g 5.760g D 16.6 11.7 3:00.67 postgres - 21713 postgres 20 0 10.419g 21432 19512 S 11.3 0.0 3:25.11 postgres - 24392 webcrawl 20 0 5309636 400912 8696 S 7.9 0.8 0:53.18 python3 - 24383 webcrawl 20 0 5309436 400628 8648 S 7.6 0.8 0:52.29 python3 - 24387 webcrawl 20 0 5309776 402968 8620 S 7.3 0.8 0:52.81 python3 - 24394 webcrawl 20 0 5309624 400732 8644 S 7.3 0.8 0:53.30 python3 - 24384 webcrawl 20 0 5309916 400948 8600 S 7.0 0.8 0:53.18 python3 - -Still get a *lot* of: - - 2018-06-29 00:14:05.948 UTC [21711] LOG: checkpoints are occurring too frequently (1 second apart) - 2018-06-29 00:14:05.948 UTC [21711] HINT: Consider increasing the configuration parameter "max_wal_size". - -VACUUM is running basically continuously; should prevent that? 6 hours or -longer on release_rev and release_ref tables. An auto-approve batch method -would resovle this, I think (no update after insert). - -max_wal_size wasn't getting set correctly. - -The statements taking the most time are the complex inserts (multi-table -inserts); they take a fraction of a second though (mean less than a -milisecond). - -Manifest import runs really slow if release import is concurrent; much faster -to wait until release import is done first (like a factor of 10x or more). - -With some 60 million releases: - - bnewbold@wbgrp-svc500$ sudo du -sh /var/lib/postgresql/ - 184G /var/lib/postgresql/ - -TODO: slow query log doesn't seem to be working (let alone auto_explain) - |