diff options
| -rw-r--r-- | notes/postgres_performance.txt | 169 | ||||
| -rw-r--r-- | notes/postgres_tuning.txt | 8 | 
2 files changed, 117 insertions, 60 deletions
| diff --git a/notes/postgres_performance.txt b/notes/postgres_performance.txt index 8f87d5c6..cd2a5162 100644 --- a/notes/postgres_performance.txt +++ b/notes/postgres_performance.txt @@ -1,5 +1,78 @@ -## 2018-06-27 Measurements +## Setup + +Add to postgres.conf: + +    shared_preload_libraries = 'auto_explain,pg_stat_statements' + +    # Increase the max size of the query strings Postgres records +    track_activity_query_size = 2048 + +    # Track statements generated by stored procedures as well +    pg_stat_statements.track = all + +Also: +     +    track_counts (already default) +    autovacuum (already default?) +    log_min_error = warning +    log_min_duration_statement = 5000 + +Then from shell: + +    create extension pg_stat_statements; + +Regularly want to run: + +    VACUUM ANALYZE + +## Tuning Values + +postgres config: + +    max_connections = 100 (default) +    shared_buffers = 128MB -> 10GB (while elastic on same machine; later 16 or more) +    effective_cache_size = 4GB -> 24GB (while elastic on same machine) +    work_mem = 4MB -> 128MB     # relatively few connections/box +    fsync = on +    commit_delay = ??? (and siblings) +    random_page_cost = 1 (for SSD) +    default_statistics_target = 100 -> 200 +    maintenance_work_mem = 64MB -> 8GB +    synchronous_commit = off (during dev only! switch to on for production!) +    wal_sync_method (keep default) +    max_wal_size = 64 -> 128 (based on above HINT message) +    # didn't mess with commit_delay/commit_siblings + +system: + +    sysctl -w vm.overcommit_memory=2 +    TODO: ulimit -n 65536 +    TODO: ulimit -p 800 +        LimitNOFILE +        /lib/systemd/system/postgresql.service + +## Resources  + +https://www.geekytidbits.com/performance-tuning-postgres/ + +Could try pgbadger to handle auto_explain type output. + +https://www.postgresql.org/docs/10/static/runtime-config-wal.html + +IA-specific resources: +    https://git.archive.org/ia/mint/blob/master/postgres/postgres_9_2.yml +    https://git.archive.org/ia/mint/blob/master/postgres/put_datadir_on_ssd.sh +    https://git.archive.org/ia/mint/blob/master/postgres/templates/postgresql.conf.j2 + +For bulk inserts: +- make write-ahead-log larger (eg, 16MB. done.) +- transactions of ~1000+ inserts +- https://www.postgresql.org/docs/current/static/populate.html +- https://www.depesz.com/2007/07/05/how-to-insert-data-to-database-as-fast-as-possible/ +- https://stackoverflow.com/questions/12206600/how-to-speed-up-insertion-performance-in-postgresql + +## 2018-06-27 Measurements (pre-tuning)  fatcat_prod=# select count(*) from release_ident; 20983019  fatcat_prod=# select count(*) from work_ident; 20988140 @@ -71,68 +144,60 @@ bnewbold@wbgrp-svc500$ uptime   work_ident      |    -65008911 | OK             |  1503313920 |     1239 |  65010150   release_rev     |   -185735794 | OK             | 13649428480 |      128 | 185735922 -## Setup +## 2018-06-28 (after basic tuning + indexes) -Add to postgres.conf: +Early loading (manifest and 20x release): -    shared_preload_libraries = 'auto_explain,pg_stat_statements' +    PostgreSQL 10.4 - wbgrp-svc500.us.archive.org - postgres@localhost:5432/postgres - Ref.: 2s +    Size:    4.57G -     6.45M/s        | TPS:       18812 +    Mem.:   59.70% -    23.62G/49.14G   | IO Max:     3601/s +    Swap:    1.30% -   675.05M/50.00G   | Read :      0.00B/s -      0/s +    Load:    12.98 10.58 5.25           | Write:      2.65M/s -    677/s -    # Increase the max size of the query strings Postgres records -    track_activity_query_size = 2048 -    # Track statements generated by stored procedures as well -    pg_stat_statements.track = all +      PID USER      PR  NI    VIRT    RES    SHR S  %CPU %MEM     TIME+ COMMAND  +    24045 webcrawl  20   0  908872 204948  11756 S 153.3  0.4  16:13.03 fatcatd  +    24328 webcrawl  20   0   78148  45220   4324 R  87.1  0.1   8:44.16 perl     +    24056 postgres  20   0 10.441g 3.906g 3.886g R  69.9  7.9   6:57.47 postgres +    24063 postgres  20   0 10.447g 3.899g 3.873g S  67.9  7.9   6:55.89 postgres +    24059 postgres  20   0 10.426g 3.888g 3.883g R  67.5  7.9   6:59.15 postgres +    24057 postgres  20   0 10.430g 3.883g 3.874g S  67.2  7.9   6:58.68 postgres +    24061 postgres  20   0 10.448g 3.909g 3.881g R  66.2  8.0   6:54.30 postgres +    24058 postgres  20   0 10.428g 3.883g 3.876g R  65.9  7.9   6:59.35 postgres +    24062 postgres  20   0 10.426g 5.516g 5.511g R  64.9 11.2   6:58.29 postgres +    24055 postgres  20   0 10.426g 3.878g 3.873g R  64.2  7.9   6:59.38 postgres +    24054 postgres  20   0 10.430g 5.499g 5.491g R  63.6 11.2   6:57.27 postgres +    24060 postgres  20   0 10.448g 3.900g 3.873g R  61.9  7.9   6:55.45 postgres +    21711 postgres  20   0 10.419g 5.762g 5.760g D  16.6 11.7   3:00.67 postgres +    21713 postgres  20   0 10.419g  21432  19512 S  11.3  0.0   3:25.11 postgres +    24392 webcrawl  20   0 5309636 400912   8696 S   7.9  0.8   0:53.18 python3  +    24383 webcrawl  20   0 5309436 400628   8648 S   7.6  0.8   0:52.29 python3  +    24387 webcrawl  20   0 5309776 402968   8620 S   7.3  0.8   0:52.81 python3  +    24394 webcrawl  20   0 5309624 400732   8644 S   7.3  0.8   0:53.30 python3  +    24384 webcrawl  20   0 5309916 400948   8600 S   7.0  0.8   0:53.18 python3  -Also: -     -    track_counts (already default) -    autovacuum (already default?) -    log_min_error = warning -    log_min_duration_statement = 5000 +Still get a *lot* of: -Then from shell: +    2018-06-29 00:14:05.948 UTC [21711] LOG:  checkpoints are occurring too frequently (1 second apart) +    2018-06-29 00:14:05.948 UTC [21711] HINT:  Consider increasing the configuration parameter "max_wal_size". -    create extension pg_stat_statements; +VACUUM is running basically continuously; should prevent that? 6 hours or +longer on release_rev and release_ref tables. An auto-approve batch method +would resovle this, I think (no update after insert). -Regularly want to run: +max_wal_size wasn't getting set correctly. -    VACUUM ANALYZE +The statements taking the most time are the complex inserts (multi-table +inserts); they take a fraction of a second though (mean less than a +milisecond). -## Tuning Values +Manifest import runs really slow if release import is concurrent; much faster +to wait until release import is done first (like a factor of 10x or more). -postgres config: +With some 60 million releases: -    max_connections = 100 (default) -    shared_buffers = 128MB -> 10GB (while elastic on same machine; later 16 or more) -    effective_cache_size = 4GB -> 24GB (while elastic on same machine) -    work_mem = 4MB -> 128MB     # relatively few connections/box -    fsync = on -    commit_delay = ??? (and siblings) -    random_page_cost = 1 (for SSD) -    default_statistics_target = 100 -> 200 -    maintenance_work_mem = 64MB -> 8GB -    synchronous_commit = off (during dev only! switch to on for production!) -    wal_sync_method (keep default) -    max_wal_size = 64 -> 128 (based on above HINT message) -    # didn't mess with commit_delay/commit_siblings +    bnewbold@wbgrp-svc500$ sudo du -sh /var/lib/postgresql/ +    184G    /var/lib/postgresql/ -system: +TODO: slow query log doesn't seem to be working (let alone auto_explain) -    sysctl -w vm.overcommit_memory=2 -    TODO: ulimit -n 65536 -    TODO: ulimit -p 800 -        LimitNOFILE -        /lib/systemd/system/postgresql.service - -## Resources  - -https://www.geekytidbits.com/performance-tuning-postgres/ - -Could try pgbadger to handle auto_explain type output. - -https://www.postgresql.org/docs/10/static/runtime-config-wal.html - -IA-specific resources: -    https://git.archive.org/ia/mint/blob/master/postgres/postgres_9_2.yml -    https://git.archive.org/ia/mint/blob/master/postgres/put_datadir_on_ssd.sh -    https://git.archive.org/ia/mint/blob/master/postgres/templates/postgresql.conf.j2 diff --git a/notes/postgres_tuning.txt b/notes/postgres_tuning.txt deleted file mode 100644 index d1e353da..00000000 --- a/notes/postgres_tuning.txt +++ /dev/null @@ -1,8 +0,0 @@ - -For bulk inserts: - -- make write-ahead-log larger (eg, 16MB) -- transactions of ~1000+ inserts -- https://www.postgresql.org/docs/current/static/populate.html -- https://www.depesz.com/2007/07/05/how-to-insert-data-to-database-as-fast-as-possible/ -- https://stackoverflow.com/questions/12206600/how-to-speed-up-insertion-performance-in-postgresql | 
