From 186c03c2b9e1b08e4298383fc6593d1b2c3a5dd8 Mon Sep 17 00:00:00 2001
From: Bryan Newbold
Date: Tue, 25 Sep 2018 15:44:39 -0700
Subject: start organizing notes into subdirectories

---
 notes/performance/postgres_performance.txt | 203 +++++++++++++++++++++++++++++
 notes/performance/speed.txt                |  82 ++++++++++++
 2 files changed, 285 insertions(+)
 create mode 100644 notes/performance/postgres_performance.txt
 create mode 100644 notes/performance/speed.txt

diff --git a/notes/performance/postgres_performance.txt b/notes/performance/postgres_performance.txt
new file mode 100644
index 00000000..cd2a5162
--- /dev/null
+++ b/notes/performance/postgres_performance.txt
@@ -0,0 +1,203 @@
+
+## Setup
+
+Add to postgres.conf:
+
+    shared_preload_libraries = 'auto_explain,pg_stat_statements'
+
+    # Increase the max size of the query strings Postgres records
+    track_activity_query_size = 2048
+
+    # Track statements generated by stored procedures as well
+    pg_stat_statements.track = all
+
+Also:
+
+    track_counts (already default)
+    autovacuum (already default?)
+    log_min_error = warning
+    log_min_duration_statement = 5000
+
+Then from a psql shell:
+
+    create extension pg_stat_statements;
+
+Regularly want to run:
+
+    VACUUM ANALYZE
+
+## Tuning Values
+
+postgres config:
+
+    max_connections = 100 (default)
+    shared_buffers = 128MB -> 10GB (while elastic on same machine; later 16 or more)
+    effective_cache_size = 4GB -> 24GB (while elastic on same machine)
+    work_mem = 4MB -> 128MB  # relatively few connections/box
+    fsync = on
+    commit_delay = ??? (and siblings)
+    random_page_cost = 1 (for SSD)
+    default_statistics_target = 100 -> 200
+    maintenance_work_mem = 64MB -> 8GB
+    synchronous_commit = off (during dev only! switch to on for production!)
+    wal_sync_method (keep default)
+    max_wal_size = 64 -> 128 (based on above HINT message)
+    # didn't mess with commit_delay/commit_siblings
+
+system:
+
+    sysctl -w vm.overcommit_memory=2
+    TODO: ulimit -n 65536
+    TODO: ulimit -p 800
+    LimitNOFILE
+    /lib/systemd/system/postgresql.service
+
+## Resources
+
+https://www.geekytidbits.com/performance-tuning-postgres/
+
+Could try pgbadger to handle auto_explain type output.
+
+https://www.postgresql.org/docs/10/static/runtime-config-wal.html
+
+IA-specific resources:
+    https://git.archive.org/ia/mint/blob/master/postgres/postgres_9_2.yml
+    https://git.archive.org/ia/mint/blob/master/postgres/put_datadir_on_ssd.sh
+    https://git.archive.org/ia/mint/blob/master/postgres/templates/postgresql.conf.j2
+
+For bulk inserts:
+- make write-ahead-log larger (eg, 16MB. done.)
+- transactions of ~1000+ inserts
+- https://www.postgresql.org/docs/current/static/populate.html
+- https://www.depesz.com/2007/07/05/how-to-insert-data-to-database-as-fast-as-possible/
+- https://stackoverflow.com/questions/12206600/how-to-speed-up-insertion-performance-in-postgresql
+
+## 2018-06-27 Measurements (pre-tuning)
+
+fatcat_prod=# select count(*) from release_ident; 20983019
+fatcat_prod=# select count(*) from work_ident; 20988140
+fatcat_prod=# select count(*) from file_ident; 1482335
+fatcat_prod=# select count(*) from creator_ident; 4167419
+fatcat_prod=# select count(*) from container_ident; 61793
+
+select count(*) from release_contrib; 59798133
+
+bnewbold@wbgrp-svc500$ sudo du -sh /var/lib/postgresql/
+43G
+
+running import-crossref with 20 threads, and manifest importer with one (at 33%
+complete). had already imported ~7 million works+releases previously.
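+
+The seq_scan vs idx_scan ("Missing Index?") table further below came from a
+query against pg_stat_user_tables, roughly like the following sketch (adapted
+from the geekytidbits post linked above; the exact query used wasn't recorded
+here):
+
+    -- tables where sequential scans outnumber index scans are candidates
+    -- for a missing index
+    SELECT
+        relname,
+        seq_scan - idx_scan AS too_much_seq,
+        CASE WHEN seq_scan - idx_scan > 0 THEN 'Missing Index?' ELSE 'OK' END,
+        pg_relation_size(relname::regclass) AS rel_size,
+        seq_scan,
+        idx_scan
+    FROM pg_stat_user_tables
+    ORDER BY too_much_seq DESC;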
+ + + PostgreSQL 10.4 - wbgrp-svc500.us.archive.org - postgres@localhost:5432/postgre + Size: 41.38G - 323.40K/s | TPS: 885 + Mem.: 50.80% - 23.86G/49.14G | IO Max: 79539/s + Swap: 0.80% - 408.89M/50.00G | Read : 67.04K/s - 16/s + Load: 6.69 7.41 7.69 | Write: 1.93M/s - 493/s + + +----total-cpu-usage---- -dsk/total- -net/total- ---paging-- ---system-- +usr sys idl wai hiq siq| read writ| recv send| in out | int csw + 32 6 62 0 0 0| 296k 3880k| 334k 3144B| 0 0 | 21k 65k + 31 6 62 0 0 0| 0 3072k| 391k 318B| 0 0 | 51k 141k + 31 6 63 0 0 0| 16k 1080k| 344k 1988B| 0 0 | 35k 104k + 29 6 65 0 0 0| 136k 2608k| 175k 332B| 0 0 |9835 15k + 28 5 67 0 0 0| 408k 4368k| 285k 832B| 0 0 | 14k 17k + 33 5 62 0 0 0| 56k 3256k| 219k 99B| 0 0 | 22k 49k + 31 6 63 0 0 0| 188k 5120k| 158k 318B| 0 0 | 17k 29k + 30 6 64 0 0 0| 200k 6984k| 239k 988B| 0 0 | 16k 24k + 30 6 64 0 0 0| 168k 5504k| 159k 152B| 0 0 | 14k 20k + 28 7 65 0 0 0| 440k 12M| 236k 420B| 0 0 | 15k 18k + 29 6 65 0 0 0| 428k 6968k| 352k 310B| 0 0 | 19k 31k + 32 6 62 0 0 0| 64k 3480k| 288k 318B| 0 0 | 18k 55k + 32 6 62 0 0 0| 32k 2080k| 155k 318B| 0 0 | 20k 52k + + +bnewbold@wbgrp-svc500$ uptime + 22:00:42 up 28 days, 22:31, 6 users, load average: 7.94, 7.56, 7.72 + + +2018-06-27 21:57:36.102 UTC [401] LOG: checkpoints are occurring too frequently (13 seconds apart) +2018-06-27 21:57:36.102 UTC [401] HINT: Consider increasing the configuration parameter "max_wal_size". + + + relname | too_much_seq | case | rel_size | seq_scan | idx_scan +-----------------+--------------+----------------+-------------+----------+----------- + changelog | 1274670 | Missing Index? | 39411712 | 1274670 | 0 + file_edit | 612386 | Missing Index? | 108298240 | 612386 | 0 + creator_edit | 612386 | Missing Index? | 285540352 | 612386 | 0 + container_edit | 612386 | Missing Index? | 4784128 | 612386 | 0 + release_edit | 612386 | Missing Index? | 1454489600 | 612386 | 0 + work_edit | 612386 | Missing Index? | 1454415872 | 612386 | 0 + release_contrib | 296675 | Missing Index? | 4725645312 | 296675 | 0 + release_ref | 296663 | Missing Index? 
| 8999837696 | 296663 | 0
+ file_release | -113 | OK | 13918208 | 110 | 223
+ container_rev | -979326 | OK | 16285696 | 63 | 979389
+ file_ident | -3671516 | OK | 109002752 | 362 | 3671878
+ file_rev | -3944155 | OK | 302940160 | 95 | 3944250
+ creator_rev | -8420205 | OK | 318283776 | 1226 | 8421431
+ creator_ident | -9525338 | OK | 309141504 | 52330 | 9577668
+ container_ident | -20581876 | OK | 4833280 | 272457 | 20854333
+ release_ident | -40548858 | OK | 1440948224 | 4160919 | 44709777
+ work_rev | -42534913 | OK | 1124671488 | 1161 | 42536074
+ editgroup | -48864662 | OK | 34136064 | 1 | 48864663
+ work_ident | -65008911 | OK | 1503313920 | 1239 | 65010150
+ release_rev | -185735794 | OK | 13649428480 | 128 | 185735922
+
+## 2018-06-28 (after basic tuning + indexes)
+
+Early loading (manifest and 20x release):
+
+    PostgreSQL 10.4 - wbgrp-svc500.us.archive.org - postgres@localhost:5432/postgres - Ref.: 2s
+    Size: 4.57G - 6.45M/s | TPS: 18812
+    Mem.: 59.70% - 23.62G/49.14G | IO Max: 3601/s
+    Swap: 1.30% - 675.05M/50.00G | Read : 0.00B/s - 0/s
+    Load: 12.98 10.58 5.25 | Write: 2.65M/s - 677/s
+
+    PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ COMMAND
+    24045 webcrawl 20 0 908872 204948 11756 S 153.3 0.4 16:13.03 fatcatd
+    24328 webcrawl 20 0 78148 45220 4324 R 87.1 0.1 8:44.16 perl
+    24056 postgres 20 0 10.441g 3.906g 3.886g R 69.9 7.9 6:57.47 postgres
+    24063 postgres 20 0 10.447g 3.899g 3.873g S 67.9 7.9 6:55.89 postgres
+    24059 postgres 20 0 10.426g 3.888g 3.883g R 67.5 7.9 6:59.15 postgres
+    24057 postgres 20 0 10.430g 3.883g 3.874g S 67.2 7.9 6:58.68 postgres
+    24061 postgres 20 0 10.448g 3.909g 3.881g R 66.2 8.0 6:54.30 postgres
+    24058 postgres 20 0 10.428g 3.883g 3.876g R 65.9 7.9 6:59.35 postgres
+    24062 postgres 20 0 10.426g 5.516g 5.511g R 64.9 11.2 6:58.29 postgres
+    24055 postgres 20 0 10.426g 3.878g 3.873g R 64.2 7.9 6:59.38 postgres
+    24054 postgres 20 0 10.430g 5.499g 5.491g R 63.6 11.2 6:57.27 postgres
+    24060 postgres 20 0 10.448g 3.900g 3.873g R 61.9 7.9 6:55.45 postgres
+    21711 postgres 20 0 10.419g 5.762g 5.760g D 16.6 11.7 3:00.67 postgres
+    21713 postgres 20 0 10.419g 21432 19512 S 11.3 0.0 3:25.11 postgres
+    24392 webcrawl 20 0 5309636 400912 8696 S 7.9 0.8 0:53.18 python3
+    24383 webcrawl 20 0 5309436 400628 8648 S 7.6 0.8 0:52.29 python3
+    24387 webcrawl 20 0 5309776 402968 8620 S 7.3 0.8 0:52.81 python3
+    24394 webcrawl 20 0 5309624 400732 8644 S 7.3 0.8 0:53.30 python3
+    24384 webcrawl 20 0 5309916 400948 8600 S 7.0 0.8 0:53.18 python3
+
+Still get a *lot* of:
+
+    2018-06-29 00:14:05.948 UTC [21711] LOG: checkpoints are occurring too frequently (1 second apart)
+    2018-06-29 00:14:05.948 UTC [21711] HINT: Consider increasing the configuration parameter "max_wal_size".
+
+VACUUM is running basically continuously; should prevent that? It takes 6
+hours or longer on the release_rev and release_ref tables. An auto-approve
+batch import method would resolve this, I think (no update after insert).
+
+max_wal_size wasn't getting set correctly.
+
+The statements taking the most time are the complex inserts (multi-table
+inserts); they take a fraction of a second though (mean less than a
+millisecond).
+
+Manifest import runs really slowly if release import is concurrent; it is much
+faster (like 10x or more) to wait until release import is done first.
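+
+The "statements taking the most time" note above is based on pg_stat_statements
+(set up at the top of this file); a query along these lines shows it. This is a
+sketch using the Postgres 10 column names (total_time/mean_time, in
+milliseconds), not necessarily the exact query that was run:
+
+    -- top statements by cumulative execution time
+    SELECT
+        left(query, 60) AS query_prefix,
+        calls,
+        rows,
+        total_time,
+        mean_time
+    FROM pg_stat_statements
+    ORDER BY total_time DESC
+    LIMIT 20;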
+
+With some 60 million releases:
+
+    bnewbold@wbgrp-svc500$ sudo du -sh /var/lib/postgresql/
+    184G /var/lib/postgresql/
+
+TODO: slow query log doesn't seem to be working (let alone auto_explain)
+
diff --git a/notes/performance/speed.txt b/notes/performance/speed.txt
new file mode 100644
index 00000000..f885aea7
--- /dev/null
+++ b/notes/performance/speed.txt
@@ -0,0 +1,82 @@
+
+## Early Prototyping
+
+### 2018-04-23
+
+- fatcat as marshmallow+sqlalchemy+flask, with API client
+- no refs, contribs, files, release contribs, containers, etc
+- no extra_json
+- sqlite
+- laptop
+- editgroup every 250 edits
+
+    /data/crossref/crossref-works.2018-01-21.badsample_5k.json
+
+    real    3m42.912s
+    user    0m20.448s
+    sys     0m2.852s
+
+    ~22 lines per second
+    12.5 hours per million
+    ~52 days for crossref (100 million)
+
+target:
+    crossref (100 million) loaded in 48 hours
+    579 lines per second
+    this test in under 10 seconds
+    ... but could be in parallel
+
+same except postgres, via:
+
+    docker run -p 5432:5432 postgres:latest
+    ./run.py --init-db --database-uri postgres://postgres@localhost:5432
+    ./run.py --database-uri postgres://postgres@localhost:5432
+
+    API processing using 60-100% of a core. postgres 12% of a core;
+    docker-proxy similar (!). overall 70% of system CPU idle.
+
+    real    2m27.771s
+    user    0m22.860s
+    sys     0m2.852s
+
+no profiling yet; need to look at database ops. probably don't even have any
+indices!
+
+## Rust Updates (2018-05-23)
+
+Re-running with tweaked python code, 5k sample file, postgres 9.6 running locally (not in docker):
+
+    real    2m27.598s
+    user    0m24.892s
+    sys     0m2.836s
+
+Using postgres and fatcat rust:
+
+    real    0m44.443s
+    user    0m25.288s
+    sys     0m0.880s
+
+api_client about half a core; fatcatd 3x processes, about 10% each; postgres
+very small.
+
+a bit faster, basically maxing out CPU:
+
+    time cat /data/crossref/crossref-works.2018-01-21.badsample_5k.json | parallel -j4 --pipe ./fatcat_client.py --host-url http://localhost:9411 ic -
+
+    real    0m28.998s
+    user    1m5.304s
+    sys     0m3.420s
+
+    200 lines per second; within a factor of 3; can perhaps hit target with
+    non-python client?
+
+python processes (clients) seem to be the CPU limit in this case; all 4 cores
+effectively maxed out.
+
+running python again in parallel mode:
+
+    real    2m29.532s
+    user    0m47.692s
+    sys     0m4.840s
-- cgit v1.2.3