From 71a4210f1e27545cadc216301b4529912fc57591 Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Tue, 24 Apr 2018 16:12:05 -0700 Subject: backup notes and TODO --- notes/golang.txt | 17 +++++++++++++++++ notes/speed.txt | 44 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 61 insertions(+) create mode 100644 notes/golang.txt create mode 100644 notes/speed.txt (limited to 'notes') diff --git a/notes/golang.txt b/notes/golang.txt new file mode 100644 index 00000000..8527711e --- /dev/null +++ b/notes/golang.txt @@ -0,0 +1,17 @@ + +- pq: basic postgres driver and ORM (similar to sqlalchemy?) +- sqlx: small extensions to builtin sql; row to struct mapping + + +code generation from SQL schema: +- https://github.com/xo/xo +- https://github.com/volatiletech/sqlboiler +- kallax + +database migrations: +- goose +- https://github.com/mattes/migrate + +maybe also: +- https://github.com/oklog/ulid + like a UUID, but base32 and "sortable" (timestamp + random) diff --git a/notes/speed.txt b/notes/speed.txt new file mode 100644 index 00000000..69be3253 --- /dev/null +++ b/notes/speed.txt @@ -0,0 +1,44 @@ + +## Early Prototyping + +### 2018-04-23 + +- fatcat as marshmallow+sqlalchemy+flask, with API client +- no refs, contribs, files, release contribs, containers, etc +- no extra_json +- sqlite +- laptop +- editgroup every 250 edits + + + /data/crossref/crossref-works.2018-01-21.badsample_5k.json + + real 3m42.912s + user 0m20.448s + sys 0m2.852s + + ~22 lines per second + 12.5 hours per million + ~52 days for crossref (100 million) + +target: + crossref (100 million) loaded in 48 hours + 579 lines per second + this test in under 10 seconds + ... but could be in parallel + +same except postgres, via: + + docker run -p 5432:5432 postgres:latest + ./run.py --init-db --database-uri postgres://postgres@localhost:5432 + ./run.py --database-uri postgres://postgres@localhost:5432 + + API processing using 60-100% of a core. postgres 12% of a core; + docker-proxy similar (!). 
+ overall 70% of system CPU idle. + + real 2m27.771s + user 0m22.860s + sys 0m2.852s + +no profiling yet; need to look at database ops. probably don't even have any +indices! -- cgit v1.2.3