From ca00cb327cb066c85a0f11a947b4497655ddf0de Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Fri, 24 Aug 2018 13:37:43 -0700 Subject: database dump notes --- notes/database_dumps_backups.txt | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'notes/database_dumps_backups.txt') diff --git a/notes/database_dumps_backups.txt b/notes/database_dumps_backups.txt index 0b05b9b8..60d4bba0 100644 --- a/notes/database_dumps_backups.txt +++ b/notes/database_dumps_backups.txt @@ -23,9 +23,31 @@ There are a few different database dump formats folks might want: should be on a work or release basis; will go with release for now. Harder to do using public interface because of the need for transaction locking. +## Full Postgres Backup + Backing up the entire database using `pg_dump`, with parallelism 1 (use more on larger machine with fast disks; try 4 or 8?), assuming the database name is 'fatcat', and the current user has access: pg_dump -j1 -Fd -f test-dump fatcat +## Identifier Dumps + +The `extras/quick_dump.sql` script will dump abstracts and identifiers as TSV +files to `/tmp/`. Pretty quick; takes about 15 GB of disk space (uncompressed). + +## Releases Export + + # simple command + ./fatcat_export.py releases /tmp/fatcat_ident_releases.tsv /tmp/releases-dump.json + + # usual command + time ./fatcat_export.py releases /tmp/fatcat_ident_releases.tsv - | pv -l | wc + +## Changelog Export + + # simple command + ./fatcat_export.py changelog /tmp/changelog-dump.json + + # usual command + time ./fatcat_export.py changelog - | pv -l | wc -- cgit v1.2.3