From 22df32157f4846a4069d76c47f0adb3ad073a7bc Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Mon, 3 Jun 2019 20:23:32 -0700 Subject: move export README info to sql_dumps doc --- extra/sql_dumps/README.md | 30 +++++++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) (limited to 'extra/sql_dumps') diff --git a/extra/sql_dumps/README.md b/extra/sql_dumps/README.md index 91bc3e98..5addf920 100644 --- a/extra/sql_dumps/README.md +++ b/extra/sql_dumps/README.md @@ -14,6 +14,32 @@ Or, in production: sudo su postgres DATABASE_URL=fatcat_prod ./ident_table_snapshot.sh /tmp +## HOWTO: Entity Dumps + +First create the entity ident table dumps (above). Note that *most* of the +metadata will be pinned to the consistent ident dump snapshot transaction +(based on revision references), but that "expanded" entities will be the most +recent version, which may have been updated. This mostly impacts expanded +releases (containers, files). + +Dump locally to stdout, eg: + + # local/development + cat /tmp/fatcat_ident_releases.tsv | ./target/debug/fatcat-export releases + +Or, in production: + + # production, as 'fatcat' user, in /srv/fatcat/src/rust: + cat /tmp/fatcat_ident_releases.tsv | ./target/release/fatcat-export release --expand files,filesets,webcaptures,container -j8 | pigz > /srv/fatcat/snapshots/release_export_expanded.json.gz + cat /tmp/fatcat_ident_releases.tsv | ./target/release/fatcat-export release -j8 | pigz > /srv/fatcat/snapshots/release_export.json.gz + cat /tmp/fatcat_ident_creators.tsv | ./target/release/fatcat-export creator -j8 | pigz > /srv/fatcat/snapshots/creator_export.json.gz + cat /tmp/fatcat_ident_containers.tsv | ./target/release/fatcat-export container -j8 | pigz > /srv/fatcat/snapshots/container_export.json.gz + cat /tmp/fatcat_ident_files.tsv | ./target/release/fatcat-export file -j8 | pigz > /srv/fatcat/snapshots/file_export.json.gz + cat /tmp/fatcat_ident_filesets.tsv | ./target/release/fatcat-export fileset -j8 | pigz > 
/srv/fatcat/snapshots/fileset_export.json.gz + cat /tmp/fatcat_ident_webcaptures.tsv | ./target/release/fatcat-export webcapture -j8 | pigz > /srv/fatcat/snapshots/webcapture_export.json.gz + +Then usually move all these files to `/srv/fatcat/snapshots/`. + ## HOWTO: Dump abstracts, release identifiers, file hashes, etc These are run as regular old commands, and can run across the network in a @@ -33,7 +59,7 @@ In production: sudo -u postgres psql fatcat_prod < dump_file_hashes.sql | egrep -v ^BEGIN$ | egrep -v ^ROLLBACK$ | pv -l | pigz > /srv/fatcat/snapshots/file_hashes.tsv.gz sudo -u postgres psql fatcat_prod < dump_release_extid.sql | egrep -v ^BEGIN$ | egrep -v ^ROLLBACK$ | pv -l | pigz > /srv/fatcat/snapshots/release_extid.tsv.gz -## HOWTO: Full ("private") database backup and restore +## HOWTO: Full ("private") database backup export DATESLUG="`date +%Y-%m-%d.%H%M%S`" time sudo -u postgres pg_dump --verbose --format=tar fatcat_prod | pigz > /srv/fatcat/snapshots/fatcat_full_dbdump_${DATESLUG}.tar.gz @@ -46,6 +72,8 @@ export, it would be possible to use parallel dumping. However, this would put additional load on both the database and underlying disk. Could also cause issues with users/permissions. +## HOWTO: Restore full database backup + To restore, CAREFULLY, run: sudo -u postgres pg_restore --clean --if-exists --create --exit-on-error --jobs=16 DUMP_FILE.tar -- cgit v1.2.3