From 05942d40b29f397372b9abcd1057e12c72e93743 Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Tue, 5 Feb 2019 12:08:36 -0800 Subject: use pigz over gzip in more places --- extra/sql_dumps/README.md | 19 +++++++++++++------ extra/sql_dumps/ident_table_snapshot.sh | 3 ++- 2 files changed, 15 insertions(+), 7 deletions(-) (limited to 'extra/sql_dumps') diff --git a/extra/sql_dumps/README.md b/extra/sql_dumps/README.md index 45adae09..4d420190 100644 --- a/extra/sql_dumps/README.md +++ b/extra/sql_dumps/README.md @@ -29,14 +29,14 @@ forwarding anyways. In production: - sudo -u postgres psql fatcat_prod < dump_abstracts.sql | egrep -v ^BEGIN$ | egrep -v ^ROLLBACK$ | pv -l | gzip > /srv/fatcat/snapshots/abstracts.json.gz - sudo -u postgres psql fatcat_prod < dump_file_hashes.sql | egrep -v ^BEGIN$ | egrep -v ^ROLLBACK$ | pv -l | gzip > /srv/fatcat/snapshots/file_hashes.tsv.gz - sudo -u postgres psql fatcat_prod < dump_release_extid.sql | egrep -v ^BEGIN$ | egrep -v ^ROLLBACK$ | pv -l | gzip > /srv/fatcat/snapshots/release_extid.tsv.gz + sudo -u postgres psql fatcat_prod < dump_abstracts.sql | egrep -v ^BEGIN$ | egrep -v ^ROLLBACK$ | pv -l | pigz > /srv/fatcat/snapshots/abstracts.json.gz + sudo -u postgres psql fatcat_prod < dump_file_hashes.sql | egrep -v ^BEGIN$ | egrep -v ^ROLLBACK$ | pv -l | pigz > /srv/fatcat/snapshots/file_hashes.tsv.gz + sudo -u postgres psql fatcat_prod < dump_release_extid.sql | egrep -v ^BEGIN$ | egrep -v ^ROLLBACK$ | pv -l | pigz > /srv/fatcat/snapshots/release_extid.tsv.gz ## HOWTO: Full private database backup and restore export DATESLUG="`date +%Y-%m-%d.%H%M%S`" - time sudo -u postgres pg_dump --verbose --format=tar fatcat_prod | gzip > /srv/fatcat/snapshots/fatcat_private_dbdump_${DATESLUG}.tar.gz + time sudo -u postgres pg_dump --verbose --format=tar fatcat_prod | pigz > /srv/fatcat/snapshots/fatcat_private_dbdump_${DATESLUG}.tar.gz NOTE: by using the "directory" export (along with `--file`) instead of "tar" export, it would be possible 
to use parallel dumping. However, this would put @@ -45,7 +45,14 @@ issues with users/permissions. To restore, CAREFULLY, run: - sudo -u postgres pg_restore --clean --if-exists --create -exit-on-error --jobs=16 DUMP_FILE.tar.gz + sudo -u postgres pg_restore --clean --if-exists --create --exit-on-error --jobs=16 DUMP_FILE.tar.gz + + zcat DUMP_FILE.tar.gz | sudo -u postgres pg_restore --clean --if-exists --create --exit-on-error + +Or, in production: + + sudo su postgres + time zcat fatcat_private_dbdump_2020-02-02.022209.tar.gz | pg_restore --exit-on-error --clean --if-exists --dbname fatcat_prod To just inspect a dump: @@ -59,6 +66,6 @@ replace the `fatcat_prod` database name. # TODO: for production, probably want consistent serialization mode export DATESLUG="`date +%Y-%m-%d.%H%M%S`" - sudo -u postgres pg_dump --verbose --format=tar --exclude-table-data=auth_oidc fatcat_prod | gzip > /srv/fatcat/snapshots/fatcat_public_dbdump_${DATESLUG}.tar.gz + sudo -u postgres pg_dump --verbose --format=tar --exclude-table-data=auth_oidc fatcat_prod | pigz > /srv/fatcat/snapshots/fatcat_public_dbdump_${DATESLUG}.tar.gz Can also run using the remote/SSH options above. diff --git a/extra/sql_dumps/ident_table_snapshot.sh b/extra/sql_dumps/ident_table_snapshot.sh index 16c91612..dbd4caf0 100755 --- a/extra/sql_dumps/ident_table_snapshot.sh +++ b/extra/sql_dumps/ident_table_snapshot.sh @@ -22,7 +22,8 @@ CHANGELOG_REV="`head -n1 /tmp/fatcat_ident_latest_changelog.tsv`" OUTFILE="${OUTPUT_DIR}/fatcat_idents.$DATESLUG.r$CHANGELOG_REV.tar.gz" echo "Compressing..." -tar -C /tmp -c --gzip --verbose \ +tar -C /tmp -c --verbose \ + --use-compress-program=pigz \ -f $OUTFILE \ fatcat_ident_latest_changelog.tsv \ fatcat_ident_containers.tsv \ -- cgit v1.2.3