From 2219d6f04429ea42eb0381c29780f55b44770b87 Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Mon, 28 Jan 2019 15:30:32 -0800 Subject: add filesets and webcaptures to dumps --- extra/sql_dumps/README.md | 28 ++++++++++++++++++++++++++++ extra/sql_dumps/dump_idents.sql | 2 ++ extra/sql_dumps/dump_release_extid.sql | 2 +- extra/sql_dumps/ident_table_snapshot.sh | 2 ++ 4 files changed, 33 insertions(+), 1 deletion(-) (limited to 'extra') diff --git a/extra/sql_dumps/README.md b/extra/sql_dumps/README.md index 6f24207d..3538ce8f 100644 --- a/extra/sql_dumps/README.md +++ b/extra/sql_dumps/README.md @@ -24,3 +24,31 @@ forwarding anyways. # Run on database server, write to file on remote host psql fatcat < dump_abstracts.sql | egrep -v ^BEGIN$ | egrep -v ^ROLLBACK$ | pv -l | gzip | ssh user@host 'cat > abstracts.json.gz' +## HOWTO: Full private database backup and restore + + export DATESLUG="`date +%Y-%m-%d.%H%M%S`" + sudo -u postgres pg_dump --verbose --format=tar fatcat_prod | gzip > /srv/fatcat/snapshots/fatcat_private_dbdump_${DATESLUG}.tar.gz + +NOTE: by using the "directory" export (along with `--file`) instead of "tar" +export, it would be possible to use parallel dumping. However, this would put +additional load on both the database and underlying disk. Could also cause +issues with users/permissions. + +To restore, CAREFULLY, run: + + sudo -u postgres pg_restore --clean --if-exists --create --exit-on-error --jobs=16 DUMP_FILE.tar.gz + +To just inspect a dump: + + pg_restore -l DUMP_FILE.tar.gz + +## HOWTO: Public database dump + +This dump will contain all tables in the backend schema, except for "private" +authentication tables. For local or non-production machines, might need to +replace the `fatcat_prod` database name. 
+ + export DATESLUG="`date +%Y-%m-%d.%H%M%S`" + sudo -u postgres pg_dump --verbose --format=tar --exclude-table-data=auth_oidc fatcat_prod | gzip > /srv/fatcat/snapshots/fatcat_public_dbdump_${DATESLUG}.tar.gz + +Can also run using the remote/SSH options above. diff --git a/extra/sql_dumps/dump_idents.sql b/extra/sql_dumps/dump_idents.sql index 22640cbc..e8126347 100644 --- a/extra/sql_dumps/dump_idents.sql +++ b/extra/sql_dumps/dump_idents.sql @@ -5,6 +5,8 @@ COPY (SELECT id FROM changelog ORDER BY id DESC LIMIT 1) COPY (SELECT id, rev_id, redirect_id FROM creator_ident WHERE is_live=true) TO '/tmp/fatcat_ident_creators.tsv' WITH NULL ''; COPY (SELECT id, rev_id, redirect_id FROM container_ident WHERE is_live=true) TO '/tmp/fatcat_ident_containers.tsv' WITH NULL ''; COPY (SELECT id, rev_id, redirect_id FROM file_ident WHERE is_live=true) TO '/tmp/fatcat_ident_files.tsv' WITH NULL ''; +COPY (SELECT id, rev_id, redirect_id FROM fileset_ident WHERE is_live=true) TO '/tmp/fatcat_ident_filesets.tsv' WITH NULL ''; +COPY (SELECT id, rev_id, redirect_id FROM webcapture_ident WHERE is_live=true) TO '/tmp/fatcat_ident_webcaptures.tsv' WITH NULL ''; COPY (SELECT id, rev_id, redirect_id FROM release_ident WHERE is_live=true) TO '/tmp/fatcat_ident_releases.tsv' WITH NULL ''; COPY (SELECT id, rev_id, redirect_id FROM work_ident WHERE is_live=true) TO '/tmp/fatcat_ident_works.tsv' WITH NULL ''; COPY (SELECT id, editgroup_id, timestamp FROM changelog) TO '/tmp/fatcat_ident_changelog.tsv' WITH NULL ''; diff --git a/extra/sql_dumps/dump_release_extid.sql b/extra/sql_dumps/dump_release_extid.sql index 5e93cb1d..fe1cf390 100644 --- a/extra/sql_dumps/dump_release_extid.sql +++ b/extra/sql_dumps/dump_release_extid.sql @@ -2,7 +2,7 @@ BEGIN TRANSACTION ISOLATION LEVEL SERIALIZABLE READ ONLY DEFERRABLE; COPY (SELECT release_ident.id, release_rev.id, release_rev.doi, release_rev.pmcid, release_rev.pmid, - release_rev.core_id, release_rev.wikidata_qid + release_rev.core_id, 
release_rev.wikidata_qid, release_rev.arxiv_id, release_rev.jstor_id FROM release_rev INNER JOIN release_ident ON release_ident.rev_id = release_rev.id WHERE release_ident.is_live = 't' AND release_ident.redirect_id IS NULL) diff --git a/extra/sql_dumps/ident_table_snapshot.sh b/extra/sql_dumps/ident_table_snapshot.sh index 66144fd3..16c91612 100755 --- a/extra/sql_dumps/ident_table_snapshot.sh +++ b/extra/sql_dumps/ident_table_snapshot.sh @@ -28,6 +28,8 @@ tar -C /tmp -c --gzip --verbose \ fatcat_ident_containers.tsv \ fatcat_ident_creators.tsv \ fatcat_ident_files.tsv \ + fatcat_ident_filesets.tsv \ + fatcat_ident_webcaptures.tsv \ fatcat_ident_releases.tsv \ fatcat_ident_works.tsv -- cgit v1.2.3