diff options
author | Bryan Newbold <bnewbold@robocracy.org> | 2019-01-28 15:30:32 -0800 |
---|---|---|
committer | Bryan Newbold <bnewbold@robocracy.org> | 2019-01-28 15:31:43 -0800 |
commit | 2219d6f04429ea42eb0381c29780f55b44770b87 (patch) | |
tree | 0bad9c5b3e8bbb278e50c895b93d4d37a2728451 /extra | |
parent | 1eeecb90a4ce743d4d83c651b9e8c0128541404a (diff) | |
download | fatcat-2219d6f04429ea42eb0381c29780f55b44770b87.tar.gz fatcat-2219d6f04429ea42eb0381c29780f55b44770b87.zip |
add filesets and webcaptures to dumps
Diffstat (limited to 'extra')
-rw-r--r-- | extra/sql_dumps/README.md | 28 | ||||
-rw-r--r-- | extra/sql_dumps/dump_idents.sql | 2 | ||||
-rw-r--r-- | extra/sql_dumps/dump_release_extid.sql | 2 | ||||
-rwxr-xr-x | extra/sql_dumps/ident_table_snapshot.sh | 2 |
4 files changed, 33 insertions, 1 deletions
diff --git a/extra/sql_dumps/README.md b/extra/sql_dumps/README.md index 6f24207d..3538ce8f 100644 --- a/extra/sql_dumps/README.md +++ b/extra/sql_dumps/README.md @@ -24,3 +24,31 @@ forwarding anyways. # Run on database server, write to file on remote host psql fatcat < dump_abstracts.sql | egrep -v ^BEGIN$ | egrep -v ^ROLLBACK$ | pv -l | gzip | ssh user@host 'cat > abstracts.json.gz' +## HOWTO: Full private database backup and restore + + export DATESLUG="`date +%Y-%m-%d.%H%M%S`" + sudo -u postgres pg_dump --verbose --format=tar fatcat_prod | gzip > /srv/fatcat/snapshots/fatcat_private_dbdump_${DATESLUG}.tar.gz + +NOTE: by using the "directory" export (along with `--file`) instead of "tar" +export, it would be possible to use parallel dumping. However, this would put +additional load on both the database and underlying disk. Could also cause +issues with users/permissions. + +To restore, CAREFULLY, run: + + sudo -u postgres pg_restore --clean --if-exists --create --exit-on-error --jobs=16 DUMP_FILE.tar.gz + +To just inspect a dump: + + pg_restore -l DUMP_FILE.tar.gz + +## HOWTO: Public database dump + +This dump will contain all tables in the backend schema, except for "private" +authentication tables. For local or non-production machines, might need to +replace the `fatcat_prod` database name. + + export DATESLUG="`date +%Y-%m-%d.%H%M%S`" + sudo -u postgres pg_dump --verbose --format=tar --exclude-table-data=auth_oidc fatcat_prod | gzip > /srv/fatcat/snapshots/fatcat_public_dbdump_${DATESLUG}.tar.gz + +Can also run using the remote/SSH options above. 
diff --git a/extra/sql_dumps/dump_idents.sql b/extra/sql_dumps/dump_idents.sql index 22640cbc..e8126347 100644 --- a/extra/sql_dumps/dump_idents.sql +++ b/extra/sql_dumps/dump_idents.sql @@ -5,6 +5,8 @@ COPY (SELECT id FROM changelog ORDER BY id DESC LIMIT 1) COPY (SELECT id, rev_id, redirect_id FROM creator_ident WHERE is_live=true) TO '/tmp/fatcat_ident_creators.tsv' WITH NULL ''; COPY (SELECT id, rev_id, redirect_id FROM container_ident WHERE is_live=true) TO '/tmp/fatcat_ident_containers.tsv' WITH NULL ''; COPY (SELECT id, rev_id, redirect_id FROM file_ident WHERE is_live=true) TO '/tmp/fatcat_ident_files.tsv' WITH NULL ''; +COPY (SELECT id, rev_id, redirect_id FROM fileset_ident WHERE is_live=true) TO '/tmp/fatcat_ident_filesets.tsv' WITH NULL ''; +COPY (SELECT id, rev_id, redirect_id FROM webcapture_ident WHERE is_live=true) TO '/tmp/fatcat_ident_webcaptures.tsv' WITH NULL ''; COPY (SELECT id, rev_id, redirect_id FROM release_ident WHERE is_live=true) TO '/tmp/fatcat_ident_releases.tsv' WITH NULL ''; COPY (SELECT id, rev_id, redirect_id FROM work_ident WHERE is_live=true) TO '/tmp/fatcat_ident_works.tsv' WITH NULL ''; COPY (SELECT id, editgroup_id, timestamp FROM changelog) TO '/tmp/fatcat_ident_changelog.tsv' WITH NULL ''; diff --git a/extra/sql_dumps/dump_release_extid.sql b/extra/sql_dumps/dump_release_extid.sql index 5e93cb1d..fe1cf390 100644 --- a/extra/sql_dumps/dump_release_extid.sql +++ b/extra/sql_dumps/dump_release_extid.sql @@ -2,7 +2,7 @@ BEGIN TRANSACTION ISOLATION LEVEL SERIALIZABLE READ ONLY DEFERRABLE; COPY (SELECT release_ident.id, release_rev.id, release_rev.doi, release_rev.pmcid, release_rev.pmid, - release_rev.core_id, release_rev.wikidata_qid + release_rev.core_id, release_rev.wikidata_qid, release_rev.arxiv_id, release_rev.jstor_id FROM release_rev INNER JOIN release_ident ON release_ident.rev_id = release_rev.id WHERE release_ident.is_live = 't' AND release_ident.redirect_id IS NULL) diff --git a/extra/sql_dumps/ident_table_snapshot.sh 
b/extra/sql_dumps/ident_table_snapshot.sh index 66144fd3..16c91612 100755 --- a/extra/sql_dumps/ident_table_snapshot.sh +++ b/extra/sql_dumps/ident_table_snapshot.sh @@ -28,6 +28,8 @@ tar -C /tmp -c --gzip --verbose \ fatcat_ident_containers.tsv \ fatcat_ident_creators.tsv \ fatcat_ident_files.tsv \ + fatcat_ident_filesets.tsv \ + fatcat_ident_webcaptures.tsv \ fatcat_ident_releases.tsv \ fatcat_ident_works.tsv |