summaryrefslogtreecommitdiffstats
path: root/extra
diff options
context:
space:
mode:
authorBryan Newbold <bnewbold@robocracy.org>2019-01-28 15:30:32 -0800
committerBryan Newbold <bnewbold@robocracy.org>2019-01-28 15:31:43 -0800
commit2219d6f04429ea42eb0381c29780f55b44770b87 (patch)
tree0bad9c5b3e8bbb278e50c895b93d4d37a2728451 /extra
parent1eeecb90a4ce743d4d83c651b9e8c0128541404a (diff)
downloadfatcat-2219d6f04429ea42eb0381c29780f55b44770b87.tar.gz
fatcat-2219d6f04429ea42eb0381c29780f55b44770b87.zip
add filesets and webcaptures to dumps
Diffstat (limited to 'extra')
-rw-r--r--extra/sql_dumps/README.md28
-rw-r--r--extra/sql_dumps/dump_idents.sql2
-rw-r--r--extra/sql_dumps/dump_release_extid.sql2
-rwxr-xr-xextra/sql_dumps/ident_table_snapshot.sh2
4 files changed, 33 insertions, 1 deletions
diff --git a/extra/sql_dumps/README.md b/extra/sql_dumps/README.md
index 6f24207d..3538ce8f 100644
--- a/extra/sql_dumps/README.md
+++ b/extra/sql_dumps/README.md
@@ -24,3 +24,31 @@ forwarding anyways.
# Run on database server, write to file on remote host
psql fatcat < dump_abstracts.sql | egrep -v ^BEGIN$ | egrep -v ^ROLLBACK$ | pv -l | gzip | ssh user@host 'cat > abstracts.json.gz'
+## HOWTO: Full private database backup and restore
+
+ export DATESLUG="`date +%Y-%m-%d.%H%M%S`"
+ sudo -u postgres pg_dump --verbose --format=tar fatcat_prod | gzip > /srv/fatcat/snapshots/fatcat_private_dbdump_${DATESLUG}.tar.gz
+
+NOTE: by using the "directory" export (along with `--file`) instead of "tar"
+export, it would be possible to use parallel dumping. However, this would put
+additional load on both the database and underlying disk. It could also cause
+issues with users/permissions.
+
+To restore, CAREFULLY, run:
+
+ sudo -u postgres pg_restore --clean --if-exists --create --exit-on-error --jobs=16 DUMP_FILE.tar.gz
+
+To just inspect a dump:
+
+ pg_restore -l DUMP_FILE.tar.gz
+
+## HOWTO: Public database dump
+
+This dump will contain all tables in the backend schema, except for "private"
+authentication tables. For local or non-production machines, you might need to
+replace the `fatcat_prod` database name.
+
+ export DATESLUG="`date +%Y-%m-%d.%H%M%S`"
+ sudo -u postgres pg_dump --verbose --format=tar --exclude-table-data=auth_oidc fatcat_prod | gzip > /srv/fatcat/snapshots/fatcat_public_dbdump_${DATESLUG}.tar.gz
+
+This can also be run using the remote/SSH options above.
diff --git a/extra/sql_dumps/dump_idents.sql b/extra/sql_dumps/dump_idents.sql
index 22640cbc..e8126347 100644
--- a/extra/sql_dumps/dump_idents.sql
+++ b/extra/sql_dumps/dump_idents.sql
@@ -5,6 +5,8 @@ COPY (SELECT id FROM changelog ORDER BY id DESC LIMIT 1)
COPY (SELECT id, rev_id, redirect_id FROM creator_ident WHERE is_live=true) TO '/tmp/fatcat_ident_creators.tsv' WITH NULL '';
COPY (SELECT id, rev_id, redirect_id FROM container_ident WHERE is_live=true) TO '/tmp/fatcat_ident_containers.tsv' WITH NULL '';
COPY (SELECT id, rev_id, redirect_id FROM file_ident WHERE is_live=true) TO '/tmp/fatcat_ident_files.tsv' WITH NULL '';
+COPY (SELECT id, rev_id, redirect_id FROM fileset_ident WHERE is_live=true) TO '/tmp/fatcat_ident_filesets.tsv' WITH NULL '';
+COPY (SELECT id, rev_id, redirect_id FROM webcapture_ident WHERE is_live=true) TO '/tmp/fatcat_ident_webcaptures.tsv' WITH NULL '';
COPY (SELECT id, rev_id, redirect_id FROM release_ident WHERE is_live=true) TO '/tmp/fatcat_ident_releases.tsv' WITH NULL '';
COPY (SELECT id, rev_id, redirect_id FROM work_ident WHERE is_live=true) TO '/tmp/fatcat_ident_works.tsv' WITH NULL '';
COPY (SELECT id, editgroup_id, timestamp FROM changelog) TO '/tmp/fatcat_ident_changelog.tsv' WITH NULL '';
diff --git a/extra/sql_dumps/dump_release_extid.sql b/extra/sql_dumps/dump_release_extid.sql
index 5e93cb1d..fe1cf390 100644
--- a/extra/sql_dumps/dump_release_extid.sql
+++ b/extra/sql_dumps/dump_release_extid.sql
@@ -2,7 +2,7 @@
BEGIN TRANSACTION ISOLATION LEVEL SERIALIZABLE READ ONLY DEFERRABLE;
COPY (SELECT release_ident.id, release_rev.id, release_rev.doi, release_rev.pmcid, release_rev.pmid,
- release_rev.core_id, release_rev.wikidata_qid
+ release_rev.core_id, release_rev.wikidata_qid, release_rev.arxiv_id, release_rev.jstor_id
FROM release_rev
INNER JOIN release_ident ON release_ident.rev_id = release_rev.id
WHERE release_ident.is_live = 't' AND release_ident.redirect_id IS NULL)
diff --git a/extra/sql_dumps/ident_table_snapshot.sh b/extra/sql_dumps/ident_table_snapshot.sh
index 66144fd3..16c91612 100755
--- a/extra/sql_dumps/ident_table_snapshot.sh
+++ b/extra/sql_dumps/ident_table_snapshot.sh
@@ -28,6 +28,8 @@ tar -C /tmp -c --gzip --verbose \
fatcat_ident_containers.tsv \
fatcat_ident_creators.tsv \
fatcat_ident_files.tsv \
+ fatcat_ident_filesets.tsv \
+ fatcat_ident_webcaptures.tsv \
fatcat_ident_releases.tsv \
fatcat_ident_works.tsv