aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@robocracy.org> 2019-01-28 15:30:32 -0800
committer Bryan Newbold <bnewbold@robocracy.org> 2019-01-28 15:31:43 -0800
commit 2219d6f04429ea42eb0381c29780f55b44770b87 (patch)
tree 0bad9c5b3e8bbb278e50c895b93d4d37a2728451
parent 1eeecb90a4ce743d4d83c651b9e8c0128541404a (diff)
download fatcat-2219d6f04429ea42eb0381c29780f55b44770b87.tar.gz
fatcat-2219d6f04429ea42eb0381c29780f55b44770b87.zip
add filesets and webcaptures to dumps
-rw-r--r-- extra/sql_dumps/README.md 28
-rw-r--r-- extra/sql_dumps/dump_idents.sql 2
-rw-r--r-- extra/sql_dumps/dump_release_extid.sql 2
-rwxr-xr-x extra/sql_dumps/ident_table_snapshot.sh 2
-rw-r--r-- python/env.example 3
5 files changed, 35 insertions, 2 deletions
diff --git a/extra/sql_dumps/README.md b/extra/sql_dumps/README.md
index 6f24207d..3538ce8f 100644
--- a/extra/sql_dumps/README.md
+++ b/extra/sql_dumps/README.md
@@ -24,3 +24,31 @@ forwarding anyways.
# Run on database server, write to file on remote host
psql fatcat < dump_abstracts.sql | egrep -v ^BEGIN$ | egrep -v ^ROLLBACK$ | pv -l | gzip | ssh user@host 'cat > abstracts.json.gz'
+## HOWTO: Full private database backup and restore
+
+ export DATESLUG="`date +%Y-%m-%d.%H%M%S`"
+ sudo -u postgres pg_dump --verbose --format=tar fatcat_prod | gzip > /srv/fatcat/snapshots/fatcat_private_dbdump_${DATESLUG}.tar.gz
+
+NOTE: by using the "directory" export format (along with `--file`) instead of
+the "tar" format, it would be possible to use parallel dumping. However, this
+would put additional load on both the database and the underlying disk, and it
+could also cause issues with users/permissions.
+
+To restore, CAREFULLY, run:
+
+ sudo -u postgres pg_restore --clean --if-exists --create --exit-on-error --jobs=16 DUMP_FILE.tar.gz
+
+To just inspect a dump:
+
+ pg_restore -l DUMP_FILE.tar.gz
+
+## HOWTO: Public database dump
+
+This dump will contain all tables in the backend schema, but omits the row data
+of "private" authentication tables (`auth_oidc`). On local or non-production
+machines, you might need to replace the `fatcat_prod` database name.
+
+ export DATESLUG="`date +%Y-%m-%d.%H%M%S`"
+ sudo -u postgres pg_dump --verbose --format=tar --exclude-table-data=auth_oidc fatcat_prod | gzip > /srv/fatcat/snapshots/fatcat_public_dbdump_${DATESLUG}.tar.gz
+
+This can also be run using the remote/SSH options above.
diff --git a/extra/sql_dumps/dump_idents.sql b/extra/sql_dumps/dump_idents.sql
index 22640cbc..e8126347 100644
--- a/extra/sql_dumps/dump_idents.sql
+++ b/extra/sql_dumps/dump_idents.sql
@@ -5,6 +5,8 @@ COPY (SELECT id FROM changelog ORDER BY id DESC LIMIT 1)
COPY (SELECT id, rev_id, redirect_id FROM creator_ident WHERE is_live=true) TO '/tmp/fatcat_ident_creators.tsv' WITH NULL '';
COPY (SELECT id, rev_id, redirect_id FROM container_ident WHERE is_live=true) TO '/tmp/fatcat_ident_containers.tsv' WITH NULL '';
COPY (SELECT id, rev_id, redirect_id FROM file_ident WHERE is_live=true) TO '/tmp/fatcat_ident_files.tsv' WITH NULL '';
+COPY (SELECT id, rev_id, redirect_id FROM fileset_ident WHERE is_live=true) TO '/tmp/fatcat_ident_filesets.tsv' WITH NULL '';
+COPY (SELECT id, rev_id, redirect_id FROM webcapture_ident WHERE is_live=true) TO '/tmp/fatcat_ident_webcaptures.tsv' WITH NULL '';
COPY (SELECT id, rev_id, redirect_id FROM release_ident WHERE is_live=true) TO '/tmp/fatcat_ident_releases.tsv' WITH NULL '';
COPY (SELECT id, rev_id, redirect_id FROM work_ident WHERE is_live=true) TO '/tmp/fatcat_ident_works.tsv' WITH NULL '';
COPY (SELECT id, editgroup_id, timestamp FROM changelog) TO '/tmp/fatcat_ident_changelog.tsv' WITH NULL '';
diff --git a/extra/sql_dumps/dump_release_extid.sql b/extra/sql_dumps/dump_release_extid.sql
index 5e93cb1d..fe1cf390 100644
--- a/extra/sql_dumps/dump_release_extid.sql
+++ b/extra/sql_dumps/dump_release_extid.sql
@@ -2,7 +2,7 @@
BEGIN TRANSACTION ISOLATION LEVEL SERIALIZABLE READ ONLY DEFERRABLE;
COPY (SELECT release_ident.id, release_rev.id, release_rev.doi, release_rev.pmcid, release_rev.pmid,
- release_rev.core_id, release_rev.wikidata_qid
+ release_rev.core_id, release_rev.wikidata_qid, release_rev.arxiv_id, release_rev.jstor_id
FROM release_rev
INNER JOIN release_ident ON release_ident.rev_id = release_rev.id
WHERE release_ident.is_live = 't' AND release_ident.redirect_id IS NULL)
diff --git a/extra/sql_dumps/ident_table_snapshot.sh b/extra/sql_dumps/ident_table_snapshot.sh
index 66144fd3..16c91612 100755
--- a/extra/sql_dumps/ident_table_snapshot.sh
+++ b/extra/sql_dumps/ident_table_snapshot.sh
@@ -28,6 +28,8 @@ tar -C /tmp -c --gzip --verbose \
fatcat_ident_containers.tsv \
fatcat_ident_creators.tsv \
fatcat_ident_files.tsv \
+ fatcat_ident_filesets.tsv \
+ fatcat_ident_webcaptures.tsv \
fatcat_ident_releases.tsv \
fatcat_ident_works.tsv
diff --git a/python/env.example b/python/env.example
index 75fc5238..4bf76e21 100644
--- a/python/env.example
+++ b/python/env.example
@@ -4,7 +4,8 @@ FATCAT_DOMAIN="dev.fatcat.wiki"
FATCAT_API_AUTH_TOKEN="AgEPZGV2LmZhdGNhdC53aWtpAhYyMDE5MDEwMS1kZXYtZHVtbXkta2V5AAImZWRpdG9yX2lkID0gYWFhYWFhYWFhYWFhYmt2a2FhYWFhYWFhYWkAAht0aW1lID4gMjAxOS0wMS0wOVQwMDo1Nzo1MloAAAYgnroNha1hSftChtxHGTnLEmM/pY8MeQS/jBSV0UNvXug="
FATCAT_API_HOST="http://localhost:9411/v0"
ELASTICSEARCH_BACKEND="http://localhost:9200"
-ELASTICSEARCH_INDEX="fatcat"
+ELASTICSEARCH_RELEASE_INDEX="fatcat_release"
+ELASTICSEARCH_CONTAINER_INDEX="fatcat_container"
GITLAB_CLIENT_ID=""
GITLAB_CLIENT_SECRET=""
IA_XAUTH_CLIENT_ID=""