From 2219d6f04429ea42eb0381c29780f55b44770b87 Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Mon, 28 Jan 2019 15:30:32 -0800 Subject: add filesets and webcaptures to dumps --- extra/sql_dumps/README.md | 28 ++++++++++++++++++++++++++++ extra/sql_dumps/dump_idents.sql | 2 ++ extra/sql_dumps/dump_release_extid.sql | 2 +- extra/sql_dumps/ident_table_snapshot.sh | 2 ++ python/env.example | 3 ++- 5 files changed, 35 insertions(+), 2 deletions(-) diff --git a/extra/sql_dumps/README.md b/extra/sql_dumps/README.md index 6f24207d..3538ce8f 100644 --- a/extra/sql_dumps/README.md +++ b/extra/sql_dumps/README.md @@ -24,3 +24,31 @@ forwarding anyways. # Run on database server, write to file on remote host psql fatcat < dump_abstracts.sql | egrep -v ^BEGIN$ | egrep -v ^ROLLBACK$ | pv -l | gzip | ssh user@host 'cat > abstracts.json.gz' +## HOWTO: Full private database backup and restore + + export DATESLUG="`date +%Y-%m-%d.%H%M%S`" + sudo -u postgres pg_dump --verbose --format=tar fatcat_prod | gzip > /srv/fatcat/snapshots/fatcat_private_dbdump_${DATESLUG}.tar.gz + +NOTE: by using the "directory" export (along with `--file`) instead of "tar" +export, it would be possible to use parallel dumping. However, this would put +additional load on both the database and underlying disk. Could also cause +issues with users/permissions. + +To restore, CAREFULLY, run: + + sudo -u postgres pg_restore --clean --if-exists --create --exit-on-error --jobs=16 DUMP_FILE.tar.gz + +To just inspect a dump: + + pg_restore -l DUMP_FILE.tar.gz + +## HOWTO: Public database dump + +This dump will contain all tables in the backend schema, except for "private" +authentication tables. For local or non-production machines, might need to +replace the `fatcat_prod` database name. 
+ + export DATESLUG="`date +%Y-%m-%d.%H%M%S`" + sudo -u postgres pg_dump --verbose --format=tar --exclude-table-data=auth_oidc fatcat_prod | gzip > /srv/fatcat/snapshots/fatcat_public_dbdump_${DATESLUG}.tar.gz + +Can also run using the remote/SSH options above. diff --git a/extra/sql_dumps/dump_idents.sql b/extra/sql_dumps/dump_idents.sql index 22640cbc..e8126347 100644 --- a/extra/sql_dumps/dump_idents.sql +++ b/extra/sql_dumps/dump_idents.sql @@ -5,6 +5,8 @@ COPY (SELECT id FROM changelog ORDER BY id DESC LIMIT 1) COPY (SELECT id, rev_id, redirect_id FROM creator_ident WHERE is_live=true) TO '/tmp/fatcat_ident_creators.tsv' WITH NULL ''; COPY (SELECT id, rev_id, redirect_id FROM container_ident WHERE is_live=true) TO '/tmp/fatcat_ident_containers.tsv' WITH NULL ''; COPY (SELECT id, rev_id, redirect_id FROM file_ident WHERE is_live=true) TO '/tmp/fatcat_ident_files.tsv' WITH NULL ''; +COPY (SELECT id, rev_id, redirect_id FROM fileset_ident WHERE is_live=true) TO '/tmp/fatcat_ident_filesets.tsv' WITH NULL ''; +COPY (SELECT id, rev_id, redirect_id FROM webcapture_ident WHERE is_live=true) TO '/tmp/fatcat_ident_webcaptures.tsv' WITH NULL ''; COPY (SELECT id, rev_id, redirect_id FROM release_ident WHERE is_live=true) TO '/tmp/fatcat_ident_releases.tsv' WITH NULL ''; COPY (SELECT id, rev_id, redirect_id FROM work_ident WHERE is_live=true) TO '/tmp/fatcat_ident_works.tsv' WITH NULL ''; COPY (SELECT id, editgroup_id, timestamp FROM changelog) TO '/tmp/fatcat_ident_changelog.tsv' WITH NULL ''; diff --git a/extra/sql_dumps/dump_release_extid.sql b/extra/sql_dumps/dump_release_extid.sql index 5e93cb1d..fe1cf390 100644 --- a/extra/sql_dumps/dump_release_extid.sql +++ b/extra/sql_dumps/dump_release_extid.sql @@ -2,7 +2,7 @@ BEGIN TRANSACTION ISOLATION LEVEL SERIALIZABLE READ ONLY DEFERRABLE; COPY (SELECT release_ident.id, release_rev.id, release_rev.doi, release_rev.pmcid, release_rev.pmid, - release_rev.core_id, release_rev.wikidata_qid + release_rev.core_id, 
release_rev.wikidata_qid, release_rev.arxiv_id, release_rev.jstor_id FROM release_rev INNER JOIN release_ident ON release_ident.rev_id = release_rev.id WHERE release_ident.is_live = 't' AND release_ident.redirect_id IS NULL) diff --git a/extra/sql_dumps/ident_table_snapshot.sh b/extra/sql_dumps/ident_table_snapshot.sh index 66144fd3..16c91612 100755 --- a/extra/sql_dumps/ident_table_snapshot.sh +++ b/extra/sql_dumps/ident_table_snapshot.sh @@ -28,6 +28,8 @@ tar -C /tmp -c --gzip --verbose \ fatcat_ident_containers.tsv \ fatcat_ident_creators.tsv \ fatcat_ident_files.tsv \ + fatcat_ident_filesets.tsv \ + fatcat_ident_webcaptures.tsv \ fatcat_ident_releases.tsv \ fatcat_ident_works.tsv diff --git a/python/env.example b/python/env.example index 75fc5238..4bf76e21 100644 --- a/python/env.example +++ b/python/env.example @@ -4,7 +4,8 @@ FATCAT_DOMAIN="dev.fatcat.wiki" FATCAT_API_AUTH_TOKEN="AgEPZGV2LmZhdGNhdC53aWtpAhYyMDE5MDEwMS1kZXYtZHVtbXkta2V5AAImZWRpdG9yX2lkID0gYWFhYWFhYWFhYWFhYmt2a2FhYWFhYWFhYWkAAht0aW1lID4gMjAxOS0wMS0wOVQwMDo1Nzo1MloAAAYgnroNha1hSftChtxHGTnLEmM/pY8MeQS/jBSV0UNvXug=" FATCAT_API_HOST="http://localhost:9411/v0" ELASTICSEARCH_BACKEND="http://localhost:9200" -ELASTICSEARCH_INDEX="fatcat" +ELASTICSEARCH_RELEASE_INDEX="fatcat_release" +ELASTICSEARCH_CONTAINER_INDEX="fatcat_container" GITLAB_CLIENT_ID="" GITLAB_CLIENT_SECRET="" IA_XAUTH_CLIENT_ID="" -- cgit v1.2.3