From 22df32157f4846a4069d76c47f0adb3ad073a7bc Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Mon, 3 Jun 2019 20:23:32 -0700 Subject: move export README info to sql_dumps doc --- extra/sql_dumps/README.md | 30 +++++++++++++++++++++++++++++- guide/src/bulk_exports.md | 2 -- rust/README.export.md | 17 ----------------- rust/README.md | 2 +- 4 files changed, 30 insertions(+), 21 deletions(-) delete mode 100644 rust/README.export.md diff --git a/extra/sql_dumps/README.md b/extra/sql_dumps/README.md index 91bc3e98..5addf920 100644 --- a/extra/sql_dumps/README.md +++ b/extra/sql_dumps/README.md @@ -14,6 +14,32 @@ Or, in production: sudo su postgres DATABASE_URL=fatcat_prod ./ident_table_snapshot.sh /tmp +## HOWTO: Entity Dumps + +First create the entity ident table dumps (above). Note that *most* of the +metadata will be pinned to the consistent ident dump snapshot transaction +(based on revision references), but that "expanded" entities will be the most +recent version, which may have been updated. This mostly impacts expanded +releases (containers, files). 
+ +Dump locally to stdout, eg: + + # local/development + cat /tmp/fatcat_ident_releases.tsv | ./target/debug/fatcat-export releases + +Or, in production: + + # production, as 'fatcat' user, in /srv/fatcat/src/rust: + cat /tmp/fatcat_ident_releases.tsv | ./target/release/fatcat-export release --expand files,filesets,webcaptures,container -j8 | pigz > /srv/fatcat/snapshots/release_export_expanded.json.gz + cat /tmp/fatcat_ident_releases.tsv | ./target/release/fatcat-export release -j8 | pigz > /srv/fatcat/snapshots/release_export.json.gz + cat /tmp/fatcat_ident_creators.tsv | ./target/release/fatcat-export creator -j8 | pigz > /srv/fatcat/snapshots/creator_export.json.gz + cat /tmp/fatcat_ident_containers.tsv | ./target/release/fatcat-export container -j8 | pigz > /srv/fatcat/snapshots/container_export.json.gz + cat /tmp/fatcat_ident_files.tsv | ./target/release/fatcat-export file -j8 | pigz > /srv/fatcat/snapshots/file_export.json.gz + cat /tmp/fatcat_ident_filesets.tsv | ./target/release/fatcat-export fileset -j8 | pigz > /srv/fatcat/snapshots/fileset_export.json.gz + cat /tmp/fatcat_ident_webcaptures.tsv | ./target/release/fatcat-export webcapture -j8 | pigz > /srv/fatcat/snapshots/webcapture_export.json.gz + +Then usually move all these files to `/srv/fatcat/snapshots/`. 
+ ## HOWTO: Dump abstracts, release identifiers, file hashes, etc These are run as regular old commands, and can run across the network in a @@ -33,7 +59,7 @@ In production: sudo -u postgres psql fatcat_prod < dump_file_hashes.sql | egrep -v ^BEGIN$ | egrep -v ^ROLLBACK$ | pv -l | pigz > /srv/fatcat/snapshots/file_hashes.tsv.gz sudo -u postgres psql fatcat_prod < dump_release_extid.sql | egrep -v ^BEGIN$ | egrep -v ^ROLLBACK$ | pv -l | pigz > /srv/fatcat/snapshots/release_extid.tsv.gz -## HOWTO: Full ("private") database backup and restore +## HOWTO: Full ("private") database backup export DATESLUG="`date +%Y-%m-%d.%H%M%S`" time sudo -u postgres pg_dump --verbose --format=tar fatcat_prod | pigz > /srv/fatcat/snapshots/fatcat_full_dbdump_${DATESLUG}.tar.gz @@ -46,6 +72,8 @@ export, it would be possible to use parallel dumping. However, this would put additional load on both the database and underlying disk. Could also cause issues with users/permissions. +## HOWTO: Restore full database backup + To restore, CAREFULLY, run: sudo -u postgres pg_restore --clean --if-exists --create --exit-on-error --jobs=16 DUMP_FILE.tar diff --git a/guide/src/bulk_exports.md b/guide/src/bulk_exports.md index 052b667e..4ac4206c 100644 --- a/guide/src/bulk_exports.md +++ b/guide/src/bulk_exports.md @@ -74,8 +74,6 @@ sub-entities for each record. Only "active" entities are included (not deleted, work-in-progress, or redirected entities). -The `./rust/README.export.md` file has more context. - These dumps can be quite large when expanded (over 100 GBytes compressed), but do not include history so will not grow as fast as other exports over time. Not all entity types are dumped at the moment; if you would like specific dumps get diff --git a/rust/README.export.md b/rust/README.export.md deleted file mode 100644 index 44ca8afd..00000000 --- a/rust/README.export.md +++ /dev/null @@ -1,17 +0,0 @@ - -First create ident files, following `../extra/sql_dumps/README.md`. 
- -Then, to dump locally to stdout: - - cat /tmp/fatcat_ident_releases.tsv | ./target/debug/fatcat-export releases - -Or, in production, as the fatcat user: - - cat /tmp/fatcat_ident_releases.tsv | ./target/release/fatcat-export release --expand files,filesets,webcaptures,container -j8 | pigz > /srv/fatcat/snapshots/release_export_expanded.json.gz - cat /tmp/fatcat_ident_releases.tsv | ./target/release/fatcat-export release -j8 | pigz > /srv/fatcat/snapshots/release_export.json.gz - cat /tmp/fatcat_ident_creators.tsv | ./target/release/fatcat-export creator -j8 | pigz > /srv/fatcat/snapshots/creator_export.json.gz - cat /tmp/fatcat_ident_containers.tsv | ./target/release/fatcat-export container -j8 | pigz > /srv/fatcat/snapshots/container_export.json.gz - cat /tmp/fatcat_ident_files.tsv | ./target/release/fatcat-export file -j8 | pigz > /srv/fatcat/snapshots/file_export.json.gz - cat /tmp/fatcat_ident_filesets.tsv | ./target/release/fatcat-export fileset -j8 | pigz > /srv/fatcat/snapshots/fileset_export.json.gz - cat /tmp/fatcat_ident_webcaptures.tsv | ./target/release/fatcat-export webcapture -j8 | pigz > /srv/fatcat/snapshots/webcapture_export.json.gz - diff --git a/rust/README.md b/rust/README.md index e191d03e..cba926dc 100644 --- a/rust/README.md +++ b/rust/README.md @@ -5,7 +5,7 @@ Rust implementation of fatcat API server. Commands include: - `fatcat-auth`: privileged command to manage authentication keys, tokens, and accounts. Useful to generate admin accounts, new signing keys, etc. - `fatcat-export`: high-speed JSON export tool, which talks directly to the - database (instead of going through the API). See `README.export.md`. + database (instead of going through the API). The `fatcat-api-spec` crate is generated from the openapi/swagger spec and contains Rust models, response types, and endpoint definitions (but not -- cgit v1.2.3