3 files changed, 22 insertions, 12 deletions
diff --git a/extra/partition_dumps/README.md b/extra/partition_dumps/README.md
index 5e42ff48..463bf42d 100644
--- a/extra/partition_dumps/README.md
+++ b/extra/partition_dumps/README.md
@@ -6,8 +6,8 @@ journal/container.
 Example parititoning a sample by release type:
 
     cat release_export_expanded_sample.json | jq .release_type -r > release_export_expanded_sample.release_type
-    cat release_export_expanded_sample.release_type | sort | uniq -c | sort -nr > release_export_expanded_sample.release_type.counts
-    cat release_export_expanded_sample.json | paste release_export_expanded_sample.release_type - | sort > out
+    cat release_export_expanded_sample.release_type | sort -S 4G | uniq -c | sort -S 500M -nr > release_export_expanded_sample.release_type.counts
+    cat release_export_expanded_sample.json | paste release_export_expanded_sample.release_type - | sort -S 4G > out
 
 More production-y example using ISSN-L:
 
@@ -16,10 +16,10 @@ More production-y example using ISSN-L:
 
     # it's a pretty huge sort, will need 300+ GB scratch space? this might not scale.
     zcat release_export_expanded.json.gz | jq .container.issnl -r > release_export_expanded.issnl
-    zcat release_export_expanded.json.gz | paste release_export_expanded.issnl - | sort  | ./partition_script.py
+    zcat release_export_expanded.json.gz | paste release_export_expanded.issnl - | sort -S 8G | ./partition_script.py
 
     # for verification/stats
-    cat release_export_expanded.issnl | sort | uniq -c | sort -nr > release_export_expanded.issnl.counts
+    cat release_export_expanded.issnl | sort -S 1G | uniq -c | sort -S 1G -nr > release_export_expanded.issnl.counts
     
     # cleanup
     rm release_export_expanded.issnl
diff --git a/extra/sql_dumps/README.md b/extra/sql_dumps/README.md
index 3538ce8f..45adae09 100644
--- a/extra/sql_dumps/README.md
+++ b/extra/sql_dumps/README.md
@@ -1,8 +1,6 @@
 
 ## HOWTO: Ident Table Snapshots
 
-How to take a consistent (single transaction) snapshot of 
-
 This will take somewhere around 15-25 GB of disk space on the database server
 (under /tmp). It would probably be better to stream this transaction over a
 network connection (saving database disk I/O), but I can't figure out how to do
@@ -11,6 +9,11 @@ to be a custom client.
 
     ./ident_table_snapshot.sh
 
+Or, in production:
+
+    sudo su postgres
+    DATABASE_URL=fatcat_prod ./ident_table_snapshot.sh /tmp
+
 ## HOWTO: Dump abstracts, release identifiers, file hashes, etc
 
 These are run as regular old commands, and can run across the network in a
@@ -24,10 +27,16 @@ forwarding anyways.
     # Run on database server, write to file on remote host
     psql fatcat < dump_abstracts.sql | egrep -v ^BEGIN$ | egrep -v ^ROLLBACK$ | pv -l | gzip | ssh user@host 'cat > abstracts.json.gz'
 
+In production:
+
+    sudo -u postgres psql fatcat_prod < dump_abstracts.sql | egrep -v ^BEGIN$ | egrep -v ^ROLLBACK$ | pv -l | gzip > /srv/fatcat/snapshots/abstracts.json.gz
+    sudo -u postgres psql fatcat_prod < dump_file_hashes.sql | egrep -v ^BEGIN$ | egrep -v ^ROLLBACK$ | pv -l | gzip > /srv/fatcat/snapshots/file_hashes.tsv.gz
+    sudo -u postgres psql fatcat_prod < dump_release_extid.sql | egrep -v ^BEGIN$ | egrep -v ^ROLLBACK$ | pv -l | gzip > /srv/fatcat/snapshots/release_extid.tsv.gz
+
 ## HOWTO: Full private database backup and restore
 
     export DATESLUG="`date +%Y-%m-%d.%H%M%S`"
-    sudo -u postgres pg_dump --verbose --format=tar fatcat_prod | gzip > /srv/fatcat/snapshots/fatcat_private_dbdump_${DATESLUG}.tar.gz
+    time sudo -u postgres pg_dump --verbose --format=tar fatcat_prod | gzip > /srv/fatcat/snapshots/fatcat_private_dbdump_${DATESLUG}.tar.gz
 
 NOTE: by using the "directory" export (along with `--file`) instead of "tar"
 export, it would be possible to use parallel dumping. However, this would put
@@ -48,6 +57,7 @@ This dump will contain all tables in the backend schema, except for "private"
 authentication tables. For local or non-production machines, might need to
 replace the `fatcat_prod` database name.
 
+    # TODO: for production, probably want a consistent serializable snapshot (see pg_dump's `--serializable-deferrable` option)
     export DATESLUG="`date +%Y-%m-%d.%H%M%S`"
     sudo -u postgres pg_dump --verbose --format=tar --exclude-table-data=auth_oidc fatcat_prod | gzip > /srv/fatcat/snapshots/fatcat_public_dbdump_${DATESLUG}.tar.gz
 
diff --git a/rust/README.export.md b/rust/README.export.md
index f9965a6f..f02c7f45 100644
--- a/rust/README.export.md
+++ b/rust/README.export.md
@@ -1,5 +1,5 @@
 
-First create ident files, following `../extra/sql_dumps/`.
+First create ident files, following `../extra/sql_dumps/README.md`.
 
 Then, to dump locally to stdout:
 
@@ -7,8 +7,8 @@ Then, to dump locally to stdout:
 
 Or, in production:
 
-    cat /tmp/fatcat_ident_releases.tsv | ./target/release/fatcat-export release --expand files,filesets,webcaptures,container -j8 | pv -l | gzip > /srv/fatcat/snapshots/release_export_expanded.json.gz
-    cat /tmp/fatcat_ident_releases.tsv | ./target/release/fatcat-export release -j8 | pv -l | gzip > /srv/fatcat/snapshots/release_export.json.gz
-    cat /tmp/fatcat_ident_containers.tsv | ./target/release/fatcat-export container -j8 | pv -l | gzip > /srv/fatcat/snapshots/container_export.json.gz
-    cat /tmp/fatcat_ident_files.tsv | ./target/release/fatcat-export file -j8 | pv -l | gzip > /srv/fatcat/snapshots/file_export.json.gz
+    cat /tmp/fatcat_ident_releases.tsv | ./target/release/fatcat-export release --expand files,filesets,webcaptures,container -j8 | gzip > /srv/fatcat/snapshots/release_export_expanded.json.gz
+    cat /tmp/fatcat_ident_releases.tsv | ./target/release/fatcat-export release -j8 | gzip > /srv/fatcat/snapshots/release_export.json.gz
+    cat /tmp/fatcat_ident_containers.tsv | ./target/release/fatcat-export container -j8 | gzip > /srv/fatcat/snapshots/container_export.json.gz
+    cat /tmp/fatcat_ident_files.tsv | ./target/release/fatcat-export file -j8 | gzip > /srv/fatcat/snapshots/file_export.json.gz