about | summary | refs | log | tree | commit | diff | stats
path: root/extra
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@robocracy.org> 2022-04-26 15:12:49 -0700
committer: Bryan Newbold <bnewbold@robocracy.org> 2022-04-26 15:12:49 -0700
commit: ba0946a86d948bef14c1b990a8b0d08f7caf9765 (patch)
tree: bcd2dae113f6d66ec5a84c1d16022d10202ea380 /extra
parent: 2e3706ad6005366a4084731a7a33db7b800f7bfe (diff)
download: fatcat-ba0946a86d948bef14c1b990a8b0d08f7caf9765.tar.gz
download: fatcat-ba0946a86d948bef14c1b990a8b0d08f7caf9765.zip
bulk dumps: update Makefile with bugfixes
Diffstat (limited to 'extra')
-rw-r--r-- extra/sql_dumps/Makefile 56
1 files changed, 28 insertions, 28 deletions
diff --git a/extra/sql_dumps/Makefile b/extra/sql_dumps/Makefile
index 68a96114..01607d34 100644
--- a/extra/sql_dumps/Makefile
+++ b/extra/sql_dumps/Makefile
@@ -1,7 +1,9 @@
+SHELL=/bin/bash -euo pipefail
TODAY ?= $(shell date --iso --utc)
DATADIR ?= /srv/fatcat/snapshots/$(TODAY)
DATESLUG ?= $(shell date +%Y-%m-%d.%H%M%S)
+DATABASE_URL ?= fatcat_prod
.PHONY: help
help: ## Print info about all commands
@@ -14,80 +16,78 @@ create_datadir:
mkdir -p $(DATADIR)/
sudo chmod a+rw $(DATADIR)/
-$(DATADIR)/.IDENTS: create_datadir
- sudo -u postgres DATABASE_URL=fatcat_prod ./ident_table_snapshot.sh $(DATADIR)
+$(DATADIR)/.IDENTS:
+ sudo -u postgres DATABASE_URL=$(DATABASE_URL) ./ident_table_snapshot.sh $(DATADIR)
sudo -u postgres mv /tmp/fatcat_ident_*.tsv $(DATADIR)
touch $@
$(DATADIR)/release_export_expanded.json.gz: $(DATADIR)/.IDENTS
- cd ../../rust
- cat $(DATADIR)/fatcat_ident_releases_by_work.tsv | ./target/release/fatcat-export releasebywork --expand files,filesets,webcaptures,container -j8 | pigz > $@.wip
+ cd ../../rust; cat $(DATADIR)/fatcat_ident_releases_by_work.tsv | sudo -u fatcat ./target/release/fatcat-export releasebywork --expand files,filesets,webcaptures,container -j8 | pigz > $@.wip
mv $@.wip $@
$(DATADIR)/creator_export.json.gz: $(DATADIR)/.IDENTS
- cd ../../rust
- cat $(DATADIR)/fatcat_ident_creators.tsv | ./target/release/fatcat-export creator -j8 | pigz > $@.wip
+ cd ../../rust; cat $(DATADIR)/fatcat_ident_creators.tsv | sudo -u fatcat ./target/release/fatcat-export creator -j8 | pigz > $@.wip
mv $@.wip $@
$(DATADIR)/container_export.json.gz: $(DATADIR)/.IDENTS
- cd ../../rust
- cat $(DATADIR)/fatcat_ident_containers.tsv | ./target/release/fatcat-export container -j8 | pigz > $@.wip
+ cd ../../rust; cat $(DATADIR)/fatcat_ident_containers.tsv | sudo -u fatcat ./target/release/fatcat-export container -j8 | pigz > $@.wip
mv $@.wip $@
$(DATADIR)/file_export.json.gz: $(DATADIR)/.IDENTS
- cd ../../rust
- cat $(DATADIR)/fatcat_ident_files.tsv | ./target/release/fatcat-export file -j8 | pigz > $@.wip
+ cd ../../rust; cat $(DATADIR)/fatcat_ident_files.tsv | sudo -u fatcat ./target/release/fatcat-export file -j8 | pigz > $@.wip
mv $@.wip $@
$(DATADIR)/fileset_export.json.gz: $(DATADIR)/.IDENTS
- cd ../../rust
- cat $(DATADIR)/fatcat_ident_filesets.tsv | ./target/release/fatcat-export fileset -j8 | pigz > $@.wip
+ cd ../../rust; cat $(DATADIR)/fatcat_ident_filesets.tsv | sudo -u fatcat ./target/release/fatcat-export fileset -j8 | pigz > $@.wip
mv $@.wip $@
$(DATADIR)/webcapture_export.json.gz: $(DATADIR)/.IDENTS
- cd ../../rust
- cat $(DATADIR)/fatcat_ident_webcaptures.tsv | ./target/release/fatcat-export webcapture -j8 | pigz > $@.wip
+ cd ../../rust; cat $(DATADIR)/fatcat_ident_webcaptures.tsv | sudo -u fatcat ./target/release/fatcat-export webcapture -j8 | pigz > $@.wip
mv $@.wip $@
-$(DATADIR)/abstracts.json.gz: $(DATADIR)/
- sudo -u postgres psql fatcat_prod < dump_abstracts.sql | egrep -v ^BEGIN$ | egrep -v ^ROLLBACK$ | pv -l | pigz > $@.wip
+$(DATADIR)/abstracts.json.gz:
+ sudo -u postgres psql $(DATABASE_URL) < dump_abstracts.sql | egrep -v ^BEGIN$ | egrep -v ^ROLLBACK$ | pv -l | pigz > $@.wip
mv $@.wip $@
-$(DATADIR)/file_hashes.tsv.gz: $(DATADIR)/
- sudo -u postgres psql fatcat_prod < dump_file_hashes.sql | egrep -v ^BEGIN$ | egrep -v ^ROLLBACK$ | pv -l | pigz > $@.wip
+$(DATADIR)/file_hashes.tsv.gz:
+ sudo -u postgres psql $(DATABASE_URL) < dump_file_hashes.sql | egrep -v ^BEGIN$ | egrep -v ^ROLLBACK$ | pv -l | pigz > $@.wip
mv $@.wip $@
-$(DATADIR)/release_extid.tsv.gz: $(DATADIR)/
- sudo -u postgres psql fatcat_prod < dump_release_extid.sql | egrep -v ^BEGIN$ | egrep -v ^ROLLBACK$ | pv -l | pigz > $@.wip
+$(DATADIR)/release_extid.tsv.gz:
+ sudo -u postgres psql $(DATABASE_URL) < dump_release_extid.sql | egrep -v ^BEGIN$ | egrep -v ^ROLLBACK$ | pv -l | pigz > $@.wip
mv $@.wip $@
+$(DATADIR)/.METADATA_EXPORT: $(DATADIR)/.IDENTS $(DATADIR)/release_export_expanded.json.gz $(DATADIR)/creator_export.json.gz $(DATADIR)/container_export.json.gz $(DATADIR)/file_export.json.gz $(DATADIR)/fileset_export.json.gz $(DATADIR)/webcapture_export.json.gz $(DATADIR)/abstracts.json.gz $(DATADIR)/file_hashes.tsv.gz $(DATADIR)/release_extid.tsv.gz ## Dump bulk metadata to disk
+ touch $@
+
.PHONY: metadata-exports
-metadata-exports: $(DATADIR)/.IDENTS $(DATADIR)/release_export_expanded.json.gz $(DATADIR)/creator_export.json.gz $(DATADIR)/container_export.json.gz $(DATADIR)/file_export.json.gz $(DATADIR)/fileset_export.json.gz $(DATADIR)/webcapture_export.json.gz $(DATADIR)/abstracts.json.gz $(DATADIR)/file_hashes.tsv.gz $(DATADIR)/release_extid.tsv.gz ## Dump bulk metadata to disk
+metadata-exports: create_datadir $(DATADIR)/.METADATA_EXPORT ## Dump bulk metadata to disk
@echo
-$(DATADIR)/.METADATA_UPLOADED: metadata-exports
+
+$(DATADIR)/.METADATA_UPLOADED: $(DATADIR)/.METADATA_EXPORT
ia upload --checksum fatcat_bulk_exports_$(TODAY) ia_exports_item_readme.md --remote-name=README.md -m collection:fatcat_snapshots_and_exports -m mediatype:data -m creator:"Internet Archive Web Group" -m date:$(TODAY) -m title:"Fatcat Bulk Metadata Exports ($(TODAY))"
- ia upload fatcat_bulk_exports_$(TODAY) $(DATADIR)/*_export.json.gz $(DATADIR)/abstracts.json.gz $(DATADIR)/file_hashes.tsv.gz $(DATADIR)/release_extid.tsv.gz
+ ia upload fatcat_bulk_exports_$(TODAY) $(DATADIR)/*_export.json.gz $(DATADIR)/*_export_expanded.json.gz $(DATADIR)/abstracts.json.gz $(DATADIR)/file_hashes.tsv.gz $(DATADIR)/release_extid.tsv.gz
touch $@
.PHONY: upload-metadata-exports
-upload-metadata-exports: $(DATADIR)/.METADATA_UPLOADED ## Upload bulk metadata exports to archive.org
+upload-metadata-exports: create_datadir $(DATADIR)/.METADATA_UPLOADED ## Upload bulk metadata exports to archive.org
@echo
-$(DATADIR)/.PUBLIC_DB_DUMP: create_datadir
+$(DATADIR)/.PUBLIC_DB_DUMP:
sudo -u postgres pg_dump --verbose --format=custom --exclude-table-data=auth_oidc fatcat_prod > $(DATADIR)/fatcat_public_dbdump_${DATESLUG}.pgdump.wip
mv $(DATADIR)/fatcat_public_dbdump_${DATESLUG}.pgdump.wip $(DATADIR)/fatcat_public_dbdump_${DATESLUG}.pgdump
touch $@
.PHONY: public-database-snapshot
-public-database-snapshot: $(DATADIR)/.PUBLIC_DB_DUMP ## Create SQL database snapshot which can be shared publicly
+public-database-snapshot: create_datadir $(DATADIR)/.PUBLIC_DB_DUMP ## Create SQL database snapshot which can be shared publicly
@echo
-$(DATADIR)/.PUBLIC_DB_UPLOADED: public-database-snapshot
+$(DATADIR)/.PUBLIC_DB_UPLOADED: $(DATADIR)/.PUBLIC_DB_DUMP
ia upload --checksum fatcat_sqldump_public_$(TODAY) ia_sqldump_item_readme.md --remote-name=README.md -m collection:fatcat_snapshots_and_exports -m mediatype:data -m creator:"Internet Archive Web Group" -m date:$(TODAY) -m title:"Fatcat Public Database Snapshot ($(TODAY))"
ia upload --checksum fatcat_sqldump_public_$(TODAY) $(DATADIR)/fatcat_public_dbdump_*.pgdump
touch $@
.PHONY: upload-public-database-snapshot
-upload-public-database-snapshot: public-database-snapshot $(DATADIR)/.PUBLIC_DB_UPLOADED ## Upload metadata snapshot to archive.org
+upload-public-database-snapshot: create_datadir public-database-snapshot $(DATADIR)/.PUBLIC_DB_UPLOADED ## Upload metadata snapshot to archive.org
@echo