aboutsummaryrefslogtreecommitdiffstats
path: root/extra/sql_dumps/Makefile
blob: 68a9611424e7094a49c18ba4880e03b54411c375 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93

TODAY ?= $(shell date --iso --utc)
DATADIR ?= /srv/fatcat/snapshots/$(TODAY)
DATESLUG ?= $(shell date +%Y-%m-%d.%H%M%S)

.PHONY: help
help: ## Print info about all commands
	@echo "Commands:"
	@echo
	@grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf "    \033[01;32m%-20s\033[0m %s\n", $$1, $$2}'

.PHONY: create_datadir
create_datadir:
	mkdir -p $(DATADIR)/
	sudo chmod a+rw $(DATADIR)/

$(DATADIR)/.IDENTS: create_datadir
	sudo -u postgres DATABASE_URL=fatcat_prod ./ident_table_snapshot.sh $(DATADIR)
	sudo -u postgres mv /tmp/fatcat_ident_*.tsv $(DATADIR)
	touch $@

$(DATADIR)/release_export_expanded.json.gz: $(DATADIR)/.IDENTS
	cd ../../rust
	cat $(DATADIR)/fatcat_ident_releases_by_work.tsv | ./target/release/fatcat-export releasebywork --expand files,filesets,webcaptures,container -j8 | pigz > $@.wip
	mv  $@.wip  $@

$(DATADIR)/creator_export.json.gz: $(DATADIR)/.IDENTS
	cd ../../rust
	cat $(DATADIR)/fatcat_ident_creators.tsv | ./target/release/fatcat-export creator -j8 | pigz > $@.wip
	mv  $@.wip  $@

$(DATADIR)/container_export.json.gz: $(DATADIR)/.IDENTS
	cd ../../rust
	cat $(DATADIR)/fatcat_ident_containers.tsv | ./target/release/fatcat-export container -j8 | pigz > $@.wip
	mv  $@.wip  $@

$(DATADIR)/file_export.json.gz: $(DATADIR)/.IDENTS
	cd ../../rust
	cat $(DATADIR)/fatcat_ident_files.tsv | ./target/release/fatcat-export file -j8 | pigz > $@.wip
	mv  $@.wip  $@

$(DATADIR)/fileset_export.json.gz: $(DATADIR)/.IDENTS
	cd ../../rust
	cat $(DATADIR)/fatcat_ident_filesets.tsv | ./target/release/fatcat-export fileset -j8 | pigz > $@.wip
	mv  $@.wip  $@

$(DATADIR)/webcapture_export.json.gz: $(DATADIR)/.IDENTS
	cd ../../rust
	cat $(DATADIR)/fatcat_ident_webcaptures.tsv | ./target/release/fatcat-export webcapture -j8 | pigz > $@.wip
	mv  $@.wip  $@

$(DATADIR)/abstracts.json.gz: $(DATADIR)/
	sudo -u postgres psql fatcat_prod < dump_abstracts.sql | egrep -v ^BEGIN$ | egrep -v ^ROLLBACK$ | pv -l | pigz > $@.wip
	mv  $@.wip  $@

$(DATADIR)/file_hashes.tsv.gz: $(DATADIR)/
	sudo -u postgres psql fatcat_prod < dump_file_hashes.sql | egrep -v ^BEGIN$ | egrep -v ^ROLLBACK$ | pv -l | pigz > $@.wip
	mv  $@.wip  $@

$(DATADIR)/release_extid.tsv.gz: $(DATADIR)/
	sudo -u postgres psql fatcat_prod < dump_release_extid.sql | egrep -v ^BEGIN$ | egrep -v ^ROLLBACK$ | pv -l | pigz > $@.wip
	mv  $@.wip  $@

.PHONY: metadata-exports
metadata-exports: $(DATADIR)/.IDENTS $(DATADIR)/release_export_expanded.json.gz $(DATADIR)/creator_export.json.gz $(DATADIR)/container_export.json.gz $(DATADIR)/file_export.json.gz $(DATADIR)/fileset_export.json.gz $(DATADIR)/webcapture_export.json.gz $(DATADIR)/abstracts.json.gz $(DATADIR)/file_hashes.tsv.gz $(DATADIR)/release_extid.tsv.gz ## Dump bulk metadata to disk
	@echo

$(DATADIR)/.METADATA_UPLOADED: metadata-exports
	ia upload --checksum fatcat_bulk_exports_$(TODAY) ia_exports_item_readme.md --remote-name=README.md -m collection:fatcat_snapshots_and_exports -m mediatype:data -m creator:"Internet Archive Web Group" -m date:$(TODAY) -m title:"Fatcat Bulk Metadata Exports ($(TODAY))"
	ia upload fatcat_bulk_exports_$(TODAY) $(DATADIR)/*_export.json.gz $(DATADIR)/abstracts.json.gz $(DATADIR)/file_hashes.tsv.gz $(DATADIR)/release_extid.tsv.gz
	touch $@

.PHONY: upload-metadata-exports
upload-metadata-exports: $(DATADIR)/.METADATA_UPLOADED ## Upload bulk metadata exports to archive.org
	@echo

$(DATADIR)/.PUBLIC_DB_DUMP: create_datadir
	sudo -u postgres pg_dump --verbose --format=custom --exclude-table-data=auth_oidc fatcat_prod > $(DATADIR)/fatcat_public_dbdump_${DATESLUG}.pgdump.wip
	mv $(DATADIR)/fatcat_public_dbdump_${DATESLUG}.pgdump.wip $(DATADIR)/fatcat_public_dbdump_${DATESLUG}.pgdump
	touch $@

.PHONY: public-database-snapshot
public-database-snapshot: $(DATADIR)/.PUBLIC_DB_DUMP ## Create SQL database snapshot which can be shared publicly
	@echo

$(DATADIR)/.PUBLIC_DB_UPLOADED: public-database-snapshot
	ia upload --checksum fatcat_sqldump_public_$(TODAY) ia_sqldump_item_readme.md --remote-name=README.md -m collection:fatcat_snapshots_and_exports -m mediatype:data -m creator:"Internet Archive Web Group" -m date:$(TODAY) -m title:"Fatcat Public Database Snapshot ($(TODAY))"
	ia upload --checksum fatcat_sqldump_public_$(TODAY) $(DATADIR)/fatcat_public_dbdump_*.pgdump
	touch $@

.PHONY: upload-public-database-snapshot
upload-public-database-snapshot: public-database-snapshot $(DATADIR)/.PUBLIC_DB_UPLOADED ## Upload metadata snapshot to archive.org
	@echo