aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@archive.org> 2020-10-16 19:32:49 -0700
committer: Bryan Newbold <bnewbold@archive.org> 2020-10-16 19:32:49 -0700
commit: f7fcb8feb363c4441cf3b37cc3811ca302ad9cca (patch)
tree: 23ffdc87d7f085fc24c42d9178eea1d953e6ba8a
parent: 7497d1baf0c3a9c24f5b9ce05c9567e555e4e6c9 (diff)
downloadfatcat-scholar-f7fcb8feb363c4441cf3b37cc3811ca302ad9cca.tar.gz
fatcat-scholar-f7fcb8feb363c4441cf3b37cc3811ca302ad9cca.zip
make: add issue-db target
-rw-r--r--Makefile35
1 files changed, 35 insertions, 0 deletions
diff --git a/Makefile b/Makefile
index 67598f2..ff15274 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,7 @@
SHELL = /bin/bash
.SHELLFLAGS = -o pipefail -c
+# UTC date in ISO 8601 form, used to name the per-day snapshot directory under
+# data/. May be overridden from the command line or environment to target a
+# specific snapshot directory.
+TODAY ?= $(shell date --iso --utc)
+# `?=` defines a recursively-expanded variable, so the $(shell ...) call above
+# would otherwise re-run on every reference. Target names are expanded when the
+# Makefile is parsed, but recipes are expanded when they run; pin the value
+# once here so a build that crosses UTC midnight keeps using one directory.
+TODAY := $(TODAY)
.PHONY: help
help: ## Print info about all commands
@@ -62,3 +63,37 @@ update-i18n: ## Re-extract and compile translation files
pipenv run pybabel extract -F extra/i18n/babel.cfg -o extra/i18n/web_interface.pot fatcat_scholar/
pipenv run pybabel update -i extra/i18n/web_interface.pot -d fatcat_scholar/translations
pipenv run pybabel compile -d fatcat_scholar/translations
+
+# Per-day snapshot directory that holds all of the files built below.
+data/$(TODAY):
+	mkdir -p $(@)
+
+# Item list from `ia search` for collection-type items in the periodicals /
+# sim_microfilm collections, filtered to identifiers beginning with "pub_".
+#
+# The snapshot directory is an order-only prerequisite (after `|`): it must
+# exist, but its mtime changes whenever a file is created inside it, and that
+# must not make this list look out of date and trigger a re-fetch (which would
+# in turn rebuild everything derived from it).
+#
+# Output is written to $@.wip and renamed only on success, so a failed or
+# interrupted search never leaves a truncated file at the target path.
+data/$(TODAY)/sim_collections.tsv: | data/$(TODAY)
+	ia search "collection:periodicals collection:sim_microfilm mediatype:collection" --itemlist | rg "^pub_" > $@.wip
+	mv $@.wip $@
+
+# Item list from `ia search` for texts-type items in the periodicals /
+# sim_microfilm collections, filtered to identifiers beginning with "sim_".
+#
+# The snapshot directory is an order-only prerequisite (after `|`): it must
+# exist, but its mtime changes whenever a file is created inside it, and that
+# must not make this list look out of date and trigger a re-fetch (which would
+# in turn rebuild everything derived from it).
+#
+# Output is written to $@.wip and renamed only on success, so a failed or
+# interrupted search never leaves a truncated file at the target path.
+data/$(TODAY)/sim_items.tsv: | data/$(TODAY)
+	ia search "collection:periodicals collection:sim_microfilm mediatype:texts" --itemlist | rg "^sim_" > $@.wip
+	mv $@.wip $@
+
+# Fetch metadata for every identifier in the collection list, running four
+# `ia metadata` calls at a time; `jq . -c` re-emits each record in compact
+# form and `pv -l` shows a line-count progress meter.
+#
+# The input is referenced as $< so the recipe reads exactly the prerequisite
+# that make checked, even if $(TODAY) would expand differently by the time the
+# recipe runs. Output goes to $@.wip and is renamed only on success.
+data/$(TODAY)/sim_collections.json: data/$(TODAY)/sim_collections.tsv
+	cat $< | parallel -j4 ia metadata {} | jq . -c | pv -l > $@.wip
+	mv $@.wip $@
+
+# Fetch metadata for every identifier in the item list, running eight
+# `ia metadata` calls at a time; `jq . -c` re-emits each record in compact
+# form and `pv -l` shows a line-count progress meter.
+#
+# The input is referenced as $< so the recipe reads exactly the prerequisite
+# that make checked, even if $(TODAY) would expand differently by the time the
+# recipe runs. Output goes to $@.wip and is renamed only on success.
+data/$(TODAY)/sim_items.json: data/$(TODAY)/sim_items.tsv
+	cat $< | parallel -j8 ia metadata {} | jq . -c | pv -l > $@.wip
+	mv $@.wip $@
+
+# Build the SQLite issue database from the two metadata dumps of this snapshot.
+#
+# The database is assembled at $@.wip and renamed into place only after every
+# step has succeeded. Every step therefore passes the same `--db-file $@.wip`,
+# runs through `pipenv run`, and reads its input from this snapshot's own
+# directory ($(@D)), i.e. the files listed as prerequisites.
+# Any $@.wip left behind by an earlier failed run is removed first so the
+# loaders always start from a freshly initialised database.
+#
+# NOTE(review): subcommand names (init_db, load_pub, load_issues, load_counts)
+# and the assumption that the loaders read from stdin are kept as in the
+# original recipe; confirm against the fatcat_scholar.issue_db CLI.
+data/$(TODAY)/issue_db.sqlite: data/$(TODAY)/sim_collections.json data/$(TODAY)/sim_items.json
+	rm -f $@.wip
+	pipenv run python -m fatcat_scholar.issue_db --db-file $@.wip init_db
+	cat $(@D)/sim_collections.json | pv -l | pipenv run python -m fatcat_scholar.issue_db --db-file $@.wip load_pub
+	cat $(@D)/sim_items.json | pv -l | pipenv run python -m fatcat_scholar.issue_db --db-file $@.wip load_issues
+	pipenv run python -m fatcat_scholar.issue_db --db-file $@.wip load_counts
+	mv $@.wip $@
+
+# Publish the dated database at the default location.
+# The copy is written to a temporary name and then renamed, matching the
+# .wip + mv pattern of the other rules, so an interrupted copy never leaves a
+# truncated database at the target path that would look up to date to make.
+data/issue_db.sqlite: data/$(TODAY)/issue_db.sqlite
+	cp $< $@.wip
+	mv $@.wip $@
+
+# Phony entry point: it has no recipe of its own and only requires
+# data/issue_db.sqlite to be up to date.
+.PHONY: issue-db
+issue-db: data/issue_db.sqlite ## Build SIM issue database with today's metadata, then move to default location