author     Bryan Newbold <bnewbold@archive.org>    2020-06-01 17:01:20 -0700
committer  Bryan Newbold <bnewbold@archive.org>    2020-06-01 17:01:20 -0700
commit     57db2db336c08031324e44b2d2880fbd4b6893c9 (patch)
tree       f5ad462ab6b3e7d3ac7987049e8c604bd5ee9fbe /Makefile
parent     08867f9b8de576f0831e6bb9f7b88acddcc31dee (diff)
download   chocula-57db2db336c08031324e44b2d2880fbd4b6893c9.tar.gz
           chocula-57db2db336c08031324e44b2d2880fbd4b6893c9.zip
'everything' at least partially working
Diffstat (limited to 'Makefile')
-rw-r--r--  Makefile | 59
1 file changed, 32 insertions(+), 27 deletions(-)
diff --git a/Makefile b/Makefile
index 53e0c6d..977c80e 100644
--- a/Makefile
+++ b/Makefile
@@ -4,39 +4,37 @@ SNAPSHOTITEM := $(shell grep ia_item sources.toml | cut -f2 -d'"')
.PHONY: help
help: ## Print info about all commands
- @grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}'
+ @echo "Commands:"
+ @echo
+ @grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf " \033[01;32m%-20s\033[0m %s\n", $$1, $$2}'
.PHONY: test
test: ## Run all tests and lints
pipenv run pytest
pipenv run mypy *.py chocula/*.py chocula/*/*.py --ignore-missing-imports
-#.PHONY: database
-#database: ## Build database from sources
-# @if [ ! -f data/ISSN-to-ISSN-L.txt ]; then echo "You must run 'make fetch-sources' first"; exit -1; fi
-# pipenv run ./chocula_tool.py everything
-
-#data/container_stats.json:
-# cat data/container_export.json | jq .issnl -r | sort -u > /tmp/container_issnl.tsv
-# cat /tmp/container_issnl.tsv | parallel -j10 curl -s 'https://fatcat.wiki/container/issnl/{}/stats.json' | jq -c . > /tmp/container_stats.json
-# mv /tmp/container_stats.json data
+data/container_stats.json:
+ mkdir -p data
+ cat data/container_export.json | jq .issnl -r | sort -u > /tmp/container_issnl.tsv
+ cat /tmp/container_issnl.tsv | parallel -j10 curl -s 'https://fatcat.wiki/container/issnl/{}/stats.json' | jq -c . > /tmp/container_stats.json
+ mv /tmp/container_stats.json data
-#.PHONY: container-stats
-#container-stats: data/container_stats.json
-# wc -l data/container_stats.json
-# @echo
-# @echo Done
+.PHONY: container-stats
+container-stats: data/container_stats.json
+ wc -l data/container_stats.json
+ @echo
+ @echo Done
-#data/homepage_status.json:
-# pipenv run ./chocula.py export_urls | shuf > /tmp/chocula_urls_to_crawl.tsv
-# pipenv run parallel -j10 --bar --pipepart -a /tmp/chocula_urls_to_crawl.shuf.tsv ./check_issn_urls.py > /tmp/homepage_status.json
-# cp /tmp/homepage_status.json data/
+data/homepage_status.json:
+	pipenv run ./chocula.py export_urls | shuf > /tmp/chocula_urls_to_crawl.shuf.tsv
+	pipenv run parallel -j10 --bar --pipepart -a /tmp/chocula_urls_to_crawl.shuf.tsv ./check_issn_urls.py > /tmp/homepage_status.json
+ cp /tmp/homepage_status.json data/
-#.PHONY: homepage-status
-#homepage-status: data/homepage_status.json
-# wc -l data/homepage-status.json
-# @echo
-# @echo Done
+.PHONY: homepage-status
+homepage-status: data/homepage_status.json
+	wc -l data/homepage_status.json
+ @echo
+ @echo Done
.PHONY: fetch-sources
fetch-sources: ## Download existing snapshot versions of all sources from archive.org
@@ -45,6 +43,7 @@ fetch-sources: ## Download existing snapshot versions of all sources from archiv
.PHONY: update-sources
update-sources: ## Download new versions of updatable sources
+ @# TODO: refactor to be individual targets-per-file (see fatcat-covid19 example)
mkdir -p data/$(TODAY)
wget -c "https://www.issn.org/wp-content/uploads/2014/03/issnltables.zip" -O /tmp/issnltables.$(TODAY).zip
unzip -p /tmp/issnltables.$(TODAY).zip "*.ISSN-to-ISSN-L.txt" > /tmp/ISSN-to-ISSN-L.$(TODAY).txt
@@ -58,11 +57,17 @@ update-sources: ## Download new versions of updatable sources
@echo
@echo "Successfully updated for date (UTC): $(TODAY)"
-#.PHONY: upload-sources
-#upload-sources: ## Upload an updated snapshot of sources to archive.org
-# ia upload --checksum chocula-sources-$(TODAY) data/*.tsv data/*.csv data/*.json data/*.txt
+.PHONY: upload-sources
+upload-sources: update-sources ## Upload most recent update-able sources to a new IA item
+ ia upload --checksum chocula-sources-snapshot-$(TODAY) data/$(TODAY)/*
+ # TODO: ia upload --checksum chocula-sources-$(TODAY) data/*.tsv data/*.csv data/*.json data/*.txt
#.PHONY: upload-snapshot
#upload-snapshot: ## Upload an sqlite snapshot to archive.org
# ia upload --checksum --no-derive chocula-snapshot-$(TODAY) chocula.sqlite3 README.md extra/count_chocula.jpg
+
+.PHONY: database
+database: ## Build database from sources
+ @if [ ! -f data/ISSN-to-ISSN-L.txt ]; then echo "You must run 'make fetch-sources' first"; exit -1; fi
+ pipenv run python -m chocula everything
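
Taken together, the newly un-commented targets form a small end-to-end workflow. A minimal usage sketch (assumptions: a working pipenv environment plus the jq, GNU parallel, and ia tools referenced in the recipes above; run from the repository root):

    # download existing source snapshots, then build the sqlite database;
    # 'database' refuses to run until data/ISSN-to-ISSN-L.txt exists
    make fetch-sources
    make database

    # regenerate derived metadata; both targets fan out over fatcat.wiki
    # and journal homepages with parallel -j10
    make container-stats
    make homepage-status

    # re-download updatable sources and push them to a new
    # chocula-sources-snapshot-$(TODAY) item on archive.org
    make upload-sources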