| author | Bryan Newbold <bnewbold@archive.org> | 2020-06-01 17:01:20 -0700 | 
|---|---|---|
| committer | Bryan Newbold <bnewbold@archive.org> | 2020-06-01 17:01:20 -0700 | 
| commit | 57db2db336c08031324e44b2d2880fbd4b6893c9 (patch) | |
| tree | f5ad462ab6b3e7d3ac7987049e8c604bd5ee9fbe /Makefile | |
| parent | 08867f9b8de576f0831e6bb9f7b88acddcc31dee (diff) | |
| download | chocula-57db2db336c08031324e44b2d2880fbd4b6893c9.tar.gz chocula-57db2db336c08031324e44b2d2880fbd4b6893c9.zip | |
'everything' at least partially working
Diffstat (limited to 'Makefile')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | Makefile | 59 |

1 file changed, 32 insertions, 27 deletions
```diff
@@ -4,39 +4,37 @@ SNAPSHOTITEM := $(shell grep ia_item sources.toml | cut -f2 -d'"')
 
 .PHONY: help
 help: ## Print info about all commands
-	@grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}'
+	@echo "Commands:"
+	@echo
+	@grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf "    \033[01;32m%-20s\033[0m %s\n", $$1, $$2}'
 
 .PHONY: test
 test: ## Run all tests and lints
 	pipenv run pytest
 	pipenv run mypy *.py chocula/*.py chocula/*/*.py --ignore-missing-imports
 
-#.PHONY: database
-#database: ## Build database from sources
-#	@if [ ! -f data/ISSN-to-ISSN-L.txt ]; then echo "You must run 'make fetch-sources' first"; exit -1; fi
-#	pipenv run ./chocula_tool.py everything
-
-#data/container_stats.json:
-#	cat data/container_export.json | jq .issnl -r | sort -u > /tmp/container_issnl.tsv
-#	cat /tmp/container_issnl.tsv | parallel -j10 curl -s 'https://fatcat.wiki/container/issnl/{}/stats.json' | jq -c . > /tmp/container_stats.json
-#	mv /tmp/container_stats.json data
+data/container_stats.json:
+	mkdir -p data
+	cat data/container_export.json | jq .issnl -r | sort -u > /tmp/container_issnl.tsv
+	cat /tmp/container_issnl.tsv | parallel -j10 curl -s 'https://fatcat.wiki/container/issnl/{}/stats.json' | jq -c . > /tmp/container_stats.json
+	mv /tmp/container_stats.json data
 
-#.PHONY: container-stats
-#container-stats: data/container_stats.json
-#	wc -l data/container_stats.json
-#	@echo
-#	@echo Done
+.PHONY: container-stats
+container-stats: data/container_stats.json
+	wc -l data/container_stats.json
+	@echo
+	@echo Done
 
-#data/homepage_status.json:
-#	pipenv run ./chocula.py export_urls | shuf > /tmp/chocula_urls_to_crawl.tsv
-#	pipenv run parallel -j10 --bar --pipepart -a /tmp/chocula_urls_to_crawl.shuf.tsv ./check_issn_urls.py > /tmp/homepage_status.json
-#	cp /tmp/homepage_status.json data/
+data/homepage_status.json:
+	pipenv run ./chocula.py export_urls | shuf > /tmp/chocula_urls_to_crawl.tsv
+	pipenv run parallel -j10 --bar --pipepart -a /tmp/chocula_urls_to_crawl.shuf.tsv ./check_issn_urls.py > /tmp/homepage_status.json
+	cp /tmp/homepage_status.json data/
 
-#.PHONY: homepage-status
-#homepage-status: data/homepage_status.json
-#	wc -l data/homepage-status.json
-#	@echo
-#	@echo Done
+.PHONY: homepage-status
+homepage-status: data/homepage_status.json
+	wc -l data/homepage-status.json
+	@echo
+	@echo Done
 
 .PHONY: fetch-sources
 fetch-sources: ## Download existing snapshot versions of all sources from archive.org
@@ -45,6 +43,7 @@ fetch-sources: ## Download existing snapshot versions of all sources from archiv
 
 .PHONY: update-sources
 update-sources: ## Download new versions of updatable sources
+	@# TODO: refactor to be individual targets-per-file (see fatcat-covid19 example)
 	mkdir -p data/$(TODAY)
 	wget -c "https://www.issn.org/wp-content/uploads/2014/03/issnltables.zip" -O /tmp/issnltables.$(TODAY).zip
 	unzip -p /tmp/issnltables.$(TODAY).zip "*.ISSN-to-ISSN-L.txt" > /tmp/ISSN-to-ISSN-L.$(TODAY).txt
@@ -58,11 +57,17 @@ update-sources: ## Download new versions of updatable sources
 
 	@echo
 	@echo "Successfully updated for date (UTC): $(TODAY)"
 
-#.PHONY: upload-sources
-#upload-sources: ## Upload an updated snapshot of sources to archive.org
-#	ia upload --checksum chocula-sources-$(TODAY) data/*.tsv data/*.csv data/*.json data/*.txt
+.PHONY: upload-sources
+upload-sources: update-sources ## Upload most recent update-able sources to a new IA item
+	ia upload --checksum chocula-sources-snapshot-$(TODAY) data/$(TODAY)/*
+	# TODO: ia upload --checksum chocula-sources-$(TODAY) data/*.tsv data/*.csv data/*.json data/*.txt
 
 #.PHONY: upload-snapshot
 #upload-snapshot: ## Upload an sqlite snapshot to archive.org
 #	ia upload --checksum --no-derive chocula-snapshot-$(TODAY) chocula.sqlite3 README.md extra/count_chocula.jpg
+
+.PHONY: database
+database: ## Build database from sources
+	@if [ ! -f data/ISSN-to-ISSN-L.txt ]; then echo "You must run 'make fetch-sources' first"; exit -1; fi
+	pipenv run python -m chocula everything
```
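For orientation, here is a rough usage sketch of how the restored targets chain together. It is not part of the commit, and it assumes `pipenv`, `jq`, GNU `parallel`, and the `ia` CLI are installed and configured:

```shell
# Grab the existing source snapshots from archive.org, then build the
# sqlite database; the database target refuses to run until
# data/ISSN-to-ISSN-L.txt exists and points you at fetch-sources.
make fetch-sources
make database

# Refresh the updatable sources for today (UTC) and push them to a new
# archive.org item; upload-sources now depends on update-sources.
make upload-sources

# The stats files are plain file targets rather than .PHONY recipes, so
# they are only rebuilt when the output files are missing:
make container-stats    # builds data/container_stats.json
make homepage-status    # builds data/homepage_status.json
```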
