aboutsummaryrefslogtreecommitdiffstats
path: root/Makefile
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@archive.org> 2021-11-24 18:12:25 -0800
committer Bryan Newbold <bnewbold@archive.org> 2021-11-24 18:12:25 -0800
commit 3e0a14e3d61f65e25f659d7f8b34aac7d0d223e6 (patch)
tree 8528942cd70ac3f29cea1a2454230df7052c6e3c /Makefile
parent c12d77f60a767414d5ba69989566b8c65eb6adae (diff)
download chocula-3e0a14e3d61f65e25f659d7f8b34aac7d0d223e6.tar.gz
download chocula-3e0a14e3d61f65e25f659d7f8b34aac7d0d223e6.zip
make: homepage-status skip some large publisher domains for speed
Diffstat (limited to 'Makefile')
-rw-r--r-- Makefile 4
1 files changed, 2 insertions, 2 deletions
diff --git a/Makefile b/Makefile
index 477de20..5614216 100644
--- a/Makefile
+++ b/Makefile
@@ -95,9 +95,9 @@ update-sources: data/$(TODAY)/kbart_JSTOR.txt data/$(TODAY)/kbart_CLOCKSS.txt da
@echo "Successfully updated for date (UTC): $(TODAY)"
data/$(TODAY)/homepage_status.json:
- pipenv run python -m chocula export_urls | shuf | pv -l > /tmp/chocula_urls.tsv
+ pipenv run python -m chocula export_urls | rg -v www.jstor.org | rg -v www.tandfonline.com | rg -v www.sciencedirect.com | rg -v link.springer.com | rg -v onlinelibrary.wiley.com | rg -v dialnet.unirioja.es | rg -v www.springer.com | rg -v www.journals.elsevier.com | rg -v web.archive.org | rg -v catalog.hathitrust.org | shuf | pv -l > /tmp/chocula_urls.tsv
pipenv run parallel -j10 --pipepart --line-buffer -a /tmp/chocula_urls.tsv ./check_issn_urls.py | pv -l > /tmp/homepage_status.json
- mv /tmp/url_status.json $@
+ mv /tmp/homepage_status.json $@
data/$(TODAY)/container_stats.json: data/container_export.json
cat data/container_export.json | jq .ident -r | sort -u > /tmp/container_ident.tsv