From 3e0a14e3d61f65e25f659d7f8b34aac7d0d223e6 Mon Sep 17 00:00:00 2001
From: Bryan Newbold
Date: Wed, 24 Nov 2021 18:12:25 -0800
Subject: make: homepage-status skip some large publisher domains for speed

---
 Makefile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Makefile b/Makefile
index 477de20..5614216 100644
--- a/Makefile
+++ b/Makefile
@@ -95,9 +95,9 @@ update-sources: data/$(TODAY)/kbart_JSTOR.txt data/$(TODAY)/kbart_CLOCKSS.txt da
 	@echo "Successfully updated for date (UTC): $(TODAY)"
 
 data/$(TODAY)/homepage_status.json:
-	pipenv run python -m chocula export_urls | shuf | pv -l > /tmp/chocula_urls.tsv
+	pipenv run python -m chocula export_urls | rg -v www.jstor.org | rg -v www.tandfonline.com | rg -v www.sciencedirect.com | rg -v link.springer.com | rg -v onlinelibrary.wiley.com | rg -v dialnet.unirioja.es | rg -v www.springer.com | rg -v www.journals.elsevier.com | rg -v web.archive.org | rg -v catalog.hathitrust.org | shuf | pv -l > /tmp/chocula_urls.tsv
 	pipenv run parallel -j10 --pipepart --line-buffer -a /tmp/chocula_urls.tsv ./check_issn_urls.py | pv -l > /tmp/homepage_status.json
-	mv /tmp/url_status.json $@
+	mv /tmp/homepage_status.json $@
 
 data/$(TODAY)/container_stats.json: data/container_export.json
 	cat data/container_export.json | jq .ident -r | sort -u > /tmp/container_ident.tsv
-- 
cgit v1.2.3