From a13787eaa6738e5f2ffb29d1d4d9a83617a1b943 Mon Sep 17 00:00:00 2001
From: Bryan Newbold
Date: Tue, 30 Nov 2021 14:18:00 -0800
Subject: move skip logic from Makefile to check_issn_urls

---
 check_issn_urls.py | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

(limited to 'check_issn_urls.py')

diff --git a/check_issn_urls.py b/check_issn_urls.py
index 6fbb05d..b00609f 100755
--- a/check_issn_urls.py
+++ b/check_issn_urls.py
@@ -120,6 +120,18 @@ def check_gwb(url, match_type="exact"):
     else:
         return None
 
+HOST_SKIP_LIST = [
+    "www.jstor.org",
+    "www.tandfonline.com",
+    "www.sciencedirect.com",
+    "link.springer.com",
+    "onlinelibrary.wiley.com",
+    "dialnet.unirioja.es",
+    "www.springer.com",
+    "www.journals.elsevier.com",
+    "web.archive.org",
+    "catalog.hathitrust.org",
+]
 
 def check_url(issnl, url):
     # print("Fetching: %s" % url)
@@ -132,6 +144,12 @@ def check_url(issnl, url):
         info["error"] = "url-not-http"
         info["terminal_status_code"] = -1
         return info
+    for host in HOST_SKIP_LIST:
+        if f"://{host}/" in url:
+            info["error"] = "skip-host"
+            info["terminal_status_code"] = -1
+            return info
+
     try:
         resp = requests.get(
             url,
-- 
cgit v1.2.3