about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@archive.org> 2021-11-24 18:07:07 -0800
committer Bryan Newbold <bnewbold@archive.org> 2021-11-24 18:07:07 -0800
commit c12d77f60a767414d5ba69989566b8c65eb6adae (patch)
tree 04a16fa0e57193c8125061c65f7b8c7c700db821
parent 2eec63919e758b6f037765c8ca46f53a42c5f138 (diff)
download chocula-c12d77f60a767414d5ba69989566b8c65eb6adae.tar.gz
chocula-c12d77f60a767414d5ba69989566b8c65eb6adae.zip
more HomepageUrl filtering
-rw-r--r-- chocula/database.py 3
1 files changed, 3 insertions, 0 deletions
diff --git a/chocula/database.py b/chocula/database.py
index ecc2858..1982108 100644
--- a/chocula/database.py
+++ b/chocula/database.py
@@ -46,6 +46,8 @@ class HomepageUrl:
"""
if (
not url
+ or '://' not in url
+ or not url.lower().startswith('http')
or "mailto:" in url.lower()
or url.lower() in ("http://n/a", "http://na/", "http://na")
or "LOCKSS_RESOLVER" in url
@@ -57,6 +59,7 @@ class HomepageUrl:
or "://search.ebscohost.com" in url
or "://search.proquest.com" in url
or "://gateway.proquest.com" in url
+ or "://doaj.org" in url
):
return None
if url.startswith("www."):