From c12d77f60a767414d5ba69989566b8c65eb6adae Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Wed, 24 Nov 2021 18:07:07 -0800 Subject: more HomepageUrl filtering --- chocula/database.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/chocula/database.py b/chocula/database.py index ecc2858..1982108 100644 --- a/chocula/database.py +++ b/chocula/database.py @@ -46,6 +46,8 @@ class HomepageUrl: """ if ( not url + or '://' not in url + or not url.lower().startswith('http') or "mailto:" in url.lower() or url.lower() in ("http://n/a", "http://na/", "http://na") or "LOCKSS_RESOLVER" in url @@ -57,6 +59,7 @@ class HomepageUrl: or "://search.ebscohost.com" in url or "://search.proquest.com" in url or "://gateway.proquest.com" in url + or "://doaj.org" in url ): return None if url.startswith("www."): -- cgit v1.2.3