diff options
author | Bryan Newbold <bnewbold@archive.org> | 2021-11-24 18:07:07 -0800 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2021-11-24 18:07:07 -0800 |
commit | c12d77f60a767414d5ba69989566b8c65eb6adae (patch) | |
tree | 04a16fa0e57193c8125061c65f7b8c7c700db821 | |
parent | 2eec63919e758b6f037765c8ca46f53a42c5f138 (diff) | |
download | chocula-c12d77f60a767414d5ba69989566b8c65eb6adae.tar.gz chocula-c12d77f60a767414d5ba69989566b8c65eb6adae.zip |
more HomepageUrl filtering
-rw-r--r-- | chocula/database.py | 3 |
1 files changed, 3 insertions, 0 deletions
```diff
diff --git a/chocula/database.py b/chocula/database.py
index ecc2858..1982108 100644
--- a/chocula/database.py
+++ b/chocula/database.py
@@ -46,6 +46,8 @@ class HomepageUrl:
         """
         if (
             not url
+            or '://' not in url
+            or not url.lower().startswith('http')
             or "mailto:" in url.lower()
             or url.lower() in ("http://n/a", "http://na/", "http://na")
             or "LOCKSS_RESOLVER" in url
@@ -57,6 +59,7 @@ class HomepageUrl:
             or "://search.ebscohost.com" in url
             or "://search.proquest.com" in url
             or "://gateway.proquest.com" in url
+            or "://doaj.org" in url
         ):
             return None
         if url.startswith("www."):
```