about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@archive.org> 2020-06-23 18:39:45 -0700
committer Bryan Newbold <bnewbold@archive.org> 2020-06-23 18:39:45 -0700
commit 3945ef26d5024e4efe81374b8eb562ffd5b09613 (patch)
tree ca846e7083d5aa823af9a21ba3bd5e3389c04ae7
parent 613e8f6e58838ce7c81f6a1c05fe52bd9089f383 (diff)
download chocula-3945ef26d5024e4efe81374b8eb562ffd5b09613.tar.gz
chocula-3945ef26d5024e4efe81374b8eb562ffd5b09613.zip
block/skip more homepage patterns
-rw-r--r-- chocula/database.py 9
1 files changed, 9 insertions, 0 deletions
diff --git a/chocula/database.py b/chocula/database.py
index 54d3b60..b800f65 100644
--- a/chocula/database.py
+++ b/chocula/database.py
@@ -49,6 +49,11 @@ class HomepageUrl:
or "mailto:" in url.lower()
or url.lower() in ("http://n/a", "http://na/", "http://na")
or "LOCKSS_RESOLVER" in url
+ or "$result.AccessURL" in url
+ or "://firstsearch.oclc.org" in url
+ or "://bibpurl.oclc.org" in url
+ or "://books.google.com" in url
+ or "://search.ebscohost.com" in url
):
return None
if url.startswith("www."):
@@ -703,6 +708,10 @@ class ChoculaDatabase:
"www.thefreelibrary.com",
"goo.gl",
"dx.doi.org",
+ "firstsearch.oclc.org",
+ "www.umi.com",
+ "search.informit.com.au",
+ "search.ebscohost.com",
):
# individual books or google searches, not journal/conference homepages
# LOC scanned newspapers