diff options
author | Bryan Newbold <bnewbold@archive.org> | 2020-06-23 18:39:45 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2020-06-23 18:39:45 -0700 |
commit | 3945ef26d5024e4efe81374b8eb562ffd5b09613 (patch) | |
tree | ca846e7083d5aa823af9a21ba3bd5e3389c04ae7 | |
parent | 613e8f6e58838ce7c81f6a1c05fe52bd9089f383 (diff) | |
download | chocula-3945ef26d5024e4efe81374b8eb562ffd5b09613.tar.gz chocula-3945ef26d5024e4efe81374b8eb562ffd5b09613.zip |
block/skip more homepage patterns
-rw-r--r-- | chocula/database.py | 9 |
1 file changed, 9 insertions, 0 deletions
```diff
diff --git a/chocula/database.py b/chocula/database.py
index 54d3b60..b800f65 100644
--- a/chocula/database.py
+++ b/chocula/database.py
@@ -49,6 +49,11 @@ class HomepageUrl:
             or "mailto:" in url.lower()
             or url.lower() in ("http://n/a", "http://na/", "http://na")
             or "LOCKSS_RESOLVER" in url
+            or "$result.AccessURL" in url
+            or "://firstsearch.oclc.org" in url
+            or "://bibpurl.oclc.org" in url
+            or "://books.google.com" in url
+            or "://search.ebscohost.com" in url
         ):
             return None
         if url.startswith("www."):
@@ -703,6 +708,10 @@ class ChoculaDatabase:
                     "www.thefreelibrary.com",
                     "goo.gl",
                     "dx.doi.org",
+                    "firstsearch.oclc.org",
+                    "www.umi.com",
+                    "search.informit.com.au",
+                    "search.ebscohost.com",
                 ):
                     # individual books or google searches, not journal/conference homepages
                     # LOC scanned newspapers
```