about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@archive.org> 2019-09-03 13:49:09 -0700
committer: Bryan Newbold <bnewbold@archive.org> 2019-09-03 13:49:09 -0700
commit: 1cc6dc4749750bc5e51c9877018e474367a64384 (patch)
tree: 8be5f8fc7b96a8a1c75c6f7196072bd429116db7
parent: ad46c83e87512bc3e6c1fa992c135b7dc3db3ee5 (diff)
download: chocula-1cc6dc4749750bc5e51c9877018e474367a64384.tar.gz
download: chocula-1cc6dc4749750bc5e51c9877018e474367a64384.zip
don't include doaj.org or NCBI homepage URLs
-rwxr-xr-x chocula.py 4
1 file changed, 4 insertions, 0 deletions
diff --git a/chocula.py b/chocula.py
index 72268cc..36a880d 100755
--- a/chocula.py
+++ b/chocula.py
@@ -1326,6 +1326,10 @@ class ChoculaDatabase():
webarchive_urls = []
cur = self.db.execute("SELECT * FROM homepage WHERE issnl = ?;", [row['issnl']])
for hrow in cur:
+ if '://doaj.org/' in hrow['url'] or '://www.doaj.org/' in hrow['url']:
+ continue
+ if '://www.ncbi.nlm.nih.gov/' in hrow['url']:
+ continue
if 'web.archive.org/web' in hrow['url']:
webarchive_urls.append(hrow['url'])
urls.append(hrow['url'])