From 1cc6dc4749750bc5e51c9877018e474367a64384 Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Tue, 3 Sep 2019 13:49:09 -0700 Subject: don't include doaj.org or NCBI homepage URLs --- chocula.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/chocula.py b/chocula.py index 72268cc..36a880d 100755 --- a/chocula.py +++ b/chocula.py @@ -1326,6 +1326,10 @@ class ChoculaDatabase(): webarchive_urls = [] cur = self.db.execute("SELECT * FROM homepage WHERE issnl = ?;", [row['issnl']]) for hrow in cur: + if '://doaj.org/' in hrow['url'] or '://www.doaj.org/' in hrow['url']: + continue + if '://www.ncbi.nlm.nih.gov/' in hrow['url']: + continue if 'web.archive.org/web' in hrow['url']: webarchive_urls.append(hrow['url']) urls.append(hrow['url']) -- cgit v1.2.3