diff options
author | Bryan Newbold <bnewbold@archive.org> | 2019-09-03 13:49:09 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2019-09-03 13:49:09 -0700 |
commit | 1cc6dc4749750bc5e51c9877018e474367a64384 (patch) | |
tree | 8be5f8fc7b96a8a1c75c6f7196072bd429116db7 | |
parent | ad46c83e87512bc3e6c1fa992c135b7dc3db3ee5 (diff) | |
download | chocula-1cc6dc4749750bc5e51c9877018e474367a64384.tar.gz chocula-1cc6dc4749750bc5e51c9877018e474367a64384.zip |
don't include doaj.org or NCBI homepage URLs
-rwxr-xr-x | chocula.py | 4 |
1 file changed, 4 insertions, 0 deletions
@@ -1326,6 +1326,10 @@ class ChoculaDatabase(): webarchive_urls = [] cur = self.db.execute("SELECT * FROM homepage WHERE issnl = ?;", [row['issnl']]) for hrow in cur: + if '://doaj.org/' in hrow['url'] or '://www.doaj.org/' in hrow['url']: + continue + if '://www.ncbi.nlm.nih.gov/' in hrow['url']: + continue if 'web.archive.org/web' in hrow['url']: webarchive_urls.append(hrow['url']) urls.append(hrow['url']) |