diff options
Diffstat (limited to 'python/sandcrawler/ingest.py')
-rw-r--r-- | python/sandcrawler/ingest.py | 5 |
1 files changed, 3 insertions, 2 deletions
diff --git a/python/sandcrawler/ingest.py b/python/sandcrawler/ingest.py index eb8e256..b610ab4 100644 --- a/python/sandcrawler/ingest.py +++ b/python/sandcrawler/ingest.py @@ -128,8 +128,9 @@ class IngestFileWorker(SandcrawlerWorker): self.wall_blocklist = [ # loginwall "://profile.thieme.de/HTML/sso/ejournals/login.htm", - "://login.bepress.com/" - "?SAMLRequest=" + "://login.bepress.com/", + "?SAMLRequest=", + "://osapublishing.org/captcha/", ] # these are special-case web domains for which we want SPN2 to not run |