about | summary | refs | log | tree | commit | diff | stats
path: root/python/sandcrawler/ingest.py
diff options
context:
space:
mode:
Diffstat (limited to 'python/sandcrawler/ingest.py')
-rw-r--r-- python/sandcrawler/ingest.py 5
1 files changed, 3 insertions, 2 deletions
diff --git a/python/sandcrawler/ingest.py b/python/sandcrawler/ingest.py
index eb8e256..b610ab4 100644
--- a/python/sandcrawler/ingest.py
+++ b/python/sandcrawler/ingest.py
@@ -128,8 +128,9 @@ class IngestFileWorker(SandcrawlerWorker):
self.wall_blocklist = [
# loginwall
"://profile.thieme.de/HTML/sso/ejournals/login.htm",
- "://login.bepress.com/"
- "?SAMLRequest="
+ "://login.bepress.com/",
+ "?SAMLRequest=",
+ "://osapublishing.org/captcha/",
]
# these are special-case web domains for which we want SPN2 to not run