From 8782f83631365bffdd6a54fa436f5c477386fd1f Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Tue, 11 Aug 2020 19:49:08 -0700 Subject: additional loginwall patterns --- python/sandcrawler/ingest.py | 2 ++ 1 file changed, 2 insertions(+) (limited to 'python') diff --git a/python/sandcrawler/ingest.py b/python/sandcrawler/ingest.py index d910665..c45437d 100644 --- a/python/sandcrawler/ingest.py +++ b/python/sandcrawler/ingest.py @@ -105,6 +105,8 @@ class IngestFileWorker(SandcrawlerWorker): self.wall_blocklist = [ # loginwall "://profile.thieme.de/HTML/sso/ejournals/login.htm", + "://login.bepress.com/" + "?SAMLRequest=" ] # these are special-case web domains for which we want SPN2 to not run -- cgit v1.2.3