diff options
author | Bryan Newbold <bnewbold@archive.org> | 2020-08-11 19:49:08 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2020-08-11 19:49:08 -0700 |
commit | 8782f83631365bffdd6a54fa436f5c477386fd1f (patch) | |
tree | 6e163fd5dc1da579758d2316da63c9e6c4c4fa8f /python | |
parent | bb8d4a3591826af3cb15f32404b98eb7995d005b (diff) | |
download | sandcrawler-8782f83631365bffdd6a54fa436f5c477386fd1f.tar.gz sandcrawler-8782f83631365bffdd6a54fa436f5c477386fd1f.zip |
additional loginwall patterns
Diffstat (limited to 'python')
-rw-r--r-- | python/sandcrawler/ingest.py | 2 |
1 files changed, 2 insertions, 0 deletions
diff --git a/python/sandcrawler/ingest.py b/python/sandcrawler/ingest.py
index d910665..c45437d 100644
--- a/python/sandcrawler/ingest.py
+++ b/python/sandcrawler/ingest.py
@@ -105,6 +105,8 @@ class IngestFileWorker(SandcrawlerWorker):
         self.wall_blocklist = [
             # loginwall
             "://profile.thieme.de/HTML/sso/ejournals/login.htm",
+            "://login.bepress.com/"
+            "?SAMLRequest="
         ]
 
         # these are special-case web domains for which we want SPN2 to not run