diff options
author | Bryan Newbold <bnewbold@archive.org> | 2022-05-05 11:08:52 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2022-05-05 11:08:52 -0700 |
commit | 1f9ca570bd168154a72adcd2454b992dbc7e8d0a (patch) | |
tree | 65c01f34f26d53ff7ed7601e6a159581f463a347 | |
parent | 1ec661af75f37b3ae5031851f6c452039e08503c (diff) | |
download | sandcrawler-1f9ca570bd168154a72adcd2454b992dbc7e8d0a.tar.gz sandcrawler-1f9ca570bd168154a72adcd2454b992dbc7e8d0a.zip |
ingest: more loginwall patterns
-rw-r--r-- | python/sandcrawler/ingest_file.py | 3 |
1 files changed, 3 insertions, 0 deletions
diff --git a/python/sandcrawler/ingest_file.py b/python/sandcrawler/ingest_file.py
index 11f2df9..f7c7d78 100644
--- a/python/sandcrawler/ingest_file.py
+++ b/python/sandcrawler/ingest_file.py
@@ -166,6 +166,9 @@ class IngestFileWorker(SandcrawlerWorker):
             "/password-login",
             "://gateway.isiknowledge.com/",
             "/login?TARGET=",
+            "jstage.jst.go.jp/sblogin",
+            "://acw.elsevier.com/SSOCore",
+            "://acw.sciencedirect.com/SSOCore",
         ]
 
         self.cookie_blocklist = [