aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@archive.org> 2022-05-05 11:08:52 -0700
committer: Bryan Newbold <bnewbold@archive.org> 2022-05-05 11:08:52 -0700
commit: 1f9ca570bd168154a72adcd2454b992dbc7e8d0a (patch)
tree: 65c01f34f26d53ff7ed7601e6a159581f463a347
parent: 1ec661af75f37b3ae5031851f6c452039e08503c (diff)
download: sandcrawler-1f9ca570bd168154a72adcd2454b992dbc7e8d0a.tar.gz
download: sandcrawler-1f9ca570bd168154a72adcd2454b992dbc7e8d0a.zip
ingest: more loginwall patterns
-rw-r--r-- python/sandcrawler/ingest_file.py | 3
1 files changed, 3 insertions, 0 deletions
diff --git a/python/sandcrawler/ingest_file.py b/python/sandcrawler/ingest_file.py
index 11f2df9..f7c7d78 100644
--- a/python/sandcrawler/ingest_file.py
+++ b/python/sandcrawler/ingest_file.py
@@ -166,6 +166,9 @@ class IngestFileWorker(SandcrawlerWorker):
"/password-login",
"://gateway.isiknowledge.com/",
"/login?TARGET=",
+ "jstage.jst.go.jp/sblogin",
+ "://acw.elsevier.com/SSOCore",
+ "://acw.sciencedirect.com/SSOCore",
]
self.cookie_blocklist = [