From 1f9ca570bd168154a72adcd2454b992dbc7e8d0a Mon Sep 17 00:00:00 2001
From: Bryan Newbold
Date: Thu, 5 May 2022 11:08:52 -0700
Subject: ingest: more loginwall patterns

---
 python/sandcrawler/ingest_file.py | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'python/sandcrawler')

diff --git a/python/sandcrawler/ingest_file.py b/python/sandcrawler/ingest_file.py
index 11f2df9..f7c7d78 100644
--- a/python/sandcrawler/ingest_file.py
+++ b/python/sandcrawler/ingest_file.py
@@ -166,6 +166,9 @@ class IngestFileWorker(SandcrawlerWorker):
             "/password-login",
             "://gateway.isiknowledge.com/",
             "/login?TARGET=",
+            "jstage.jst.go.jp/sblogin",
+            "://acw.elsevier.com/SSOCore",
+            "://acw.sciencedirect.com/SSOCore",
         ]
 
         self.cookie_blocklist = [
-- 
cgit v1.2.3