From b9237268c61777a28f5d8e512b326337715aab44 Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Tue, 28 Jan 2020 12:52:48 -0800 Subject: block springer page-one domain --- python/sandcrawler/ingest.py | 3 +++ 1 file changed, 3 insertions(+) (limited to 'python') diff --git a/python/sandcrawler/ingest.py b/python/sandcrawler/ingest.py index 148abdb..9f07fc3 100644 --- a/python/sandcrawler/ingest.py +++ b/python/sandcrawler/ingest.py @@ -78,6 +78,9 @@ class IngestFileWorker(SandcrawlerWorker): "://jist.net.in", "://croisements-revue.org", + # all stubs/previews, not full papers + "://page-one.live.cf.public.springer.com", + ] def check_existing_ingest(self, base_url): -- cgit v1.2.3