about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@archive.org> 2020-01-28 12:52:48 -0800
committer Bryan Newbold <bnewbold@archive.org> 2020-01-28 12:52:48 -0800
commit b9237268c61777a28f5d8e512b326337715aab44 (patch)
tree 5aef0fbaebedb3ec3be241c61835306631700b5a
parent 084807ee51f6b5844b323a1217a70b2f12ee966d (diff)
download sandcrawler-b9237268c61777a28f5d8e512b326337715aab44.tar.gz
sandcrawler-b9237268c61777a28f5d8e512b326337715aab44.zip
block springer page-one domain
-rw-r--r-- python/sandcrawler/ingest.py 3
1 files changed, 3 insertions, 0 deletions
diff --git a/python/sandcrawler/ingest.py b/python/sandcrawler/ingest.py
index 148abdb..9f07fc3 100644
--- a/python/sandcrawler/ingest.py
+++ b/python/sandcrawler/ingest.py
@@ -78,6 +78,9 @@ class IngestFileWorker(SandcrawlerWorker):
"://jist.net.in",
"://croisements-revue.org",
+ # all stubs/previews, not full papers
+ "://page-one.live.cf.public.springer.com",
+
]
def check_existing_ingest(self, base_url):