aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r-- python/sandcrawler/ingest.py 3
1 files changed, 3 insertions, 0 deletions
diff --git a/python/sandcrawler/ingest.py b/python/sandcrawler/ingest.py
index d2a9980..6d8b162 100644
--- a/python/sandcrawler/ingest.py
+++ b/python/sandcrawler/ingest.py
@@ -101,6 +101,9 @@ class IngestFileWorker(SandcrawlerWorker):
# Historical non-paper content:
"dhz.uni-passau.de/", # newspapers
"digital.ucd.ie/", # ireland national historical
+
+ # DOI prefixes
+ "://doi.org/10.2307/", # JSTOR; slow and many redirects
]
self.wall_blocklist = [