aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@archive.org> 2020-01-21 11:34:37 -0800
committer Bryan Newbold <bnewbold@archive.org> 2020-01-21 11:34:39 -0800
commit 2e93f94c9ebba689dde252ca8f5b106765cece88 (patch)
tree db30238693a42249a57420c7fcc0b54d0d50f6d7
parent 20291471b34ea559d2ea5d45f3b05884e54d179a (diff)
download sandcrawler-2e93f94c9ebba689dde252ca8f5b106765cece88.tar.gz
sandcrawler-2e93f94c9ebba689dde252ca8f5b106765cece88.zip
re-enable figshare and zenodo crawling
For daily imports
-rw-r--r-- python/sandcrawler/ingest.py 8
1 files changed, 0 insertions, 8 deletions
diff --git a/python/sandcrawler/ingest.py b/python/sandcrawler/ingest.py
index 94eb9a5..148abdb 100644
--- a/python/sandcrawler/ingest.py
+++ b/python/sandcrawler/ingest.py
@@ -65,14 +65,6 @@ class IngestFileWorker(SandcrawlerWorker):
self.try_spn2 = kwargs.get('try_spn2', True)
self.base_url_blocklist = [
- # temporary, until we do specific crawls
- "://doi.org/10.5281/zenodo",
- "://doi.org/10.6084/",
- "://doi.org/10.11583/",
- "://doi.org/10.1184/",
- "://zenodo.org/",
- "://figshare.com/",
-
# temporary, until we implement specific fetch and 'petabox' output
"://archive.org/",
"://web.archive.org/web/",