aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@archive.org> 2020-03-27 16:37:55 -0700
committer Bryan Newbold <bnewbold@archive.org> 2020-03-27 16:37:55 -0700
commit afa7aa6006a0de41de6cb08b61aaff61109e2792 (patch)
tree 59fbbcba3d597c5e1c814f7e28447c32cc1c0e9e
parent 428e55998a861e48ccc951c7e64ee1d150ac1a51 (diff)
download sandcrawler-afa7aa6006a0de41de6cb08b61aaff61109e2792.tar.gz
sandcrawler-afa7aa6006a0de41de6cb08b61aaff61109e2792.zip
ingest: block another large domain (and DOI prefix)
-rw-r--r-- python/sandcrawler/ingest.py 2
1 files changed, 2 insertions, 0 deletions
diff --git a/python/sandcrawler/ingest.py b/python/sandcrawler/ingest.py
index 4159e26..5cb3ef8 100644
--- a/python/sandcrawler/ingest.py
+++ b/python/sandcrawler/ingest.py
@@ -87,6 +87,8 @@ class IngestFileWorker(SandcrawlerWorker):
"www.gbif.org/",
"doi.pangaea.de/",
"www.plate-archive.org/",
+ "://doi.org/10.25642/ipk/gbis/",
+ "://apex.ipk-gatersleben.de/",
# Historical non-paper content:
"dhz.uni-passau.de/", # newspapers