From afa7aa6006a0de41de6cb08b61aaff61109e2792 Mon Sep 17 00:00:00 2001
From: Bryan Newbold
Date: Fri, 27 Mar 2020 16:37:55 -0700
Subject: ingest: block another large domain (and DOI prefix)

---
 python/sandcrawler/ingest.py | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'python')

diff --git a/python/sandcrawler/ingest.py b/python/sandcrawler/ingest.py
index 4159e26..5cb3ef8 100644
--- a/python/sandcrawler/ingest.py
+++ b/python/sandcrawler/ingest.py
@@ -87,6 +87,8 @@ class IngestFileWorker(SandcrawlerWorker):
  "www.gbif.org/",
  "doi.pangaea.de/",
  "www.plate-archive.org/",
+ "://doi.org/10.25642/ipk/gbis/",
+ "://apex.ipk-gatersleben.de/",
 
  # Historical non-paper content:
  "dhz.uni-passau.de/", # newspapers
-- 
cgit v1.2.3