diff options
author | Bryan Newbold <bnewbold@archive.org> | 2020-03-27 16:37:55 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2020-03-27 16:37:55 -0700 |
commit | afa7aa6006a0de41de6cb08b61aaff61109e2792 (patch) | |
tree | 59fbbcba3d597c5e1c814f7e28447c32cc1c0e9e | |
parent | 428e55998a861e48ccc951c7e64ee1d150ac1a51 (diff) | |
download | sandcrawler-afa7aa6006a0de41de6cb08b61aaff61109e2792.tar.gz sandcrawler-afa7aa6006a0de41de6cb08b61aaff61109e2792.zip |
ingest: block another large domain (and DOI prefix)
-rw-r--r-- | python/sandcrawler/ingest.py | 2 |
1 files changed, 2 insertions, 0 deletions
diff --git a/python/sandcrawler/ingest.py b/python/sandcrawler/ingest.py
index 4159e26..5cb3ef8 100644
--- a/python/sandcrawler/ingest.py
+++ b/python/sandcrawler/ingest.py
@@ -87,6 +87,8 @@ class IngestFileWorker(SandcrawlerWorker):
 "www.gbif.org/",
 "doi.pangaea.de/",
 "www.plate-archive.org/",
+ "://doi.org/10.25642/ipk/gbis/",
+ "://apex.ipk-gatersleben.de/",
 # Historical non-paper content:
 "dhz.uni-passau.de/", # newspapers