about | summary | refs | log | tree | commit | diff | stats
path: root/python/sandcrawler/ingest_file.py
diff options
context:
space:
mode:
Diffstat (limited to 'python/sandcrawler/ingest_file.py')
-rw-r--r-- python/sandcrawler/ingest_file.py 1
1 files changed, 1 insertions, 0 deletions
diff --git a/python/sandcrawler/ingest_file.py b/python/sandcrawler/ingest_file.py
index f7c7d78..eca8bf8 100644
--- a/python/sandcrawler/ingest_file.py
+++ b/python/sandcrawler/ingest_file.py
@@ -151,6 +151,7 @@ class IngestFileWorker(SandcrawlerWorker):
"doi.org/10.2307/", # JSTOR; slow and many redirects
"doi.org/10.18730/", # fao.org: database entry
"doi.org/10.15468/", # gbif.org: database entry
+ "doi.org/10.48550/", # arxiv.org: redundant with direct ingest
# deprecated domain (doesn't redirect correctly)
"://edoc.mpg.de/",
# bogus/spam PDFs