diff options
Diffstat (limited to 'python')
-rw-r--r-- | python/sandcrawler/ingest_file.py | 1 |
1 files changed, 1 insertions, 0 deletions
diff --git a/python/sandcrawler/ingest_file.py b/python/sandcrawler/ingest_file.py
index f7c7d78..eca8bf8 100644
--- a/python/sandcrawler/ingest_file.py
+++ b/python/sandcrawler/ingest_file.py
@@ -151,6 +151,7 @@ class IngestFileWorker(SandcrawlerWorker):
  "doi.org/10.2307/", # JSTOR; slow and many redirects
  "doi.org/10.18730/", # fao.org: database entry
  "doi.org/10.15468/", # gbif.org: database entry
+ "doi.org/10.48550/", # arxiv.org: redundant with direct ingest
  # deprecated domain (doesn't redirect correctly)
  "://edoc.mpg.de/",
  # bogus/spam PDFs