From a72019e6e788be64420719c5045e40614098c106 Mon Sep 17 00:00:00 2001
From: Bryan Newbold
Date: Wed, 20 Jul 2022 18:02:56 -0700
Subject: doaj and unpaywall transforms: more domains to skip

---
 python/scripts/doaj2ingestrequest.py      | 1 +
 python/scripts/unpaywall2ingestrequest.py | 3 ---
 2 files changed, 1 insertion(+), 3 deletions(-)

(limited to 'python/scripts')

diff --git a/python/scripts/doaj2ingestrequest.py b/python/scripts/doaj2ingestrequest.py
index 67286b9..aef5c12 100755
--- a/python/scripts/doaj2ingestrequest.py
+++ b/python/scripts/doaj2ingestrequest.py
@@ -23,6 +23,7 @@ DOMAIN_BLOCKLIST = [
     "ncbi.nlm.nih.gov/",
     # "semanticscholar.org/",
     "://doi.org/",
+    "://dx.doi.org/",
     "zenodo.org/",
     "figshare.com/",
     "://archive.org/",
diff --git a/python/scripts/unpaywall2ingestrequest.py b/python/scripts/unpaywall2ingestrequest.py
index ad5353b..cb64a1a 100755
--- a/python/scripts/unpaywall2ingestrequest.py
+++ b/python/scripts/unpaywall2ingestrequest.py
@@ -15,12 +15,9 @@ DOMAIN_BLOCKLIST = [
     "://arxiv.org/",
     "://europepmc.org/",
     "ncbi.nlm.nih.gov/",
-    "semanticscholar.org/",
     "://doi.org/",
     "zenodo.org/",
     "figshare.com/",
-    "://archive.org/",
-    ".archive.org/",
 ]
 
 RELEASE_STAGE_MAP = {
-- 
cgit v1.2.3