about | summary | refs | log | tree | commit | diff | stats
path: root/python/fatcat_tools/workers
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@robocracy.org> 2020-08-10 15:07:19 -0700
committer: Bryan Newbold <bnewbold@robocracy.org> 2020-08-10 15:07:19 -0700
commit: e9dd3c73f036d3fba2680eeaff8e62ecf2dbf9a1 (patch)
tree: f3ac6aab6c5ca7aafb957063a2b5c1482cded5ea /python/fatcat_tools/workers
parent: de0fb59f0e36d8079649feefb7592189d8f7c6ed (diff)
download: fatcat-e9dd3c73f036d3fba2680eeaff8e62ecf2dbf9a1.tar.gz
fatcat-e9dd3c73f036d3fba2680eeaff8e62ecf2dbf9a1.zip
update crawl blocklist for SPNv2 requests which mostly fail
Diffstat (limited to 'python/fatcat_tools/workers')
-rw-r--r-- python/fatcat_tools/workers/changelog.py 12
1 files changed, 10 insertions, 2 deletions
diff --git a/python/fatcat_tools/workers/changelog.py b/python/fatcat_tools/workers/changelog.py
index d5891ad1..1ac7a865 100644
--- a/python/fatcat_tools/workers/changelog.py
+++ b/python/fatcat_tools/workers/changelog.py
@@ -101,12 +101,20 @@ class EntityUpdatesWorker(FatcatWorker):
"10.3932/",
# ccdc.cam.ac.uk: crystal structures
"10.5517/",
+ # researchgate: mostly blocks our crawler
+ "10.13140/",
+ # springerlink: mostly blocks crawler
+ "10.1007/",
+ # nature group: mostly blocks crawler
+ "10.1038/",
+ # SAGE: mostly blocks crawler
+ "10.1177/",
+ # IOP: mostly blocks crawler
+ "10.1088/",
]
self.live_pdf_ingest_doi_prefix_acceptlist = [
# biorxiv and medrxiv
"10.1101/",
- # researchgate
- "10.13140/",
# the lancet (often hybrid OA)
"10.1016/s0140-6736",
"10.1016/s2213-2600",