summary | refs | log | tree | commit | diff | stats
path: root/python
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@robocracy.org> 2020-02-10 23:18:50 -0800
committer Bryan Newbold <bnewbold@robocracy.org> 2020-02-10 23:18:50 -0800
commit 07fabec32aada55a75c064e5c1e01a46da30d854 (patch)
tree 075556473f03498dd3e817e802ed1c5848627b97 /python
parent b8003d9af22a735102549d4d5a7cd39aa6608d1c (diff)
download fatcat-07fabec32aada55a75c064e5c1e01a46da30d854.tar.gz
fatcat-07fabec32aada55a75c064e5c1e01a46da30d854.zip
add acceptlist override for biorxiv/medrxiv
Diffstat (limited to 'python')
-rw-r--r-- python/fatcat_tools/workers/changelog.py 14
1 files changed, 12 insertions, 2 deletions
diff --git a/python/fatcat_tools/workers/changelog.py b/python/fatcat_tools/workers/changelog.py
index c8584ccf..7a9a585d 100644
--- a/python/fatcat_tools/workers/changelog.py
+++ b/python/fatcat_tools/workers/changelog.py
@@ -102,6 +102,10 @@ class EntityUpdatesWorker(FatcatWorker):
# ccdc.cam.ac.uk: crystal structures
"10.5517/",
]
+ self.live_pdf_ingest_doi_prefix_acceptlist = [
+ # biorxiv and medrxiv
+ "10.1101/",
+ ]
def want_live_ingest(self, release, ingest_request):
"""
@@ -115,13 +119,19 @@ class EntityUpdatesWorker(FatcatWorker):
link_source = ingest_request.get('ingest_request')
ingest_type = ingest_request.get('ingest_type')
+ doi = ingest_request.get('ext_ids', {}).get('doi')
+
+ in_acceptlist = False
+ if doi:
+ for prefix in self.live_pdf_ingest_doi_prefix_acceptlist:
+ if doi.startswith(prefix):
+ in_acceptlist = True
if self.ingest_oa_only and link_source not in ('arxiv', 'pmc'):
es = release_to_elasticsearch(release)
- if not es['is_oa']:
+ if not es['is_oa'] and not in_acceptlist:
return False
- doi = ingest_request.get('ext_ids', {}).get('doi')
if ingest_type == "pdf" and doi:
for prefix in self.ingest_pdf_doi_prefix_blocklist:
if doi.startswith(prefix):