diff options
author | Bryan Newbold <bnewbold@robocracy.org> | 2020-02-10 23:18:50 -0800 |
---|---|---|
committer | Bryan Newbold <bnewbold@robocracy.org> | 2020-02-10 23:18:50 -0800 |
commit | 07fabec32aada55a75c064e5c1e01a46da30d854 (patch) | |
tree | 075556473f03498dd3e817e802ed1c5848627b97 | |
parent | b8003d9af22a735102549d4d5a7cd39aa6608d1c (diff) | |
download | fatcat-07fabec32aada55a75c064e5c1e01a46da30d854.tar.gz fatcat-07fabec32aada55a75c064e5c1e01a46da30d854.zip |
add acceptlist override for biorxiv/medrxiv
-rw-r--r-- | python/fatcat_tools/workers/changelog.py | 14 |
1 file changed, 12 insertions, 2 deletions
diff --git a/python/fatcat_tools/workers/changelog.py b/python/fatcat_tools/workers/changelog.py index c8584ccf..7a9a585d 100644 --- a/python/fatcat_tools/workers/changelog.py +++ b/python/fatcat_tools/workers/changelog.py @@ -102,6 +102,10 @@ class EntityUpdatesWorker(FatcatWorker): # ccdc.cam.ac.uk: crystal structures "10.5517/", ] + self.live_pdf_ingest_doi_prefix_acceptlist = [ + # biorxiv and medrxiv + "10.1101/", + ] def want_live_ingest(self, release, ingest_request): """ @@ -115,13 +119,19 @@ class EntityUpdatesWorker(FatcatWorker): link_source = ingest_request.get('ingest_request') ingest_type = ingest_request.get('ingest_type') + doi = ingest_request.get('ext_ids', {}).get('doi') + + in_acceptlist = False + if doi: + for prefix in self.live_pdf_ingest_doi_prefix_acceptlist: + if doi.startswith(prefix): + in_acceptlist = True if self.ingest_oa_only and link_source not in ('arxiv', 'pmc'): es = release_to_elasticsearch(release) - if not es['is_oa']: + if not es['is_oa'] and not in_acceptlist: return False - doi = ingest_request.get('ext_ids', {}).get('doi') if ingest_type == "pdf" and doi: for prefix in self.ingest_pdf_doi_prefix_blocklist: if doi.startswith(prefix): |