From 2a492914082444690f853a55ab1394fc0cf50108 Mon Sep 17 00:00:00 2001
From: Bryan Newbold
Date: Tue, 11 Aug 2020 14:52:47 -0700
Subject: entity update: skip ingest of figshare+zenodo 'group' DOIs

---
Review notes (text in this section is ignored when the patch is applied):
  * The figshare check originally read
        doi and doi.startswith('10.6084/') or doi.startswith('10.25384/')
    which Python parses as `(doi and A) or B`, so a missing DOI still
    reached the second `doi.startswith(...)` call. Both prefixes are now
    tested under the single `doi and` guard via a tuple argument.
  * Line breaks and indentation were lost in the copy this was restored
    from; context-line indentation is reconstructed and should be checked
    against the target file before applying.

 python/fatcat_tools/workers/changelog.py | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

(limited to 'python/fatcat_tools/workers/changelog.py')

diff --git a/python/fatcat_tools/workers/changelog.py b/python/fatcat_tools/workers/changelog.py
index 1ac7a865..dc5ef299 100644
--- a/python/fatcat_tools/workers/changelog.py
+++ b/python/fatcat_tools/workers/changelog.py
@@ -175,6 +175,7 @@ class EntityUpdatesWorker(FatcatWorker):
             'paper-conference',
             'patent',
             'peer_review',
+            'post',
             'report',
             'retraction',
             'review',
@@ -217,6 +218,20 @@ class EntityUpdatesWorker(FatcatWorker):
                 if doi.startswith(prefix):
                     return False
 
+        # figshare: both prefixes must sit behind the `doi` guard, since doi may be empty here
+        if doi and doi.startswith(('10.6084/', '10.25384/')):
+            # don't crawl "most recent version" (aka "group") DOIs; these have no release version set
+            if not release.version:
+                return False
+
+        # zenodo
+        if doi and doi.startswith('10.5281/'):
+            # if this is a "grouping" DOI of multiple "version" DOIs, do not crawl (will crawl the versioned DOIs)
+            if release.extra and release.extra.get('relations'):
+                for rel in release.extra['relations']:
+                    if (rel.get('relationType') == 'HasVersion' and rel.get('relatedIdentifier', '').startswith('10.5281/')):
+                        return False
+
         return True
 
     def run(self):
-- 
cgit v1.2.3