diff options
author | Bryan Newbold <bnewbold@robocracy.org> | 2020-08-11 14:52:47 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@robocracy.org> | 2020-08-11 15:32:28 -0700 |
commit | 2a492914082444690f853a55ab1394fc0cf50108 (patch) | |
tree | 322cbb9067b339002df1744b173a35235926fe3f /python/fatcat_tools/workers | |
parent | ff05a03a3874e17557174d3534a1c2d11e01c4a6 (diff) | |
download | fatcat-2a492914082444690f853a55ab1394fc0cf50108.tar.gz fatcat-2a492914082444690f853a55ab1394fc0cf50108.zip |
entity update: skip ingest of figshare+zenodo 'group' DOIs
Diffstat (limited to 'python/fatcat_tools/workers')
-rw-r--r-- | python/fatcat_tools/workers/changelog.py | 15 |
1 files changed, 15 insertions, 0 deletions
```diff
diff --git a/python/fatcat_tools/workers/changelog.py b/python/fatcat_tools/workers/changelog.py
index 1ac7a865..dc5ef299 100644
--- a/python/fatcat_tools/workers/changelog.py
+++ b/python/fatcat_tools/workers/changelog.py
@@ -175,6 +175,7 @@ class EntityUpdatesWorker(FatcatWorker):
             'paper-conference',
             'patent',
             'peer_review',
+            'post',
             'report',
             'retraction',
             'review',
@@ -217,6 +218,20 @@ class EntityUpdatesWorker(FatcatWorker):
                 if doi.startswith(prefix):
                     return False
 
+        # figshare
+        if doi and doi.startswith('10.6084/') or doi.startswith('10.25384/'):
+            # don't crawl "most recent version" (aka "group") DOIs
+            if not release.version:
+                return False
+
+        # zenodo
+        if doi and doi.startswith('10.5281/'):
+            # if this is a "grouping" DOI of multiple "version" DOIs, do not crawl (will crawl the versioned DOIs)
+            if release.extra and release.extra.get('relations'):
+                for rel in release.extra['relations']:
+                    if (rel.get('relationType') == 'HasVersion' and rel.get('relatedIdentifier', '').startswith('10.5281/')):
+                        return False
+
         return True
 
     def run(self):
```