summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@robocracy.org> 2020-08-11 14:52:47 -0700
committer: Bryan Newbold <bnewbold@robocracy.org> 2020-08-11 15:32:28 -0700
commit: 2a492914082444690f853a55ab1394fc0cf50108 (patch)
tree: 322cbb9067b339002df1744b173a35235926fe3f
parent: ff05a03a3874e17557174d3534a1c2d11e01c4a6 (diff)
download: fatcat-2a492914082444690f853a55ab1394fc0cf50108.tar.gz
download: fatcat-2a492914082444690f853a55ab1394fc0cf50108.zip
entity update: skip ingest of figshare+zenodo 'group' DOIs
-rw-r--r-- python/fatcat_tools/workers/changelog.py 15
1 files changed, 15 insertions, 0 deletions
diff --git a/python/fatcat_tools/workers/changelog.py b/python/fatcat_tools/workers/changelog.py
index 1ac7a865..dc5ef299 100644
--- a/python/fatcat_tools/workers/changelog.py
+++ b/python/fatcat_tools/workers/changelog.py
@@ -175,6 +175,7 @@ class EntityUpdatesWorker(FatcatWorker):
'paper-conference',
'patent',
'peer_review',
+ 'post',
'report',
'retraction',
'review',
@@ -217,6 +218,20 @@ class EntityUpdatesWorker(FatcatWorker):
if doi.startswith(prefix):
return False
+ # figshare
+ if doi and doi.startswith('10.6084/') or doi.startswith('10.25384/'):
+ # don't crawl "most recent version" (aka "group") DOIs
+ if not release.version:
+ return False
+
+ # zenodo
+ if doi and doi.startswith('10.5281/'):
+ # if this is a "grouping" DOI of multiple "version" DOIs, do not crawl (will crawl the versioned DOIs)
+ if release.extra and release.extra.get('relations'):
+ for rel in release.extra['relations']:
+ if (rel.get('relationType') == 'HasVersion' and rel.get('relatedIdentifier', '').startswith('10.5281/')):
+ return False
+
return True
def run(self):