aboutsummaryrefslogtreecommitdiffstats
path: root/python/fatcat_tools/importers/ingest.py
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@robocracy.org> 2021-06-17 16:26:50 -0700
committer: Bryan Newbold <bnewbold@robocracy.org> 2021-06-23 17:51:15 -0700
commit: fa11747574f086e99459914f93d24bad7a8eacce (patch)
tree: 9e05825c26de60042142dd846cee31d85661a6b4 /python/fatcat_tools/importers/ingest.py
parent: cf2c3432d3c28d2ba0a92c62fabda4ca434afe46 (diff)
download: fatcat-fa11747574f086e99459914f93d24bad7a8eacce.tar.gz
fatcat-fa11747574f086e99459914f93d24bad7a8eacce.zip
more consistent and defensive lower-casing of DOIs
This change was made after noticing more upper/lower-case ambiguity in production. In particular, some old ingest requests in the sandcrawler DB, which get re-submitted/re-tried, have capitalized DOIs in the link source id field.
Diffstat (limited to 'python/fatcat_tools/importers/ingest.py')
-rw-r--r-- python/fatcat_tools/importers/ingest.py 4
1 files changed, 4 insertions, 0 deletions
diff --git a/python/fatcat_tools/importers/ingest.py b/python/fatcat_tools/importers/ingest.py
index 483932ad..ae3e147a 100644
--- a/python/fatcat_tools/importers/ingest.py
+++ b/python/fatcat_tools/importers/ingest.py
@@ -133,6 +133,8 @@ class IngestFileResultImporter(EntityImporter):
extid = request['ext_ids'].get(extid_type)
if not extid:
continue
+ if extid_type == 'doi':
+ extid = extid.lower()
try:
release = self.api.lookup_release(**{extid_type: extid})
except fatcat_openapi_client.rest.ApiException as err:
@@ -217,6 +219,8 @@ class IngestFileResultImporter(EntityImporter):
if request.get('link_source') and request.get('link_source_id'):
edit_extra['link_source'] = request['link_source']
edit_extra['link_source_id'] = request['link_source_id']
+ if edit_extra['link_source'] == 'doi':
+ edit_extra['link_source_id'] = edit_extra['link_source_id'].lower()
return edit_extra