aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Martin Czygan <martin@archive.org> 2021-07-02 16:13:42 +0000
committer Martin Czygan <martin@archive.org> 2021-07-02 16:13:42 +0000
commit 31ce8147ee2e568314eea838df0e1ef182fe40a1 (patch)
tree b1c019aff5c8c10fe497715110660633f23f0dfe
parent 5f631cba958c1b67c64202f034569f788689e289 (diff)
parent fa11747574f086e99459914f93d24bad7a8eacce (diff)
download fatcat-31ce8147ee2e568314eea838df0e1ef182fe40a1.tar.gz
fatcat-31ce8147ee2e568314eea838df0e1ef182fe40a1.zip
Merge branch 'bnewbold-more-doi-lower' into 'master'
more consistent and defensive lower-casing of DOIs

See merge request webgroup/fatcat!109
-rw-r--r-- python/fatcat_tools/importers/ingest.py 4
-rw-r--r-- python/fatcat_tools/importers/jalc.py 3
-rw-r--r-- python/fatcat_tools/transforms/ingest.py 4
3 files changed, 8 insertions, 3 deletions
diff --git a/python/fatcat_tools/importers/ingest.py b/python/fatcat_tools/importers/ingest.py
index 483932ad..ae3e147a 100644
--- a/python/fatcat_tools/importers/ingest.py
+++ b/python/fatcat_tools/importers/ingest.py
@@ -133,6 +133,8 @@ class IngestFileResultImporter(EntityImporter):
extid = request['ext_ids'].get(extid_type)
if not extid:
continue
+ if extid_type == 'doi':
+ extid = extid.lower()
try:
release = self.api.lookup_release(**{extid_type: extid})
except fatcat_openapi_client.rest.ApiException as err:
@@ -217,6 +219,8 @@ class IngestFileResultImporter(EntityImporter):
if request.get('link_source') and request.get('link_source_id'):
edit_extra['link_source'] = request['link_source']
edit_extra['link_source_id'] = request['link_source_id']
+ if edit_extra['link_source'] == 'doi':
+ edit_extra['link_source_id'] = edit_extra['link_source_id'].lower()
return edit_extra
diff --git a/python/fatcat_tools/importers/jalc.py b/python/fatcat_tools/importers/jalc.py
index 9bf2621c..12f5450f 100644
--- a/python/fatcat_tools/importers/jalc.py
+++ b/python/fatcat_tools/importers/jalc.py
@@ -5,6 +5,7 @@ import datetime
from bs4 import BeautifulSoup
import fatcat_openapi_client
+from fatcat_tools.normal import clean_doi
from .common import EntityImporter, clean, is_cjk, DATE_FMT
@@ -171,7 +172,7 @@ class JalcImporter(EntityImporter):
doi = None
if record.doi:
- doi = record.doi.string.lower().strip()
+ doi = clean_doi(record.doi.string.strip().lower())
if doi.startswith('http://dx.doi.org/'):
doi = doi.replace('http://dx.doi.org/', '')
elif doi.startswith('https://dx.doi.org/'):
diff --git a/python/fatcat_tools/transforms/ingest.py b/python/fatcat_tools/transforms/ingest.py
index 9aaeaa84..42927b2a 100644
--- a/python/fatcat_tools/transforms/ingest.py
+++ b/python/fatcat_tools/transforms/ingest.py
@@ -50,9 +50,9 @@ def release_ingest_request(release, ingest_request_source='fatcat', ingest_type=
link_source = "pmc"
link_source_id = release.ext_ids.pmcid
elif release.ext_ids.doi:
- url = "https://doi.org/{}".format(release.ext_ids.doi)
+ url = "https://doi.org/{}".format(release.ext_ids.doi.lower())
link_source = "doi"
- link_source_id = release.ext_ids.doi
+ link_source_id = release.ext_ids.doi.lower()
if not url:
return None