diff options
author | Martin Czygan <martin@archive.org> | 2021-07-02 16:13:42 +0000 |
---|---|---|
committer | Martin Czygan <martin@archive.org> | 2021-07-02 16:13:42 +0000 |
commit | 31ce8147ee2e568314eea838df0e1ef182fe40a1 (patch) | |
tree | b1c019aff5c8c10fe497715110660633f23f0dfe | |
parent | 5f631cba958c1b67c64202f034569f788689e289 (diff) | |
parent | fa11747574f086e99459914f93d24bad7a8eacce (diff) | |
download | fatcat-31ce8147ee2e568314eea838df0e1ef182fe40a1.tar.gz fatcat-31ce8147ee2e568314eea838df0e1ef182fe40a1.zip |
Merge branch 'bnewbold-more-doi-lower' into 'master'
more consistent and defensive lower-casing of DOIs
See merge request webgroup/fatcat!109
-rw-r--r-- | python/fatcat_tools/importers/ingest.py | 4 | ||||
-rw-r--r-- | python/fatcat_tools/importers/jalc.py | 3 | ||||
-rw-r--r-- | python/fatcat_tools/transforms/ingest.py | 4 |
3 files changed, 8 insertions, 3 deletions
```diff
diff --git a/python/fatcat_tools/importers/ingest.py b/python/fatcat_tools/importers/ingest.py
index 483932ad..ae3e147a 100644
--- a/python/fatcat_tools/importers/ingest.py
+++ b/python/fatcat_tools/importers/ingest.py
@@ -133,6 +133,8 @@ class IngestFileResultImporter(EntityImporter):
                 extid = request['ext_ids'].get(extid_type)
                 if not extid:
                     continue
+                if extid_type == 'doi':
+                    extid = extid.lower()
                 try:
                     release = self.api.lookup_release(**{extid_type: extid})
                 except fatcat_openapi_client.rest.ApiException as err:
@@ -217,6 +219,8 @@ class IngestFileResultImporter(EntityImporter):
         if request.get('link_source') and request.get('link_source_id'):
             edit_extra['link_source'] = request['link_source']
             edit_extra['link_source_id'] = request['link_source_id']
+            if edit_extra['link_source'] == 'doi':
+                edit_extra['link_source_id'] = edit_extra['link_source_id'].lower()
 
         return edit_extra
 
diff --git a/python/fatcat_tools/importers/jalc.py b/python/fatcat_tools/importers/jalc.py
index 9bf2621c..12f5450f 100644
--- a/python/fatcat_tools/importers/jalc.py
+++ b/python/fatcat_tools/importers/jalc.py
@@ -5,6 +5,7 @@ import datetime
 from bs4 import BeautifulSoup
 
 import fatcat_openapi_client
+from fatcat_tools.normal import clean_doi
 from .common import EntityImporter, clean, is_cjk, DATE_FMT
 
 
@@ -171,7 +172,7 @@ class JalcImporter(EntityImporter):
 
         doi = None
         if record.doi:
-            doi = record.doi.string.lower().strip()
+            doi = clean_doi(record.doi.string.strip().lower())
             if doi.startswith('http://dx.doi.org/'):
                 doi = doi.replace('http://dx.doi.org/', '')
             elif doi.startswith('https://dx.doi.org/'):
diff --git a/python/fatcat_tools/transforms/ingest.py b/python/fatcat_tools/transforms/ingest.py
index 9aaeaa84..42927b2a 100644
--- a/python/fatcat_tools/transforms/ingest.py
+++ b/python/fatcat_tools/transforms/ingest.py
@@ -50,9 +50,9 @@ def release_ingest_request(release, ingest_request_source='fatcat', ingest_type=
         link_source = "pmc"
         link_source_id = release.ext_ids.pmcid
     elif release.ext_ids.doi:
-        url = "https://doi.org/{}".format(release.ext_ids.doi)
+        url = "https://doi.org/{}".format(release.ext_ids.doi.lower())
         link_source = "doi"
-        link_source_id = release.ext_ids.doi
+        link_source_id = release.ext_ids.doi.lower()
 
     if not url:
         return None
```