aboutsummaryrefslogtreecommitdiffstats
path: root/python/fatcat_tools/importers/jalc.py
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@robocracy.org> 2021-06-17 16:26:50 -0700
committer: Bryan Newbold <bnewbold@robocracy.org> 2021-06-23 17:51:15 -0700
commit: fa11747574f086e99459914f93d24bad7a8eacce (patch)
tree: 9e05825c26de60042142dd846cee31d85661a6b4 /python/fatcat_tools/importers/jalc.py
parent: cf2c3432d3c28d2ba0a92c62fabda4ca434afe46 (diff)
download: fatcat-fa11747574f086e99459914f93d24bad7a8eacce.tar.gz
download: fatcat-fa11747574f086e99459914f93d24bad7a8eacce.zip
more consistent and defensive lower-casing of DOIs
This change was made after noticing more upper/lower-case ambiguity in production. In particular, some old ingest requests in the sandcrawler DB, which get re-submitted/re-tried, have capitalized DOIs in the link source id field.
Diffstat (limited to 'python/fatcat_tools/importers/jalc.py')
-rw-r--r-- python/fatcat_tools/importers/jalc.py 3
1 files changed, 2 insertions, 1 deletions
diff --git a/python/fatcat_tools/importers/jalc.py b/python/fatcat_tools/importers/jalc.py
index 9bf2621c..12f5450f 100644
--- a/python/fatcat_tools/importers/jalc.py
+++ b/python/fatcat_tools/importers/jalc.py
@@ -5,6 +5,7 @@ import datetime
from bs4 import BeautifulSoup
import fatcat_openapi_client
+from fatcat_tools.normal import clean_doi
from .common import EntityImporter, clean, is_cjk, DATE_FMT
@@ -171,7 +172,7 @@ class JalcImporter(EntityImporter):
doi = None
if record.doi:
- doi = record.doi.string.lower().strip()
+ doi = clean_doi(record.doi.string.strip().lower())
if doi.startswith('http://dx.doi.org/'):
doi = doi.replace('http://dx.doi.org/', '')
elif doi.startswith('https://dx.doi.org/'):