summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@archive.org> 2021-02-04 18:09:25 -0800
committer: Bryan Newbold <bnewbold@archive.org> 2021-02-04 18:09:27 -0800
commit: 9b5e1827331f1c609b3af9fef6067dc91bad53f2 (patch)
tree: d4fabca088d83da7880586d830633b84877aaca6
parent: dbff9cb6342261bc12ccd831d5bf06b485e5a794 (diff)
download: fatcat-scholar-9b5e1827331f1c609b3af9fef6067dc91bad53f2.tar.gz
          fatcat-scholar-9b5e1827331f1c609b3af9fef6067dc91bad53f2.zip
make identifiers a bit more case-insensitive
This only works for a subset of query patterns.
-rw-r--r-- fatcat_scholar/identifiers.py | 6
1 files changed, 4 insertions, 2 deletions
diff --git a/fatcat_scholar/identifiers.py b/fatcat_scholar/identifiers.py
index 34e9ebb..583c8e6 100644
--- a/fatcat_scholar/identifiers.py
+++ b/fatcat_scholar/identifiers.py
@@ -17,7 +17,7 @@ def clean_doi(raw: Optional[str]) -> Optional[str]:
"""
if not raw:
return None
- raw = raw.strip()
+ raw = raw.strip().lower()
if "\u2013" in raw:
# Do not attempt to normalize "en dash" and since FC does not allow
# unicode in DOI, treat this as invalid.
@@ -62,6 +62,7 @@ def test_clean_doi() -> None:
assert clean_doi("asdf") == None
assert clean_doi("10.123") == None
assert clean_doi("10.1234/asdf ") == "10.1234/asdf"
+ assert clean_doi("10.1234/ASdf ") == "10.1234/asdf"
assert clean_doi("10.1037//0002-9432.72.1.50") == "10.1037/0002-9432.72.1.50"
assert clean_doi("10.1037/0002-9432.72.1.50") == "10.1037/0002-9432.72.1.50"
assert clean_doi("10.23750/abm.v88i2 -s.6506") == None
@@ -79,7 +80,7 @@ def test_clean_doi() -> None:
def clean_pmcid(raw: Optional[str]) -> Optional[str]:
if not raw:
return None
- raw = raw.strip()
+ raw = raw.strip().upper()
if len(raw.split()) != 1:
return None
if raw.startswith("PMC") and raw[3:] and raw[3:].isdigit():
@@ -93,3 +94,4 @@ def test_clean_pmcid() -> None:
assert clean_pmcid("1 2") == None
assert clean_pmcid(None) == None
assert clean_pmcid("PMC123") == "PMC123"
+ assert clean_pmcid("pmc123") == "PMC123"