diff options
author | Bryan Newbold <bnewbold@archive.org> | 2021-02-04 18:09:25 -0800 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2021-02-04 18:09:27 -0800 |
commit | 9b5e1827331f1c609b3af9fef6067dc91bad53f2 (patch) | |
tree | d4fabca088d83da7880586d830633b84877aaca6 | |
parent | dbff9cb6342261bc12ccd831d5bf06b485e5a794 (diff) | |
download | fatcat-scholar-9b5e1827331f1c609b3af9fef6067dc91bad53f2.tar.gz fatcat-scholar-9b5e1827331f1c609b3af9fef6067dc91bad53f2.zip |
make identifiers a bit more case-insensitive
This only works for a subset of query patterns.
-rw-r--r-- | fatcat_scholar/identifiers.py | 6 |
1 file changed, 4 insertions, 2 deletions
```diff
diff --git a/fatcat_scholar/identifiers.py b/fatcat_scholar/identifiers.py
index 34e9ebb..583c8e6 100644
--- a/fatcat_scholar/identifiers.py
+++ b/fatcat_scholar/identifiers.py
@@ -17,7 +17,7 @@ def clean_doi(raw: Optional[str]) -> Optional[str]:
     """
     if not raw:
         return None
-    raw = raw.strip()
+    raw = raw.strip().lower()
     if "\u2013" in raw:
         # Do not attempt to normalize "en dash" and since FC does not allow
         # unicode in DOI, treat this as invalid.
@@ -62,6 +62,7 @@ def test_clean_doi() -> None:
     assert clean_doi("asdf") == None
     assert clean_doi("10.123") == None
     assert clean_doi("10.1234/asdf ") == "10.1234/asdf"
+    assert clean_doi("10.1234/ASdf ") == "10.1234/asdf"
     assert clean_doi("10.1037//0002-9432.72.1.50") == "10.1037/0002-9432.72.1.50"
     assert clean_doi("10.1037/0002-9432.72.1.50") == "10.1037/0002-9432.72.1.50"
     assert clean_doi("10.23750/abm.v88i2 -s.6506") == None
@@ -79,7 +80,7 @@ def test_clean_doi() -> None:
 def clean_pmcid(raw: Optional[str]) -> Optional[str]:
     if not raw:
         return None
-    raw = raw.strip()
+    raw = raw.strip().upper()
     if len(raw.split()) != 1:
         return None
     if raw.startswith("PMC") and raw[3:] and raw[3:].isdigit():
@@ -93,3 +94,4 @@ def test_clean_pmcid() -> None:
     assert clean_pmcid("1 2") == None
     assert clean_pmcid(None) == None
     assert clean_pmcid("PMC123") == "PMC123"
+    assert clean_pmcid("pmc123") == "PMC123"
```