diff options
Diffstat (limited to 'python/fatcat_tools/normal.py')
-rw-r--r-- | python/fatcat_tools/normal.py | 22 |
1 files changed, 12 insertions, 10 deletions
diff --git a/python/fatcat_tools/normal.py b/python/fatcat_tools/normal.py index 12c58829..daf47ded 100644 --- a/python/fatcat_tools/normal.py +++ b/python/fatcat_tools/normal.py @@ -15,7 +15,7 @@ import pycountry DOI_REGEX = re.compile(r"^10.\d{3,6}/\S+$") -def clean_doi(raw: str) -> Optional[str]: +def clean_doi(raw: Optional[str]) -> Optional[str]: """ Removes any: - padding whitespace @@ -95,7 +95,7 @@ def test_clean_doi(): ARXIV_ID_REGEX = re.compile(r"^(\d{4}.\d{4,5}|[a-z\-]+(\.[A-Z]{2})?/\d{7})(v\d+)?$") -def clean_arxiv_id(raw: str) -> Optional[str]: +def clean_arxiv_id(raw: Optional[str]) -> Optional[str]: """ Removes any: - 'arxiv:' prefix @@ -170,7 +170,7 @@ def test_clean_wikidata_qid(): assert clean_wikidata_qid("") is None -def clean_pmid(raw: str) -> Optional[str]: +def clean_pmid(raw: Optional[str]) -> Optional[str]: if not raw: return None raw = raw.strip() @@ -189,7 +189,7 @@ def test_clean_pmid(): assert clean_pmid("") is None -def clean_pmcid(raw: str) -> Optional[str]: +def clean_pmcid(raw: Optional[str]) -> Optional[str]: if not raw: return None raw = raw.strip() @@ -200,7 +200,7 @@ def clean_pmcid(raw: str) -> Optional[str]: return None -def clean_sha1(raw: str) -> Optional[str]: +def clean_sha1(raw: Optional[str]) -> Optional[str]: if not raw: return None raw = raw.strip().lower() @@ -228,7 +228,9 @@ def test_clean_sha1(): assert clean_sha1("0fba3fb a0e1937aa0297de3836b768b5dfb23d7b") is None -def clean_sha256(raw: str) -> Optional[str]: +def clean_sha256(raw: Optional[str]) -> Optional[str]: + if not raw: + return None raw = raw.strip().lower() if len(raw.split()) != 1: return None @@ -251,7 +253,7 @@ def test_clean_sha256(): ISSN_REGEX = re.compile(r"^\d{4}-\d{3}[0-9X]$") -def clean_issn(raw: str) -> Optional[str]: +def clean_issn(raw: Optional[str]) -> Optional[str]: if not raw: return None raw = raw.strip().upper() @@ -272,7 +274,7 @@ def test_clean_issn(): ISBN13_REGEX = re.compile(r"^97(?:8|9)-\d{1,5}-\d{1,7}-\d{1,6}-\d$") -def clean_isbn13(raw: str) -> Optional[str]: +def clean_isbn13(raw: Optional[str]) -> Optional[str]: if not raw: return None raw = raw.strip() @@ -291,7 +293,7 @@ def test_clean_isbn13(): ORCID_REGEX = re.compile(r"^\d{4}-\d{4}-\d{4}-\d{3}[\dX]$") -def clean_orcid(raw: str) -> Optional[str]: +def clean_orcid(raw: Optional[str]) -> Optional[str]: if not raw: return None raw = raw.strip() @@ -472,7 +474,7 @@ def test_parse_month() -> None: assert parse_month("September") == 9 -def detect_text_lang(raw: str) -> Optional[str]: +def detect_text_lang(raw: Optional[str]) -> Optional[str]: """ Tries to determine language of, eg, an abstract. |