summary refs log tree commit diff stats
path: root/python/fatcat_tools/normal.py
diff options
context:
space:
mode:
Diffstat (limited to 'python/fatcat_tools/normal.py')
-rw-r--r-- python/fatcat_tools/normal.py 22
1 files changed, 12 insertions, 10 deletions
diff --git a/python/fatcat_tools/normal.py b/python/fatcat_tools/normal.py
index 12c58829..daf47ded 100644
--- a/python/fatcat_tools/normal.py
+++ b/python/fatcat_tools/normal.py
@@ -15,7 +15,7 @@ import pycountry
DOI_REGEX = re.compile(r"^10.\d{3,6}/\S+$")
-def clean_doi(raw: str) -> Optional[str]:
+def clean_doi(raw: Optional[str]) -> Optional[str]:
"""
Removes any:
- padding whitespace
@@ -95,7 +95,7 @@ def test_clean_doi():
ARXIV_ID_REGEX = re.compile(r"^(\d{4}.\d{4,5}|[a-z\-]+(\.[A-Z]{2})?/\d{7})(v\d+)?$")
-def clean_arxiv_id(raw: str) -> Optional[str]:
+def clean_arxiv_id(raw: Optional[str]) -> Optional[str]:
"""
Removes any:
- 'arxiv:' prefix
@@ -170,7 +170,7 @@ def test_clean_wikidata_qid():
assert clean_wikidata_qid("") is None
-def clean_pmid(raw: str) -> Optional[str]:
+def clean_pmid(raw: Optional[str]) -> Optional[str]:
if not raw:
return None
raw = raw.strip()
@@ -189,7 +189,7 @@ def test_clean_pmid():
assert clean_pmid("") is None
-def clean_pmcid(raw: str) -> Optional[str]:
+def clean_pmcid(raw: Optional[str]) -> Optional[str]:
if not raw:
return None
raw = raw.strip()
@@ -200,7 +200,7 @@ def clean_pmcid(raw: str) -> Optional[str]:
return None
-def clean_sha1(raw: str) -> Optional[str]:
+def clean_sha1(raw: Optional[str]) -> Optional[str]:
if not raw:
return None
raw = raw.strip().lower()
@@ -228,7 +228,9 @@ def test_clean_sha1():
assert clean_sha1("0fba3fb a0e1937aa0297de3836b768b5dfb23d7b") is None
-def clean_sha256(raw: str) -> Optional[str]:
+def clean_sha256(raw: Optional[str]) -> Optional[str]:
+ if not raw:
+ return None
raw = raw.strip().lower()
if len(raw.split()) != 1:
return None
@@ -251,7 +253,7 @@ def test_clean_sha256():
ISSN_REGEX = re.compile(r"^\d{4}-\d{3}[0-9X]$")
-def clean_issn(raw: str) -> Optional[str]:
+def clean_issn(raw: Optional[str]) -> Optional[str]:
if not raw:
return None
raw = raw.strip().upper()
@@ -272,7 +274,7 @@ def test_clean_issn():
ISBN13_REGEX = re.compile(r"^97(?:8|9)-\d{1,5}-\d{1,7}-\d{1,6}-\d$")
-def clean_isbn13(raw: str) -> Optional[str]:
+def clean_isbn13(raw: Optional[str]) -> Optional[str]:
if not raw:
return None
raw = raw.strip()
@@ -291,7 +293,7 @@ def test_clean_isbn13():
ORCID_REGEX = re.compile(r"^\d{4}-\d{4}-\d{4}-\d{3}[\dX]$")
-def clean_orcid(raw: str) -> Optional[str]:
+def clean_orcid(raw: Optional[str]) -> Optional[str]:
if not raw:
return None
raw = raw.strip()
@@ -472,7 +474,7 @@ def test_parse_month() -> None:
assert parse_month("September") == 9
-def detect_text_lang(raw: str) -> Optional[str]:
+def detect_text_lang(raw: Optional[str]) -> Optional[str]:
"""
Tries to determine language of, eg, an abstract.