From eeb456c16d016d8523023f787597efae7a6317b9 Mon Sep 17 00:00:00 2001
From: Bryan Newbold
Date: Mon, 26 Jul 2021 15:11:44 -0700
Subject: better parsing of year as integer in refs pipeline

---
 fatcat_scholar/schema.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'fatcat_scholar/schema.py')

diff --git a/fatcat_scholar/schema.py b/fatcat_scholar/schema.py
index bc6b016..0fcf56e 100644
--- a/fatcat_scholar/schema.py
+++ b/fatcat_scholar/schema.py
@@ -301,9 +301,12 @@ class RefTarget(BaseModel):
 
 
 def clean_small_int(raw: Optional[str]) -> Optional[int]:
-    if not raw or not raw.isdigit():
+    if not raw or not raw.strip().isdigit():
+        return None
+    try:
+        val = int(raw.strip())
+    except ValueError:
         return None
-    val = int(raw)
     if abs(val) > 30000:
         return None
     return val
@@ -318,6 +321,7 @@ def test_clean_small_int() -> None:
     assert clean_small_int("1200003") == None
     assert clean_small_int("-123") == None
     assert clean_small_int("48844") == None
+    assert clean_small_int("1990²") == None
 
 
 def doi_split_prefix(doi: str) -> str:
-- 
cgit v1.2.3