diff options
-rw-r--r-- | fatcat_scholar/schema.py | 8 | ||||
-rw-r--r-- | fatcat_scholar/transform.py | 4 |
2 files changed, 8 insertions, 4 deletions
diff --git a/fatcat_scholar/schema.py b/fatcat_scholar/schema.py
index bc6b016..0fcf56e 100644
--- a/fatcat_scholar/schema.py
+++ b/fatcat_scholar/schema.py
@@ -301,9 +301,12 @@ class RefTarget(BaseModel):
 
 
 def clean_small_int(raw: Optional[str]) -> Optional[int]:
-    if not raw or not raw.isdigit():
+    if not raw or not raw.strip().isdigit():
+        return None
+    try:
+        val = int(raw.strip())
+    except ValueError:
         return None
-    val = int(raw)
     if abs(val) > 30000:
         return None
     return val
@@ -318,6 +321,7 @@ def test_clean_small_int() -> None:
     assert clean_small_int("1200003") == None
     assert clean_small_int("-123") == None
     assert clean_small_int("48844") == None
+    assert clean_small_int("1990²") == None
 
 
 def doi_split_prefix(doi: str) -> str:
diff --git a/fatcat_scholar/transform.py b/fatcat_scholar/transform.py
index 13bedb9..3a7102a 100644
--- a/fatcat_scholar/transform.py
+++ b/fatcat_scholar/transform.py
@@ -798,8 +798,8 @@ def refs_from_crossref(
             ref_container_name = series_title
 
         year = ref.get("year")
-        if year and year.isdigit():
-            year = int(year)
+        if year:
+            year = clean_small_int(year)
         else:
             year = None
         date = ref.get("date")