From ddf41a84f6cde6d5489a291a39f026e7c2672b87 Mon Sep 17 00:00:00 2001
From: Bryan Newbold
Date: Mon, 29 Jun 2020 22:13:17 -0700
Subject: handle large/bad 'first_page' metadata

This was causing elasticsearch indexing errors
---
 fatcat_scholar/schema.py | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'fatcat_scholar')

diff --git a/fatcat_scholar/schema.py b/fatcat_scholar/schema.py
index 2ee7d54..19f148b 100644
--- a/fatcat_scholar/schema.py
+++ b/fatcat_scholar/schema.py
@@ -314,6 +314,9 @@ def es_biblio_from_release(release: ReleaseEntity) -> ScholarBiblio:
     first_page_int: Optional[int] = None
     if first_page and first_page.isdigit():
         first_page_int = int(first_page)
+        # catch metadata errors which result in ES indexing errors
+        if abs(first_page_int) > 1000000:
+            first_page_int = None
 
     ret = ScholarBiblio(
         release_ident=release.ident,
-- 
cgit v1.2.3