summary | refs | log | tree | commit | diff | stats
path: root/fatcat_scholar/schema.py
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@archive.org> 2020-06-29 22:13:17 -0700
committer Bryan Newbold <bnewbold@archive.org> 2020-06-29 22:13:19 -0700
commit ddf41a84f6cde6d5489a291a39f026e7c2672b87 (patch)
tree 939e433a3666b00bea9e5c42b455f1c3b82e272c /fatcat_scholar/schema.py
parent ac1c97af86e4072cf898e46de61bea9a2bfe0b93 (diff)
download fatcat-scholar-ddf41a84f6cde6d5489a291a39f026e7c2672b87.tar.gz
fatcat-scholar-ddf41a84f6cde6d5489a291a39f026e7c2672b87.zip
handle large/bad 'first_page' metadata
Such values were causing Elasticsearch indexing errors.
Diffstat (limited to 'fatcat_scholar/schema.py')
-rw-r--r-- fatcat_scholar/schema.py 3
1 file changed, 3 insertions, 0 deletions
diff --git a/fatcat_scholar/schema.py b/fatcat_scholar/schema.py
index 2ee7d54..19f148b 100644
--- a/fatcat_scholar/schema.py
+++ b/fatcat_scholar/schema.py
@@ -314,6 +314,9 @@ def es_biblio_from_release(release: ReleaseEntity) -> ScholarBiblio:
first_page_int: Optional[int] = None
if first_page and first_page.isdigit():
first_page_int = int(first_page)
+ # catch metadata errors which result in ES indexing errors
+ if abs(first_page_int) > 1000000:
+ first_page_int = None
ret = ScholarBiblio(
release_ident=release.ident,