diff options
author | Bryan Newbold <bnewbold@archive.org> | 2020-06-29 22:13:17 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2020-06-29 22:13:19 -0700 |
commit | ddf41a84f6cde6d5489a291a39f026e7c2672b87 (patch) | |
tree | 939e433a3666b00bea9e5c42b455f1c3b82e272c /fatcat_scholar/schema.py | |
parent | ac1c97af86e4072cf898e46de61bea9a2bfe0b93 (diff) | |
download | fatcat-scholar-ddf41a84f6cde6d5489a291a39f026e7c2672b87.tar.gz fatcat-scholar-ddf41a84f6cde6d5489a291a39f026e7c2672b87.zip |
handle large/bad 'first_page' metadata
This was causing Elasticsearch indexing errors.
Diffstat (limited to 'fatcat_scholar/schema.py')
-rw-r--r-- | fatcat_scholar/schema.py | 3 |
1 file changed, 3 insertions, 0 deletions
diff --git a/fatcat_scholar/schema.py b/fatcat_scholar/schema.py index 2ee7d54..19f148b 100644 --- a/fatcat_scholar/schema.py +++ b/fatcat_scholar/schema.py @@ -314,6 +314,9 @@ def es_biblio_from_release(release: ReleaseEntity) -> ScholarBiblio: first_page_int: Optional[int] = None if first_page and first_page.isdigit(): first_page_int = int(first_page) + # catch metadata errors which result in ES indexing errors + if abs(first_page_int) > 1000000: + first_page_int = None ret = ScholarBiblio( release_ident=release.ident, |