From c56a5361e3c7ec19e8c461c48c4619e3b62e10fd Mon Sep 17 00:00:00 2001
From: Bryan Newbold
Date: Thu, 6 Aug 2020 16:21:30 -0700
Subject: don't index sim_page without issue_item and first_page

---
 fatcat_scholar/transform.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/fatcat_scholar/transform.py b/fatcat_scholar/transform.py
index e02043b..a238d81 100644
--- a/fatcat_scholar/transform.py
+++ b/fatcat_scholar/transform.py
@@ -216,6 +216,9 @@ def transform_heavy(heavy: IntermediateBundle) -> Optional[ScholarDoc]:
     if heavy.doc_type == DocType.sim_page:
         assert ia_sim is not None
         assert heavy.sim_fulltext is not None
+        if not ia_sim.first_page or not ia_sim.issue_item:
+            # can't create a valid key if we don't have these fields, so shouldn't index
+            return None
         key = f"page_{ia_sim.issue_item}_{ia_sim.first_page}"
         sim_issue = ia_sim.issue_item
         biblio = es_biblio_from_sim(heavy.sim_fulltext)
-- 
cgit v1.2.3