diff options
author | Bryan Newbold <bnewbold@archive.org> | 2020-08-06 16:21:30 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2020-08-06 19:01:12 -0700 |
commit | c56a5361e3c7ec19e8c461c48c4619e3b62e10fd (patch) | |
tree | e98456af5198c6edcf4257913bd534ab16bda373 /fatcat_scholar/transform.py | |
parent | 45a33f49d111d929d4b24321af94060b1942f7ba (diff) | |
download | fatcat-scholar-c56a5361e3c7ec19e8c461c48c4619e3b62e10fd.tar.gz fatcat-scholar-c56a5361e3c7ec19e8c461c48c4619e3b62e10fd.zip |
don't index sim_page unless both issue_item and first_page are present
Diffstat (limited to 'fatcat_scholar/transform.py')
-rw-r--r-- | fatcat_scholar/transform.py | 3 |
1 file changed, 3 insertions, 0 deletions
diff --git a/fatcat_scholar/transform.py b/fatcat_scholar/transform.py
index e02043b..a238d81 100644
--- a/fatcat_scholar/transform.py
+++ b/fatcat_scholar/transform.py
@@ -216,6 +216,9 @@ def transform_heavy(heavy: IntermediateBundle) -> Optional[ScholarDoc]:
     if heavy.doc_type == DocType.sim_page:
         assert ia_sim is not None
         assert heavy.sim_fulltext is not None
+        if not ia_sim.first_page or not ia_sim.issue_item:
+            # can't create a valid key if we don't have these fields, so shouldn't index
+            return None
         key = f"page_{ia_sim.issue_item}_{ia_sim.first_page}"
         sim_issue = ia_sim.issue_item
         biblio = es_biblio_from_sim(heavy.sim_fulltext)