From 198db52d3a93a2b7d7cab0a4140c6402a14eca84 Mon Sep 17 00:00:00 2001
From: Bryan Newbold
Date: Thu, 4 Jun 2020 13:18:35 -0700
Subject: collapse pages by SIM issue

---
 fatcat_scholar/transform.py | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'fatcat_scholar/transform.py')

diff --git a/fatcat_scholar/transform.py b/fatcat_scholar/transform.py
index 3d47fb4..847cc6e 100644
--- a/fatcat_scholar/transform.py
+++ b/fatcat_scholar/transform.py
@@ -187,6 +187,7 @@ def transform_heavy(heavy: IntermediateBundle) -> Optional[ScholarDoc]:
 
     tags: List[str] = []
     work_ident: Optional[str] = None
+    sim_issue: Optional[str] = None
     abstracts: List[ScholarAbstract] = []
     fulltext: Optional[ScholarFulltext] = None
     primary_release: Optional[ReleaseEntity] = None
@@ -199,6 +200,7 @@ def transform_heavy(heavy: IntermediateBundle) -> Optional[ScholarDoc]:
     if heavy.doc_type == DocType.sim_page:
         assert ia_sim is not None
         key = f"page_{ia_sim.issue_item}_{ia_sim.first_page}"
+        sim_issue = ia_sim.issue_item
         biblio = es_biblio_from_sim(heavy.sim_fulltext)
         fulltext = es_fulltext_from_sim(heavy.sim_fulltext)
     elif heavy.doc_type == DocType.work:
@@ -316,6 +318,7 @@ def transform_heavy(heavy: IntermediateBundle) -> Optional[ScholarDoc]:
 
     return ScholarDoc(
         key=key,
+        collapse_key=sim_issue or work_ident,
         doc_type=heavy.doc_type.value,
         doc_index_ts=datetime.datetime.utcnow(),
         work_ident=work_ident,
-- 
cgit v1.2.3