diff options
author | Bryan Newbold <bnewbold@archive.org> | 2020-06-04 13:18:35 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2020-06-04 13:18:35 -0700 |
commit | 198db52d3a93a2b7d7cab0a4140c6402a14eca84 (patch) | |
tree | b34d79b605c0a79e0f875f5b0bd3944e72381cd1 /fatcat_scholar/transform.py | |
parent | 35ff62b6383ba07f9549edbb652f04fa69fb046c (diff) | |
download | fatcat-scholar-198db52d3a93a2b7d7cab0a4140c6402a14eca84.tar.gz fatcat-scholar-198db52d3a93a2b7d7cab0a4140c6402a14eca84.zip |
collapse pages by SIM issue
Diffstat (limited to 'fatcat_scholar/transform.py')
-rw-r--r-- | fatcat_scholar/transform.py | 3 |
1 file changed, 3 insertions, 0 deletions
```diff
diff --git a/fatcat_scholar/transform.py b/fatcat_scholar/transform.py
index 3d47fb4..847cc6e 100644
--- a/fatcat_scholar/transform.py
+++ b/fatcat_scholar/transform.py
@@ -187,6 +187,7 @@ def transform_heavy(heavy: IntermediateBundle) -> Optional[ScholarDoc]:
 
     tags: List[str] = []
     work_ident: Optional[str] = None
+    sim_issue: Optional[str] = None
     abstracts: List[ScholarAbstract] = []
     fulltext: Optional[ScholarFulltext] = None
     primary_release: Optional[ReleaseEntity] = None
@@ -199,6 +200,7 @@ def transform_heavy(heavy: IntermediateBundle) -> Optional[ScholarDoc]:
     if heavy.doc_type == DocType.sim_page:
         assert ia_sim is not None
         key = f"page_{ia_sim.issue_item}_{ia_sim.first_page}"
+        sim_issue = ia_sim.issue_item
         biblio = es_biblio_from_sim(heavy.sim_fulltext)
         fulltext = es_fulltext_from_sim(heavy.sim_fulltext)
     elif heavy.doc_type == DocType.work:
@@ -316,6 +318,7 @@ def transform_heavy(heavy: IntermediateBundle) -> Optional[ScholarDoc]:
 
     return ScholarDoc(
         key=key,
+        collapse_key=sim_issue or work_ident,
         doc_type=heavy.doc_type.value,
         doc_index_ts=datetime.datetime.utcnow(),
         work_ident=work_ident,
```