aboutsummaryrefslogtreecommitdiffstats
path: root/fatcat_scholar/transform.py
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@archive.org> 2020-06-04 13:18:35 -0700
committer: Bryan Newbold <bnewbold@archive.org> 2020-06-04 13:18:35 -0700
commit: 198db52d3a93a2b7d7cab0a4140c6402a14eca84 (patch)
tree: b34d79b605c0a79e0f875f5b0bd3944e72381cd1 /fatcat_scholar/transform.py
parent: 35ff62b6383ba07f9549edbb652f04fa69fb046c (diff)
download: fatcat-scholar-198db52d3a93a2b7d7cab0a4140c6402a14eca84.tar.gz
fatcat-scholar-198db52d3a93a2b7d7cab0a4140c6402a14eca84.zip
collapse pages by SIM issue
Diffstat (limited to 'fatcat_scholar/transform.py')
-rw-r--r-- fatcat_scholar/transform.py 3
1 files changed, 3 insertions, 0 deletions
diff --git a/fatcat_scholar/transform.py b/fatcat_scholar/transform.py
index 3d47fb4..847cc6e 100644
--- a/fatcat_scholar/transform.py
+++ b/fatcat_scholar/transform.py
@@ -187,6 +187,7 @@ def transform_heavy(heavy: IntermediateBundle) -> Optional[ScholarDoc]:
tags: List[str] = []
work_ident: Optional[str] = None
+ sim_issue: Optional[str] = None
abstracts: List[ScholarAbstract] = []
fulltext: Optional[ScholarFulltext] = None
primary_release: Optional[ReleaseEntity] = None
@@ -199,6 +200,7 @@ def transform_heavy(heavy: IntermediateBundle) -> Optional[ScholarDoc]:
if heavy.doc_type == DocType.sim_page:
assert ia_sim is not None
key = f"page_{ia_sim.issue_item}_{ia_sim.first_page}"
+ sim_issue = ia_sim.issue_item
biblio = es_biblio_from_sim(heavy.sim_fulltext)
fulltext = es_fulltext_from_sim(heavy.sim_fulltext)
elif heavy.doc_type == DocType.work:
@@ -316,6 +318,7 @@ def transform_heavy(heavy: IntermediateBundle) -> Optional[ScholarDoc]:
return ScholarDoc(
key=key,
+ collapse_key=sim_issue or work_ident,
doc_type=heavy.doc_type.value,
doc_index_ts=datetime.datetime.utcnow(),
work_ident=work_ident,