From 0591b388a55771ed0581102989e23ba22a720a27 Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Fri, 22 Jan 2021 19:23:51 -0800 Subject: add container_sherpa_color field, and populate it --- fatcat_scholar/schema.py | 36 ++++++++++++++++++------------------ fatcat_scholar/transform.py | 1 + schema/scholar_fulltext.v01.json | 1 + 3 files changed, 20 insertions(+), 18 deletions(-) diff --git a/fatcat_scholar/schema.py b/fatcat_scholar/schema.py index 480b8fa..434f735 100644 --- a/fatcat_scholar/schema.py +++ b/fatcat_scholar/schema.py @@ -99,6 +99,7 @@ class ScholarBiblio(BaseModel): container_ident: Optional[str] container_issnl: Optional[str] container_wikidata_qid: Optional[str] + container_sherpa_color: Optional[str] issns: List[str] container_type: Optional[str] contrib_count: Optional[int] @@ -469,38 +470,36 @@ def es_abstracts_from_release(release: ReleaseEntity) -> List[ScholarAbstract]: def es_biblio_from_release(release: ReleaseEntity) -> ScholarBiblio: + container_name = release.extra and release.extra.get("container_name") + if release.container: - publisher = release.container.publisher or release.publisher - publisher_type = release.container.extra and release.container.extra.get( - "publisher_type", None - ) - if not publisher_type or not isinstance(publisher_type, str): - publisher_type = None - container_name = release.container.name - container_original_name = ( - release.container.extra and release.container.extra.get("original_name") - ) - if not container_original_name or not isinstance(container_original_name, str): - container_original_name = None + publisher = release.publisher or release.container.publisher + container_name = container_name or release.container.name container_ident = release.container.ident container_type = release.container.container_type container_issnl = release.container.issnl issns = [] if container_issnl: issns.append(container_issnl) - if release.container.extra and release.container.extra.get("issne"): - issns.append(release.container.extra["issne"]) - if release.container.extra and release.container.extra.get("issnp"): - issns.append(release.container.extra["issnp"]) + publisher_type = None + container_original_name = None + if release.container.extra: + publisher_type = release.container.extra.get("publisher_type") + container_original_name = release.container.extra.get("original_name") + container_sherpa_color = release.container.extra.get("sherpa_romeo", {}).get("color") + if release.container.extra.get("issne"): + issns.append(release.container.extra["issne"]) + if release.container.extra.get("issnp"): + issns.append(release.container.extra["issnp"]) issns = list(set(issns)) else: - publisher = release.extra and release.extra.get("publisher") publisher_type = None - container_name = release.extra and release.extra.get("container_name") + publisher = release.publisher container_original_name = None container_ident = None container_type = None container_issnl = None + container_sherpa_color = None issns = [] first_page: Optional[str] = None @@ -547,6 +546,7 @@ def es_biblio_from_release(release: ReleaseEntity) -> ScholarBiblio: container_ident=container_ident, container_type=container_type, container_issnl=container_issnl, + container_sherpa_color=container_sherpa_color, issns=issns, # TODO; these filters sort of meh. refactor to be above? contrib_names=list( diff --git a/fatcat_scholar/transform.py b/fatcat_scholar/transform.py index 6c2df90..ba79e50 100644 --- a/fatcat_scholar/transform.py +++ b/fatcat_scholar/transform.py @@ -168,6 +168,7 @@ def es_biblio_from_sim(sim: Dict[str, Any]) -> ScholarBiblio: container_ident=None, # TODO container_type=None, # TODO container_issnl=None, # TODO + # container_sherpa_color issns=issns, # no contrib/affiliation info contrib_names=[], diff --git a/schema/scholar_fulltext.v01.json b/schema/scholar_fulltext.v01.json index 91afdc1..9e99026 100644 --- a/schema/scholar_fulltext.v01.json +++ b/schema/scholar_fulltext.v01.json @@ -118,6 +118,7 @@ "container_ident": { "type": "keyword", "normalizer": "default" }, "container_issnl": { "type": "keyword", "normalizer": "default" }, "container_wikidata_qid": { "type": "keyword", "normalizer": "default" }, + "container_sherpa_color": { "type": "keyword", "normalizer": "default" }, "issns": { "type": "keyword", "normalizer": "default" }, "container_type": { "type": "keyword", "normalizer": "default" }, "contrib_count": { "type": "short" }, -- cgit v1.2.3