summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@archive.org> 2021-01-22 19:23:51 -0800
committer: Bryan Newbold <bnewbold@archive.org> 2021-01-22 19:23:51 -0800
commit: 0591b388a55771ed0581102989e23ba22a720a27 (patch)
tree: b84726f03fa2dd0a4967cc1d27230de7cac6da32
parent: 4b77e914b6ceb8aab7d5be5ecca9f70472ba9b28 (diff)
download: fatcat-scholar-0591b388a55771ed0581102989e23ba22a720a27.tar.gz
fatcat-scholar-0591b388a55771ed0581102989e23ba22a720a27.zip
add container_sherpa_color field, and populate it
-rw-r--r-- fatcat_scholar/schema.py 36
-rw-r--r-- fatcat_scholar/transform.py 1
-rw-r--r-- schema/scholar_fulltext.v01.json 1
3 files changed, 20 insertions, 18 deletions
diff --git a/fatcat_scholar/schema.py b/fatcat_scholar/schema.py
index 480b8fa..434f735 100644
--- a/fatcat_scholar/schema.py
+++ b/fatcat_scholar/schema.py
@@ -99,6 +99,7 @@ class ScholarBiblio(BaseModel):
container_ident: Optional[str]
container_issnl: Optional[str]
container_wikidata_qid: Optional[str]
+ container_sherpa_color: Optional[str]
issns: List[str]
container_type: Optional[str]
contrib_count: Optional[int]
@@ -469,38 +470,36 @@ def es_abstracts_from_release(release: ReleaseEntity) -> List[ScholarAbstract]:
def es_biblio_from_release(release: ReleaseEntity) -> ScholarBiblio:
+ container_name = release.extra and release.extra.get("container_name")
+
if release.container:
- publisher = release.container.publisher or release.publisher
- publisher_type = release.container.extra and release.container.extra.get(
- "publisher_type", None
- )
- if not publisher_type or not isinstance(publisher_type, str):
- publisher_type = None
- container_name = release.container.name
- container_original_name = (
- release.container.extra and release.container.extra.get("original_name")
- )
- if not container_original_name or not isinstance(container_original_name, str):
- container_original_name = None
+ publisher = release.publisher or release.container.publisher
+ container_name = container_name or release.container.name
container_ident = release.container.ident
container_type = release.container.container_type
container_issnl = release.container.issnl
issns = []
if container_issnl:
issns.append(container_issnl)
- if release.container.extra and release.container.extra.get("issne"):
- issns.append(release.container.extra["issne"])
- if release.container.extra and release.container.extra.get("issnp"):
- issns.append(release.container.extra["issnp"])
+ publisher_type = None
+ container_original_name = None
+ if release.container.extra:
+ publisher_type = release.container.extra.get("publisher_type")
+ container_original_name = release.container.extra.get("original_name")
+ container_sherpa_color = release.container.extra.get("sherpa_romeo", {}).get("color")
+ if release.container.extra.get("issne"):
+ issns.append(release.container.extra["issne"])
+ if release.container.extra.get("issnp"):
+ issns.append(release.container.extra["issnp"])
issns = list(set(issns))
else:
- publisher = release.extra and release.extra.get("publisher")
publisher_type = None
- container_name = release.extra and release.extra.get("container_name")
+ publisher = release.publisher
container_original_name = None
container_ident = None
container_type = None
container_issnl = None
+ container_sherpa_color = None
issns = []
first_page: Optional[str] = None
@@ -547,6 +546,7 @@ def es_biblio_from_release(release: ReleaseEntity) -> ScholarBiblio:
container_ident=container_ident,
container_type=container_type,
container_issnl=container_issnl,
+ container_sherpa_color=container_sherpa_color,
issns=issns,
# TODO; these filters sort of meh. refactor to be above?
contrib_names=list(
diff --git a/fatcat_scholar/transform.py b/fatcat_scholar/transform.py
index 6c2df90..ba79e50 100644
--- a/fatcat_scholar/transform.py
+++ b/fatcat_scholar/transform.py
@@ -168,6 +168,7 @@ def es_biblio_from_sim(sim: Dict[str, Any]) -> ScholarBiblio:
container_ident=None, # TODO
container_type=None, # TODO
container_issnl=None, # TODO
+ # container_sherpa_color
issns=issns,
# no contrib/affiliation info
contrib_names=[],
diff --git a/schema/scholar_fulltext.v01.json b/schema/scholar_fulltext.v01.json
index 91afdc1..9e99026 100644
--- a/schema/scholar_fulltext.v01.json
+++ b/schema/scholar_fulltext.v01.json
@@ -118,6 +118,7 @@
"container_ident": { "type": "keyword", "normalizer": "default" },
"container_issnl": { "type": "keyword", "normalizer": "default" },
"container_wikidata_qid": { "type": "keyword", "normalizer": "default" },
+ "container_sherpa_color": { "type": "keyword", "normalizer": "default" },
"issns": { "type": "keyword", "normalizer": "default" },
"container_type": { "type": "keyword", "normalizer": "default" },
"contrib_count": { "type": "short" },