From 88497e378e2e4c93906c7485865724133511d4e6 Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Tue, 12 Oct 2021 20:05:02 -0700 Subject: python: implement ES schema changes --- python/fatcat_tools/transforms/elasticsearch.py | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/python/fatcat_tools/transforms/elasticsearch.py b/python/fatcat_tools/transforms/elasticsearch.py index 4c6ee2e2..f7c8e3f3 100644 --- a/python/fatcat_tools/transforms/elasticsearch.py +++ b/python/fatcat_tools/transforms/elasticsearch.py @@ -63,6 +63,7 @@ def release_to_elasticsearch(entity: ReleaseEntity, force_bool: bool = True) -> pages = release.pages, number = release.number, license = release.license_slug, + version = release.version, doi = release.ext_ids.doi, pmid = release.ext_ids.pmid, pmcid = release.ext_ids.pmcid, @@ -75,6 +76,8 @@ def release_to_elasticsearch(entity: ReleaseEntity, force_bool: bool = True) -> mag_id = release.ext_ids.mag, dblp_id = release.ext_ids.dblp, doaj_id = release.ext_ids.doaj, + hdl = release.ext_ids.hdl, + tags = [], ) t.update(dict( @@ -157,11 +160,14 @@ def release_to_elasticsearch(entity: ReleaseEntity, force_bool: bool = True) -> if release.license_slug.startswith("ARXIV-"): t['is_oa'] = True + t['is_work_alias'] = None extra = release.extra or dict() if extra: if extra.get('is_oa'): # NOTE: not actually setting this anywhere... but could t['is_oa'] = True + if extra.get('is_work_alias') != None: + t['is_work_alias'] = bool(extra.get('is_work_alias')) if extra.get('longtail_oa'): # sometimes set by GROBID/matcher t['is_oa'] = True @@ -220,7 +226,8 @@ def release_to_elasticsearch(entity: ReleaseEntity, force_bool: bool = True) -> # optionally coerce all flags from Optional[bool] to bool if force_bool: for k in ('is_oa', 'is_longtail_oa', 'in_kbart', 'in_ia_sim', - 'in_jstor', 'in_web', 'in_dweb', 'in_shadows'): + 'in_jstor', 'in_web', 'in_dweb', 'in_shadows', + 'is_work_alias'): t[k] = bool(t[k]) t['in_ia'] = bool(t['in_ia']) @@ -256,7 +263,11 @@ def _rte_container_helper(container: ContainerEntity, release_year: Optional[int # be a redirect involved t['container_id'] = container.ident t['container_issnl'] = container.issnl + issns = [container.issnl, container.issne, container.issnp] + issns = list(set([i for i in issns if i])) + t['container_issns'] = issns t['container_type'] = container.container_type + t['container_publication_status'] = container.publication_status if container.extra: c_extra = container.extra if c_extra.get('kbart') and release_year: @@ -406,7 +417,10 @@ def container_to_elasticsearch(entity, force_bool=True, stats=None): name = entity.name, publisher = entity.publisher, container_type = entity.container_type, + publication_status= entity.publication_status, issnl = entity.issnl, + issne = entity.issne, + issnp = entity.issnp, wikidata_qid = entity.wikidata_qid, ) @@ -424,12 +438,11 @@ def container_to_elasticsearch(entity, force_bool=True, stats=None): if 'country' in t: t['country_code'] = t.pop('country') - t['issns'] = [] - if entity.issnl: - t['issns'].append(entity.issnl) + t['issns'] = [entity.issnl, entity.issne, entity.issnp] for key in ('issnp', 'issne'): if entity.extra.get(key): t['issns'].append(entity.extra[key]) + t['issns'] = list(set([i for i in t['issns'] if i])) in_doaj = None in_road = None -- cgit v1.2.3