From de0fb59f0e36d8079649feefb7592189d8f7c6ed Mon Sep 17 00:00:00 2001
From: Bryan Newbold
Date: Fri, 7 Aug 2020 20:05:29 -0700
Subject: release ES transform tweaks

pass-through publisher_type from container extra metadata (ES field
already existed; this is from newer chocula metadata)

count arxiv and PMCID papers which haven't been crawled (by IA) as
"dark", not "bright"
---
 python/fatcat_tools/transforms/elasticsearch.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

(limited to 'python/fatcat_tools/transforms')

diff --git a/python/fatcat_tools/transforms/elasticsearch.py b/python/fatcat_tools/transforms/elasticsearch.py
index 2eb18fbf..a618992c 100644
--- a/python/fatcat_tools/transforms/elasticsearch.py
+++ b/python/fatcat_tools/transforms/elasticsearch.py
@@ -162,6 +162,8 @@ def release_to_elasticsearch(entity, force_bool=True):
             if c_extra.get('country'):
                 t['country_code'] = c_extra['country']
                 t['country_code_upper'] = c_extra['country'].upper()
+            if c_extra.get('publisher_type'):
+                t['publisher_type'] = c_extra['publisher_type']
 
     # fall back to release-level container metadata if container not linked or
     # missing context
@@ -297,11 +299,11 @@ def release_to_elasticsearch(entity, force_bool=True):
         t['in_shadows'] = in_shadows
 
     t['in_ia'] = bool(in_ia)
-    t['is_preserved'] = bool(is_preserved or in_ia or in_kbart or in_jstor)
+    t['is_preserved'] = bool(is_preserved or in_ia or in_kbart or in_jstor or t.get('pmcid') or t.get('arxiv_id'))
 
-    if in_ia or t.get('pmcid') or t.get('arxiv_id'):
+    if in_ia:
         t['preservation'] = 'bright'
-    elif in_kbart or in_jstor:
+    elif in_kbart or in_jstor or t.get('pmcid') or t.get('arxiv_id'):
         t['preservation'] = 'dark'
     elif in_shadows:
         t['preservation'] = 'shadows_only'
-- 
cgit v1.2.3