diff options
author | bnewbold <bnewbold@archive.org> | 2021-02-23 23:46:19 +0000 |
---|---|---|
committer | bnewbold <bnewbold@archive.org> | 2021-02-23 23:46:19 +0000 |
commit | bebd90f8010efe4bc5fff0987fd32d66f438913c (patch) | |
tree | 4e98ffcdb1add11d642fe9e2ed72eab571105ba6 | |
parent | 99e37fca2ad74cd3e48d09b12b0d8a005d0de59e (diff) | |
parent | d31434324cfc1924d4d27b52dcc89f3b3e81ef19 (diff) | |
download | fatcat-bebd90f8010efe4bc5fff0987fd32d66f438913c.tar.gz fatcat-bebd90f8010efe4bc5fff0987fd32d66f438913c.zip |
Merge branch 'bnewbold-es-schema' into 'master'
ES schema tweaks
See merge request webgroup/fatcat!95
-rw-r--r-- | extra/elasticsearch/release_schema.json | 3 | ||||
-rw-r--r-- | proposals/2020_elasticsearch_schemas.md | 2 | ||||
-rw-r--r-- | python/fatcat_tools/transforms/elasticsearch.py | 4 |
3 files changed, 8 insertions, 1 deletions
```diff
diff --git a/extra/elasticsearch/release_schema.json b/extra/elasticsearch/release_schema.json
index 666a672f..e1f7a79a 100644
--- a/extra/elasticsearch/release_schema.json
+++ b/extra/elasticsearch/release_schema.json
@@ -72,6 +72,8 @@
         "ark_id": { "type": "keyword", "normalizer": "default", "doc_values": false },
         "mag_id": { "type": "keyword", "normalizer": "default", "doc_values": false },
         "s2_id": { "type": "keyword", "normalizer": "default", "doc_values": false },
+        "dblp_id": { "type": "keyword", "normalizer": "default", "doc_values": false },
+        "doaj_id": { "type": "keyword", "normalizer": "default", "doc_values": false },
         "license": { "type": "keyword", "normalizer": "default" },
         "publisher": { "type": "text", "index": true, "analyzer": "textIcu", "search_analyzer":"textIcuSearch" },
         "publisher_type": { "type": "keyword", "normalizer": "default" },
@@ -103,6 +105,7 @@
         "is_preserved": { "type": "boolean" },
         "in_kbart": { "type": "boolean" },
         "in_jstor": { "type": "boolean" },
+        "in_doaj": { "type": "boolean" },
         "in_dweb": { "type": "boolean" },
         "in_web": { "type": "boolean" },
         "in_ia": { "type": "boolean" },
diff --git a/proposals/2020_elasticsearch_schemas.md b/proposals/2020_elasticsearch_schemas.md
index c3e79073..8dbff219 100644
--- a/proposals/2020_elasticsearch_schemas.md
+++ b/proposals/2020_elasticsearch_schemas.md
@@ -1,5 +1,5 @@
 
-status: planning
+status: implemented
 
 This document tracks "easy" elasticsearch schema and behavior changes that
 could be made while being backwards compatible with the current v0.3 schema and
diff --git a/python/fatcat_tools/transforms/elasticsearch.py b/python/fatcat_tools/transforms/elasticsearch.py
index db860a09..f37aadba 100644
--- a/python/fatcat_tools/transforms/elasticsearch.py
+++ b/python/fatcat_tools/transforms/elasticsearch.py
@@ -72,6 +72,8 @@ def release_to_elasticsearch(entity: ReleaseEntity, force_bool: bool = True) ->
         jstor_id = release.ext_ids.jstor,
         ark_id = release.ext_ids.ark,
         mag_id = release.ext_ids.mag,
+        dblp_id = release.ext_ids.dblp,
+        doaj_id = release.ext_ids.doaj,
     )
 
     t.update(dict(
@@ -84,6 +86,7 @@ def release_to_elasticsearch(entity: ReleaseEntity, force_bool: bool = True) ->
         in_ia_sim = False,
         in_kbart = None,
         in_jstor = False,
+        in_doaj= bool(release.ext_ids.doaj),
         in_shadows = False,
     ))
 
@@ -280,6 +283,7 @@ def _rte_container_helper(container: ContainerEntity, release_year: Optional[int
         if c_extra.get('doaj'):
             if c_extra['doaj'].get('as_of'):
                 t['is_oa'] = True
+                t['in_doaj'] = True
         if c_extra.get('road'):
             if c_extra['road'].get('as_of'):
                 t['is_oa'] = True
```