aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: bnewbold <bnewbold@archive.org> 2021-02-23 23:46:19 +0000
committer: bnewbold <bnewbold@archive.org> 2021-02-23 23:46:19 +0000
commit: bebd90f8010efe4bc5fff0987fd32d66f438913c (patch)
tree: 4e98ffcdb1add11d642fe9e2ed72eab571105ba6
parent: 99e37fca2ad74cd3e48d09b12b0d8a005d0de59e (diff)
parent: d31434324cfc1924d4d27b52dcc89f3b3e81ef19 (diff)
downloadfatcat-bebd90f8010efe4bc5fff0987fd32d66f438913c.tar.gz
fatcat-bebd90f8010efe4bc5fff0987fd32d66f438913c.zip
Merge branch 'bnewbold-es-schema' into 'master'
ES schema tweaks. See merge request webgroup/fatcat!95
-rw-r--r-- extra/elasticsearch/release_schema.json | 3
-rw-r--r-- proposals/2020_elasticsearch_schemas.md | 2
-rw-r--r-- python/fatcat_tools/transforms/elasticsearch.py | 4
3 files changed, 8 insertions, 1 deletion
diff --git a/extra/elasticsearch/release_schema.json b/extra/elasticsearch/release_schema.json
index 666a672f..e1f7a79a 100644
--- a/extra/elasticsearch/release_schema.json
+++ b/extra/elasticsearch/release_schema.json
@@ -72,6 +72,8 @@
"ark_id": { "type": "keyword", "normalizer": "default", "doc_values": false },
"mag_id": { "type": "keyword", "normalizer": "default", "doc_values": false },
"s2_id": { "type": "keyword", "normalizer": "default", "doc_values": false },
+ "dblp_id": { "type": "keyword", "normalizer": "default", "doc_values": false },
+ "doaj_id": { "type": "keyword", "normalizer": "default", "doc_values": false },
"license": { "type": "keyword", "normalizer": "default" },
"publisher": { "type": "text", "index": true, "analyzer": "textIcu", "search_analyzer":"textIcuSearch" },
"publisher_type": { "type": "keyword", "normalizer": "default" },
@@ -103,6 +105,7 @@
"is_preserved": { "type": "boolean" },
"in_kbart": { "type": "boolean" },
"in_jstor": { "type": "boolean" },
+ "in_doaj": { "type": "boolean" },
"in_dweb": { "type": "boolean" },
"in_web": { "type": "boolean" },
"in_ia": { "type": "boolean" },
diff --git a/proposals/2020_elasticsearch_schemas.md b/proposals/2020_elasticsearch_schemas.md
index c3e79073..8dbff219 100644
--- a/proposals/2020_elasticsearch_schemas.md
+++ b/proposals/2020_elasticsearch_schemas.md
@@ -1,5 +1,5 @@
-status: planning
+status: implemented
This document tracks "easy" elasticsearch schema and behavior changes that
could be made while being backwards compatible with the current v0.3 schema and
diff --git a/python/fatcat_tools/transforms/elasticsearch.py b/python/fatcat_tools/transforms/elasticsearch.py
index db860a09..f37aadba 100644
--- a/python/fatcat_tools/transforms/elasticsearch.py
+++ b/python/fatcat_tools/transforms/elasticsearch.py
@@ -72,6 +72,8 @@ def release_to_elasticsearch(entity: ReleaseEntity, force_bool: bool = True) ->
jstor_id = release.ext_ids.jstor,
ark_id = release.ext_ids.ark,
mag_id = release.ext_ids.mag,
+ dblp_id = release.ext_ids.dblp,
+ doaj_id = release.ext_ids.doaj,
)
t.update(dict(
@@ -84,6 +86,7 @@ def release_to_elasticsearch(entity: ReleaseEntity, force_bool: bool = True) ->
in_ia_sim = False,
in_kbart = None,
in_jstor = False,
+ in_doaj= bool(release.ext_ids.doaj),
in_shadows = False,
))
@@ -280,6 +283,7 @@ def _rte_container_helper(container: ContainerEntity, release_year: Optional[int
if c_extra.get('doaj'):
if c_extra['doaj'].get('as_of'):
t['is_oa'] = True
+ t['in_doaj'] = True
if c_extra.get('road'):
if c_extra['road'].get('as_of'):
t['is_oa'] = True