From 9946a6a02d879b8d7a9ee1c141801a33f4f2aa57 Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Thu, 8 Apr 2021 15:10:24 -0700 Subject: ES: rename fatcat_ref.json to ref_schema.json for consistency; add to README --- extra/elasticsearch/README.md | 5 +- extra/elasticsearch/fatcat_ref.json | 111 ------------------------------------ extra/elasticsearch/ref_schema.json | 111 ++++++++++++++++++++++++++++++++++++ 3 files changed, 115 insertions(+), 112 deletions(-) delete mode 100644 extra/elasticsearch/fatcat_ref.json create mode 100644 extra/elasticsearch/ref_schema.json diff --git a/extra/elasticsearch/README.md b/extra/elasticsearch/README.md index 196ac588..d2a34a24 100644 --- a/extra/elasticsearch/README.md +++ b/extra/elasticsearch/README.md @@ -27,7 +27,8 @@ from it's `files`. ## Setting Up Elasticsearch -We use elasticsearch version 6.x, with the `analysis-icu` plugin installed: +We use elasticsearch version 7.10 (the final "open source" release), with the +`analysis-icu` plugin installed: sudo /usr/share/elasticsearch/bin/elasticsearch-plugin install analysis-icu sudo service elasticsearch restart @@ -42,10 +43,12 @@ Drop and rebuild the schema: http delete :9200/fatcat_container http delete :9200/fatcat_file http delete :9200/fatcat_changelog + http delete :9200/fatcat_ref http put :9200/fatcat_release?include_type_name=true < release_schema.json http put :9200/fatcat_container?include_type_name=true < container_schema.json http put :9200/fatcat_file?include_type_name=true < file_schema.json http put :9200/fatcat_changelog?include_type_name=true < changelog_schema.json + http put :9200/fatcat_ref?include_type_name=true < ref_schema.json Put a single object (good for debugging): diff --git a/extra/elasticsearch/fatcat_ref.json b/extra/elasticsearch/fatcat_ref.json deleted file mode 100644 index 32c7a3cd..00000000 --- a/extra/elasticsearch/fatcat_ref.json +++ /dev/null @@ -1,111 +0,0 @@ -{ - "settings": { - "index": { - "number_of_shards": 12, - "number_of_replicas": 0, - "analysis": { - "normalizer": { - "default": { - "type": "custom", - "char_filter": [], - "filter": [ - "lowercase" - ] - }, - "caseSensitive": { - "type": "custom", - "char_filter": [], - "filter": [] - } - } - } - } - }, - "mappings": { - "properties": { - "indexed_ts": { - "type": "date" - }, - "source_release_ident": { - "type": "keyword", - "normalizer": "default" - }, - "source_work_ident": { - "type": "keyword", - "normalizer": "default" - }, - "source_wikipedia_article": { - "type": "keyword", - "normalizer": "default" - }, - "source_release_stage": { - "type": "keyword", - "normalizer": "default", - "doc_values": false - }, - "source_release_year": { - "type": "integer" - }, - "ref_index": { - "type": "integer", - "doc_values": false, - "index": false - }, - "ref_key": { - "type": "keyword", - "normalizer": "default", - "doc_values": false, - "index": false - }, - "ref_locator": { - "type": "keyword", - "normalizer": "default", - "doc_values": false, - "index": false - }, - "target_release_ident": { - "type": "keyword", - "normalizer": "default" - }, - "target_work_ident": { - "type": "keyword", - "normalizer": "default" - }, - "target_openlibrary_work": { - "type": "keyword", - "normalizer": "default" - }, - "target_url_surt": { - "type": "keyword", - "normalizer": "default", - "doc_values": false - }, - "match_provenance": { - "type": "keyword", - "normalizer": "default" - }, - "match_status": { - "type": "keyword", - "normalizer": "default" - }, - "match_reason": { - "type": "keyword", - "normalizer": "default" - }, - "target_unstructured": { - "type": "text", - "doc_values": false, - "index": false - }, - "target_csl": { - "type": "object", - "enabled": false - }, - - "doc_index_ts": { - "type": "alias", - "path": "indexed_ts" - } - } - } -} diff --git a/extra/elasticsearch/ref_schema.json b/extra/elasticsearch/ref_schema.json new file mode 100644 index 00000000..32c7a3cd --- /dev/null +++ b/extra/elasticsearch/ref_schema.json @@ -0,0 +1,111 @@ +{ + "settings": { + "index": { + "number_of_shards": 12, + "number_of_replicas": 0, + "analysis": { + "normalizer": { + "default": { + "type": "custom", + "char_filter": [], + "filter": [ + "lowercase" + ] + }, + "caseSensitive": { + "type": "custom", + "char_filter": [], + "filter": [] + } + } + } + } + }, + "mappings": { + "properties": { + "indexed_ts": { + "type": "date" + }, + "source_release_ident": { + "type": "keyword", + "normalizer": "default" + }, + "source_work_ident": { + "type": "keyword", + "normalizer": "default" + }, + "source_wikipedia_article": { + "type": "keyword", + "normalizer": "default" + }, + "source_release_stage": { + "type": "keyword", + "normalizer": "default", + "doc_values": false + }, + "source_release_year": { + "type": "integer" + }, + "ref_index": { + "type": "integer", + "doc_values": false, + "index": false + }, + "ref_key": { + "type": "keyword", + "normalizer": "default", + "doc_values": false, + "index": false + }, + "ref_locator": { + "type": "keyword", + "normalizer": "default", + "doc_values": false, + "index": false + }, + "target_release_ident": { + "type": "keyword", + "normalizer": "default" + }, + "target_work_ident": { + "type": "keyword", + "normalizer": "default" + }, + "target_openlibrary_work": { + "type": "keyword", + "normalizer": "default" + }, + "target_url_surt": { + "type": "keyword", + "normalizer": "default", + "doc_values": false + }, + "match_provenance": { + "type": "keyword", + "normalizer": "default" + }, + "match_status": { + "type": "keyword", + "normalizer": "default" + }, + "match_reason": { + "type": "keyword", + "normalizer": "default" + }, + "target_unstructured": { + "type": "text", + "doc_values": false, + "index": false + }, + "target_csl": { + "type": "object", + "enabled": false + }, + + "doc_index_ts": { + "type": "alias", + "path": "indexed_ts" + } + } + } +} -- cgit v1.2.3