summary refs log tree commit diff stats
path: root/extra/elasticsearch
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@robocracy.org> 2018-11-04 18:51:09 -0800
committer Bryan Newbold <bnewbold@robocracy.org> 2018-11-04 18:51:09 -0800
commit 66953b184d9b664e15cd7d7fddcb250c2b767df5 (patch)
tree e9841cbc26d6a0d21e9b236dcffd474632c66673 /extra/elasticsearch
parent 881b46e3b1682974f48fc196f483c3fa2648b998 (diff)
download fatcat-66953b184d9b664e15cd7d7fddcb250c2b767df5.tar.gz
fatcat-66953b184d9b664e15cd7d7fddcb250c2b767df5.zip
note elastic plugin needed
Diffstat (limited to 'extra/elasticsearch')
-rw-r--r-- extra/elasticsearch/README.md 10
-rw-r--r-- extra/elasticsearch/release_schema.no-plugins.json 42
2 files changed, 52 insertions, 0 deletions
diff --git a/extra/elasticsearch/README.md b/extra/elasticsearch/README.md
index c94c3109..d5ac0c53 100644
--- a/extra/elasticsearch/README.md
+++ b/extra/elasticsearch/README.md
@@ -25,6 +25,16 @@ relation is *removed*. For example, if a file match against a given release is
removed, the old release elastic object needs to be updated to remove the file
from its `files`.
+## Setting Up Elasticsearch
+
+We use Elasticsearch version 6.x, with the `analysis-icu` plugin installed:
+
+ sudo /usr/share/elasticsearch/bin/elasticsearch-plugin install analysis-icu
+ sudo service elasticsearch restart
+
+If you can't install this plugin easily (e.g., in a Docker container), you can
+use the `no-plugins` variant (though be warned: it might not be up to date).
+
## Loading Data
Drop and rebuild the schema:
diff --git a/extra/elasticsearch/release_schema.no-plugins.json b/extra/elasticsearch/release_schema.no-plugins.json
new file mode 100644
index 00000000..c879c176
--- /dev/null
+++ b/extra/elasticsearch/release_schema.no-plugins.json
@@ -0,0 +1,42 @@
+{
+"mappings": {
+ "release": {
+ "properties": {
+ "ident": { "type": "keyword" },
+ "revision": { "type": "keyword" },
+ "title": { "type": "text", "index": true },
+ "author": { "type": "alias", "path": "contrib_names" },
+ "journal": { "type": "alias", "path": "container_name" },
+ "date": { "type": "alias", "path": "release_date" },
+ "issn": { "type": "alias", "path": "container_issnl" },
+ "oa": { "type": "alias", "path": "container_is_oa" },
+ "longtail": { "type": "alias", "path": "container_is_longtail_oa" },
+ "release_date": { "type": "date" },
+ "release_type": { "type": "keyword" },
+ "release_status": { "type": "keyword" },
+ "language": { "type": "keyword" },
+ "doi": { "type": "keyword" },
+ "pmid": { "type": "keyword" },
+ "pmcid": { "type": "keyword" },
+ "isbn13": { "type": "keyword" },
+ "core_id": { "type": "keyword" },
+ "wikidata_qid": { "type": "keyword" },
+ "publisher": { "type": "text", "index": true },
+ "container_name": { "type": "text", "index": true },
+ "container_issnl": { "type": "keyword" },
+ "container_is_oa": { "type": "boolean" },
+ "container_is_longtail_oa": { "type": "boolean" },
+ "contrib_count": { "type": "integer" },
+ "contrib_names": { "type": "text", "index": true },
+ "ref_count": { "type": "integer" },
+ "file_count": { "type": "integer" },
+ "file_pdf_url": { "type": "keyword" },
+ "file_in_webarchive": { "type": "boolean" },
+ "file_in_ia": { "type": "boolean" },
+ "any_abstract": { "type": "boolean" },
+ "is_kept": { "type": "boolean" },
+ "in_shadow": { "type": "boolean" }
+ }
+ }
+}
+}