aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorBryan Newbold <bnewbold@robocracy.org>2018-11-04 18:51:09 -0800
committerBryan Newbold <bnewbold@robocracy.org>2018-11-07 11:15:13 -0800
commitfb0b7a13435adb33c30cb5aaed388a77c44f52cf (patch)
tree985ddf5938d1ac52bb02614517d26df2a3e20d6d
parent669cb3fe63ee1d506a75cb9aea76bcbd14df31be (diff)
downloadfatcat-fb0b7a13435adb33c30cb5aaed388a77c44f52cf.tar.gz
fatcat-fb0b7a13435adb33c30cb5aaed388a77c44f52cf.zip
note elastic plugin needed
-rw-r--r--extra/elasticsearch/README.md10
-rw-r--r--extra/elasticsearch/release_schema.no-plugins.json42
2 files changed, 52 insertions, 0 deletions
diff --git a/extra/elasticsearch/README.md b/extra/elasticsearch/README.md
index c94c3109..d5ac0c53 100644
--- a/extra/elasticsearch/README.md
+++ b/extra/elasticsearch/README.md
@@ -25,6 +25,16 @@ relation is *removed*. For example, if a file match against a given release is
removed, the old release elastic object needs to be updated to remove the file
from its `files`.
+## Setting Up Elasticsearch
+
+We use elasticsearch version 6.x, with the `analysis-icu` plugin installed:
+
+ sudo /usr/share/elasticsearch/bin/elasticsearch-plugin install analysis-icu
+ sudo service elasticsearch restart
+
+If you can't install this plugin easily (e.g., in a Docker container), you can
+use the `no-plugins` variant of the schema (`release_schema.no-plugins.json`),
+though be warned that it might not be up to date.
+
## Loading Data
Drop and rebuild the schema:
diff --git a/extra/elasticsearch/release_schema.no-plugins.json b/extra/elasticsearch/release_schema.no-plugins.json
new file mode 100644
index 00000000..c879c176
--- /dev/null
+++ b/extra/elasticsearch/release_schema.no-plugins.json
@@ -0,0 +1,42 @@
+{
+"mappings": {
+ "release": {
+ "properties": {
+ "ident": { "type": "keyword" },
+ "revision": { "type": "keyword" },
+ "title": { "type": "text", "index": true },
+ "author": { "type": "alias", "path": "contrib_names" },
+ "journal": { "type": "alias", "path": "container_name" },
+ "date": { "type": "alias", "path": "release_date" },
+ "issn": { "type": "alias", "path": "container_issnl" },
+ "oa": { "type": "alias", "path": "container_is_oa" },
+ "longtail": { "type": "alias", "path": "container_is_longtail_oa" },
+ "release_date": { "type": "date" },
+ "release_type": { "type": "keyword" },
+ "release_status": { "type": "keyword" },
+ "language": { "type": "keyword" },
+ "doi": { "type": "keyword" },
+ "pmid": { "type": "keyword" },
+ "pmcid": { "type": "keyword" },
+ "isbn13": { "type": "keyword" },
+ "core_id": { "type": "keyword" },
+ "wikidata_qid": { "type": "keyword" },
+ "publisher": { "type": "text", "index": true },
+ "container_name": { "type": "text", "index": true },
+ "container_issnl": { "type": "keyword" },
+ "container_is_oa": { "type": "boolean" },
+ "container_is_longtail_oa": { "type": "boolean" },
+ "contrib_count": { "type": "integer" },
+ "contrib_names": { "type": "text", "index": true },
+ "ref_count": { "type": "integer" },
+ "file_count": { "type": "integer" },
+ "file_pdf_url": { "type": "keyword" },
+ "file_in_webarchive": { "type": "boolean" },
+ "file_in_ia": { "type": "boolean" },
+ "any_abstract": { "type": "boolean" },
+ "is_kept": { "type": "boolean" },
+ "in_shadow": { "type": "boolean" }
+ }
+ }
+}
+}