summary refs log tree commit diff stats
path: root/extra/elasticsearch
diff options
context:
space:
mode:
Diffstat (limited to 'extra/elasticsearch')
-rw-r--r-- extra/elasticsearch/changelog_schema.json 39
-rw-r--r-- extra/elasticsearch/container_schema.json 74
-rw-r--r-- extra/elasticsearch/release_schema.json 62
3 files changed, 153 insertions, 22 deletions
diff --git a/extra/elasticsearch/changelog_schema.json b/extra/elasticsearch/changelog_schema.json
new file mode 100644
index 00000000..7a7ec90c
--- /dev/null
+++ b/extra/elasticsearch/changelog_schema.json
@@ -0,0 +1,39 @@
+{
+"settings": {
+ "index": {
+ "analysis": {
+ "analyzer": {
+ "default": {
+ "type": "custom",
+ "tokenizer": "standard",
+ "filter": [ "lowercase", "asciifolding" ]
+ }
+ }
+ }
+ }
+},
+"mappings": {
+ "changelog": {
+ "properties": {
+ "index": { "type": "integer" },
+ "editgroup_id": { "type": "keyword" },
+ "timestamp": { "type": "date" },
+ "username": { "type": "keyword" },
+ "is_bot": { "type": "boolean" },
+ "is_admin": { "type": "boolean" },
+ "agent": { "type": "keyword" },
+ "containers": { "type": "integer" },
+ "creators": { "type": "integer" },
+ "files": { "type": "integer" },
+ "filesets": { "type": "integer" },
+ "webcaptures": { "type": "integer" },
+ "releases": { "type": "integer" },
+ "works": { "type": "integer" },
+ "created": { "type": "integer" },
+ "updated": { "type": "integer" },
+ "deleted": { "type": "integer" },
+ "total": { "type": "integer" }
+ }
+ }
+}
+}
diff --git a/extra/elasticsearch/container_schema.json b/extra/elasticsearch/container_schema.json
new file mode 100644
index 00000000..83791ab8
--- /dev/null
+++ b/extra/elasticsearch/container_schema.json
@@ -0,0 +1,74 @@
+{
+"settings": {
+ "index": {
+ "analysis": {
+ "analyzer": {
+ "default": {
+ "type": "custom",
+ "tokenizer": "standard",
+ "filter": [ "lowercase", "asciifolding" ]
+ },
+ "textIcu": {
+ "type": "custom",
+ "tokenizer": "icu_tokenizer",
+ "char_filter": [ "icu_normalizer" ],
+ "filter": [ "icu_folding" ]
+ },
+ "textIcuSearch": {
+ "type": "custom",
+ "tokenizer": "icu_tokenizer",
+ "char_filter": [ "icu_normalizer" ],
+ "filter": [ "icu_folding" ]
+ }
+ }
+ }
+ }
+},
+"mappings": {
+ "container": {
+ "properties": {
+ "ident": { "type": "keyword" },
+ "state": { "type": "keyword" },
+ "revision": { "type": "keyword" },
+ "name": { "type": "text", "index": true, "analyzer": "textIcu", "search_analyzer":"textIcuSearch" },
+ "publisher": { "type": "text", "index": true, "analyzer": "textIcu", "search_analyzer":"textIcuSearch" },
+ "container_type": { "type": "keyword" },
+ "wikidata_qid": { "type": "keyword" },
+ "issnl": { "type": "keyword" },
+ "region": { "type": "keyword" },
+ "nation": { "type": "keyword" },
+ "discipline": { "type": "keyword" },
+ "languages": { "type": "keyword" },
+ "mimetypes": { "type": "keyword" },
+ "first_year": { "type": "integer" },
+ "last_year": { "type": "integer" },
+
+ "in_doaj": { "type": "boolean" },
+ "in_road": { "type": "boolean" },
+ "in_doi": { "type": "boolean" },
+ "in_doaj_works": { "type": "boolean" },
+ "in_sherpa_romeo":{ "type": "boolean" },
+ "is_oa": { "type": "boolean" },
+ "is_longtail_oa": { "type": "boolean" },
+ "any_kbart": { "type": "boolean" },
+ "any_jstor": { "type": "boolean" },
+ "any_sim": { "type": "boolean" },
+ "ia_homepage_status": { "type": "boolean" },
+
+ "releases_total": { "type": "integer" },
+ "releases_kbart": { "type": "integer" },
+ "releases_ia": { "type": "integer" },
+ "releases_sim": { "type": "integer" },
+ "releases_shadow": { "type": "integer" },
+ "releases_any_file": { "type": "integer" },
+ "releases_any_fileset": { "type": "integer" },
+ "releases_any_webcapture": { "type": "integer" },
+
+ "year": { "type": "alias", "path": "first_year" },
+ "type": { "type": "alias", "path": "container_type" },
+ "oa": { "type": "alias", "path": "is_oa" },
+ "longtail": { "type": "alias", "path": "is_longtail_oa" }
+ }
+ }
+}
+}
diff --git a/extra/elasticsearch/release_schema.json b/extra/elasticsearch/release_schema.json
index c9b77301..4cfe0894 100644
--- a/extra/elasticsearch/release_schema.json
+++ b/extra/elasticsearch/release_schema.json
@@ -28,41 +28,59 @@
"release": {
"properties": {
"ident": { "type": "keyword" },
+ "state": { "type": "keyword" },
"revision": { "type": "keyword" },
"title": { "type": "text", "index": true, "analyzer": "textIcu", "search_analyzer":"textIcuSearch" },
- "author": { "type": "alias", "path": "contrib_names" },
- "journal": { "type": "alias", "path": "container_name" },
- "date": { "type": "alias", "path": "release_date" },
- "year": { "type": "alias", "path": "release_year" },
- "issn": { "type": "alias", "path": "container_issnl" },
- "oa": { "type": "alias", "path": "container_is_oa" },
- "longtail": { "type": "alias", "path": "container_is_longtail_oa" },
+ "original_title": { "type": "text", "index": true, "analyzer": "textIcu", "search_analyzer":"textIcuSearch" },
"release_date": { "type": "date" },
"release_year": { "type": "integer" },
"release_type": { "type": "keyword" },
"release_status": { "type": "keyword" },
- "language": { "type": "keyword" },
- "doi": { "type": "keyword" },
- "pmid": { "type": "keyword" },
- "pmcid": { "type": "keyword" },
- "isbn13": { "type": "keyword" },
- "core_id": { "type": "keyword" },
- "wikidata_qid": { "type": "keyword" },
+ "language": { "type": "keyword" },
+ "doi": { "type": "keyword" },
+ "pmid": { "type": "keyword" },
+ "pmcid": { "type": "keyword" },
+ "isbn13": { "type": "keyword" },
+ "wikidata_qid": { "type": "keyword" },
+ "core_id": { "type": "keyword" },
+ "arxiv_id": { "type": "keyword" },
+ "jstor_id": { "type": "keyword" },
+ "license": { "type": "keyword" },
"publisher": { "type": "text", "index": true, "analyzer": "textIcu", "search_analyzer":"textIcuSearch" },
"container_name": { "type": "text", "index": true, "analyzer": "textIcu", "search_analyzer":"textIcuSearch" },
"container_issnl": { "type": "keyword" },
- "container_is_oa": { "type": "boolean" },
- "container_is_longtail_oa": { "type": "boolean" },
+ "container_type": { "type": "keyword" },
"contrib_count": { "type": "integer" },
- "contrib_names": { "type": "text", "index": true, "analyzer": "textIcu", "search_analyzer":"textIcuSearch" },
+ "contrib_names": { "type": "text", "index": true, "analyzer": "textIcu", "search_analyzer":"textIcuSearch" },
"ref_count": { "type": "integer" },
"file_count": { "type": "integer" },
- "file_pdf_url": { "type": "keyword" },
- "file_in_webarchive": { "type": "boolean" },
- "file_in_ia": { "type": "boolean" },
+ "fileset_count": { "type": "integer" },
+ "webcapture_count": { "type": "integer" },
"any_abstract": { "type": "boolean" },
- "is_kept": { "type": "boolean" },
- "in_shadow": { "type": "boolean" }
+
+ "best_pdf_url": { "type": "keyword" },
+ "ia_pdf_url": { "type": "keyword" },
+ "is_oa": { "type": "boolean" },
+ "is_longtail_oa": { "type": "boolean" },
+ "is_preserved": { "type": "boolean" },
+ "in_kbart": { "type": "boolean" },
+ "in_jstor": { "type": "boolean" },
+ "in_dweb": { "type": "boolean" },
+ "in_web": { "type": "boolean" },
+ "in_ia": { "type": "boolean" },
+ "in_sim": { "type": "boolean" },
+ "in_shadows": { "type": "boolean" },
+
+ "author": { "type": "alias", "path": "contrib_names" },
+ "journal": { "type": "alias", "path": "container_name" },
+ "date": { "type": "alias", "path": "release_date" },
+ "year": { "type": "alias", "path": "release_year" },
+ "issn": { "type": "alias", "path": "container_issnl" },
+ "oa": { "type": "alias", "path": "is_oa" },
+ "longtail": { "type": "alias", "path": "is_longtail_oa" },
+ "lang": { "type": "alias", "path": "language" },
+ "file_pdf_url": { "type": "alias", "path": "best_pdf_url" },
+ "is_kept": { "type": "alias", "path": "in_kbart" }
}
}
}