diff options
Diffstat (limited to 'extra/elasticsearch')
-rw-r--r-- | extra/elasticsearch/changelog_schema.json | 39 | ||||
-rw-r--r-- | extra/elasticsearch/container_schema.json | 74 | ||||
-rw-r--r-- | extra/elasticsearch/release_schema.json | 62 |
3 files changed, 153 insertions, 22 deletions
diff --git a/extra/elasticsearch/changelog_schema.json b/extra/elasticsearch/changelog_schema.json new file mode 100644 index 00000000..7a7ec90c --- /dev/null +++ b/extra/elasticsearch/changelog_schema.json @@ -0,0 +1,39 @@ +{ +"settings": { + "index": { + "analysis": { + "analyzer": { + "default": { + "type": "custom", + "tokenizer": "standard", + "filter": [ "lowercase", "asciifolding" ] + } + } + } + } +}, +"mappings": { + "changelog": { + "properties": { + "index": { "type": "integer" }, + "editgroup_id": { "type": "keyword" }, + "timestamp": { "type": "date" }, + "username": { "type": "keyword" }, + "is_bot": { "type": "boolean" }, + "is_admin": { "type": "boolean" }, + "agent": { "type": "keyword" }, + "containers": { "type": "integer" }, + "creators": { "type": "integer" }, + "files": { "type": "integer" }, + "filesets": { "type": "integer" }, + "webcaptures": { "type": "integer" }, + "releases": { "type": "integer" }, + "works": { "type": "integer" }, + "created": { "type": "integer" }, + "updated": { "type": "integer" }, + "deleted": { "type": "integer" }, + "total": { "type": "integer" } + } + } +} +} diff --git a/extra/elasticsearch/container_schema.json b/extra/elasticsearch/container_schema.json new file mode 100644 index 00000000..83791ab8 --- /dev/null +++ b/extra/elasticsearch/container_schema.json @@ -0,0 +1,74 @@ +{ +"settings": { + "index": { + "analysis": { + "analyzer": { + "default": { + "type": "custom", + "tokenizer": "standard", + "filter": [ "lowercase", "asciifolding" ] + }, + "textIcu": { + "type": "custom", + "tokenizer": "icu_tokenizer", + "char_filter": [ "icu_normalizer" ], + "filter": [ "icu_folding" ] + }, + "textIcuSearch": { + "type": "custom", + "tokenizer": "icu_tokenizer", + "char_filter": [ "icu_normalizer" ], + "filter": [ "icu_folding" ] + } + } + } + } +}, +"mappings": { + "container": { + "properties": { + "ident": { "type": "keyword" }, + "state": { "type": "keyword" }, + "revision": { "type": "keyword" }, + "name": 
{ "type": "text", "index": true, "analyzer": "textIcu", "search_analyzer":"textIcuSearch" }, + "publisher": { "type": "text", "index": true, "analyzer": "textIcu", "search_analyzer":"textIcuSearch" }, + "container_type": { "type": "keyword" }, + "wikidata_qid": { "type": "keyword" }, + "issnl": { "type": "keyword" }, + "region": { "type": "keyword" }, + "nation": { "type": "keyword" }, + "discipline": { "type": "keyword" }, + "languages": { "type": "keyword" }, + "mimetypes": { "type": "keyword" }, + "first_year": { "type": "integer" }, + "last_year": { "type": "integer" }, + + "in_doaj": { "type": "boolean" }, + "in_road": { "type": "boolean" }, + "in_doi": { "type": "boolean" }, + "in_doaj_works": { "type": "boolean" }, + "in_sherpa_romeo":{ "type": "boolean" }, + "is_oa": { "type": "boolean" }, + "is_longtail_oa": { "type": "boolean" }, + "any_kbart": { "type": "boolean" }, + "any_jstor": { "type": "boolean" }, + "any_sim": { "type": "boolean" }, + "ia_homepage_status": { "type": "boolean" }, + + "releases_total": { "type": "integer" }, + "releases_kbart": { "type": "integer" }, + "releases_ia": { "type": "integer" }, + "releases_sim": { "type": "integer" }, + "releases_shadow": { "type": "integer" }, + "releases_any_file": { "type": "integer" }, + "releases_any_fileset": { "type": "integer" }, + "releases_any_webcapture": { "type": "integer" }, + + "year": { "type": "alias", "path": "first_year" }, + "type": { "type": "alias", "path": "container_type" }, + "oa": { "type": "alias", "path": "is_oa" }, + "longtail": { "type": "alias", "path": "is_longtail_oa" } + } + } +} +} diff --git a/extra/elasticsearch/release_schema.json b/extra/elasticsearch/release_schema.json index c9b77301..4cfe0894 100644 --- a/extra/elasticsearch/release_schema.json +++ b/extra/elasticsearch/release_schema.json @@ -28,41 +28,59 @@ "release": { "properties": { "ident": { "type": "keyword" }, + "state": { "type": "keyword" }, "revision": { "type": "keyword" }, "title": { "type": "text", 
"index": true, "analyzer": "textIcu", "search_analyzer":"textIcuSearch" }, - "author": { "type": "alias", "path": "contrib_names" }, - "journal": { "type": "alias", "path": "container_name" }, - "date": { "type": "alias", "path": "release_date" }, - "year": { "type": "alias", "path": "release_year" }, - "issn": { "type": "alias", "path": "container_issnl" }, - "oa": { "type": "alias", "path": "container_is_oa" }, - "longtail": { "type": "alias", "path": "container_is_longtail_oa" }, + "original_title": { "type": "text", "index": true, "analyzer": "textIcu", "search_analyzer":"textIcuSearch" }, "release_date": { "type": "date" }, "release_year": { "type": "integer" }, "release_type": { "type": "keyword" }, "release_status": { "type": "keyword" }, - "language": { "type": "keyword" }, - "doi": { "type": "keyword" }, - "pmid": { "type": "keyword" }, - "pmcid": { "type": "keyword" }, - "isbn13": { "type": "keyword" }, - "core_id": { "type": "keyword" }, - "wikidata_qid": { "type": "keyword" }, + "language": { "type": "keyword" }, + "doi": { "type": "keyword" }, + "pmid": { "type": "keyword" }, + "pmcid": { "type": "keyword" }, + "isbn13": { "type": "keyword" }, + "wikidata_qid": { "type": "keyword" }, + "core_id": { "type": "keyword" }, + "axiv_id": { "type": "keyword" }, + "jstor_id": { "type": "keyword" }, + "license": { "type": "keyword" }, "publisher": { "type": "text", "index": true, "analyzer": "textIcu", "search_analyzer":"textIcuSearch" }, "container_name": { "type": "text", "index": true, "analyzer": "textIcu", "search_analyzer":"textIcuSearch" }, "container_issnl": { "type": "keyword" }, - "container_is_oa": { "type": "boolean" }, - "container_is_longtail_oa": { "type": "boolean" }, + "container_type": { "type": "keyword" }, "contrib_count": { "type": "integer" }, - "contrib_names": { "type": "text", "index": true, "analyzer": "textIcu", "search_analyzer":"textIcuSearch" }, + "contrib_names": { "type": "text", "index": true, "analyzer": "textIcu", 
"search_analyzer":"textIcuSearch" }, "ref_count": { "type": "integer" }, "file_count": { "type": "integer" }, - "file_pdf_url": { "type": "keyword" }, - "file_in_webarchive": { "type": "boolean" }, - "file_in_ia": { "type": "boolean" }, + "fileset_count": { "type": "integer" }, + "webcapture_count": { "type": "integer" }, "any_abstract": { "type": "boolean" }, - "is_kept": { "type": "boolean" }, - "in_shadow": { "type": "boolean" } + + "best_pdf_url": { "type": "keyword" }, + "ia_pdf_url": { "type": "keyword" }, + "is_oa": { "type": "boolean" }, + "is_longtail_oa": { "type": "boolean" }, + "is_preserved": { "type": "boolean" }, + "in_kbart": { "type": "boolean" }, + "in_jstor": { "type": "boolean" }, + "in_dweb": { "type": "boolean" }, + "in_web": { "type": "boolean" }, + "in_ia": { "type": "boolean" }, + "in_sim": { "type": "boolean" }, + "in_shadows": { "type": "boolean" }, + + "author": { "type": "alias", "path": "contrib_names" }, + "journal": { "type": "alias", "path": "container_name" }, + "date": { "type": "alias", "path": "release_date" }, + "year": { "type": "alias", "path": "release_year" }, + "issn": { "type": "alias", "path": "container_issnl" }, + "oa": { "type": "alias", "path": "is_oa" }, + "longtail": { "type": "alias", "path": "is_longtail_oa" }, + "lang": { "type": "alias", "path": "language" }, + "file_pdf_url": { "type": "alias", "path": "best_pdf_url" }, + "is_kept": { "type": "alias", "path": "in_kbart" } } } } |