aboutsummaryrefslogtreecommitdiffstats
path: root/schema
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@archive.org> 2020-04-01 15:28:40 -0700
committer Bryan Newbold <bnewbold@archive.org> 2020-04-01 15:28:40 -0700
commit 7bd3d790f29fe791915f68852086c69abb2f06bb (patch)
tree 385f937b3a3acefa7d871c0f68bf10b94f4ec99c /schema
parent 45e08a732f27e1a5f728a2f8fafb3f8eddd95251 (diff)
download fatcat-covid19-7bd3d790f29fe791915f68852086c69abb2f06bb.tar.gz
fatcat-covid19-7bd3d790f29fe791915f68852086c69abb2f06bb.zip
elasticsearch schemas
Diffstat (limited to 'schema')
-rw-r--r-- schema/fulltext_schema.v00.json 134
-rw-r--r-- schema/release_schema.v03b.json 136
2 files changed, 270 insertions, 0 deletions
diff --git a/schema/fulltext_schema.v00.json b/schema/fulltext_schema.v00.json
new file mode 100644
index 0000000..694048e
--- /dev/null
+++ b/schema/fulltext_schema.v00.json
@@ -0,0 +1,134 @@
+{
+"settings": {
+ "index": {
+ "analysis": {
+ "analyzer": {
+ "default": {
+ "type": "custom",
+ "tokenizer": "standard",
+ "filter": [ "lowercase", "asciifolding" ]
+ },
+ "textIcu": {
+ "type": "custom",
+ "tokenizer": "icu_tokenizer",
+ "char_filter": [ "icu_normalizer" ],
+ "filter": [ "icu_folding" ]
+ },
+ "textIcuSearch": {
+ "type": "custom",
+ "tokenizer": "icu_tokenizer",
+ "char_filter": [ "icu_normalizer" ],
+ "filter": [ "icu_folding" ]
+ }
+ },
+ "normalizer": {
+ "default": {
+ "type": "custom",
+ "char_filter": [],
+ "filter": ["lowercase"]
+ },
+ "caseSensitive": {
+ "type": "custom",
+ "char_filter": [],
+ "filter": []
+ }
+ }
+ }
+ }
+},
+"mappings": {
+ "release": {
+ "dynamic": false,
+ "_source": {
+ "excludes": [
+ "fulltext.abstract",
+ "fulltext.body",
+ "fulltext.acknowledgement",
+ "fulltext.annex",
+ "everything"
+ ]
+ },
+ "properties": {
+ "fatcat_ident": { "type": "keyword", "normalizer": "default", "doc_values": false },
+ "fatcat_revision": { "type": "keyword", "normalizer": "default", "doc_values": false },
+ "cord19_uid": { "type": "keyword", "normalizer": "default", "doc_values": false },
+
+ "work_id": { "type": "keyword", "normalizer": "default" },
+ "title": { "type": "text", "index": true, "analyzer": "textIcu", "search_analyzer":"textIcuSearch", "copy_to": ["biblio_all", "everything"] },
+ "subtitle": { "type": "text", "index": true, "analyzer": "textIcu", "search_analyzer":"textIcuSearch", "copy_to": ["biblio_all", "everything"] },
+ "original_title": { "type": "text", "index": true, "analyzer": "textIcu", "search_analyzer":"textIcuSearch", "copy_to": ["biblio_all", "everything"] },
+ "release_date": { "type": "date" },
+ "release_year": { "type": "integer", "copy_to": ["biblio_all", "everything"] },
+ "release_type": { "type": "keyword", "normalizer": "default", "copy_to": ["biblio_all", "everything"] },
+ "release_stage": { "type": "keyword", "normalizer": "default" },
+ "withdrawn_status": { "type": "keyword", "normalizer": "default", "copy_to": ["biblio_all", "everything"] },
+ "language": { "type": "keyword", "normalizer": "default" },
+ "country_code": { "type": "keyword", "normalizer": "default" },
+ "volume": { "type": "keyword", "normalizer": "default", "copy_to": ["biblio_all", "everything"] },
+ "issue": { "type": "keyword", "normalizer": "default", "copy_to": ["biblio_all", "everything"] },
+ "pages": { "type": "keyword", "normalizer": "default", "copy_to": ["biblio_all", "everything"] },
+ "first_page": { "type": "keyword", "normalizer": "default" },
+ "number": { "type": "keyword", "normalizer": "default", "copy_to": ["biblio_all", "everything"] },
+ "doi": { "type": "keyword", "normalizer": "default" },
+ "doi_prefix": { "type": "keyword", "normalizer": "default" },
+ "doi_registrar": { "type": "keyword", "normalizer": "default" },
+ "pmid": { "type": "keyword", "normalizer": "default", "doc_values": false },
+ "pmcid": { "type": "keyword", "normalizer": "default", "doc_values": false },
+ "isbn13": { "type": "keyword", "normalizer": "default", "doc_values": false },
+ "wikidata_qid": { "type": "keyword", "normalizer": "default", "doc_values": false },
+ "arxiv_id": { "type": "keyword", "normalizer": "default", "doc_values": false },
+ "jstor_id": { "type": "keyword", "normalizer": "default", "doc_values": false },
+ "mag_id": { "type": "keyword", "normalizer": "default", "doc_values": false },
+ "s2_id": { "type": "keyword", "normalizer": "default", "doc_values": false },
+ "license": { "type": "keyword", "normalizer": "default" },
+ "publisher": { "type": "text", "index": true, "analyzer": "textIcu", "search_analyzer":"textIcuSearch" },
+ "publisher_type": { "type": "keyword", "normalizer": "default" },
+ "container_name": { "type": "text", "index": true, "analyzer": "textIcu", "search_analyzer":"textIcuSearch", "copy_to": ["biblio_all", "everything"] },
+ "container_id": { "type": "keyword", "normalizer": "default" },
+ "container_issnl": { "type": "keyword", "normalizer": "default" },
+ "container_type": { "type": "keyword", "normalizer": "default" },
+ "contrib_count": { "type": "integer" },
+ "contrib_names": { "type": "text", "index": true, "analyzer": "textIcu", "search_analyzer":"textIcuSearch", "copy_to": ["biblio_all", "everything"] },
+ "affiliations": { "type": "text", "index": true, "analyzer": "textIcu", "search_analyzer":"textIcuSearch" },
+ "creator_ids": { "type": "keyword", "normalizer": "default" },
+ "abstract": { "type": "text", "index": true, "analyzer": "textIcu", "search_analyzer":"textIcuSearch", "copy_to": "everything" },
+ "abstract_lang": { "type": "keyword", "normalizer": "default" },
+
+ "fulltext": {
+ "dynamic": false,
+ "properties": {
+ "status": { "type": "keyword", "normalizer": "default" },
+ "sha1": { "type": "keyword", "normalizer": "default", "doc_values": false },
+ "abstract": { "type": "text", "index": true, "analyzer": "textIcu", "search_analyzer":"textIcuSearch", "copy_to": "everything", "store": true },
+ "body": { "type": "text", "index": true, "analyzer": "textIcu", "search_analyzer":"textIcuSearch", "copy_to": "everything", "store": true },
+ "lang": { "type": "keyword", "normalizer": "default" },
+ "acknowledgement": { "type": "text", "index": true, "analyzer": "textIcu", "search_analyzer":"textIcuSearch", "copy_to": "everything", "store": true },
+ "annex": { "type": "text", "index": true, "analyzer": "textIcu", "search_analyzer":"textIcuSearch", "copy_to": "everything", "store": true },
+
+ "pdf_url": { "type": "keyword", "normalizer": "default", "doc_values": false },
+ "pdf_sha1": { "type": "keyword", "normalizer": "default", "doc_values": false },
+ "thumbnail_url": { "type": "keyword", "normalizer": "default", "doc_values": false },
+ "grobid_xml_url": { "type": "keyword", "normalizer": "default", "doc_values": false }
+
+ }
+ },
+
+ "everything": { "type": "text", "index": true, "analyzer": "textIcu", "search_analyzer":"textIcuSearch" },
+ "biblio_all": { "type": "text", "index": true, "analyzer": "textIcu", "search_analyzer":"textIcuSearch" },
+
+ "affiliation": { "type": "alias", "path": "affiliations" },
+ "author": { "type": "alias", "path": "contrib_names" },
+ "journal": { "type": "alias", "path": "container_name" },
+ "date": { "type": "alias", "path": "release_date" },
+ "year": { "type": "alias", "path": "release_year" },
+ "issn": { "type": "alias", "path": "container_issnl" },
+ "lang": { "type": "alias", "path": "language" },
+ "stage": { "type": "alias", "path": "release_stage" },
+ "type": { "type": "alias", "path": "release_type" },
+
+ "body": { "type": "alias", "path": "fulltext.body" }
+
+ }
+ }
+}
+}
diff --git a/schema/release_schema.v03b.json b/schema/release_schema.v03b.json
new file mode 100644
index 0000000..666a672
--- /dev/null
+++ b/schema/release_schema.v03b.json
@@ -0,0 +1,136 @@
+{
+"settings": {
+ "index": {
+ "analysis": {
+ "analyzer": {
+ "default": {
+ "type": "custom",
+ "tokenizer": "standard",
+ "filter": [ "lowercase", "asciifolding" ]
+ },
+ "textIcu": {
+ "type": "custom",
+ "tokenizer": "icu_tokenizer",
+ "char_filter": [ "icu_normalizer" ],
+ "filter": [ "icu_folding" ]
+ },
+ "textIcuSearch": {
+ "type": "custom",
+ "tokenizer": "icu_tokenizer",
+ "char_filter": [ "icu_normalizer" ],
+ "filter": [ "icu_folding" ]
+ }
+ },
+ "normalizer": {
+ "default": {
+ "type": "custom",
+ "char_filter": [],
+ "filter": ["lowercase"]
+ },
+ "caseSensitive": {
+ "type": "custom",
+ "char_filter": [],
+ "filter": []
+ }
+ }
+ }
+ }
+},
+"mappings": {
+ "release": {
+ "properties": {
+ "ident": { "type": "keyword", "normalizer": "default", "doc_values": false },
+ "state": { "type": "keyword", "normalizer": "default" },
+ "revision": { "type": "keyword", "normalizer": "default", "doc_values": false },
+ "work_id": { "type": "keyword", "normalizer": "default" },
+ "title": { "type": "text", "index": true, "analyzer": "textIcu", "search_analyzer":"textIcuSearch", "copy_to": "biblio" },
+ "subtitle": { "type": "text", "index": true, "analyzer": "textIcu", "search_analyzer":"textIcuSearch", "copy_to": "biblio" },
+ "original_title": { "type": "text", "index": true, "analyzer": "textIcu", "search_analyzer":"textIcuSearch", "copy_to": "biblio" },
+ "release_date": { "type": "date" },
+ "release_year": { "type": "integer", "copy_to": "biblio" },
+ "release_type": { "type": "keyword", "normalizer": "default", "copy_to": "biblio" },
+ "release_stage": { "type": "keyword", "normalizer": "default" },
+ "withdrawn_status": { "type": "keyword", "normalizer": "default", "copy_to": "biblio" },
+ "language": { "type": "keyword", "normalizer": "default" },
+ "country_code": { "type": "keyword", "normalizer": "default" },
+ "country_code_upper": { "type": "keyword", "normalizer": "caseSensitive" },
+ "volume": { "type": "keyword", "normalizer": "default", "copy_to": "biblio" },
+ "issue": { "type": "keyword", "normalizer": "default", "copy_to": "biblio" },
+ "pages": { "type": "keyword", "normalizer": "default", "copy_to": "biblio" },
+ "first_page": { "type": "keyword", "normalizer": "default" },
+ "number": { "type": "keyword", "normalizer": "default", "copy_to": "biblio" },
+ "doi": { "type": "keyword", "normalizer": "default" },
+ "doi_prefix": { "type": "keyword", "normalizer": "default" },
+ "doi_registrar": { "type": "keyword", "normalizer": "default" },
+ "pmid": { "type": "keyword", "normalizer": "default", "doc_values": false },
+ "pmcid": { "type": "keyword", "normalizer": "default", "doc_values": false },
+ "isbn13": { "type": "keyword", "normalizer": "default", "doc_values": false },
+ "wikidata_qid": { "type": "keyword", "normalizer": "default", "doc_values": false },
+ "core_id": { "type": "keyword", "normalizer": "default", "doc_values": false },
+ "arxiv_id": { "type": "keyword", "normalizer": "default", "doc_values": false },
+ "jstor_id": { "type": "keyword", "normalizer": "default", "doc_values": false },
+ "ark_id": { "type": "keyword", "normalizer": "default", "doc_values": false },
+ "mag_id": { "type": "keyword", "normalizer": "default", "doc_values": false },
+ "s2_id": { "type": "keyword", "normalizer": "default", "doc_values": false },
+ "license": { "type": "keyword", "normalizer": "default" },
+ "publisher": { "type": "text", "index": true, "analyzer": "textIcu", "search_analyzer":"textIcuSearch" },
+ "publisher_type": { "type": "keyword", "normalizer": "default" },
+ "container_name": { "type": "text", "index": true, "analyzer": "textIcu", "search_analyzer":"textIcuSearch", "copy_to": "biblio" },
+ "container_id": { "type": "keyword", "normalizer": "default" },
+ "container_issnl": { "type": "keyword", "normalizer": "default" },
+ "container_type": { "type": "keyword", "normalizer": "default" },
+ "contrib_count": { "type": "integer" },
+ "contrib_names": { "type": "text", "index": true, "analyzer": "textIcu", "search_analyzer":"textIcuSearch", "copy_to": "biblio" },
+ "affiliations": { "type": "text", "index": true, "analyzer": "textIcu", "search_analyzer":"textIcuSearch" },
+ "affiliation_rors": { "type": "keyword", "normalizer": "default" },
+ "creator_ids": { "type": "keyword", "normalizer": "default" },
+ "ref_count": { "type": "integer" },
+ "ref_linked_count": { "type": "integer" },
+ "ref_release_ids": { "type": "keyword", "normalizer": "default" },
+ "file_count": { "type": "integer" },
+ "fileset_count": { "type": "integer" },
+ "webcapture_count": { "type": "integer" },
+ "any_abstract": { "type": "boolean" },
+
+ "biblio": { "type": "text", "index": true, "analyzer": "textIcu", "search_analyzer":"textIcuSearch" },
+
+ "best_pdf_url": { "type": "keyword", "normalizer": "caseSensitive", "doc_values": false },
+ "ia_pdf_url": { "type": "keyword", "normalizer": "caseSensitive", "doc_values": false },
+ "ia_microfilm_url": { "type": "keyword", "normalizer": "caseSensitive", "doc_values": false },
+ "is_oa": { "type": "boolean" },
+ "oa_color": { "type": "keyword", "normalizer": "default" },
+ "is_longtail_oa": { "type": "boolean" },
+ "is_preserved": { "type": "boolean" },
+ "in_kbart": { "type": "boolean" },
+ "in_jstor": { "type": "boolean" },
+ "in_dweb": { "type": "boolean" },
+ "in_web": { "type": "boolean" },
+ "in_ia": { "type": "boolean" },
+ "in_ia_sim": { "type": "boolean" },
+ "in_shadows": { "type": "boolean" },
+ "is_superceded": { "type": "boolean" },
+ "is_retracted": { "type": "boolean" },
+ "preservation": { "type": "keyword", "normalizer": "default" },
+
+ "affiliation": { "type": "alias", "path": "affiliations" },
+ "ror": { "type": "alias", "path": "affiliation_rors" },
+ "creator_id": { "type": "alias", "path": "creator_ids" },
+ "ref_release_id": { "type": "alias", "path": "ref_release_ids" },
+ "author": { "type": "alias", "path": "contrib_names" },
+ "journal": { "type": "alias", "path": "container_name" },
+ "date": { "type": "alias", "path": "release_date" },
+ "year": { "type": "alias", "path": "release_year" },
+ "issn": { "type": "alias", "path": "container_issnl" },
+ "oa": { "type": "alias", "path": "is_oa" },
+ "longtail": { "type": "alias", "path": "is_longtail_oa" },
+ "lang": { "type": "alias", "path": "language" },
+ "file_pdf_url": { "type": "alias", "path": "best_pdf_url" },
+ "release_status": { "type": "alias", "path": "release_stage" },
+ "stage": { "type": "alias", "path": "release_stage" },
+ "type": { "type": "alias", "path": "release_type" },
+ "retracted": { "type": "alias", "path": "is_retracted" },
+ "is_kept": { "type": "alias", "path": "in_kbart" }
+ }
+ }
+}
+}