diff options
| -rw-r--r-- | schema/fulltext_schema.v01.json | 141 | 
1 files changed, 141 insertions, 0 deletions
| diff --git a/schema/fulltext_schema.v01.json b/schema/fulltext_schema.v01.json new file mode 100644 index 0000000..b2bd289 --- /dev/null +++ b/schema/fulltext_schema.v01.json @@ -0,0 +1,141 @@ +{ +"settings": { +    "index": { +        "analysis": { +            "analyzer": { +                "default": { +                    "type": "custom", +                    "tokenizer": "standard", +                    "filter": [ "lowercase", "asciifolding" ] +                }, +                "textIcu": { +                    "type": "custom", +                    "tokenizer": "icu_tokenizer", +                    "char_filter": [ "icu_normalizer" ], +                    "filter": [ "icu_folding" ] +                }, +                "textIcuSearch": { +                    "type": "custom", +                    "tokenizer": "icu_tokenizer", +                    "char_filter": [ "icu_normalizer" ], +                    "filter": [ "icu_folding" ] +                } +            }, +            "normalizer": { +                "default": { +                    "type": "custom", +                    "char_filter": [], +                    "filter": ["lowercase"] +                }, +                "caseSensitive": { +                    "type": "custom", +                    "char_filter": [], +                    "filter": [] +                } +            } +        } +    } +}, +"mappings": { +  "_doc": { +    "dynamic": false, +    "_source": { +      "excludes": [ +        "fulltext.abstract", +        "fulltext.body", +        "fulltext.acknowledgment", +        "fulltext.annex", +        "biblio_all", +        "everything" +      ] +    }, +    "properties": { +        "fatcat_ident":     { "type": "keyword", "normalizer": "default", "doc_values": false }, +        "fatcat_revision":  { "type": "keyword", "normalizer": "default", "doc_values": false }, +        "source_tags":      { "type": "keyword", "normalizer": "default", "doc_values": false }, + +        "work_id":        { "type": "keyword", "normalizer": "default" }, +        "title":          { "type": "text", "index": true, "analyzer": "textIcu", "search_analyzer":"textIcuSearch", "copy_to": ["biblio_all", "everything"] }, +        "subtitle":       { "type": "text", "index": true, "analyzer": "textIcu", "search_analyzer":"textIcuSearch", "copy_to": ["biblio_all", "everything"] }, +        "original_title": { "type": "text", "index": true, "analyzer": "textIcu", "search_analyzer":"textIcuSearch", "copy_to": ["biblio_all", "everything"] }, +        "release_date":   { "type": "date" }, +        "release_year":   { "type": "integer", "copy_to": ["biblio_all", "everything"] }, +        "release_type":   { "type": "keyword", "normalizer": "default", "copy_to": ["biblio_all", "everything"] }, +        "release_stage":  { "type": "keyword", "normalizer": "default" }, +        "withdrawn_status": { "type": "keyword", "normalizer": "default", "copy_to": ["biblio_all", "everything"] }, +        "language":       { "type": "keyword", "normalizer": "default" }, +        "country_code":   { "type": "keyword", "normalizer": "default" }, +        "volume":         { "type": "keyword", "normalizer": "default", "copy_to": ["biblio_all", "everything"] }, +        "issue":          { "type": "keyword", "normalizer": "default", "copy_to": ["biblio_all", "everything"] }, +        "pages":          { "type": "keyword", "normalizer": "default", "copy_to": ["biblio_all", "everything"] }, +        "first_page":     { "type": "keyword", "normalizer": "default" }, +        "number":         { "type": "keyword", "normalizer": "default", "copy_to": ["biblio_all", "everything"] }, +        "doi":            { "type": "keyword", "normalizer": "default" }, +        "doi_prefix":     { "type": "keyword", "normalizer": "default" }, +        "doi_registrar":  { "type": "keyword", "normalizer": "default" }, +        "pmid":           { "type": "keyword", "normalizer": "default", "doc_values": false }, +        "pmcid":          { "type": "keyword", "normalizer": "default", "doc_values": false }, +        "isbn13":         { "type": "keyword", "normalizer": "default", "doc_values": false }, +        "wikidata_qid":   { "type": "keyword", "normalizer": "default", "doc_values": false }, +        "arxiv_id":       { "type": "keyword", "normalizer": "default", "doc_values": false }, +        "jstor_id":       { "type": "keyword", "normalizer": "default", "doc_values": false }, +        "mag_id":         { "type": "keyword", "normalizer": "default", "doc_values": false }, +        "s2_id":          { "type": "keyword", "normalizer": "default", "doc_values": false }, +        "cord19_uid":     { "type": "keyword", "normalizer": "default", "doc_values": false }, +        "who_covidence_id": { "type": "keyword", "normalizer": "default", "doc_values": false }, +        "license":        { "type": "keyword", "normalizer": "default" }, +        "publisher":            { "type": "text", "index": true, "analyzer": "textIcu", "search_analyzer":"textIcuSearch" }, +        "publisher_type":       { "type": "keyword", "normalizer": "default" }, +        "container_name":       { "type": "text", "index": true, "analyzer": "textIcu", "search_analyzer":"textIcuSearch", "copy_to": ["biblio_all", "everything"] }, +        "container_original_name":       { "type": "text", "index": true, "analyzer": "textIcu", "search_analyzer":"textIcuSearch", "copy_to": ["biblio_all", "everything"] }, +        "container_id":         { "type": "keyword", "normalizer": "default" }, +        "container_issnl":      { "type": "keyword", "normalizer": "default" }, +        "container_type":       { "type": "keyword", "normalizer": "default" }, +        "contrib_count":        { "type": "integer" }, +        "contrib_names":        { "type": "text", "index": true, "analyzer": "textIcu", "search_analyzer":"textIcuSearch", "copy_to": ["biblio_all", "everything"] }, +        "affiliations":         { "type": "text", "index": true, "analyzer": "textIcu", "search_analyzer":"textIcuSearch" }, +        "creator_ids":          { "type": "keyword", "normalizer": "default" }, +        "abstract":             { "type": "text", "index": true, "analyzer": "textIcu", "search_analyzer":"textIcuSearch", "copy_to": "everything" }, +        "abstract_lang":        { "type": "keyword", "normalizer": "default" }, + +        "fulltext": { +          "dynamic": false, +          "properties": { +            "status":           { "type": "keyword", "normalizer": "default" }, +            "sha1":             { "type": "keyword", "normalizer": "default", "doc_values": false }, +            "abstract":         { "type": "text", "index": true, "analyzer": "textIcu", "search_analyzer":"textIcuSearch", "copy_to": "everything", "store": true }, +            "body":             { "type": "text", "index": true, "analyzer": "textIcu", "search_analyzer":"textIcuSearch", "copy_to": "everything", "store": true }, +            "lang":             { "type": "keyword", "normalizer": "default" }, +            "acknowledgement":  { "type": "text", "index": true, "analyzer": "textIcu", "search_analyzer":"textIcuSearch", "copy_to": "everything", "store": true }, +            "annex":            { "type": "text", "index": true, "analyzer": "textIcu", "search_analyzer":"textIcuSearch", "copy_to": "everything", "store": true }, + +            "ia_pdf_url":       { "type": "keyword", "normalizer": "default", "doc_values": false }, +            "pdf_url":          { "type": "keyword", "normalizer": "default", "doc_values": false }, +            "pdf_sha1":         { "type": "keyword", "normalizer": "default", "doc_values": false }, +            "thumbnail_url":    { "type": "keyword", "normalizer": "default", "doc_values": false }, +            "grobid_xml_url":   { "type": "keyword", "normalizer": "default", "doc_values": false } + +          } +        }, + +        "everything":           { "type": "text", "index": true, "analyzer": "textIcu", "search_analyzer":"textIcuSearch" }, +        "biblio_all":           { "type": "text", "index": true, "analyzer": "textIcu", "search_analyzer":"textIcuSearch" }, + +        "affiliation":    { "type": "alias", "path": "affiliations" }, +        "author":         { "type": "alias", "path": "contrib_names" }, +        "journal":        { "type": "alias", "path": "container_name" }, +        "date":           { "type": "alias", "path": "release_date" }, +        "year":           { "type": "alias", "path": "release_year" }, +        "issn":           { "type": "alias", "path": "container_issnl" }, +        "lang":           { "type": "alias", "path": "language" }, +        "stage":          { "type": "alias", "path": "release_stage" }, +        "type":           { "type": "alias", "path": "release_type" }, +        "country":        { "type": "alias", "path": "country_code" }, + +        "source":         { "type": "alias", "path": "source_tags" }, +        "body":           { "type": "alias", "path": "fulltext.body" } + +    } +  } +} +} | 
