aboutsummaryrefslogtreecommitdiffstats
path: root/schema/fulltext_schema.v00.json
blob: 11bb49b0deea0888239c9904854e7586f5c841fc (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
{
"settings": {
    "index": {
        "analysis": {
            "analyzer": {
                "default": {
                    "type": "custom",
                    "tokenizer": "standard",
                    "filter": [ "lowercase", "asciifolding" ]
                },
                "textIcu": {
                    "type": "custom",
                    "tokenizer": "icu_tokenizer",
                    "char_filter": [ "icu_normalizer" ],
                    "filter": [ "icu_folding" ]
                },
                "textIcuSearch": {
                    "type": "custom",
                    "tokenizer": "icu_tokenizer",
                    "char_filter": [ "icu_normalizer" ],
                    "filter": [ "icu_folding" ]
                }
            },
            "normalizer": {
                "default": {
                    "type": "custom",
                    "char_filter": [],
                    "filter": ["lowercase"]
                },
                "caseSensitive": {
                    "type": "custom",
                    "char_filter": [],
                    "filter": []
                }
            }
        }
    }
},
"mappings": {
  "release": {
    "dynamic": false,
    "_source": {
      "excludes": [
        "fulltext.abstract",
        "fulltext.body",
        "fulltext.acknowledgement",
        "fulltext.annex",
        "biblio_all",
        "everything"
      ]
    },
    "properties": {
        "fatcat_ident":     { "type": "keyword", "normalizer": "default", "doc_values": false },
        "fatcat_revision":  { "type": "keyword", "normalizer": "default", "doc_values": false },
        "source_tags":      { "type": "keyword", "normalizer": "default", "doc_values": false },

        "work_id":        { "type": "keyword", "normalizer": "default" },
        "title":          { "type": "text", "index": true, "analyzer": "textIcu", "search_analyzer":"textIcuSearch", "copy_to": ["biblio_all", "everything"] },
        "subtitle":       { "type": "text", "index": true, "analyzer": "textIcu", "search_analyzer":"textIcuSearch", "copy_to": ["biblio_all", "everything"] },
        "original_title": { "type": "text", "index": true, "analyzer": "textIcu", "search_analyzer":"textIcuSearch", "copy_to": ["biblio_all", "everything"] },
        "release_date":   { "type": "date" },
        "release_year":   { "type": "integer", "copy_to": ["biblio_all", "everything"] },
        "release_type":   { "type": "keyword", "normalizer": "default", "copy_to": ["biblio_all", "everything"] },
        "release_stage":  { "type": "keyword", "normalizer": "default" },
        "withdrawn_status": { "type": "keyword", "normalizer": "default", "copy_to": ["biblio_all", "everything"] },
        "language":       { "type": "keyword", "normalizer": "default" },
        "country_code":   { "type": "keyword", "normalizer": "default" },
        "volume":         { "type": "keyword", "normalizer": "default", "copy_to": ["biblio_all", "everything"] },
        "issue":          { "type": "keyword", "normalizer": "default", "copy_to": ["biblio_all", "everything"] },
        "pages":          { "type": "keyword", "normalizer": "default", "copy_to": ["biblio_all", "everything"] },
        "first_page":     { "type": "keyword", "normalizer": "default" },
        "number":         { "type": "keyword", "normalizer": "default", "copy_to": ["biblio_all", "everything"] },
        "doi":            { "type": "keyword", "normalizer": "default" },
        "doi_prefix":     { "type": "keyword", "normalizer": "default" },
        "doi_registrar":  { "type": "keyword", "normalizer": "default" },
        "pmid":           { "type": "keyword", "normalizer": "default", "doc_values": false },
        "pmcid":          { "type": "keyword", "normalizer": "default", "doc_values": false },
        "isbn13":         { "type": "keyword", "normalizer": "default", "doc_values": false },
        "wikidata_qid":   { "type": "keyword", "normalizer": "default", "doc_values": false },
        "arxiv_id":       { "type": "keyword", "normalizer": "default", "doc_values": false },
        "jstor_id":       { "type": "keyword", "normalizer": "default", "doc_values": false },
        "mag_id":         { "type": "keyword", "normalizer": "default", "doc_values": false },
        "s2_id":          { "type": "keyword", "normalizer": "default", "doc_values": false },
        "cord19_uid":     { "type": "keyword", "normalizer": "default", "doc_values": false },
        "who_covidence_id": { "type": "keyword", "normalizer": "default", "doc_values": false },
        "license":        { "type": "keyword", "normalizer": "default" },
        "publisher":            { "type": "text", "index": true, "analyzer": "textIcu", "search_analyzer":"textIcuSearch" },
        "publisher_type":       { "type": "keyword", "normalizer": "default" },
        "container_name":       { "type": "text", "index": true, "analyzer": "textIcu", "search_analyzer":"textIcuSearch", "copy_to": ["biblio_all", "everything"] },
        "container_original_name":       { "type": "text", "index": true, "analyzer": "textIcu", "search_analyzer":"textIcuSearch", "copy_to": ["biblio_all", "everything"] },
        "container_id":         { "type": "keyword", "normalizer": "default" },
        "container_issnl":      { "type": "keyword", "normalizer": "default" },
        "container_type":       { "type": "keyword", "normalizer": "default" },
        "contrib_count":        { "type": "integer" },
        "contrib_names":        { "type": "text", "index": true, "analyzer": "textIcu", "search_analyzer":"textIcuSearch", "copy_to": ["biblio_all", "everything"] },
        "affiliations":         { "type": "text", "index": true, "analyzer": "textIcu", "search_analyzer":"textIcuSearch" },
        "creator_ids":          { "type": "keyword", "normalizer": "default" },
        "abstract":             { "type": "text", "index": true, "analyzer": "textIcu", "search_analyzer":"textIcuSearch", "copy_to": "everything" },
        "abstract_lang":        { "type": "keyword", "normalizer": "default" },

        "fulltext": {
          "dynamic": false,
          "properties": {
            "status":           { "type": "keyword", "normalizer": "default" },
            "sha1":             { "type": "keyword", "normalizer": "default", "doc_values": false },
            "abstract":         { "type": "text", "index": true, "analyzer": "textIcu", "search_analyzer":"textIcuSearch", "copy_to": "everything", "store": true },
            "body":             { "type": "text", "index": true, "analyzer": "textIcu", "search_analyzer":"textIcuSearch", "copy_to": "everything", "store": true },
            "lang":             { "type": "keyword", "normalizer": "default" },
            "acknowledgement":  { "type": "text", "index": true, "analyzer": "textIcu", "search_analyzer":"textIcuSearch", "copy_to": "everything", "store": true },
            "annex":            { "type": "text", "index": true, "analyzer": "textIcu", "search_analyzer":"textIcuSearch", "copy_to": "everything", "store": true },

            "ia_pdf_url":       { "type": "keyword", "normalizer": "default", "doc_values": false },
            "pdf_url":          { "type": "keyword", "normalizer": "default", "doc_values": false },
            "pdf_sha1":         { "type": "keyword", "normalizer": "default", "doc_values": false },
            "thumbnail_url":    { "type": "keyword", "normalizer": "default", "doc_values": false },
            "grobid_xml_url":   { "type": "keyword", "normalizer": "default", "doc_values": false }

          }
        },

        "everything":           { "type": "text", "index": true, "analyzer": "textIcu", "search_analyzer":"textIcuSearch" },
        "biblio_all":           { "type": "text", "index": true, "analyzer": "textIcu", "search_analyzer":"textIcuSearch" },

        "affiliation":    { "type": "alias", "path": "affiliations" },
        "author":         { "type": "alias", "path": "contrib_names" },
        "journal":        { "type": "alias", "path": "container_name" },
        "date":           { "type": "alias", "path": "release_date" },
        "year":           { "type": "alias", "path": "release_year" },
        "issn":           { "type": "alias", "path": "container_issnl" },
        "lang":           { "type": "alias", "path": "language" },
        "stage":          { "type": "alias", "path": "release_stage" },
        "type":           { "type": "alias", "path": "release_type" },
        "country":        { "type": "alias", "path": "country_code" },

        "source":         { "type": "alias", "path": "source_tags" },
        "body":           { "type": "alias", "path": "fulltext.body" }

    }
  }
}
}