aboutsummaryrefslogtreecommitdiffstats
path: root/schema/scholar_fulltext.v01.json
blob: 9e99026df86594edafdb2b82425cb71bbb67994b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
{
"settings": {
    "index": {
        "number_of_shards": 12,
        "number_of_replicas": 0,
        "codec": "best_compression",
        "analysis": {
            "analyzer": {
                "default": {
                    "type": "custom",
                    "tokenizer": "standard",
                    "filter": [ "lowercase", "asciifolding" ]
                },
                "textIcu": {
                    "type": "custom",
                    "tokenizer": "icu_tokenizer",
                    "char_filter": [ "html_strip", "icu_normalizer" ],
                    "filter": [ "icu_folding", "stemmer" ]
                },
                "textIcuSearch": {
                    "type": "custom",
                    "tokenizer": "icu_tokenizer",
                    "char_filter": [ "html_strip", "icu_normalizer" ],
                    "filter": [ "icu_folding", "stemmer" ]
                },
                "textIcuExact": {
                    "type": "custom",
                    "tokenizer": "icu_tokenizer",
                    "char_filter": [ "html_strip", "icu_normalizer" ],
                    "filter": [ "icu_folding" ]
                },
                "textIcuSearchExact": {
                    "type": "custom",
                    "tokenizer": "icu_tokenizer",
                    "char_filter": [ "html_strip", "icu_normalizer" ],
                    "filter": [ "icu_folding" ]
                }
            },
            "normalizer": {
                "default": {
                    "type": "custom",
                    "char_filter": [],
                    "filter": ["lowercase"]
                },
                "caseSensitive": {
                    "type": "custom",
                    "char_filter": [],
                    "filter": []
                }
            }
        }
    }
},
"mappings": {
  "_doc": {
    "dynamic": false,
    "_source": {
      "excludes": [
        "fulltext.body",
        "fulltext.acknowledgement",
        "fulltext.annex",
        "biblio_all",
        "everything"
      ]
    },
    "properties": {

        "key":          { "type": "keyword", "normalizer": "default", "doc_values": false },
        "collapse_key": { "type": "keyword", "normalizer": "default" },
        "doc_type":     { "type": "keyword", "normalizer": "default" },
        "doc_index_ts": { "type": "date" },
        "work_ident":   { "type": "keyword", "normalizer": "default", "doc_values": false },
        "tags":         { "type": "keyword", "normalizer": "default" },

        "biblio": {
          "type": "object",
          "dynamic": false,
          "properties": {
            "release_ident":  { "type": "keyword", "normalizer": "default", "doc_values": false },
            "title":          {
                "type": "text","index": true, "analyzer": "textIcu", "search_analyzer":"textIcuSearch", "copy_to": ["title_all", "biblio_all", "everything"],
                "fields": {"exact": { "type": "text", "analyzer": "textIcuExact", "search_analyzer": "textIcuSearchExact" }}
            },
            "subtitle":       { "type": "text", "index": true, "analyzer": "textIcu", "search_analyzer":"textIcuSearch", "copy_to": ["title_all", "biblio_all", "everything"] },
            "original_title": { "type": "text", "index": true, "analyzer": "textIcu", "search_analyzer":"textIcuSearch", "copy_to": ["title_all", "biblio_all", "everything"] },
            "release_date":   { "type": "date" },
            "release_year":   { "type": "short", "copy_to": ["biblio_all", "everything"] },
            "release_type":   { "type": "keyword", "normalizer": "default", "copy_to": ["biblio_all", "everything"] },
            "release_stage":  { "type": "keyword", "normalizer": "default" },
            "withdrawn_status": { "type": "keyword", "normalizer": "default", "copy_to": ["biblio_all", "everything"] },
            "lang_code":       { "type": "keyword", "normalizer": "default" },
            "country_code":   { "type": "keyword", "normalizer": "default" },
            "volume":         { "type": "keyword", "normalizer": "default", "copy_to": ["biblio_all", "everything"] },
            "volume_int":     { "type": "short" },
            "issue":          { "type": "keyword", "normalizer": "default", "copy_to": ["biblio_all", "everything"] },
            "issue_int":      { "type": "short" },
            "pages":          { "type": "keyword", "normalizer": "default", "copy_to": ["biblio_all", "everything"] },
            "first_page":     { "type": "keyword", "normalizer": "default" },
            "first_page_int": { "type": "integer" },
            "number":         { "type": "keyword", "normalizer": "default", "copy_to": ["biblio_all", "everything"] },
            "doi":            { "type": "keyword", "normalizer": "default" },
            "doi_prefix":     { "type": "keyword", "normalizer": "default" },
            "doi_registrar":  { "type": "keyword", "normalizer": "default" },
            "pmid":           { "type": "keyword", "normalizer": "default", "doc_values": false },
            "pmcid":          { "type": "keyword", "normalizer": "default", "doc_values": false },
            "isbn13":         { "type": "keyword", "normalizer": "default", "doc_values": false },
            "wikidata_qid":   { "type": "keyword", "normalizer": "default", "doc_values": false },
            "arxiv_id":       { "type": "keyword", "normalizer": "default", "doc_values": false },
            "jstor_id":       { "type": "keyword", "normalizer": "default", "doc_values": false },
            "doaj_id":        { "type": "keyword", "normalizer": "default", "doc_values": false },
            "dblp_id":        { "type": "keyword", "normalizer": "default", "doc_values": false },
            "oai_id":         { "type": "keyword", "normalizer": "default", "doc_values": false },
            "license_slug":   { "type": "keyword", "normalizer": "default" },
            "publisher":            { "type": "text", "index": true, "analyzer": "textIcuExact", "search_analyzer":"textIcuSearchExact" },
            "publisher_type":       { "type": "keyword", "normalizer": "default" },
            "container_name":       { "type": "text", "index": true, "analyzer": "textIcu", "search_analyzer":"textIcuSearch", "copy_to": ["biblio_all", "everything"] },
            "container_original_name":       { "type": "text", "index": true, "analyzer": "textIcu", "search_analyzer":"textIcuSearch", "copy_to": ["biblio_all", "everything"] },
            "container_ident":      { "type": "keyword", "normalizer": "default" },
            "container_issnl":      { "type": "keyword", "normalizer": "default" },
            "container_wikidata_qid": { "type": "keyword", "normalizer": "default" },
            "container_sherpa_color": { "type": "keyword", "normalizer": "default" },
            "issns":                { "type": "keyword", "normalizer": "default" },
            "container_type":       { "type": "keyword", "normalizer": "default" },
            "contrib_count":        { "type": "short" },
            "contrib_names":        { "type": "text", "index": true, "analyzer": "textIcuExact", "search_analyzer":"textIcuSearchExact", "copy_to": ["biblio_all", "everything"] },
            "affiliations":         { "type": "text", "index": true, "analyzer": "textIcuExact", "search_analyzer":"textIcuSearchExact" }
          }
        },

        "fulltext": {
          "type": "object",
          "dynamic": false,
          "properties": {
            "lang_code":        { "type": "keyword", "normalizer": "default" },
            "body":             { "type": "text", "index": false, "analyzer": "textIcu", "search_analyzer":"textIcuSearch", "copy_to": "everything", "store": true },
            "acknowledgement":  { "type": "text", "index": true, "analyzer": "textIcu", "search_analyzer":"textIcuSearch", "copy_to": "everything", "store": true },
            "annex":            { "type": "text", "index": false, "analyzer": "textIcu", "search_analyzer":"textIcuSearch", "copy_to": "everything", "store": true },
            "release_ident":    { "type": "keyword", "normalizer": "default", "doc_values": false },
            "file_ident":       { "type": "keyword", "normalizer": "default", "doc_values": false },
            "file_sha1":        { "type": "keyword", "normalizer": "default", "doc_values": false },
            "file_mimetype":    { "type": "keyword", "normalizer": "default" },
            "size_bytes":       { "type": "integer" },
            "thumbnail_url":    { "type": "keyword", "normalizer": "default", "doc_values": false },
            "access_url":       { "type": "keyword", "normalizer": "default", "doc_values": false },
            "access_type":      { "type": "keyword", "normalizer": "default" }
          }
        },

        "ia_sim": {
          "type": "object",
          "dynamic": false,
          "properties": {
            "issue_item":       { "type": "keyword", "normalizer": "default" },
            "pub_collection":   { "type": "keyword", "normalizer": "default" },
            "sim_pubid":        { "type": "keyword", "normalizer": "default" },
            "first_page":       { "type": "keyword", "normalizer": "default" }
          }
        },

        "abstracts": {
          "type": "object",
          "dynamic": false,
          "properties": {
            "body":         { "type": "text", "index": true, "analyzer": "textIcu", "search_analyzer":"textIcuSearch", "copy_to": ["everything"] },
            "lang_code":    { "type": "keyword", "normalizer": "default" }
          }
        },

        "releases": {
          "type": "object",
          "dynamic": false,
          "properties": {

            "ident":  { "type": "keyword", "normalizer": "default", "doc_values": false },
            "revision":  { "type": "keyword", "normalizer": "default", "doc_values": false },
            "title":          { "type": "text", "index": true, "analyzer": "textIcu", "search_analyzer":"textIcuSearch", "copy_to": ["biblio_all", "everything"] },
            "release_date":   { "type": "date" },
            "release_year":   { "type": "short", "copy_to": ["biblio_all", "everything"] },
            "release_type":   { "type": "keyword", "normalizer": "default", "copy_to": ["biblio_all", "everything"] },
            "release_stage":  { "type": "keyword", "normalizer": "default" },
            "withdrawn_status": { "type": "keyword", "normalizer": "default", "copy_to": ["biblio_all", "everything"] },

            "doi":            { "type": "keyword", "normalizer": "default" },
            "doi_prefix":     { "type": "keyword", "normalizer": "default" },
            "doi_registrar":  { "type": "keyword", "normalizer": "default" },
            "pmid":           { "type": "keyword", "normalizer": "default", "doc_values": false },
            "pmcid":          { "type": "keyword", "normalizer": "default", "doc_values": false },
            "isbn13":         { "type": "keyword", "normalizer": "default", "doc_values": false },
            "wikidata_qid":   { "type": "keyword", "normalizer": "default", "doc_values": false },
            "arxiv_id":       { "type": "keyword", "normalizer": "default", "doc_values": false },
            "jstor_id":       { "type": "keyword", "normalizer": "default", "doc_values": false },
            "doaj_id":        { "type": "keyword", "normalizer": "default", "doc_values": false },
            "dblp_id":        { "type": "keyword", "normalizer": "default", "doc_values": false },
            "oai_id":         { "type": "keyword", "normalizer": "default", "doc_values": false },

            "license_slug":   { "type": "keyword", "normalizer": "default" },
            "container_name":       { "type": "text", "index": true, "analyzer": "textIcu", "search_analyzer":"textIcuSearch", "copy_to": ["biblio_all", "everything"] },
            "container_ident":      { "type": "keyword", "normalizer": "default" },
            "container_issnl":      { "type": "keyword", "normalizer": "default" },
            "container_type":       { "type": "keyword", "normalizer": "default" }
          }
        },

        "access": {
          "type": "object",
          "dynamic": false,
          "properties": {
            "access_type":      { "type": "keyword", "normalizer": "default" },
            "access_url":       { "type": "keyword", "normalizer": "default", "doc_values": false },

            "mimetype":         { "type": "keyword", "normalizer": "default" },
            "file_ident":       { "type": "keyword", "normalizer": "default", "doc_values": false },
            "release_ident":    { "type": "keyword", "normalizer": "default", "doc_values": false }
          }
        },

        "everything":     { "type": "text", "index": true, "index_phrases": true, "analyzer": "textIcu", "search_analyzer":"textIcuSearch" },
        "biblio_all":     { "type": "text", "index": true, "index_phrases": true, "analyzer": "textIcu", "search_analyzer":"textIcuSearch" },
        "title_all":      { "type": "text", "index": true, "index_phrases": true, "analyzer": "textIcu", "search_analyzer":"textIcuSearch" },

        "doctype":        { "type": "alias", "path": "doc_type" },
        "tag":            { "type": "alias", "path": "tags" },

        "abstract":       { "type": "alias", "path": "abstracts.body" },
        "acknowledgement":{ "type": "alias", "path": "fulltext.acknowledgement" },
        "access_type":    { "type": "alias", "path": "access.access_type" },

        "doi":            { "type": "alias", "path": "releases.doi" },
        "doi_prefix":     { "type": "alias", "path": "releases.doi_prefix" },
        "doi_registrar":  { "type": "alias", "path": "releases.doi_registrar" },
        "pmid":           { "type": "alias", "path": "releases.pmid" },
        "pmcid":          { "type": "alias", "path": "releases.pmcid" },
        "isbn13":         { "type": "alias", "path": "releases.isbn13" },
        "wikidata_qid":   { "type": "alias", "path": "releases.wikidata_qid" },
        "arxiv_id":       { "type": "alias", "path": "releases.arxiv_id" },
        "jstor_id":       { "type": "alias", "path": "releases.jstor_id" },
        "dblp_id":        { "type": "alias", "path": "releases.dblp_id" },
        "doaj_id":        { "type": "alias", "path": "releases.doaj_id" },
        "oai_id":         { "type": "alias", "path": "releases.oai_id" },

        "title":          { "type": "alias", "path": "title_all" },
        "subtitle":       { "type": "alias", "path": "biblio.subtitle" },
        "original_title": { "type": "alias", "path": "biblio.original_title" },
        "withdrawn_status":{"type": "alias", "path": "biblio.withdrawn_status" },
        "volume":         { "type": "alias", "path": "biblio.volume" },
        "issue":          { "type": "alias", "path": "biblio.issue" },
        "publisher":      { "type": "alias", "path": "biblio.publisher" },
        "affiliation":    { "type": "alias", "path": "biblio.affiliations" },
        "author":         { "type": "alias", "path": "biblio.contrib_names" },
        "journal":        { "type": "alias", "path": "biblio.container_name" },
        "date":           { "type": "alias", "path": "biblio.release_date" },
        "year":           { "type": "alias", "path": "biblio.release_year" },
        "issn":           { "type": "alias", "path": "biblio.issns" },
        "lang":           { "type": "alias", "path": "biblio.lang_code" },
        "stage":          { "type": "alias", "path": "biblio.release_stage" },
        "type":           { "type": "alias", "path": "biblio.release_type" },
        "country":        { "type": "alias", "path": "biblio.country_code" },

        "issue_item":     { "type": "alias", "path": "ia_sim.issue_item" },
        "pub_collection": { "type": "alias", "path": "ia_sim.pub_collection" },
        "sim_pubid":      { "type": "alias", "path": "ia_sim.sim_pubid" }
    }
  }
}
}