summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@archive.org> 2020-05-21 20:23:45 -0700
committer Bryan Newbold <bnewbold@archive.org> 2020-05-21 20:23:45 -0700
commit fee17cf6518e13b6f1c3945dd769aba56d7606d5 (patch)
tree 17f695c71bb615768392b31b71336cb8bc169a8b
parent 9eee6f9aef0469d81e57543f0488254c39ac2b66 (diff)
download fatcat-scholar-fee17cf6518e13b6f1c3945dd769aba56d7606d5.tar.gz
fatcat-scholar-fee17cf6518e13b6f1c3945dd769aba56d7606d5.zip
HTML strip in ES indexing
-rw-r--r-- schema/scholar_fulltext.v01.json 8
1 files changed, 4 insertions, 4 deletions
diff --git a/schema/scholar_fulltext.v01.json b/schema/scholar_fulltext.v01.json
index 23cadf2..7653633 100644
--- a/schema/scholar_fulltext.v01.json
+++ b/schema/scholar_fulltext.v01.json
@@ -11,25 +11,25 @@
"textIcu": {
"type": "custom",
"tokenizer": "icu_tokenizer",
- "char_filter": [ "icu_normalizer" ],
+ "char_filter": [ "html_strip", "icu_normalizer" ],
"filter": [ "icu_folding", "stemmer" ]
},
"textIcuSearch": {
"type": "custom",
"tokenizer": "icu_tokenizer",
- "char_filter": [ "icu_normalizer" ],
+ "char_filter": [ "html_strip", "icu_normalizer" ],
"filter": [ "icu_folding", "stemmer" ]
},
"textIcuExact": {
"type": "custom",
"tokenizer": "icu_tokenizer",
- "char_filter": [ "icu_normalizer" ],
+ "char_filter": [ "html_strip", "icu_normalizer" ],
"filter": [ "icu_folding" ]
},
"textIcuSearchExact": {
"type": "custom",
"tokenizer": "icu_tokenizer",
- "char_filter": [ "icu_normalizer" ],
+ "char_filter": [ "html_strip", "icu_normalizer" ],
"filter": [ "icu_folding" ]
}
},