diff options
author | Bryan Newbold <bnewbold@archive.org> | 2020-05-21 20:23:45 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2020-05-21 20:23:45 -0700 |
commit | fee17cf6518e13b6f1c3945dd769aba56d7606d5 (patch) | |
tree | 17f695c71bb615768392b31b71336cb8bc169a8b /schema | |
parent | 9eee6f9aef0469d81e57543f0488254c39ac2b66 (diff) | |
download | fatcat-scholar-fee17cf6518e13b6f1c3945dd769aba56d7606d5.tar.gz fatcat-scholar-fee17cf6518e13b6f1c3945dd769aba56d7606d5.zip |
HTML strip in ES indexing
Diffstat (limited to 'schema')
-rw-r--r-- | schema/scholar_fulltext.v01.json | 8 |
1 files changed, 4 insertions, 4 deletions
```diff
diff --git a/schema/scholar_fulltext.v01.json b/schema/scholar_fulltext.v01.json
index 23cadf2..7653633 100644
--- a/schema/scholar_fulltext.v01.json
+++ b/schema/scholar_fulltext.v01.json
@@ -11,25 +11,25 @@
         "textIcu": {
             "type": "custom",
             "tokenizer": "icu_tokenizer",
-            "char_filter": [ "icu_normalizer" ],
+            "char_filter": [ "html_strip", "icu_normalizer" ],
             "filter": [ "icu_folding", "stemmer" ]
         },
         "textIcuSearch": {
             "type": "custom",
             "tokenizer": "icu_tokenizer",
-            "char_filter": [ "icu_normalizer" ],
+            "char_filter": [ "html_strip", "icu_normalizer" ],
             "filter": [ "icu_folding", "stemmer" ]
         },
         "textIcuExact": {
             "type": "custom",
             "tokenizer": "icu_tokenizer",
-            "char_filter": [ "icu_normalizer" ],
+            "char_filter": [ "html_strip", "icu_normalizer" ],
             "filter": [ "icu_folding" ]
         },
         "textIcuSearchExact": {
             "type": "custom",
             "tokenizer": "icu_tokenizer",
-            "char_filter": [ "icu_normalizer" ],
+            "char_filter": [ "html_strip", "icu_normalizer" ],
             "filter": [ "icu_folding" ]
         }
     },
```