From fee17cf6518e13b6f1c3945dd769aba56d7606d5 Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Thu, 21 May 2020 20:23:45 -0700 Subject: HTML strip in ES indexing --- schema/scholar_fulltext.v01.json | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/schema/scholar_fulltext.v01.json b/schema/scholar_fulltext.v01.json index 23cadf2..7653633 100644 --- a/schema/scholar_fulltext.v01.json +++ b/schema/scholar_fulltext.v01.json @@ -11,25 +11,25 @@ "textIcu": { "type": "custom", "tokenizer": "icu_tokenizer", - "char_filter": [ "icu_normalizer" ], + "char_filter": [ "html_strip", "icu_normalizer" ], "filter": [ "icu_folding", "stemmer" ] }, "textIcuSearch": { "type": "custom", "tokenizer": "icu_tokenizer", - "char_filter": [ "icu_normalizer" ], + "char_filter": [ "html_strip", "icu_normalizer" ], "filter": [ "icu_folding", "stemmer" ] }, "textIcuExact": { "type": "custom", "tokenizer": "icu_tokenizer", - "char_filter": [ "icu_normalizer" ], + "char_filter": [ "html_strip", "icu_normalizer" ], "filter": [ "icu_folding" ] }, "textIcuSearchExact": { "type": "custom", "tokenizer": "icu_tokenizer", - "char_filter": [ "icu_normalizer" ], + "char_filter": [ "html_strip", "icu_normalizer" ], "filter": [ "icu_folding" ] } }, -- cgit v1.2.3