From f9cc2c2d40c2083549c333064582183c96162e05 Mon Sep 17 00:00:00 2001
From: Bryan Newbold
Date: Tue, 21 Jul 2020 17:05:05 -0700
Subject: strip tags explicitly

---
 fatcat_scholar/schema.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fatcat_scholar/schema.py b/fatcat_scholar/schema.py
index 19f148b..64e9268 100644
--- a/fatcat_scholar/schema.py
+++ b/fatcat_scholar/schema.py
@@ -227,6 +227,7 @@ def scrub_text(raw: str, mimetype: str = None) -> Optional[str]:
     text = re.sub(r"[`‘’‛⸂⸃⸌⸍⸜⸝]", "'", text)
     text = re.sub(r"[„“]|(\'\')|(,,)", '"', text)
     text = re.sub(r"\s+", " ", text).strip()
+    text = text.replace("", "").replace("", "")
 
     # hack to remove abstract prefixes
     for prefix in UNWANTED_ABSTRACT_PREFIXES:
-- 
cgit v1.2.3