diff options
-rw-r--r-- | fatcat_scholar/schema.py | 1 |
1 files changed, 1 insertions, 0 deletions
diff --git a/fatcat_scholar/schema.py b/fatcat_scholar/schema.py
index 19f148b..64e9268 100644
--- a/fatcat_scholar/schema.py
+++ b/fatcat_scholar/schema.py
@@ -227,6 +227,7 @@ def scrub_text(raw: str, mimetype: str = None) -> Optional[str]:
     text = re.sub(r"[`‘’‛⸂⸃⸌⸍⸜⸝]", "'", text)
     text = re.sub(r"[„“]|(\'\')|(,,)", '"', text)
     text = re.sub(r"\s+", " ", text).strip()
+    text = text.replace("<em>", "").replace("</em>", "")
 
     # hack to remove abstract prefixes
     for prefix in UNWANTED_ABSTRACT_PREFIXES: