summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- fatcat_scholar/schema.py 1
1 files changed, 1 insertions, 0 deletions
diff --git a/fatcat_scholar/schema.py b/fatcat_scholar/schema.py
index 19f148b..64e9268 100644
--- a/fatcat_scholar/schema.py
+++ b/fatcat_scholar/schema.py
@@ -227,6 +227,7 @@ def scrub_text(raw: str, mimetype: str = None) -> Optional[str]:
text = re.sub(r"[`‘’‛⸂⸃⸌⸍⸜⸝]", "'", text)
text = re.sub(r"[„“]|(\'\')|(,,)", '"', text)
text = re.sub(r"\s+", " ", text).strip()
+ text = text.replace("<em>", "").replace("</em>", "")
# hack to remove abstract prefixes
for prefix in UNWANTED_ABSTRACT_PREFIXES: