aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorBryan Newbold <bnewbold@archive.org>2020-06-23 18:38:33 -0700
committerBryan Newbold <bnewbold@archive.org>2020-06-23 18:38:33 -0700
commit9c0e5d714ea9fedeca64f1ceb5b47f67438629af (patch)
tree7cee18d19d5590b79551308e46acec02174a0322
parent73197cc91df821ee5ad36ac553b239c4467b793c (diff)
downloadchocula-9c0e5d714ea9fedeca64f1ceb5b47f67438629af.tar.gz
chocula-9c0e5d714ea9fedeca64f1ceb5b47f67438629af.zip
strip control characters from titles (issn_meta)
-rw-r--r--chocula/util.py4
1 files changed, 4 insertions, 0 deletions
diff --git a/chocula/util.py b/chocula/util.py
index 894af98..c2466cd 100644
--- a/chocula/util.py
+++ b/chocula/util.py
@@ -279,6 +279,10 @@ def clean_str(s: Optional[str]) -> Optional[str]:
if not s:
return None
s = unquote(ftfy.fix_text(s))
+ # these unicode characters are used by, e.g., the ISSN portal to mark prefixes as
+ # non-sorting
+ s.replace("\u02dc", "")
+ s.replace("\u0153", "")
return s or None