diff options
author | Bryan Newbold <bnewbold@archive.org> | 2020-06-23 18:38:33 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2020-06-23 18:38:33 -0700 |
commit | 9c0e5d714ea9fedeca64f1ceb5b47f67438629af (patch) | |
tree | 7cee18d19d5590b79551308e46acec02174a0322 | |
parent | 73197cc91df821ee5ad36ac553b239c4467b793c (diff) | |
download | chocula-9c0e5d714ea9fedeca64f1ceb5b47f67438629af.tar.gz chocula-9c0e5d714ea9fedeca64f1ceb5b47f67438629af.zip |
strip control characters from titles (issn_meta)
-rw-r--r-- | chocula/util.py | 4 |
1 files changed, 4 insertions, 0 deletions
diff --git a/chocula/util.py b/chocula/util.py index 894af98..c2466cd 100644 --- a/chocula/util.py +++ b/chocula/util.py @@ -279,6 +279,10 @@ def clean_str(s: Optional[str]) -> Optional[str]: if not s: return None s = unquote(ftfy.fix_text(s)) + # these unicode characters are used by, eg, ISSN portal to mark prefixes as + # non-sorting + s.replace("\u02dc", "") + s.replace("\u0153", "") return s or None |