summary | refs | log | tree | commit | diff | stats
path: root/tests/test_scrub.py
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@archive.org> 2020-08-12 12:27:34 -0700
committer: Bryan Newbold <bnewbold@archive.org> 2020-08-12 12:27:34 -0700
commit: 3260b880c0003b7d55bd026b9f1d259256648b0e (patch)
tree: 5faed3e7a12c7f065dc86cdeb71232f7a12a02dc /tests/test_scrub.py
parent: 02c93dd1b58d9ec26a6835ebd5e0ebbffb4a28cb (diff)
download: fatcat-scholar-3260b880c0003b7d55bd026b9f1d259256648b0e.tar.gz
download: fatcat-scholar-3260b880c0003b7d55bd026b9f1d259256648b0e.zip
transform: more string cleaning
Diffstat (limited to 'tests/test_scrub.py')
-rw-r--r-- tests/test_scrub.py 20
1 files changed, 19 insertions, 1 deletions
diff --git a/tests/test_scrub.py b/tests/test_scrub.py
index 6541e13..b142c10 100644
--- a/tests/test_scrub.py
+++ b/tests/test_scrub.py
@@ -1,4 +1,4 @@
-from fatcat_scholar.schema import scrub_text
+from fatcat_scholar.schema import scrub_text, clean_str
def test_scrub() -> None:
@@ -12,3 +12,21 @@ def test_scrub() -> None:
for raw, fixed in vectors:
assert fixed == scrub_text(raw)
+
+
+def test_clean_str() -> None:
+ vectors = [
+ (
+ "Di� Hekimli�i Fak�ltesi ��rencilerinde Temporomandibular Eklem Rahats�zl�klar�n�n ve A��z Sa�l��� Al��kanl�klar�n�n De�erlendirilmesi",
+ "Di� Hekimli�i Fak�ltesi ��rencilerinde Temporomandibular Eklem Rahats�zl�klar�n�n ve A��z Sa�l��� Al��kanl�klar�n�n De�erlendirilmesi",
+ ),
+ ("<jats:p>blah thing", "blah thing"),
+ ("title with <i>italics</i>", "title with italics"),
+ ("title with <sup>partial super", "title with partial super"),
+ ("", None),
+ ("&NA", None),
+ (None, None),
+ ]
+
+ for raw, fixed in vectors:
+ assert fixed == clean_str(raw)