diff options
author | Bryan Newbold <bnewbold@archive.org> | 2020-08-12 12:27:34 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2020-08-12 12:27:34 -0700 |
commit | 3260b880c0003b7d55bd026b9f1d259256648b0e (patch) | |
tree | 5faed3e7a12c7f065dc86cdeb71232f7a12a02dc /tests/test_scrub.py | |
parent | 02c93dd1b58d9ec26a6835ebd5e0ebbffb4a28cb (diff) | |
download | fatcat-scholar-3260b880c0003b7d55bd026b9f1d259256648b0e.tar.gz fatcat-scholar-3260b880c0003b7d55bd026b9f1d259256648b0e.zip |
transform: more string cleaning
Diffstat (limited to 'tests/test_scrub.py')
-rw-r--r-- | tests/test_scrub.py | 20 |
1 files changed, 19 insertions, 1 deletions
diff --git a/tests/test_scrub.py b/tests/test_scrub.py
index 6541e13..b142c10 100644
--- a/tests/test_scrub.py
+++ b/tests/test_scrub.py
@@ -1,4 +1,4 @@
-from fatcat_scholar.schema import scrub_text
+from fatcat_scholar.schema import scrub_text, clean_str
 
 
 def test_scrub() -> None:
@@ -12,3 +12,21 @@ def test_scrub() -> None:
 
     for raw, fixed in vectors:
         assert fixed == scrub_text(raw)
+
+
+def test_clean_str() -> None:
+    vectors = [
+        (
+            "Di� Hekimli�i Fak�ltesi ��rencilerinde Temporomandibular Eklem Rahats�zl�klar�n�n ve A��z Sa�l��� Al��kanl�klar�n�n De�erlendirilmesi",
+            "Di� Hekimli�i Fak�ltesi ��rencilerinde Temporomandibular Eklem Rahats�zl�klar�n�n ve A��z Sa�l��� Al��kanl�klar�n�n De�erlendirilmesi",
+        ),
+        ("<jats:p>blah thing", "blah thing"),
+        ("title with <i>italics</i>", "title with italics"),
+        ("title with <sup>partial super", "title with partial super"),
+        ("", None),
+        ("&NA", None),
+        (None, None),
+    ]
+
+    for raw, fixed in vectors:
+        assert fixed == clean_str(raw)