summaryrefslogtreecommitdiffstats
path: root/tests/test_scrub.py
blob: b142c107f7055a1cbf29d01562e07ba487f86fd4 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
from fatcat_scholar.schema import scrub_text, clean_str


def test_scrub() -> None:
    """Run scrub_text over a table of samples and compare with the expected text.

    The samples cover typographic quotes and an ellipsis being replaced by
    their ASCII forms, and stray markup tags (``</b>``, ``<jats:p>``) being
    removed.
    """
    # Each entry is (input handed to scrub_text, output it must produce).
    cases = (
        (
            "“Please clean this piece… of text</b>„",
            '"Please clean this piece... of text"',
        ),
        ("<jats:p>blah thing", "blah thing"),
    )

    for given, expected in cases:
        assert scrub_text(given) == expected


def test_clean_str() -> None:
    """Run clean_str over a table of samples and compare with the expected value.

    The samples cover: text containing replacement characters (expected back
    unchanged), complete and unclosed markup tags being removed, and inputs
    for which ``None`` is expected (empty string, ``"&NA"``, and ``None``).
    """
    # Title containing replacement characters; the expected output is the
    # very same string, so one literal serves as both input and expectation.
    mangled_title = "Di� Hekimli�i Fak�ltesi ��rencilerinde Temporomandibular Eklem Rahats�zl�klar�n�n ve A��z Sa�l��� Al��kanl�klar�n�n De�erlendirilmesi"

    # Each entry is (input handed to clean_str, output it must produce).
    cases = (
        (mangled_title, mangled_title),
        ("<jats:p>blah thing", "blah thing"),
        ("title with <i>italics</i>", "title with italics"),
        ("title with <sup>partial super", "title with partial super"),
        ("", None),
        ("&NA", None),
        (None, None),
    )

    for given, expected in cases:
        assert clean_str(given) == expected