1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
|
from fatcat_scholar.schema import clean_str, scrub_text
def test_scrub() -> None:
vectors = [
(
"“Please clean this piece… of text</b>„",
'"Please clean this piece... of text"',
),
("<jats:p>blah thing", "blah thing"),
]
for raw, fixed in vectors:
assert fixed == scrub_text(raw)
def test_clean_str() -> None:
vectors = [
(
"Di� Hekimli�i Fak�ltesi ��rencilerinde Temporomandibular Eklem Rahats�zl�klar�n�n ve A��z Sa�l��� Al��kanl�klar�n�n De�erlendirilmesi",
"Di� Hekimli�i Fak�ltesi ��rencilerinde Temporomandibular Eklem Rahats�zl�klar�n�n ve A��z Sa�l��� Al��kanl�klar�n�n De�erlendirilmesi",
),
("<jats:p>blah thing", "blah thing"),
("title with <i>italics</i>", "title with italics"),
("title with <sup>partial super", "title with partial super"),
("", None),
("&NA", None),
(None, None),
(
"CO<SUB>2</SUB>レーザー光線及びYAGレーザー光線の気管線毛に対する影響について",
"CO2レーザー光線及びYAGレーザー光線の気管線毛に対する影響について",
),
]
for raw, fixed in vectors:
assert fixed == clean_str(raw)
|