aboutsummaryrefslogtreecommitdiffstats
path: root/python/tests/test_xml.py
blob: 1742f3a9bc664945a2e0179de4555c118240bb63 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
import pytest

from sandcrawler.xml import xml_reserialize


def test_xml_reserialize() -> None:

    with open('tests/files/scielo_article.jats.xml', 'rb') as f:
        raw_xml = f.read()

    assert b'encoding="ISO-8859-1"' in raw_xml
    raw_xml.decode("ISO-8859-1")
    with pytest.raises(UnicodeDecodeError):
        raw_xml.decode("utf-8")

    str_xml = xml_reserialize(raw_xml)
    assert 'encoding="UTF-8"' in str_xml