aboutsummaryrefslogtreecommitdiffstats
path: root/python/tests/test_grobid2json.py
blob: b00a88d7aa3fecd9cbdbf6c7ab9f969c53832283 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
import json
import xml

import pytest
from grobid_tei_xml import parse_document_xml


def test_small_xml():
    """
    This used to be a test of grobid2json; now it is a compatibility test for
    the to_legacy_dict() feature of grobid_tei_xml.

    Parses tests/files/small.xml and checks that its legacy dict form equals
    the JSON document stored in tests/files/small.json.
    """

    # Explicit encoding so the fixtures decode the same way regardless of the
    # platform's locale default.
    with open("tests/files/small.xml", "r", encoding="utf-8") as f:
        tei_xml = f.read()
    with open("tests/files/small.json", "r", encoding="utf-8") as f:
        json_form = json.load(f)

    tei_doc = parse_document_xml(tei_xml)
    assert tei_doc.to_legacy_dict() == json_form


def test_invalid_xml():
    """
    Check that parse_document_xml() rejects bad input: a non-XML string must
    raise ElementTree's ParseError, and an XML document with an unexpected
    root element must raise ValueError.
    """

    # Import the exception explicitly: a bare `import xml` does not load the
    # `xml.etree.ElementTree` submodule, so the attribute chain would only
    # resolve if another module happened to import it first.
    from xml.etree.ElementTree import ParseError

    with pytest.raises(ParseError):
        parse_document_xml("this is not XML")
    with pytest.raises(ValueError):
        parse_document_xml("<xml></xml>")