# Recovered from a git patch (cgit v1.2.3 rendering) that had been collapsed
# onto a single line. Patch metadata, kept for provenance:
#   commit:  d7830b4a5aad0a59a588e98798711f0e694d50d6
#   From:    Bryan Newbold
#   Date:    Wed, 25 Sep 2019 17:51:07 -0700
#   Subject: refactor old python hadoop code into new directory
#   file:    python_hadoop/tests/test_grobid2json.py
#            (new file, mode 100644, index 0000000..8497b10, 22 insertions)
"""Tests for ``teixml2json`` from the ``grobid2json`` module."""

import json
# Import the submodule explicitly: a bare ``import xml`` does not guarantee
# that ``xml.etree.ElementTree`` is loaded as an attribute of ``xml``.
import xml.etree.ElementTree

import pytest

from grobid2json import teixml2json


def test_small_xml():
    """Check that converting the small TEI-XML fixture yields the stored JSON."""
    # Fixture paths are relative to the working directory the tests run from.
    # NOTE(review): fixtures are assumed to be UTF-8 encoded -- confirm.
    with open('tests/files/small.xml', 'r', encoding='utf-8') as f:
        tei_xml = f.read()
    with open('tests/files/small.json', 'r', encoding='utf-8') as f:
        json_form = json.load(f)

    assert teixml2json(tei_xml) == json_form


def test_invalid_xml():
    """Check the exceptions raised for non-XML text and for an empty string."""
    with pytest.raises(xml.etree.ElementTree.ParseError):
        teixml2json("this is not XML")
    with pytest.raises(ValueError):
        teixml2json("")