diff options
author | Bryan Newbold <bnewbold@archive.org> | 2019-09-25 17:51:07 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2019-09-25 17:51:07 -0700 |
commit | d7830b4a5aad0a59a588e98798711f0e694d50d6 (patch) | |
tree | 7565cbec74584a146b8ee79bb881fa9f78851f60 /python_hadoop/tests/test_grobid2json.py | |
parent | 6e24eec4b6d1861eba37a0a05220b257e829ebbb (diff) | |
download | sandcrawler-d7830b4a5aad0a59a588e98798711f0e694d50d6.tar.gz sandcrawler-d7830b4a5aad0a59a588e98798711f0e694d50d6.zip |
refactor old python hadoop code into new directory
Diffstat (limited to 'python_hadoop/tests/test_grobid2json.py')
-rw-r--r-- | python_hadoop/tests/test_grobid2json.py | 22 |
1 files changed, 22 insertions, 0 deletions
```diff
diff --git a/python_hadoop/tests/test_grobid2json.py b/python_hadoop/tests/test_grobid2json.py
new file mode 100644
index 0000000..8497b10
--- /dev/null
+++ b/python_hadoop/tests/test_grobid2json.py
@@ -0,0 +1,22 @@
+
+import xml
+import json
+import pytest
+from grobid2json import *
+
+
+def test_small_xml():
+
+    with open('tests/files/small.xml', 'r') as f:
+        tei_xml = f.read()
+    with open('tests/files/small.json', 'r') as f:
+        json_form = json.loads(f.read())
+
+    assert teixml2json(tei_xml) == json_form
+
+def test_invalid_xml():
+
+    with pytest.raises(xml.etree.ElementTree.ParseError):
+        teixml2json("this is not XML")
+    with pytest.raises(ValueError):
+        teixml2json("<xml></xml>")
```