diff options
author | Bryan Newbold <bnewbold@archive.org> | 2018-04-06 14:17:30 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2018-04-06 14:17:30 -0700 |
commit | 82cbabc249e5c0375dbb82106451969467f8da00 (patch) | |
tree | 0369d2a2f834b18a051da0d7f41c1625543894ab /mapreduce/tests/test_grobid2json.py | |
parent | 159d049fe072d38c0297dea6b4ed82a37eff36b7 (diff) | |
download | sandcrawler-82cbabc249e5c0375dbb82106451969467f8da00.tar.gz sandcrawler-82cbabc249e5c0375dbb82106451969467f8da00.zip |
add test for grobid2json
Diffstat (limited to 'mapreduce/tests/test_grobid2json.py')
-rw-r--r-- | mapreduce/tests/test_grobid2json.py | 14 |
1 files changed, 14 insertions, 0 deletions
```diff
diff --git a/mapreduce/tests/test_grobid2json.py b/mapreduce/tests/test_grobid2json.py
new file mode 100644
index 0000000..40853dd
--- /dev/null
+++ b/mapreduce/tests/test_grobid2json.py
@@ -0,0 +1,14 @@
+
+import json
+import pytest
+from grobid2json import *
+
+
+def test_small_xml():
+
+    with open('tests/files/small.xml', 'r') as f:
+        tei_xml = f.read()
+    with open('tests/files/small.json', 'r') as f:
+        json_form = json.loads(f.read())
+
+    assert teixml2json(tei_xml) == json_form
```