From 60f29aaa1efd90628c6a6dc503e23d694e0389ce Mon Sep 17 00:00:00 2001
From: Bryan Newbold
Date: Tue, 10 Apr 2018 17:26:40 -0700
Subject: grobid2json test fixes

---
 mapreduce/grobid2json.py            | 2 ++
 mapreduce/tests/test_grobid2json.py | 2 +-
 2 files changed, 3 insertions(+), 1 deletion(-)

(limited to 'mapreduce')

diff --git a/mapreduce/grobid2json.py b/mapreduce/grobid2json.py
index c1ff1f1..ca460f8 100755
--- a/mapreduce/grobid2json.py
+++ b/mapreduce/grobid2json.py
@@ -95,6 +95,8 @@ def teixml2json(content, encumbered=True):
     tei = tree.getroot()
 
     header = tei.find('.//{%s}teiHeader' % ns)
+    if header is None:
+        raise ValueError("XML does not look like TEI format")
     info['title'] = header.findtext('.//{%s}analytic/{%s}title' % (ns, ns))
     info['authors'] = all_authors(header.find('.//{%s}sourceDesc/{%s}biblStruct' % (ns, ns)))
     info['journal'] = journal_info(header)
diff --git a/mapreduce/tests/test_grobid2json.py b/mapreduce/tests/test_grobid2json.py
index 1562006..8497b10 100644
--- a/mapreduce/tests/test_grobid2json.py
+++ b/mapreduce/tests/test_grobid2json.py
@@ -18,5 +18,5 @@ def test_invalid_xml():
     with pytest.raises(xml.etree.ElementTree.ParseError):
         teixml2json("this is not XML")
 
-    with pytest.raises(xml.etree.ElementTree.ParseError):
+    with pytest.raises(ValueError):
         teixml2json("")
-- 
cgit v1.2.3