about summary refs log tree commit diff stats
path: root/mapreduce/grobid2json.py
diff options
context:
space:
mode:
Diffstat (limited to 'mapreduce/grobid2json.py')
-rwxr-xr-x mapreduce/grobid2json.py 2
1 files changed, 2 insertions, 0 deletions
diff --git a/mapreduce/grobid2json.py b/mapreduce/grobid2json.py
index c1ff1f1..ca460f8 100755
--- a/mapreduce/grobid2json.py
+++ b/mapreduce/grobid2json.py
@@ -95,6 +95,8 @@ def teixml2json(content, encumbered=True):
tei = tree.getroot()
header = tei.find('.//{%s}teiHeader' % ns)
+ if header is None:
+ raise ValueError("XML does not look like TEI format")
info['title'] = header.findtext('.//{%s}analytic/{%s}title' % (ns, ns))
info['authors'] = all_authors(header.find('.//{%s}sourceDesc/{%s}biblStruct' % (ns, ns)))
info['journal'] = journal_info(header)