From 5e169ce051884d2ebbdcbfde7cdc1d2b2efc4f74 Mon Sep 17 00:00:00 2001
From: Bryan Newbold
Date: Wed, 13 Nov 2019 20:54:30 -0800
Subject: grobid2json: make lang detection flexible

---
 python/grobid2json.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'python')

diff --git a/python/grobid2json.py b/python/grobid2json.py
index 1f7270c..75fdcba 100755
--- a/python/grobid2json.py
+++ b/python/grobid2json.py
@@ -146,7 +146,8 @@ def teixml2json(content, encumbered=True):
 
     text = tei.find('.//{%s}text' % (ns))
     #print(text.attrib)
-    info['language_code'] = text.attrib['{%s}lang' % xml_ns] # xml:lang
+    if text.attrib.get('{%s}lang' % xml_ns):
+        info['language_code'] = text.attrib['{%s}lang' % xml_ns] # xml:lang
 
     if encumbered:
         el = tei.find('.//{%s}profileDesc/{%s}abstract' % (ns, ns))
-- 
cgit v1.2.3