diff options
| author | Bryan Newbold <bnewbold@archive.org> | 2019-11-13 20:54:30 -0800 |
|---|---|---|
| committer | Bryan Newbold <bnewbold@archive.org> | 2019-11-13 20:54:30 -0800 |
| commit | 5e169ce051884d2ebbdcbfde7cdc1d2b2efc4f74 (patch) | |
| tree | 99287be62661a4ea46b1235edafe1d87a56ce3e2 | |
| parent | 889928e49c90776b0203d6612b6d229f9bb7725e (diff) | |
| download | sandcrawler-5e169ce051884d2ebbdcbfde7cdc1d2b2efc4f74.tar.gz sandcrawler-5e169ce051884d2ebbdcbfde7cdc1d2b2efc4f74.zip | |
grobid2json: make lang detection flexible
| -rwxr-xr-x | python/grobid2json.py | 3 |
1 file changed, 2 insertions, 1 deletion
```diff
diff --git a/python/grobid2json.py b/python/grobid2json.py
index 1f7270c..75fdcba 100755
--- a/python/grobid2json.py
+++ b/python/grobid2json.py
@@ -146,7 +146,8 @@ def teixml2json(content, encumbered=True):
 
     text = tei.find('.//{%s}text' % (ns))
     #print(text.attrib)
-    info['language_code'] = text.attrib['{%s}lang' % xml_ns] # xml:lang
+    if text.attrib.get('{%s}lang' % xml_ns):
+        info['language_code'] = text.attrib['{%s}lang' % xml_ns] # xml:lang
 
     if encumbered:
         el = tei.find('.//{%s}profileDesc/{%s}abstract' % (ns, ns))
```
