diff options
Diffstat (limited to 'python/grobid2json.py')
-rwxr-xr-x | python/grobid2json.py | 5 |
1 file changed, 5 insertions, 0 deletions
diff --git a/python/grobid2json.py b/python/grobid2json.py index e5af9d3..ae31486 100755 --- a/python/grobid2json.py +++ b/python/grobid2json.py @@ -28,6 +28,7 @@ import json import argparse import xml.etree.ElementTree as ET +xml_ns = "http://www.w3.org/XML/1998/namespace" ns = "http://www.tei-c.org/ns/1.0" def all_authors(elem): @@ -143,6 +144,10 @@ def teixml2json(content, encumbered=True): refs.append(ref) info['citations'] = refs + text = tei.find('.//{%s}text' % (ns)) + print(text.attrib) + info['language_code'] = text.attrib['{%s}lang' % xml_ns] # xml:lang + if encumbered: el = tei.find('.//{%s}profileDesc/{%s}abstract' % (ns, ns)) info['abstract'] = (el or None) and " ".join(el.itertext()).strip() |