aboutsummaryrefslogtreecommitdiffstats
path: root/python/grobid2json.py
diff options
context:
space:
mode:
Diffstat (limited to 'python/grobid2json.py')
-rwxr-xr-x python/grobid2json.py 5
1 files changed, 5 insertions, 0 deletions
diff --git a/python/grobid2json.py b/python/grobid2json.py
index e5af9d3..ae31486 100755
--- a/python/grobid2json.py
+++ b/python/grobid2json.py
@@ -28,6 +28,7 @@ import json
import argparse
import xml.etree.ElementTree as ET
+xml_ns = "http://www.w3.org/XML/1998/namespace"
ns = "http://www.tei-c.org/ns/1.0"
def all_authors(elem):
@@ -143,6 +144,10 @@ def teixml2json(content, encumbered=True):
refs.append(ref)
info['citations'] = refs
+ text = tei.find('.//{%s}text' % (ns))
+ print(text.attrib)
+ info['language_code'] = text.attrib['{%s}lang' % xml_ns] # xml:lang
+
if encumbered:
el = tei.find('.//{%s}profileDesc/{%s}abstract' % (ns, ns))
info['abstract'] = (el or None) and " ".join(el.itertext()).strip()