about summary refs log tree commit diff stats
path: root/grobid_tei_xml/parse.py
diff options
context:
space:
mode:
Diffstat (limited to 'grobid_tei_xml/parse.py')
-rwxr-xr-x grobid_tei_xml/parse.py 2
1 files changed, 2 insertions, 0 deletions
diff --git a/grobid_tei_xml/parse.py b/grobid_tei_xml/parse.py
index 32c5d0f..bbe383f 100755
--- a/grobid_tei_xml/parse.py
+++ b/grobid_tei_xml/parse.py
@@ -173,6 +173,7 @@ def parse_document_xml(xml_text: AnyStr) -> GrobidDocument:
grobid_version=application_tag.attrib["version"].strip(),
grobid_timestamp=application_tag.attrib["when"].strip(),
header=_parse_header(header),
+ # TODO: pdf_md5=header.findtext(f'.//{{{ns}}}idno[@type="MD5"]') or None,
)
refs = []
@@ -183,6 +184,7 @@ def parse_document_xml(xml_text: AnyStr) -> GrobidDocument:
refs.append(ref)
doc.citations = refs
+
text = tei.find(f".//{{{ns}}}text")
# print(text.attrib)
if text and text.attrib.get(f"{{{xml_ns}}}lang"):