diff options
Diffstat (limited to 'grobid_tei_xml/parse.py')
-rwxr-xr-x | grobid_tei_xml/parse.py | 2 |
1 file changed, 2 insertions, 0 deletions
diff --git a/grobid_tei_xml/parse.py b/grobid_tei_xml/parse.py index 32c5d0f..bbe383f 100755 --- a/grobid_tei_xml/parse.py +++ b/grobid_tei_xml/parse.py @@ -173,6 +173,7 @@ def parse_document_xml(xml_text: AnyStr) -> GrobidDocument: grobid_version=application_tag.attrib["version"].strip(), grobid_timestamp=application_tag.attrib["when"].strip(), header=_parse_header(header), + # TODO: pdf_md5=header.findtext(f'.//{{{ns}}}idno[@type="MD5"]') or None, ) refs = [] @@ -183,6 +184,7 @@ def parse_document_xml(xml_text: AnyStr) -> GrobidDocument: refs.append(ref) doc.citations = refs + text = tei.find(f".//{{{ns}}}text") # print(text.attrib) if text and text.attrib.get(f"{{{xml_ns}}}lang"): |