diff options
author | Bryan Newbold <bnewbold@archive.org> | 2021-10-27 20:36:46 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2021-10-27 20:36:46 -0700 |
commit | 3e9cdaadf455ab1a9bb7289f7b9fa6c889f24669 (patch) | |
tree | 87800571c10cdad8f958080f19c60fb86109178e /grobid_tei_xml/parse.py | |
parent | 3e3902efe418677054d1711a3de1ab8c4cd2c57c (diff) | |
download | grobid_tei_xml-3e9cdaadf455ab1a9bb7289f7b9fa6c889f24669.tar.gz grobid_tei_xml-3e9cdaadf455ab1a9bb7289f7b9fa6c889f24669.zip |
add more explicit single and multiple citation parsing functions
Diffstat (limited to 'grobid_tei_xml/parse.py')
-rwxr-xr-x | grobid_tei_xml/parse.py | 26 |
1 files changed, 25 insertions, 1 deletions
diff --git a/grobid_tei_xml/parse.py b/grobid_tei_xml/parse.py
index cd55f9a..4916b7f 100755
--- a/grobid_tei_xml/parse.py
+++ b/grobid_tei_xml/parse.py
@@ -270,7 +270,7 @@ def parse_document_xml(xml_text: AnyStr) -> GrobidDocument:
     return doc
 
 
-def parse_citations_xml(xml_text: AnyStr) -> List[GrobidBiblio]:
+def parse_citation_list_xml(xml_text: AnyStr) -> List[GrobidBiblio]:
     """
     Use this function to parse TEI-XML of one or more references. This should
     work with either /api/processCitation or /api/processCitationList API
@@ -299,3 +299,27 @@ def parse_citations_xml(xml_text: AnyStr) -> List[GrobidBiblio]:
             ref.index = i
         refs.append(ref)
     return refs
+
+
+def parse_citations_xml(xml_text: AnyStr) -> List[GrobidBiblio]:
+    """
+    Alias for `parse_citation_list_xml()`
+    """
+    return parse_citation_list_xml(xml_text=xml_text)
+
+
+def parse_citation_xml(xml_text: AnyStr) -> Optional[GrobidBiblio]:
+    """
+    Parses a single citation. If the result is empty, or only contains the
+    'unstructured' field, returns None.
+    """
+    # internally, re-uses parse_citation_list_xml()
+    citation_list = parse_citation_list_xml(xml_text)
+    if not citation_list:
+        return None
+    citation = citation_list[0]
+    citation.index = None
+    if citation.is_empty():
+        return None
+    else:
+        return citation