aboutsummaryrefslogtreecommitdiffstats
path: root/grobid_tei_xml/parse.py
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@archive.org> 2021-10-27 20:36:46 -0700
committer Bryan Newbold <bnewbold@archive.org> 2021-10-27 20:36:46 -0700
commit 3e9cdaadf455ab1a9bb7289f7b9fa6c889f24669 (patch)
tree 87800571c10cdad8f958080f19c60fb86109178e /grobid_tei_xml/parse.py
parent 3e3902efe418677054d1711a3de1ab8c4cd2c57c (diff)
download grobid_tei_xml-3e9cdaadf455ab1a9bb7289f7b9fa6c889f24669.tar.gz
grobid_tei_xml-3e9cdaadf455ab1a9bb7289f7b9fa6c889f24669.zip
add more explicit single and multiple citation parsing functions
Diffstat (limited to 'grobid_tei_xml/parse.py')
-rwxr-xr-x grobid_tei_xml/parse.py 26
1 files changed, 25 insertions, 1 deletions
diff --git a/grobid_tei_xml/parse.py b/grobid_tei_xml/parse.py
index cd55f9a..4916b7f 100755
--- a/grobid_tei_xml/parse.py
+++ b/grobid_tei_xml/parse.py
@@ -270,7 +270,7 @@ def parse_document_xml(xml_text: AnyStr) -> GrobidDocument:
return doc
-def parse_citations_xml(xml_text: AnyStr) -> List[GrobidBiblio]:
+def parse_citation_list_xml(xml_text: AnyStr) -> List[GrobidBiblio]:
"""
Use this function to parse TEI-XML of one or more references. This should
work with either /api/processCitation or /api/processCitationList API
@@ -299,3 +299,27 @@ def parse_citations_xml(xml_text: AnyStr) -> List[GrobidBiblio]:
ref.index = i
refs.append(ref)
return refs
+
+
+def parse_citations_xml(xml_text: AnyStr) -> List[GrobidBiblio]:
+ """
+ Alias for `parse_citation_list_xml()`; passes `xml_text` through unchanged.
+ """
+ return parse_citation_list_xml(xml_text=xml_text)
+
+
+def parse_citation_xml(xml_text: AnyStr) -> Optional[GrobidBiblio]:
+ """
+ Parses a single citation; only the first parsed entry is considered. Returns
+ None if nothing was parsed or the entry is empty (e.g. only 'unstructured' set).
+ """
+ # internally re-uses parse_citation_list_xml() and takes its first result
+ citation_list = parse_citation_list_xml(xml_text)
+ if not citation_list:
+ return None
+ citation = citation_list[0]
+ citation.index = None  # drop the position index set by the list parser
+ if citation.is_empty():
+ return None
+ else:
+ return citation