summary | refs | log | tree | commit | diff | stats
path: root/tests/test_parse.py
diff options
context:
space:
mode:
Diffstat (limited to 'tests/test_parse.py')
-rw-r--r-- tests/test_parse.py 44
1 files changed, 41 insertions, 3 deletions
diff --git a/tests/test_parse.py b/tests/test_parse.py
index 25529c4..70dcc98 100644
--- a/tests/test_parse.py
+++ b/tests/test_parse.py
@@ -5,7 +5,14 @@ import xml.etree.ElementTree
import pytest
-from grobid_tei_xml import GrobidBiblio, GrobidDocument, parse_citations_xml, parse_document_xml
+from grobid_tei_xml import (
+ GrobidBiblio,
+ GrobidDocument,
+ parse_citation_list_xml,
+ parse_citation_xml,
+ parse_citations_xml,
+ parse_document_xml,
+)
from grobid_tei_xml.types import *
@@ -198,7 +205,8 @@ def test_single_citations_xml() -> None:
</monogr>
</biblStruct>"""
- d = parse_citations_xml(citation_xml)[0]
+ d = parse_citation_xml(citation_xml)
+ assert d
assert (
d.title
== """Mesh migration following abdominal hernia repair: a comprehensive review"""
@@ -217,13 +225,21 @@ def test_single_citations_xml() -> None:
assert d.issue == "2"
assert d.journal == "Hernia"
+ d2 = parse_citations_xml(citation_xml)[0]
+ assert d.title == d2.title
+ assert d.authors == d2.authors
+
def test_citation_list_xml() -> None:
with open("tests/files/example_citation_list.xml", "r") as f:
tei_xml = f.read()
- citations = parse_citations_xml(tei_xml)
+ citations = parse_citation_list_xml(tei_xml)
+
+ # verify that old function still works
+ assert citations == parse_citations_xml(tei_xml)
+
assert len(citations) == 13
assert citations[3].note == "The Research Handbook on International Environmental Law"
@@ -300,3 +316,25 @@ def test_grobid_070_document() -> None:
c.to_csl_dict()
c.to_dict()
c.to_legacy_dict()
+
+
+def test_empty_citations() -> None:
+
+ with open("tests/files/empty_citation_unstructured.tei.xml", "r") as f:
+ mostly_empty_xml = f.read()
+
+ with open("tests/files/empty_citation.tei.xml", "r") as f:
+ empty_xml = f.read()
+
+ assert parse_citation_xml(empty_xml) is None
+ assert parse_citation_xml(mostly_empty_xml) is None
+
+ d = parse_citation_list_xml(empty_xml)
+ assert d
+ assert d[0].index == 0
+ assert d[0].unstructured is None
+
+ d2 = parse_citation_list_xml(mostly_empty_xml)
+ assert d2
+ assert d2[0].index == 0
+ assert d2[0].unstructured == "blah"