diff options
Diffstat (limited to 'tests/test_parse.py')
-rw-r--r-- | tests/test_parse.py | 44 |
1 files changed, 41 insertions, 3 deletions
diff --git a/tests/test_parse.py b/tests/test_parse.py index 25529c4..70dcc98 100644 --- a/tests/test_parse.py +++ b/tests/test_parse.py @@ -5,7 +5,14 @@ import xml.etree.ElementTree import pytest -from grobid_tei_xml import GrobidBiblio, GrobidDocument, parse_citations_xml, parse_document_xml +from grobid_tei_xml import ( + GrobidBiblio, + GrobidDocument, + parse_citation_list_xml, + parse_citation_xml, + parse_citations_xml, + parse_document_xml, +) from grobid_tei_xml.types import * @@ -198,7 +205,8 @@ def test_single_citations_xml() -> None: </monogr> </biblStruct>""" - d = parse_citations_xml(citation_xml)[0] + d = parse_citation_xml(citation_xml) + assert d assert ( d.title == """Mesh migration following abdominal hernia repair: a comprehensive review""" @@ -217,13 +225,21 @@ def test_single_citations_xml() -> None: assert d.issue == "2" assert d.journal == "Hernia" + d2 = parse_citations_xml(citation_xml)[0] + assert d.title == d2.title + assert d.authors == d2.authors + def test_citation_list_xml() -> None: with open("tests/files/example_citation_list.xml", "r") as f: tei_xml = f.read() - citations = parse_citations_xml(tei_xml) + citations = parse_citation_list_xml(tei_xml) + + # verify that old function still works + assert citations == parse_citations_xml(tei_xml) + assert len(citations) == 13 assert citations[3].note == "The Research Handbook on International Environmental Law" @@ -300,3 +316,25 @@ def test_grobid_070_document() -> None: c.to_csl_dict() c.to_dict() c.to_legacy_dict() + + +def test_empty_citations() -> None: + + with open("tests/files/empty_citation_unstructured.tei.xml", "r") as f: + mostly_empty_xml = f.read() + + with open("tests/files/empty_citation.tei.xml", "r") as f: + empty_xml = f.read() + + assert parse_citation_xml(empty_xml) is None + assert parse_citation_xml(mostly_empty_xml) is None + + d = parse_citation_list_xml(empty_xml) + assert d + assert d[0].index == 0 + assert d[0].unstructured is None + + d2 = parse_citation_list_xml(mostly_empty_xml) + assert d2 + assert d2[0].index == 0 + assert d2[0].unstructured == "blah" |