diff options
Diffstat (limited to 'tests/test_parse.py')
| -rw-r--r-- | tests/test_parse.py | 44 | 
1 files changed, 41 insertions, 3 deletions
| diff --git a/tests/test_parse.py b/tests/test_parse.py index 25529c4..70dcc98 100644 --- a/tests/test_parse.py +++ b/tests/test_parse.py @@ -5,7 +5,14 @@ import xml.etree.ElementTree  import pytest -from grobid_tei_xml import GrobidBiblio, GrobidDocument, parse_citations_xml, parse_document_xml +from grobid_tei_xml import ( +    GrobidBiblio, +    GrobidDocument, +    parse_citation_list_xml, +    parse_citation_xml, +    parse_citations_xml, +    parse_document_xml, +)  from grobid_tei_xml.types import * @@ -198,7 +205,8 @@ def test_single_citations_xml() -> None:      </monogr>  </biblStruct>""" -    d = parse_citations_xml(citation_xml)[0] +    d = parse_citation_xml(citation_xml) +    assert d      assert (          d.title          == """Mesh migration following abdominal hernia repair: a comprehensive review""" @@ -217,13 +225,21 @@ def test_single_citations_xml() -> None:      assert d.issue == "2"      assert d.journal == "Hernia" +    d2 = parse_citations_xml(citation_xml)[0] +    assert d.title == d2.title +    assert d.authors == d2.authors +  def test_citation_list_xml() -> None:      with open("tests/files/example_citation_list.xml", "r") as f:          tei_xml = f.read() -    citations = parse_citations_xml(tei_xml) +    citations = parse_citation_list_xml(tei_xml) + +    # verify that old function still works +    assert citations == parse_citations_xml(tei_xml) +      assert len(citations) == 13      assert citations[3].note == "The Research Handbook on International Environmental Law" @@ -300,3 +316,25 @@ def test_grobid_070_document() -> None:          c.to_csl_dict()          c.to_dict()          c.to_legacy_dict() + + +def test_empty_citations() -> None: + +    with open("tests/files/empty_citation_unstructured.tei.xml", "r") as f: +        mostly_empty_xml = f.read() + +    with open("tests/files/empty_citation.tei.xml", "r") as f: +        empty_xml = f.read() + +    assert parse_citation_xml(empty_xml) is None +    assert parse_citation_xml(mostly_empty_xml) is None + +    d = parse_citation_list_xml(empty_xml) +    assert d +    assert d[0].index == 0 +    assert d[0].unstructured is None + +    d2 = parse_citation_list_xml(mostly_empty_xml) +    assert d2 +    assert d2[0].index == 0 +    assert d2[0].unstructured == "blah" | 
