diff options
Diffstat (limited to 'tests')
| -rw-r--r-- | tests/files/empty_citation.tei.xml | 6 | ||||
| -rw-r--r-- | tests/files/empty_citation_unstructured.tei.xml | 7 | ||||
| -rw-r--r-- | tests/test_parse.py | 44 | 
3 files changed, 54 insertions, 3 deletions
| diff --git a/tests/files/empty_citation.tei.xml b/tests/files/empty_citation.tei.xml new file mode 100644 index 0000000..cb21f6e --- /dev/null +++ b/tests/files/empty_citation.tei.xml @@ -0,0 +1,6 @@ +<biblStruct > +        <monogr> +                <title/> +                <imprint/> +        </monogr> +</biblStruct> diff --git a/tests/files/empty_citation_unstructured.tei.xml b/tests/files/empty_citation_unstructured.tei.xml new file mode 100644 index 0000000..35aee19 --- /dev/null +++ b/tests/files/empty_citation_unstructured.tei.xml @@ -0,0 +1,7 @@ +<biblStruct > +        <monogr> +                <title/> +                <imprint/> +        </monogr> +        <note type="raw_reference">blah</note> +</biblStruct> diff --git a/tests/test_parse.py b/tests/test_parse.py index 25529c4..70dcc98 100644 --- a/tests/test_parse.py +++ b/tests/test_parse.py @@ -5,7 +5,14 @@ import xml.etree.ElementTree  import pytest -from grobid_tei_xml import GrobidBiblio, GrobidDocument, parse_citations_xml, parse_document_xml +from grobid_tei_xml import ( +    GrobidBiblio, +    GrobidDocument, +    parse_citation_list_xml, +    parse_citation_xml, +    parse_citations_xml, +    parse_document_xml, +)  from grobid_tei_xml.types import * @@ -198,7 +205,8 @@ def test_single_citations_xml() -> None:      </monogr>  </biblStruct>""" -    d = parse_citations_xml(citation_xml)[0] +    d = parse_citation_xml(citation_xml) +    assert d      assert (          d.title          == """Mesh migration following abdominal hernia repair: a comprehensive review""" @@ -217,13 +225,21 @@ def test_single_citations_xml() -> None:      assert d.issue == "2"      assert d.journal == "Hernia" +    d2 = parse_citations_xml(citation_xml)[0] +    assert d.title == d2.title +    assert d.authors == d2.authors +  def test_citation_list_xml() -> None:      with open("tests/files/example_citation_list.xml", "r") as f:          tei_xml = f.read() -    citations = parse_citations_xml(tei_xml) +    citations = parse_citation_list_xml(tei_xml) + +    # verify that old function still works +    assert citations == parse_citations_xml(tei_xml) +      assert len(citations) == 13      assert citations[3].note == "The Research Handbook on International Environmental Law" @@ -300,3 +316,25 @@ def test_grobid_070_document() -> None:          c.to_csl_dict()          c.to_dict()          c.to_legacy_dict() + + +def test_empty_citations() -> None: + +    with open("tests/files/empty_citation_unstructured.tei.xml", "r") as f: +        mostly_empty_xml = f.read() + +    with open("tests/files/empty_citation.tei.xml", "r") as f: +        empty_xml = f.read() + +    assert parse_citation_xml(empty_xml) is None +    assert parse_citation_xml(mostly_empty_xml) is None + +    d = parse_citation_list_xml(empty_xml) +    assert d +    assert d[0].index == 0 +    assert d[0].unstructured is None + +    d2 = parse_citation_list_xml(mostly_empty_xml) +    assert d2 +    assert d2[0].index == 0 +    assert d2[0].unstructured == "blah" | 
