diff options
author | Bryan Newbold <bnewbold@archive.org> | 2021-11-03 20:30:02 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2021-11-03 20:30:02 -0700 |
commit | 132d257befc2088a98b3fe10aa71713338d15673 (patch) | |
tree | 92e7be2493c779ed50be123bee353f0b5859f0e4 | |
parent | 6ad771e195065a1f22ddbe5a4098acc70137be9b (diff) | |
download | grobid_tei_xml-132d257befc2088a98b3fe10aa71713338d15673.tar.gz grobid_tei_xml-132d257befc2088a98b3fe10aa71713338d15673.zip |
add test for XML parse exception behavior
-rw-r--r-- | tests/files/citation_list/parse_error.tei.xml | 64 | ||||
-rw-r--r-- | tests/test_errors.py | 23 |
2 files changed, 87 insertions, 0 deletions
diff --git a/tests/files/citation_list/parse_error.tei.xml b/tests/files/citation_list/parse_error.tei.xml new file mode 100644 index 0000000..fc7b4fd --- /dev/null +++ b/tests/files/citation_list/parse_error.tei.xml @@ -0,0 +1,64 @@ +<TEI xmlns="http://www.tei-c.org/ns/1.0" xmlns:xlink="http://www.w3.org/1999/xlink" + xmlns:mml="http://www.w3.org/1998/Math/MathML"> + <teiHeader/> + <text> + <front/> + <body/> + <back> + <div> + <listBibl> + +<biblStruct xml:id="b3"> + <analytic> + <title level="a" type="main">Design and Analysis of an Optimi-zation Model by using Scheduling Algorithm for Electric Power Cycles</title> + <author> + <persName><forename type="first">S</forename><surname>Maheswari</surname></persName> + </author> + <author> + <persName><forename type="first">C</forename><surname>Vijayalakshmi</surname></persName> + </author> + </analytic> + <monogr> + <title level="m">Proceedings of the National Conference On Applied Ma-thematics</title> + <editor> + <persName><forename type="first">B</forename><forename type="middle">S</forename><surname>Abdur Rahman University</surname></persName> + <persName><surname>Chennai</surname></persName> + </editor> + <meeting>the National Conference On Applied Ma-thematics</meeting> + <imprint> + <date type="published" when="2010-01">NCAM 2010. January 2010</date> + <biblScope unit="page" from="160" to="163" /> + </imprint> + </monogr> + <note type="raw_reference">Maheswari, S., Vijayalakshmi, C.: Design and Analysis of an Optimi-zation Model by using Scheduling Algorithm for Electric Power Cycles. In: Proceedings of the National Conference On Applied Ma-thematics (NCAM 2010), B.S. Abdur Rahman University Chennai, pp. 
160–163 (January 2010)</note> +</biblStruct> + +<biblStruct xml:id="b4"> + <analytic> + <title level="a" type="main">Optimization Model for Electricity Distribution System Control using Communication System by La-grangian Relaxation Technique</title> + <author> + <persName><forename type="first">S</forename><surname>Maheswari</surname></persName> + </author> + <author> + <persName><forename type="first">C</forename><surname>Vijayalakshmi</surname></persName> + </author> + </analytic> + <monogr> + <title level="j">CiiT International Journal of Wireless Communication</title> + <idno type="ISSN">0974 - 9756 & Online: 0974 - 9640</idno> + <imprint> + <biblScope unit="volume">3</biblScope> + <biblScope unit="issue">3</biblScope> + <biblScope unit="page" from="183" to="187" /> + <date type="published" when="2011">2011</date> + </imprint> + </monogr> + <note type="report_type">Print:</note> + <note type="raw_reference">Maheswari, S., Vijayalakshmi, C.: Optimization Model for Electricity Distribution System Control using Communication System by La-grangian Relaxation Technique. CiiT International Journal of Wireless Communication 3(3), 183–187 (2011) (Print: ISSN 0974 – 9756 & Online: ISSN 0974 – 9640)</note> +</biblStruct> + + </listBibl> + </div> + </back> + </text> +</TEI> diff --git a/tests/test_errors.py b/tests/test_errors.py new file mode 100644 index 0000000..8537551 --- /dev/null +++ b/tests/test_errors.py @@ -0,0 +1,23 @@ +import xml.etree.ElementTree + +import pytest + +from grobid_tei_xml import parse_citation_list_xml + + +def test_doc_parse_error() -> None: + """ + This XML document has a bare '&' (should be '&amp;') and results in a parse + error. + + See also: https://github.com/kermitt2/grobid/issues/848 + + The intent of this test is to ensure that the exception raised is the one + expected, especially if that behavior changes in the future. 
+ """ + + with open("tests/files/citation_list/parse_error.tei.xml", "r") as f: + tei_xml = f.read() + + with pytest.raises(xml.etree.ElementTree.ParseError): + parse_citation_list_xml(tei_xml) |