diff options
Diffstat (limited to 'tests')
-rw-r--r-- | tests/test_grobid2json.py | 35 | ||||
-rw-r--r-- | tests/test_parse.py | 99 |
2 files changed, 69 insertions, 65 deletions
diff --git a/tests/test_grobid2json.py b/tests/test_grobid2json.py index a1c975e..47ab293 100644 --- a/tests/test_grobid2json.py +++ b/tests/test_grobid2json.py @@ -1,12 +1,12 @@ -import xml import json +import xml + import pytest -from grobid_tei_xml import teixml2json, parse_document_xml, GrobidDocument, GrobidCitation -from grobid_tei_xml.grobid2json import transform_grobid_ref_xml +from grobid_tei_xml.grobid2json import teixml2json, transform_grobid_ref_xml -def test_small_xml(): +def test_small_xml() -> None: with open('tests/files/small.xml', 'r') as f: tei_xml = f.read() @@ -16,7 +16,7 @@ def test_small_xml(): assert teixml2json(tei_xml) == json_form -def test_invalid_xml(): +def test_invalid_xml() -> None: with pytest.raises(xml.etree.ElementTree.ParseError): teixml2json("this is not XML") @@ -31,29 +31,21 @@ def test_grobid_teixml2json() -> None: obj = teixml2json(blob, True) - assert ( - obj["title"] == - "Changes of patients' satisfaction with the health care services in Lithuanian Health Promoting Hospitals network" - ) + assert obj[ + "title"] == """Changes of patients' satisfaction with the health care services in Lithuanian Health Promoting Hospitals network""" ref = [c for c in obj["citations"] if c["id"] == "b12"][0] - assert ref["authors"][0] == { - "given_name": "K", - "name": "K Tasa", - "surname": "Tasa" - } + assert ref["authors"][0] == {"given_name": "K", "name": "K Tasa", "surname": "Tasa"} assert ref["journal"] == "Quality Management in Health Care" assert ref["title"] == "Using patient feedback for quality improvement" assert ref["date"] == "1996" assert ref["pages"] == "206-225" assert ref["volume"] == "8" - assert ( - ref["unstructured"] == - "Tasa K, Baker R, Murray M. Using patient feedback for qua- lity improvement. Quality Management in Health Care 1996;8:206-19." - ) + assert ref["unstructured"] == \ + """Tasa K, Baker R, Murray M. Using patient feedback for qua- lity improvement. Quality Management in Health Care 1996;8:206-19.""" -def test_transform_grobid_ref_xml(): +def test_transform_grobid_ref_xml() -> None: citation_xml = """ <biblStruct > <analytic> @@ -104,8 +96,9 @@ def test_transform_grobid_ref_xml(): </biblStruct>""" d = transform_grobid_ref_xml(citation_xml) - assert d[ - 'title'] == "Mesh migration following abdominal hernia repair: a comprehensive review" + assert d + assert d['title'] == \ + "Mesh migration following abdominal hernia repair: a comprehensive review" assert d['authors'][2]['given_name'] == "L" assert d['authors'][2]['surname'] == "Taveras" assert d['authors'][2]['name'] == "L R Taveras" diff --git a/tests/test_parse.py b/tests/test_parse.py index e79d41d..30b2926 100644 --- a/tests/test_parse.py +++ b/tests/test_parse.py @@ -1,17 +1,22 @@ -import xml +import io import json +import xml +import xml.etree.ElementTree + import pytest -from grobid_tei_xml import teixml2json, parse_document_xml, parse_citations_xml, GrobidDocument, GrobidCitation +from grobid_tei_xml import (GrobidCitation, GrobidDocument, parse_citations_xml, + parse_document_xml) from grobid_tei_xml.types import * -def test_small_xml(): +def test_small_xml() -> None: with open('tests/files/small.xml', 'r') as f: tei_xml = f.read() doc = parse_document_xml(tei_xml) + expected_body = """Introduction \nEverything starts somewhere, as somebody [1] once said. \n\n In Depth \n Meat \nYou know, for kids. \n Potatos \nQED.""" expected = GrobidDocument( grobid_version='0.5.1-SNAPSHOT', grobid_timestamp='2018-04-02T00:31+0000', @@ -19,20 +24,19 @@ def test_small_xml(): header=GrobidHeader( title="Dummy Example File", authors=[ - GrobidAuthor( - name="Brewster Kahle", - given_name="Brewster", - surname="Kahle", - affiliation=GrobidAffiliation( - department="Faculty ofAgricultrial Engineering", - laboratory="Plant Physiology Laboratory", - institution="Technion-Israel Institute of Technology", - address=GrobidAddress( - post_code="32000", - settlement="Haifa", - country="Israel", - ), - )), + GrobidAuthor(name="Brewster Kahle", + given_name="Brewster", + surname="Kahle", + affiliation=GrobidAffiliation( + department="Faculty ofAgricultrial Engineering", + laboratory="Plant Physiology Laboratory", + institution="Technion-Israel Institute of Technology", + address=GrobidAddress( + post_code="32000", + settlement="Haifa", + country="Israel", + ), + )), GrobidAuthor( name="J Doe", given_name="J", @@ -40,23 +44,16 @@ def test_small_xml(): ), ], journal=GrobidJournal( - name= - "Dummy Example File. Journal of Fake News. pp. 1-2. ISSN 1234-5678", - ), + name="Dummy Example File. Journal of Fake News. pp. 1-2. ISSN 1234-5678", ), date="2000", ), abstract="Everything you ever wanted to know about nothing", - body= - "Introduction \nEverything starts somewhere, as somebody [1] once said. \n\n In Depth \n Meat \nYou know, for kids. \n Potatos \nQED.", + body=expected_body, citations=[ GrobidCitation( index=0, id="b0", - authors=[ - GrobidAuthor(name="A Seaperson", - given_name="A", - surname="Seaperson") - ], + authors=[GrobidAuthor(name="A Seaperson", given_name="A", surname="Seaperson")], date="2001", journal="Letters in the Alphabet", title="Everything is Wonderful", @@ -78,7 +75,7 @@ def test_small_xml(): assert doc == expected -def test_small_xml_json(): +def test_small_xml_json() -> None: with open('tests/files/small.xml', 'r') as f: tei_xml = f.read() @@ -95,13 +92,13 @@ def test_small_xml_json(): # remove nulls from old JSON for c in json_form['citations']: for k in list(c.keys()): - if c[k] == None: + if c[k] is None: c.pop(k) assert d == json_form -def test_invalid_xml(): +def test_invalid_xml() -> None: with pytest.raises(xml.etree.ElementTree.ParseError): parse_document_xml("this is not XML") @@ -109,6 +106,25 @@ def test_invalid_xml(): parse_citations_xml("this is not XML") with pytest.raises(ValueError): parse_document_xml("<xml></xml>") + with pytest.raises(TypeError): + parse_document_xml(123) # type: ignore + + +def test_bytes() -> None: + + with open('tests/files/small.xml', 'rb') as f: + tei_xml = f.read() + + parse_document_xml(tei_xml) + parse_document_xml(io.BytesIO(tei_xml)) # type: ignore + + +def test_elementtree() -> None: + + with open('tests/files/small.xml', 'rb') as f: + tei_xml = f.read() + + parse_document_xml(xml.etree.ElementTree.parse(io.BytesIO(tei_xml))) # type: ignore def test_example_grobid_tei_xml() -> None: @@ -118,12 +134,10 @@ def test_example_grobid_tei_xml() -> None: doc = parse_document_xml(blob) - assert ( - doc.header.title == - "Changes of patients' satisfaction with the health care services in Lithuanian Health Promoting Hospitals network" - ) + assert doc.header.title == \ + """Changes of patients' satisfaction with the health care services in Lithuanian Health Promoting Hospitals network""" - ref = [c for c in doc.citations if c.id == "b12"][0] + ref = [c for c in doc.citations or [] if c.id == "b12"][0] assert ref.authors[0].name == "K Tasa" assert ref.authors[0].given_name == "K" assert ref.authors[0].surname == "Tasa" @@ -132,13 +146,11 @@ def test_example_grobid_tei_xml() -> None: assert ref.date == "1996" assert ref.pages == "206-225" assert ref.volume == "8" - assert ( - ref.unstructured == - "Tasa K, Baker R, Murray M. Using patient feedback for qua- lity improvement. Quality Management in Health Care 1996;8:206-19." - ) + assert ref.unstructured == \ + """Tasa K, Baker R, Murray M. Using patient feedback for qua- lity improvement. Quality Management in Health Care 1996;8:206-19.""" -def test_single_citations_xml(): +def test_single_citations_xml() -> None: citation_xml = """ <biblStruct > <analytic> @@ -189,7 +201,7 @@ def test_single_citations_xml(): </biblStruct>""" d = parse_citations_xml(citation_xml)[0] - assert d.title == "Mesh migration following abdominal hernia repair: a comprehensive review" + assert d.title == """Mesh migration following abdominal hernia repair: a comprehensive review""" assert d.authors[2].given_name == "L" assert d.authors[2].surname == "Taveras" assert d.authors[2].name == "L R Taveras" @@ -202,12 +214,11 @@ def test_single_citations_xml(): assert d.journal == "Hernia" -def test_citation_list_xml(): +def test_citation_list_xml() -> None: with open('tests/files/example_citation_list.xml', 'r') as f: tei_xml = f.read() citations = parse_citations_xml(tei_xml) assert len(citations) == 10 - assert citations[ - 7].title == "Global Hunger Index: The Challenge of Hidden Hunger" + assert citations[7].title == "Global Hunger Index: The Challenge of Hidden Hunger" |