diff options
Diffstat (limited to 'tests/test_grobid2json.py')
-rw-r--r-- | tests/test_grobid2json.py | 111 |
1 files changed, 0 insertions, 111 deletions
diff --git a/tests/test_grobid2json.py b/tests/test_grobid2json.py deleted file mode 100644 index 47ab293..0000000 --- a/tests/test_grobid2json.py +++ /dev/null @@ -1,111 +0,0 @@ -import json -import xml - -import pytest - -from grobid_tei_xml.grobid2json import teixml2json, transform_grobid_ref_xml - - -def test_small_xml() -> None: - - with open('tests/files/small.xml', 'r') as f: - tei_xml = f.read() - with open('tests/files/small.json', 'r') as f: - json_form = json.loads(f.read()) - - assert teixml2json(tei_xml) == json_form - - -def test_invalid_xml() -> None: - - with pytest.raises(xml.etree.ElementTree.ParseError): - teixml2json("this is not XML") - with pytest.raises(ValueError): - teixml2json("<xml></xml>") - - -def test_grobid_teixml2json() -> None: - - with open("tests/files/example_grobid.tei.xml", "r") as f: - blob = f.read() - - obj = teixml2json(blob, True) - - assert obj[ - "title"] == """Changes of patients' satisfaction with the health care services in Lithuanian Health Promoting Hospitals network""" - - ref = [c for c in obj["citations"] if c["id"] == "b12"][0] - assert ref["authors"][0] == {"given_name": "K", "name": "K Tasa", "surname": "Tasa"} - assert ref["journal"] == "Quality Management in Health Care" - assert ref["title"] == "Using patient feedback for quality improvement" - assert ref["date"] == "1996" - assert ref["pages"] == "206-225" - assert ref["volume"] == "8" - assert ref["unstructured"] == \ - """Tasa K, Baker R, Murray M. Using patient feedback for qua- lity improvement. Quality Management in Health Care 1996;8:206-19.""" - - -def test_transform_grobid_ref_xml() -> None: - citation_xml = """ -<biblStruct > - <analytic> - <title level="a" type="main">Mesh migration following abdominal hernia repair: a comprehensive review</title> - <author> - <persName - xmlns="http://www.tei-c.org/ns/1.0"> - <forename type="first">H</forename> - <forename type="middle">B</forename> - <surname>Cunningham</surname> - </persName> - </author> - <author> - <persName - xmlns="http://www.tei-c.org/ns/1.0"> - <forename type="first">J</forename> - <forename type="middle">J</forename> - <surname>Weis</surname> - </persName> - </author> - <author> - <persName - xmlns="http://www.tei-c.org/ns/1.0"> - <forename type="first">L</forename> - <forename type="middle">R</forename> - <surname>Taveras</surname> - </persName> - </author> - <author> - <persName - xmlns="http://www.tei-c.org/ns/1.0"> - <forename type="first">S</forename> - <surname>Huerta</surname> - </persName> - </author> - <idno type="DOI">10.1007/s10029-019-01898-9</idno> - <idno type="PMID">30701369</idno> - </analytic> - <monogr> - <title level="j">Hernia</title> - <imprint> - <biblScope unit="volume">23</biblScope> - <biblScope unit="issue">2</biblScope> - <biblScope unit="page" from="235" to="243" /> - <date type="published" when="2019-01-30" /> - </imprint> - </monogr> -</biblStruct>""" - - d = transform_grobid_ref_xml(citation_xml) - assert d - assert d['title'] == \ - "Mesh migration following abdominal hernia repair: a comprehensive review" - assert d['authors'][2]['given_name'] == "L" - assert d['authors'][2]['surname'] == "Taveras" - assert d['authors'][2]['name'] == "L R Taveras" - assert d['doi'] == "10.1007/s10029-019-01898-9" - assert d['pmid'] == "30701369" - assert d['date'] == "2019-01-30" - assert d['pages'] == "235-243" - assert d['volume'] == "23" - assert d['issue'] == "2" - assert d['journal'] == "Hernia" |