diff options
Diffstat (limited to 'tests/test_grobid2json.py')
-rw-r--r-- | tests/test_grobid2json.py | 30 |
1 files changed, 29 insertions, 1 deletions
diff --git a/tests/test_grobid2json.py b/tests/test_grobid2json.py
index 8497b10..e970868 100644
--- a/tests/test_grobid2json.py
+++ b/tests/test_grobid2json.py
@@ -2,7 +2,7 @@ import xml
 import json
 import pytest
 
-from grobid2json import *
+from grobid_tei_xml.grobid2json import teixml2json
 
 
 def test_small_xml():
@@ -12,6 +12,9 @@ def test_small_xml():
     with open('tests/files/small.json', 'r') as f:
         json_form = json.loads(f.read())
 
+    parsed = teixml2json(tei_xml)
+    for i in range(len(parsed['citations'])):
+        assert parsed['citations'][i] == json_form['citations'][i]
     assert teixml2json(tei_xml) == json_form
 
 def test_invalid_xml():
@@ -20,3 +23,28 @@ def test_invalid_xml():
         teixml2json("this is not XML")
     with pytest.raises(ValueError):
         teixml2json("<xml></xml>")
+
+
+def test_grobid_teixml2json() -> None:
+
+    with open("tests/files/example_grobid.tei.xml", "r") as f:
+        blob = f.read()
+
+    obj = teixml2json(blob, True)
+
+    assert (
+        obj["title"]
+        == "Changes of patients' satisfaction with the health care services in Lithuanian Health Promoting Hospitals network"
+    )
+
+    ref = [c for c in obj["citations"] if c["id"] == "b12"][0]
+    assert ref["authors"][0] == {"given_name": "K", "name": "K Tasa", "surname": "Tasa"}
+    assert ref["journal"] == "Quality Management in Health Care"
+    assert ref["title"] == "Using patient feedback for quality improvement"
+    assert ref["date"] == "1996"
+    assert ref["pages"] == "206-225"
+    assert ref["volume"] == "8"
+    assert (
+        ref["unstructured"]
+        == "Tasa K, Baker R, Murray M. Using patient feedback for qua- lity improvement. Quality Management in Health Care 1996;8:206-19."
+    )