From 560d5f7cc1672f95e2a953ab5908f4205151a703 Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Wed, 27 Oct 2021 18:24:19 -0700 Subject: refactor use of grobid_tei_xml --- tests/test_grobid2json.py | 31 ---------------------- tests/test_grobid_parse.py | 62 ++++++++++++++++++++++++++++++++++++++++++++ tests/test_refs_transform.py | 3 +-- 3 files changed, 63 insertions(+), 33 deletions(-) delete mode 100644 tests/test_grobid2json.py create mode 100644 tests/test_grobid_parse.py (limited to 'tests') diff --git a/tests/test_grobid2json.py b/tests/test_grobid2json.py deleted file mode 100644 index adf36a1..0000000 --- a/tests/test_grobid2json.py +++ /dev/null @@ -1,31 +0,0 @@ -from grobid_tei_xml import parse_document_xml - - -def test_grobid_parse() -> None: - """ - This function formerly tested the grobid2json file in this project. Now it - tests backwards-compatibility of the grobid_tei_xml library. - """ - - with open("tests/files/example_grobid.tei.xml", "r") as f: - blob = f.read() - - doc = parse_document_xml(blob) - obj = doc.to_legacy_dict() - - assert ( - obj["title"] - == "Changes of patients' satisfaction with the health care services in Lithuanian Health Promoting Hospitals network" - ) - - ref = [c for c in obj["citations"] if c["id"] == "b12"][0] - assert ref["authors"][0] == {"given_name": "K", "name": "K Tasa", "surname": "Tasa"} - assert ref["journal"] == "Quality Management in Health Care" - assert ref["title"] == "Using patient feedback for quality improvement" - assert ref["date"] == "1996" - assert ref["pages"] == "206-225" - assert ref["volume"] == "8" - assert ( - ref["unstructured"] - == "Tasa K, Baker R, Murray M. Using patient feedback for qua- lity improvement. Quality Management in Health Care 1996;8:206-19." - ) diff --git a/tests/test_grobid_parse.py b/tests/test_grobid_parse.py new file mode 100644 index 0000000..c0adf9b --- /dev/null +++ b/tests/test_grobid_parse.py @@ -0,0 +1,62 @@ +from grobid_tei_xml import parse_document_xml + + +def test_grobid_parse_legacy() -> None: + """ + This function formerly tested the grobid2json file in this project. Now it + tests backwards-compatibility of the grobid_tei_xml library. + """ + + with open("tests/files/example_grobid.tei.xml", "r") as f: + blob = f.read() + + doc = parse_document_xml(blob) + obj = doc.to_legacy_dict() + + assert ( + obj["title"] + == "Changes of patients' satisfaction with the health care services in Lithuanian Health Promoting Hospitals network" + ) + + ref = [c for c in obj["citations"] if c["id"] == "b12"][0] + assert ref["authors"][0] == {"given_name": "K", "name": "K Tasa", "surname": "Tasa"} + assert ref["journal"] == "Quality Management in Health Care" + assert ref["title"] == "Using patient feedback for quality improvement" + assert ref["date"] == "1996" + assert ref["pages"] == "206-225" + assert ref["volume"] == "8" + assert ( + ref["unstructured"] + == "Tasa K, Baker R, Murray M. Using patient feedback for qua- lity improvement. Quality Management in Health Care 1996;8:206-19." + ) + + +def test_grobid_parse() -> None: + """ + Equivalent to test_grobid_parse_legacy(), but using the GrobidDocument type directly + """ + + with open("tests/files/example_grobid.tei.xml", "r") as f: + blob = f.read() + + doc = parse_document_xml(blob) + + assert ( + doc.header.title + == "Changes of patients' satisfaction with the health care services in Lithuanian Health Promoting Hospitals network" + ) + + assert doc.citations is not None + ref = [c for c in doc.citations if c.id == "b12"][0] + assert ref.authors[0].given_name == "K" + assert ref.authors[0].full_name == "K Tasa" + assert ref.authors[0].surname == "Tasa" + assert ref.journal == "Quality Management in Health Care" + assert ref.title == "Using patient feedback for quality improvement" + assert ref.date == "1996" + assert ref.pages == "206-225" + assert ref.volume == "8" + assert ( + ref.unstructured + == "Tasa K, Baker R, Murray M. Using patient feedback for qua- lity improvement. Quality Management in Health Care 1996;8:206-19." + ) diff --git a/tests/test_refs_transform.py b/tests/test_refs_transform.py index 2fc210f..4b1b588 100644 --- a/tests/test_refs_transform.py +++ b/tests/test_refs_transform.py @@ -20,8 +20,7 @@ def test_transform_refs_grobid() -> None: ) tei_doc = parse_document_xml(blob) - tei_dict = tei_doc.to_legacy_dict() - refs = refs_from_grobid(dummy_release, tei_dict) + refs = refs_from_grobid(dummy_release, tei_doc) ref = refs[12] assert ref.release_ident == "releasedummy22222222222222" -- cgit v1.2.3