diff options
author | Bryan Newbold <bnewbold@archive.org> | 2020-09-14 00:15:57 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2020-09-14 00:15:57 -0700 |
commit | 71ce30e7547871cb6fe02fa4237af735bd6b9c49 (patch) | |
tree | d3fc00dfc5619fbd942ccb4c166fcbbae6d9c672 /tests/test_grobid2json.py | |
parent | 419311b5ab69938e3a90a97353038435c50c732c (diff) | |
download | fatcat-scholar-71ce30e7547871cb6fe02fa4237af735bd6b9c49.tar.gz fatcat-scholar-71ce30e7547871cb6fe02fa4237af735bd6b9c49.zip |
minimum viable tests for GROBID XML parsing and refs transform
Diffstat (limited to 'tests/test_grobid2json.py')
-rw-r--r-- | tests/test_grobid2json.py | 26 |
1 files changed, 26 insertions, 0 deletions
"""Minimum viable test for GROBID TEI-XML parsing and the refs transform."""

from fatcat_scholar.grobid2json import teixml2json


def test_grobid_teixml2json() -> None:
    """Parse the example GROBID TEI-XML fixture and spot-check the result.

    Verifies the extracted document title and the individual fields of the
    citation whose id is "b12".
    """
    # Explicit encoding: without it the fixture is decoded using the locale's
    # default, which makes the test platform-dependent.
    with open("tests/files/example_grobid.tei.xml", "r", encoding="utf-8") as f:
        blob = f.read()

    obj = teixml2json(blob, True)

    assert (
        obj["title"]
        == "Changes of patients' satisfaction with the health care services in Lithuanian Health Promoting Hospitals network"
    )

    # Pick out a single known reference by its id.
    ref = [c for c in obj["citations"] if c["id"] == "b12"][0]
    assert ref["authors"][0] == {"given_name": "K", "name": "K Tasa", "surname": "Tasa"}
    assert ref["journal"] == "Quality Management in Health Care"
    assert ref["title"] == "Using patient feedback for quality improvement"
    assert ref["date"] == "1996"
    # NOTE(review): the structured page range ("206-225") differs from the one
    # in the raw citation string below ("206-19"); presumably this pins what
    # the fixture/parser currently yields -- confirm before changing either.
    assert ref["pages"] == "206-225"
    assert ref["volume"] == "8"
    assert (
        ref["unstructured"]
        == "Tasa K, Baker R, Murray M. Using patient feedback for qua- lity improvement. Quality Management in Health Care 1996;8:206-19."
    )