summary | refs | log | tree | commit | diff | stats
path: root/tests/test_grobid2json.py
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@archive.org>, 2020-09-14 00:15:57 -0700
committer: Bryan Newbold <bnewbold@archive.org>, 2020-09-14 00:15:57 -0700
commit: 71ce30e7547871cb6fe02fa4237af735bd6b9c49 (patch)
tree: d3fc00dfc5619fbd942ccb4c166fcbbae6d9c672 /tests/test_grobid2json.py
parent: 419311b5ab69938e3a90a97353038435c50c732c (diff)
download: fatcat-scholar-71ce30e7547871cb6fe02fa4237af735bd6b9c49.tar.gz
download: fatcat-scholar-71ce30e7547871cb6fe02fa4237af735bd6b9c49.zip
minimum viable tests for GROBID XML parsing and refs transform
Diffstat (limited to 'tests/test_grobid2json.py')
-rw-r--r-- tests/test_grobid2json.py | 26
1 files changed, 26 insertions, 0 deletions
diff --git a/tests/test_grobid2json.py b/tests/test_grobid2json.py
new file mode 100644
index 0000000..345fd91
--- /dev/null
+++ b/tests/test_grobid2json.py
@@ -0,0 +1,26 @@
+from fatcat_scholar.grobid2json import teixml2json
+
+
+def test_grobid_teixml2json() -> None:  # checks teixml2json() output for the example TEI-XML fixture
+ # The fixture path is relative, so it is resolved against the current working directory (presumably the repository root).
+ with open("tests/files/example_grobid.tei.xml", "r") as f:  # NOTE(review): no explicit encoding; presumably the fixture is UTF-8 -- confirm
+ blob = f.read()
+ # The whole file is passed as one string; the meaning of the second positional argument (True) is not visible here.
+ obj = teixml2json(blob, True)
+ # Top-level metadata: the parsed document title must match exactly.
+ assert (
+ obj["title"]
+ == "Changes of patients' satisfaction with the health care services in Lithuanian Health Promoting Hospitals network"
+ )
+ # Select the citation whose id is "b12"; the [0] raises IndexError if no citation matches.
+ ref = [c for c in obj["citations"] if c["id"] == "b12"][0]
+ assert ref["authors"][0] == {"given_name": "K", "name": "K Tasa", "surname": "Tasa"}  # only the first author is checked
+ assert ref["journal"] == "Quality Management in Health Care"
+ assert ref["title"] == "Using patient feedback for quality improvement"
+ assert ref["date"] == "1996"  # compared as a string, not an int
+ assert ref["pages"] == "206-225"  # NOTE(review): the raw citation string below ends in "206-19" -- confirm this value is intended
+ assert ref["volume"] == "8"
+ assert (
+ ref["unstructured"]  # raw citation text; the expected string keeps the "qua- lity" line-break hyphenation verbatim
+ == "Tasa K, Baker R, Murray M. Using patient feedback for qua- lity improvement. Quality Management in Health Care 1996;8:206-19."
+ )