summary | refs | log | tree | commit | diff | stats
path: root/tests/test_grobid2json.py
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@archive.org> 2021-10-21 14:00:31 -0700
committer: Bryan Newbold <bnewbold@archive.org> 2021-10-21 14:00:31 -0700
commit: e1679f1044b94d9ec2a9d3b67c8e12581e2f313c (patch)
tree: 8294dfcf3de91580b425ed58e3d04a329a00f401 /tests/test_grobid2json.py
parent: 45870aa17b5dec6d63df8b7a2eb7839feac9afec (diff)
download: grobid_tei_xml-e1679f1044b94d9ec2a9d3b67c8e12581e2f313c.tar.gz
grobid_tei_xml-e1679f1044b94d9ec2a9d3b67c8e12581e2f313c.zip
get tests to start passing
Diffstat (limited to 'tests/test_grobid2json.py')
-rw-r--r-- tests/test_grobid2json.py 30
1 files changed, 29 insertions, 1 deletions
diff --git a/tests/test_grobid2json.py b/tests/test_grobid2json.py
index 8497b10..e970868 100644
--- a/tests/test_grobid2json.py
+++ b/tests/test_grobid2json.py
@@ -2,7 +2,7 @@
import xml
import json
import pytest
-from grobid2json import *
+from grobid_tei_xml.grobid2json import teixml2json
def test_small_xml():
@@ -12,6 +12,9 @@ def test_small_xml():
with open('tests/files/small.json', 'r') as f:
json_form = json.loads(f.read())
+ parsed = teixml2json(tei_xml)
+ for i in range(len(parsed['citations'])):
+ assert parsed['citations'][i] == json_form['citations'][i]
assert teixml2json(tei_xml) == json_form
def test_invalid_xml():
@@ -20,3 +23,28 @@ def test_invalid_xml():
teixml2json("this is not XML")
with pytest.raises(ValueError):
teixml2json("<xml></xml>")
+
+
+def test_grobid_teixml2json() -> None:
+
+ with open("tests/files/example_grobid.tei.xml", "r") as f:
+ blob = f.read()
+
+ obj = teixml2json(blob, True)
+
+ assert (
+ obj["title"]
+ == "Changes of patients' satisfaction with the health care services in Lithuanian Health Promoting Hospitals network"
+ )
+
+ ref = [c for c in obj["citations"] if c["id"] == "b12"][0]
+ assert ref["authors"][0] == {"given_name": "K", "name": "K Tasa", "surname": "Tasa"}
+ assert ref["journal"] == "Quality Management in Health Care"
+ assert ref["title"] == "Using patient feedback for quality improvement"
+ assert ref["date"] == "1996"
+ assert ref["pages"] == "206-225"
+ assert ref["volume"] == "8"
+ assert (
+ ref["unstructured"]
+ == "Tasa K, Baker R, Murray M. Using patient feedback for qua- lity improvement. Quality Management in Health Care 1996;8:206-19."
+ )