summary | refs | log | tree | commit | diff | stats
path: root/tests/test_grobid2json.py
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@archive.org> 2021-10-25 15:48:36 -0700
committer Bryan Newbold <bnewbold@archive.org> 2021-10-25 15:48:36 -0700
commit fcdb271193ca2c6b90eeeb5f4af4bbc15083319a (patch)
tree 6e0c2640807dd3d4e9be5a560328bbbd7aa7b6b2 /tests/test_grobid2json.py
parent baa6356b80b1a826eca77f74cc487d947d2fafd4 (diff)
download grobid_tei_xml-fcdb271193ca2c6b90eeeb5f4af4bbc15083319a.tar.gz
grobid_tei_xml-fcdb271193ca2c6b90eeeb5f4af4bbc15083319a.zip
remove legacy grobid2json code itself
Diffstat (limited to 'tests/test_grobid2json.py')
-rw-r--r-- tests/test_grobid2json.py 111
1 file changed, 0 insertions, 111 deletions
diff --git a/tests/test_grobid2json.py b/tests/test_grobid2json.py
deleted file mode 100644
index 47ab293..0000000
--- a/tests/test_grobid2json.py
+++ /dev/null
@@ -1,111 +0,0 @@
-import json
-import xml
-
-import pytest
-
-from grobid_tei_xml.grobid2json import teixml2json, transform_grobid_ref_xml
-
-
-def test_small_xml() -> None:
-
- with open('tests/files/small.xml', 'r') as f:
- tei_xml = f.read()
- with open('tests/files/small.json', 'r') as f:
- json_form = json.loads(f.read())
-
- assert teixml2json(tei_xml) == json_form
-
-
-def test_invalid_xml() -> None:
-
- with pytest.raises(xml.etree.ElementTree.ParseError):
- teixml2json("this is not XML")
- with pytest.raises(ValueError):
- teixml2json("<xml></xml>")
-
-
-def test_grobid_teixml2json() -> None:
-
- with open("tests/files/example_grobid.tei.xml", "r") as f:
- blob = f.read()
-
- obj = teixml2json(blob, True)
-
- assert obj[
- "title"] == """Changes of patients' satisfaction with the health care services in Lithuanian Health Promoting Hospitals network"""
-
- ref = [c for c in obj["citations"] if c["id"] == "b12"][0]
- assert ref["authors"][0] == {"given_name": "K", "name": "K Tasa", "surname": "Tasa"}
- assert ref["journal"] == "Quality Management in Health Care"
- assert ref["title"] == "Using patient feedback for quality improvement"
- assert ref["date"] == "1996"
- assert ref["pages"] == "206-225"
- assert ref["volume"] == "8"
- assert ref["unstructured"] == \
- """Tasa K, Baker R, Murray M. Using patient feedback for qua- lity improvement. Quality Management in Health Care 1996;8:206-19."""
-
-
-def test_transform_grobid_ref_xml() -> None:
- citation_xml = """
-<biblStruct >
- <analytic>
- <title level="a" type="main">Mesh migration following abdominal hernia repair: a comprehensive review</title>
- <author>
- <persName
- xmlns="http://www.tei-c.org/ns/1.0">
- <forename type="first">H</forename>
- <forename type="middle">B</forename>
- <surname>Cunningham</surname>
- </persName>
- </author>
- <author>
- <persName
- xmlns="http://www.tei-c.org/ns/1.0">
- <forename type="first">J</forename>
- <forename type="middle">J</forename>
- <surname>Weis</surname>
- </persName>
- </author>
- <author>
- <persName
- xmlns="http://www.tei-c.org/ns/1.0">
- <forename type="first">L</forename>
- <forename type="middle">R</forename>
- <surname>Taveras</surname>
- </persName>
- </author>
- <author>
- <persName
- xmlns="http://www.tei-c.org/ns/1.0">
- <forename type="first">S</forename>
- <surname>Huerta</surname>
- </persName>
- </author>
- <idno type="DOI">10.1007/s10029-019-01898-9</idno>
- <idno type="PMID">30701369</idno>
- </analytic>
- <monogr>
- <title level="j">Hernia</title>
- <imprint>
- <biblScope unit="volume">23</biblScope>
- <biblScope unit="issue">2</biblScope>
- <biblScope unit="page" from="235" to="243" />
- <date type="published" when="2019-01-30" />
- </imprint>
- </monogr>
-</biblStruct>"""
-
- d = transform_grobid_ref_xml(citation_xml)
- assert d
- assert d['title'] == \
- "Mesh migration following abdominal hernia repair: a comprehensive review"
- assert d['authors'][2]['given_name'] == "L"
- assert d['authors'][2]['surname'] == "Taveras"
- assert d['authors'][2]['name'] == "L R Taveras"
- assert d['doi'] == "10.1007/s10029-019-01898-9"
- assert d['pmid'] == "30701369"
- assert d['date'] == "2019-01-30"
- assert d['pages'] == "235-243"
- assert d['volume'] == "23"
- assert d['issue'] == "2"
- assert d['journal'] == "Hernia"