about | summary | refs | log | tree | commit | diff | stats
path: root/tests/test_parse.py
diff options
context:
space:
mode:
Diffstat (limited to 'tests/test_parse.py')
-rw-r--r-- tests/test_parse.py 99
1 files changed, 55 insertions, 44 deletions
diff --git a/tests/test_parse.py b/tests/test_parse.py
index e79d41d..30b2926 100644
--- a/tests/test_parse.py
+++ b/tests/test_parse.py
@@ -1,17 +1,22 @@
-import xml
+import io
import json
+import xml
+import xml.etree.ElementTree
+
import pytest
-from grobid_tei_xml import teixml2json, parse_document_xml, parse_citations_xml, GrobidDocument, GrobidCitation
+from grobid_tei_xml import (GrobidCitation, GrobidDocument, parse_citations_xml,
+ parse_document_xml)
from grobid_tei_xml.types import *
-def test_small_xml():
+def test_small_xml() -> None:
with open('tests/files/small.xml', 'r') as f:
tei_xml = f.read()
doc = parse_document_xml(tei_xml)
+ expected_body = """Introduction \nEverything starts somewhere, as somebody [1] once said. \n\n In Depth \n Meat \nYou know, for kids. \n Potatos \nQED."""
expected = GrobidDocument(
grobid_version='0.5.1-SNAPSHOT',
grobid_timestamp='2018-04-02T00:31+0000',
@@ -19,20 +24,19 @@ def test_small_xml():
header=GrobidHeader(
title="Dummy Example File",
authors=[
- GrobidAuthor(
- name="Brewster Kahle",
- given_name="Brewster",
- surname="Kahle",
- affiliation=GrobidAffiliation(
- department="Faculty ofAgricultrial Engineering",
- laboratory="Plant Physiology Laboratory",
- institution="Technion-Israel Institute of Technology",
- address=GrobidAddress(
- post_code="32000",
- settlement="Haifa",
- country="Israel",
- ),
- )),
+ GrobidAuthor(name="Brewster Kahle",
+ given_name="Brewster",
+ surname="Kahle",
+ affiliation=GrobidAffiliation(
+ department="Faculty ofAgricultrial Engineering",
+ laboratory="Plant Physiology Laboratory",
+ institution="Technion-Israel Institute of Technology",
+ address=GrobidAddress(
+ post_code="32000",
+ settlement="Haifa",
+ country="Israel",
+ ),
+ )),
GrobidAuthor(
name="J Doe",
given_name="J",
@@ -40,23 +44,16 @@ def test_small_xml():
),
],
journal=GrobidJournal(
- name=
- "Dummy Example File. Journal of Fake News. pp. 1-2. ISSN 1234-5678",
- ),
+ name="Dummy Example File. Journal of Fake News. pp. 1-2. ISSN 1234-5678", ),
date="2000",
),
abstract="Everything you ever wanted to know about nothing",
- body=
- "Introduction \nEverything starts somewhere, as somebody [1] once said. \n\n In Depth \n Meat \nYou know, for kids. \n Potatos \nQED.",
+ body=expected_body,
citations=[
GrobidCitation(
index=0,
id="b0",
- authors=[
- GrobidAuthor(name="A Seaperson",
- given_name="A",
- surname="Seaperson")
- ],
+ authors=[GrobidAuthor(name="A Seaperson", given_name="A", surname="Seaperson")],
date="2001",
journal="Letters in the Alphabet",
title="Everything is Wonderful",
@@ -78,7 +75,7 @@ def test_small_xml():
assert doc == expected
-def test_small_xml_json():
+def test_small_xml_json() -> None:
with open('tests/files/small.xml', 'r') as f:
tei_xml = f.read()
@@ -95,13 +92,13 @@ def test_small_xml_json():
# remove nulls from old JSON
for c in json_form['citations']:
for k in list(c.keys()):
- if c[k] == None:
+ if c[k] is None:
c.pop(k)
assert d == json_form
-def test_invalid_xml():
+def test_invalid_xml() -> None:
with pytest.raises(xml.etree.ElementTree.ParseError):
parse_document_xml("this is not XML")
@@ -109,6 +106,25 @@ def test_invalid_xml():
parse_citations_xml("this is not XML")
with pytest.raises(ValueError):
parse_document_xml("<xml></xml>")
+ with pytest.raises(TypeError):
+ parse_document_xml(123) # type: ignore
+
+
+def test_bytes() -> None:
+
+ with open('tests/files/small.xml', 'rb') as f:
+ tei_xml = f.read()
+
+ parse_document_xml(tei_xml)
+ parse_document_xml(io.BytesIO(tei_xml)) # type: ignore
+
+
+def test_elementtree() -> None:
+
+ with open('tests/files/small.xml', 'rb') as f:
+ tei_xml = f.read()
+
+ parse_document_xml(xml.etree.ElementTree.parse(io.BytesIO(tei_xml))) # type: ignore
def test_example_grobid_tei_xml() -> None:
@@ -118,12 +134,10 @@ def test_example_grobid_tei_xml() -> None:
doc = parse_document_xml(blob)
- assert (
- doc.header.title ==
- "Changes of patients' satisfaction with the health care services in Lithuanian Health Promoting Hospitals network"
- )
+ assert doc.header.title == \
+ """Changes of patients' satisfaction with the health care services in Lithuanian Health Promoting Hospitals network"""
- ref = [c for c in doc.citations if c.id == "b12"][0]
+ ref = [c for c in doc.citations or [] if c.id == "b12"][0]
assert ref.authors[0].name == "K Tasa"
assert ref.authors[0].given_name == "K"
assert ref.authors[0].surname == "Tasa"
@@ -132,13 +146,11 @@ def test_example_grobid_tei_xml() -> None:
assert ref.date == "1996"
assert ref.pages == "206-225"
assert ref.volume == "8"
- assert (
- ref.unstructured ==
- "Tasa K, Baker R, Murray M. Using patient feedback for qua- lity improvement. Quality Management in Health Care 1996;8:206-19."
- )
+ assert ref.unstructured == \
+ """Tasa K, Baker R, Murray M. Using patient feedback for qua- lity improvement. Quality Management in Health Care 1996;8:206-19."""
-def test_single_citations_xml():
+def test_single_citations_xml() -> None:
citation_xml = """
<biblStruct >
<analytic>
@@ -189,7 +201,7 @@ def test_single_citations_xml():
</biblStruct>"""
d = parse_citations_xml(citation_xml)[0]
- assert d.title == "Mesh migration following abdominal hernia repair: a comprehensive review"
+ assert d.title == """Mesh migration following abdominal hernia repair: a comprehensive review"""
assert d.authors[2].given_name == "L"
assert d.authors[2].surname == "Taveras"
assert d.authors[2].name == "L R Taveras"
@@ -202,12 +214,11 @@ def test_single_citations_xml():
assert d.journal == "Hernia"
-def test_citation_list_xml():
+def test_citation_list_xml() -> None:
with open('tests/files/example_citation_list.xml', 'r') as f:
tei_xml = f.read()
citations = parse_citations_xml(tei_xml)
assert len(citations) == 10
- assert citations[
- 7].title == "Global Hunger Index: The Challenge of Hidden Hunger"
+ assert citations[7].title == "Global Hunger Index: The Challenge of Hidden Hunger"