aboutsummaryrefslogtreecommitdiffstats
path: root/tests/test_grobid2json.py
diff options
context:
space:
mode:
Diffstat (limited to 'tests/test_grobid2json.py')
-rw-r--r-- tests/test_grobid2json.py 80
1 files changed, 78 insertions, 2 deletions
diff --git a/tests/test_grobid2json.py b/tests/test_grobid2json.py
index 6e3dac2..ed5d996 100644
--- a/tests/test_grobid2json.py
+++ b/tests/test_grobid2json.py
@@ -1,10 +1,12 @@
import xml
import json
import pytest
-from grobid_tei_xml.grobid2json import teixml2json
+from grobid_tei_xml import teixml2json, parse_document_xml, GrobidDocument, GrobidCitation
+from grobid_tei_xml.types import *
-def test_small_xml():
+
+def test_teixml2json_small_xml():
with open('tests/files/small.xml', 'r') as f:
tei_xml = f.read()
@@ -13,6 +15,80 @@ def test_small_xml():
assert teixml2json(tei_xml) == json_form
+ assert parse_document_xml(tei_xml).to_dict() == json_form
+
+def test_parse_document_xml_small_xml():
+    """Check that parse_document_xml() builds the expected GrobidDocument.
+
+    Reads the tests/files/small.xml fixture, parses it, and compares the
+    result against a hand-written GrobidDocument using ``==``.
+
+    This test deliberately has its own name: the teixml2json test earlier in
+    this module is called ``test_teixml2json_small_xml``, and defining a
+    second function with that name would rebind it, so the earlier test
+    would silently never be collected by pytest.
+    """
+
+    with open('tests/files/small.xml', 'r') as f:
+        tei_xml = f.read()
+
+    doc = parse_document_xml(tei_xml)
+    expected = GrobidDocument(
+        grobid_version='0.5.1-SNAPSHOT',
+        grobid_timestamp='2018-04-02T00:31+0000',
+        language_code='en',
+        header=GrobidHeader(
+            title="Dummy Example File",
+            authors=[
+                GrobidAuthor(
+                    name="Brewster Kahle",
+                    given_name="Brewster",
+                    surname="Kahle",
+                    affiliation=GrobidAffiliation(
+                        # NOTE(review): the odd spelling below presumably
+                        # mirrors the fixture text -- confirm before "fixing".
+                        department="Faculty ofAgricultrial Engineering",
+                        laboratory="Plant Physiology Laboratory",
+                        institution="Technion-Israel Institute of Technology",
+                        address=GrobidAddress(
+                            post_code="32000",
+                            settlement="Haifa",
+                            country="Israel",
+                        ),
+                    )
+                ),
+                GrobidAuthor(
+                    name="J Doe",
+                    given_name="J",
+                    surname="Doe",
+                ),
+            ],
+            journal=GrobidJournal(
+                name="Dummy Example File. Journal of Fake News. pp. 1-2. ISSN 1234-5678",
+            ),
+            date="2000",
+        ),
+        abstract="Everything you ever wanted to know about nothing",
+        body="Introduction \nEverything starts somewhere, as somebody [1] once said. \n\n In Depth \n Meat \nYou know, for kids. \n Potatos \nQED.",
+        citations=[
+            GrobidCitation(
+                index=0,
+                id="b0",
+                authors=[
+                    GrobidAuthor(
+                        name="A Seaperson",
+                        given_name="A",
+                        surname="Seaperson"
+                    )
+                ],
+                date="2001",
+                journal="Letters in the Alphabet",
+                title="Everything is Wonderful",
+                volume="20",
+                pages="1-11",
+            ),
+            GrobidCitation(
+                index=1,
+                id="b1",
+                authors=[],
+                date="2011-03-28",
+                journal="The Dictionary",
+                title="All about Facts",
+                volume="14",
+            ),
+        ],
+    )
+
+    # Whole-object equality: any field that differs fails the test.
+    assert doc == expected
+
def test_invalid_xml():