about | summary | refs | log | tree | commit | diff | stats
path: root/tests
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@archive.org> 2021-10-21 17:32:47 -0700
committer: Bryan Newbold <bnewbold@archive.org> 2021-10-21 17:32:47 -0700
commit: 8c09c866d81854ab06b85bee6c39124c7b2faf44 (patch)
tree: eb6e1a67a307512f18992fd383627a2d04c01931 /tests
parent: d25fc52a7fc1d2b5de6bfaa16afe9256b3175181 (diff)
download: grobid_tei_xml-8c09c866d81854ab06b85bee6c39124c7b2faf44.tar.gz
download: grobid_tei_xml-8c09c866d81854ab06b85bee6c39124c7b2faf44.zip
start refactoring into new parser, with dataclass types
Diffstat (limited to 'tests')
-rw-r--r-- tests/test_grobid2json.py 80
-rw-r--r-- tests/test_grobid_unstructured.py 2
2 files changed, 79 insertions, 3 deletions
diff --git a/tests/test_grobid2json.py b/tests/test_grobid2json.py
index 6e3dac2..ed5d996 100644
--- a/tests/test_grobid2json.py
+++ b/tests/test_grobid2json.py
@@ -1,10 +1,12 @@
import xml
import json
import pytest
-from grobid_tei_xml.grobid2json import teixml2json
+from grobid_tei_xml import teixml2json, parse_document_xml, GrobidDocument, GrobidCitation
+from grobid_tei_xml.types import *
-def test_small_xml():
+
+def test_teixml2json_small_xml():
with open('tests/files/small.xml', 'r') as f:
tei_xml = f.read()
@@ -13,6 +15,80 @@ def test_small_xml():
assert teixml2json(tei_xml) == json_form
+ assert parse_document_xml(tei_xml).to_dict() == json_form
+
+def test_parse_document_xml_small_xml():
+
+ with open('tests/files/small.xml', 'r') as f:
+ tei_xml = f.read()
+
+ doc = parse_document_xml(tei_xml)
+ expected = GrobidDocument(
+ grobid_version='0.5.1-SNAPSHOT',
+ grobid_timestamp='2018-04-02T00:31+0000',
+ language_code='en',
+ header=GrobidHeader(
+ title="Dummy Example File",
+ authors=[
+ GrobidAuthor(
+ name="Brewster Kahle",
+ given_name="Brewster",
+ surname="Kahle",
+ affiliation=GrobidAffiliation(
+ department="Faculty ofAgricultrial Engineering",
+ laboratory="Plant Physiology Laboratory",
+ institution="Technion-Israel Institute of Technology",
+ address=GrobidAddress(
+ post_code="32000",
+ settlement="Haifa",
+ country="Israel",
+ ),
+ )
+ ),
+ GrobidAuthor(
+ name="J Doe",
+ given_name="J",
+ surname="Doe",
+ ),
+ ],
+ journal=GrobidJournal(
+ name="Dummy Example File. Journal of Fake News. pp. 1-2. ISSN 1234-5678",
+ ),
+ date="2000",
+ ),
+ abstract="Everything you ever wanted to know about nothing",
+ body="Introduction \nEverything starts somewhere, as somebody [1] once said. \n\n In Depth \n Meat \nYou know, for kids. \n Potatos \nQED.",
+ citations=[
+ GrobidCitation(
+ index=0,
+ id="b0",
+ authors=[
+ GrobidAuthor(
+ name="A Seaperson",
+ given_name="A",
+ surname="Seaperson"
+ )
+ ],
+ date="2001",
+ journal="Letters in the Alphabet",
+ title="Everything is Wonderful",
+ volume="20",
+ pages="1-11",
+ ),
+ GrobidCitation(
+ index=1,
+ id="b1",
+ authors=[],
+ date="2011-03-28",
+ journal="The Dictionary",
+ title="All about Facts",
+ volume="14",
+ ),
+ ],
+ )
+
+ assert doc == expected
+
def test_invalid_xml():
diff --git a/tests/test_grobid_unstructured.py b/tests/test_grobid_unstructured.py
index b203b30..91b7398 100644
--- a/tests/test_grobid_unstructured.py
+++ b/tests/test_grobid_unstructured.py
@@ -1,6 +1,6 @@
import pytest
-from grobid_tei_xml.grobid_unstructured import transform_grobid_ref_xml
+from grobid_tei_xml.grobid2json import transform_grobid_ref_xml
def test_transform_grobid_ref_xml():