about | summary | refs | log | tree | commit | diff | stats
path: root/tests/test_grobid2json.py
diff options
context:
space:
mode:
Diffstat (limited to 'tests/test_grobid2json.py')
-rw-r--r-- tests/test_grobid2json.py 143
1 file changed, 67 insertions, 76 deletions
diff --git a/tests/test_grobid2json.py b/tests/test_grobid2json.py
index ed5d996..a1c975e 100644
--- a/tests/test_grobid2json.py
+++ b/tests/test_grobid2json.py
@@ -3,10 +3,10 @@ import json
import pytest
from grobid_tei_xml import teixml2json, parse_document_xml, GrobidDocument, GrobidCitation
-from grobid_tei_xml.types import *
+from grobid_tei_xml.grobid2json import transform_grobid_ref_xml
-def test_teixml2json_small_xml():
+def test_small_xml():
with open('tests/files/small.xml', 'r') as f:
tei_xml = f.read()
@@ -15,80 +15,6 @@ def test_teixml2json_small_xml():
assert teixml2json(tei_xml) == json_form
- assert parse_document_xml(tei_xml).to_dict() == json_form
-
-def test_teixml2json_small_xml():
-
- with open('tests/files/small.xml', 'r') as f:
- tei_xml = f.read()
-
- doc = parse_document_xml(tei_xml)
- expected = GrobidDocument(
- grobid_version='0.5.1-SNAPSHOT',
- grobid_timestamp='2018-04-02T00:31+0000',
- language_code='en',
- header=GrobidHeader(
- title="Dummy Example File",
- authors=[
- GrobidAuthor(
- name="Brewster Kahle",
- given_name="Brewster",
- surname="Kahle",
- affiliation=GrobidAffiliation(
- department="Faculty ofAgricultrial Engineering",
- laboratory="Plant Physiology Laboratory",
- institution="Technion-Israel Institute of Technology",
- address=GrobidAddress(
- post_code="32000",
- settlement="Haifa",
- country="Israel",
- ),
- )
- ),
- GrobidAuthor(
- name="J Doe",
- given_name="J",
- surname="Doe",
- ),
- ],
- journal=GrobidJournal(
- name="Dummy Example File. Journal of Fake News. pp. 1-2. ISSN 1234-5678",
- ),
- date="2000",
- ),
- abstract="Everything you ever wanted to know about nothing",
- body="Introduction \nEverything starts somewhere, as somebody [1] once said. \n\n In Depth \n Meat \nYou know, for kids. \n Potatos \nQED.",
- citations=[
- GrobidCitation(
- index=0,
- id="b0",
- authors=[
- GrobidAuthor(
- name="A Seaperson",
- given_name="A",
- surname="Seaperson"
- )
- ],
- date="2001",
- journal="Letters in the Alphabet",
- title="Everything is Wonderful",
- volume="20",
- pages="1-11",
- ),
- GrobidCitation(
- index=1,
- id="b1",
- authors=[],
- date="2011-03-28",
- journal="The Dictionary",
- title="All about Facts",
- volume="14",
- ),
- ],
- )
-
- assert doc == expected
-
def test_invalid_xml():
@@ -125,3 +51,68 @@ def test_grobid_teixml2json() -> None:
ref["unstructured"] ==
"Tasa K, Baker R, Murray M. Using patient feedback for qua- lity improvement. Quality Management in Health Care 1996;8:206-19."
)
+
+
+def test_transform_grobid_ref_xml():
+ citation_xml = """
+<biblStruct >
+ <analytic>
+ <title level="a" type="main">Mesh migration following abdominal hernia repair: a comprehensive review</title>
+ <author>
+ <persName
+ xmlns="http://www.tei-c.org/ns/1.0">
+ <forename type="first">H</forename>
+ <forename type="middle">B</forename>
+ <surname>Cunningham</surname>
+ </persName>
+ </author>
+ <author>
+ <persName
+ xmlns="http://www.tei-c.org/ns/1.0">
+ <forename type="first">J</forename>
+ <forename type="middle">J</forename>
+ <surname>Weis</surname>
+ </persName>
+ </author>
+ <author>
+ <persName
+ xmlns="http://www.tei-c.org/ns/1.0">
+ <forename type="first">L</forename>
+ <forename type="middle">R</forename>
+ <surname>Taveras</surname>
+ </persName>
+ </author>
+ <author>
+ <persName
+ xmlns="http://www.tei-c.org/ns/1.0">
+ <forename type="first">S</forename>
+ <surname>Huerta</surname>
+ </persName>
+ </author>
+ <idno type="DOI">10.1007/s10029-019-01898-9</idno>
+ <idno type="PMID">30701369</idno>
+ </analytic>
+ <monogr>
+ <title level="j">Hernia</title>
+ <imprint>
+ <biblScope unit="volume">23</biblScope>
+ <biblScope unit="issue">2</biblScope>
+ <biblScope unit="page" from="235" to="243" />
+ <date type="published" when="2019-01-30" />
+ </imprint>
+ </monogr>
+</biblStruct>"""
+
+ d = transform_grobid_ref_xml(citation_xml)
+ assert d[
+ 'title'] == "Mesh migration following abdominal hernia repair: a comprehensive review"
+ assert d['authors'][2]['given_name'] == "L"
+ assert d['authors'][2]['surname'] == "Taveras"
+ assert d['authors'][2]['name'] == "L R Taveras"
+ assert d['doi'] == "10.1007/s10029-019-01898-9"
+ assert d['pmid'] == "30701369"
+ assert d['date'] == "2019-01-30"
+ assert d['pages'] == "235-243"
+ assert d['volume'] == "23"
+ assert d['issue'] == "2"
+ assert d['journal'] == "Hernia"