diff options
Diffstat (limited to 'tests/test_grobid2json.py')
-rw-r--r-- | tests/test_grobid2json.py | 143 |
1 files changed, 67 insertions, 76 deletions
diff --git a/tests/test_grobid2json.py b/tests/test_grobid2json.py index ed5d996..a1c975e 100644 --- a/tests/test_grobid2json.py +++ b/tests/test_grobid2json.py @@ -3,10 +3,10 @@ import json import pytest from grobid_tei_xml import teixml2json, parse_document_xml, GrobidDocument, GrobidCitation -from grobid_tei_xml.types import * +from grobid_tei_xml.grobid2json import transform_grobid_ref_xml -def test_teixml2json_small_xml(): +def test_small_xml(): with open('tests/files/small.xml', 'r') as f: tei_xml = f.read() @@ -15,80 +15,6 @@ def test_teixml2json_small_xml(): assert teixml2json(tei_xml) == json_form - assert parse_document_xml(tei_xml).to_dict() == json_form - -def test_teixml2json_small_xml(): - - with open('tests/files/small.xml', 'r') as f: - tei_xml = f.read() - - doc = parse_document_xml(tei_xml) - expected = GrobidDocument( - grobid_version='0.5.1-SNAPSHOT', - grobid_timestamp='2018-04-02T00:31+0000', - language_code='en', - header=GrobidHeader( - title="Dummy Example File", - authors=[ - GrobidAuthor( - name="Brewster Kahle", - given_name="Brewster", - surname="Kahle", - affiliation=GrobidAffiliation( - department="Faculty ofAgricultrial Engineering", - laboratory="Plant Physiology Laboratory", - institution="Technion-Israel Institute of Technology", - address=GrobidAddress( - post_code="32000", - settlement="Haifa", - country="Israel", - ), - ) - ), - GrobidAuthor( - name="J Doe", - given_name="J", - surname="Doe", - ), - ], - journal=GrobidJournal( - name="Dummy Example File. Journal of Fake News. pp. 1-2. ISSN 1234-5678", - ), - date="2000", - ), - abstract="Everything you ever wanted to know about nothing", - body="Introduction \nEverything starts somewhere, as somebody [1] once said. \n\n In Depth \n Meat \nYou know, for kids. \n Potatos \nQED.", - citations=[ - GrobidCitation( - index=0, - id="b0", - authors=[ - GrobidAuthor( - name="A Seaperson", - given_name="A", - surname="Seaperson" - ) - ], - date="2001", - journal="Letters in the Alphabet", - title="Everything is Wonderful", - volume="20", - pages="1-11", - ), - GrobidCitation( - index=1, - id="b1", - authors=[], - date="2011-03-28", - journal="The Dictionary", - title="All about Facts", - volume="14", - ), - ], - ) - - assert doc == expected - def test_invalid_xml(): @@ -125,3 +51,68 @@ def test_grobid_teixml2json() -> None: ref["unstructured"] == "Tasa K, Baker R, Murray M. Using patient feedback for qua- lity improvement. Quality Management in Health Care 1996;8:206-19." ) + + +def test_transform_grobid_ref_xml(): + citation_xml = """ +<biblStruct > + <analytic> + <title level="a" type="main">Mesh migration following abdominal hernia repair: a comprehensive review</title> + <author> + <persName + xmlns="http://www.tei-c.org/ns/1.0"> + <forename type="first">H</forename> + <forename type="middle">B</forename> + <surname>Cunningham</surname> + </persName> + </author> + <author> + <persName + xmlns="http://www.tei-c.org/ns/1.0"> + <forename type="first">J</forename> + <forename type="middle">J</forename> + <surname>Weis</surname> + </persName> + </author> + <author> + <persName + xmlns="http://www.tei-c.org/ns/1.0"> + <forename type="first">L</forename> + <forename type="middle">R</forename> + <surname>Taveras</surname> + </persName> + </author> + <author> + <persName + xmlns="http://www.tei-c.org/ns/1.0"> + <forename type="first">S</forename> + <surname>Huerta</surname> + </persName> + </author> + <idno type="DOI">10.1007/s10029-019-01898-9</idno> + <idno type="PMID">30701369</idno> + </analytic> + <monogr> + <title level="j">Hernia</title> + <imprint> + <biblScope unit="volume">23</biblScope> + <biblScope unit="issue">2</biblScope> + <biblScope unit="page" from="235" to="243" /> + <date type="published" when="2019-01-30" /> + </imprint> + </monogr> +</biblStruct>""" + + d = transform_grobid_ref_xml(citation_xml) + assert d[ + 'title'] == "Mesh migration following abdominal hernia repair: a comprehensive review" + assert d['authors'][2]['given_name'] == "L" + assert d['authors'][2]['surname'] == "Taveras" + assert d['authors'][2]['name'] == "L R Taveras" + assert d['doi'] == "10.1007/s10029-019-01898-9" + assert d['pmid'] == "30701369" + assert d['date'] == "2019-01-30" + assert d['pages'] == "235-243" + assert d['volume'] == "23" + assert d['issue'] == "2" + assert d['journal'] == "Hernia" |