aboutsummaryrefslogtreecommitdiffstats
path: root/tests/test_grobid_unstructured.py
blob: 91b739828b32c589be66106375fe781c42813e51 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
import pytest

from grobid_tei_xml.grobid2json import transform_grobid_ref_xml


def test_transform_grobid_ref_xml():
    """Check the fields extracted from a single ``<biblStruct>`` citation.

    The fixture is a standalone ``<biblStruct>`` fragment whose root element
    carries no namespace declaration, while each ``<persName>`` declares the
    TEI namespace. The test feeds it to ``transform_grobid_ref_xml`` and
    verifies the title, the third author's name parts, the identifiers, and
    the journal/imprint details of the returned mapping.
    """
    citation_xml = """
<biblStruct >
    <analytic>
        <title level="a" type="main">Mesh migration following abdominal hernia repair: a comprehensive review</title>
        <author>
            <persName
                xmlns="http://www.tei-c.org/ns/1.0">
                <forename type="first">H</forename>
                <forename type="middle">B</forename>
                <surname>Cunningham</surname>
            </persName>
        </author>
        <author>
            <persName
                xmlns="http://www.tei-c.org/ns/1.0">
                <forename type="first">J</forename>
                <forename type="middle">J</forename>
                <surname>Weis</surname>
            </persName>
        </author>
        <author>
            <persName
                xmlns="http://www.tei-c.org/ns/1.0">
                <forename type="first">L</forename>
                <forename type="middle">R</forename>
                <surname>Taveras</surname>
            </persName>
        </author>
        <author>
            <persName
                xmlns="http://www.tei-c.org/ns/1.0">
                <forename type="first">S</forename>
                <surname>Huerta</surname>
            </persName>
        </author>
        <idno type="DOI">10.1007/s10029-019-01898-9</idno>
        <idno type="PMID">30701369</idno>
    </analytic>
    <monogr>
        <title level="j">Hernia</title>
        <imprint>
            <biblScope unit="volume">23</biblScope>
            <biblScope unit="issue">2</biblScope>
            <biblScope unit="page" from="235" to="243" />
            <date type="published" when="2019-01-30" />
        </imprint>
    </monogr>
</biblStruct>"""

    ref = transform_grobid_ref_xml(citation_xml)

    expected_title = (
        "Mesh migration following abdominal hernia repair: a comprehensive review"
    )
    assert ref['title'] == expected_title

    # Third <author> entry: first forename "L", middle forename "R".
    third_author = ref['authors'][2]
    assert third_author['given_name'] == "L"
    assert third_author['surname'] == "Taveras"
    assert third_author['name'] == "L R Taveras"

    # Identifiers and journal/imprint fields, checked in a fixed order.
    expected_fields = {
        'doi': "10.1007/s10029-019-01898-9",
        'pmid': "30701369",
        'date': "2019-01-30",
        'pages': "235-243",
        'volume': "23",
        'issue': "2",
        'journal': "Hernia",
    }
    for field, expected in expected_fields.items():
        assert ref[field] == expected