diff options
Diffstat (limited to 'tests/test_parse.py')
-rw-r--r-- | tests/test_parse.py | 43 |
1 file changed, 41 insertions, 2 deletions
diff --git a/tests/test_parse.py b/tests/test_parse.py
index 25ffa64..eb4b46e 100644
--- a/tests/test_parse.py
+++ b/tests/test_parse.py
@@ -42,7 +42,7 @@ def test_small_xml() -> None:
                     surname="Doe",
                 ),
             ],
-            journal="Dummy Example File. Journal of Fake News. pp. 1-2. ISSN 1234-5678",
+            book_title="Dummy Example File. Journal of Fake News. pp. 1-2. ISSN 1234-5678",
             date="2000",
         ),
         abstract="Everything you ever wanted to know about nothing",
@@ -52,13 +52,15 @@ def test_small_xml() -> None:
                 index=0,
                 id="b0",
                 authors=[
-                    GrobidAuthor(full_name="A Seaperson", given_name="A", surname="Seaperson")
+                    GrobidAuthor(full_name="A Seaperson", middle_name="A", surname="Seaperson")
                 ],
                 date="2001",
                 journal="Letters in the Alphabet",
                 title="Everything is Wonderful",
                 volume="20",
                 pages="1-11",
+                first_page="1",
+                last_page="11",
             ),
             GrobidBiblio(
                 index=1,
@@ -68,6 +70,7 @@ def test_small_xml() -> None:
                 journal="The Dictionary",
                 title="All about Facts",
                 volume="14",
+                note="author signed copy",
             ),
         ],
     )
@@ -192,12 +195,15 @@ def test_single_citations_xml() -> None:
     d = parse_citations_xml(citation_xml)[0]
     assert d.title == """Mesh migration following abdominal hernia repair: a comprehensive review"""
     assert d.authors[2].given_name == "L"
+    assert d.authors[2].middle_name == "R"
     assert d.authors[2].surname == "Taveras"
     assert d.authors[2].full_name == "L R Taveras"
     assert d.doi == "10.1007/s10029-019-01898-9"
     assert d.pmid == "30701369"
     assert d.date == "2019-01-30"
     assert d.pages == "235-243"
+    assert d.first_page == "235"
+    assert d.last_page == "243"
     assert d.volume == "23"
     assert d.issue == "2"
     assert d.journal == "Hernia"
@@ -211,3 +217,36 @@ def test_citation_list_xml() -> None:
     citations = parse_citations_xml(tei_xml)
     assert len(citations) == 10
     assert citations[7].title == "Global Hunger Index: The Challenge of Hidden Hunger"
+
+    assert citations[3].note == "The Research Handbook on International Environmental Law"
+    assert citations[3].authors[0].surname == "Uhlířová"
+    assert citations[4].authors[0].surname == "Sleytr"
+    assert citations[4].authors[0].middle_name == "B"
+
+
+def test_grobid_070_document() -> None:
+    # more recent GROBID v0.7.0 output
+
+    with open('tests/files/example_grobid_plos.tei.xml', 'r') as f:
+        tei_xml = f.read()
+
+    doc = parse_document_xml(tei_xml)
+    assert doc.grobid_timestamp == "2021-10-23T03:05+0000"
+    assert doc.grobid_version == "0.7.0-SNAPSHOT"
+    assert doc.pdf_md5 == "4F10689DEB84756CE82C8015951A22E5"
+
+    assert doc.citations
+    cite_b6 = doc.citations[6]
+    assert cite_b6.id == "b6"
+    assert cite_b6.journal == "OR. Hydrobiol"
+    # note that this was not parsed well by GROBID
+    assert cite_b6.institution == "Crater Lake National Park"
+    assert cite_b6.date == "2007"
+    assert cite_b6.volume == "574"
+    assert cite_b6.issue == "1"
+
+    # run these methods over some more examples
+    for c in doc.citations:
+        c.to_csl_dict()
+        c.to_dict()
+        c.to_legacy_dict()