about | summary | refs | log | tree | commit | diff | stats
path: root/tests/test_parse.py
diff options
context:
space:
mode:
Diffstat (limited to 'tests/test_parse.py')
-rw-r--r-- tests/test_parse.py | 43
1 files changed, 41 insertions, 2 deletions
diff --git a/tests/test_parse.py b/tests/test_parse.py
index 25ffa64..eb4b46e 100644
--- a/tests/test_parse.py
+++ b/tests/test_parse.py
@@ -42,7 +42,7 @@ def test_small_xml() -> None:
surname="Doe",
),
],
- journal="Dummy Example File. Journal of Fake News. pp. 1-2. ISSN 1234-5678",
+ book_title="Dummy Example File. Journal of Fake News. pp. 1-2. ISSN 1234-5678",
date="2000",
),
abstract="Everything you ever wanted to know about nothing",
@@ -52,13 +52,15 @@ def test_small_xml() -> None:
index=0,
id="b0",
authors=[
- GrobidAuthor(full_name="A Seaperson", given_name="A", surname="Seaperson")
+ GrobidAuthor(full_name="A Seaperson", middle_name="A", surname="Seaperson")
],
date="2001",
journal="Letters in the Alphabet",
title="Everything is Wonderful",
volume="20",
pages="1-11",
+ first_page="1",
+ last_page="11",
),
GrobidBiblio(
index=1,
@@ -68,6 +70,7 @@ def test_small_xml() -> None:
journal="The Dictionary",
title="All about Facts",
volume="14",
+ note="author signed copy",
),
],
)
@@ -192,12 +195,15 @@ def test_single_citations_xml() -> None:
d = parse_citations_xml(citation_xml)[0]
assert d.title == """Mesh migration following abdominal hernia repair: a comprehensive review"""
assert d.authors[2].given_name == "L"
+ assert d.authors[2].middle_name == "R"
assert d.authors[2].surname == "Taveras"
assert d.authors[2].full_name == "L R Taveras"
assert d.doi == "10.1007/s10029-019-01898-9"
assert d.pmid == "30701369"
assert d.date == "2019-01-30"
assert d.pages == "235-243"
+ assert d.first_page == "235"
+ assert d.last_page == "243"
assert d.volume == "23"
assert d.issue == "2"
assert d.journal == "Hernia"
@@ -211,3 +217,36 @@ def test_citation_list_xml() -> None:
citations = parse_citations_xml(tei_xml)
assert len(citations) == 10
assert citations[7].title == "Global Hunger Index: The Challenge of Hidden Hunger"
+
+ assert citations[3].note == "The Research Handbook on International Environmental Law"
+ assert citations[3].authors[0].surname == "Uhlířová"
+ assert citations[4].authors[0].surname == "Sleytr"
+ assert citations[4].authors[0].middle_name == "B"
+
+
+def test_grobid_070_document() -> None:
+ # more recent GROBID v0.7.0 output
+
+ with open('tests/files/example_grobid_plos.tei.xml', 'r') as f:
+ tei_xml = f.read()
+
+ doc = parse_document_xml(tei_xml)
+ assert doc.grobid_timestamp == "2021-10-23T03:05+0000"
+ assert doc.grobid_version == "0.7.0-SNAPSHOT"
+ assert doc.pdf_md5 == "4F10689DEB84756CE82C8015951A22E5"
+
+ assert doc.citations
+ cite_b6 = doc.citations[6]
+ assert cite_b6.id == "b6"
+ assert cite_b6.journal == "OR. Hydrobiol"
+ # note that this was not parsed well by GROBID
+ assert cite_b6.institution == "Crater Lake National Park"
+ assert cite_b6.date == "2007"
+ assert cite_b6.volume == "574"
+ assert cite_b6.issue == "1"
+
+ # run these methods over some more examples
+ for c in doc.citations:
+ c.to_csl_dict()
+ c.to_dict()
+ c.to_legacy_dict()