From 8e68f3cd4cc3d2b6b166bf87811bb4db82cc7573 Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Mon, 25 Oct 2021 17:02:14 -0700 Subject: more test coverage and comments --- tests/files/example_citation_list.xml | 80 +++++++++++++++++++++++++++++++++++ tests/test_parse.py | 38 ++++++++++++++++- 2 files changed, 116 insertions(+), 2 deletions(-) (limited to 'tests') diff --git a/tests/files/example_citation_list.xml b/tests/files/example_citation_list.xml index d640393..218fa46 100644 --- a/tests/files/example_citation_list.xml +++ b/tests/files/example_citation_list.xml @@ -270,6 +270,86 @@ + + + Heart failure, chronic diuretic use, and increase in mortality and hospitalization: an observational study using propensity score methods + + AAhmed + + + AHusain + + + TELove + + + GGambassi + + + LJDell’italia + + + GSFrancis + + + MGheorghiade + + + RMAllman + + + SMeleth + + + RCBourge + + 10.1093/eurheartj/ehi890 + + + + Eur Heart J + + 27 + 12 + + + + + + + + + + MichaelBass + + + + Devices, Measurements and Properties + Handbook of Optics + + McGRAW-HILL + + 2 + + + + + + + Implications of abandoned shoreline features above Glacial Lake Duluth levels along the north shore of the Superior Basin in the vicinity of the Brule River + + + Paper presented at the 13th Biennial Meeting of the American Quaternary Association +
Minneapolis
+ + + + + University of Minnesota + +
+
+ diff --git a/tests/test_parse.py b/tests/test_parse.py index eb4b46e..976d1b1 100644 --- a/tests/test_parse.py +++ b/tests/test_parse.py @@ -215,14 +215,34 @@ def test_citation_list_xml() -> None: tei_xml = f.read() citations = parse_citations_xml(tei_xml) - assert len(citations) == 10 - assert citations[7].title == "Global Hunger Index: The Challenge of Hidden Hunger" + assert len(citations) == 13 assert citations[3].note == "The Research Handbook on International Environmental Law" assert citations[3].authors[0].surname == "Uhlířová" + assert citations[3].authors[1].surname == "Drumbl" + assert citations[3].editors + assert citations[3].editors[0].surname == "Fitzmaurice" + # TODO: multiple persName under a single <editor> (https://github.com/kermitt2/grobid/issues/845) + # assert citations[3].editors[1].surname == "Brus" + assert citations[4].authors[0].surname == "Sleytr" assert citations[4].authors[0].middle_name == "B" + assert citations[7].title == "Global Hunger Index: The Challenge of Hidden Hunger" + + assert citations[10].doi == "10.1093/eurheartj/ehi890" + assert citations[10].url is None + + assert citations[11].title == "Devices, Measurements and Properties" + assert citations[11].series_title == "Handbook of Optics" + assert citations[11].publisher == "McGRAW-HILL" + + assert citations[ + 12].title == "Implications of abandoned shoreline features above Glacial Lake Duluth levels along the north shore of the Superior Basin in the vicinity of the Brule River" + assert citations[ + 12].book_title == "Paper presented at the 13th Biennial Meeting of the American Quaternary Association" + assert citations[12].institution == "University of Minnesota" + def test_grobid_070_document() -> None: # more recent GROBID v0.7.0 output @@ -245,6 +265,20 @@ def test_grobid_070_document() -> None: assert cite_b6.volume == "574" assert cite_b6.issue == "1" + cite_b3 = doc.citations[3] + assert cite_b3.url == "http://unesdoc.unesco.org/ulis/" + assert cite_b3.title == 
"Requirements for Global Implementation of the Strategic Plan for Coastal GOOS" + assert cite_b3.authors + assert cite_b3.authors[0].surname == "Ioc-Unesco" + assert cite_b3.date == "2012" + + cite_b18 = doc.citations[18] + assert cite_b18.note == "TriOS GmbH [Internet" + assert cite_b18.date == "2017-01-05" + + cite_b29 = doc.citations[29] + assert cite_b29.note == "PhD dissertation" + # run these methods over some more examples for c in doc.citations: c.to_csl_dict() -- cgit v1.2.3