From 3e9cdaadf455ab1a9bb7289f7b9fa6c889f24669 Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Wed, 27 Oct 2021 20:36:46 -0700 Subject: add more explicit single and multiple citation parsing functions --- tests/test_parse.py | 44 +++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 41 insertions(+), 3 deletions(-) (limited to 'tests/test_parse.py') diff --git a/tests/test_parse.py b/tests/test_parse.py index 25529c4..70dcc98 100644 --- a/tests/test_parse.py +++ b/tests/test_parse.py @@ -5,7 +5,14 @@ import xml.etree.ElementTree import pytest -from grobid_tei_xml import GrobidBiblio, GrobidDocument, parse_citations_xml, parse_document_xml +from grobid_tei_xml import ( + GrobidBiblio, + GrobidDocument, + parse_citation_list_xml, + parse_citation_xml, + parse_citations_xml, + parse_document_xml, +) from grobid_tei_xml.types import * @@ -198,7 +205,8 @@ def test_single_citations_xml() -> None: """ - d = parse_citations_xml(citation_xml)[0] + d = parse_citation_xml(citation_xml) + assert d assert ( d.title == """Mesh migration following abdominal hernia repair: a comprehensive review""" @@ -217,13 +225,21 @@ def test_single_citations_xml() -> None: assert d.issue == "2" assert d.journal == "Hernia" + d2 = parse_citations_xml(citation_xml)[0] + assert d.title == d2.title + assert d.authors == d2.authors + def test_citation_list_xml() -> None: with open("tests/files/example_citation_list.xml", "r") as f: tei_xml = f.read() - citations = parse_citations_xml(tei_xml) + citations = parse_citation_list_xml(tei_xml) + + # verify that old function still works + assert citations == parse_citations_xml(tei_xml) + assert len(citations) == 13 assert citations[3].note == "The Research Handbook on International Environmental Law" @@ -300,3 +316,25 @@ def test_grobid_070_document() -> None: c.to_csl_dict() c.to_dict() c.to_legacy_dict() + + +def test_empty_citations() -> None: + + with open("tests/files/empty_citation_unstructured.tei.xml", "r") as f: + mostly_empty_xml = f.read() + + with open("tests/files/empty_citation.tei.xml", "r") as f: + empty_xml = f.read() + + assert parse_citation_xml(empty_xml) is None + assert parse_citation_xml(mostly_empty_xml) is None + + d = parse_citation_list_xml(empty_xml) + assert d + assert d[0].index == 0 + assert d[0].unstructured is None + + d2 = parse_citation_list_xml(mostly_empty_xml) + assert d2 + assert d2[0].index == 0 + assert d2[0].unstructured == "blah" -- cgit v1.2.3