summary refs log tree commit diff stats
path: root/tests
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@archive.org> 2021-10-27 20:36:46 -0700
committer Bryan Newbold <bnewbold@archive.org> 2021-10-27 20:36:46 -0700
commit 3e9cdaadf455ab1a9bb7289f7b9fa6c889f24669 (patch)
tree 87800571c10cdad8f958080f19c60fb86109178e /tests
parent 3e3902efe418677054d1711a3de1ab8c4cd2c57c (diff)
download grobid_tei_xml-3e9cdaadf455ab1a9bb7289f7b9fa6c889f24669.tar.gz
grobid_tei_xml-3e9cdaadf455ab1a9bb7289f7b9fa6c889f24669.zip
add more explicit single and multiple citation parsing functions
Diffstat (limited to 'tests')
-rw-r--r-- tests/files/empty_citation.tei.xml 6
-rw-r--r-- tests/files/empty_citation_unstructured.tei.xml 7
-rw-r--r-- tests/test_parse.py 44
3 files changed, 54 insertions, 3 deletions
diff --git a/tests/files/empty_citation.tei.xml b/tests/files/empty_citation.tei.xml
new file mode 100644
index 0000000..cb21f6e
--- /dev/null
+++ b/tests/files/empty_citation.tei.xml
@@ -0,0 +1,6 @@
+<biblStruct >
+ <monogr>
+ <title/>
+ <imprint/>
+ </monogr>
+</biblStruct>
diff --git a/tests/files/empty_citation_unstructured.tei.xml b/tests/files/empty_citation_unstructured.tei.xml
new file mode 100644
index 0000000..35aee19
--- /dev/null
+++ b/tests/files/empty_citation_unstructured.tei.xml
@@ -0,0 +1,7 @@
+<biblStruct >
+ <monogr>
+ <title/>
+ <imprint/>
+ </monogr>
+ <note type="raw_reference">blah</note>
+</biblStruct>
diff --git a/tests/test_parse.py b/tests/test_parse.py
index 25529c4..70dcc98 100644
--- a/tests/test_parse.py
+++ b/tests/test_parse.py
@@ -5,7 +5,14 @@ import xml.etree.ElementTree
import pytest
-from grobid_tei_xml import GrobidBiblio, GrobidDocument, parse_citations_xml, parse_document_xml
+from grobid_tei_xml import (
+ GrobidBiblio,
+ GrobidDocument,
+ parse_citation_list_xml,
+ parse_citation_xml,
+ parse_citations_xml,
+ parse_document_xml,
+)
from grobid_tei_xml.types import *
@@ -198,7 +205,8 @@ def test_single_citations_xml() -> None:
</monogr>
</biblStruct>"""
- d = parse_citations_xml(citation_xml)[0]
+ d = parse_citation_xml(citation_xml)
+ assert d
assert (
d.title
== """Mesh migration following abdominal hernia repair: a comprehensive review"""
@@ -217,13 +225,21 @@ def test_single_citations_xml() -> None:
assert d.issue == "2"
assert d.journal == "Hernia"
+ d2 = parse_citations_xml(citation_xml)[0]
+ assert d.title == d2.title
+ assert d.authors == d2.authors
+
def test_citation_list_xml() -> None:
with open("tests/files/example_citation_list.xml", "r") as f:
tei_xml = f.read()
- citations = parse_citations_xml(tei_xml)
+ citations = parse_citation_list_xml(tei_xml)
+
+ # verify that old function still works
+ assert citations == parse_citations_xml(tei_xml)
+
assert len(citations) == 13
assert citations[3].note == "The Research Handbook on International Environmental Law"
@@ -300,3 +316,25 @@ def test_grobid_070_document() -> None:
c.to_csl_dict()
c.to_dict()
c.to_legacy_dict()
+
+
+def test_empty_citations() -> None:
+
+ with open("tests/files/empty_citation_unstructured.tei.xml", "r") as f:
+ mostly_empty_xml = f.read()
+
+ with open("tests/files/empty_citation.tei.xml", "r") as f:
+ empty_xml = f.read()
+
+ assert parse_citation_xml(empty_xml) is None
+ assert parse_citation_xml(mostly_empty_xml) is None
+
+ d = parse_citation_list_xml(empty_xml)
+ assert d
+ assert d[0].index == 0
+ assert d[0].unstructured is None
+
+ d2 = parse_citation_list_xml(mostly_empty_xml)
+ assert d2
+ assert d2[0].index == 0
+ assert d2[0].unstructured == "blah"