summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@archive.org> 2021-11-03 20:26:02 -0700
committer Bryan Newbold <bnewbold@archive.org> 2021-11-03 20:26:02 -0700
commit 6ad771e195065a1f22ddbe5a4098acc70137be9b (patch)
tree 4f1c4b751118c3f65573edf63bd9f0d52224b608
parent 1f57527aa621525d46e9ddbbd4bab2682df8d67e (diff)
download grobid_tei_xml-6ad771e195065a1f22ddbe5a4098acc70137be9b.tar.gz
grobid_tei_xml-6ad771e195065a1f22ddbe5a4098acc70137be9b.zip
handle 'bare' editor names
-rwxr-xr-x grobid_tei_xml/parse.py 2
-rw-r--r-- tests/files/citation/editor_no_persname.tei.xml 14
-rw-r--r-- tests/test_parse.py 16
3 files changed, 31 insertions, 1 deletions
diff --git a/grobid_tei_xml/parse.py b/grobid_tei_xml/parse.py
index dea1f2e..ebdf65b 100755
--- a/grobid_tei_xml/parse.py
+++ b/grobid_tei_xml/parse.py
@@ -114,7 +114,7 @@ def _parse_editor(elem: ET.Element, ns: str = ns) -> List[GrobidAuthor]:
"""
persname_tags = elem.findall(f"./{{{ns}}}persName")
- if persname_tags is None:
+ if len(persname_tags or []) == 0:
if elem.find("*") is None:
# sometimes there is a "bare" editor name we can use
raw_name = elem.text
diff --git a/tests/files/citation/editor_no_persname.tei.xml b/tests/files/citation/editor_no_persname.tei.xml
new file mode 100644
index 0000000..6e49494
--- /dev/null
+++ b/tests/files/citation/editor_no_persname.tei.xml
@@ -0,0 +1,14 @@
+<biblStruct >
+ <analytic>
+ <title/>
+ </analytic>
+ <monogr>
+ <title level="j">Clinical Gynecologic Endocrinology and Infertility</title>
+ <editor>Mitchell C</editor>
+ <imprint>
+ <biblScope unit="page" from="651" to="666" />
+ <date type="published" when="1994">1994</date>
+ <publisher>Williams and Wilkins</publisher>
+ </imprint>
+ </monogr>
+</biblStruct>
diff --git a/tests/test_parse.py b/tests/test_parse.py
index 9d8f4ff..a818f09 100644
--- a/tests/test_parse.py
+++ b/tests/test_parse.py
@@ -440,3 +440,19 @@ def test_author_email() -> None:
assert doc.body is None
assert doc.acknowledgement is None
assert doc.annex is None
+
+
+def test_citation_bare_editor() -> None:
+ with open("tests/files/citation/editor_no_persname.tei.xml", "r") as f:
+ tei_xml = f.read()
+
+ ref = parse_citation_xml(tei_xml)
+ assert ref
+ assert ref.title is None
+ assert ref.journal == "Clinical Gynecologic Endocrinology and Infertility"
+ assert ref.publisher == "Williams and Wilkins"
+ assert ref.date == "1994"
+ assert ref.authors == []
+ assert ref.editors
+ assert len(ref.editors) == 1
+ assert ref.editors[0].full_name == "Mitchell C"