From 6ad771e195065a1f22ddbe5a4098acc70137be9b Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Wed, 3 Nov 2021 20:26:02 -0700 Subject: handle 'bare' editor names --- grobid_tei_xml/parse.py | 2 +- tests/files/citation/editor_no_persname.tei.xml | 14 ++++++++++++++ tests/test_parse.py | 16 ++++++++++++++++ 3 files changed, 31 insertions(+), 1 deletion(-) create mode 100644 tests/files/citation/editor_no_persname.tei.xml diff --git a/grobid_tei_xml/parse.py b/grobid_tei_xml/parse.py index dea1f2e..ebdf65b 100755 --- a/grobid_tei_xml/parse.py +++ b/grobid_tei_xml/parse.py @@ -114,7 +114,7 @@ def _parse_editor(elem: ET.Element, ns: str = ns) -> List[GrobidAuthor]: """ persname_tags = elem.findall(f"./{{{ns}}}persName") - if persname_tags is None: + if len(persname_tags or []) == 0: if elem.find("*") is None: # sometimes there is a "bare" editor name we can use raw_name = elem.text diff --git a/tests/files/citation/editor_no_persname.tei.xml b/tests/files/citation/editor_no_persname.tei.xml new file mode 100644 index 0000000..6e49494 --- /dev/null +++ b/tests/files/citation/editor_no_persname.tei.xml @@ -0,0 +1,14 @@ + + + + </analytic> + <monogr> + <title level="j">Clinical Gynecologic Endocrinology and Infertility + Mitchell C + + + 1994 + Williams and Wilkins + + + diff --git a/tests/test_parse.py b/tests/test_parse.py index 9d8f4ff..a818f09 100644 --- a/tests/test_parse.py +++ b/tests/test_parse.py @@ -440,3 +440,19 @@ def test_author_email() -> None: assert doc.body is None assert doc.acknowledgement is None assert doc.annex is None + + +def test_citation_bare_editor() -> None: + with open("tests/files/citation/editor_no_persname.tei.xml", "r") as f: + tei_xml = f.read() + + ref = parse_citation_xml(tei_xml) + assert ref + assert ref.title is None + assert ref.journal == "Clinical Gynecologic Endocrinology and Infertility" + assert ref.publisher == "Williams and Wilkins" + assert ref.date == "1994" + assert ref.authors == [] + assert ref.editors + assert len(ref.editors) == 1 + assert ref.editors[0].full_name == "Mitchell C" -- cgit v1.2.3