diff options
author | Bryan Newbold <bnewbold@archive.org> | 2021-11-03 20:26:02 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2021-11-03 20:26:02 -0700 |
commit | 6ad771e195065a1f22ddbe5a4098acc70137be9b (patch) | |
tree | 4f1c4b751118c3f65573edf63bd9f0d52224b608 | |
parent | 1f57527aa621525d46e9ddbbd4bab2682df8d67e (diff) | |
download | grobid_tei_xml-6ad771e195065a1f22ddbe5a4098acc70137be9b.tar.gz grobid_tei_xml-6ad771e195065a1f22ddbe5a4098acc70137be9b.zip |
handle 'bare' editor names
-rwxr-xr-x | grobid_tei_xml/parse.py | 2 | ||||
-rw-r--r-- | tests/files/citation/editor_no_persname.tei.xml | 14 | ||||
-rw-r--r-- | tests/test_parse.py | 16 |
3 files changed, 31 insertions, 1 deletion
diff --git a/grobid_tei_xml/parse.py b/grobid_tei_xml/parse.py index dea1f2e..ebdf65b 100755 --- a/grobid_tei_xml/parse.py +++ b/grobid_tei_xml/parse.py @@ -114,7 +114,7 @@ def _parse_editor(elem: ET.Element, ns: str = ns) -> List[GrobidAuthor]: """ persname_tags = elem.findall(f"./{{{ns}}}persName") - if persname_tags is None: + if len(persname_tags or []) == 0: if elem.find("*") is None: # sometimes there is a "bare" editor name we can use raw_name = elem.text diff --git a/tests/files/citation/editor_no_persname.tei.xml b/tests/files/citation/editor_no_persname.tei.xml new file mode 100644 index 0000000..6e49494 --- /dev/null +++ b/tests/files/citation/editor_no_persname.tei.xml @@ -0,0 +1,14 @@ +<biblStruct > + <analytic> + <title/> + </analytic> + <monogr> + <title level="j">Clinical Gynecologic Endocrinology and Infertility</title> + <editor>Mitchell C</editor> + <imprint> + <biblScope unit="page" from="651" to="666" /> + <date type="published" when="1994">1994</date> + <publisher>Williams and Wilkins</publisher> + </imprint> + </monogr> +</biblStruct> diff --git a/tests/test_parse.py b/tests/test_parse.py index 9d8f4ff..a818f09 100644 --- a/tests/test_parse.py +++ b/tests/test_parse.py @@ -440,3 +440,19 @@ def test_author_email() -> None: assert doc.body is None assert doc.acknowledgement is None assert doc.annex is None + + +def test_citation_bare_editor() -> None: + with open("tests/files/citation/editor_no_persname.tei.xml", "r") as f: + tei_xml = f.read() + + ref = parse_citation_xml(tei_xml) + assert ref + assert ref.title is None + assert ref.journal == "Clinical Gynecologic Endocrinology and Infertility" + assert ref.publisher == "Williams and Wilkins" + assert ref.date == "1994" + assert ref.authors == [] + assert ref.editors + assert len(ref.editors) == 1 + assert ref.editors[0].full_name == "Mitchell C" |