diff options
Diffstat (limited to 'grobid_tei_xml/grobid2json.py')
-rw-r--r-- | grobid_tei_xml/grobid2json.py | 8 |
1 files changed, 7 insertions, 1 deletions
diff --git a/grobid_tei_xml/grobid2json.py b/grobid_tei_xml/grobid2json.py
index 7f455af..8946ab8 100644
--- a/grobid_tei_xml/grobid2json.py
+++ b/grobid_tei_xml/grobid2json.py
@@ -83,9 +83,9 @@ def journal_info(elem: ET.Element) -> Dict[str, Any]:
     journal["eissn"] = elem.findtext('.//{%s}idno[@type="eISSN"]' % ns)
     journal["volume"] = elem.findtext('.//{%s}biblScope[@unit="volume"]' % ns)
     journal["issue"] = elem.findtext('.//{%s}biblScope[@unit="issue"]' % ns)
-    keys = list(journal.keys())
 
     # remove empty/null keys
+    keys = list(journal.keys())
     for k in keys:
         if not journal[k]:
             journal.pop(k)
@@ -140,6 +140,12 @@ def biblio_info(elem: ET.Element, ns: str = ns) -> Dict[str, Any]:
             ref["url"] = ref["url"].split(">")[0]
     else:
         ref["url"] = None
+
+    # remove empty/null keys
+    keys = list(ref.keys())
+    for k in keys:
+        if ref[k] is None:
+            ref.pop(k)
     return ref