From dcdebd34883d1a3afa568acea5ebc30842cda538 Mon Sep 17 00:00:00 2001
From: Bryan Newbold
Date: Tue, 5 Mar 2019 17:35:59 -0800
Subject: small bugfixes to pubmed xml parser

---
 python/parse_pubmed_xml.py | 24 +++++++++++++-----------
 1 file changed, 13 insertions(+), 11 deletions(-)

diff --git a/python/parse_pubmed_xml.py b/python/parse_pubmed_xml.py
index 9350e9a4..413333cc 100644
--- a/python/parse_pubmed_xml.py
+++ b/python/parse_pubmed_xml.py
@@ -161,20 +161,22 @@ class PubMedParser():
         if pages:
             pages = pages.string
 
-        title = medline.Article.ArticleTitle.string, # always present
-        if type(title) is tuple:
-            title = ': '.join(title)
-        if title.endswith('.'):
-            title = title[:-1]
-        # this hides some "special" titles, but the vast majority are
-        # translations; translations don't always include the original_title
-        if title.startswith('[') and title.endswith(']'):
-            title = title[1:-1]
+        title = medline.Article.ArticleTitle.string # always present
+        if title:
+            if title.endswith('.'):
+                title = title[:-1]
+            # this hides some "special" titles, but the vast majority are
+            # translations; translations don't always include the original_title
+            if title.startswith('[') and title.endswith(']'):
+                title = title[1:-1]
+        else:
+            # TODO: will filter out later
+            title = None
 
         original_title = medline.Article.find("VernacularTitle", recurse=False)
         if original_title:
-            original_title = original_title.string
-            if original_title.endswith('.'):
+            original_title = original_title.string or None
+            if original_title and original_title.endswith('.'):
                 original_title = original_title[:-1]
 
         # TODO: happening in alpha order, not handling multi-language well.
-- 
cgit v1.2.3