diff options
author | Bryan Newbold <bnewbold@robocracy.org> | 2019-03-05 17:35:59 -0800 |
---|---|---|
committer | Bryan Newbold <bnewbold@robocracy.org> | 2019-05-21 11:41:29 -0700 |
commit | a987af927686725f7778475f4c383d59c8c494bf (patch) | |
tree | f620f4897ce810198c069fbb2f2f5a2570772ea9 /python/parse_pubmed_xml.py | |
parent | 3ec275c7d78aa261027f35c26366a382c5dd7a6c (diff) | |
download | fatcat-a987af927686725f7778475f4c383d59c8c494bf.tar.gz fatcat-a987af927686725f7778475f4c383d59c8c494bf.zip |
small bugfixes to pubmed xml parser
Diffstat (limited to 'python/parse_pubmed_xml.py')
-rw-r--r-- | python/parse_pubmed_xml.py | 24 |
1 file changed, 13 insertions, 11 deletions
```diff
diff --git a/python/parse_pubmed_xml.py b/python/parse_pubmed_xml.py
index 9350e9a4..413333cc 100644
--- a/python/parse_pubmed_xml.py
+++ b/python/parse_pubmed_xml.py
@@ -161,20 +161,22 @@ class PubMedParser():
         if pages:
             pages = pages.string
 
-        title = medline.Article.ArticleTitle.string, # always present
-        if type(title) is tuple:
-            title = ': '.join(title)
-        if title.endswith('.'):
-            title = title[:-1]
-        # this hides some "special" titles, but the vast majority are
-        # translations; translations don't always include the original_title
-        if title.startswith('[') and title.endswith(']'):
-            title = title[1:-1]
+        title = medline.Article.ArticleTitle.string # always present
+        if title:
+            if title.endswith('.'):
+                title = title[:-1]
+            # this hides some "special" titles, but the vast majority are
+            # translations; translations don't always include the original_title
+            if title.startswith('[') and title.endswith(']'):
+                title = title[1:-1]
+        else:
+            # TODO: will filter out later
+            title = None
 
         original_title = medline.Article.find("VernacularTitle", recurse=False)
         if original_title:
-            original_title = original_title.string
-            if original_title.endswith('.'):
+            original_title = original_title.string or None
+            if original_title and original_title.endswith('.'):
                 original_title = original_title[:-1]
 
         # TODO: happening in alpha order, not handling multi-language well.
```