From 938d2c5366d80618b839c83baadc9b5c62d10dce Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Wed, 1 Apr 2020 12:02:43 -0700 Subject: pubmed: use untranslated title if translated not available The primary motivation for this change is that fatcat *requires* a non-empty title for each release entity. Pubmed/Medline occasionally indexes just a VernacularTitle with no ArticleTitle for foreign publications, and currently those records don't end up in fatcat at all. --- python/fatcat_tools/importers/pubmed.py | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'python/fatcat_tools') diff --git a/python/fatcat_tools/importers/pubmed.py b/python/fatcat_tools/importers/pubmed.py index 62bb1ddb..abcb21d9 100644 --- a/python/fatcat_tools/importers/pubmed.py +++ b/python/fatcat_tools/importers/pubmed.py @@ -412,6 +412,12 @@ class PubmedImporter(EntityImporter): if original_title and original_title.endswith('.'): original_title = original_title[:-1] + if original_title and not title: + # if we only have an "original" title, but not translated/english + # title, sub in the original title so the entity can be created + title = original_title + original_title = None + # TODO: happening in alpha order, not handling multi-language well. language = medline.Article.Language if language: -- cgit v1.2.3