summaryrefslogtreecommitdiffstats
path: root/python/fatcat_tools
diff options
context:
space:
mode:
authorBryan Newbold <bnewbold@robocracy.org>2020-04-01 12:02:43 -0700
committerBryan Newbold <bnewbold@robocracy.org>2020-04-01 12:02:45 -0700
commit938d2c5366d80618b839c83baadc9b5c62d10dce (patch)
treed5bdd27e903ea2abbe4576cd3d844b2a86267516 /python/fatcat_tools
parentf77a553350238c8ccc9c3bc0edcf47fb9dd067b3 (diff)
downloadfatcat-938d2c5366d80618b839c83baadc9b5c62d10dce.tar.gz
fatcat-938d2c5366d80618b839c83baadc9b5c62d10dce.zip
pubmed: use untranslated title if translated not available
The primary motivation for this change is that fatcat *requires* a non-empty title for each release entity. Pubmed/Medline occasionally indexes just a VernacularTitle with no ArticleTitle for foreign publications, and currently those records don't end up in fatcat at all.
Diffstat (limited to 'python/fatcat_tools')
-rw-r--r--python/fatcat_tools/importers/pubmed.py6
1 files changed, 6 insertions, 0 deletions
diff --git a/python/fatcat_tools/importers/pubmed.py b/python/fatcat_tools/importers/pubmed.py
index 62bb1ddb..abcb21d9 100644
--- a/python/fatcat_tools/importers/pubmed.py
+++ b/python/fatcat_tools/importers/pubmed.py
@@ -412,6 +412,12 @@ class PubmedImporter(EntityImporter):
if original_title and original_title.endswith('.'):
original_title = original_title[:-1]
+ if original_title and not title:
+ # if we only have an "original" title, but not translated/english
+ # title, sub in the original title so the entity can be created
+ title = original_title
+ original_title = None
+
# TODO: happening in alpha order, not handling multi-language well.
language = medline.Article.Language
if language: