diff options
author | Bryan Newbold <bnewbold@robocracy.org> | 2019-12-23 14:01:15 -0800 |
---|---|---|
committer | Bryan Newbold <bnewbold@robocracy.org> | 2019-12-23 14:01:15 -0800 |
commit | 1bb0a2181d5a30241d80279c5930eb753733f30b (patch) | |
tree | c79606e9dccb12dc30197188c838525913234bff | |
parent | 4c2ca53303a949c00707e0c552489930f059a54a (diff) | |
download | fatcat-1bb0a2181d5a30241d80279c5930eb753733f30b.tar.gz fatcat-1bb0a2181d5a30241d80279c5930eb753733f30b.zip |
add basic MedlineDate year parsing
-rw-r--r-- | python/fatcat_tools/importers/pubmed.py | 11 |
1 files changed, 11 insertions, 0 deletions
```diff
diff --git a/python/fatcat_tools/importers/pubmed.py b/python/fatcat_tools/importers/pubmed.py
index 80cf986c..aeac43b5 100644
--- a/python/fatcat_tools/importers/pubmed.py
+++ b/python/fatcat_tools/importers/pubmed.py
@@ -486,6 +486,8 @@ class PubmedImporter(EntityImporter):
         pub_date = medline.Article.find('ArticleDate')
         if not pub_date:
             pub_date = journal.PubDate
+        if not pub_date:
+            pub_date = journal.JournalIssue.PubDate
         release_date = None
         release_year = None
         if pub_date.Year:
@@ -500,6 +502,15 @@ class PubmedImporter(EntityImporter):
                 except ValueError as ve:
                     sys.stderr.write("bad date, skipping: {}\n".format(ve))
                     release_date = None
+        elif pub_date.MedlineDate:
+            medline_date = pub_date.MedlineDate.string.strip()
+            if len(medline_date) >= 4 and medline_date[:4].isdigit():
+                release_year = int(medline_date[:4])
+                if release_year < 1300 or release_year > 2040:
+                    print("bad medline year, skipping: {}\n".format(release_year), file=sys.stderr)
+                    release_year = None
+            else:
+                print("unparsable medline date, skipping: {}\n".format(medline_date), file=sys.stderr)
         if journal.find("Title"):
             container_name = journal.Title.string
```