summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@robocracy.org> 2019-12-23 14:01:15 -0800
committer Bryan Newbold <bnewbold@robocracy.org> 2019-12-23 14:01:15 -0800
commit 1bb0a2181d5a30241d80279c5930eb753733f30b (patch)
tree c79606e9dccb12dc30197188c838525913234bff
parent 4c2ca53303a949c00707e0c552489930f059a54a (diff)
download fatcat-1bb0a2181d5a30241d80279c5930eb753733f30b.tar.gz
fatcat-1bb0a2181d5a30241d80279c5930eb753733f30b.zip
add basic MedlineDate year parsing
-rw-r--r-- python/fatcat_tools/importers/pubmed.py 11
1 files changed, 11 insertions, 0 deletions
diff --git a/python/fatcat_tools/importers/pubmed.py b/python/fatcat_tools/importers/pubmed.py
index 80cf986c..aeac43b5 100644
--- a/python/fatcat_tools/importers/pubmed.py
+++ b/python/fatcat_tools/importers/pubmed.py
@@ -486,6 +486,8 @@ class PubmedImporter(EntityImporter):
pub_date = medline.Article.find('ArticleDate')
if not pub_date:
pub_date = journal.PubDate
+ if not pub_date:
+ pub_date = journal.JournalIssue.PubDate
release_date = None
release_year = None
if pub_date.Year:
@@ -500,6 +502,15 @@ class PubmedImporter(EntityImporter):
except ValueError as ve:
sys.stderr.write("bad date, skipping: {}\n".format(ve))
release_date = None
+ elif pub_date.MedlineDate:
+ medline_date = pub_date.MedlineDate.string.strip()
+ if len(medline_date) >= 4 and medline_date[:4].isdigit():
+ release_year = int(medline_date[:4])
+ if release_year < 1300 or release_year > 2040:
+ print("bad medline year, skipping: {}\n".format(release_year), file=sys.stderr)
+ release_year = None
+ else:
+ print("unparsable medline date, skipping: {}\n".format(medline_date), file=sys.stderr)
if journal.find("Title"):
container_name = journal.Title.string