From 5af153683cdd9b883bc142a4bd7bb447612d494a Mon Sep 17 00:00:00 2001
From: Bryan Newbold
Date: Wed, 22 May 2019 16:01:37 -0700
Subject: more strict pubmed DOI handling

---
 python/fatcat_tools/importers/pubmed.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'python/fatcat_tools/importers')

diff --git a/python/fatcat_tools/importers/pubmed.py b/python/fatcat_tools/importers/pubmed.py
index b33bbf13..e5d413a7 100644
--- a/python/fatcat_tools/importers/pubmed.py
+++ b/python/fatcat_tools/importers/pubmed.py
@@ -380,7 +380,9 @@ class PubmedImporter(EntityImporter):
             doi = doi.string.lower()
             if doi.startswith('doi:'):
                 doi = doi[4:]
-            assert doi.startswith('10.')
+            if not (doi.startswith('10.') and '/' in doi and doi.split('/')[1]):
+                sys.stderr.write("BOGUS DOI: {}\n".format(doi))
+                doi = None
 
         pmcid = identifiers.find("ArticleId", IdType="pmc")
         if pmcid:
-- 
cgit v1.2.3