diff options
author | Bryan Newbold <bnewbold@robocracy.org> | 2019-05-22 16:01:37 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@robocracy.org> | 2019-05-22 16:01:37 -0700 |
commit | 5af153683cdd9b883bc142a4bd7bb447612d494a (patch) | |
tree | fd1332dbce1db1dd5019008bc8ba67e939544f08 | |
parent | a9752a685be5a0ff7794ba652ca7954cb7508124 (diff) | |
download | fatcat-5af153683cdd9b883bc142a4bd7bb447612d494a.tar.gz fatcat-5af153683cdd9b883bc142a4bd7bb447612d494a.zip |
more strict pubmed DOI handling
-rw-r--r-- | python/fatcat_tools/importers/pubmed.py | 4 |
1 files changed, 3 insertions, 1 deletions
```diff
diff --git a/python/fatcat_tools/importers/pubmed.py b/python/fatcat_tools/importers/pubmed.py
index b33bbf13..e5d413a7 100644
--- a/python/fatcat_tools/importers/pubmed.py
+++ b/python/fatcat_tools/importers/pubmed.py
@@ -380,7 +380,9 @@ class PubmedImporter(EntityImporter):
             doi = doi.string.lower()
             if doi.startswith('doi:'):
                 doi = doi[4:]
-            assert doi.startswith('10.')
+            if not (doi.startswith('10.') and '/' in doi and doi.split('/')[1]):
+                sys.stderr.write("BOGUS DOI: {}\n".format(doi))
+                doi = None
 
         pmcid = identifiers.find("ArticleId", IdType="pmc")
         if pmcid:
```