diff options
author | Bryan Newbold <bnewbold@robocracy.org> | 2019-05-22 15:53:21 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@robocracy.org> | 2019-05-22 15:53:21 -0700 |
commit | be86ebbdcee910f3187305470440189331a16911 (patch) | |
tree | 592e695cc7f5148f26d7333d31d7b3673de71e34 /python/fatcat_tools/importers | |
parent | b98ae12c4de21fc49caa42814cc645d9fda44958 (diff) | |
download | fatcat-be86ebbdcee910f3187305470440189331a16911.tar.gz fatcat-be86ebbdcee910f3187305470440189331a16911.zip |
more pubmed checks; handle PMID/DOI mismatch differently
Diffstat (limited to 'python/fatcat_tools/importers')
-rw-r--r-- | python/fatcat_tools/importers/pubmed.py | 9 |
1 files changed, 7 insertions, 2 deletions
```diff
diff --git a/python/fatcat_tools/importers/pubmed.py b/python/fatcat_tools/importers/pubmed.py
index eecf99f6..b33bbf13 100644
--- a/python/fatcat_tools/importers/pubmed.py
+++ b/python/fatcat_tools/importers/pubmed.py
@@ -378,6 +378,9 @@ class PubmedImporter(EntityImporter):
         doi = identifiers.find("ArticleId", IdType="doi")
         if doi:
             doi = doi.string.lower()
+            if doi.startswith('doi:'):
+                doi = doi[4:]
+            assert doi.startswith('10.')
 
         pmcid = identifiers.find("ArticleId", IdType="pmc")
         if pmcid:
@@ -717,8 +720,10 @@ class PubmedImporter(EntityImporter):
         if existing and existing.ext_ids.pmid and existing.ext_ids.pmid != re.ext_ids.pmid:
             warnings.warn("PMID/DOI mismatch: release {}, pmid {} != {}".format(
                 existing.ident, existing.ext_ids.pmid, re.ext_ids.pmid))
-            self.counts['exists-pmid-doi-mismatch'] += 1
-            return False
+            self.counts['warn-pmid-doi-mismatch'] += 1
+            # don't clobber DOI, but do group together
+            re.ext_ids.doi = None
+            re.work_id = existing.work_id
 
         if existing and existing.ext_ids.pmid and (existing.refs or not re.refs):
             # TODO: any other reasons to do an update?
```