about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@robocracy.org> 2019-05-22 15:53:21 -0700
committer: Bryan Newbold <bnewbold@robocracy.org> 2019-05-22 15:53:21 -0700
commit: be86ebbdcee910f3187305470440189331a16911 (patch)
tree: 592e695cc7f5148f26d7333d31d7b3673de71e34
parent: b98ae12c4de21fc49caa42814cc645d9fda44958 (diff)
download: fatcat-be86ebbdcee910f3187305470440189331a16911.tar.gz
download: fatcat-be86ebbdcee910f3187305470440189331a16911.zip
more pubmed checks; handle PMID/DOI mismatch differently
-rw-r--r-- python/fatcat_tools/importers/pubmed.py | 9
1 files changed, 7 insertions, 2 deletions
diff --git a/python/fatcat_tools/importers/pubmed.py b/python/fatcat_tools/importers/pubmed.py
index eecf99f6..b33bbf13 100644
--- a/python/fatcat_tools/importers/pubmed.py
+++ b/python/fatcat_tools/importers/pubmed.py
@@ -378,6 +378,9 @@ class PubmedImporter(EntityImporter):
doi = identifiers.find("ArticleId", IdType="doi")
if doi:
doi = doi.string.lower()
+ if doi.startswith('doi:'):
+ doi = doi[4:]
+ assert doi.startswith('10.')
pmcid = identifiers.find("ArticleId", IdType="pmc")
if pmcid:
@@ -717,8 +720,10 @@ class PubmedImporter(EntityImporter):
if existing and existing.ext_ids.pmid and existing.ext_ids.pmid != re.ext_ids.pmid:
warnings.warn("PMID/DOI mismatch: release {}, pmid {} != {}".format(
existing.ident, existing.ext_ids.pmid, re.ext_ids.pmid))
- self.counts['exists-pmid-doi-mismatch'] += 1
- return False
+ self.counts['warn-pmid-doi-mismatch'] += 1
+ # don't clobber DOI, but do group together
+ re.ext_ids.doi = None
+ re.work_id = existing.work_id
if existing and existing.ext_ids.pmid and (existing.refs or not re.refs):
# TODO: any other reasons to do an update?