about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- python/fatcat_tools/importers/pubmed.py | 9
1 file changed, 7 insertions, 2 deletions
diff --git a/python/fatcat_tools/importers/pubmed.py b/python/fatcat_tools/importers/pubmed.py
index eecf99f6..b33bbf13 100644
--- a/python/fatcat_tools/importers/pubmed.py
+++ b/python/fatcat_tools/importers/pubmed.py
@@ -378,6 +378,9 @@ class PubmedImporter(EntityImporter):
doi = identifiers.find("ArticleId", IdType="doi")
if doi:
doi = doi.string.lower()
+ if doi.startswith('doi:'):
+ doi = doi[4:]
+ assert doi.startswith('10.')
pmcid = identifiers.find("ArticleId", IdType="pmc")
if pmcid:
@@ -717,8 +720,10 @@ class PubmedImporter(EntityImporter):
if existing and existing.ext_ids.pmid and existing.ext_ids.pmid != re.ext_ids.pmid:
warnings.warn("PMID/DOI mismatch: release {}, pmid {} != {}".format(
existing.ident, existing.ext_ids.pmid, re.ext_ids.pmid))
- self.counts['exists-pmid-doi-mismatch'] += 1
- return False
+ self.counts['warn-pmid-doi-mismatch'] += 1
+ # don't clobber DOI, but do group together
+ re.ext_ids.doi = None
+ re.work_id = existing.work_id
if existing and existing.ext_ids.pmid and (existing.refs or not re.refs):
# TODO: any other reasons to do an update?