From 13b107d8397c47a3159800a34307063f8e2b509e Mon Sep 17 00:00:00 2001
From: Bryan Newbold
Date: Sat, 28 Mar 2020 20:17:29 -0700
Subject: pubmed: allow updates if PMCID does not exist yet

The intent of this change is to start updating Pubmed metadata records
when a PMCID has been assigned, but that ext_id hasn't been recorded in
fatcat yet.

It is likely that this change will result in some additional duplicate
PMCIDs in the catalog. But the principle is that the PMID is the primary
pubmed identifier, and all records with a PMID should have the PMCID
that pubmed indicates, even if there exists another incorrect record.
---
 python/fatcat_tools/importers/pubmed.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'python/fatcat_tools')

diff --git a/python/fatcat_tools/importers/pubmed.py b/python/fatcat_tools/importers/pubmed.py
index d32fcefa..1cdb450b 100644
--- a/python/fatcat_tools/importers/pubmed.py
+++ b/python/fatcat_tools/importers/pubmed.py
@@ -768,7 +768,12 @@ class PubmedImporter(EntityImporter):
             self.counts["exists"] += 1
             return False
 
-        if existing and existing.ext_ids.pmid and (existing.refs or not re.refs):
+        if (
+            existing
+            and existing.ext_ids.pmid
+            and (existing.ext_ids.pmcid or not re.ext_ids.pmcid)
+            and (existing.refs or not re.refs)
+        ):
             # TODO: any other reasons to do an update?
             # don't update if it already has PMID
             self.counts["exists"] += 1
-- 
cgit v1.2.3