diff options
author | Bryan Newbold <bnewbold@robocracy.org> | 2020-03-28 20:17:29 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@robocracy.org> | 2021-11-10 12:55:49 -0800 |
commit | 13b107d8397c47a3159800a34307063f8e2b509e (patch) | |
tree | 1d435d36404b9d00f2b2c9c64f5340769fa367b1 | |
parent | a6d994fbc18debcf3860e6deb12eb54234a42839 (diff) | |
download | fatcat-13b107d8397c47a3159800a34307063f8e2b509e.tar.gz fatcat-13b107d8397c47a3159800a34307063f8e2b509e.zip |
pubmed: allow updates if PMCID does not exist yet
The intent of this change is to start updating Pubmed metadata records
when a PMCID has been assigned, but that ext_id hasn't been recorded in
fatcat yet.
It is likely that this change will result in some additional duplicate
PMCIDs in the catalog. But the principle is that the PMID is the primary
pubmed identifier, and all records with a PMID should have the PMCID
that pubmed indicates, even if there exists another incorrect record.
-rw-r--r-- | python/fatcat_tools/importers/pubmed.py | 7 |
1 files changed, 6 insertions, 1 deletions
diff --git a/python/fatcat_tools/importers/pubmed.py b/python/fatcat_tools/importers/pubmed.py
index d32fcefa..1cdb450b 100644
--- a/python/fatcat_tools/importers/pubmed.py
+++ b/python/fatcat_tools/importers/pubmed.py
@@ -768,7 +768,12 @@ class PubmedImporter(EntityImporter):
             self.counts["exists"] += 1
             return False
 
-        if existing and existing.ext_ids.pmid and (existing.refs or not re.refs):
+        if (
+            existing
+            and existing.ext_ids.pmid
+            and (existing.ext_ids.pmcid or not re.ext_ids.pmcid)
+            and (existing.refs or not re.refs)
+        ):
             # TODO: any other reasons to do an update?
             # don't update if it already has PMID
             self.counts["exists"] += 1