summary | refs | log | tree | commit | diff | stats
path: root/python/fatcat_tools/importers/pubmed.py
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@robocracy.org> 2019-05-16 13:53:15 -0700
committer: Bryan Newbold <bnewbold@robocracy.org> 2019-05-21 11:41:29 -0700
commit: 6ff79f47c7c7ae27b28685674672e58b7dd4d271 (patch)
tree: 2470f89de864207da8ccc92151cb35d5e20ba21b /python/fatcat_tools/importers/pubmed.py
parent: 300665927f578151321b0d91b28f8aadffcf227d (diff)
download: fatcat-6ff79f47c7c7ae27b28685674672e58b7dd4d271.tar.gz
fatcat-6ff79f47c7c7ae27b28685674672e58b7dd4d271.zip
tweaks to new imports/tests
Diffstat (limited to 'python/fatcat_tools/importers/pubmed.py')
-rw-r--r-- python/fatcat_tools/importers/pubmed.py 10
1 files changed, 4 insertions, 6 deletions
diff --git a/python/fatcat_tools/importers/pubmed.py b/python/fatcat_tools/importers/pubmed.py
index 1feb41cd..f83922a3 100644
--- a/python/fatcat_tools/importers/pubmed.py
+++ b/python/fatcat_tools/importers/pubmed.py
@@ -115,9 +115,6 @@ class PubmedImporter(EntityImporter):
XXX: full author names
"""
- def __init__(self):
- pass
-
def __init__(self, api, issn_map_file, **kwargs):
eg_desc = kwargs.get('editgroup_description',
@@ -181,7 +178,8 @@ class PubmedImporter(EntityImporter):
pmcid = identifiers.find("ArticleId", IdType="pmc")
if pmcid:
- pmcid = pmcid.string
+ # XXX: strip the version part? or retain?
+ pmcid = pmcid.string.split('.')[0]
release_type = None
pub_types = []
@@ -471,7 +469,7 @@ class PubmedImporter(EntityImporter):
self.counts['exists-pmid-doi-mismatch'] += 1
return False
- if existing and existing.ext_ids.pmid and existing.refs:
+ if existing and existing.ext_ids.pmid and (existing.refs or not re.refs):
# TODO: any other reasons to do an update?
# don't update if it already has PMID
self.counts['exists'] += 1
@@ -508,5 +506,5 @@ class PubmedImporter(EntityImporter):
#sys.exit(-1)
if __name__=='__main__':
- parser = PubMedParser()
+ parser = PubmedImporter(None, None)
parser.parse_file(open(sys.argv[1]))