aboutsummaryrefslogtreecommitdiffstats
path: root/python/fatcat_tools
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@robocracy.org> 2018-11-21 11:58:46 -0800
committer: Bryan Newbold <bnewbold@robocracy.org> 2018-11-21 11:58:46 -0800
commit: 7ec413416acb2b3d7da0be32b78982316b9c696f (patch)
tree: cc0799316a0875d7aea6f1d9fddc03fb5e505410 /python/fatcat_tools
parent: 008366697aba8046fd33ae1f3707972d87c9a342 (diff)
download: fatcat-7ec413416acb2b3d7da0be32b78982316b9c696f.tar.gz
download: fatcat-7ec413416acb2b3d7da0be32b78982316b9c696f.zip
crossref importer checks for existing DOIs
Diffstat (limited to 'python/fatcat_tools')
-rw-r--r-- python/fatcat_tools/importers/crossref.py | 17
-rw-r--r-- python/fatcat_tools/importers/matched.py | 6
2 files changed, 19 insertions, 4 deletions
diff --git a/python/fatcat_tools/importers/crossref.py b/python/fatcat_tools/importers/crossref.py
index 475afdb0..385a8235 100644
--- a/python/fatcat_tools/importers/crossref.py
+++ b/python/fatcat_tools/importers/crossref.py
@@ -40,7 +40,7 @@ class CrossrefImporter(FatcatImporter):
See https://github.com/CrossRef/rest-api-doc for JSON schema notes
"""
- def __init__(self, host_url, issn_map_file, extid_map_file=None, create_containers=True):
+ def __init__(self, host_url, issn_map_file, extid_map_file=None, create_containers=True, check_existing=True):
super().__init__(host_url, issn_map_file)
self.extid_map_db = None
if extid_map_file:
@@ -50,6 +50,7 @@ class CrossrefImporter(FatcatImporter):
else:
print("Not using external ID map")
self.create_containers = create_containers
+ self.check_existing = check_existing
def lookup_ext_ids(self, doi):
if self.extid_map_db is None:
@@ -85,6 +86,20 @@ class CrossrefImporter(FatcatImporter):
'book-track', 'proceedings-series'):
return None
+ # lookup existing DOI
+ existing_release = None
+ if self.check_existing:
+ try:
+ existing_release = self.api.lookup_release(doi=obj['DOI'].lower())
+ except fatcat_client.rest.ApiException as err:
+ if err.status != 404:
+ raise err
+
+ # eventually we'll want to support "updates", but for now just skip if
+ # entity already exists
+ if existing_release:
+ return None
+
# contribs
def do_contribs(obj_list, ctype):
contribs = []
diff --git a/python/fatcat_tools/importers/matched.py b/python/fatcat_tools/importers/matched.py
index 732fccbe..6f83dd23 100644
--- a/python/fatcat_tools/importers/matched.py
+++ b/python/fatcat_tools/importers/matched.py
@@ -37,12 +37,12 @@ class MatchedImporter(FatcatImporter):
- core_id, wikidata_id, pmcid, pmid: not as lists
"""
- def __init__(self, host_url, skip_file_update=False, default_mime=None,
+ def __init__(self, host_url, skip_file_updates=False, default_mime=None,
default_link_rel="web"):
super().__init__(host_url)
self.default_mime = default_mime
self.default_link_rel = default_link_rel
- self.skip_file_update = skip_file_update
+ self.skip_file_updates = skip_file_updates
def make_url(self, raw):
rel = self.default_link_rel
@@ -61,7 +61,7 @@ class MatchedImporter(FatcatImporter):
# lookup sha1, or create new entity
fe = None
- if not self.skip_file_update:
+ if not self.skip_file_updates:
try:
fe = self.api.lookup_file(sha1=sha1)
except fatcat_client.rest.ApiException as err: