From 7ec413416acb2b3d7da0be32b78982316b9c696f Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Wed, 21 Nov 2018 11:58:46 -0800 Subject: crossref importer checks for existing DOIs --- python/fatcat_tools/importers/crossref.py | 17 ++++++++++++++++- python/fatcat_tools/importers/matched.py | 6 +++--- 2 files changed, 19 insertions(+), 4 deletions(-) (limited to 'python/fatcat_tools/importers') diff --git a/python/fatcat_tools/importers/crossref.py b/python/fatcat_tools/importers/crossref.py index 475afdb0..385a8235 100644 --- a/python/fatcat_tools/importers/crossref.py +++ b/python/fatcat_tools/importers/crossref.py @@ -40,7 +40,7 @@ class CrossrefImporter(FatcatImporter): See https://github.com/CrossRef/rest-api-doc for JSON schema notes """ - def __init__(self, host_url, issn_map_file, extid_map_file=None, create_containers=True): + def __init__(self, host_url, issn_map_file, extid_map_file=None, create_containers=True, check_existing=True): super().__init__(host_url, issn_map_file) self.extid_map_db = None if extid_map_file: @@ -50,6 +50,7 @@ class CrossrefImporter(FatcatImporter): else: print("Not using external ID map") self.create_containers = create_containers + self.check_existing = check_existing def lookup_ext_ids(self, doi): if self.extid_map_db is None: @@ -85,6 +86,20 @@ class CrossrefImporter(FatcatImporter): 'book-track', 'proceedings-series'): return None + # lookup existing DOI + existing_release = None + if self.check_existing: + try: + existing_release = self.api.lookup_release(doi=obj['DOI'].lower()) + except fatcat_client.rest.ApiException as err: + if err.status != 404: + raise err + + # eventually we'll want to support "updates", but for now just skip if + # entity already exists + if existing_release: + return None + # contribs def do_contribs(obj_list, ctype): contribs = [] diff --git a/python/fatcat_tools/importers/matched.py b/python/fatcat_tools/importers/matched.py index 732fccbe..6f83dd23 100644 --- a/python/fatcat_tools/importers/matched.py +++ b/python/fatcat_tools/importers/matched.py @@ -37,12 +37,12 @@ class MatchedImporter(FatcatImporter): - core_id, wikidata_id, pmcid, pmid: not as lists """ - def __init__(self, host_url, skip_file_update=False, default_mime=None, + def __init__(self, host_url, skip_file_updates=False, default_mime=None, default_link_rel="web"): super().__init__(host_url) self.default_mime = default_mime self.default_link_rel = default_link_rel - self.skip_file_update = skip_file_update + self.skip_file_updates = skip_file_updates def make_url(self, raw): rel = self.default_link_rel @@ -61,7 +61,7 @@ class MatchedImporter(FatcatImporter): # lookup sha1, or create new entity fe = None - if not self.skip_file_update: + if not self.skip_file_updates: try: fe = self.api.lookup_file(sha1=sha1) except fatcat_client.rest.ApiException as err: -- cgit v1.2.3