diff options
| author | Bryan Newbold <bnewbold@robocracy.org> | 2018-11-21 11:58:46 -0800 | 
|---|---|---|
| committer | Bryan Newbold <bnewbold@robocracy.org> | 2018-11-21 11:58:46 -0800 | 
| commit | 7ec413416acb2b3d7da0be32b78982316b9c696f (patch) | |
| tree | cc0799316a0875d7aea6f1d9fddc03fb5e505410 /python/fatcat_tools | |
| parent | 008366697aba8046fd33ae1f3707972d87c9a342 (diff) | |
| download | fatcat-7ec413416acb2b3d7da0be32b78982316b9c696f.tar.gz fatcat-7ec413416acb2b3d7da0be32b78982316b9c696f.zip | |
crossref importer checks for existing DOIs
Diffstat (limited to 'python/fatcat_tools')
| -rw-r--r-- | python/fatcat_tools/importers/crossref.py | 17 | ||||
| -rw-r--r-- | python/fatcat_tools/importers/matched.py | 6 | 
2 files changed, 19 insertions, 4 deletions
```diff
diff --git a/python/fatcat_tools/importers/crossref.py b/python/fatcat_tools/importers/crossref.py
index 475afdb0..385a8235 100644
--- a/python/fatcat_tools/importers/crossref.py
+++ b/python/fatcat_tools/importers/crossref.py
@@ -40,7 +40,7 @@ class CrossrefImporter(FatcatImporter):
     See https://github.com/CrossRef/rest-api-doc for JSON schema notes
     """
 
-    def __init__(self, host_url, issn_map_file, extid_map_file=None, create_containers=True):
+    def __init__(self, host_url, issn_map_file, extid_map_file=None, create_containers=True, check_existing=True):
         super().__init__(host_url, issn_map_file)
         self.extid_map_db = None
         if extid_map_file:
@@ -50,6 +50,7 @@ class CrossrefImporter(FatcatImporter):
         else:
             print("Not using external ID map")
         self.create_containers = create_containers
+        self.check_existing = check_existing
 
     def lookup_ext_ids(self, doi):
         if self.extid_map_db is None:
@@ -85,6 +86,20 @@ class CrossrefImporter(FatcatImporter):
                 'book-track', 'proceedings-series'):
             return None
 
+        # lookup existing DOI
+        existing_release = None
+        if self.check_existing:
+            try:
+                existing_release = self.api.lookup_release(doi=obj['DOI'].lower())
+            except fatcat_client.rest.ApiException as err:
+                if err.status != 404:
+                    raise err
+
+        # eventually we'll want to support "updates", but for now just skip if
+        # entity already exists
+        if existing_release:
+            return None
+
         # contribs
         def do_contribs(obj_list, ctype):
             contribs = []
diff --git a/python/fatcat_tools/importers/matched.py b/python/fatcat_tools/importers/matched.py
index 732fccbe..6f83dd23 100644
--- a/python/fatcat_tools/importers/matched.py
+++ b/python/fatcat_tools/importers/matched.py
@@ -37,12 +37,12 @@ class MatchedImporter(FatcatImporter):
     - core_id, wikidata_id, pmcid, pmid: not as lists
     """
 
-    def __init__(self, host_url, skip_file_update=False, default_mime=None,
+    def __init__(self, host_url, skip_file_updates=False, default_mime=None,
             default_link_rel="web"):
         super().__init__(host_url)
         self.default_mime = default_mime
         self.default_link_rel = default_link_rel
-        self.skip_file_update = skip_file_update
+        self.skip_file_updates = skip_file_updates
 
     def make_url(self, raw):
         rel = self.default_link_rel
@@ -61,7 +61,7 @@ class MatchedImporter(FatcatImporter):
 
         # lookup sha1, or create new entity
         fe = None
-        if not self.skip_file_update:
+        if not self.skip_file_updates:
             try:
                 fe = self.api.lookup_file(sha1=sha1)
             except fatcat_client.rest.ApiException as err:
```
