diff options
author | Bryan Newbold <bnewbold@robocracy.org> | 2019-04-19 10:27:32 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@robocracy.org> | 2019-04-19 10:27:32 -0700 |
commit | 88a51468cfb85b0607a3f5fe28ddafca46e104c2 (patch) | |
tree | 70548f4827b06238de97d6b250b94bec6ad259d3 | |
parent | 46c07cee6eb2161cdc35e954e207ce120340f1ea (diff) | |
download | fatcat-88a51468cfb85b0607a3f5fe28ddafca46e104c2.tar.gz fatcat-88a51468cfb85b0607a3f5fe28ddafca46e104c2.zip |
handle API 400 in arabesque import (invalid extid)
-rw-r--r-- | python/fatcat_tools/importers/arabesque.py | 21 |
1 files changed, 14 insertions, 7 deletions
```diff
diff --git a/python/fatcat_tools/importers/arabesque.py b/python/fatcat_tools/importers/arabesque.py
index cdb8c7ad..ad211d21 100644
--- a/python/fatcat_tools/importers/arabesque.py
+++ b/python/fatcat_tools/importers/arabesque.py
@@ -93,21 +93,28 @@ class ArabesqueMatchImporter(EntityImporter):
 
         # check/cleanup DOI
         if self.extid_type == 'doi':
-            self.extid_type.replace('http://doi.org/', '')
-            self.extid_type.replace('https://doi.org/', '')
+            extid = extid.lower()
+            extid.replace('http://doi.org/', '')
+            extid.replace('https://doi.org/', '')
+            if extid.startswith('doi:'):
+                extid = extid[4:]
             if not extid.startswith('10.'):
-                self.counts['skip-bad-doi']
+                self.counts['skip-extid-invalid']
                 return None
 
         # lookup extid
         try:
             re = self.api.lookup_release(**{self.extid_type: extid})
         except fatcat_client.rest.ApiException as err:
-            if err.status != 404:
+            if err.status == 404:
+                # bail on 404 (release not in DB)
+                self.counts['skip-extid-not-found'] += 1
+                return None
+            elif err.status == 400:
+                self.counts['skip-extid-invalid'] += 1
+                return None
+            else:
                 raise err
-            # bail on 404 (release not in DB)
-            self.counts['skip-extid-not-found'] += 1
-            return None
 
         url = make_rel_url(row['final_url'], self.default_link_rel)
         if not url:
```