From 88a51468cfb85b0607a3f5fe28ddafca46e104c2 Mon Sep 17 00:00:00 2001
From: Bryan Newbold
Date: Fri, 19 Apr 2019 10:27:32 -0700
Subject: handle API 400 in arabesque import (invalid extid)

---
 python/fatcat_tools/importers/arabesque.py | 21 ++++++++++++++-------
 1 file changed, 14 insertions(+), 7 deletions(-)

(limited to 'python/fatcat_tools/importers')

diff --git a/python/fatcat_tools/importers/arabesque.py b/python/fatcat_tools/importers/arabesque.py
index cdb8c7ad..ad211d21 100644
--- a/python/fatcat_tools/importers/arabesque.py
+++ b/python/fatcat_tools/importers/arabesque.py
@@ -93,21 +93,28 @@ class ArabesqueMatchImporter(EntityImporter):

         # check/cleanup DOI
         if self.extid_type == 'doi':
-            self.extid_type.replace('http://doi.org/', '')
-            self.extid_type.replace('https://doi.org/', '')
+            extid = extid.lower()
+            extid.replace('http://doi.org/', '')
+            extid.replace('https://doi.org/', '')
+            if extid.startswith('doi:'):
+                extid = extid[4:]
             if not extid.startswith('10.'):
-                self.counts['skip-bad-doi']
+                self.counts['skip-extid-invalid']
                 return None

         # lookup extid
         try:
             re = self.api.lookup_release(**{self.extid_type: extid})
         except fatcat_client.rest.ApiException as err:
-            if err.status != 404:
+            if err.status == 404:
+                # bail on 404 (release not in DB)
+                self.counts['skip-extid-not-found'] += 1
+                return None
+            elif err.status == 400:
+                self.counts['skip-extid-invalid'] += 1
+                return None
+            else:
                 raise err
-            # bail on 404 (release not in DB)
-            self.counts['skip-extid-not-found'] += 1
-            return None

         url = make_rel_url(row['final_url'], self.default_link_rel)
         if not url:
--
cgit v1.2.3