summary refs log tree commit diff stats
path: root/python/fatcat_tools/importers
diff options
context:
space:
mode:
Diffstat (limited to 'python/fatcat_tools/importers')
-rw-r--r-- python/fatcat_tools/importers/common.py 12
-rw-r--r-- python/fatcat_tools/importers/datacite.py 12
-rw-r--r-- python/fatcat_tools/importers/ingest.py 7
-rw-r--r-- python/fatcat_tools/importers/pubmed.py 1
4 files changed, 24 insertions, 8 deletions
diff --git a/python/fatcat_tools/importers/common.py b/python/fatcat_tools/importers/common.py
index 99c330a6..eafc6546 100644
--- a/python/fatcat_tools/importers/common.py
+++ b/python/fatcat_tools/importers/common.py
@@ -458,7 +458,8 @@ class EntityImporter:
creator_id = rv.ident
except ApiException as ae:
# If anything other than a 404 (not found), something is wrong
- assert ae.status == 404
+ if ae.status != 404:
+ raise ae
self._orcid_id_map[orcid] = creator_id # might be None
return creator_id
@@ -479,7 +480,8 @@ class EntityImporter:
release_id = rv.ident
except ApiException as ae:
# If anything other than a 404 (not found), something is wrong
- assert ae.status == 404
+ if ae.status != 404:
+ raise ae
self._doi_id_map[doi] = release_id # might be None
return release_id
@@ -495,7 +497,8 @@ class EntityImporter:
release_id = rv.ident
except ApiException as ae:
# If anything other than a 404 (not found), something is wrong
- assert ae.status == 404
+ if ae.status != 404:
+ raise ae
self._pmid_id_map[pmid] = release_id # might be None
return release_id
@@ -512,7 +515,8 @@ class EntityImporter:
container_id = rv.ident
except ApiException as ae:
# If anything other than a 404 (not found), something is wrong
- assert ae.status == 404
+ if ae.status != 404:
+ raise ae
self._issnl_id_map[issnl] = container_id # might be None
return container_id
diff --git a/python/fatcat_tools/importers/datacite.py b/python/fatcat_tools/importers/datacite.py
index 81f00876..d998f266 100644
--- a/python/fatcat_tools/importers/datacite.py
+++ b/python/fatcat_tools/importers/datacite.py
@@ -496,10 +496,12 @@ class DataciteImporter(EntityImporter):
if not desc.get('descriptionType') == 'Abstract':
continue
- # Description maybe a string or list.
+ # Description may be a string, int or list.
text = desc.get('description', '')
if not text:
continue
+ if isinstance(text, int):
+ text = '{}'.format(text)
if isinstance(text, list):
try:
text = "\n".join(text)
@@ -758,6 +760,14 @@ class DataciteImporter(EntityImporter):
given_name = clean(given_name)
if surname:
surname = clean(surname)
+
+ # Perform a final check that the name does not reduce to an empty
+ # string (e.g. a whitespace-only name); skip the entry if it does.
+ if name:
+ name = name.strip()
+ if not name:
+ continue
+
if raw_affiliation == '':
continue
diff --git a/python/fatcat_tools/importers/ingest.py b/python/fatcat_tools/importers/ingest.py
index 4772bfaa..6cf1604b 100644
--- a/python/fatcat_tools/importers/ingest.py
+++ b/python/fatcat_tools/importers/ingest.py
@@ -19,6 +19,7 @@ class IngestFileResultImporter(EntityImporter):
editgroup_description=eg_desc,
editgroup_extra=eg_extra,
**kwargs)
+ self.use_glutton_match = False
self.default_link_rel = kwargs.get("default_link_rel", "web")
assert self.default_link_rel
self.require_grobid = require_grobid
@@ -107,9 +108,10 @@ class IngestFileResultImporter(EntityImporter):
elif err.status == 400:
self.counts['warn-extid-invalid'] += 1
continue
+ raise err
release_ident = release.ident
break
- if not release_ident and row.get('grobid'):
+ if self.use_glutton_match and not release_ident and row.get('grobid'):
# try biblio-glutton extracted hit
if row['grobid'].get('fatcat_release'):
release_ident = row['grobid']['fatcat_release'].split('_')[-1]
@@ -197,8 +199,7 @@ class IngestFileResultImporter(EntityImporter):
if not existing:
return True
- # the following checks all assume there is an existing item
-
+ # NOTE: the following checks all assume there is an existing item
if (fe.release_ids[0] in existing.release_ids) and existing.urls:
# TODO: could still, in theory update with the new URL?
self.counts['exists'] += 1
diff --git a/python/fatcat_tools/importers/pubmed.py b/python/fatcat_tools/importers/pubmed.py
index abcb21d9..3d3e3a8c 100644
--- a/python/fatcat_tools/importers/pubmed.py
+++ b/python/fatcat_tools/importers/pubmed.py
@@ -782,6 +782,7 @@ class PubmedImporter(EntityImporter):
# NOTE: API behavior might change in the future?
if "release_edit_editgroup_id_ident_id_key" in err.body:
self.counts['skip-update-conflict'] += 1
+ return False
else:
raise err
finally: