about | summary | refs | log | tree | commit | diff | stats
path: root/python/sandcrawler/ingest_fileset.py
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@archive.org> 2021-12-15 13:55:16 -0800
committer: Bryan Newbold <bnewbold@archive.org> 2021-12-15 13:55:16 -0800
commit: ffdc901fa067db55fe6cfeb8d0c3807d29df092c (patch)
tree: 8fbca44e2bbd1af30af360f84dbbf238c1c814c6 /python/sandcrawler/ingest_fileset.py
parent: 100e5348c4203dd346993a185ba3749105e91541 (diff)
download: sandcrawler-ffdc901fa067db55fe6cfeb8d0c3807d29df092c.tar.gz
download: sandcrawler-ffdc901fa067db55fe6cfeb8d0c3807d29df092c.zip
fileset ingest: more requests timeouts, sessions
Diffstat (limited to 'python/sandcrawler/ingest_fileset.py')
-rw-r--r-- python/sandcrawler/ingest_fileset.py 8
1 file changed, 7 insertions, 1 deletion
diff --git a/python/sandcrawler/ingest_fileset.py b/python/sandcrawler/ingest_fileset.py
index 732a6ab..542dfbc 100644
--- a/python/sandcrawler/ingest_fileset.py
+++ b/python/sandcrawler/ingest_fileset.py
@@ -325,12 +325,18 @@ class IngestFilesetWorker(IngestFileWorker):
result["error_message"] = str(e)[:1600]
return result
except requests.exceptions.HTTPError as e:
+ result["error_message"] = str(e)[:1600]
if e.response.status_code == 404:
result["status"] = "platform-404"
result["error_message"] = str(e)[:1600]
return result
else:
- raise e
+ result["status"] = "platform-http-error"
+ return result
+ except requests.exceptions.RequestException as e:
+ result["error_message"] = str(e)[:1600]
+ result["status"] = "platform-error"
+ return result
# print(dataset_meta, file=sys.stderr)
platform = dataset_meta.platform_name