about summary refs log tree commit diff stats
path: root/python/sandcrawler/fileset_strategies.py
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@archive.org> 2022-02-18 19:42:17 -0800
committer Bryan Newbold <bnewbold@archive.org> 2022-02-18 19:42:17 -0800
commit 6ff4dca52c413e66da10cac1fe3b5c7c3ee4315c (patch)
tree c0bdb06e14d758c2d7fcff39fee5194d6ba955df /python/sandcrawler/fileset_strategies.py
parent 8993f5208a811b9f79013789d4e5150b7366421f (diff)
download sandcrawler-6ff4dca52c413e66da10cac1fe3b5c7c3ee4315c.tar.gz
sandcrawler-6ff4dca52c413e66da10cac1fe3b5c7c3ee4315c.zip
ingest: handle more fileset failure modes
Diffstat (limited to 'python/sandcrawler/fileset_strategies.py')
-rw-r--r-- python/sandcrawler/fileset_strategies.py 6
1 files changed, 5 insertions, 1 deletions
diff --git a/python/sandcrawler/fileset_strategies.py b/python/sandcrawler/fileset_strategies.py
index 9bca551..cccc061 100644
--- a/python/sandcrawler/fileset_strategies.py
+++ b/python/sandcrawler/fileset_strategies.py
@@ -338,7 +338,11 @@ class WebFilesetStrategy(FilesetIngestStrategy):
continue
file_meta = gen_file_metadata(resource.body)
- file_meta, _html_resource = fix_transfer_encoding(file_meta, resource)
+ try:
+ file_meta, _html_resource = fix_transfer_encoding(file_meta, resource)
+ except:
+ m.status = "transfer-encoding-error"
+ continue
if self.ingest_strategy == "web-file":
file_file_meta = file_meta