diff options
-rw-r--r-- | python/sandcrawler/fileset_strategies.py | 2 | ||||
-rw-r--r-- | python/sandcrawler/ingest_fileset.py | 6 | ||||
-rw-r--r-- | python/sandcrawler/workers.py | 2 |
3 files changed, 5 insertions, 5 deletions
diff --git a/python/sandcrawler/fileset_strategies.py b/python/sandcrawler/fileset_strategies.py
index cccc061..fcebbb8 100644
--- a/python/sandcrawler/fileset_strategies.py
+++ b/python/sandcrawler/fileset_strategies.py
@@ -340,7 +340,7 @@ class WebFilesetStrategy(FilesetIngestStrategy):
             file_meta = gen_file_metadata(resource.body)
             try:
                 file_meta, _html_resource = fix_transfer_encoding(file_meta, resource)
-            except:
+            except Exception:
                 m.status = "transfer-encoding-error"
                 continue
 
diff --git a/python/sandcrawler/ingest_fileset.py b/python/sandcrawler/ingest_fileset.py
index dceca03..3acbece 100644
--- a/python/sandcrawler/ingest_fileset.py
+++ b/python/sandcrawler/ingest_fileset.py
@@ -146,9 +146,9 @@ class IngestFilesetWorker(IngestFileWorker):
                 result["status"] = "wayback-content-error"
                 result["error_message"] = str(e)[:1600]
                 return result
-            except NotImplementedError:
-                result['status'] = 'not-implemented'
-                result['error_message'] = str(e)[:1600]
+            except NotImplementedError as e:
+                result["status"] = "not-implemented"
+                result["error_message"] = str(e)[:1600]
                 return result
 
             html_biblio = None
diff --git a/python/sandcrawler/workers.py b/python/sandcrawler/workers.py
index 597a0ac..15363ea 100644
--- a/python/sandcrawler/workers.py
+++ b/python/sandcrawler/workers.py
@@ -477,7 +477,7 @@ class ZipfilePusher(RecordPusher):
                 self.counts["total"] += 1
                 # NB doesn't really extract the file, just gives you a stream (file-like-object) for reading it
                 flo = archive.open(zipinfo, "r")
-                data = flo.read(2 ** 32)
+                data = flo.read(2**32)
                 flo.close()
                 if self.batch_size:
                     batch.append(data)