about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@archive.org> 2022-02-24 16:35:38 -0800
committer Bryan Newbold <bnewbold@archive.org> 2022-02-24 16:35:38 -0800
commit 681eb8028f3e99796978288dcd10653909281f40 (patch)
tree fe99f377d1dc304d20c63fc5eda3109e17fef4c0
parent 7f7846b99042897afd5916b9263320c0e2775706 (diff)
download sandcrawler-681eb8028f3e99796978288dcd10653909281f40.tar.gz
sandcrawler-681eb8028f3e99796978288dcd10653909281f40.zip
small lint/typo/fmt fixes
-rw-r--r-- python/sandcrawler/fileset_strategies.py 2
-rw-r--r-- python/sandcrawler/ingest_fileset.py 6
-rw-r--r-- python/sandcrawler/workers.py 2
3 files changed, 5 insertions, 5 deletions
diff --git a/python/sandcrawler/fileset_strategies.py b/python/sandcrawler/fileset_strategies.py
index cccc061..fcebbb8 100644
--- a/python/sandcrawler/fileset_strategies.py
+++ b/python/sandcrawler/fileset_strategies.py
@@ -340,7 +340,7 @@ class WebFilesetStrategy(FilesetIngestStrategy):
file_meta = gen_file_metadata(resource.body)
try:
file_meta, _html_resource = fix_transfer_encoding(file_meta, resource)
- except:
+ except Exception:
m.status = "transfer-encoding-error"
continue
diff --git a/python/sandcrawler/ingest_fileset.py b/python/sandcrawler/ingest_fileset.py
index dceca03..3acbece 100644
--- a/python/sandcrawler/ingest_fileset.py
+++ b/python/sandcrawler/ingest_fileset.py
@@ -146,9 +146,9 @@ class IngestFilesetWorker(IngestFileWorker):
result["status"] = "wayback-content-error"
result["error_message"] = str(e)[:1600]
return result
- except NotImplementedError:
- result['status'] = 'not-implemented'
- result['error_message'] = str(e)[:1600]
+ except NotImplementedError as e:
+ result["status"] = "not-implemented"
+ result["error_message"] = str(e)[:1600]
return result
html_biblio = None
diff --git a/python/sandcrawler/workers.py b/python/sandcrawler/workers.py
index 597a0ac..15363ea 100644
--- a/python/sandcrawler/workers.py
+++ b/python/sandcrawler/workers.py
@@ -477,7 +477,7 @@ class ZipfilePusher(RecordPusher):
self.counts["total"] += 1
# NB doesn't really extract the file, just gives you a stream (file-like-object) for reading it
flo = archive.open(zipinfo, "r")
- data = flo.read(2 ** 32)
+ data = flo.read(2**32)
flo.close()
if self.batch_size:
batch.append(data)