small lint/typo/fmt fixes

author: Bryan Newbold <bnewbold@archive.org> 2022-02-24 16:35:38 -0800
committer: Bryan Newbold <bnewbold@archive.org> 2022-02-24 16:35:38 -0800
commit: 681eb8028f3e99796978288dcd10653909281f40 (patch)
tree: fe99f377d1dc304d20c63fc5eda3109e17fef4c0
parent: 7f7846b99042897afd5916b9263320c0e2775706 (diff)
download: sandcrawler-681eb8028f3e99796978288dcd10653909281f40.tar.gz sandcrawler-681eb8028f3e99796978288dcd10653909281f40.zip
3 files changed, 5 insertions, 5 deletions
diff --git a/python/sandcrawler/fileset_strategies.py b/python/sandcrawler/fileset_strategies.py
index cccc061..fcebbb8 100644
--- a/python/sandcrawler/fileset_strategies.py
+++ b/python/sandcrawler/fileset_strategies.py
@@ -340,7 +340,7 @@ class WebFilesetStrategy(FilesetIngestStrategy):
             file_meta = gen_file_metadata(resource.body)
             try:
                 file_meta, _html_resource = fix_transfer_encoding(file_meta, resource)
-            except:
+            except Exception:
                 m.status = "transfer-encoding-error"
                 continue
 
diff --git a/python/sandcrawler/ingest_fileset.py b/python/sandcrawler/ingest_fileset.py
index dceca03..3acbece 100644
--- a/python/sandcrawler/ingest_fileset.py
+++ b/python/sandcrawler/ingest_fileset.py
@@ -146,9 +146,9 @@ class IngestFilesetWorker(IngestFileWorker):
             result["status"] = "wayback-content-error"
             result["error_message"] = str(e)[:1600]
             return result
-        except NotImplementedError:
-            result['status'] = 'not-implemented'
-            result['error_message'] = str(e)[:1600]
+        except NotImplementedError as e:
+            result["status"] = "not-implemented"
+            result["error_message"] = str(e)[:1600]
             return result
 
         html_biblio = None
diff --git a/python/sandcrawler/workers.py b/python/sandcrawler/workers.py
index 597a0ac..15363ea 100644
--- a/python/sandcrawler/workers.py
+++ b/python/sandcrawler/workers.py
@@ -477,7 +477,7 @@ class ZipfilePusher(RecordPusher):
                 self.counts["total"] += 1
                 # NB doesn't really extract the file, just gives you a stream (file-like-object) for reading it
                 flo = archive.open(zipinfo, "r")
-                data = flo.read(2 ** 32)
+                data = flo.read(2**32)
                 flo.close()
                 if self.batch_size:
                     batch.append(data)
author	Bryan Newbold <bnewbold@archive.org>	2022-02-24 16:35:38 -0800
committer	Bryan Newbold <bnewbold@archive.org>	2022-02-24 16:35:38 -0800
commit	681eb8028f3e99796978288dcd10653909281f40 (patch)
tree	fe99f377d1dc304d20c63fc5eda3109e17fef4c0
parent	7f7846b99042897afd5916b9263320c0e2775706 (diff)
download	sandcrawler-681eb8028f3e99796978288dcd10653909281f40.tar.gz sandcrawler-681eb8028f3e99796978288dcd10653909281f40.zip