about | summary | refs | log | tree | commit | diff | stats
path: root/python
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@archive.org> 2023-01-04 19:37:07 -0800
committer: Bryan Newbold <bnewbold@archive.org> 2023-01-04 19:37:07 -0800
commit: 5f73b6428f4b505880ef02429d57f11dc50d98e5 (patch)
tree: 744ecb99e69777c7eb902ade84dfb125367dddda /python
parent: 99cc7de073baee53bb97075377906743d364ab84 (diff)
download: sandcrawler-5f73b6428f4b505880ef02429d57f11dc50d98e5.tar.gz
sandcrawler-5f73b6428f4b505880ef02429d57f11dc50d98e5.zip
mypy lint fixes
Diffstat (limited to 'python')
-rw-r--r-- python/sandcrawler/minio.py 2
-rw-r--r-- python/sandcrawler/misc.py 2
-rw-r--r-- python/sandcrawler/pdftrio.py 4
-rw-r--r-- python/sandcrawler/workers.py 2
4 files changed, 5 insertions, 5 deletions
diff --git a/python/sandcrawler/minio.py b/python/sandcrawler/minio.py
index d47ab89..8836515 100644
--- a/python/sandcrawler/minio.py
+++ b/python/sandcrawler/minio.py
@@ -99,7 +99,7 @@ class SandcrawlerMinioClient(object):
sha1hex: str,
extension: str = "",
prefix: str = "",
- bucket: str = None,
+ bucket: Optional[str] = None,
) -> bytes:
"""
sha1hex is sha1 of the blob itself
diff --git a/python/sandcrawler/misc.py b/python/sandcrawler/misc.py
index 4ab9082..4e37036 100644
--- a/python/sandcrawler/misc.py
+++ b/python/sandcrawler/misc.py
@@ -281,7 +281,7 @@ def requests_retry_session(
retries: int = 10,
backoff_factor: int = 1,
status_forcelist: List[int] = [500, 502, 504],
- session: requests.Session = None,
+ session: Optional[requests.Session] = None,
) -> requests.Session:
"""
From: https://www.peterbe.com/plog/best-practice-with-retries-with-requests
diff --git a/python/sandcrawler/pdftrio.py b/python/sandcrawler/pdftrio.py
index 1119211..112df6a 100644
--- a/python/sandcrawler/pdftrio.py
+++ b/python/sandcrawler/pdftrio.py
@@ -82,7 +82,7 @@ class PdfTrioWorker(SandcrawlerFetchWorker):
self.pdftrio_client = pdftrio_client
self.sink = sink
- def process(self, record: Any, key: str = None) -> Any:
+ def process(self, record: Any, key: Optional[str] = None) -> Any:
start_process = time.time()
fetch_sec = None
@@ -126,7 +126,7 @@ class PdfTrioBlobWorker(SandcrawlerWorker):
self.sink = sink
self.mode = mode
- def process(self, blob: Any, key: str = None) -> Any:
+ def process(self, blob: Any, key: Optional[str] = None) -> Any:
start_process = time.time()
if not blob:
return None
diff --git a/python/sandcrawler/workers.py b/python/sandcrawler/workers.py
index 15363ea..356f050 100644
--- a/python/sandcrawler/workers.py
+++ b/python/sandcrawler/workers.py
@@ -108,7 +108,7 @@ class SandcrawlerWorker(object):
"""
return True
- def process(self, task: Any, key: str = None) -> Any:
+ def process(self, task: Any, key: Optional[str] = None) -> Any:
"""
Derived workers need to implement business logic here.