From a39e4b864968fa73e475cc40af67203faef5236d Mon Sep 17 00:00:00 2001
From: Bryan Newbold
Date: Tue, 26 Oct 2021 17:56:44 -0700
Subject: more progress on type annotations

---
 python/sandcrawler/ingest_fileset.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'python/sandcrawler/ingest_fileset.py')

diff --git a/python/sandcrawler/ingest_fileset.py b/python/sandcrawler/ingest_fileset.py
index ea34948..defbeba 100644
--- a/python/sandcrawler/ingest_fileset.py
+++ b/python/sandcrawler/ingest_fileset.py
@@ -14,6 +14,7 @@ from sandcrawler.ia import (CdxApiError, PetaboxError, SavePageNowError, Wayback
                             WaybackError, cdx_to_dict, fix_transfer_encoding)
 from sandcrawler.ingest_file import IngestFileWorker
 from sandcrawler.misc import clean_url, gen_file_metadata
+from sandcrawler.worker import SandcrawlerWorker
 
 MAX_BODY_SIZE_BYTES = 128 * 1024 * 1024
 
@@ -31,7 +32,7 @@ class IngestFilesetWorker(IngestFileWorker):
         checking to see if content has been archived already)
     4. summarize status
     """
-    def __init__(self, sink=None, **kwargs):
+    def __init__(self, sink: Optional[SandcrawlerWorker] = None, **kwargs):
         super().__init__(sink=None, **kwargs)
         self.sink = sink
 
@@ -246,7 +247,7 @@ class IngestFilesetWorker(IngestFileWorker):
                                                  base_url,
                                                  force_recrawl=force_recrawl)
         result['request'] = request
-        if result.get('status') != None:
+        if result.get('status') is not None:
             result['request'] = request
             return result
 
-- 
cgit v1.2.3