diff options
author | Bryan Newbold <bnewbold@archive.org> | 2020-11-04 18:10:33 -0800 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2020-11-04 18:10:33 -0800 |
commit | 8f0ff9996b70e7b7e2e5c5de01933394857da966 (patch) | |
tree | a315db5946c985fa4cebfb503777615b0170c6c6 | |
parent | de71aa92d4c7c9d14dfccc0188032d4e7b10090f (diff) | |
download | sandcrawler-8f0ff9996b70e7b7e2e5c5de01933394857da966.tar.gz sandcrawler-8f0ff9996b70e7b7e2e5c5de01933394857da966.zip |
persist: fix worker API/typing hacks (raw_key, key, key_str)
-rw-r--r-- | python/sandcrawler/persist.py | 18 |
1 file changed, 9 insertions, 9 deletions
```diff
diff --git a/python/sandcrawler/persist.py b/python/sandcrawler/persist.py
index 033bc91..fbd2bdb 100644
--- a/python/sandcrawler/persist.py
+++ b/python/sandcrawler/persist.py
@@ -532,24 +532,24 @@ class GenericPersistDocWorker(SandcrawlerWorker):
         self.s3_folder = kwargs.get('s3_folder', "unknown")
         self.doc_key = "unknown"
 
-    def process(self, record: dict, raw_key: Optional[AnyStr] = None) -> None:
+    def process(self, record: dict, key: Optional[AnyStr] = None) -> None:
 
         if record.get('status') != 'success' or not record.get(self.doc_key):
             return
 
-        assert raw_key is not None
-        if isinstance(raw_key, bytes):
-            key = raw_key.decode('utf-8')
-        elif isinstance(raw_key, str):
-            key = raw_key
-        assert len(key) == 40
+        assert key is not None
+        if isinstance(key, bytes):
+            key_str = key.decode('utf-8')
+        elif isinstance(key, str):
+            key_str = key
+        assert len(key_str) == 40
         if 'sha1hex' in record:
-            assert key == record['sha1hex']
+            assert key_str == record['sha1hex']
 
         resp = self.s3.put_blob(
             folder=self.s3_folder,
             blob=record[self.doc_key].encode('utf-8'),
-            sha1hex=key,
+            sha1hex=key_str,
             extension=self.s3_extension,
         )
         self.counts['s3-put'] += 1
```