diff options
Diffstat (limited to 'python/sandcrawler/workers.py')
| -rw-r--r-- | python/sandcrawler/workers.py | 8 | 
1 file changed, 4 insertions, 4 deletions
diff --git a/python/sandcrawler/workers.py b/python/sandcrawler/workers.py
index ba0358f..ceb6671 100644
--- a/python/sandcrawler/workers.py
+++ b/python/sandcrawler/workers.py
@@ -132,7 +132,7 @@ class SandcrawlerFetchWorker(SandcrawlerWorker):
                 raise Exception("wayback client not configured for this SandcrawlerFetchWorker")
             try:
                 start = time.time()
-                blob = self.wayback_client.fetch_petabox_body(
+                blob: bytes = self.wayback_client.fetch_petabox_body(
                     csize=record['warc_csize'],
                     offset=record['warc_offset'],
                     warc_path=record['warc_path'],
@@ -166,11 +166,11 @@ class SandcrawlerFetchWorker(SandcrawlerWorker):
         elif record.get('item') and record.get('path'):
             # it's petabox link; fetch via HTTP
             start = time.time()
-            resp = requests.get("https://archive.org/serve/{}/{}".format(
+            ia_resp = requests.get("https://archive.org/serve/{}/{}".format(
                 record['item'], record['path']))
             petabox_sec = time.time() - start
             try:
-                resp.raise_for_status()
+                ia_resp.raise_for_status()
             except Exception as e:
                 return dict(
                     key=default_key,
@@ -178,7 +178,7 @@ class SandcrawlerFetchWorker(SandcrawlerWorker):
                     status="error-petabox",
                     error_msg=str(e),
                 )
-            blob = resp.content
+            blob = ia_resp.content
         else:
             raise ValueError(
                 "not a CDX (wayback) or petabox (archive.org) dict; not sure how to proceed")
