diff options
author | Bryan Newbold <bnewbold@archive.org> | 2020-06-25 23:04:31 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2020-06-25 23:04:33 -0700 |
commit | 615914b13e5a67bcf749cdc682c86c83c1e7bacc (patch) | |
tree | 8ad87858ef525f8c6e08a797be12573abf68d77b | |
parent | 96513b5cc26dd91375a6433e49d24b2d9eb2bea9 (diff) | |
download | sandcrawler-615914b13e5a67bcf749cdc682c86c83c1e7bacc.tar.gz sandcrawler-615914b13e5a67bcf749cdc682c86c83c1e7bacc.zip |
don't nest generic fetch errors under pdf_trio
This came from sloppy refactoring (and missing test coverage)
-rw-r--r-- | python/sandcrawler/workers.py | 18 |
1 file changed, 6 insertions, 12 deletions
```diff
diff --git a/python/sandcrawler/workers.py b/python/sandcrawler/workers.py
index 4a1d7a4..907bb5a 100644
--- a/python/sandcrawler/workers.py
+++ b/python/sandcrawler/workers.py
@@ -139,10 +139,8 @@ class SandcrawlerFetchWorker(SandcrawlerWorker):
                 return dict(
                     key=default_key,
                     source=record,
-                    pdf_trio=dict(
-                        status="error-wayback",
-                        error_msg=str(we),
-                    ),
+                    status="error-wayback",
+                    error_msg=str(we),
                 )
         elif record.get('url') and record.get('datetime'):
             # it's a partial CDX dict or something? fetch using WaybackClient
@@ -159,10 +157,8 @@ class SandcrawlerFetchWorker(SandcrawlerWorker):
                 return dict(
                     key=default_key,
                     source=record,
-                    pdf_trio=dict(
-                        status="error-wayback",
-                        error_msg=str(we),
-                    ),
+                    status="error-wayback",
+                    error_msg=str(we),
                 )
         elif record.get('item') and record.get('path'):
             # it's petabox link; fetch via HTTP
@@ -176,10 +172,8 @@ class SandcrawlerFetchWorker(SandcrawlerWorker):
                 return dict(
                     key=default_key,
                     source=record,
-                    pdf_trio=dict(
-                        status="error-petabox",
-                        error_msg=str(e),
-                    ),
+                    status="error-petabox",
+                    error_msg=str(e),
                 )
             blob = resp.content
         else:
```