diff options
-rw-r--r-- | python/sandcrawler/workers.py | 18 |
1 file changed, 6 insertions, 12 deletions
diff --git a/python/sandcrawler/workers.py b/python/sandcrawler/workers.py
index 4a1d7a4..907bb5a 100644
--- a/python/sandcrawler/workers.py
+++ b/python/sandcrawler/workers.py
@@ -139,10 +139,8 @@ class SandcrawlerFetchWorker(SandcrawlerWorker):
                 return dict(
                     key=default_key,
                     source=record,
-                    pdf_trio=dict(
-                        status="error-wayback",
-                        error_msg=str(we),
-                    ),
+                    status="error-wayback",
+                    error_msg=str(we),
                 )
         elif record.get('url') and record.get('datetime'):
             # it's a partial CDX dict or something? fetch using WaybackClient
@@ -159,10 +157,8 @@ class SandcrawlerFetchWorker(SandcrawlerWorker):
                 return dict(
                     key=default_key,
                     source=record,
-                    pdf_trio=dict(
-                        status="error-wayback",
-                        error_msg=str(we),
-                    ),
+                    status="error-wayback",
+                    error_msg=str(we),
                 )
         elif record.get('item') and record.get('path'):
             # it's petabox link; fetch via HTTP
@@ -176,10 +172,8 @@ class SandcrawlerFetchWorker(SandcrawlerWorker):
                 return dict(
                     key=default_key,
                     source=record,
-                    pdf_trio=dict(
-                        status="error-petabox",
-                        error_msg=str(e),
-                    ),
+                    status="error-petabox",
+                    error_msg=str(e),
                 )
             blob = resp.content
         else: