From 615914b13e5a67bcf749cdc682c86c83c1e7bacc Mon Sep 17 00:00:00 2001
From: Bryan Newbold
Date: Thu, 25 Jun 2020 23:04:31 -0700
Subject: don't nest generic fetch errors under pdf_trio

This came from sloppy refactoring (and missing test coverage)
---
 python/sandcrawler/workers.py | 18 ++++++------------
 1 file changed, 6 insertions(+), 12 deletions(-)

diff --git a/python/sandcrawler/workers.py b/python/sandcrawler/workers.py
index 4a1d7a4..907bb5a 100644
--- a/python/sandcrawler/workers.py
+++ b/python/sandcrawler/workers.py
@@ -139,10 +139,8 @@ class SandcrawlerFetchWorker(SandcrawlerWorker):
                 return dict(
                     key=default_key,
                     source=record,
-                    pdf_trio=dict(
-                        status="error-wayback",
-                        error_msg=str(we),
-                    ),
+                    status="error-wayback",
+                    error_msg=str(we),
                 )
         elif record.get('url') and record.get('datetime'):
             # it's a partial CDX dict or something? fetch using WaybackClient
@@ -159,10 +157,8 @@ class SandcrawlerFetchWorker(SandcrawlerWorker):
                 return dict(
                     key=default_key,
                     source=record,
-                    pdf_trio=dict(
-                        status="error-wayback",
-                        error_msg=str(we),
-                    ),
+                    status="error-wayback",
+                    error_msg=str(we),
                 )
         elif record.get('item') and record.get('path'):
             # it's petabox link; fetch via HTTP
@@ -176,10 +172,8 @@ class SandcrawlerFetchWorker(SandcrawlerWorker):
                 return dict(
                     key=default_key,
                     source=record,
-                    pdf_trio=dict(
-                        status="error-petabox",
-                        error_msg=str(e),
-                    ),
+                    status="error-petabox",
+                    error_msg=str(e),
                 )
             blob = resp.content
         else:
-- 
cgit v1.2.3