about summary refs log tree commit diff stats
path: root/python/sandcrawler/workers.py
diff options
context:
space:
mode:
Diffstat (limited to 'python/sandcrawler/workers.py')
-rw-r--r-- python/sandcrawler/workers.py 18
1 files changed, 6 insertions, 12 deletions
diff --git a/python/sandcrawler/workers.py b/python/sandcrawler/workers.py
index 4a1d7a4..907bb5a 100644
--- a/python/sandcrawler/workers.py
+++ b/python/sandcrawler/workers.py
@@ -139,10 +139,8 @@ class SandcrawlerFetchWorker(SandcrawlerWorker):
return dict(
key=default_key,
source=record,
- pdf_trio=dict(
- status="error-wayback",
- error_msg=str(we),
- ),
+ status="error-wayback",
+ error_msg=str(we),
)
elif record.get('url') and record.get('datetime'):
# it's a partial CDX dict or something? fetch using WaybackClient
@@ -159,10 +157,8 @@ class SandcrawlerFetchWorker(SandcrawlerWorker):
return dict(
key=default_key,
source=record,
- pdf_trio=dict(
- status="error-wayback",
- error_msg=str(we),
- ),
+ status="error-wayback",
+ error_msg=str(we),
)
elif record.get('item') and record.get('path'):
# it's petabox link; fetch via HTTP
@@ -176,10 +172,8 @@ class SandcrawlerFetchWorker(SandcrawlerWorker):
return dict(
key=default_key,
source=record,
- pdf_trio=dict(
- status="error-petabox",
- error_msg=str(e),
- ),
+ status="error-petabox",
+ error_msg=str(e),
)
blob = resp.content
else: