about summary refs log tree commit diff stats
path: root/mapreduce/extraction_cdx_grobid.py
diff options
context:
space:
mode:
Diffstat (limited to 'mapreduce/extraction_cdx_grobid.py')
-rwxr-xr-x mapreduce/extraction_cdx_grobid.py 3
1 files changed, 2 insertions, 1 deletions
diff --git a/mapreduce/extraction_cdx_grobid.py b/mapreduce/extraction_cdx_grobid.py
index 9a0d795..708e170 100755
--- a/mapreduce/extraction_cdx_grobid.py
+++ b/mapreduce/extraction_cdx_grobid.py
@@ -109,7 +109,8 @@ class MRExtractCdxGrobid(MRJob):
rstore = ResourceStore(loaderfactory=CDXLoaderFactory())
gwb_record = rstore.load_resource(warc_uri, offset, c_size)
except wayback.exception.ResourceUnavailable as err:
- return None, dict(status="petabox_error", reason="failed to load file contents")
+ return None, dict(status="error",
+ reason="failed to load file contents from wayback/petabox")
if gwb_record.get_status()[0] != 200:
return None, dict(status="error",