diff options
Diffstat (limited to 'mapreduce/extraction_cdx_grobid.py')
-rwxr-xr-x | mapreduce/extraction_cdx_grobid.py | 3 |
1 files changed, 2 insertions, 1 deletions
diff --git a/mapreduce/extraction_cdx_grobid.py b/mapreduce/extraction_cdx_grobid.py index 9a0d795..708e170 100755 --- a/mapreduce/extraction_cdx_grobid.py +++ b/mapreduce/extraction_cdx_grobid.py @@ -109,7 +109,8 @@ class MRExtractCdxGrobid(MRJob): rstore = ResourceStore(loaderfactory=CDXLoaderFactory()) gwb_record = rstore.load_resource(warc_uri, offset, c_size) except wayback.exception.ResourceUnavailable as err: - return None, dict(status="petabox_error", reason="failed to load file contents") + return None, dict(status="error", + reason="failed to load file contents from wayback/petabox") if gwb_record.get_status()[0] != 200: return None, dict(status="error", |