From 052080ae28aeb2635f4b8917d4772e745ddbbf7b Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Tue, 28 Jan 2020 23:03:50 -0800 Subject: grobid worker: catch PetaboxError also --- python/sandcrawler/grobid.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/sandcrawler/grobid.py b/python/sandcrawler/grobid.py index bc886c2..cbb499d 100644 --- a/python/sandcrawler/grobid.py +++ b/python/sandcrawler/grobid.py @@ -4,7 +4,7 @@ import requests from grobid2json import teixml2json from .workers import SandcrawlerWorker from .misc import gen_file_metadata -from .ia import WaybackClient, WaybackError +from .ia import WaybackClient, WaybackError, PetaboxError class GrobidClient(object): @@ -99,7 +99,7 @@ class GrobidWorker(SandcrawlerWorker): offset=record['warc_offset'], warc_path=record['warc_path'], ) - except WaybackError as we: + except (WaybackError, PetaboxError) as we: return dict( status="error-wayback", error_msg=str(we), -- cgit v1.2.3