aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@archive.org> 2020-01-28 23:03:50 -0800
committer Bryan Newbold <bnewbold@archive.org> 2020-01-28 23:03:50 -0800
commit 052080ae28aeb2635f4b8917d4772e745ddbbf7b (patch)
tree a0031b04a73e7f63f0ce774a99365bec36bf1581
parent 6d6bf1c448246c6534d7087eb1db5b9341796b61 (diff)
download sandcrawler-052080ae28aeb2635f4b8917d4772e745ddbbf7b.tar.gz
sandcrawler-052080ae28aeb2635f4b8917d4772e745ddbbf7b.zip
grobid worker: catch PetaboxError also
-rw-r--r-- python/sandcrawler/grobid.py 4
1 files changed, 2 insertions, 2 deletions
diff --git a/python/sandcrawler/grobid.py b/python/sandcrawler/grobid.py
index bc886c2..cbb499d 100644
--- a/python/sandcrawler/grobid.py
+++ b/python/sandcrawler/grobid.py
@@ -4,7 +4,7 @@ import requests
from grobid2json import teixml2json
from .workers import SandcrawlerWorker
from .misc import gen_file_metadata
-from .ia import WaybackClient, WaybackError
+from .ia import WaybackClient, WaybackError, PetaboxError
class GrobidClient(object):
@@ -99,7 +99,7 @@ class GrobidWorker(SandcrawlerWorker):
offset=record['warc_offset'],
warc_path=record['warc_path'],
)
- except WaybackError as we:
+ except (WaybackError, PetaboxError) as we:
return dict(
status="error-wayback",
error_msg=str(we),