diff options
author | Bryan Newbold <bnewbold@archive.org> | 2020-01-28 23:03:50 -0800 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2020-01-28 23:03:50 -0800 |
commit | 052080ae28aeb2635f4b8917d4772e745ddbbf7b (patch) | |
tree | a0031b04a73e7f63f0ce774a99365bec36bf1581 | |
parent | 6d6bf1c448246c6534d7087eb1db5b9341796b61 (diff) | |
download | sandcrawler-052080ae28aeb2635f4b8917d4772e745ddbbf7b.tar.gz sandcrawler-052080ae28aeb2635f4b8917d4772e745ddbbf7b.zip |
grobid worker: catch PetaboxError also
-rw-r--r-- | python/sandcrawler/grobid.py | 4 |
1 files changed, 2 insertions, 2 deletions
diff --git a/python/sandcrawler/grobid.py b/python/sandcrawler/grobid.py
index bc886c2..cbb499d 100644
--- a/python/sandcrawler/grobid.py
+++ b/python/sandcrawler/grobid.py
@@ -4,7 +4,7 @@ import requests
 from grobid2json import teixml2json
 from .workers import SandcrawlerWorker
 from .misc import gen_file_metadata
-from .ia import WaybackClient, WaybackError
+from .ia import WaybackClient, WaybackError, PetaboxError
 
 class GrobidClient(object):
 
@@ -99,7 +99,7 @@ class GrobidWorker(SandcrawlerWorker):
                     offset=record['warc_offset'],
                     warc_path=record['warc_path'],
                 )
-            except WaybackError as we:
+            except (WaybackError, PetaboxError) as we:
                 return dict(
                     status="error-wayback",
                     error_msg=str(we),