diff options
| author | Bryan Newbold <bnewbold@archive.org> | 2020-01-28 23:03:50 -0800 | 
|---|---|---|
| committer | Bryan Newbold <bnewbold@archive.org> | 2020-01-28 23:03:50 -0800 | 
| commit | 052080ae28aeb2635f4b8917d4772e745ddbbf7b (patch) | |
| tree | a0031b04a73e7f63f0ce774a99365bec36bf1581 /python | |
| parent | 6d6bf1c448246c6534d7087eb1db5b9341796b61 (diff) | |
| download | sandcrawler-052080ae28aeb2635f4b8917d4772e745ddbbf7b.tar.gz sandcrawler-052080ae28aeb2635f4b8917d4772e745ddbbf7b.zip | |
grobid worker: catch PetaboxError also
Diffstat (limited to 'python')
| -rw-r--r-- | python/sandcrawler/grobid.py | 4 | 
1 file changed, 2 insertions, 2 deletions
```diff
diff --git a/python/sandcrawler/grobid.py b/python/sandcrawler/grobid.py
index bc886c2..cbb499d 100644
--- a/python/sandcrawler/grobid.py
+++ b/python/sandcrawler/grobid.py
@@ -4,7 +4,7 @@ import requests
 from grobid2json import teixml2json
 from .workers import SandcrawlerWorker
 from .misc import gen_file_metadata
-from .ia import WaybackClient, WaybackError
+from .ia import WaybackClient, WaybackError, PetaboxError
 
 class GrobidClient(object):
 
@@ -99,7 +99,7 @@ class GrobidWorker(SandcrawlerWorker):
                     offset=record['warc_offset'],
                     warc_path=record['warc_path'],
                 )
-            except WaybackError as we:
+            except (WaybackError, PetaboxError) as we:
                 return dict(
                     status="error-wayback",
                     error_msg=str(we),
```
