diff options
-rw-r--r-- | python/sandcrawler/grobid.py | 12 |
1 files changed, 9 insertions, 3 deletions
diff --git a/python/sandcrawler/grobid.py b/python/sandcrawler/grobid.py
index 31af974..b989591 100644
--- a/python/sandcrawler/grobid.py
+++ b/python/sandcrawler/grobid.py
@@ -85,8 +85,11 @@ class GrobidWorker(SandcrawlerWorker):
             if not self.wayback_client:
                 raise Exception("wayback client not configured for this GrobidWorker")
             try:
-                blob = self.wayback_client.fetch_petabox_body(record['warc_csize'],
-                    record['warc_offset'], record['warc_path'])
+                blob = self.wayback_client.fetch_petabox_body(
+                    csize=record['warc_csize'],
+                    offset=record['warc_offset'],
+                    warc_path=record['warc_path'],
+                )
             except WaybackError as we:
                 return dict(status="error-wayback", error_msg=str(we), source=record)
         elif record.get('url') and record.get('datetime'):
@@ -94,7 +97,10 @@ class GrobidWorker(SandcrawlerWorker):
             if not self.wayback_client:
                 raise Exception("wayback client not configured for this GrobidWorker")
             try:
-                blob = self.wayback_client.fetch_warc_by_url_dt(record['url'], record['datetime'])
+                blob = self.wayback_client.fetch_replay_body(
+                    url=record['url'],
+                    datetime=record['datetime'],
+                )
             except WaybackError as we:
                 return dict(status="error-wayback", error_msg=str(we), source=record)
         elif record.get('item') and record.get('path'):