From 818a936be9480bb75e40d7e3723aed3ac8c1eee9 Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Tue, 14 Jan 2020 17:01:26 -0800 Subject: grobid worker fixes for newer ia lib refactors --- python/sandcrawler/grobid.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'python') diff --git a/python/sandcrawler/grobid.py b/python/sandcrawler/grobid.py index 31af974..b989591 100644 --- a/python/sandcrawler/grobid.py +++ b/python/sandcrawler/grobid.py @@ -85,8 +85,11 @@ class GrobidWorker(SandcrawlerWorker): if not self.wayback_client: raise Exception("wayback client not configured for this GrobidWorker") try: - blob = self.wayback_client.fetch_petabox_body(record['warc_csize'], - record['warc_offset'], record['warc_path']) + blob = self.wayback_client.fetch_petabox_body( + csize=record['warc_csize'], + offset=record['warc_offset'], + warc_path=record['warc_path'], + ) except WaybackError as we: return dict(status="error-wayback", error_msg=str(we), source=record) elif record.get('url') and record.get('datetime'): @@ -94,7 +97,10 @@ class GrobidWorker(SandcrawlerWorker): if not self.wayback_client: raise Exception("wayback client not configured for this GrobidWorker") try: - blob = self.wayback_client.fetch_warc_by_url_dt(record['url'], record['datetime']) + blob = self.wayback_client.fetch_replay_body( + url=record['url'], + datetime=record['datetime'], + ) except WaybackError as we: return dict(status="error-wayback", error_msg=str(we), source=record) elif record.get('item') and record.get('path'): -- cgit v1.2.3