aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r-- python/sandcrawler/grobid.py 12
1 files changed, 9 insertions, 3 deletions
diff --git a/python/sandcrawler/grobid.py b/python/sandcrawler/grobid.py
index 31af974..b989591 100644
--- a/python/sandcrawler/grobid.py
+++ b/python/sandcrawler/grobid.py
@@ -85,8 +85,11 @@ class GrobidWorker(SandcrawlerWorker):
if not self.wayback_client:
raise Exception("wayback client not configured for this GrobidWorker")
try:
- blob = self.wayback_client.fetch_petabox_body(record['warc_csize'],
- record['warc_offset'], record['warc_path'])
+ blob = self.wayback_client.fetch_petabox_body(
+ csize=record['warc_csize'],
+ offset=record['warc_offset'],
+ warc_path=record['warc_path'],
+ )
except WaybackError as we:
return dict(status="error-wayback", error_msg=str(we), source=record)
elif record.get('url') and record.get('datetime'):
@@ -94,7 +97,10 @@ class GrobidWorker(SandcrawlerWorker):
if not self.wayback_client:
raise Exception("wayback client not configured for this GrobidWorker")
try:
- blob = self.wayback_client.fetch_warc_by_url_dt(record['url'], record['datetime'])
+ blob = self.wayback_client.fetch_replay_body(
+ url=record['url'],
+ datetime=record['datetime'],
+ )
except WaybackError as we:
return dict(status="error-wayback", error_msg=str(we), source=record)
elif record.get('item') and record.get('path'):