about | summary | refs | log | tree | commit | diff | stats
path: root/python/sandcrawler/grobid.py
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@archive.org> 2020-01-08 17:11:23 -0800
committer: Bryan Newbold <bnewbold@archive.org> 2020-01-09 16:29:37 -0800
commit: cc9c911e714bc29b1ccd9133d56bec3960752a07 (patch)
tree: 718a0484cd145f02e8794331317dfe3155157903 /python/sandcrawler/grobid.py
parent: 51e2b302d223dc79c38dc0339e66719fd38f067c (diff)
download: sandcrawler-cc9c911e714bc29b1ccd9133d56bec3960752a07.tar.gz
download: sandcrawler-cc9c911e714bc29b1ccd9133d56bec3960752a07.zip
fix grobid tests for new wayback refactors
Diffstat (limited to 'python/sandcrawler/grobid.py')
-rw-r--r-- python/sandcrawler/grobid.py 6
1 files changed, 3 insertions, 3 deletions
diff --git a/python/sandcrawler/grobid.py b/python/sandcrawler/grobid.py
index 9fd5ad4..31af974 100644
--- a/python/sandcrawler/grobid.py
+++ b/python/sandcrawler/grobid.py
@@ -85,8 +85,8 @@ class GrobidWorker(SandcrawlerWorker):
if not self.wayback_client:
raise Exception("wayback client not configured for this GrobidWorker")
try:
- blob = self.wayback_client.fetch_warc_content(record['warc_path'],
- record['warc_offset'], record['warc_csize'])
+ blob = self.wayback_client.fetch_petabox_body(record['warc_csize'],
+ record['warc_offset'], record['warc_path'])
except WaybackError as we:
return dict(status="error-wayback", error_msg=str(we), source=record)
elif record.get('url') and record.get('datetime'):
@@ -94,7 +94,7 @@ class GrobidWorker(SandcrawlerWorker):
if not self.wayback_client:
raise Exception("wayback client not configured for this GrobidWorker")
try:
- blob = self.wayback_client.fetch_url_datetime(record['url'], record['datetime'])
+ blob = self.wayback_client.fetch_warc_by_url_dt(record['url'], record['datetime'])
except WaybackError as we:
return dict(status="error-wayback", error_msg=str(we), source=record)
elif record.get('item') and record.get('path'):