diff options
author | Bryan Newbold <bnewbold@archive.org> | 2020-01-08 17:11:23 -0800 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2020-01-09 16:29:37 -0800 |
commit | cc9c911e714bc29b1ccd9133d56bec3960752a07 (patch) | |
tree | 718a0484cd145f02e8794331317dfe3155157903 /python/sandcrawler | |
parent | 51e2b302d223dc79c38dc0339e66719fd38f067c (diff) | |
download | sandcrawler-cc9c911e714bc29b1ccd9133d56bec3960752a07.tar.gz sandcrawler-cc9c911e714bc29b1ccd9133d56bec3960752a07.zip |
fix grobid tests for new wayback refactors
Diffstat (limited to 'python/sandcrawler')
-rw-r--r-- | python/sandcrawler/grobid.py | 6 |
1 files changed, 3 insertions, 3 deletions
diff --git a/python/sandcrawler/grobid.py b/python/sandcrawler/grobid.py
index 9fd5ad4..31af974 100644
--- a/python/sandcrawler/grobid.py
+++ b/python/sandcrawler/grobid.py
@@ -85,8 +85,8 @@ class GrobidWorker(SandcrawlerWorker):
             if not self.wayback_client:
                 raise Exception("wayback client not configured for this GrobidWorker")
             try:
-                blob = self.wayback_client.fetch_warc_content(record['warc_path'],
-                    record['warc_offset'], record['warc_csize'])
+                blob = self.wayback_client.fetch_petabox_body(record['warc_csize'],
+                    record['warc_offset'], record['warc_path'])
             except WaybackError as we:
                 return dict(status="error-wayback", error_msg=str(we), source=record)
         elif record.get('url') and record.get('datetime'):
@@ -94,7 +94,7 @@ class GrobidWorker(SandcrawlerWorker):
             if not self.wayback_client:
                 raise Exception("wayback client not configured for this GrobidWorker")
             try:
-                blob = self.wayback_client.fetch_url_datetime(record['url'], record['datetime'])
+                blob = self.wayback_client.fetch_warc_by_url_dt(record['url'], record['datetime'])
             except WaybackError as we:
                 return dict(status="error-wayback", error_msg=str(we), source=record)
         elif record.get('item') and record.get('path'):