aboutsummaryrefslogtreecommitdiffstats
path: root/python/tests/test_wayback.py
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@archive.org> 2020-01-08 17:03:59 -0800
committer Bryan Newbold <bnewbold@archive.org> 2020-01-09 16:29:37 -0800
commit 51e2b302d223dc79c38dc0339e66719fd38f067c (patch)
tree fcbd99c93362a183d744e2967b3954d3634481b6 /python/tests/test_wayback.py
parent 96e5ed0f0a8b6fc041fdc8076940cd82be891563 (diff)
download sandcrawler-51e2b302d223dc79c38dc0339e66719fd38f067c.tar.gz
sandcrawler-51e2b302d223dc79c38dc0339e66719fd38f067c.zip
more wayback and SPN tests and fixes
Diffstat (limited to 'python/tests/test_wayback.py')
-rw-r--r-- python/tests/test_wayback.py 40
1 files changed, 28 insertions, 12 deletions
diff --git a/python/tests/test_wayback.py b/python/tests/test_wayback.py
index 7e63ec7..eeb4b37 100644
--- a/python/tests/test_wayback.py
+++ b/python/tests/test_wayback.py
@@ -35,14 +35,7 @@ CDX_MULTI_HIT = [
def cdx_client():
client = CdxApiClient(
host_url="http://dummy-cdx/cdx",
- )
- return client
-
-@pytest.fixture
-def wayback_client(cdx_client):
- client = WaybackClient(
- cdx_client=cdx_client,
- petabox_webdata_secret="dummy-petabox-secret",
+ cdx_auth_token="dummy-token",
)
return client
@@ -102,9 +95,32 @@ def test_cdx_lookup_best(cdx_client):
assert resp.sha1b32 == CDX_BEST_SHA1B32
assert resp.warc_path == CDX_SINGLE_HIT[1][-1]
+WARC_TARGET = "http://fatcat.wiki/"
+WARC_BODY = "<html>some stuff</html>"
+
+@pytest.fixture
+def wayback_client(cdx_client, mocker):
+ client = WaybackClient(
+ cdx_client=cdx_client,
+ petabox_webdata_secret="dummy-petabox-secret",
+ )
+ # mock out the wayback store with mock stuff
+ client.rstore = mocker.Mock()
+ resource = mocker.Mock()
+ client.rstore.load_resource = mocker.MagicMock(return_value=resource)
+ resource.get_status = mocker.MagicMock(return_value=[200])
+ resource.get_location = mocker.MagicMock(return_value=[WARC_TARGET])
+ body = mocker.Mock()
+ resource.open_raw_content = mocker.MagicMock(return_value=body)
+ body.read = mocker.MagicMock(return_value=WARC_BODY)
+
+ return client
+
def test_wayback_fetch(wayback_client, mocker):
- # mock something
- #mocker.patch('fatcat_tools.harvest.harvest_common.HarvestState.initialize_from_kafka')
- #blah = mocker.Mock()
- return
+ resp = wayback_client.fetch_petabox(123, 456789, "here/there.warc.gz")
+ assert resp.body == WARC_BODY
+ assert resp.location == WARC_TARGET
+
+ resp = wayback_client.fetch_petabox_body(123, 456789, "here/there.warc.gz")
+ assert resp == WARC_BODY