From 21ad5cd9942044939c8203dd076ea080b6d55a61 Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Thu, 5 May 2022 11:21:29 -0700 Subject: ingest spn2: fix tests --- python/sandcrawler/__init__.py | 1 + python/sandcrawler/ia.py | 2 +- python/tests/test_ingest.py | 26 ++++++++++++ python/tests/test_savepagenow.py | 85 +++++++++++++++++++++++++++++++++++++--- 4 files changed, 108 insertions(+), 6 deletions(-) diff --git a/python/sandcrawler/__init__.py b/python/sandcrawler/__init__.py index 6718c57..469c2a2 100644 --- a/python/sandcrawler/__init__.py +++ b/python/sandcrawler/__init__.py @@ -7,6 +7,7 @@ from .ia import ( CdxRow, PetaboxError, ResourceResult, + SavePageNowBackoffError, SavePageNowClient, SavePageNowError, WarcResource, diff --git a/python/sandcrawler/ia.py b/python/sandcrawler/ia.py index 9c727ce..7365383 100644 --- a/python/sandcrawler/ia.py +++ b/python/sandcrawler/ia.py @@ -1012,7 +1012,7 @@ class SavePageNowClient: break # check if SPNv2 user has capacity available - resp = self.v2_session.get("https://web.archive.org/save/status/user") + resp = self.v2_session.get(f"{self.v2endpoint}/status/user") if resp.status_code == 429: raise SavePageNowBackoffError( f"SPNv2 availability API status_code: {resp.status_code}" diff --git a/python/tests/test_ingest.py b/python/tests/test_ingest.py index ad8c22e..e14a452 100644 --- a/python/tests/test_ingest.py +++ b/python/tests/test_ingest.py @@ -49,6 +49,19 @@ def test_ingest_success(ingest_worker_pdf): "ingest_type": "pdf", "base_url": "http://dummy-host/", } + responses.add( + responses.GET, + "http://dummy-spnv2/save/status/user", + status=200, + body=json.dumps( + { + "available": 23, + "daily_captures": 60295, + "daily_captures_limit": 300000, + "processing": 1, + } + ), + ) responses.add( responses.POST, "http://dummy-spnv2/save", @@ -135,6 +148,19 @@ def test_ingest_landing(ingest_worker): "ingest_type": "pdf", "base_url": "http://dummy-host/", } + responses.add( + responses.GET, + "http://dummy-spnv2/save/status/user", + status=200, + body=json.dumps( + { + "available": 23, + "daily_captures": 60295, + "daily_captures_limit": 300000, + "processing": 1, + } + ), + ) responses.add( responses.POST, "http://dummy-spnv2/save", diff --git a/python/tests/test_savepagenow.py b/python/tests/test_savepagenow.py index f3a5e46..add2c60 100644 --- a/python/tests/test_savepagenow.py +++ b/python/tests/test_savepagenow.py @@ -4,7 +4,7 @@ import pytest import responses from test_wayback import * -from sandcrawler import CdxPartial, SavePageNowClient, SavePageNowError +from sandcrawler import CdxPartial, SavePageNowBackoffError, SavePageNowClient, SavePageNowError TARGET = "http://dummy-target.dummy" JOB_ID = "e70f33c7-9eca-4c88-826d-26930564d7c8" @@ -116,6 +116,19 @@ def spn_client(): @responses.activate def test_savepagenow_success(spn_client): + responses.add( + responses.GET, + "http://dummy-spnv2/save/status/user", + status=200, + body=json.dumps( + { + "available": 23, + "daily_captures": 60295, + "daily_captures_limit": 300000, + "processing": 1, + } + ), + ) responses.add( responses.POST, "http://dummy-spnv2/save", @@ -143,7 +156,7 @@ def test_savepagenow_success(spn_client): resp = spn_client.save_url_now_v2(TARGET) - assert len(responses.calls) == 4 + assert len(responses.calls) == 5 assert resp.success is True assert resp.status == "success" @@ -156,6 +169,19 @@ def test_savepagenow_success(spn_client): @responses.activate def test_savepagenow_remote_error(spn_client): + responses.add( + responses.GET, + "http://dummy-spnv2/save/status/user", + status=200, + body=json.dumps( + { + "available": 23, + "daily_captures": 60295, + "daily_captures_limit": 300000, + "processing": 1, + } + ), + ) responses.add( responses.POST, "http://dummy-spnv2/save", @@ -177,7 +203,7 @@ def test_savepagenow_remote_error(spn_client): resp = spn_client.save_url_now_v2(TARGET) - assert len(responses.calls) == 3 + assert len(responses.calls) == 4 assert resp.success is False assert resp.status == ERROR_BODY["status_ext"] @@ -190,6 +216,19 @@ def test_savepagenow_remote_error(spn_client): @responses.activate def test_savepagenow_500(spn_client): + responses.add( + responses.GET, + "http://dummy-spnv2/save/status/user", + status=200, + body=json.dumps( + { + "available": 23, + "daily_captures": 60295, + "daily_captures_limit": 300000, + "processing": 1, + } + ), + ) responses.add( responses.POST, "http://dummy-spnv2/save", @@ -206,12 +245,48 @@ def test_savepagenow_500(spn_client): with pytest.raises(SavePageNowError): spn_client.save_url_now_v2(TARGET) - assert len(responses.calls) == 2 + assert len(responses.calls) == 3 + + +@responses.activate +def test_savepagenow_no_slots(spn_client): + + responses.add( + responses.GET, + "http://dummy-spnv2/save/status/user", + status=200, + body=json.dumps( + { + "available": 0, + "daily_captures": 60295, + "daily_captures_limit": 300000, + "processing": 1, + } + ), + ) + + with pytest.raises(SavePageNowBackoffError): + spn_client.save_url_now_v2(TARGET) + + assert len(responses.calls) == 1 @responses.activate def test_crawl_resource(spn_client, wayback_client): + responses.add( + responses.GET, + "http://dummy-spnv2/save/status/user", + status=200, + body=json.dumps( + { + "available": 23, + "daily_captures": 60295, + "daily_captures_limit": 300000, + "processing": 1, + } + ), + ) responses.add( responses.POST, "http://dummy-spnv2/save", @@ -244,7 +319,7 @@ def test_crawl_resource(spn_client, wayback_client): print("https://web.archive.org/web/{}id_/{}".format("20180326070330", TARGET + "/redirect")) resp = spn_client.crawl_resource(TARGET, wayback_client) - assert len(responses.calls) == 5 + assert len(responses.calls) == 6 assert resp.hit is True assert resp.status == "success" -- cgit v1.2.3