diff options
Diffstat (limited to 'python')
-rw-r--r-- | python/sandcrawler/__init__.py | 1 | ||||
-rw-r--r-- | python/sandcrawler/ia.py | 2 | ||||
-rw-r--r-- | python/tests/test_ingest.py | 26 | ||||
-rw-r--r-- | python/tests/test_savepagenow.py | 85 |
4 files changed, 108 insertions, 6 deletions
diff --git a/python/sandcrawler/__init__.py b/python/sandcrawler/__init__.py index 6718c57..469c2a2 100644 --- a/python/sandcrawler/__init__.py +++ b/python/sandcrawler/__init__.py @@ -7,6 +7,7 @@ from .ia import ( CdxRow, PetaboxError, ResourceResult, + SavePageNowBackoffError, SavePageNowClient, SavePageNowError, WarcResource, diff --git a/python/sandcrawler/ia.py b/python/sandcrawler/ia.py index 9c727ce..7365383 100644 --- a/python/sandcrawler/ia.py +++ b/python/sandcrawler/ia.py @@ -1012,7 +1012,7 @@ class SavePageNowClient: break # check if SPNv2 user has capacity available - resp = self.v2_session.get("https://web.archive.org/save/status/user") + resp = self.v2_session.get(f"{self.v2endpoint}/status/user") if resp.status_code == 429: raise SavePageNowBackoffError( f"SPNv2 availability API status_code: {resp.status_code}" diff --git a/python/tests/test_ingest.py b/python/tests/test_ingest.py index ad8c22e..e14a452 100644 --- a/python/tests/test_ingest.py +++ b/python/tests/test_ingest.py @@ -50,6 +50,19 @@ def test_ingest_success(ingest_worker_pdf): "base_url": "http://dummy-host/", } responses.add( + responses.GET, + "http://dummy-spnv2/save/status/user", + status=200, + body=json.dumps( + { + "available": 23, + "daily_captures": 60295, + "daily_captures_limit": 300000, + "processing": 1, + } + ), + ) + responses.add( responses.POST, "http://dummy-spnv2/save", status=200, @@ -136,6 +149,19 @@ def test_ingest_landing(ingest_worker): "base_url": "http://dummy-host/", } responses.add( + responses.GET, + "http://dummy-spnv2/save/status/user", + status=200, + body=json.dumps( + { + "available": 23, + "daily_captures": 60295, + "daily_captures_limit": 300000, + "processing": 1, + } + ), + ) + responses.add( responses.POST, "http://dummy-spnv2/save", status=200, diff --git a/python/tests/test_savepagenow.py b/python/tests/test_savepagenow.py index f3a5e46..add2c60 100644 --- a/python/tests/test_savepagenow.py +++ b/python/tests/test_savepagenow.py @@ -4,7 +4,7 @@ import pytest import responses from test_wayback import * -from sandcrawler import CdxPartial, SavePageNowClient, SavePageNowError +from sandcrawler import CdxPartial, SavePageNowBackoffError, SavePageNowClient, SavePageNowError TARGET = "http://dummy-target.dummy" JOB_ID = "e70f33c7-9eca-4c88-826d-26930564d7c8" @@ -117,6 +117,19 @@ def spn_client(): def test_savepagenow_success(spn_client): responses.add( + responses.GET, + "http://dummy-spnv2/save/status/user", + status=200, + body=json.dumps( + { + "available": 23, + "daily_captures": 60295, + "daily_captures_limit": 300000, + "processing": 1, + } + ), + ) + responses.add( responses.POST, "http://dummy-spnv2/save", status=200, @@ -143,7 +156,7 @@ def test_savepagenow_success(spn_client): resp = spn_client.save_url_now_v2(TARGET) - assert len(responses.calls) == 4 + assert len(responses.calls) == 5 assert resp.success is True assert resp.status == "success" @@ -157,6 +170,19 @@ def test_savepagenow_success(spn_client): def test_savepagenow_remote_error(spn_client): responses.add( + responses.GET, + "http://dummy-spnv2/save/status/user", + status=200, + body=json.dumps( + { + "available": 23, + "daily_captures": 60295, + "daily_captures_limit": 300000, + "processing": 1, + } + ), + ) + responses.add( responses.POST, "http://dummy-spnv2/save", status=200, @@ -177,7 +203,7 @@ def test_savepagenow_remote_error(spn_client): resp = spn_client.save_url_now_v2(TARGET) - assert len(responses.calls) == 3 + assert len(responses.calls) == 4 assert resp.success is False assert resp.status == ERROR_BODY["status_ext"] @@ -191,6 +217,19 @@ def test_savepagenow_remote_error(spn_client): def test_savepagenow_500(spn_client): responses.add( + responses.GET, + "http://dummy-spnv2/save/status/user", + status=200, + body=json.dumps( + { + "available": 23, + "daily_captures": 60295, + "daily_captures_limit": 300000, + "processing": 1, + } + ), + ) + responses.add( responses.POST, "http://dummy-spnv2/save", status=200, @@ -206,13 +245,49 @@ def test_savepagenow_500(spn_client): with pytest.raises(SavePageNowError): spn_client.save_url_now_v2(TARGET) - assert len(responses.calls) == 2 + assert len(responses.calls) == 3 + + +@responses.activate +def test_savepagenow_no_slots(spn_client): + + responses.add( + responses.GET, + "http://dummy-spnv2/save/status/user", + status=200, + body=json.dumps( + { + "available": 0, + "daily_captures": 60295, + "daily_captures_limit": 300000, + "processing": 1, + } + ), + ) + + with pytest.raises(SavePageNowBackoffError): + spn_client.save_url_now_v2(TARGET) + + assert len(responses.calls) == 1 @responses.activate def test_crawl_resource(spn_client, wayback_client): responses.add( + responses.GET, + "http://dummy-spnv2/save/status/user", + status=200, + body=json.dumps( + { + "available": 23, + "daily_captures": 60295, + "daily_captures_limit": 300000, + "processing": 1, + } + ), + ) + responses.add( responses.POST, "http://dummy-spnv2/save", status=200, @@ -244,7 +319,7 @@ def test_crawl_resource(spn_client, wayback_client): print("https://web.archive.org/web/{}id_/{}".format("20180326070330", TARGET + "/redirect")) resp = spn_client.crawl_resource(TARGET, wayback_client) - assert len(responses.calls) == 5 + assert len(responses.calls) == 6 assert resp.hit is True assert resp.status == "success" |