about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@archive.org> 2022-05-05 11:21:29 -0700
committer: Bryan Newbold <bnewbold@archive.org> 2022-05-05 11:21:29 -0700
commit: 21ad5cd9942044939c8203dd076ea080b6d55a61 (patch)
tree: 2ffbf6a641302615275fecf233eb8351c3ecf0a1
parent: 1f9ca570bd168154a72adcd2454b992dbc7e8d0a (diff)
download: sandcrawler-21ad5cd9942044939c8203dd076ea080b6d55a61.tar.gz
download: sandcrawler-21ad5cd9942044939c8203dd076ea080b6d55a61.zip
ingest spn2: fix tests
-rw-r--r-- python/sandcrawler/__init__.py 1
-rw-r--r-- python/sandcrawler/ia.py 2
-rw-r--r-- python/tests/test_ingest.py 26
-rw-r--r-- python/tests/test_savepagenow.py 85
4 files changed, 108 insertions, 6 deletions
diff --git a/python/sandcrawler/__init__.py b/python/sandcrawler/__init__.py
index 6718c57..469c2a2 100644
--- a/python/sandcrawler/__init__.py
+++ b/python/sandcrawler/__init__.py
@@ -7,6 +7,7 @@ from .ia import (
CdxRow,
PetaboxError,
ResourceResult,
+ SavePageNowBackoffError,
SavePageNowClient,
SavePageNowError,
WarcResource,
diff --git a/python/sandcrawler/ia.py b/python/sandcrawler/ia.py
index 9c727ce..7365383 100644
--- a/python/sandcrawler/ia.py
+++ b/python/sandcrawler/ia.py
@@ -1012,7 +1012,7 @@ class SavePageNowClient:
break
# check if SPNv2 user has capacity available
- resp = self.v2_session.get("https://web.archive.org/save/status/user")
+ resp = self.v2_session.get(f"{self.v2endpoint}/status/user")
if resp.status_code == 429:
raise SavePageNowBackoffError(
f"SPNv2 availability API status_code: {resp.status_code}"
diff --git a/python/tests/test_ingest.py b/python/tests/test_ingest.py
index ad8c22e..e14a452 100644
--- a/python/tests/test_ingest.py
+++ b/python/tests/test_ingest.py
@@ -50,6 +50,19 @@ def test_ingest_success(ingest_worker_pdf):
"base_url": "http://dummy-host/",
}
responses.add(
+ responses.GET,
+ "http://dummy-spnv2/save/status/user",
+ status=200,
+ body=json.dumps(
+ {
+ "available": 23,
+ "daily_captures": 60295,
+ "daily_captures_limit": 300000,
+ "processing": 1,
+ }
+ ),
+ )
+ responses.add(
responses.POST,
"http://dummy-spnv2/save",
status=200,
@@ -136,6 +149,19 @@ def test_ingest_landing(ingest_worker):
"base_url": "http://dummy-host/",
}
responses.add(
+ responses.GET,
+ "http://dummy-spnv2/save/status/user",
+ status=200,
+ body=json.dumps(
+ {
+ "available": 23,
+ "daily_captures": 60295,
+ "daily_captures_limit": 300000,
+ "processing": 1,
+ }
+ ),
+ )
+ responses.add(
responses.POST,
"http://dummy-spnv2/save",
status=200,
diff --git a/python/tests/test_savepagenow.py b/python/tests/test_savepagenow.py
index f3a5e46..add2c60 100644
--- a/python/tests/test_savepagenow.py
+++ b/python/tests/test_savepagenow.py
@@ -4,7 +4,7 @@ import pytest
import responses
from test_wayback import *
-from sandcrawler import CdxPartial, SavePageNowClient, SavePageNowError
+from sandcrawler import CdxPartial, SavePageNowBackoffError, SavePageNowClient, SavePageNowError
TARGET = "http://dummy-target.dummy"
JOB_ID = "e70f33c7-9eca-4c88-826d-26930564d7c8"
@@ -117,6 +117,19 @@ def spn_client():
def test_savepagenow_success(spn_client):
responses.add(
+ responses.GET,
+ "http://dummy-spnv2/save/status/user",
+ status=200,
+ body=json.dumps(
+ {
+ "available": 23,
+ "daily_captures": 60295,
+ "daily_captures_limit": 300000,
+ "processing": 1,
+ }
+ ),
+ )
+ responses.add(
responses.POST,
"http://dummy-spnv2/save",
status=200,
@@ -143,7 +156,7 @@ def test_savepagenow_success(spn_client):
resp = spn_client.save_url_now_v2(TARGET)
- assert len(responses.calls) == 4
+ assert len(responses.calls) == 5
assert resp.success is True
assert resp.status == "success"
@@ -157,6 +170,19 @@ def test_savepagenow_success(spn_client):
def test_savepagenow_remote_error(spn_client):
responses.add(
+ responses.GET,
+ "http://dummy-spnv2/save/status/user",
+ status=200,
+ body=json.dumps(
+ {
+ "available": 23,
+ "daily_captures": 60295,
+ "daily_captures_limit": 300000,
+ "processing": 1,
+ }
+ ),
+ )
+ responses.add(
responses.POST,
"http://dummy-spnv2/save",
status=200,
@@ -177,7 +203,7 @@ def test_savepagenow_remote_error(spn_client):
resp = spn_client.save_url_now_v2(TARGET)
- assert len(responses.calls) == 3
+ assert len(responses.calls) == 4
assert resp.success is False
assert resp.status == ERROR_BODY["status_ext"]
@@ -191,6 +217,19 @@ def test_savepagenow_remote_error(spn_client):
def test_savepagenow_500(spn_client):
responses.add(
+ responses.GET,
+ "http://dummy-spnv2/save/status/user",
+ status=200,
+ body=json.dumps(
+ {
+ "available": 23,
+ "daily_captures": 60295,
+ "daily_captures_limit": 300000,
+ "processing": 1,
+ }
+ ),
+ )
+ responses.add(
responses.POST,
"http://dummy-spnv2/save",
status=200,
@@ -206,13 +245,49 @@ def test_savepagenow_500(spn_client):
with pytest.raises(SavePageNowError):
spn_client.save_url_now_v2(TARGET)
- assert len(responses.calls) == 2
+ assert len(responses.calls) == 3
+
+
+@responses.activate
+def test_savepagenow_no_slots(spn_client):
+
+ responses.add(
+ responses.GET,
+ "http://dummy-spnv2/save/status/user",
+ status=200,
+ body=json.dumps(
+ {
+ "available": 0,
+ "daily_captures": 60295,
+ "daily_captures_limit": 300000,
+ "processing": 1,
+ }
+ ),
+ )
+
+ with pytest.raises(SavePageNowBackoffError):
+ spn_client.save_url_now_v2(TARGET)
+
+ assert len(responses.calls) == 1
@responses.activate
def test_crawl_resource(spn_client, wayback_client):
responses.add(
+ responses.GET,
+ "http://dummy-spnv2/save/status/user",
+ status=200,
+ body=json.dumps(
+ {
+ "available": 23,
+ "daily_captures": 60295,
+ "daily_captures_limit": 300000,
+ "processing": 1,
+ }
+ ),
+ )
+ responses.add(
responses.POST,
"http://dummy-spnv2/save",
status=200,
@@ -244,7 +319,7 @@ def test_crawl_resource(spn_client, wayback_client):
print("https://web.archive.org/web/{}id_/{}".format("20180326070330", TARGET + "/redirect"))
resp = spn_client.crawl_resource(TARGET, wayback_client)
- assert len(responses.calls) == 5
+ assert len(responses.calls) == 6
assert resp.hit is True
assert resp.status == "success"