diff options
Diffstat (limited to 'python/tests')
-rw-r--r-- | python/tests/test_html.py | 25 | ||||
-rw-r--r-- | python/tests/test_ingest.py | 26 | ||||
-rw-r--r-- | python/tests/test_savepagenow.py | 85 |
3 files changed, 106 insertions, 30 deletions
diff --git a/python/tests/test_html.py b/python/tests/test_html.py index 614b802..043c63d 100644 --- a/python/tests/test_html.py +++ b/python/tests/test_html.py @@ -5,28 +5,3 @@ def test_extract_fulltext_url(): resp = extract_fulltext_url("asdf", b"asdf") assert resp == {} - - resp = extract_fulltext_url( - "http://dummy-site/", - b"""<html> - <head> - <meta name="citation_pdf_url" content="http://www.example.com/content/271/20/11761.full.pdf"> - </head> - <body> - <h1>my big article here</h1> - blah - </body> - </html>""", - ) - assert resp["pdf_url"] == "http://www.example.com/content/271/20/11761.full.pdf" - assert resp["technique"] == "citation_pdf_url" - - with open("tests/files/plos_one_article.html", "rb") as f: - resp = extract_fulltext_url( - "https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0213978", - f.read(), - ) - assert ( - resp["pdf_url"] - == "https://journals.plos.org/plosone/article/file?id=10.1371/journal.pone.0213978&type=printable" - ) diff --git a/python/tests/test_ingest.py b/python/tests/test_ingest.py index ad8c22e..e14a452 100644 --- a/python/tests/test_ingest.py +++ b/python/tests/test_ingest.py @@ -50,6 +50,19 @@ def test_ingest_success(ingest_worker_pdf): "base_url": "http://dummy-host/", } responses.add( + responses.GET, + "http://dummy-spnv2/save/status/user", + status=200, + body=json.dumps( + { + "available": 23, + "daily_captures": 60295, + "daily_captures_limit": 300000, + "processing": 1, + } + ), + ) + responses.add( responses.POST, "http://dummy-spnv2/save", status=200, @@ -136,6 +149,19 @@ def test_ingest_landing(ingest_worker): "base_url": "http://dummy-host/", } responses.add( + responses.GET, + "http://dummy-spnv2/save/status/user", + status=200, + body=json.dumps( + { + "available": 23, + "daily_captures": 60295, + "daily_captures_limit": 300000, + "processing": 1, + } + ), + ) + responses.add( responses.POST, "http://dummy-spnv2/save", status=200, diff --git a/python/tests/test_savepagenow.py b/python/tests/test_savepagenow.py index f3a5e46..add2c60 100644 --- a/python/tests/test_savepagenow.py +++ b/python/tests/test_savepagenow.py @@ -4,7 +4,7 @@ import pytest import responses from test_wayback import * -from sandcrawler import CdxPartial, SavePageNowClient, SavePageNowError +from sandcrawler import CdxPartial, SavePageNowBackoffError, SavePageNowClient, SavePageNowError TARGET = "http://dummy-target.dummy" JOB_ID = "e70f33c7-9eca-4c88-826d-26930564d7c8" @@ -117,6 +117,19 @@ def spn_client(): def test_savepagenow_success(spn_client): responses.add( + responses.GET, + "http://dummy-spnv2/save/status/user", + status=200, + body=json.dumps( + { + "available": 23, + "daily_captures": 60295, + "daily_captures_limit": 300000, + "processing": 1, + } + ), + ) + responses.add( responses.POST, "http://dummy-spnv2/save", status=200, @@ -143,7 +156,7 @@ def test_savepagenow_success(spn_client): resp = spn_client.save_url_now_v2(TARGET) - assert len(responses.calls) == 4 + assert len(responses.calls) == 5 assert resp.success is True assert resp.status == "success" @@ -157,6 +170,19 @@ def test_savepagenow_success(spn_client): def test_savepagenow_remote_error(spn_client): responses.add( + responses.GET, + "http://dummy-spnv2/save/status/user", + status=200, + body=json.dumps( + { + "available": 23, + "daily_captures": 60295, + "daily_captures_limit": 300000, + "processing": 1, + } + ), + ) + responses.add( responses.POST, "http://dummy-spnv2/save", status=200, @@ -177,7 +203,7 @@ def test_savepagenow_remote_error(spn_client): resp = spn_client.save_url_now_v2(TARGET) - assert len(responses.calls) == 3 + assert len(responses.calls) == 4 assert resp.success is False assert resp.status == ERROR_BODY["status_ext"] @@ -191,6 +217,19 @@ def test_savepagenow_remote_error(spn_client): def test_savepagenow_500(spn_client): responses.add( + responses.GET, + "http://dummy-spnv2/save/status/user", + status=200, + body=json.dumps( + { + "available": 23, + "daily_captures": 60295, + "daily_captures_limit": 300000, + "processing": 1, + } + ), + ) + responses.add( responses.POST, "http://dummy-spnv2/save", status=200, @@ -206,13 +245,49 @@ def test_savepagenow_500(spn_client): with pytest.raises(SavePageNowError): spn_client.save_url_now_v2(TARGET) - assert len(responses.calls) == 2 + assert len(responses.calls) == 3 + + +@responses.activate +def test_savepagenow_no_slots(spn_client): + + responses.add( + responses.GET, + "http://dummy-spnv2/save/status/user", + status=200, + body=json.dumps( + { + "available": 0, + "daily_captures": 60295, + "daily_captures_limit": 300000, + "processing": 1, + } + ), + ) + + with pytest.raises(SavePageNowBackoffError): + spn_client.save_url_now_v2(TARGET) + + assert len(responses.calls) == 1 @responses.activate def test_crawl_resource(spn_client, wayback_client): responses.add( + responses.GET, + "http://dummy-spnv2/save/status/user", + status=200, + body=json.dumps( + { + "available": 23, + "daily_captures": 60295, + "daily_captures_limit": 300000, + "processing": 1, + } + ), + ) + responses.add( responses.POST, "http://dummy-spnv2/save", status=200, @@ -244,7 +319,7 @@ def test_crawl_resource(spn_client, wayback_client): print("https://web.archive.org/web/{}id_/{}".format("20180326070330", TARGET + "/redirect")) resp = spn_client.crawl_resource(TARGET, wayback_client) - assert len(responses.calls) == 5 + assert len(responses.calls) == 6 assert resp.hit is True assert resp.status == "success" |