aboutsummaryrefslogtreecommitdiffstats
path: root/python/tests
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@archive.org> 2020-01-14 15:53:00 -0800
committer Bryan Newbold <bnewbold@archive.org> 2020-01-14 15:53:00 -0800
commit 21599839802b8ef3a84ffe90855f7bceaaa12a0d (patch)
tree a65be9ff5b87fb62f4137762a4a97eb77f9c137b /python/tests
parent 648f04bfdcf441ce4a396d09bdd0443b2a2ca51e (diff)
download sandcrawler-21599839802b8ef3a84ffe90855f7bceaaa12a0d.tar.gz
sandcrawler-21599839802b8ef3a84ffe90855f7bceaaa12a0d.zip
add live tests for ftp, revisits
Diffstat (limited to 'python/tests')
-rw-r--r-- python/tests/test_live_wayback.py | 37
1 files changed, 36 insertions, 1 deletions
diff --git a/python/tests/test_live_wayback.py b/python/tests/test_live_wayback.py
index c5376e0..f15c63e 100644
--- a/python/tests/test_live_wayback.py
+++ b/python/tests/test_live_wayback.py
@@ -10,7 +10,7 @@ Simply uncomment lines to run.
import json
import pytest
-from sandcrawler import CdxApiClient, CdxApiError, WaybackClient, WaybackError, PetaboxError
+from sandcrawler import CdxApiClient, CdxApiError, WaybackClient, WaybackError, PetaboxError, SavePageNowClient, SavePageNowError, CdxPartial, gen_file_metadata
@pytest.fixture
@@ -23,6 +23,11 @@ def wayback_client():
client = WaybackClient()
return client
+@pytest.fixture
+def spn_client():
+    """Build and return a default-constructed ``SavePageNowClient``."""
+    return SavePageNowClient()
+
@pytest.mark.skip(reason="hits prod services, requires auth")
def test_cdx_fetch(cdx_client):
@@ -112,3 +117,33 @@ def test_cdx_fetch_spn2(cdx_client):
assert resp.sha1b32 == "VYW7JXFK6EC2KC537N5B7PHYZC4B6MZL"
assert resp.status_code == 200
+@pytest.mark.skip(reason="hits prod services, requires auth")
+def test_lookup_ftp(wayback_client):
+    """Live test: look up an ``ftp://`` URL via ``wayback_client.lookup_resource``.
+
+    Asserts that the lookup is a successful hit, that both the terminal URL
+    and the CDX URL equal the requested URL, and that the SHA-1 computed
+    from the returned body equals ``resp.cdx.sha1hex``.
+    """
+    # Alternate example URL (not exercised here):
+    # ftp://ftp.ncbi.nlm.nih.gov/pub/pmc/oa_pdf/80/23/10.1177_1559827617708562.PMC6236633.pdf
+
+    url = "ftp://ftp.ncbi.nlm.nih.gov/pub/pmc/oa_pdf/ad/ab/mmr-17-05-6969.PMC5928650.pdf"
+    resp = wayback_client.lookup_resource(url)
+
+    # Identity check rather than `== True`: the flag must be the real boolean.
+    assert resp.hit is True
+    assert resp.status == "success"
+    assert resp.terminal_url == url
+    assert resp.cdx.url == url
+
+    # The fetched body must hash to the digest carried on the CDX record.
+    file_meta = gen_file_metadata(resp.body)
+    assert file_meta['sha1hex'] == resp.cdx.sha1hex
+
+@pytest.mark.skip(reason="hits prod services, requires auth")
+def test_crawl_ftp(spn_client, wayback_client):
+    """Live test: request a crawl of an ``ftp://`` URL via ``spn_client.crawl_resource``.
+
+    FTP crawling is not supported yet, so the expected result is a miss
+    with status ``"spn2-no-ftp"``.
+    """
+
+    url = "ftp://ftp.ncbi.nlm.nih.gov/pub/pmc/oa_pdf/ad/ab/mmr-17-05-6969.PMC5928650.pdf"
+    resp = spn_client.crawl_resource(url, wayback_client)
+
+    # FTP isn't supported yet!
+    # TODO: switch to these assertions once FTP crawling works:
+    #assert resp.hit == True
+    #assert resp.status == "success"
+    #assert resp.terminal_url == url
+    #assert resp.cdx.url == url
+
+    # Identity check rather than `== False`: the flag must be the real boolean.
+    assert resp.hit is False
+    assert resp.status == "spn2-no-ftp"