diff options
author | Bryan Newbold <bnewbold@archive.org> | 2021-07-26 20:52:56 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2021-07-26 20:52:58 -0700 |
commit | 7ebcdfebdf4e1c8b69026a24cafe500c67dbc384 (patch) | |
tree | ab580d4307acc5d10b5a8669d28ac6a6ec6e81e5 /tests | |
parent | eeb456c16d016d8523023f787597efae7a6317b9 (diff) | |
download | fatcat-scholar-7ebcdfebdf4e1c8b69026a24cafe500c67dbc384.tar.gz fatcat-scholar-7ebcdfebdf4e1c8b69026a24cafe500c67dbc384.zip |
web: access_redirect_fallback mechanism
This adds a helper code path that "tries harder" to find an access link,
by querying the fatcat API directly to look for any file from any
release associated with the work. If it finds a match, it does the
redirect as usual (but does log the incident).
If no match can be found, there is now a more helpful access-specific
404 error page.
If the *work* is a 404, the generic error page is shown.
Diffstat (limited to 'tests')
-rw-r--r-- | tests/test_web.py | 103 |
1 files changed, 102 insertions, 1 deletions
diff --git a/tests/test_web.py b/tests/test_web.py index 7f1f72a..d9cfab6 100644 --- a/tests/test_web.py +++ b/tests/test_web.py @@ -3,6 +3,7 @@ from typing import Any import pytest from fastapi.testclient import TestClient +import fatcat_openapi_client from fatcat_scholar.web import app @@ -148,7 +149,11 @@ def test_basic_access_redirect(client: Any, mocker: Any) -> None: == "https://web.archive.org/web/20200206164725id_/https://www.federalreserve.gov/econresdata/feds/2015/files/2015118pap.pdf" ) - # check that URL is validated + # check that URL is validated (force fatcat API fallback to fail) + fatcat_api_raw = mocker.patch("fatcat_openapi_client.ApiClient.call_api") + fatcat_api_raw.side_effect = [ + fatcat_openapi_client.ApiException(status=404, reason="dummy") + ] rv = client.get( "/work/2x5qvct2dnhrbctqa2q2uyut6a/access/wayback/https://www.federalreserve.gov/econresdata/feds/2015/files/2015118pap.pdf.DUMMY", allow_redirects=False, @@ -156,6 +161,102 @@ def test_basic_access_redirect(client: Any, mocker: Any) -> None: assert rv.status_code == 404 +def test_access_redirect_fallback(client: Any, mocker: Any) -> None: + + with open("tests/files/elastic_fulltext_get.json") as f: + elastic_resp = json.loads(f.read()) + + es_raw = mocker.patch( + "elasticsearch.connection.Urllib3HttpConnection.perform_request" + ) + es_raw.side_effect = [ + (200, {}, json.dumps(elastic_resp)), + (200, {}, json.dumps(elastic_resp)), + (200, {}, json.dumps(elastic_resp)), + (200, {}, json.dumps(elastic_resp)), + ] + fatcat_get_work_raw = mocker.patch("fatcat_openapi_client.DefaultApi.get_work") + fatcat_get_work_raw.side_effect = [ + fatcat_openapi_client.WorkEntity( + state="active", ident="wwwwwwwwwwwwwwwwwwwwwwwwww", + ) + ] * 4 + fatcat_get_work_releases_raw = mocker.patch( + "fatcat_openapi_client.DefaultApi.get_work_releases" + ) + fatcat_get_work_releases_raw.side_effect = [ + [ + fatcat_openapi_client.ReleaseEntity( + ident="rrrrrrrrrrrrrrrrrrrrrrrrrr", + ext_ids=fatcat_openapi_client.ReleaseExtIds(), + ), + ] + ] * 4 + fatcat_get_release_raw = mocker.patch( + "fatcat_openapi_client.DefaultApi.get_release" + ) + fatcat_get_release_raw.side_effect = [ + fatcat_openapi_client.ReleaseEntity( + state="active", + ident="rrrrrrrrrrrrrrrrrrrrrrrrrr", + ext_ids=fatcat_openapi_client.ReleaseExtIds(), + files=[ + fatcat_openapi_client.FileEntity( + ident="ffffffffffffffffffffffffff", + urls=[ + fatcat_openapi_client.FileUrl( + rel="web", url="https://blarg.example.com", + ), + fatcat_openapi_client.FileUrl( + rel="webarchive", + url="https://web.archive.org/web/12345/https://example.com", + ), + fatcat_openapi_client.FileUrl( + rel="archive", + url="https://archive.org/download/some/thing.pdf", + ), + ], + ), + ], + ) + ] * 4 + + # redirects should work after API lookup, for both wayback and archive.org + rv = client.get( + "/work/2x5qvct2dnhrbctqa2q2uyut6a/access/wayback/https://example.com", + allow_redirects=False, + ) + assert rv.status_code == 302 + assert ( + rv.headers["Location"] + == "https://web.archive.org/web/12345id_/https://example.com" + ) + + rv = client.get( + "/work/2x5qvct2dnhrbctqa2q2uyut6a/access/ia_file/some/thing.pdf", + allow_redirects=False, + ) + assert rv.status_code == 302 + assert rv.headers["Location"] == "https://archive.org/download/some/thing.pdf" + + # wrong URLs should still not work, but display a page with helpful links + rv = client.get( + "/work/2x5qvct2dnhrbctqa2q2uyut6a/access/wayback/https://www.federalreserve.gov/econresdata/feds/2015/files/2015118pap.pdf.DUMMY", + allow_redirects=False, + ) + assert rv.status_code == 404 + assert b"Access Location Not Found" in rv.content + assert b"web.archive.org/web/*/https://www.federalreserve.gov" in rv.content + + rv = client.get( + "/work/2x5qvct2dnhrbctqa2q2uyut6a/access/ia_file/some/thing.else.pdf", + allow_redirects=False, + ) + assert rv.status_code == 404 + assert b"Access Location Not Found" in rv.content + assert b"archive.org/download/some/thing.else.pdf" in rv.content + + def test_access_redirect_encoding(client: Any, mocker: Any) -> None: with open("tests/files/elastic_get_work_a6gvpil4brdgzhqyaog3ftngqe.json") as f: |