about summary refs log tree commit diff stats
path: root/tests/test_web.py
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@archive.org> 2021-07-26 20:52:56 -0700
committer Bryan Newbold <bnewbold@archive.org> 2021-07-26 20:52:58 -0700
commit 7ebcdfebdf4e1c8b69026a24cafe500c67dbc384 (patch)
tree ab580d4307acc5d10b5a8669d28ac6a6ec6e81e5 /tests/test_web.py
parent eeb456c16d016d8523023f787597efae7a6317b9 (diff)
download fatcat-scholar-7ebcdfebdf4e1c8b69026a24cafe500c67dbc384.tar.gz
fatcat-scholar-7ebcdfebdf4e1c8b69026a24cafe500c67dbc384.zip
web: access_redirect_fallback mechanism
This adds a helper code path that "tries harder" to find an access link, by querying the fatcat API directly to look for any file from any release associated with the work. If it finds a match, it does the redirect as usual (but does log the incident). If no match can be found, there is now a more helpful access-specific 404 error page. If the *work* is a 404, the generic error page is shown.
Diffstat (limited to 'tests/test_web.py')
-rw-r--r-- tests/test_web.py 103
1 file changed, 102 insertions, 1 deletion
diff --git a/tests/test_web.py b/tests/test_web.py
index 7f1f72a..d9cfab6 100644
--- a/tests/test_web.py
+++ b/tests/test_web.py
@@ -3,6 +3,7 @@ from typing import Any
import pytest
from fastapi.testclient import TestClient
+import fatcat_openapi_client
from fatcat_scholar.web import app
@@ -148,7 +149,11 @@ def test_basic_access_redirect(client: Any, mocker: Any) -> None:
== "https://web.archive.org/web/20200206164725id_/https://www.federalreserve.gov/econresdata/feds/2015/files/2015118pap.pdf"
)
- # check that URL is validated
+ # check that URL is validated (force fatcat API fallback to fail)
+ fatcat_api_raw = mocker.patch("fatcat_openapi_client.ApiClient.call_api")
+ fatcat_api_raw.side_effect = [
+ fatcat_openapi_client.ApiException(status=404, reason="dummy")
+ ]
rv = client.get(
"/work/2x5qvct2dnhrbctqa2q2uyut6a/access/wayback/https://www.federalreserve.gov/econresdata/feds/2015/files/2015118pap.pdf.DUMMY",
allow_redirects=False,
@@ -156,6 +161,102 @@ def test_basic_access_redirect(client: Any, mocker: Any) -> None:
assert rv.status_code == 404
+def test_access_redirect_fallback(client: Any, mocker: Any) -> None:
+    """
+    Exercise the access-redirect fallback with every backend mocked out.
+
+    The elasticsearch HTTP transport and the fatcat API client methods
+    (get_work, get_work_releases, get_release) are patched with canned
+    responses. URLs matching the mocked file entity must produce a 302
+    redirect; URLs that do not match must produce the access-specific 404
+    page containing a helpful link.
+    """
+
+    with open("tests/files/elastic_fulltext_get.json") as fixture:
+        es_body = json.dumps(json.load(fixture))
+
+    # queue four identical canned elasticsearch responses
+    mocker.patch(
+        "elasticsearch.connection.Urllib3HttpConnection.perform_request",
+        side_effect=[(200, {}, es_body) for _ in range(4)],
+    )
+
+    # fatcat API: the work exists and is active
+    mocker.patch(
+        "fatcat_openapi_client.DefaultApi.get_work",
+        side_effect=[
+            fatcat_openapi_client.WorkEntity(
+                state="active", ident="wwwwwwwwwwwwwwwwwwwwwwwwww",
+            )
+        ]
+        * 4,
+    )
+
+    # fatcat API: the work has a single release
+    mocker.patch(
+        "fatcat_openapi_client.DefaultApi.get_work_releases",
+        side_effect=[
+            [
+                fatcat_openapi_client.ReleaseEntity(
+                    ident="rrrrrrrrrrrrrrrrrrrrrrrrrr",
+                    ext_ids=fatcat_openapi_client.ReleaseExtIds(),
+                ),
+            ]
+        ]
+        * 4,
+    )
+
+    # fatcat API: the release carries one file with web, wayback and
+    # archive.org URLs
+    file_urls = [
+        fatcat_openapi_client.FileUrl(rel="web", url="https://blarg.example.com",),
+        fatcat_openapi_client.FileUrl(
+            rel="webarchive",
+            url="https://web.archive.org/web/12345/https://example.com",
+        ),
+        fatcat_openapi_client.FileUrl(
+            rel="archive", url="https://archive.org/download/some/thing.pdf",
+        ),
+    ]
+    release_with_file = fatcat_openapi_client.ReleaseEntity(
+        state="active",
+        ident="rrrrrrrrrrrrrrrrrrrrrrrrrr",
+        ext_ids=fatcat_openapi_client.ReleaseExtIds(),
+        files=[
+            fatcat_openapi_client.FileEntity(
+                ident="ffffffffffffffffffffffffff", urls=file_urls,
+            ),
+        ],
+    )
+    mocker.patch(
+        "fatcat_openapi_client.DefaultApi.get_release",
+        side_effect=[release_with_file] * 4,
+    )
+
+    # redirects should work after API lookup, for both wayback and archive.org
+    redirect_cases = [
+        (
+            "/work/2x5qvct2dnhrbctqa2q2uyut6a/access/wayback/https://example.com",
+            "https://web.archive.org/web/12345id_/https://example.com",
+        ),
+        (
+            "/work/2x5qvct2dnhrbctqa2q2uyut6a/access/ia_file/some/thing.pdf",
+            "https://archive.org/download/some/thing.pdf",
+        ),
+    ]
+    for path, location in redirect_cases:
+        rv = client.get(path, allow_redirects=False)
+        assert rv.status_code == 302
+        assert rv.headers["Location"] == location
+
+    # wrong URLs should still not work, but display a page with helpful links
+    not_found_cases = [
+        (
+            "/work/2x5qvct2dnhrbctqa2q2uyut6a/access/wayback/https://www.federalreserve.gov/econresdata/feds/2015/files/2015118pap.pdf.DUMMY",
+            b"web.archive.org/web/*/https://www.federalreserve.gov",
+        ),
+        (
+            "/work/2x5qvct2dnhrbctqa2q2uyut6a/access/ia_file/some/thing.else.pdf",
+            b"archive.org/download/some/thing.else.pdf",
+        ),
+    ]
+    for path, helpful_link in not_found_cases:
+        rv = client.get(path, allow_redirects=False)
+        assert rv.status_code == 404
+        assert b"Access Location Not Found" in rv.content
+        assert helpful_link in rv.content
+
+
def test_access_redirect_encoding(client: Any, mocker: Any) -> None:
with open("tests/files/elastic_get_work_a6gvpil4brdgzhqyaog3ftngqe.json") as f: