diff options
-rw-r--r-- | fatcat_scholar/hacks.py | 13 | ||||
-rw-r--r-- | fatcat_scholar/web.py | 7 | ||||
-rw-r--r-- | tests/test_web.py | 11 |
3 files changed, 30 insertions, 1 deletion
diff --git a/fatcat_scholar/hacks.py b/fatcat_scholar/hacks.py index 1b53e01..0f16fc7 100644 --- a/fatcat_scholar/hacks.py +++ b/fatcat_scholar/hacks.py @@ -118,6 +118,12 @@ def test_wayback_direct_url() -> None: ) == "https://web.archive.org/web/1234id_/http://fatcat.wiki/thing.pdf" ) + assert ( + wayback_direct_url( + "https://web.archive.org/web/20170811115414/http://sudjms.net/issues/5-4/pdf/8)A%20comparison%20study%20of%20histochemical%20staining%20of%20various%20tissues%20after.pdf" + ) + == "https://web.archive.org/web/20170811115414id_/http://sudjms.net/issues/5-4/pdf/8)A%20comparison%20study%20of%20histochemical%20staining%20of%20various%20tissues%20after.pdf" + ) def make_access_redirect_url(access_type: str, access_url: str) -> str: @@ -157,3 +163,10 @@ def test_make_access_redirect_url() -> None: make_access_redirect_url("blah", "https://mit.edu/file.pdf") == "https://mit.edu/file.pdf" ) + assert ( + make_access_redirect_url( + "wayback", + "https://web.archive.org/web/20170811115414/http://sudjms.net/issues/5-4/pdf/8)A%20comparison%20study%20of%20histochemical%20staining%20of%20various%20tissues%20after.pdf", + ) + == "https://scholar.archive.org/access/wayback/20170811115414/http://sudjms.net/issues/5-4/pdf/8)A%20comparison%20study%20of%20histochemical%20staining%20of%20various%20tissues%20after.pdf" + ) diff --git a/fatcat_scholar/web.py b/fatcat_scholar/web.py index e2ac81e..04a1e88 100644 --- a/fatcat_scholar/web.py +++ b/fatcat_scholar/web.py @@ -209,7 +209,12 @@ def access_redirect_pdf(sha1: str = Query(..., min_length=40, max_length=40)) -> ) def access_redirect_wayback(timestamp: int, url: str, request: Request) -> Any: original_url = "/".join(str(request.url).split("/")[6:]) - access_url = f"https://web.archive.org/web/{timestamp}id_/{original_url}" + # the quote() call is necessary because the URL is un-encoded in the path parameter + # see also: https://github.com/encode/starlette/commit/f997938916d20e955478f60406ef9d293236a16d + access_url 
= urllib.parse.quote( + f"https://web.archive.org/web/{timestamp}id_/{original_url}", + safe=":/%#?=@[]!$&'()*+,;", + ) return RedirectResponse(access_url, status_code=302) diff --git a/tests/test_web.py b/tests/test_web.py index fc7ea14..ee11ee6 100644 --- a/tests/test_web.py +++ b/tests/test_web.py @@ -179,3 +179,14 @@ def test_access_redirects(client: Any, mocker: Any) -> None: rv.headers["Location"] == "https://web.archive.org/web/20170814015956id_/https://epub.uni-regensburg.de/21901/1/lorenz73.pdf" ) + + # spaces ("%20" vs "+") + rv = client.get( + "/access/wayback/20170811115414/http://sudjms.net/issues/5-4/pdf/8)A%20comparison%20study%20of%20histochemical%20staining%20of%20various%20tissues%20after.pdf", + allow_redirects=False, + ) + assert rv.status_code == 302 + assert ( + rv.headers["Location"] + == "https://web.archive.org/web/20170811115414id_/http://sudjms.net/issues/5-4/pdf/8)A%20comparison%20study%20of%20histochemical%20staining%20of%20various%20tissues%20after.pdf" + ) |