summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@archive.org>, 2021-05-19 15:50:06 -0700
committer: Bryan Newbold <bnewbold@archive.org>, 2021-05-19 15:50:06 -0700
commit: 4e452afae432ac718482b62d276bee42adc1e660 (patch)
tree: 9ea40e6053e3bdce32a8312a14b4dfae0094deec
parent: ef752720afe63dbcc960777fdb189c216f8a9dc8 (diff)
download: fatcat-scholar-4e452afae432ac718482b62d276bee42adc1e660.tar.gz
download: fatcat-scholar-4e452afae432ac718482b62d276bee42adc1e660.zip
web: fixes to access redirect endpoints
-rw-r--r--  fatcat_scholar/hacks.py  13
-rw-r--r--  fatcat_scholar/web.py  7
-rw-r--r--  tests/test_web.py  11
3 files changed, 30 insertions, 1 deletion
diff --git a/fatcat_scholar/hacks.py b/fatcat_scholar/hacks.py
index 1b53e01..0f16fc7 100644
--- a/fatcat_scholar/hacks.py
+++ b/fatcat_scholar/hacks.py
@@ -118,6 +118,12 @@ def test_wayback_direct_url() -> None:
)
== "https://web.archive.org/web/1234id_/http://fatcat.wiki/thing.pdf"
)
+ assert (
+ wayback_direct_url(
+ "https://web.archive.org/web/20170811115414/http://sudjms.net/issues/5-4/pdf/8)A%20comparison%20study%20of%20histochemical%20staining%20of%20various%20tissues%20after.pdf"
+ )
+ == "https://web.archive.org/web/20170811115414id_/http://sudjms.net/issues/5-4/pdf/8)A%20comparison%20study%20of%20histochemical%20staining%20of%20various%20tissues%20after.pdf"
+ )
def make_access_redirect_url(access_type: str, access_url: str) -> str:
@@ -157,3 +163,10 @@ def test_make_access_redirect_url() -> None:
make_access_redirect_url("blah", "https://mit.edu/file.pdf")
== "https://mit.edu/file.pdf"
)
+ assert (
+ make_access_redirect_url(
+ "wayback",
+ "https://web.archive.org/web/20170811115414/http://sudjms.net/issues/5-4/pdf/8)A%20comparison%20study%20of%20histochemical%20staining%20of%20various%20tissues%20after.pdf",
+ )
+ == "https://scholar.archive.org/access/wayback/20170811115414/http://sudjms.net/issues/5-4/pdf/8)A%20comparison%20study%20of%20histochemical%20staining%20of%20various%20tissues%20after.pdf"
+ )
diff --git a/fatcat_scholar/web.py b/fatcat_scholar/web.py
index e2ac81e..04a1e88 100644
--- a/fatcat_scholar/web.py
+++ b/fatcat_scholar/web.py
@@ -209,7 +209,12 @@ def access_redirect_pdf(sha1: str = Query(..., min_length=40, max_length=40)) ->
)
def access_redirect_wayback(timestamp: int, url: str, request: Request) -> Any:
original_url = "/".join(str(request.url).split("/")[6:])
- access_url = f"https://web.archive.org/web/{timestamp}id_/{original_url}"
+ # the quote() call is necessary because the URL is un-encoded in the path parameter
+ # see also: https://github.com/encode/starlette/commit/f997938916d20e955478f60406ef9d293236a16d
+ access_url = urllib.parse.quote(
+ f"https://web.archive.org/web/{timestamp}id_/{original_url}",
+ safe=":/%#?=@[]!$&'()*+,;",
+ )
return RedirectResponse(access_url, status_code=302)
diff --git a/tests/test_web.py b/tests/test_web.py
index fc7ea14..ee11ee6 100644
--- a/tests/test_web.py
+++ b/tests/test_web.py
@@ -179,3 +179,14 @@ def test_access_redirects(client: Any, mocker: Any) -> None:
rv.headers["Location"]
== "https://web.archive.org/web/20170814015956id_/https://epub.uni-regensburg.de/21901/1/lorenz73.pdf"
)
+
+ # spaces ("%20" vs "+")
+ rv = client.get(
+ "/access/wayback/20170811115414/http://sudjms.net/issues/5-4/pdf/8)A%20comparison%20study%20of%20histochemical%20staining%20of%20various%20tissues%20after.pdf",
+ allow_redirects=False,
+ )
+ assert rv.status_code == 302
+ assert (
+ rv.headers["Location"]
+ == "https://web.archive.org/web/20170811115414id_/http://sudjms.net/issues/5-4/pdf/8)A%20comparison%20study%20of%20histochemical%20staining%20of%20various%20tissues%20after.pdf"
+ )