summary | refs | log | tree | commit | diff | stats
path: root/fatcat_scholar
diff options
context:
space:
mode:
Diffstat (limited to 'fatcat_scholar')
-rw-r--r-- fatcat_scholar/hacks.py 13
-rw-r--r-- fatcat_scholar/web.py 7
2 files changed, 19 insertions, 1 deletion
diff --git a/fatcat_scholar/hacks.py b/fatcat_scholar/hacks.py
index 1b53e01..0f16fc7 100644
--- a/fatcat_scholar/hacks.py
+++ b/fatcat_scholar/hacks.py
@@ -118,6 +118,12 @@ def test_wayback_direct_url() -> None:
)
== "https://web.archive.org/web/1234id_/http://fatcat.wiki/thing.pdf"
)
+ assert (
+ wayback_direct_url(
+ "https://web.archive.org/web/20170811115414/http://sudjms.net/issues/5-4/pdf/8)A%20comparison%20study%20of%20histochemical%20staining%20of%20various%20tissues%20after.pdf"
+ )
+ == "https://web.archive.org/web/20170811115414id_/http://sudjms.net/issues/5-4/pdf/8)A%20comparison%20study%20of%20histochemical%20staining%20of%20various%20tissues%20after.pdf"
+ )
def make_access_redirect_url(access_type: str, access_url: str) -> str:
@@ -157,3 +163,10 @@ def test_make_access_redirect_url() -> None:
make_access_redirect_url("blah", "https://mit.edu/file.pdf")
== "https://mit.edu/file.pdf"
)
+ assert (
+ make_access_redirect_url(
+ "wayback",
+ "https://web.archive.org/web/20170811115414/http://sudjms.net/issues/5-4/pdf/8)A%20comparison%20study%20of%20histochemical%20staining%20of%20various%20tissues%20after.pdf",
+ )
+ == "https://scholar.archive.org/access/wayback/20170811115414/http://sudjms.net/issues/5-4/pdf/8)A%20comparison%20study%20of%20histochemical%20staining%20of%20various%20tissues%20after.pdf"
+ )
diff --git a/fatcat_scholar/web.py b/fatcat_scholar/web.py
index e2ac81e..04a1e88 100644
--- a/fatcat_scholar/web.py
+++ b/fatcat_scholar/web.py
@@ -209,7 +209,12 @@ def access_redirect_pdf(sha1: str = Query(..., min_length=40, max_length=40)) ->
)
def access_redirect_wayback(timestamp: int, url: str, request: Request) -> Any:
original_url = "/".join(str(request.url).split("/")[6:])
- access_url = f"https://web.archive.org/web/{timestamp}id_/{original_url}"
+ # the quote() call is necessary because the URL is un-encoded in the path parameter
+ # see also: https://github.com/encode/starlette/commit/f997938916d20e955478f60406ef9d293236a16d
+ access_url = urllib.parse.quote(
+ f"https://web.archive.org/web/{timestamp}id_/{original_url}",
+ safe=":/%#?=@[]!$&'()*+,;",
+ )
return RedirectResponse(access_url, status_code=302)