aboutsummaryrefslogtreecommitdiffstats
path: root/fatcat_scholar/web.py
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@archive.org> 2021-05-17 21:04:29 -0700
committer Bryan Newbold <bnewbold@archive.org> 2021-05-17 21:04:29 -0700
commit f767a344c1ed7722b79710c6f3c61d5802f78860 (patch)
tree 8a4779f4c83687965a80127316274967f14f4685 /fatcat_scholar/web.py
parent f4ffc6863ec7d08a195cb8cb5370a153d093454e (diff)
download fatcat-scholar-f767a344c1ed7722b79710c6f3c61d5802f78860.tar.gz
fatcat-scholar-f767a344c1ed7722b79710c6f3c61d5802f78860.zip
iterate on PDF redirect links
Diffstat (limited to 'fatcat_scholar/web.py')
-rw-r--r--fatcat_scholar/web.py65
1 files changed, 34 insertions, 31 deletions
diff --git a/fatcat_scholar/web.py b/fatcat_scholar/web.py
index 3c8b8f3..f2ef331 100644
--- a/fatcat_scholar/web.py
+++ b/fatcat_scholar/web.py
@@ -5,6 +5,7 @@ So far there are few endpoints, so we just put them all here!
"""
import logging
+import urllib.parse
from typing import Optional, Any, List, Dict
from pydantic import BaseModel
@@ -25,7 +26,12 @@ from starlette_prometheus import metrics, PrometheusMiddleware
from starlette.exceptions import HTTPException as StarletteHTTPException
from fatcat_scholar.config import settings, GIT_REVISION
-from fatcat_scholar.hacks import Jinja2Templates, parse_accept_lang
+from fatcat_scholar.hacks import (
+ Jinja2Templates,
+ parse_accept_lang,
+ wayback_direct_url,
+ make_access_redirect_url,
+)
from fatcat_scholar.search import (
process_query,
FulltextQuery,
@@ -177,42 +183,15 @@ def get_work(work_ident: str = Query(..., min_length=20, max_length=20)) -> dict
return doc
-def wayback_direct_url(url: str) -> str:
- """
- Re-writes a wayback replay URL to add the 'id_' suffix (or equivalent for direct file access)
- """
- if not "://web.archive.org" in url:
- return url
- segments = url.split("/")
- if len(segments) < 6 or not segments[4].isdigit():
- return url
- segments[4] += "id_"
- return "/".join(segments)
-
-
-def test_wayback_direct_url() -> None:
- assert (
- wayback_direct_url("http://fatcat.wiki/thing.pdf")
- == "http://fatcat.wiki/thing.pdf"
- )
- assert (
- wayback_direct_url("https://web.archive.org/web/*/http://fatcat.wiki/thing.pdf")
- == "https://web.archive.org/web/*/http://fatcat.wiki/thing.pdf"
- )
- assert (
- wayback_direct_url(
- "https://web.archive.org/web/1234/http://fatcat.wiki/thing.pdf"
- )
- == "https://web.archive.org/web/1234id_/http://fatcat.wiki/thing.pdf"
- )
-
-
@api.get(
"/access-redirect/{sha1}.pdf",
operation_id="access_redirect_pdf",
include_in_schema=False,
)
def access_redirect_pdf(sha1: str = Query(..., min_length=40, max_length=40)) -> Any:
+ """
+ NOTE: DEPRECATED
+ """
fulltext = lookup_fulltext_pdf(sha1)
if not fulltext or not fulltext.access_url:
raise HTTPException(status_code=404, detail="PDF file not found")
@@ -222,6 +201,28 @@ def access_redirect_pdf(sha1: str = Query(..., min_length=40, max_length=40)) ->
return RedirectResponse(access_url, status_code=302)
+@api.get(
+    "/access/wayback/{timestamp}/{url:path}",
+    operation_id="access_redirect_wayback",
+    include_in_schema=False,
+)
+def access_redirect_wayback(timestamp: int, url: str, request: Request) -> Any:
+    """
+    Responds with a 302 redirect to a web.archive.org URL built from the
+    capture timestamp (with 'id_' appended) and the originally requested URL.
+
+    The original URL is taken from the full request URL (everything after
+    the sixth "/") rather than from the `url` path parameter, which is
+    declared for routing but not read here.
+    """
+    # e.g. "http://host/access/wayback/<timestamp>/<rest>": the first six
+    # "/"-separated pieces are the prefix, the seventh is everything else
+    pieces = str(request.url).split("/", 6)
+    original_url = pieces[6] if len(pieces) > 6 else ""
+    return RedirectResponse(
+        f"https://web.archive.org/web/{timestamp}id_/{original_url}",
+        status_code=302,
+    )
+
+
+@api.get(
+    "/access/ia_file/{item}/{file_path:path}",
+    operation_id="access_redirect_ia_file",
+    include_in_schema=False,
+)
+def access_redirect_ia_file(item: str, file_path: str, request: Request) -> Any:
+    """
+    Responds with a 302 redirect to an archive.org download URL for a file
+    within the given item.
+
+    The file path is taken from the full request URL (everything after the
+    sixth "/") and percent-quoted; the `file_path` path parameter is declared
+    for routing but not read here.
+    """
+    # e.g. "http://host/access/ia_file/<item>/<rest>": the first six
+    # "/"-separated pieces are the prefix, the seventh is everything else
+    pieces = str(request.url).split("/", 6)
+    remainder = pieces[6] if len(pieces) > 6 else ""
+    original_path = urllib.parse.quote(remainder)
+    return RedirectResponse(
+        f"https://archive.org/download/{item}/{original_path}",
+        status_code=302,
+    )
+
+
web = APIRouter()
@@ -270,6 +271,7 @@ def load_i18n_templates() -> Any:
# pass-through application settings to be available in templates
templates.env.globals["settings"] = settings
templates.env.globals["babel_numbers"] = babel.numbers
+ templates.env.globals["make_access_redirect_url"] = make_access_redirect_url
d[lang_opt] = templates
return d
@@ -414,6 +416,7 @@ async def favicon() -> Any:
"fatcat_scholar/static/ia-favicon.ico", media_type="image/x-icon"
)
+
@app.get("/sitemap.xml", include_in_schema=False)
async def basic_sitemap() -> Any:
return FileResponse(