aboutsummaryrefslogtreecommitdiffstats
path: root/fatcat_scholar/web_hacks.py
diff options
context:
space:
mode:
Diffstat (limited to 'fatcat_scholar/web_hacks.py')
-rw-r--r--fatcat_scholar/web_hacks.py179
1 files changed, 179 insertions, 0 deletions
diff --git a/fatcat_scholar/web_hacks.py b/fatcat_scholar/web_hacks.py
new file mode 100644
index 0000000..2be90f0
--- /dev/null
+++ b/fatcat_scholar/web_hacks.py
@@ -0,0 +1,179 @@
+import typing
+
+import jinja2
+from starlette.background import BackgroundTask
+from starlette.templating import _TemplateResponse
+
+
+class Jinja2Templates:
+ """
+ This is a patched version of starlette.templating.Jinja2Templates that
+ supports extensions (list of strings) passed to jinja2.Environment
+ """
+
+ def __init__(self, directory: str, extensions: typing.List[str] = []) -> None:
+ assert jinja2 is not None, "jinja2 must be installed to use Jinja2Templates"
+ self.env = self.get_env(directory, extensions)
+
+ def get_env(
+ self, directory: str, extensions: typing.List[str] = []
+ ) -> "jinja2.Environment":
+ @jinja2.pass_context
+ def url_for(context: dict, name: str, **path_params: typing.Any) -> str:
+ request = context["request"]
+ return request.url_for(name, **path_params)
+
+ loader = jinja2.FileSystemLoader(directory)
+ env = jinja2.Environment(loader=loader, extensions=extensions, autoescape=True)
+ env.globals["url_for"] = url_for
+ return env
+
+ def get_template(self, name: str) -> "jinja2.Template":
+ return self.env.get_template(name)
+
+ def TemplateResponse(
+ self,
+ name: str,
+ context: dict,
+ status_code: int = 200,
+ headers: dict = None,
+ media_type: str = None,
+ background: BackgroundTask = None,
+ ) -> _TemplateResponse:
+ if "request" not in context:
+ raise ValueError('context must include a "request" key')
+ template = self.get_template(name)
+ return _TemplateResponse(
+ template,
+ context,
+ status_code=status_code,
+ headers=headers,
+ media_type=media_type,
+ background=background,
+ )
+
+
+def parse_accept_lang(header: str, options: typing.List[str]) -> typing.Optional[str]:
+ """
+ Crude HTTP Accept-Language content negotiation.
+ Assumes that languages are specified in order of priority, etc.
+ """
+ if not header:
+ return None
+ chunks = [v.split(";")[0].split("-")[0].split("_")[0] for v in header.split(",")]
+ for c in chunks:
+ if len(c) == 2 and c in options:
+ return c
+ return None
+
+
+def test_parse_accept_lang() -> None:
+ assert parse_accept_lang("", []) is None
+ assert parse_accept_lang("en,de", []) is None
+ assert parse_accept_lang("en,de", ["en"]) == "en"
+ assert parse_accept_lang("en-GB,de", ["en"]) == "en"
+ assert parse_accept_lang("zh_Hans_CN", ["en", "zh"]) == "zh"
+ assert parse_accept_lang("en,de", ["de"]) == "de"
+ assert (
+ parse_accept_lang("en-ca,en;q=0.8,en-us;q=0.6,de-de;q=0.4,de;q=0.2", ["de"])
+ == "de"
+ )
+ assert (
+ parse_accept_lang(
+ "en-ca,en;q=0.8,en-us;q=0.6,de-de;q=0.4,de;q=0.2", ["en", "de"]
+ )
+ == "en"
+ )
+ assert (
+ parse_accept_lang("en-US,en;q=0.9,zh-CN;q=0.8,zh;q=0.7", ["zh", "en", "de"])
+ == "en"
+ )
+
+
+def wayback_direct_url(url: str) -> str:
+ """
+ Re-writes a wayback replay URL to add the 'id_' suffix (or equivalent for direct file access)
+ """
+ if "://web.archive.org" not in url:
+ return url
+ segments = url.split("/")
+ if len(segments) < 6 or not segments[4].isdigit():
+ return url
+ segments[4] += "id_"
+ return "/".join(segments)
+
+
+def test_wayback_direct_url() -> None:
+ assert (
+ wayback_direct_url("http://fatcat.wiki/thing.pdf")
+ == "http://fatcat.wiki/thing.pdf"
+ )
+ assert (
+ wayback_direct_url("https://web.archive.org/web/*/http://fatcat.wiki/thing.pdf")
+ == "https://web.archive.org/web/*/http://fatcat.wiki/thing.pdf"
+ )
+ assert (
+ wayback_direct_url(
+ "https://web.archive.org/web/1234/http://fatcat.wiki/thing.pdf"
+ )
+ == "https://web.archive.org/web/1234id_/http://fatcat.wiki/thing.pdf"
+ )
+ assert (
+ wayback_direct_url(
+ "https://web.archive.org/web/20170811115414/http://sudjms.net/issues/5-4/pdf/8)A%20comparison%20study%20of%20histochemical%20staining%20of%20various%20tissues%20after.pdf"
+ )
+ == "https://web.archive.org/web/20170811115414id_/http://sudjms.net/issues/5-4/pdf/8)A%20comparison%20study%20of%20histochemical%20staining%20of%20various%20tissues%20after.pdf"
+ )
+
+
+def make_access_redirect_url(work_ident: str, access_type: str, access_url: str) -> str:
+ if access_type == "wayback" and "://web.archive.org/" in access_url:
+ segments = access_url.split("/")
+ original_url = "/".join(segments[5:])
+ return f"https://scholar.archive.org/work/{work_ident}/access/wayback/{original_url}"
+ elif access_type == "ia_file" and "://archive.org/download/" in access_url:
+ suffix = "/".join(access_url.split("/")[4:])
+ return f"https://scholar.archive.org/work/{work_ident}/access/ia_file/{suffix}"
+ else:
+ return access_url
+
+
+def test_make_access_redirect_url() -> None:
+ assert (
+ make_access_redirect_url(
+ "lmobci36t5aelogzjsazuwxpie",
+ "wayback",
+ "https://web.archive.org/web/1234/http://fatcat.wiki/thing.pdf",
+ )
+ == "https://scholar.archive.org/work/lmobci36t5aelogzjsazuwxpie/access/wayback/http://fatcat.wiki/thing.pdf"
+ )
+ assert (
+ make_access_redirect_url(
+ "lmobci36t5aelogzjsazuwxpie",
+ "wayback",
+ "https://web.archive.org/web/1234/http://fatcat.wiki/thing.pdf?param=asdf",
+ )
+ == "https://scholar.archive.org/work/lmobci36t5aelogzjsazuwxpie/access/wayback/http://fatcat.wiki/thing.pdf?param=asdf"
+ )
+ assert (
+ make_access_redirect_url(
+ "lmobci36t5aelogzjsazuwxpie",
+ "ia_file",
+ "https://archive.org/download/something/file.pdf",
+ )
+ == "https://scholar.archive.org/work/lmobci36t5aelogzjsazuwxpie/access/ia_file/something/file.pdf"
+ )
+ assert (
+ make_access_redirect_url(
+ "lmobci36t5aelogzjsazuwxpie", "blah", "https://mit.edu/file.pdf"
+ )
+ == "https://mit.edu/file.pdf"
+ )
+ assert (
+ make_access_redirect_url(
+ "lmobci36t5aelogzjsazuwxpie",
+ "wayback",
+ "https://web.archive.org/web/20170811115414/http://sudjms.net/issues/5-4/pdf/8)A%20comparison%20study%20of%20histochemical%20staining%20of%20various%20tissues%20after.pdf",
+ )
+ == "https://scholar.archive.org/work/lmobci36t5aelogzjsazuwxpie/access/wayback/http://sudjms.net/issues/5-4/pdf/8)A%20comparison%20study%20of%20histochemical%20staining%20of%20various%20tissues%20after.pdf"
+ )