summary | refs | log | tree | commit | diff | stats
path: root/fatcat_scholar/hacks.py
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@archive.org> 2022-08-12 11:56:57 -0700
committer: Bryan Newbold <bnewbold@archive.org> 2022-08-12 11:56:57 -0700
commit: c129824ddee29d130eedfaa47468365cf6825740 (patch)
tree: 66240b0aca88d62b4a2d54d06c0d3d4bde6b030c /fatcat_scholar/hacks.py
parent: 19670b4c2956f6289efe00b8dcff40fcb3d696ae (diff)
download: fatcat-scholar-c129824ddee29d130eedfaa47468365cf6825740.tar.gz
download: fatcat-scholar-c129824ddee29d130eedfaa47468365cf6825740.zip
rename fatcat_scholar.hacks to fatcat_scholar.web_hacks
Diffstat (limited to 'fatcat_scholar/hacks.py')
-rw-r--r-- fatcat_scholar/hacks.py 179
1 files changed, 0 insertions, 179 deletions
diff --git a/fatcat_scholar/hacks.py b/fatcat_scholar/hacks.py
deleted file mode 100644
index 2be90f0..0000000
--- a/fatcat_scholar/hacks.py
+++ /dev/null
@@ -1,179 +0,0 @@
-import typing
-
-import jinja2
-from starlette.background import BackgroundTask
-from starlette.templating import _TemplateResponse
-
-
class Jinja2Templates:
    """
    This is a patched version of starlette.templating.Jinja2Templates that
    supports extensions (list of strings) passed to jinja2.Environment
    """

    def __init__(
        self, directory: str, extensions: typing.Optional[typing.List[str]] = None
    ) -> None:
        """
        Build the jinja2 environment and store it on `self.env`.

        directory: template directory handed to jinja2.FileSystemLoader
        extensions: jinja2 extension names; None (the default) means no
            extensions
        """
        self.env = self.get_env(directory, extensions)

    def get_env(
        self, directory: str, extensions: typing.Optional[typing.List[str]] = None
    ) -> "jinja2.Environment":
        """
        Create an autoescaping jinja2.Environment that loads templates from
        `directory`, with `extensions` enabled and a `url_for` global
        registered.
        """

        @jinja2.pass_context
        def url_for(context: dict, name: str, **path_params: typing.Any) -> str:
            # Delegates to the request object stored in the template context
            request = context["request"]
            return request.url_for(name, **path_params)

        loader = jinja2.FileSystemLoader(directory)
        env = jinja2.Environment(
            loader=loader,
            # None default avoids sharing one mutable list between calls
            extensions=[] if extensions is None else extensions,
            autoescape=True,
        )
        env.globals["url_for"] = url_for
        return env

    def get_template(self, name: str) -> "jinja2.Template":
        """
        Look up the template called `name` in this instance's environment.
        """
        return self.env.get_template(name)

    def TemplateResponse(
        self,
        name: str,
        context: dict,
        status_code: int = 200,
        headers: typing.Optional[dict] = None,
        media_type: typing.Optional[str] = None,
        background: typing.Optional[BackgroundTask] = None,
    ) -> _TemplateResponse:
        """
        Wrap the template called `name` and `context` in a _TemplateResponse.

        The remaining arguments are forwarded unchanged to _TemplateResponse.

        Raises ValueError if `context` has no "request" key (the `url_for`
        template global reads it).
        """
        if "request" not in context:
            raise ValueError('context must include a "request" key')
        template = self.get_template(name)
        return _TemplateResponse(
            template,
            context,
            status_code=status_code,
            headers=headers,
            media_type=media_type,
            background=background,
        )
-
-
def parse_accept_lang(header: str, options: typing.List[str]) -> typing.Optional[str]:
    """
    Crude HTTP Accept-Language content negotiation.

    Assumes that languages are specified in order of priority; any
    parameters after ';' (such as q-values) are discarded, not interpreted.
    Only the primary subtag of each entry (the part before any '-' or '_')
    is considered, and only when it is exactly two characters long.

    Whitespace around entries is ignored and matching against `options`
    falls back to a case-insensitive comparison, so "en-US, DE;q=0.8"
    can match the option "de".

    Returns the matching element of `options`, or None if `header` is empty
    or nothing matches.
    """
    if not header:
        return None

    # Lower-cased option -> option as spelled by the caller (first one wins)
    folded_options: typing.Dict[str, str] = {}
    for option in options:
        folded_options.setdefault(option.lower(), option)

    for entry in header.split(","):
        # Drop parameters, then surrounding whitespace (e.g. after the comma)
        tag = entry.split(";")[0].strip()
        primary = tag.split("-")[0].split("_")[0]
        if len(primary) != 2:
            continue
        # Exact match first, so existing callers get the same value back
        if primary in options:
            return primary
        match = folded_options.get(primary.lower())
        if match is not None:
            return match
    return None
-
-
def test_parse_accept_lang() -> None:
    """
    Table-driven check of parse_accept_lang: each row is
    (header, options, expected result).
    """
    weighted_header = "en-ca,en;q=0.8,en-us;q=0.6,de-de;q=0.4,de;q=0.2"
    cases = [
        ("", [], None),
        ("en,de", [], None),
        ("en,de", ["en"], "en"),
        ("en-GB,de", ["en"], "en"),
        ("zh_Hans_CN", ["en", "zh"], "zh"),
        ("en,de", ["de"], "de"),
        (weighted_header, ["de"], "de"),
        (weighted_header, ["en", "de"], "en"),
        ("en-US,en;q=0.9,zh-CN;q=0.8,zh;q=0.7", ["zh", "en", "de"], "en"),
    ]
    for header, options, expected in cases:
        result = parse_accept_lang(header, options)
        if expected is None:
            assert result is None
        else:
            assert result == expected
-
-
def wayback_direct_url(url: str) -> str:
    """
    Re-writes a wayback replay URL to add the 'id_' suffix (or equivalent for direct file access)

    The URL is returned unchanged unless it contains "://web.archive.org",
    splits into at least six '/'-separated pieces, and its fifth piece (the
    timestamp position) is all digits.
    """
    if "://web.archive.org" in url:
        pieces = url.split("/")
        # pieces[4] is the timestamp slot in .../web/<timestamp>/<original url>
        if len(pieces) >= 6 and pieces[4].isdigit():
            pieces[4] = pieces[4] + "id_"
            return "/".join(pieces)
    return url
-
-
def test_wayback_direct_url() -> None:
    """
    Table-driven check of wayback_direct_url: each row is
    (input URL, expected output URL).
    """
    cases = [
        # Non-wayback URL passes through untouched
        (
            "http://fatcat.wiki/thing.pdf",
            "http://fatcat.wiki/thing.pdf",
        ),
        # Wildcard timestamp is not numeric, so nothing is rewritten
        (
            "https://web.archive.org/web/*/http://fatcat.wiki/thing.pdf",
            "https://web.archive.org/web/*/http://fatcat.wiki/thing.pdf",
        ),
        (
            "https://web.archive.org/web/1234/http://fatcat.wiki/thing.pdf",
            "https://web.archive.org/web/1234id_/http://fatcat.wiki/thing.pdf",
        ),
        (
            "https://web.archive.org/web/20170811115414/http://sudjms.net/issues/5-4/pdf/8)A%20comparison%20study%20of%20histochemical%20staining%20of%20various%20tissues%20after.pdf",
            "https://web.archive.org/web/20170811115414id_/http://sudjms.net/issues/5-4/pdf/8)A%20comparison%20study%20of%20histochemical%20staining%20of%20various%20tissues%20after.pdf",
        ),
    ]
    for given, expected in cases:
        assert wayback_direct_url(given) == expected
-
-
def make_access_redirect_url(work_ident: str, access_type: str, access_url: str) -> str:
    """
    Build a scholar.archive.org access-redirect URL for a work.

    - "wayback" access with a "://web.archive.org/" URL: everything from the
      sixth '/'-separated piece onward is appended to the
      /work/{work_ident}/access/wayback/ path.
    - "ia_file" access with a "://archive.org/download/" URL: everything from
      the fifth '/'-separated piece onward is appended to the
      /work/{work_ident}/access/ia_file/ path.
    - Anything else: `access_url` is returned unchanged.
    """
    is_wayback = access_type == "wayback" and "://web.archive.org/" in access_url
    if is_wayback:
        original_url = "/".join(access_url.split("/")[5:])
        return f"https://scholar.archive.org/work/{work_ident}/access/wayback/{original_url}"

    is_ia_file = access_type == "ia_file" and "://archive.org/download/" in access_url
    if is_ia_file:
        pieces = access_url.split("/")
        suffix = "/".join(pieces[4:])
        return f"https://scholar.archive.org/work/{work_ident}/access/ia_file/{suffix}"

    return access_url
-
-
def test_make_access_redirect_url() -> None:
    """
    Table-driven check of make_access_redirect_url: each row is
    (access_type, access_url, expected redirect URL), all for the same
    work identifier.
    """
    ident = "lmobci36t5aelogzjsazuwxpie"
    cases = [
        (
            "wayback",
            "https://web.archive.org/web/1234/http://fatcat.wiki/thing.pdf",
            "https://scholar.archive.org/work/lmobci36t5aelogzjsazuwxpie/access/wayback/http://fatcat.wiki/thing.pdf",
        ),
        # Query string of the original URL is carried over
        (
            "wayback",
            "https://web.archive.org/web/1234/http://fatcat.wiki/thing.pdf?param=asdf",
            "https://scholar.archive.org/work/lmobci36t5aelogzjsazuwxpie/access/wayback/http://fatcat.wiki/thing.pdf?param=asdf",
        ),
        (
            "ia_file",
            "https://archive.org/download/something/file.pdf",
            "https://scholar.archive.org/work/lmobci36t5aelogzjsazuwxpie/access/ia_file/something/file.pdf",
        ),
        # Unknown access type: URL is returned as-is
        (
            "blah",
            "https://mit.edu/file.pdf",
            "https://mit.edu/file.pdf",
        ),
        (
            "wayback",
            "https://web.archive.org/web/20170811115414/http://sudjms.net/issues/5-4/pdf/8)A%20comparison%20study%20of%20histochemical%20staining%20of%20various%20tissues%20after.pdf",
            "https://scholar.archive.org/work/lmobci36t5aelogzjsazuwxpie/access/wayback/http://sudjms.net/issues/5-4/pdf/8)A%20comparison%20study%20of%20histochemical%20staining%20of%20various%20tissues%20after.pdf",
        ),
    ]
    for access_type, access_url, expected in cases:
        assert make_access_redirect_url(ident, access_type, access_url) == expected