1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
|
import typing
import babel.numbers
import babel.support
import jinja2
from starlette.background import BackgroundTask
from starlette.templating import _TemplateResponse
from fatcat_scholar.config import I18N_LANG_OPTIONS, settings
class Jinja2Templates:
    """
    This is a patched version of starlette.templating.Jinja2Templates that
    supports extensions (list of strings) passed to jinja2.Environment

    A single jinja2 environment is built at construction time and stored as
    `self.env`; templates are loaded and rendered through it.
    """

    def __init__(
        self, directory: str, extensions: typing.Optional[typing.List[str]] = None
    ) -> None:
        """
        directory: path handed to jinja2.FileSystemLoader
        extensions: jinja2 extension import strings; None (the default) means
            no extensions
        """
        assert jinja2 is not None, "jinja2 must be installed to use Jinja2Templates"
        self.env = self.get_env(directory, extensions)

    def get_env(
        self, directory: str, extensions: typing.Optional[typing.List[str]] = None
    ) -> "jinja2.Environment":
        """
        Builds an autoescaping jinja2.Environment which loads templates from
        `directory`, with `extensions` enabled and a `url_for` global
        registered.
        """

        @jinja2.pass_context
        def url_for(context: dict, name: str, **path_params: typing.Any) -> str:
            # delegates to the request object stored in the template context
            request = context["request"]
            return request.url_for(name, **path_params)

        loader = jinja2.FileSystemLoader(directory)
        env = jinja2.Environment(
            loader=loader,
            # a None default (instead of a shared mutable `[]`) is normalized
            # here to a fresh empty list
            extensions=[] if extensions is None else extensions,
            autoescape=True,
        )
        env.globals["url_for"] = url_for
        return env

    def get_template(self, name: str) -> "jinja2.Template":
        """
        Looks up the template called `name` in this object's environment.
        """
        return self.env.get_template(name)

    def TemplateResponse(
        self,
        name: str,
        context: dict,
        status_code: int = 200,
        headers: typing.Optional[dict] = None,
        media_type: typing.Optional[str] = None,
        background: typing.Optional[BackgroundTask] = None,
    ) -> _TemplateResponse:
        """
        Wraps the template called `name`, together with `context`, in a
        starlette _TemplateResponse. The remaining arguments are passed
        through to the response unchanged.

        Raises ValueError if `context` has no "request" key.
        """
        if "request" not in context:
            raise ValueError('context must include a "request" key')
        template = self.get_template(name)
        return _TemplateResponse(
            template,
            context,
            status_code=status_code,
            headers=headers,
            media_type=media_type,
            background=background,
        )
def load_i18n_files() -> typing.Any:
    """
    Loads the babel translations for every entry of I18N_LANG_OPTIONS and
    returns them as a dict keyed by language option.

    This is part of a hack to work around lack of per-request translation
    (babel/gettext) locale switching in FastAPI and Starlette. Flask (and
    presumably others) get around this using global context (eg, in
    Flask-Babel).

    See related issues:

    - https://github.com/encode/starlette/issues/279
    - https://github.com/aio-libs/aiohttp-jinja2/issues/187
    """
    return {
        lang_opt: babel.support.Translations.load(
            dirname="fatcat_scholar/translations",
            locales=[lang_opt],
        )
        for lang_opt in I18N_LANG_OPTIONS
    }
# Translations for every configured language option, loaded once when this
# module is imported; i18n_templates() looks up its locale in this mapping.
I18N_TRANSLATION_FILES = load_i18n_files()
def locale_gettext(translations: typing.Any) -> typing.Any:
    """
    Returns a one-argument callable that translates a message by calling
    `translations.ugettext()` on it.
    """
    return lambda s: translations.ugettext(s)
def locale_ngettext(translations: typing.Any) -> typing.Any:
    """
    Returns a three-argument callable (singular, plural, count) that
    translates by calling `translations.ungettext()` with those arguments.
    """
    return lambda s, p, n: translations.ungettext(s, p, n)
def i18n_templates(locale: str) -> Jinja2Templates:
    """
    Creates a new Jinja2Templates object (and with it a new Jinja2
    Environment) that has the translations for `locale` installed.

    This is a hack to work around lack of per-request translation
    (babel/gettext) locale switching in FastAPI and Starlette. Flask (and
    presumably others) get around this using global context (eg, in
    Flask-Babel).

    The intent is to call this function separately within each request that
    needs it, so that the Environment is never shared between requests. This
    is inefficient but should resolve issues with cross-request poisoning,
    both in threading (threadpool) or async concurrency.

    `locale` is looked up directly in I18N_TRANSLATION_FILES.

    See related issues:

    - https://github.com/encode/starlette/issues/279
    - https://github.com/aio-libs/aiohttp-jinja2/issues/187
    """
    catalog = I18N_TRANSLATION_FILES[locale]
    templates = Jinja2Templates(
        directory="fatcat_scholar/templates",
        extensions=["jinja2.ext.i18n", "jinja2.ext.do"],
    )
    env = templates.env

    env.install_gettext_translations(catalog, newstyle=True)  # type: ignore
    env.install_gettext_callables(  # type: ignore
        locale_gettext(catalog),
        locale_ngettext(catalog),
        newstyle=True,
    )

    # remove a lot of whitespace in HTML output with these configs
    env.trim_blocks = True
    env.lstrip_blocks = True

    # pass-through application settings (and a couple of helpers) to be
    # available in templates
    env.globals.update(
        settings=settings,
        babel_numbers=babel.numbers,
        make_access_redirect_url=make_access_redirect_url,
    )
    return templates
def parse_accept_lang(header: str, options: typing.List[str]) -> typing.Optional[str]:
    """
    Crude HTTP Accept-Language content negotiation.

    Assumes that languages are specified in order of priority, etc. Quality
    values (";q=...") are discarded, not used for ranking.

    header: raw Accept-Language header value; may be empty
    options: language codes which are acceptable return values

    Returns the first two-letter primary language code from the header which
    is in `options`, or None if there is no such code. Whitespace around
    header entries is ignored, and a code which only differs from an option
    by case is returned in lower-case.
    """
    if not header:
        return None
    for entry in header.split(","):
        # drop any ";q=..." parameter and any region/script subtags; entries
        # are commonly separated by ", " so surrounding spaces are stripped
        code = entry.split(";")[0].split("-")[0].split("_")[0].strip()
        if len(code) != 2:
            continue
        if code in options:
            return code
        # language tags are case-insensitive; fall back to lower-case match
        lowered = code.lower()
        if lowered in options:
            return lowered
    return None
def test_parse_accept_lang() -> None:
    """
    Table-driven checks of parse_accept_lang(): empty inputs, region and
    script subtags, and headers carrying q-values.
    """
    # headers for which no language should be selected
    assert parse_accept_lang("", []) is None
    assert parse_accept_lang("en,de", []) is None

    weighted_header = "en-ca,en;q=0.8,en-us;q=0.6,de-de;q=0.4,de;q=0.2"
    # (header, options, expected language)
    cases = [
        ("en,de", ["en"], "en"),
        ("en-GB,de", ["en"], "en"),
        ("zh_Hans_CN", ["en", "zh"], "zh"),
        ("en,de", ["de"], "de"),
        (weighted_header, ["de"], "de"),
        (weighted_header, ["en", "de"], "en"),
        ("en-US,en;q=0.9,zh-CN;q=0.8,zh;q=0.7", ["zh", "en", "de"], "en"),
    ]
    for header, options, expected in cases:
        assert parse_accept_lang(header, options) == expected
def wayback_direct_url(url: str) -> str:
    """
    Appends 'id_' to the timestamp segment of a wayback replay URL (for
    direct file access).

    Any URL which does not contain "://web.archive.org", or whose fifth
    "/"-separated segment is not all digits, is returned unchanged.
    """
    if "://web.archive.org" in url:
        parts = url.split("/")
        # parts[4] is the timestamp in ".../web/<timestamp>/<original url>"
        if len(parts) >= 6 and parts[4].isdigit():
            parts[4] = parts[4] + "id_"
            return "/".join(parts)
    return url
def test_wayback_direct_url() -> None:
    """
    Checks which URLs wayback_direct_url() leaves alone and which ones get
    the 'id_' suffix added to the timestamp.
    """
    # non-wayback URL, and wayback URL with a wildcard instead of a timestamp
    untouched = [
        "http://fatcat.wiki/thing.pdf",
        "https://web.archive.org/web/*/http://fatcat.wiki/thing.pdf",
    ]
    for url in untouched:
        assert wayback_direct_url(url) == url

    # (replay URL, expected direct URL)
    rewritten = [
        (
            "https://web.archive.org/web/1234/http://fatcat.wiki/thing.pdf",
            "https://web.archive.org/web/1234id_/http://fatcat.wiki/thing.pdf",
        ),
        (
            "https://web.archive.org/web/20170811115414/http://sudjms.net/issues/5-4/pdf/8)A%20comparison%20study%20of%20histochemical%20staining%20of%20various%20tissues%20after.pdf",
            "https://web.archive.org/web/20170811115414id_/http://sudjms.net/issues/5-4/pdf/8)A%20comparison%20study%20of%20histochemical%20staining%20of%20various%20tissues%20after.pdf",
        ),
    ]
    for replay_url, direct_url in rewritten:
        assert wayback_direct_url(replay_url) == direct_url
def make_access_redirect_url(work_ident: str, access_type: str, access_url: str) -> str:
    """
    Maps an access URL to a scholar.archive.org access redirect URL for the
    given work.

    Only "wayback" URLs containing "://web.archive.org/" and "ia_file" URLs
    containing "://archive.org/download/" are rewritten; everything else is
    returned as-is.
    """
    if access_type == "wayback" and "://web.archive.org/" in access_url:
        # keep what follows ".../web/<timestamp>/", i.e. the captured URL
        original_url = "/".join(access_url.split("/")[5:])
        return f"https://scholar.archive.org/work/{work_ident}/access/wayback/{original_url}"

    if access_type == "ia_file" and "://archive.org/download/" in access_url:
        # keep what follows ".../download/", i.e. item name and file path
        path_parts = access_url.split("/")[4:]
        suffix = "/".join(path_parts)
        return f"https://scholar.archive.org/work/{work_ident}/access/ia_file/{suffix}"

    return access_url
def test_make_access_redirect_url() -> None:
    """
    Table-driven checks of make_access_redirect_url() for wayback, ia_file
    and pass-through access types.
    """
    work_ident = "lmobci36t5aelogzjsazuwxpie"
    # (access_type, access_url, expected redirect URL)
    cases = [
        (
            "wayback",
            "https://web.archive.org/web/1234/http://fatcat.wiki/thing.pdf",
            "https://scholar.archive.org/work/lmobci36t5aelogzjsazuwxpie/access/wayback/http://fatcat.wiki/thing.pdf",
        ),
        (
            "wayback",
            "https://web.archive.org/web/1234/http://fatcat.wiki/thing.pdf?param=asdf",
            "https://scholar.archive.org/work/lmobci36t5aelogzjsazuwxpie/access/wayback/http://fatcat.wiki/thing.pdf?param=asdf",
        ),
        (
            "ia_file",
            "https://archive.org/download/something/file.pdf",
            "https://scholar.archive.org/work/lmobci36t5aelogzjsazuwxpie/access/ia_file/something/file.pdf",
        ),
        (
            "blah",
            "https://mit.edu/file.pdf",
            "https://mit.edu/file.pdf",
        ),
        (
            "wayback",
            "https://web.archive.org/web/20170811115414/http://sudjms.net/issues/5-4/pdf/8)A%20comparison%20study%20of%20histochemical%20staining%20of%20various%20tissues%20after.pdf",
            "https://scholar.archive.org/work/lmobci36t5aelogzjsazuwxpie/access/wayback/http://sudjms.net/issues/5-4/pdf/8)A%20comparison%20study%20of%20histochemical%20staining%20of%20various%20tissues%20after.pdf",
        ),
    ]
    for access_type, access_url, expected in cases:
        assert make_access_redirect_url(work_ident, access_type, access_url) == expected
|