1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
|
# type: ignore
# TODO: we are ignoring mypy type checks on this file because of all the
# 'os.environ' usage, which is causing trouble with newer mypy. Should resolve
# (annotate?) and re-enable type checking here
"""
Default configuration for fatcat web interface (Flask application).
In production, we currently reconfigure these values using environment
variables, not by (eg) deploying a variant copy of this file.
This config is *only* for the web interface, *not* for any of the workers or
import scripts.
"""
import os
import subprocess
from typing import Union
# Absolute path of the directory containing this file
basedir = os.path.abspath(os.path.dirname(__file__))
def bool_str(raw: Union[str, bool, None]) -> bool:
    """
    Helper for parsing environment variables as booleans.

    Any falsy input (None, False, the empty string) is False, and the literal
    True is True. Strings are stripped of surrounding whitespace and compared
    case-insensitively against "0"/"f"/"false"/"no"/"n" (False) and
    "1"/"t"/"true"/"yes"/"y" (True).

    Raises ValueError, naming the offending value, for anything else.
    """
    if not raw:
        return False
    if raw is True:
        return True
    if isinstance(raw, str):
        normalized = raw.strip().lower()
        if normalized in ("0", "f", "false", "no", "n"):
            return False
        if normalized in ("1", "t", "true", "yes", "y"):
            return True
    # must be an f-string, otherwise the message shows a literal "{raw}"
    raise ValueError(f"Unparsable boolean value: {raw!r}")
def test_bool_str() -> None:
    """
    Spot-check bool_str() against boolean, None, and string inputs.
    """
    # inputs that must parse as True
    for truthy in (True, "True"):
        assert bool_str(truthy) is True
    # inputs that must parse as False
    for falsy in (None, False, "", "0", "FALSE"):
        assert bool_str(falsy) is False
def fetch_git_sha() -> str:
    """
    Return the short commit id of HEAD, as printed by
    `git rev-parse --short HEAD`. Works from anywhere within a git repository.
    """
    raw_output = subprocess.check_output(["git", "rev-parse", "--short", "HEAD"])
    # git prints the id followed by a newline; drop it
    return raw_output.decode("ascii").strip()
class Config(object):
    """
    Settings object for the Flask web interface.

    Every attribute is computed once, when this class body executes (i.e. at
    import time). Most values can be overridden by an environment variable of
    the same name; the defaults here target a local development setup.
    """

    # Output of `git describe`. Guarded the same way as GIT_RELEASE below, so
    # that a missing git binary or a deployment outside of a git checkout does
    # not make this module un-importable; None when it can not be determined.
    try:
        GIT_REVISION = (
            subprocess.check_output(["git", "describe", "--tags", "--long", "--always"])
            .strip()
            .decode("utf-8")
        )
    except (subprocess.CalledProcessError, OSError) as e:
        print("WARNING: couldn't determine git revision automatically: " + str(e))
        GIT_REVISION = None

    # This is, effectively, the QA/PROD flag
    FATCAT_DOMAIN = os.environ.get("FATCAT_DOMAIN", default="dev.fatcat.wiki")
    FATCAT_API_AUTH_TOKEN = os.environ.get("FATCAT_API_AUTH_TOKEN", default=None)
    FATCAT_API_HOST = os.environ.get(
        "FATCAT_API_HOST", default=f"https://api.{FATCAT_DOMAIN}/v0"
    )
    # on the dev domain the public API host defaults to whatever API host is
    # configured above; elsewhere it defaults to the domain's own API URL
    public_host_default = f"https://api.{FATCAT_DOMAIN}/v0"
    if FATCAT_DOMAIN == "dev.fatcat.wiki":
        public_host_default = FATCAT_API_HOST
    FATCAT_PUBLIC_API_HOST = os.environ.get(
        "FATCAT_PUBLIC_API_HOST", default=public_host_default
    )

    # can set this to https://search.fatcat.wiki for some experimentation
    ELASTICSEARCH_BACKEND = os.environ.get(
        "ELASTICSEARCH_BACKEND", default="http://localhost:9200"
    )
    ELASTICSEARCH_RELEASE_INDEX = os.environ.get(
        "ELASTICSEARCH_RELEASE_INDEX", default="fatcat_release"
    )
    ELASTICSEARCH_CONTAINER_INDEX = os.environ.get(
        "ELASTICSEARCH_CONTAINER_INDEX", default="fatcat_container"
    )

    # for save-paper-now. set to None if not configured, so we don't display forms/links
    # (the trailing `or None` also maps an empty string to None)
    KAFKA_PIXY_ENDPOINT = os.environ.get("KAFKA_PIXY_ENDPOINT", default=None) or None
    KAFKA_SAVEPAPERNOW_TOPIC = os.environ.get(
        "KAFKA_SAVEPAPERNOW_TOPIC", default="sandcrawler-dev.ingest-file-requests-priority"
    )

    # for flask things, like session cookies
    FLASK_SECRET_KEY = os.environ.get("FLASK_SECRET_KEY", default=None)
    SECRET_KEY = FLASK_SECRET_KEY

    # client credentials for the supported login providers
    ORCID_CLIENT_ID = os.environ.get("ORCID_CLIENT_ID", default=None)
    ORCID_CLIENT_SECRET = os.environ.get("ORCID_CLIENT_SECRET", default=None)

    WIKIPEDIA_CLIENT_ID = os.environ.get("WIKIPEDIA_CLIENT_ID", default=None)
    WIKIPEDIA_CLIENT_SECRET = os.environ.get("WIKIPEDIA_CLIENT_SECRET", default=None)

    GITLAB_CLIENT_ID = os.environ.get("GITLAB_CLIENT_ID", default=None)
    GITLAB_CLIENT_SECRET = os.environ.get("GITLAB_CLIENT_SECRET", default=None)

    GITHUB_CLIENT_ID = os.environ.get("GITHUB_CLIENT_ID", default=None)
    GITHUB_CLIENT_SECRET = os.environ.get("GITHUB_CLIENT_SECRET", default=None)

    IA_XAUTH_URI = "https://archive.org/services/xauthn/"
    IA_XAUTH_CLIENT_ID = os.environ.get("IA_XAUTH_CLIENT_ID", default=None)
    IA_XAUTH_CLIENT_SECRET = os.environ.get("IA_XAUTH_CLIENT_SECRET", default=None)

    # analytics; used in production
    ENABLE_GOATCOUNTER = bool_str(os.environ.get("ENABLE_GOATCOUNTER", default=False))
    GOATCOUNTER_ENDPOINT = os.environ.get(
        "GOATCOUNTER_ENDPOINT", default="https://goatcounter.fatcat.wiki/count"
    )
    GOATCOUNTER_SCRIPT_URL = os.environ.get(
        "GOATCOUNTER_SCRIPT_URL", default="https://goatcounter.fatcat.wiki/count.js"
    )

    # controls granularity of "shadow_only" preservation category
    FATCAT_MERGE_SHADOW_PRESERVATION = bool_str(
        os.environ.get("FATCAT_MERGE_SHADOW_PRESERVATION", default=False)
    )

    # creates a site-wide alert message
    FATCAT_ALERT_MESSAGE = os.environ.get("FATCAT_ALERT_MESSAGE", default=None) or None

    # CSRF on by default, but only for WTF forms (not, eg, search, lookups, GET
    # forms)
    WTF_CSRF_CHECK_DEFAULT = False
    WTF_CSRF_TIME_LIMIT = None

    # for login redirects
    USE_SESSION_FOR_NEXT = True

    if FATCAT_DOMAIN == "dev.fatcat.wiki":
        # "Even more verbose" debug options
        # SQLALCHEMY_ECHO = True
        # DEBUG = True
        pass
    else:
        # protect cookies (which include API tokens)
        SESSION_COOKIE_HTTPONLY = True
        SESSION_COOKIE_SECURE = True
        SESSION_COOKIE_SAMESITE = "Lax"
        PERMANENT_SESSION_LIFETIME = 2678400  # 31 days, in seconds

    # short commit id, passed to sentry as the release; None if unavailable
    try:
        GIT_RELEASE = fetch_git_sha()
    except Exception as e:
        print("WARNING: couldn't set sentry git release automatically: " + str(e))
        GIT_RELEASE = None

    SENTRY_CONFIG = {
        #'include_paths': ['fatcat_web', 'fatcat_openapi_client', 'fatcat_tools'],
        "enable-threads": True,  # for uWSGI
        "release": GIT_RELEASE,
        "tags": {
            "fatcat_domain": FATCAT_DOMAIN,
        },
    }
|