aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorBryan Newbold <bnewbold@archive.org>2020-09-15 17:40:13 -0700
committerBryan Newbold <bnewbold@archive.org>2020-09-15 17:40:15 -0700
commit7b80bfe54783bd662d0e71b3eee09c5ecf304355 (patch)
tree92277f982452fe34b7f1c7de2cbad2b3050004b9
parent0ec7f4a0fc8da9d37ad8afe60da070aaded8d5f2 (diff)
downloadfatcat-scholar-7b80bfe54783bd662d0e71b3eee09c5ecf304355.tar.gz
fatcat-scholar-7b80bfe54783bd662d0e71b3eee09c5ecf304355.zip
web robots.txt
Two variants depending on environment.
-rw-r--r--fatcat_scholar/static/robots.allow.txt5
-rw-r--r--fatcat_scholar/static/robots.disallow.txt4
-rw-r--r--fatcat_scholar/web.py13
3 files changed, 22 insertions, 0 deletions
diff --git a/fatcat_scholar/static/robots.allow.txt b/fatcat_scholar/static/robots.allow.txt
new file mode 100644
index 0000000..3c40454
--- /dev/null
+++ b/fatcat_scholar/static/robots.allow.txt
@@ -0,0 +1,5 @@
+# Allow most queries, but please don't crawl search results (or at least do so very politely, e.g., with no parallel requests)
+
+User-Agent: *
+Disallow: /search
+Allow: /
diff --git a/fatcat_scholar/static/robots.disallow.txt b/fatcat_scholar/static/robots.disallow.txt
new file mode 100644
index 0000000..de20247
--- /dev/null
+++ b/fatcat_scholar/static/robots.disallow.txt
@@ -0,0 +1,4 @@
+# By default (e.g., in QA/development) set robots to disallow all
+
+User-Agent: *
+Disallow: /
diff --git a/fatcat_scholar/web.py b/fatcat_scholar/web.py
index 866214e..5ad301d 100644
--- a/fatcat_scholar/web.py
+++ b/fatcat_scholar/web.py
@@ -7,6 +7,7 @@ So far there are few endpoints, so we just put them all here!
import babel.support
from fastapi import FastAPI, APIRouter, Request, Depends, Response
from fastapi.staticfiles import StaticFiles
+from fastapi.responses import PlainTextResponse
from dynaconf import settings
from typing import Optional, Any
@@ -218,3 +219,15 @@ for lang_option in I18N_LANG_OPTIONS:
app.include_router(api)
app.mount("/static", StaticFiles(directory="fatcat_scholar/static"), name="static")
+
+# Read both robots.txt variants once, at import time. The files are opened via
+# context managers so the handles are closed deterministically instead of being
+# left for the garbage collector.
+with open("fatcat_scholar/static/robots.allow.txt", "r") as robots_file:
+    ROBOTS_ALLOW = robots_file.read()
+with open("fatcat_scholar/static/robots.disallow.txt", "r") as robots_file:
+    ROBOTS_DISALLOW = robots_file.read()
+
+
+@app.get("/robots.txt", include_in_schema=False)
+async def robots_txt(response_class=PlainTextResponse) -> Any:
+    """
+    Serve /robots.txt as plain text.
+
+    Returns the contents of ROBOTS_ALLOW when settings.SCHOLAR_ENV is "prod",
+    and the contents of ROBOTS_DISALLOW in every other environment.
+    """
+    # NOTE(review): response_class is not used in this body; it is normally
+    # passed to the @app.get() decorator rather than declared as a handler
+    # parameter -- confirm and move it there. Kept as-is to preserve the signature.
+    if settings.SCHOLAR_ENV == "prod":
+        return PlainTextResponse(ROBOTS_ALLOW)
+    else:
+        return PlainTextResponse(ROBOTS_DISALLOW)