From 7b80bfe54783bd662d0e71b3eee09c5ecf304355 Mon Sep 17 00:00:00 2001
From: Bryan Newbold
Date: Tue, 15 Sep 2020 17:40:13 -0700
Subject: web robots.txt

Two variants depending on environment.
---
 fatcat_scholar/static/robots.allow.txt    |  5 +++++
 fatcat_scholar/static/robots.disallow.txt |  4 ++++
 fatcat_scholar/web.py                     | 20 ++++++++++++++++++++
 3 files changed, 29 insertions(+)
 create mode 100644 fatcat_scholar/static/robots.allow.txt
 create mode 100644 fatcat_scholar/static/robots.disallow.txt

diff --git a/fatcat_scholar/static/robots.allow.txt b/fatcat_scholar/static/robots.allow.txt
new file mode 100644
index 0000000..3c40454
--- /dev/null
+++ b/fatcat_scholar/static/robots.allow.txt
@@ -0,0 +1,5 @@
+# Allow most queries, but please don't crawl search results (or at least do so very politely, eg no parallel requests)
+
+User-Agent: *
+Disallow: /search
+Allow: /
diff --git a/fatcat_scholar/static/robots.disallow.txt b/fatcat_scholar/static/robots.disallow.txt
new file mode 100644
index 0000000..de20247
--- /dev/null
+++ b/fatcat_scholar/static/robots.disallow.txt
@@ -0,0 +1,4 @@
+# By default (eg, in QA/development) set robots to disallow all
+
+User-Agent: *
+Disallow: /
diff --git a/fatcat_scholar/web.py b/fatcat_scholar/web.py
index 866214e..5ad301d 100644
--- a/fatcat_scholar/web.py
+++ b/fatcat_scholar/web.py
@@ -7,6 +7,7 @@ So far there are few endpoints, so we just put them all here!
 import babel.support
 from fastapi import FastAPI, APIRouter, Request, Depends, Response
 from fastapi.staticfiles import StaticFiles
+from fastapi.responses import PlainTextResponse
 from dynaconf import settings
 from typing import Optional, Any
 
@@ -218,3 +219,22 @@ for lang_option in I18N_LANG_OPTIONS:
 
 app.include_router(api)
 app.mount("/static", StaticFiles(directory="fatcat_scholar/static"), name="static")
+
+# Both robots.txt variants are read once at import time and served from memory
+with open("fatcat_scholar/static/robots.allow.txt", "r") as robots_file:
+    ROBOTS_ALLOW = robots_file.read()
+with open("fatcat_scholar/static/robots.disallow.txt", "r") as robots_file:
+    ROBOTS_DISALLOW = robots_file.read()
+
+
+@app.get("/robots.txt", include_in_schema=False, response_class=PlainTextResponse)
+async def robots_txt() -> Any:
+    """
+    Serves one of two robots.txt variants depending on deployment environment:
+    the permissive file when SCHOLAR_ENV is "prod", otherwise the variant which
+    disallows all crawling (eg, in QA/development).
+    """
+    if settings.SCHOLAR_ENV == "prod":
+        return PlainTextResponse(ROBOTS_ALLOW)
+    else:
+        return PlainTextResponse(ROBOTS_DISALLOW)
-- 
cgit v1.2.3