summaryrefslogtreecommitdiffstats
path: root/python/fatcat_web/static/robots.txt
diff options
context:
space:
mode:
authorBryan Newbold <bnewbold@robocracy.org>2020-08-19 23:00:34 -0700
committerBryan Newbold <bnewbold@robocracy.org>2020-08-19 23:00:36 -0700
commitc6b33542398c933a6272586e6280f7026b63a124 (patch)
treef95b5eb848ea3eceff910cd44b172e86f0332c1b /python/fatcat_web/static/robots.txt
parent5f282a6267182214080ca36bcec4da1755589b46 (diff)
downloadfatcat-c6b33542398c933a6272586e6280f7026b63a124.tar.gz
fatcat-c6b33542398c933a6272586e6280f7026b63a124.zip
update robots.txt and sitemap.xml
- show minimal robots/sitemap if not in prod environment - default to allow all in robots.txt; link to sitemap index files - basic sitemap.xml without entity-level links
Diffstat (limited to 'python/fatcat_web/static/robots.txt')
-rw-r--r--python/fatcat_web/static/robots.txt19
1 files changed, 19 insertions, 0 deletions
diff --git a/python/fatcat_web/static/robots.txt b/python/fatcat_web/static/robots.txt
index a168f11b..e89af36e 100644
--- a/python/fatcat_web/static/robots.txt
+++ b/python/fatcat_web/static/robots.txt
@@ -1 +1,20 @@
# Hello friends!
+# If you are considering large or automated crawling, you may want to look at
+# our API (https://api.fatcat.wiki) or bulk database snapshots instead.
+
+# By default, crawlers can crawl anything on this domain. HTTP 429 ("backoff") status
+# codes are used for rate-limiting instead of any crawl delay specified here.
+# Up to a handful of concurrent requests should be fine.
+User-agent: *
+Allow: /
+
+# crawling search result pages is expensive, so we do specify a long crawl delay for those
+User-agent: *
+Allow: /release/search
+Allow: /container/search
+Allow: /coverage/search
+Crawl-delay: 5
+
+Sitemap: https://fatcat.wiki/sitemap.xml
+Sitemap: https://fatcat.wiki/sitemap-index-releases.xml
+Sitemap: https://fatcat.wiki/sitemap-index-containers.xml