author    bnewbold <bnewbold@archive.org>  2020-08-20 21:17:59 +0000
committer bnewbold <bnewbold@archive.org>  2020-08-20 21:17:59 +0000
commit    daf91b137483b7345448b597289c78f8fb3f9969 (patch)
tree      712c27d902235d8d007763b512c57eaecd8045ad /python/fatcat_web/static/robots.txt
parent    5007ee299ce07b31db6d48cd4ab2587f87af53ab (diff)
parent    2a98d10be1cc1368f9510745bff07c343974d4a7 (diff)
download  fatcat-daf91b137483b7345448b597289c78f8fb3f9969.tar.gz
          fatcat-daf91b137483b7345448b597289c78f8fb3f9969.zip
Merge branch 'bnewbold-sitemap' into 'master'
basic sitemap setup

See merge request webgroup/fatcat!79
Diffstat (limited to 'python/fatcat_web/static/robots.txt')
-rw-r--r--  python/fatcat_web/static/robots.txt  19
1 file changed, 19 insertions(+), 0 deletions(-)
diff --git a/python/fatcat_web/static/robots.txt b/python/fatcat_web/static/robots.txt
index a168f11b..e89af36e 100644
--- a/python/fatcat_web/static/robots.txt
+++ b/python/fatcat_web/static/robots.txt
@@ -1 +1,20 @@
# Hello friends!
+# If you are considering large or automated crawling, you may want to look at
+# our API (https://api.fatcat.wiki) or bulk database snapshots instead.
+
+# By default, you can crawl anything on this domain. HTTP 429 ("backoff")
+# status codes are used for rate-limiting instead of any crawl delay
+# specified here. Up to a handful of concurrent requests should be fine.
+User-agent: *
+Allow: /
+
+# Crawling search result pages is expensive, so we specify a long crawl delay for those.
+User-agent: *
+Allow: /release/search
+Allow: /container/search
+Allow: /coverage/search
+Crawl-delay: 5
+
+Sitemap: https://fatcat.wiki/sitemap.xml
+Sitemap: https://fatcat.wiki/sitemap-index-releases.xml
+Sitemap: https://fatcat.wiki/sitemap-index-containers.xml
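
The robots.txt comment above leans on HTTP 429 ("backoff") responses for
rate limiting rather than a blanket crawl delay. Below is a minimal sketch
of the client side of that contract, using only the Python standard
library; the retry policy and the assumption that Retry-After carries a
number of seconds are illustrative, not documented fatcat behavior.

    import time
    import urllib.request
    from urllib.error import HTTPError

    def fetch_with_backoff(url: str, max_retries: int = 5) -> bytes:
        """Fetch a URL, backing off whenever the server answers HTTP 429."""
        delay = 1.0
        for _ in range(max_retries):
            try:
                with urllib.request.urlopen(url) as resp:
                    return resp.read()
            except HTTPError as err:
                if err.code != 429:
                    raise
                # Honor Retry-After when the server sends one (assumed here
                # to be seconds); otherwise fall back to exponential backoff.
                retry_after = err.headers.get("Retry-After")
                time.sleep(float(retry_after) if retry_after else delay)
                delay *= 2
        raise RuntimeError(f"still rate-limited after {max_retries} attempts: {url}")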
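The second User-agent group sets Crawl-delay: 5 for the search endpoints.
A crawler can read that value straight from the published robots.txt with
the standard library. The query URL below is just an example, and note that
parsers differ in how they treat duplicate User-agent groups; the values in
the comments are what CPython's urllib.robotparser reports.

    import urllib.robotparser

    rp = urllib.robotparser.RobotFileParser()
    rp.set_url("https://fatcat.wiki/robots.txt")
    rp.read()

    # Everything is allowed for generic agents...
    print(rp.can_fetch("*", "https://fatcat.wiki/release/search?q=coffee"))  # True
    # ...and the second group's Crawl-delay applies.
    print(rp.crawl_delay("*"))  # 5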
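The merge also registers three Sitemap entries; judging by the filenames,
the two -index files are presumably sitemap indexes that point at further
sitemap files. Here is a short sketch for listing the <loc> entries of
either a sitemap or a sitemap index, assuming the standard sitemaps.org
0.9 XML namespace.

    import urllib.request
    import xml.etree.ElementTree as ET

    SITEMAP_NS = "{http://www.sitemaps.org/schemas/sitemap/0.9}"

    def sitemap_locs(url: str) -> list[str]:
        """List the <loc> entries of a sitemap or sitemap index."""
        with urllib.request.urlopen(url) as resp:
            root = ET.fromstring(resp.read())
        # <sitemapindex> and <urlset> both nest <loc> inside their children.
        return [loc.text for loc in root.iter(SITEMAP_NS + "loc")]

    for loc in sitemap_locs("https://fatcat.wiki/sitemap-index-releases.xml"):
        print(loc)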