diff options
-rw-r--r-- | fatcat_scholar/static/robots.allow.txt | 18 |
1 file changed, 13 insertions, 5 deletions
diff --git a/fatcat_scholar/static/robots.allow.txt b/fatcat_scholar/static/robots.allow.txt index 21a343e..ccdfda1 100644 --- a/fatcat_scholar/static/robots.allow.txt +++ b/fatcat_scholar/static/robots.allow.txt @@ -2,17 +2,25 @@ # If you are considering large or automated crawling, you may want to look at # our catalog API (https://api.fatcat.wiki) or bulk database snapshots instead. +# large-scale bots should not index search pages +User-agent: SemrushBot +User-agent: YandexBot +User-agent: bingbot +User-agent: Googlebot +Disallow: /search + +# crawling search result pages is expensive, so we do specify a long crawl +# delay for those (for bots other than the above broad search bots) +User-agent: * +Allow: /search +Crawl-delay: 5 + # by default, can crawl anything on this domain. HTTP 429 ("backoff") status # codes are used for rate-limiting instead of any crawl delay specified here. # Up to a handful concurrent requests should be fine. User-Agent: * Allow: / -# crawling search result pages is expensive, so we do specify a long crawl delay for those -User-agent: * -Allow: /search -Crawl-delay: 5 - Sitemap: https://scholar.archive.org/sitemap.xml Sitemap: https://scholar.archive.org/sitemap-index-works.xml |