# $Id: robots.txt,v 1.1 1997/10/27 09:00:00 fielding Exp $
# robots.txt for http://www.colby.edu/
#
# This version of robots.txt allows indexing of commonspot directories. It is
# copied to /web/prod/colby every evening at 10PM.
#
# NOTE(review): the "User-agent: *" group below disallows /commonspot/ (the
# CommonSpot installation itself); confirm that the statement above refers to
# CommonSpot-managed content directories rather than to that path.
#
# Format reminders:
#   - One directive per line. A "#" starts a comment that runs to the end of
#     the line, so directives must never share a line with a preceding comment.
#   - A crawler obeys only the most specific User-agent group that matches it;
#     rules in the "*" group do NOT apply to crawlers that have their own group.

#--- this section is for the Colby search engine:
User-agent: ColbySpider
Disallow: /cgi-bin/
Disallow: /info.tech/stats/

User-agent: Colby-gsa-crawler
Disallow: /cgi-bin/
Disallow: /info.tech/stats/
Disallow: /echo/

#--- this section is for all other search engines:
User-agent: *
# Crawl-delay is a non-standard extension: some crawlers honor it (seconds
# between requests), others ignore it.
Crawl-delay: 2
Disallow: /academics_cs/museum/search/
Disallow: /athletics/
Disallow: /commonspot/                            # Commonspot installation
Disallow: /cgi-bin/                               # Script files
Disallow: /classof/2001.summer/
Disallow: /colby.mag/issues/2004/spring/alumni/   # Spring 2004 Magazine | Alumni Section
Disallow: /college/                               # internal
Disallow: /cpa/                                   # unfinished club
Disallow: /faculty/
Disallow: /focus/                                 # nonextant club
Disallow: /fusetalk/                              # fusetalk forums
Disallow: /echo/
Disallow: /i/
Disallow: /info.tech/
Disallow: /info.tech/stats/                       # already covered by /info.tech/ above; kept for explicitness
Disallow: /museum/
Disallow: /music/
Disallow: /news/feeds/
Disallow: /news_events/tags/
Disallow: /off-campus/
Disallow: /reload/
Disallow: /security/                              # unfinished dept.
Disallow: /personnel/
Disallow: /president/                             # Old president site

#--- disallow the "008" crawler from the entire site
#    (see http://www.80legs.com/webcrawler.html)
User-agent: 008
Disallow: /