#
# robots.txt
#
# This file is to prevent the crawling and indexing of certain parts
# of your site by web crawlers and spiders run by sites like Yahoo!
# and Google. By telling these "robots" where not to go on your site,
# you save bandwidth and server resources.
#
# This file will be ignored unless it is at the root of your host:
# Used:    http://example.com/robots.txt
# Ignored: http://example.com/site/robots.txt
#
# For more information about the robots.txt standard, see:
# http://www.robotstxt.org/wc/robots.html
#
# For syntax checking, see:
# http://www.sxw.org.uk/computing/robots/check.html
#
# Formatting notes:
# - Each directive must be on its own line; a line that starts with "#"
#   is a comment in its entirety.
# - No blank lines are used inside a group, because some older parsers
#   treat a blank line as the end of the group.

# Rules for every crawler that has no more specific group below.
User-agent: *
# Paths (clean URLs)
# Exception to the /umbraco/ rule below. It is listed first so that
# parsers applying the first matching rule honor it as well; parsers
# applying the longest matching rule are unaffected by the order.
Allow: /umbraco/plugins/
# Directories
Disallow: /umbraco/
Disallow: /umbraco_client/
# Files
Disallow: /INSTALL.txt
Disallow: /LICENSE.txt
# Paths (no clean URLs)
# Disallow: /?q=admin/
# "$" anchors the end of the URL: only /welcome/ itself is blocked,
# not pages underneath it.
Disallow: /welcome/$
# "*" matches any sequence of characters, so this blocks every path
# that starts with /test.
Disallow: /test*$

# Begin block Bad-Robots from robots.txt
# ia_archiver is excluded from the whole site.
User-agent: ia_archiver
Disallow: /

# SEO-related bots

# Sitemap
# NOTE(review): the first sitemap is served from www.cadets.net.au while
# the other two are on www.cadetnet.net.au - confirm this is intentional.
Sitemap: http://www.cadets.net.au/sitemap.xml
Sitemap: http://www.cadetnet.net.au/ror.xml
Sitemap: http://www.cadetnet.net.au/urllist.txt
# Sitemap: http://cadetnet.net.au/sitemap.html
# Sitemap: http://cadetnet.net.au/sitemap.xml.gz