User-agent: *
Disallow: /application/attributes
Disallow: /application/authentication
Disallow: /application/bootstrap
Disallow: /application/config
Disallow: /application/controllers
Disallow: /application/elements
Disallow: /application/helpers
Disallow: /application/jobs
Disallow: /application/languages
Disallow: /application/mail
Disallow: /application/models
Disallow: /application/page_types
Disallow: /application/single_pages
Disallow: /application/tools
Disallow: /application/views
Disallow: /ccm/system/captcha/picture

User-agent: bingbot
Crawl-delay: 10

User-agent: AhrefsBot
Crawl-delay: 10

# no need for Chinese or Russian searches
User-agent: Baiduspider
Disallow: /

# the following should also be in badbots
# The editorial comments for each of the following entries are
# only opinions provoked by the behavior of the associated
# 'spiders' as seen in local HTTP server logs.
###

# stupid bot
User-agent: purebot
Disallow: /

# seems to only search for non-existent pages.
# See ezooms.bot@gmail.com and wowrack.com
User-agent: Ezooms
Disallow: /

# http://www.majestic12.co.uk/bot.php?+ follows many bogus and corrupt links
# and so generates a lot of error log noise.
# It does us no good and is a waste of our bandwidth.
User-agent: MJ12bot
Disallow: /

# There is no need to waste bandwidth on an outfit trying to monetize our
# web pages. $50 for data scraped from the web is too much.
# Never bothers fetching robots.txt.
# See http://www.domaintools.com
User-agent: SurveyBot
Disallow: /

User-agent: DomainTools
Disallow: /

# too many mangled links and an implausible home page
User-agent: sitebot
Disallow: /

# cutesy story is years stale and no longer excuses bad crawling
User-agent: dotnetdotcom
Disallow: /

# cutesy story is years stale and no longer excuses bad crawling
User-agent: dotbot
Disallow: /

# At best another broken spider that thinks all URLs are at the top level.
# At worst, a malware scanner.
# Never fetches robots.txt, contrary to http://www.warebay.com/bot.html
# See SolomonoBot/1.02 (http://www.solomono.ru)
User-agent: SolomonoBot
Disallow: /

# evil
User-agent: ZmEu
Disallow: /

# evil
User-agent: Morfeus
Disallow: /

# evil
User-agent: Snoopy
Disallow: /

# Yet another supposed search engine that generates bad links from plain text.
# It fetches and then ignores robots.txt.
# 188.138.48.235 http://www.warebay.com/bot.html
User-agent: WBSearchBot
Disallow: /

# monetizers of other people's bandwidth
User-agent: Exabot
Disallow: /

# monetizers of other people's bandwidth
User-agent: findlinks
Disallow: /

# monetizers of other people's bandwidth
User-agent: aiHitBot
Disallow: /

# monetizer of other people's bandwidth.
# As is common with such, it ignores robots.txt.
User-agent: AhrefsBot
Disallow: /

# Yet another monetizer of other people's bandwidth that hits selected
# pages every few seconds from about a dozen HTTP clients around the
# world without let, leave, hindrance, or notice.
# There is no apparent way to ask them to stop. One DinoPing agent at
# support@edis.at responded to a request to stop with "just use iptables"
# on 2012/08/13.
# They're blind to the irony that one of their targets is
# http://www.rhyolite.com/anti-spam/that-which-we-dont.html
User-agent: DinoPing
Disallow: /

# unprovoked, unasked-for "monitoring" and "checking"
User-agent: panopta.com
Disallow: /

# checks much too fast and too much, including traps
User-agent: linkchecker.sourceforge.net
Disallow: /
# There is no need for third parties to check our links, thank you very much.
User-agent: linkcheck
Disallow: /

# "The World's Experts in Search Analytics"
# is yet another SEO outfit that hammers HTTP servers without permission
# and without benefit for at least some HTTP server operators.
User-agent: Searchmetrics
Disallow: /

# (supposed) SEO
User-agent: lipperhey
Disallow: /

# (supposed) SEO
User-agent: dataprovider.com
Disallow: /

# SEO
# http://www.semrush.com/bot.html suggests its results are
# for users:
#   Well, the real question is why do you not want the bot visiting
#   your page? Most bots are both harmless and quite beneficial. Bots
#   like Googlebot discover sites by following links from page to page.
#   This bot is crawling your page to help parse the content, so that
#   the relevant information contained within your site is easily indexed
#   and made more readily available to users searching for the content
#   you provide.
User-agent: SemrushBot
Disallow: /

# ignores robots.txt
User-agent: Sosospider
Disallow: /

# no apparent reason to spend bandwidth or attention on its bad URLs in logs
User-agent: discoverybot
Disallow: /

# no need for Russian searches, and they fetch but ignore robots.txt
User-agent: Yandex
Disallow: /

# no "biomedical, biochemical, drug, health and disease related data" here.
# 192.31.21.179 switched from www.integromedb.org/Crawler to "Java/1.6.0_20"
# and "-" after integromedb was added to robots.txt
User-agent: www.integromedb.org/Crawler
Disallow: /

# does not handle protocol-relative links. Does not even fetch robots.txt.
User-agent: 360Spider
Disallow: /

# does not handle protocol-relative links.
User-agent: 80legs
Disallow: /

# does not know the difference between a hyperlink and
# anchors that are not links
User-agent: YamanaLab-Robot
Disallow: /

# ambulance chasers with a stupid spider that hits the evil spider trap
User-agent: ip-web-crawler.com
Disallow: /

# ignores rel="nofollow" in links
# treats " onclick=..." as if it were part of the URL
# fetches robots.txt and then ignores it
User-agent: Aboundex
Disallow: /

User-agent: Aboundexbot
Disallow: /

# fetches robots.txt for only some domains;
# searches for non-existent but often abused URLs such as .../contact.cgi
User-agent: yunyun
Disallow: /

# waste of bandwidth
User-agent: masscan
Disallow: /
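
A quick way to sanity-check these rules is to feed the file to Python's standard-library robots.txt parser. The sketch below is only an illustration: it assumes a local copy saved as robots.txt, and example.com is a placeholder host, not anything from the file itself.

from urllib.robotparser import RobotFileParser

# Load the rules from a local copy of this file (the path is an assumption).
parser = RobotFileParser()
with open("robots.txt") as f:
    parser.parse(f.read().splitlines())

# The generic "User-agent: *" group applies to crawlers not listed by name:
# the /application/... directories are blocked, ordinary pages are not.
print(parser.can_fetch("*", "https://example.com/application/config/db.php"))  # False
print(parser.can_fetch("*", "https://example.com/index.php"))                  # True

# Bots listed by name with "Disallow: /" are shut out entirely.
print(parser.can_fetch("MJ12bot", "https://example.com/"))                     # False

# Crawl-delay values are exposed for agents that declare one.
print(parser.crawl_delay("bingbot"))                                           # 10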