# robots.txt for art-ificialintelligence.com
#
# Editorial policy: block training, allow citation.
#
#   1. Search engines (Googlebot, Bingbot, DuckDuckBot, Applebot)
#      — allow. Drives human readers.
#   2. Live citation/answer bots (OAI-SearchBot, ChatGPT-User,
#      PerplexityBot, Claude-User, Claude-SearchBot,
#      Meta-ExternalFetcher) — allow. They fetch on a user's
#      behalf and cite back. This site ships llms.txt and
#      /for-machines specifically to invite them.
#   3. AI training crawlers (GPTBot, Google-Extended, ClaudeBot,
#      CCBot, Bytespider, Meta-ExternalAgent, etc.) — block.
#      Bulk corpus harvest, no citation, no traffic, no
#      compensation. Symmetric across vendors — no carveouts.
#   4. SEO/abusive scrapers (AhrefsBot, SemrushBot, MJ12bot,
#      DotBot, BLEXBot) — block. Hammer bandwidth, give a small
#      publication nothing.
#
# This file is the honour-system layer. The Cloudflare dashboard
# (Block AI Bots + AI Crawl Control) is the enforcement layer for
# crawlers that ignore robots.txt. Bot Fight Mode stays OFF on the
# Free plan — see GUARDRAILS.md §"Bot Policy" for rationale.
#
# The Content-Signal line below declares this site's preferences
# under the contentsignals.org / IETF draft spec:
#   search=yes    — index for search engines
#   ai-input=yes  — grounding / citation in live AI answers
#   ai-train=no   — no use in training generative models
#
# Rationale: .claude/rules/GUARDRAILS.md §"Bot Policy".
#
# AI agent content:
#   Index:        https://art-ificialintelligence.com/for-machines
#   llms.txt:     https://art-ificialintelligence.com/llms.txt
#   llms-full.txt:https://art-ificialintelligence.com/llms-full.txt

# Default group: applies to every crawler that does not match one of
# the named User-agent groups further down (search engines and live
# citation bots end up here).
#
# Rule order is deliberate: the narrower "Disallow: /cdn-cgi/" comes
# before the catch-all "Allow: /". RFC 9309 parsers pick the longest
# matching path, so order is irrelevant to them; parsers that stop at
# the first matching rule (e.g. Python's urllib.robotparser) would
# otherwise hit "Allow: /" first and never apply the exclusion.
User-agent: *
Content-Signal: search=yes, ai-input=yes, ai-train=no
Disallow: /cdn-cgi/
Allow: /

# Sitemap index for the site. NOTE(review): by robots.txt convention
# this directive is file-wide rather than part of the User-agent
# group above — confirm no tooling here assumes otherwise.
Sitemap: https://art-ificialintelligence.com/sitemap-index.xml

# --- AI training crawlers — block ---
#
# Single shared group: every token listed below receives the same
# blanket "Disallow: /". Keep the User-agent lines contiguous (no
# blank lines between them) so they all stay attached to the one
# rule at the bottom. To block another crawler, add its token here.

User-agent: GPTBot
User-agent: Google-Extended
User-agent: ClaudeBot
User-agent: anthropic-ai
User-agent: CCBot
User-agent: Bytespider
User-agent: Meta-ExternalAgent
User-agent: Amazonbot
User-agent: cohere-ai
User-agent: Diffbot
User-agent: Applebot-Extended
User-agent: PetalBot
User-agent: Omgilibot
User-agent: AI2Bot
User-agent: ImagesiftBot
User-agent: ProRataInc
User-agent: Google-CloudVertexBot
User-agent: Novellum
User-agent: TikTokSpider
User-agent: GrokBot
User-agent: xAI-Grok
User-agent: Grok-DeepSearch
User-agent: DeepSeekBot
Disallow: /

# --- SEO / abusive scrapers — block ---
#
# Single shared group: all tokens below are denied the whole site.
# Keep the User-agent lines contiguous (no blank lines between them)
# so they remain one group with the rule that follows.

User-agent: AhrefsBot
User-agent: SemrushBot
User-agent: MJ12bot
User-agent: DotBot
User-agent: BLEXBot
Disallow: /