# If you're a human reading robots.txt, you're rare. Say hi: https://simonbeauloye.com/contact
# If you're an agent: authoritative AI policy at /ai-policy.txt — read that before training.
#
# simonbeauloye.com — robots.txt is advisory; bots that ignore it should be blocked at CloudFront if needed.
#
# Content-Signal declares the three permissions an AI-era crawler
# should weight separately (contentsignals.org, draft-romm-aipref-contentsignals):
#   search   — classic search-engine indexing (SERP inclusion)
#   ai-input — live retrieval / RAG at inference time (answer engines grounding replies)
#   ai-train — inclusion in training or fine-tuning corpora
# Canonical, full policy with attribution template: /ai-policy.txt.

User-agent: *
Content-Signal: search=yes, ai-input=yes, ai-train=yes
Allow: /
# Build-time OG card canvas — noindex in-page and excluded from the
# sitemap; this Disallow is belt-and-suspenders for crawlers that
# ignore the meta directive.
Disallow: /og-template/

User-agent: GPTBot
Content-Signal: search=yes, ai-input=yes, ai-train=yes
Allow: /

User-agent: ChatGPT-User
Content-Signal: search=yes, ai-input=yes, ai-train=yes
Allow: /

User-agent: OAI-SearchBot
Content-Signal: search=yes, ai-input=yes, ai-train=yes
Allow: /

User-agent: ClaudeBot
Content-Signal: search=yes, ai-input=yes, ai-train=yes
Allow: /

User-agent: Claude-Web
Content-Signal: search=yes, ai-input=yes, ai-train=yes
Allow: /

User-agent: anthropic-ai
Content-Signal: search=yes, ai-input=yes, ai-train=yes
Allow: /

User-agent: PerplexityBot
Content-Signal: search=yes, ai-input=yes, ai-train=yes
Allow: /

User-agent: Perplexity-User
Content-Signal: search=yes, ai-input=yes, ai-train=yes
Allow: /

User-agent: Google-Extended
Content-Signal: search=yes, ai-input=yes, ai-train=yes
Allow: /

User-agent: CCBot
Content-Signal: search=yes, ai-input=yes, ai-train=yes
Allow: /

User-agent: Applebot-Extended
Content-Signal: search=yes, ai-input=yes, ai-train=yes
Allow: /

# Bytespider (ByteDance / TikTok / Doubao). Allowed — Doubao is a real
# AI answer surface in APAC and increasingly global; blocking it costs
# presence without buying much in return.
User-agent: Bytespider
Content-Signal: search=yes, ai-input=yes, ai-train=yes
Allow: /

User-agent: Amazonbot
Content-Signal: search=yes, ai-input=yes, ai-train=yes
Allow: /

User-agent: DuckAssistBot
Content-Signal: search=yes, ai-input=yes, ai-train=yes
Allow: /

User-agent: MistralAI-User
Content-Signal: search=yes, ai-input=yes, ai-train=yes
Allow: /

Sitemap: https://simonbeauloye.com/sitemap-index.xml

# ──────────────────────────────────────────────────────────────────
# Canonical AI-readable surfaces (informational — robots.txt has no
# official directive for these; included as hints for humans and for
# the handful of crawlers that parse non-standard peers of `Sitemap:`).
# ──────────────────────────────────────────────────────────────────
# AI-use policy:       https://simonbeauloye.com/ai-policy.txt
# Agent skills index:  https://simonbeauloye.com/.well-known/agent-skills/index.json
# Site-wide LLM index: https://simonbeauloye.com/llms.txt
# Full corpus:         https://simonbeauloye.com/llms-full.txt
# Structured corpus:   https://simonbeauloye.com/corpus.json
# JSON Feed 1.1:       https://simonbeauloye.com/feed.json
# Per-pillar indexes:  https://simonbeauloye.com/llms//llms.txt
#   · ai-publishing, bootstrapping, future-media, building-with-ai
# Per-article text:    https://simonbeauloye.com/writing///llms.txt
# Glossary aggregate:  https://simonbeauloye.com/glossary/llms.txt
# Per-term text:       https://simonbeauloye.com/glossary//llms.txt
# FAQ aggregate:       https://simonbeauloye.com/faq/llms.txt