# Robots policy for the Arda Products Documentation site.
#
# This site is intentionally agent-friendly. The published content is
# public reference material, and one of the project goals is to make
# it easily consumable by AI assistants and tools.
#
# Agent-facing entry points:
#   - /llms.txt          curated index of canonical pages
#   - /llms-small.txt    hierarchy + summaries (small token budget)
#   - /llms-full.txt     full corpus as a single markdown bundle
#   - /<slug>.md         raw markdown source for any rendered page

# Anthropic — three separate user-agents, each with its own group.
# ClaudeBot:        training corpus.
# Claude-User:      live fetch when a Claude user asks about the docs.
# Claude-SearchBot: search index (improves Claude search quality).
# The rules below match the catch-all `*` group in this file; naming the
# agents makes the allowance explicit rather than implied.
User-agent: ClaudeBot
Allow: /

User-agent: Claude-User
Allow: /

User-agent: Claude-SearchBot
Allow: /

# OpenAI — three separate user-agents.
# GPTBot:        training corpus.
# OAI-SearchBot: search index (surfaces pages in ChatGPT search results).
# ChatGPT-User:  live fetch triggered by a ChatGPT user's request.
User-agent: GPTBot
Allow: /

User-agent: OAI-SearchBot
Allow: /

User-agent: ChatGPT-User
Allow: /

# Google — Google-Extended is a robots.txt control token, not a separate
# crawler: it governs whether content fetched by Google's crawlers may be
# used for Gemini model training and grounding. It has no effect on
# Google Search, whose crawler (Googlebot) is matched by the `*` group.
# Use is permitted by default; this group states the permission explicitly.
User-agent: Google-Extended
Allow: /

# Default policy for every crawler not matched by a named group in this
# file (search engines, archives, and any other bot): full access.
User-agent: *
Allow: /

# Sitemap points at the production deployment (org-level GitHub Pages),
# which is the canonical, indexable location. Preview deployments at
# /documentation/ are ephemeral and intentionally not advertised here.
# The Sitemap line is independent of the User-agent groups in this file
# and is given as an absolute URL.
Sitemap: https://arda-cards.github.io/sitemap-index.xml