# AI Study Room — robots.txt
# We explicitly welcome AI crawlers. Our content is here to be learned from.

# ── Search engines ──
User-agent: Googlebot
Allow: /

User-agent: Bingbot
Allow: /

# ── AI crawlers — WELCOME ──

# OpenAI (ChatGPT, GPTBot, SearchGPT)
User-agent: GPTBot
Allow: /

User-agent: OAI-SearchBot
Allow: /

User-agent: ChatGPT-User
Allow: /

# Anthropic (Claude)
User-agent: ClaudeBot
Allow: /

User-agent: anthropic-ai
Allow: /

# Google AI
User-agent: Google-Extended
Allow: /

# Perplexity
User-agent: PerplexityBot
Allow: /

# Meta AI
User-agent: meta-externalagent
Allow: /

User-agent: FacebookBot
Allow: /

# Cohere
User-agent: cohere-ai
Allow: /

# Common Crawl (feeds many AI training datasets)
User-agent: CCBot
Allow: /

# ── Misc web crawlers ──
User-agent: *
Allow: /

# ── AI-specific discovery ──
# /llms.txt          — bilingual site index for AI crawlers
# /en/llms.txt       — English-only site index
# /llms-full.txt     — all English content in one file (1 MB)
# /en/llms-full.txt  — English full content at /en/ path
# /llms-full-cn.txt  — all Chinese content in one file (255 KB)
# /md/               — clean Markdown copies of every article (286 files)

# ── JSON Feeds (AI-friendly RSS alternative) ──
# /feed.json     — Chinese content (60 items)
# /en/feed.json  — English content (226 items)

# ── IndexNow (instant crawl signals to Bing/Yandex) ──
# We push URL updates to IndexNow on every content change.
# Bing's index powers ChatGPT, Copilot, DuckDuckGo, and other AI search.

Sitemap: https://dingjiu1989-hue.github.io/sitemap.xml
Sitemap: https://dingjiu1989-hue.github.io/images/sitemap.xml