From 01497db6acd8331cdcc779ef2307d8a5844bc789 Mon Sep 17 00:00:00 2001 From: Rich Haines Date: Wed, 1 Apr 2026 16:26:08 +0300 Subject: [PATCH 1/5] Add @vercel/agent-readability dependency --- apps/docs/package.json | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/apps/docs/package.json b/apps/docs/package.json index 9ede83e3..2dd5f8ee 100644 --- a/apps/docs/package.json +++ b/apps/docs/package.json @@ -15,6 +15,7 @@ "@orama/tokenizers": "^3.1.16", "@streamdown/cjk": "^1.0.1", "@streamdown/code": "^1.0.1", + "@vercel/agent-readability": "^0.2.1", "@vercel/analytics": "^1.6.1", "@vercel/speed-insights": "^1.3.1", "@vercel/toolbar": "0.1.36", @@ -60,4 +61,4 @@ "typescript": "^5.9.3" }, "packageManager": "pnpm@10.19.0" -} +} \ No newline at end of file From 0087043ef1efbfcd105a913d1a3240885073002f Mon Sep 17 00:00:00 2001 From: Rich Haines Date: Wed, 1 Apr 2026 16:26:09 +0300 Subject: [PATCH 2/5] Replace local isAIAgent with @vercel/agent-readability --- apps/docs/proxy.ts | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/apps/docs/proxy.ts b/apps/docs/proxy.ts index b925d927..de235a96 100644 --- a/apps/docs/proxy.ts +++ b/apps/docs/proxy.ts @@ -1,3 +1,4 @@ +import { generateNotFoundMarkdown, isAIAgent } from "@vercel/agent-readability"; import { precompute } from "flags/next"; import { createI18nMiddleware } from "fumadocs-core/i18n/middleware"; import { isMarkdownPreferred, rewritePath } from "fumadocs-core/negotiation"; @@ -8,7 +9,6 @@ import { } from "next/server"; import { rootFlags } from "@/flags"; import { i18n } from "@/lib/geistdocs/i18n"; -import { isAIAgent } from "@/lib/ai-agent-detection"; import { trackMdRequest } from "@/lib/geistdocs/md-tracking"; const { rewrite: rewriteLLM } = rewritePath( @@ -65,7 +65,6 @@ const proxy = async (request: NextRequest, context: NextFetchEvent) => { } // AI agent detection — rewrite docs pages to markdown for agents - // so they always get structured content without needing .md URLs or Accept headers if ( (pathname === "/docs" || pathname.startsWith("/docs/")) && !pathname.includes("/llms.mdx/") @@ -90,6 +89,10 @@ const proxy = async (request: NextRequest, context: NextFetchEvent) => { ); return NextResponse.rewrite(new URL(result, request.nextUrl)); } + // Agent requested a non-existent docs URL — return helpful markdown + return new NextResponse(generateNotFoundMarkdown(pathname), { + headers: { "Content-Type": "text/markdown; charset=utf-8" }, + }); } } @@ -115,10 +118,9 @@ const proxy = async (request: NextRequest, context: NextFetchEvent) => { }; export const config = { - // Matcher ignoring `/_next/`, `/api/`, static assets, favicon, sitemap, robots, etc. matcher: [ "/((?!api|_next/static|_next/image|favicon.ico|sitemap.xml|robots.txt|\\.well-known/vercel/flags).*)", ], }; -export default proxy; +export default proxy; \ No newline at end of file From 9e09bdccd22c29bc134a82f1bad33789a7261df8 Mon Sep 17 00:00:00 2001 From: Rich Haines Date: Wed, 1 Apr 2026 16:26:11 +0300 Subject: [PATCH 3/5] Delete local ai-agent-detection.ts (replaced by package) --- apps/docs/lib/ai-agent-detection.ts | 168 ---------------------------- 1 file changed, 168 deletions(-) delete mode 100644 apps/docs/lib/ai-agent-detection.ts diff --git a/apps/docs/lib/ai-agent-detection.ts b/apps/docs/lib/ai-agent-detection.ts deleted file mode 100644 index 99fc6b51..00000000 --- a/apps/docs/lib/ai-agent-detection.ts +++ /dev/null @@ -1,168 +0,0 @@ -/** - * AI Agent Detection Utility - * - * Multi-signal detection for AI agents/bots. Used to serve markdown - * responses when agents request docs pages. - * - * Three detection layers: - * 1. Known UA patterns (definitive) — curated from https://bots.fyi/?tags=ai_assistant - * 2. Signature-Agent header (definitive) — catches ChatGPT agent (RFC 9421) - * 3. Missing browser fingerprint heuristic — catches unknown bots - * - * Optimizes for recall over precision: serving markdown to a non-AI bot - * is low-harm; missing an AI agent means a worse experience. - * - * Last reviewed: 2026-03-20 against bots.fyi + official vendor docs - */ - -// Layer 1: Known AI agent UA substrings (lowercase). -const AI_AGENT_UA_PATTERNS = [ - // Anthropic — https://support.claude.com/en/articles/8896518 - 'claudebot', - 'claude-searchbot', - 'claude-user', - 'anthropic-ai', - 'claude-web', - - // OpenAI — https://platform.openai.com/docs/bots - 'chatgpt', - 'gptbot', - 'oai-searchbot', - 'openai', - - // Google AI - 'gemini', - 'bard', - 'google-cloudvertexbot', - 'google-extended', - - // Meta - 'meta-externalagent', - 'meta-externalfetcher', - 'meta-webindexer', - - // Search/Research AI - 'perplexity', - 'youbot', - 'you.com', - 'deepseekbot', - - // Coding assistants - 'cursor', - 'github-copilot', - 'codeium', - 'tabnine', - 'sourcegraph', - - // Other AI agents / data scrapers (low-harm to serve markdown) - 'cohere-ai', - 'bytespider', - 'amazonbot', - 'ai2bot', - 'diffbot', - 'omgili', - 'omgilibot', -]; - -// Layer 2: Known AI service URLs in Signature-Agent header (RFC 9421). -const SIGNATURE_AGENT_DOMAINS = ['chatgpt.com']; - -// Layer 3: Traditional bot exclusion list — bots that should NOT trigger -// the heuristic layer (they're search engine crawlers, social previews, or -// monitoring tools, not AI agents). -const TRADITIONAL_BOT_PATTERNS = [ - 'googlebot', - 'bingbot', - 'yandexbot', - 'baiduspider', - 'duckduckbot', - 'slurp', - 'msnbot', - 'facebot', - 'twitterbot', - 'linkedinbot', - 'whatsapp', - 'telegrambot', - 'pingdom', - 'uptimerobot', - 'newrelic', - 'datadog', - 'statuspage', - 'site24x7', - 'applebot', -]; - -// Broad regex for bot-like UA strings (used only in Layer 3 heuristic). -const BOT_LIKE_REGEX = /bot|agent|fetch|crawl|spider|search/i; - -export type DetectionMethod = 'ua-match' | 'signature-agent' | 'heuristic'; - -export interface DetectionResult { - detected: boolean; - method: DetectionMethod | null; -} - -/** - * Detects AI agents from HTTP request headers. - * - * Returns both whether the agent was detected and which signal triggered, - * so callers can log the detection method for accuracy tracking. - */ -export function isAIAgent(request: { - headers: { get(name: string): string | null }; -}): DetectionResult { - const userAgent = request.headers.get('user-agent'); - - // Layer 1: Known UA pattern match - if (userAgent) { - const lowerUA = userAgent.toLowerCase(); - if (AI_AGENT_UA_PATTERNS.some((pattern) => lowerUA.includes(pattern))) { - return { detected: true, method: 'ua-match' }; - } - } - - // Layer 2: Signature-Agent header (RFC 9421, used by ChatGPT agent) - const signatureAgent = request.headers.get('signature-agent'); - if (signatureAgent) { - const lowerSig = signatureAgent.toLowerCase(); - if (SIGNATURE_AGENT_DOMAINS.some((domain) => lowerSig.includes(domain))) { - return { detected: true, method: 'signature-agent' }; - } - } - - // Layer 3: Missing browser fingerprint heuristic - // Real browsers (Chrome 76+, Firefox 90+, Safari 16.4+) send sec-fetch-mode - // on navigation requests. Its absence signals a programmatic client. - const secFetchMode = request.headers.get('sec-fetch-mode'); - if (!secFetchMode && userAgent && BOT_LIKE_REGEX.test(userAgent)) { - const lowerUA = userAgent.toLowerCase(); - const isTraditionalBot = TRADITIONAL_BOT_PATTERNS.some((pattern) => - lowerUA.includes(pattern), - ); - if (!isTraditionalBot) { - return { detected: true, method: 'heuristic' }; - } - } - - return { detected: false, method: null }; -} - -/** - * Generates a markdown response for AI agents that hit non-existent URLs. - */ -export function generateAgentNotFoundResponse(requestedPath: string): string { - return `# Page Not Found - -The URL \`${requestedPath}\` does not exist in the documentation. - -## How to find the correct page - -1. **Browse the sitemap**: [/sitemap.md](/sitemap.md) — A structured index of all pages with URLs, content types, and descriptions -2. **Browse the full index**: [/llms.txt](/llms.txt) — Complete documentation index - -## Tips for requesting documentation - -- For markdown responses, append \`.md\` to URLs (e.g., \`/docs/getting-started.md\`) -- Use \`Accept: text/markdown\` header for content negotiation -`; -} From 73c38b6669ce7b859ad73a809771776ada8f7f27 Mon Sep 17 00:00:00 2001 From: molebox Date: Tue, 7 Apr 2026 14:04:36 +0200 Subject: [PATCH 4/5] Fix broken DetectionMethod import, add Vary header and discovery options - Change DetectionMethod import from deleted local file to @vercel/agent-readability - Add Vary: Accept header on agent-rewrite responses - Add discovery options (sitemapUrl, indexUrl) to generateNotFoundMarkdown Co-Authored-By: Claude Opus 4.6 (1M context) --- apps/docs/lib/geistdocs/md-tracking.ts | 2 +- apps/docs/proxy.ts | 19 +++++++++++++++---- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/apps/docs/lib/geistdocs/md-tracking.ts b/apps/docs/lib/geistdocs/md-tracking.ts index 1c7d6419..3413c503 100644 --- a/apps/docs/lib/geistdocs/md-tracking.ts +++ b/apps/docs/lib/geistdocs/md-tracking.ts @@ -1,5 +1,5 @@ import { siteId } from "@/geistdocs"; -import type { DetectionMethod } from "@/lib/ai-agent-detection"; +import type { DetectionMethod } from "@vercel/agent-readability"; const PLATFORM_URL = "https://geistdocs.com/md-tracking"; diff --git a/apps/docs/proxy.ts b/apps/docs/proxy.ts index de235a96..30f6aee9 100644 --- a/apps/docs/proxy.ts +++ b/apps/docs/proxy.ts @@ -87,12 +87,23 @@ const proxy = async (request: NextRequest, context: NextFetchEvent) => { detectionMethod: agentResult.method, }) ); - return NextResponse.rewrite(new URL(result, request.nextUrl)); + const response = NextResponse.rewrite(new URL(result, request.nextUrl)); + response.headers.set("Vary", "Accept"); + return response; } // Agent requested a non-existent docs URL — return helpful markdown - return new NextResponse(generateNotFoundMarkdown(pathname), { - headers: { "Content-Type": "text/markdown; charset=utf-8" }, - }); + return new NextResponse( + generateNotFoundMarkdown(pathname, { + sitemapUrl: "/sitemap.md", + indexUrl: "/llms.txt", + }), + { + headers: { + "Content-Type": "text/markdown; charset=utf-8", + Vary: "Accept", + }, + }, + ); } } From 0a14f7adca16e23002f578590ea6f77da6f857ae Mon Sep 17 00:00:00 2001 From: molebox Date: Tue, 7 Apr 2026 14:10:46 +0200 Subject: [PATCH 5/5] Update pnpm-lock.yaml Co-Authored-By: Claude Opus 4.6 (1M context) --- pnpm-lock.yaml | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index cb219c78..44bba925 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -74,6 +74,9 @@ importers: '@streamdown/code': specifier: ^1.0.1 version: 1.0.2(react@19.2.4) + '@vercel/agent-readability': + specifier: ^0.2.1 + version: 0.2.1(next@16.1.6(@babel/core@7.29.0)(@opentelemetry/api@1.9.0)(@playwright/test@1.58.1)(react-dom@19.2.4(react@19.2.4))(react@19.2.4)) '@vercel/analytics': specifier: ^1.6.1 version: 1.6.1(@sveltejs/kit@2.53.4(@opentelemetry/api@1.9.0)(@sveltejs/vite-plugin-svelte@4.0.4(svelte@5.41.3)(vite@6.4.1(@types/node@24.10.13)(jiti@2.6.1)(lightningcss@1.30.2)(yaml@2.8.1)))(svelte@5.41.3)(typescript@5.9.3)(vite@6.4.1(@types/node@24.10.13)(jiti@2.6.1)(lightningcss@1.30.2)(yaml@2.8.1)))(next@16.1.6(@babel/core@7.29.0)(@opentelemetry/api@1.9.0)(@playwright/test@1.58.1)(react-dom@19.2.4(react@19.2.4))(react@19.2.4))(react@19.2.4)(svelte@5.41.3) @@ -4828,6 +4831,16 @@ packages: '@upstash/redis@1.34.4': resolution: {integrity: sha512-AZx2iD5s1Pu/KCrRA7KVCffu3NSoaYnNY7N9YI7aLAYhcJfsriQKTe+8OxQWJqGqFbrvm17Lyr9HFnDLvqNpfA==} + '@vercel/agent-readability@0.2.1': + resolution: {integrity: sha512-ShT7BzIS/dwKompii8tm5do+NR1g4xL5M3wM7S01xsH6yuYQ7wiTPZEcmHMFLHCsAQg45/mD0hgrufpS3NVunw==} + engines: {node: '>=20.0.0'} + hasBin: true + peerDependencies: + next: '>=14' + peerDependenciesMeta: + next: + optional: true + '@vercel/analytics@1.5.0': resolution: {integrity: sha512-MYsBzfPki4gthY5HnYN7jgInhAZ7Ac1cYDoRWFomwGHWEX7odTEzbtg9kf/QSo7XEsEAqlQugA6gJ2WS2DEa3g==} peerDependencies: @@ -13209,6 +13222,10 @@ snapshots: dependencies: crypto-js: 4.2.0 + '@vercel/agent-readability@0.2.1(next@16.1.6(@babel/core@7.29.0)(@opentelemetry/api@1.9.0)(@playwright/test@1.58.1)(react-dom@19.2.4(react@19.2.4))(react@19.2.4))': + optionalDependencies: + next: 16.1.6(@babel/core@7.29.0)(@opentelemetry/api@1.9.0)(@playwright/test@1.58.1)(react-dom@19.2.4(react@19.2.4))(react@19.2.4) + '@vercel/analytics@1.5.0(@sveltejs/kit@2.53.4(@opentelemetry/api@1.9.0)(@sveltejs/vite-plugin-svelte@4.0.4(svelte@5.41.3)(vite@6.4.1(@types/node@22.14.0)(jiti@2.6.1)(lightningcss@1.30.2)(yaml@2.8.1)))(svelte@5.41.3)(typescript@5.8.2)(vite@6.4.1(@types/node@22.14.0)(jiti@2.6.1)(lightningcss@1.30.2)(yaml@2.8.1)))(next@16.1.5(@opentelemetry/api@1.9.0)(@playwright/test@1.58.1)(react-dom@19.2.0(react@19.2.0))(react@19.2.0))(react@19.2.0)(svelte@5.41.3)': optionalDependencies: '@sveltejs/kit': 2.53.4(@opentelemetry/api@1.9.0)(@sveltejs/vite-plugin-svelte@4.0.4(svelte@5.41.3)(vite@6.4.1(@types/node@22.14.0)(jiti@2.6.1)(lightningcss@1.30.2)(yaml@2.8.1)))(svelte@5.41.3)(typescript@5.8.2)(vite@6.4.1(@types/node@22.14.0)(jiti@2.6.1)(lightningcss@1.30.2)(yaml@2.8.1))