diff --git a/src/eval/discovery-harness.ts b/src/eval/discovery-harness.ts index 8ca2aa9..5b86677 100644 --- a/src/eval/discovery-harness.ts +++ b/src/eval/discovery-harness.ts @@ -429,6 +429,9 @@ export async function evaluateDiscoveryFixture({ for (const task of fixture.tasks) { const runner = runners[task.surface]; + if (!runner) { + throw new Error(`No runner registered for surface: ${task.surface}`); + } const payload = await runner(task, rootPath); results.push(evaluateDiscoveryTask(task, payload)); } diff --git a/src/index.ts b/src/index.ts index 9ae677d..48b53c6 100644 --- a/src/index.ts +++ b/src/index.ts @@ -1748,8 +1748,7 @@ async function main() { const transport = new StdioServerTransport(); await server.connect(transport); - // Register cleanup before any handler that calls process.exit(), so the - // exit listener is always in place when stdin/onclose/signals fire. + // ── Cleanup guards (normal MCP lifecycle) ────────────────────────────────── const stopAllWatchers = () => { for (const project of getAllProjects()) { project.stopWatcher?.(); @@ -1770,27 +1769,58 @@ async function main() { process.exit(0); }); - // Detect stdin pipe closure — the primary signal that the MCP client is gone. - // StdioServerTransport only listens for 'data'/'error', never 'end'. process.stdin.on('end', () => process.exit(0)); process.stdin.on('close', () => process.exit(0)); - - // Handle graceful MCP protocol-level disconnect. - // Fires after SDK internal cleanup when transport.close() is called. server.onclose = () => process.exit(0); - if (process.env.CODEBASE_CONTEXT_DEBUG) console.error('[DEBUG] Server ready'); + // ── Zombie process prevention ────────────────────────────────────────────── + // If no MCP client sends an `initialize` message within 30 seconds, this + // process was started incorrectly (e.g. `npx codebase-context ` from + // a shell or AI agent without a subcommand). Exit cleanly to avoid a zombie. + const HANDSHAKE_TIMEOUT_MS = + Number.parseInt(process.env.CODEBASE_CONTEXT_HANDSHAKE_TIMEOUT_MS ?? '', 10) || 30_000; + let mcpClientInitialized = false; + + const handshakeTimer = setTimeout(() => { + if (!mcpClientInitialized) { + console.error( + 'No MCP client connected within ' + + Math.round(HANDSHAKE_TIMEOUT_MS / 1000) + + 's - exiting.\n' + + 'If you meant to use CLI commands:\n' + + ' npx codebase-context memory list\n' + + ' npx codebase-context search --query "..."\n' + + ' npx codebase-context --help' + ); + process.exit(1); + } + }, HANDSHAKE_TIMEOUT_MS); + handshakeTimer.unref(); - await refreshKnownRootsFromClient(); + // ── Deferred project initialization ──────────────────────────────────────── + // Don't start CPU-heavy indexing or file-watchers until the MCP handshake + // completes. A misfire (no real client) consumes near-zero resources during + // the timeout window and exits cleanly. + server.oninitialized = async () => { + mcpClientInitialized = true; + clearTimeout(handshakeTimer); - // Keep the current single-project auto-select behavior when exactly one startup project is known. - const startupRoots = getKnownRootPaths(); - if (startupRoots.length === 1) { - await initProject(startupRoots[0], watcherDebounceMs, { enableWatcher: true }); - setActiveProject(startupRoots[0]); - } + if (process.env.CODEBASE_CONTEXT_DEBUG) console.error('[DEBUG] Server ready'); + + try { + await refreshKnownRootsFromClient(); + + const startupRoots = getKnownRootPaths(); + if (startupRoots.length === 1) { + await initProject(startupRoots[0], watcherDebounceMs, { enableWatcher: true }); + setActiveProject(startupRoots[0]); + } + } catch (error) { + console.error('[codebase-context] Project initialization failed:', error); + } + }; - // Subscribe to root changes + // Subscribe to root changes (lightweight — no project init cost) server.setNotificationHandler(RootsListChangedNotificationSchema, async () => { try { await refreshKnownRootsFromClient(); diff --git a/tests/zombie-guard.test.ts b/tests/zombie-guard.test.ts new file mode 100644 index 0000000..7e755f4 --- /dev/null +++ b/tests/zombie-guard.test.ts @@ -0,0 +1,122 @@ +/** + * Integration tests for zombie process prevention. + * + * These tests verify that the MCP server exits cleanly when no client connects + * (handshake timeout) and that project initialization is deferred until after + * the MCP handshake completes. + * + * The tests spawn real child processes to exercise the actual startup path. + */ + +import { describe, it, expect, beforeAll } from 'vitest'; +import { spawn } from 'node:child_process'; +import { existsSync } from 'node:fs'; +import path from 'node:path'; +import os from 'node:os'; +import { fileURLToPath } from 'node:url'; + +const __dirname = path.dirname(fileURLToPath(import.meta.url)); +const ENTRY_POINT = path.resolve(__dirname, '..', 'dist', 'index.js'); + +/** + * Spawn the MCP server as a child process and wait for it to exit. + * Returns { code, stderr, elapsed } where elapsed is in milliseconds. + */ +function spawnServer( + args: string[], + env: Record = {}, + timeoutMs = 45_000 +): Promise<{ code: number | null; signal: string | null; stderr: string; elapsed: number }> { + return new Promise((resolve, reject) => { + const start = Date.now(); + let stderr = ''; + + const child = spawn(process.execPath, [ENTRY_POINT, ...args], { + stdio: ['pipe', 'pipe', 'pipe'], + env: { ...process.env, ...env }, + timeout: timeoutMs + }); + + child.stderr?.on('data', (chunk: Buffer) => { + stderr += chunk.toString(); + }); + + child.on('error', reject); + child.on('close', (code, signal) => { + resolve({ code, signal, stderr, elapsed: Date.now() - start }); + }); + + // Don't write anything to stdin — simulate the zombie scenario + // where no MCP client sends an `initialize` message. + }); +} + +describe('zombie process prevention', () => { + beforeAll(() => { + if (!existsSync(ENTRY_POINT)) { + throw new Error( + `dist/index.js not found - run \`npm run build\` before the zombie-guard tests.` + ); + } + }); + + it('exits with code 1 when no MCP client connects within timeout', async () => { + // Use a short timeout for the test (2 seconds instead of the default 30). + // Use os.tmpdir() as a real existing directory so path validation passes — + // this tests the realistic scenario where a valid path IS provided but no + // MCP client connects (which is exactly the Codex zombie scenario). + const result = await spawnServer( + [os.tmpdir()], + { CODEBASE_CONTEXT_HANDSHAKE_TIMEOUT_MS: '2000' } + ); + + expect(result.code).toBe(1); + expect(result.stderr).toContain('No MCP client connected within'); + expect(result.stderr).toContain('npx codebase-context --help'); + // Should exit roughly around the timeout (2s), not hang forever + expect(result.elapsed).toBeLessThan(10_000); + }, 15_000); + + it('exits with code 1 even when invoked with no arguments at all', async () => { + const result = await spawnServer( + [], + { CODEBASE_CONTEXT_HANDSHAKE_TIMEOUT_MS: '2000' } + ); + + expect(result.code).toBe(1); + expect(result.stderr).toContain('No MCP client connected within'); + expect(result.elapsed).toBeLessThan(10_000); + }, 15_000); + + it('does not start indexing or file watchers before handshake', async () => { + // With DEBUG on, the server logs "[DEBUG] Server ready" inside oninitialized. + // Since no client ever connects, that log must never appear. + // Use os.tmpdir() so path validation passes before the handshake timer runs. + const result = await spawnServer( + [os.tmpdir()], + { + CODEBASE_CONTEXT_HANDSHAKE_TIMEOUT_MS: '2000', + CODEBASE_CONTEXT_DEBUG: '1' + } + ); + + expect(result.code).toBe(1); + // "[DEBUG] Server ready" is printed inside oninitialized — should NOT appear + // because no client ever sends `initialize`. + expect(result.stderr).not.toContain('[DEBUG] Server ready'); + }, 15_000); + + it('respects custom timeout via environment variable', async () => { + const start = Date.now(); + const result = await spawnServer( + [], + { CODEBASE_CONTEXT_HANDSHAKE_TIMEOUT_MS: '1000' } + ); + const elapsed = Date.now() - start; + + expect(result.code).toBe(1); + // Should exit around 1 second, definitely under 5 + expect(elapsed).toBeGreaterThan(800); + expect(elapsed).toBeLessThan(5_000); + }, 10_000); +});