NOTE(review): this patch was restored from a paste whose newlines and
indentation had been collapsed and whose <...> spans had been dropped.
Text ahead of the first file header is skipped by git apply and patch(1).

  1) Line breaks are rebuilt from the +/-/context markers; every hunk matches
     the old/new line counts given in its @@ header.
  2) Indentation, and any run of several spaces inside a line, is inferred
     (2-space nesting) rather than recovered. Apply with
     `git apply --ignore-whitespace`, and re-check the leading spaces inside
     the user-facing string literals (" created ...", " ${t.id} ...",
     "\n ▎ DeepCode", "\n ⏸ Approve", " [y]es / ...").
  3) Dropped <...> spans re-supplied by inference -- TODO confirm against the
     original commit: Partial<SessionContext> in the test helper; the
     `/tasks <id>` and `/background <prompt>` placeholders; the
     Record<string, unknown> parameter of formatToolInput (context line);
     the Extract<...> type arguments in assistantText.
  4) Left exactly as pasted because the missing text cannot be inferred:
     the "Promise {" hunk headings (informational only), `new Map()` in
     manager.ts and "`/agents/*.md`" in agent.ts. The last two are context
     lines and may need correcting before their hunks apply.

diff --git a/apps/cli/src/background-commands.test.ts b/apps/cli/src/background-commands.test.ts
new file mode 100644
index 0000000..60f4e56
--- /dev/null
+++ b/apps/cli/src/background-commands.test.ts
@@ -0,0 +1,111 @@
+// Tests for the background-task slash commands: /tasks and /background.
+// Both drive a session-scoped TaskManager (ctx.tasks). Here it's a real
+// TaskManager wired to a stub runner — no sub-agent actually runs, so the tests
+// stay fast and deterministic while exercising create / list / get / output.
+
+import { describe, expect, it } from 'vitest';
+import { SessionManager, TaskManager, type TaskRunHandle } from '@deepcode/core';
+import { CommandRegistry, type SessionContext } from './commands.js';
+
+const reg = new CommandRegistry();
+
+/** A TaskManager whose runner immediately resolves with a fixed result string. */
+function stubManager(result = 'done'): TaskManager {
+  return new TaskManager(
+    () => ({ done: Promise.resolve(result), abort: () => {} }) as TaskRunHandle,
+  );
+}
+
+function ctx(overrides: Partial<SessionContext> = {}): SessionContext {
+  return {
+    cwd: '/tmp/x',
+    model: 'deepseek-chat',
+    mode: 'default',
+    effort: 'medium',
+    settings: {},
+    creds: { apiKey: 'sk-test' },
+    sessionId: 's1',
+    sessions: new SessionManager({ root: '/tmp/x' }),
+    usage: { inputTokens: 0, outputTokens: 0, reasoningTokens: 0, cacheReadTokens: 0 },
+    ...overrides,
+  };
+}
+
+describe('/background', () => {
+  it('creates a task and reports its id', async () => {
+    const tasks = stubManager();
+    const out = (
+      await reg.match('/background')!.cmd.run(['fix', 'the', 'flaky', 'test'], ctx({ tasks }))
+    ).join('\n');
+    expect(out).toMatch(/Started background task task-/);
+    const list = tasks.list();
+    expect(list).toHaveLength(1);
+    expect(list[0]!.description).toBe('fix the flaky test');
+  });
+
+  it('the `/bg` alias works', async () => {
+    const tasks = stubManager();
+    await reg.match('/bg')!.cmd.run(['do', 'thing'], ctx({ tasks }));
+    expect(tasks.list()).toHaveLength(1);
+  });
+
+  it('shows usage when given no prompt', async () => {
+    const out = (await reg.match('/background')!.cmd.run([], ctx({ tasks: stubManager() }))).join(
+      '\n',
+    );
+    expect(out).toMatch(/Usage: \/background/);
+  });
+
+  it('is unavailable without a task manager', async () => {
+    const out = (await reg.match('/background')!.cmd.run(['x'], ctx())).join('\n');
+    expect(out).toMatch(/unavailable/i);
+  });
+
+  it('reports a runner failure instead of throwing', async () => {
+    const tasks = new TaskManager(() => {
+      throw new Error('no runner attached');
+    });
+    const out = (await reg.match('/background')!.cmd.run(['x'], ctx({ tasks }))).join('\n');
+    expect(out).toMatch(/Could not start background task: no runner attached/);
+  });
+});
+
+describe('/tasks', () => {
+  it('reports an empty list', async () => {
+    const out = (await reg.match('/tasks')!.cmd.run([], ctx({ tasks: stubManager() }))).join('\n');
+    expect(out).toMatch(/No background tasks yet/);
+  });
+
+  it('lists started tasks with id, status, and description', async () => {
+    const tasks = stubManager();
+    tasks.create({ description: 'task one', prompt: 'p1' });
+    tasks.create({ description: 'task two', prompt: 'p2' });
+    const out = (await reg.match('/tasks')!.cmd.run([], ctx({ tasks }))).join('\n');
+    expect(out).toMatch(/Background tasks \(2\)/);
+    expect(out).toContain('task one');
+    expect(out).toContain('task two');
+    expect(out).toMatch(/\[(running|completed)\]/);
+  });
+
+  it('`/tasks <id>` shows a single task’s status and output', async () => {
+    const tasks = stubManager('the background result');
+    const t = tasks.create({ description: 'investigate', prompt: 'look into x' });
+    await tasks.wait(t.id); // let the stub runner settle → completed + output
+    const out = (await reg.match('/tasks')!.cmd.run([t.id], ctx({ tasks }))).join('\n');
+    expect(out).toContain(t.id);
+    expect(out).toMatch(/\[completed\]/);
+    expect(out).toContain('the background result');
+  });
+
+  it('`/tasks <id>` reports no such task', async () => {
+    const out = (
+      await reg.match('/tasks')!.cmd.run(['task-nope'], ctx({ tasks: stubManager() }))
+    ).join('\n');
+    expect(out).toMatch(/No task "task-nope"/);
+  });
+
+  it('is unavailable without a task manager', async () => {
+    const out = (await reg.match('/tasks')!.cmd.run([], ctx())).join('\n');
+    expect(out).toMatch(/unavailable/i);
+  });
+});
diff --git a/apps/cli/src/commands.ts b/apps/cli/src/commands.ts
index 1515b37..af8a5e1 100644
--- a/apps/cli/src/commands.ts
+++ b/apps/cli/src/commands.ts
@@ -10,6 +10,7 @@ import type {
   SessionManager,
   SessionMeta,
   StoredMessage,
+  TaskManager,
 } from '@deepcode/core';
 import {
   contextWindowFor,
@@ -163,6 +164,10 @@ export interface SessionContext {
   provider?: Provider;
   /** Set by /rewind to request history replacement. REPL applies after run. */
   newHistory?: StoredMessage[];
+  /** Session-scoped background-task manager (REPL-injected) — backs /tasks and
+   * /background. Same instance the agent loop uses, so tasks the agent starts
+   * are visible here and vice-versa. */
+  tasks?: TaskManager;
 }
 
 export interface SlashCommand {
@@ -1134,6 +1139,58 @@ export const BtwCommand: SlashCommand = {
   },
 };
 
+export const TasksCommand: SlashCommand = {
+  name: '/tasks',
+  description: 'List background tasks this session, or `/tasks <id>` to show one’s output.',
+  run(args, ctx) {
+    if (!ctx.tasks) return ['(Background tasks are unavailable here.)'];
+    // `/tasks <id>` → show that task's status + output so far.
+    if (args[0]) {
+      const id = args[0].trim();
+      const task = ctx.tasks.get(id);
+      if (!task) return [`No task "${id}". Run /tasks to list them.`];
+      const out = (task.output || '').trim();
+      return [
+        `${task.id} [${task.status}] ${task.description}`,
+        ` created ${task.createdAt}${task.finishedAt ? ` · finished ${task.finishedAt}` : ''}`,
+        '',
+        out || `(no output yet — task is ${task.status})`,
+      ];
+    }
+    const tasks = ctx.tasks.list();
+    if (tasks.length === 0) {
+      return ['No background tasks yet.', 'Start one with `/background <prompt>`.'];
+    }
+    const lines = [`Background tasks (${tasks.length}):`];
+    for (const t of tasks) lines.push(` ${t.id} [${t.status}] ${t.description}`);
+    lines.push('');
+    lines.push('Show one with `/tasks <id>`; cancel via the agent’s TaskStop tool.');
+    return lines;
+  },
+};
+
+export const BackgroundCommand: SlashCommand = {
+  name: '/background',
+  aliases: ['/bg'],
+  description: 'Run a prompt as a background sub-agent while you keep working.',
+  run(args, ctx) {
+    if (!ctx.tasks) return ['(Background tasks are unavailable here.)'];
+    const prompt = args.join(' ').trim();
+    if (!prompt) {
+      return ['Usage: /background <prompt> — runs as a background sub-agent.'];
+    }
+    try {
+      const task = ctx.tasks.create({ description: prompt.slice(0, 60), prompt });
+      return [
+        `Started background task ${task.id}: “${task.description}”.`,
+        'It runs while you keep chatting. Check it with `/tasks` (or `/tasks ' + task.id + '`).',
+      ];
+    } catch (err) {
+      return [`Could not start background task: ${(err as Error).message}`];
+    }
+  },
+};
+
 export const BUILTIN_COMMANDS: SlashCommand[] = [
   HelpCommand,
   ClearCommand,
@@ -1170,6 +1227,8 @@ export const BUILTIN_COMMANDS: SlashCommand[] = [
   UpgradeCommand,
   PrivacySettingsCommand,
   BtwCommand,
+  TasksCommand,
+  BackgroundCommand,
 ];
 
 // ──────────────────────────────────────────────────────────────────────────
diff --git a/apps/cli/src/repl.ts b/apps/cli/src/repl.ts
index 65548eb..3e49860 100644
--- a/apps/cli/src/repl.ts
+++ b/apps/cli/src/repl.ts
@@ -9,6 +9,7 @@ import {
   HookDispatcher,
   ReadTool,
   SessionManager,
+  TaskManager,
   ToolRegistry,
   WebFetchTool,
   WriteTool,
@@ -456,6 +457,38 @@ export async function startRepl(opts: ReplOpts): Promise {
     history,
   };
 
+  // Session-scoped background-task manager (M3.15.3 / parity: /tasks, /background).
+  // ONE manager for the whole REPL session so tasks persist across turns and are
+  // visible to both the agent (via TaskCreate) and slash commands. Each turn's
+  // runAgent attaches a richer runner (named sub-agents + SubagentStop). This
+  // baseline runner only handles `/background` started before the first turn:
+  // it runs the prompt as a depth-1 sub-agent (clean context, no nested tasks),
+  // reading ctx.model/ctx.mode live so /model and /mode switches are honored.
+  const tasks = new TaskManager((spec) => {
+    const ac = new AbortController();
+    const done = runAgent({
+      provider,
+      tools,
+      systemPrompt,
+      userMessage: spec.prompt,
+      model: ctx.model,
+      maxTokens,
+      temperature,
+      cwd: ctx.cwd,
+      signal: ac.signal,
+      mode: ctx.mode as Mode,
+      permissions: settings.permissions,
+      hooks,
+      pluginDirs: pluginContrib.dirs,
+      sandboxConfig: settings.sandbox,
+      autoMode: settings.autoMode,
+      subAgentDepth: 1,
+      systemReminders: false,
+    }).then((r) => assistantText(r.history));
+    return { done, abort: () => ac.abort() };
+  });
+  ctx.tasks = tasks;
+
   if (!opts.bare) {
     output.write(
       `\n ▎ DeepCode · ${ctx.model} · mode: ${ctx.mode} · effort: ${ctx.effort}\n`,
@@ -619,6 +652,9 @@ export async function startRepl(opts: ReplOpts): Promise {
         autoCompact: { contextWindow: contextWindowFor(ctx.model), threshold: 0.8 },
         autoMode: settings.autoMode,
         sandboxConfig: settings.sandbox,
+        // Session-scoped manager: the agent's TaskCreate calls land here too, so
+        // background tasks persist across turns and show up in /tasks.
+        taskManager: tasks,
         approval: async (toolName, _input, verdict) => {
           output.write(`\n ⏸ Approve ${toolName}? Reason: ${verdict.reason}\n`);
           const answer = (await rl.question(' [y]es / [n]o / [a]lways: ')).trim().toLowerCase();
@@ -717,6 +753,17 @@ function formatEvent(out: Writable, e: AgentEvent): void {
   }
 }
 
+/** Flatten an agent run's assistant text — the result of a background task. */
+function assistantText(history: StoredMessage[]): string {
+  return history
+    .filter((m) => m.role === 'assistant')
+    .flatMap((m) => m.content)
+    .filter((b): b is Extract<StoredMessage['content'][number], { type: 'text' }> => b.type === 'text')
+    .map((b) => b.text)
+    .join('\n')
+    .trim();
+}
+
 function formatToolInput(input: Record<string, unknown>): string {
   for (const key of ['file_path', 'command', 'pattern', 'path']) {
     const v = input[key];
diff --git a/docs/BEHAVIOR_PARITY.md b/docs/BEHAVIOR_PARITY.md
index 1d11f10..eb5f547 100644
--- a/docs/BEHAVIOR_PARITY.md
+++ b/docs/BEHAVIOR_PARITY.md
@@ -47,9 +47,9 @@ Legend: `✅` matches · `🟡` matches with caveats · `🔄` deferred · `⚠
 | `/voice` | ✓ | ✗ | 🔄 M8 |
 | `/teleport` | ✓ | ✗ | 🔄 M8 |
 | `/desktop` | ✓ | ✗ | 🔄 M6 |
-| `/background` | ✓ | ✗ | 🔄 (paired with TaskCreate M3.15.3) |
-| `/batch` | ✓ | ✗ | 🔄 |
-| `/tasks` | ✓ | ✗ | 🔄 |
+| `/background` | ✓ | ✓ | ✅ — runs a prompt as a background sub-agent via the session TaskManager (alias `/bg`); agent-started TaskCreate tasks appear too |
+| `/batch` | ✓ | ✗ | 🔄 — batch-of-prompts not yet wired (use `/background` per prompt) |
+| `/tasks` | ✓ | ✓ | ✅ — lists this session's background tasks; `/tasks <id>` shows one's status + output |
 | `/plan` | ✓ | ✗ | 🔄 — set via `/mode plan` in DeepCode |
 | `/login` / `/logout` | ✓ | ✓ | ✅ — /logout clears creds + exits; /login stores a new key (next launch) |
 | `/export` | ✓ | ✓ | ✅ — writes the conversation to a markdown file |
diff --git a/packages/core/src/agent.ts b/packages/core/src/agent.ts
index 324102a..9d81cc6 100644
--- a/packages/core/src/agent.ts
+++ b/packages/core/src/agent.ts
@@ -4,7 +4,7 @@
 import { compact, shouldCompact } from './compaction/index.js';
 import type { PermissionRules } from './config/types.js';
 import { dispatchToolCall, type DispatchVerdict } from './harness/tool-dispatcher.js';
-import { TaskManager } from './tasks/manager.js';
+import { TaskManager, type TaskRunner } from './tasks/manager.js';
 import type { HookDispatcher } from './hooks/index.js';
 import type { Mode } from './types.js';
 import type { Provider } from './providers/types.js';
@@ -92,6 +92,12 @@ export interface RunAgentOptions {
   /** Installed-plugin directories — so the Task tool can resolve plugin-bundled
    * sub-agents (`/agents/*.md`) in addition to user/project ones. */
   pluginDirs?: string[];
+  /** Optional host-owned background-task manager (e.g. the REPL's session-scoped
+   * one). When set, this run attaches its sub-agent runner to it and exposes it
+   * on the tool context, so background tasks persist across runAgent calls and
+   * are visible to slash commands. When absent, a per-run manager is created
+   * (the original behavior). Top-level only. */
+  taskManager?: TaskManager;
 }
 
 /** Max sub-agent recursion: top-level (0) may spawn sub-agents (depth 1); those
@@ -374,7 +380,7 @@ export async function runAgent(opts: RunAgentOptions): Promise {
   // just that task. A sub-agent (depth ≥ 1) gets no manager → can't spawn tasks.
   if (depth === 0 && toolCtx.runSubAgent) {
     const runSub = toolCtx.runSubAgent;
-    toolCtx.tasks = new TaskManager((spec) => {
+    const runner: TaskRunner = (spec) => {
       const ac = new AbortController();
       const done = runSub({
         prompt: spec.prompt,
@@ -382,7 +388,17 @@ export async function runAgent(opts: RunAgentOptions): Promise {
         signal: ac.signal,
       }).then((r) => r.text);
       return { done, abort: () => ac.abort() };
-    });
+    };
+    // Reuse a host-provided manager (e.g. REPL session-scoped) so tasks persist
+    // across turns and stay visible to slash commands; attach THIS run's runner
+    // either way (it resolves named sub-agents + fires SubagentStop). Otherwise
+    // fall back to a per-run manager (the original behavior).
+    if (opts.taskManager) {
+      opts.taskManager.setRunner(runner);
+      toolCtx.tasks = opts.taskManager;
+    } else {
+      toolCtx.tasks = new TaskManager(runner);
+    }
   }
 
   const totalUsage = { inputTokens: 0, outputTokens: 0, reasoningTokens: 0, cacheReadTokens: 0 };
diff --git a/packages/core/src/tasks/manager.test.ts b/packages/core/src/tasks/manager.test.ts
index 1ad958e..da82d4f 100644
--- a/packages/core/src/tasks/manager.test.ts
+++ b/packages/core/src/tasks/manager.test.ts
@@ -71,6 +71,23 @@ describe('TaskManager', () => {
     expect(mgr.get(t.id)?.status).toBe('completed');
   });
 
+  it('setRunner re-targets the runner for subsequent create() calls', async () => {
+    const calls: string[] = [];
+    const mgr = new TaskManager((spec) => {
+      calls.push(`A:${spec.prompt}`);
+      return { done: Promise.resolve('a'), abort: () => {} };
+    });
+    mgr.create({ description: 'one', prompt: 'p1' });
+    mgr.setRunner((spec) => {
+      calls.push(`B:${spec.prompt}`);
+      return { done: Promise.resolve('b'), abort: () => {} };
+    });
+    mgr.create({ description: 'two', prompt: 'p2' });
+    expect(calls).toEqual(['A:p1', 'B:p2']);
+    // Both tasks remain tracked — setRunner doesn't disturb existing records.
+    expect(mgr.list()).toHaveLength(2);
+  });
+
   it('list / get / update / unknown-id behaviour', async () => {
     const mgr = new TaskManager(() => ({ done: Promise.resolve('r'), abort: () => {} }));
     const t = mgr.create({ description: 'orig', prompt: 'p' });
diff --git a/packages/core/src/tasks/manager.ts b/packages/core/src/tasks/manager.ts
index dd82728..88f4459 100644
--- a/packages/core/src/tasks/manager.ts
+++ b/packages/core/src/tasks/manager.ts
@@ -42,7 +42,17 @@ export class TaskManager {
   private readonly handles = new Map();
   private seq = 0;
 
-  constructor(private readonly runner: TaskRunner) {}
+  constructor(private runner: TaskRunner) {}
+
+  /**
+   * Replace the runner used for subsequent `create()` calls. Lets a host own a
+   * long-lived (e.g. REPL session-scoped) manager while the agent loop attaches
+   * its run-local sub-agent runner each turn. Tasks already started are
+   * unaffected — their handle is captured at `create()` time.
+   */
+  setRunner(runner: TaskRunner): void {
+    this.runner = runner;
+  }
 
   private newId(): string {
     return `task-${(this.seq++).toString(36)}`;