Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 19 additions & 6 deletions docs/agents.md
Original file line number Diff line number Diff line change
Expand Up @@ -196,15 +196,28 @@ export default {

The agent uses the default model unless overridden. The report file is always written to `output/reports/`; there is no opt-out for the file itself, but `enabled: false` disables the agent so nothing runs.

## Captain Agent *(coming soon)*
## Captain Agent

**Purpose:** Orchestrates the whole testing session.
**Purpose:** Supervises explicit user requests and non-standard recovery situations.

**Modes:**
- `idle` - plan management, project inspection, knowledge and experience file work. Available even before a page is loaded.
- `web` - page interaction, navigation, browser diagnostics, visual/context checks.
- `test` - test timeline inspection, state inspection, generated code/log analysis.
- `heal` - browser and test recovery when an active test loses its page or browser context.

**What it does:**
- Coordinates all agents intelligently
- Responds to user commands in real-time
- Adjusts strategy based on discoveries
- Manages conversation context efficiently
- Handles direct TUI requests that need more judgment than a slash command
- Explains current Explorbot configuration and suggests focused setup improvements
- Reads recent output artifacts before answering questions about previous sessions
- Inspects active tests, failed steps, page states, and Pilot analysis
- Recovers closed/crashed pages during test execution and tells Tester how to continue
- Reloads or recovers the page, restarts the browser, opens a fresh tab, or closes extra tabs when needed

**When Captain runs:**
- On explicit user requests in the TUI
- During test interrupts where the user asks to stop, pass, skip, or redirect execution
- On fatal browser execution errors, where it attempts recovery first and stops the test only if recovery fails

## Per-Agent Model Configuration

Expand Down
47 changes: 40 additions & 7 deletions src/action.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,9 @@ import { htmlCombinedSnapshot, minifyHtml } from './utils/html.js';
import { createDebug, setStepSpanParent, tag } from './utils/logger.js';
import { safeFilename } from './utils/strings.ts';
import { throttle } from './utils/throttle.ts';
import { isFatalBrowserError } from './utils/browser-errors.ts';

const debugLog = createDebug('explorbot:action');
const FATAL_BROWSER_ERRORS = /Frame was detached|Target closed|Execution context was destroyed|Protocol error|Session closed/i;

class Action {
private actor: CodeceptJS.I;
Expand Down Expand Up @@ -78,21 +78,26 @@ class Action {
const page = this.playwrightHelper.page;
const frame = this.playwrightHelper.frame;
await page?.waitForLoadState('domcontentloaded', { timeout: 10000 })?.catch(() => {});
const grabAll = () => Promise.all([(this.actor as any).grabSource(), (this.actor as any).grabTitle(), this.captureBrowserLogs()]);
await waitForUsablePageDom(page);
const grabAll = () => Promise.all([captureHtml(page, frame, this.actor), captureTitle(page, this.actor), this.captureBrowserLogs()]);
const [html, title, browserLogs] = await grabAll().catch(async (err: Error) => {
const msg = err instanceof Error ? err.message : String(err);
if (!/navigating and changing the content/i.test(msg)) throw err;
await page?.waitForLoadState('domcontentloaded', { timeout: 10000 })?.catch(() => {});
await waitForUsablePageDom(page);
return grabAll();
});
const url = page?.url() || (await (this.actor as any).grabCurrentUrl?.());

let screenshotFile: string | undefined = undefined;
const statesDir = outputPath('states');
fs.mkdirSync(statesDir, { recursive: true });

if (includeScreenshot) {
const filename = safeFilename(`${stateHash}_${timestamp}`, '.png');
screenshotFile = await (this.actor as any)
.saveScreenshot(filename)
const screenshotPath = join(statesDir, filename);
screenshotFile = await page
?.screenshot({ path: screenshotPath, fullPage: true })
.then(() => filename)
.catch((err: Error) => {
debugLog('Screenshot failed, continuing without it:', err);
Expand All @@ -101,8 +106,6 @@ class Action {
}

// Save HTML to file
const statesDir = outputPath('states');
fs.mkdirSync(statesDir, { recursive: true });
const htmlFile = safeFilename(`${stateHash}_${timestamp}`, '.html');
const htmlPath = join(statesDir, htmlFile);
fs.writeFileSync(htmlPath, html, 'utf8');
Expand Down Expand Up @@ -158,7 +161,7 @@ class Action {
return result;
} catch (err) {
const msg = err instanceof Error ? err.message : String(err);
if (FATAL_BROWSER_ERRORS.test(msg)) throw err;
if (isFatalBrowserError(err)) throw err;
debugLog('capturePageState failed with non-fatal error:', msg);
const url = this.playwrightHelper.page?.url?.() || '';
return new ActionResult({ url, error: msg });
Expand Down Expand Up @@ -375,6 +378,7 @@ class Action {
return true;
} catch (error) {
this.lastError = error as Error;
if (isFatalBrowserError(error)) throw error;
debugLog(`Attempt failed: ${codeBlock}: ${errorToString(error) || this.lastError?.toString()}`);
return false;
}
Expand Down Expand Up @@ -406,6 +410,35 @@ function errorToString(error: any): string {
return error.message || error.toString();
}

/**
 * Best-effort wait until the page has a non-empty `<body>`.
 *
 * The check is delegated to `page.waitForFunction` with a 5000 ms timeout. Any
 * rejection from that call (timeout or otherwise) is intentionally swallowed:
 * callers go on to capture whatever the page currently contains.
 *
 * @param page Page object exposing `waitForFunction`; when it is missing, or has
 *   no `waitForFunction`, the function returns immediately.
 */
async function waitForUsablePageDom(page: any): Promise<void> {
  if (!page?.waitForFunction) return;

  await page
    .waitForFunction(
      () => {
        const body = document.body;
        if (!body) return false;
        if (body.children.length > 0) return true;
        // `textContent` can be null, which makes the optional chain yield
        // `undefined`; default to 0 so the comparison is always number-vs-number
        // (a bare `undefined > 0` is rejected under strictNullChecks).
        return (body.textContent?.trim().length ?? 0) > 0;
      },
      undefined,
      { timeout: 5000 }
    )
    .catch(() => {});
}

/**
 * Returns the current HTML from the first source able to provide it.
 *
 * Sources are tried in order: `frame.content()`, then `page.content()`, then
 * `actor.grabSource()`.
 *
 * @param page Page object, used when the frame cannot provide content.
 * @param frame Frame object, preferred when it exposes `content`.
 * @param actor Actor object, used as the last fallback via `grabSource`.
 * @returns The HTML produced by the selected source.
 * @throws Error when none of the three sources is usable.
 */
async function captureHtml(page: any, frame: any, actor: any): Promise<string> {
  if (frame && frame.content) {
    return frame.content();
  }

  if (page && page.content) {
    return page.content();
  }

  if (actor && actor.grabSource) {
    return actor.grabSource();
  }

  throw new Error('Playwright page is unavailable for HTML capture');
}

/**
 * Returns the page title from the first source able to provide it.
 *
 * `page.title()` is preferred; `actor.grabTitle()` is the fallback. When
 * neither is available an empty string is returned instead of throwing.
 *
 * @param page Page object, preferred when it exposes `title`.
 * @param actor Actor object, used as fallback via `grabTitle`.
 * @returns The title from the selected source, or `''` when there is none.
 */
async function captureTitle(page: any, actor: any): Promise<string> {
  if (page && page.title) {
    return page.title();
  }

  if (actor && actor.grabTitle) {
    return actor.grabTitle();
  }

  return '';
}

function sanitizeCodeBlock(code: string): string {
return code
.split('\n')
Expand Down
114 changes: 94 additions & 20 deletions src/ai/captain.ts
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@ const MAX_STEPS = 15;
const CaptainBase = WithTestMode(WithWebMode(WithIdleMode(TaskAgent as unknown as new (...args: any[]) => TaskAgent)));

export class Captain extends CaptainBase implements Agent {
protected readonly ACTION_TOOLS = ['click', 'pressKey', 'form', 'navigate'];
emoji = '🧑‍✈️';
private explorBot: ExplorBot;
private conversation: Conversation | null = null;
Expand Down Expand Up @@ -72,6 +71,12 @@ export class Captain extends CaptainBase implements Agent {

protected trackToolExecutions(toolExecutions: any[]): void {
super.trackToolExecutions(toolExecutions);
if (toolExecutions.length > 0) {
this.recentToolCalls.push(...toolExecutions);
if (this.recentToolCalls.length > 20) {
this.recentToolCalls = this.recentToolCalls.slice(-20);
}
}
for (const exec of toolExecutions) {
const label = toolExecutionLabel(exec.input);
if (!label) continue;
Expand All @@ -80,14 +85,19 @@ export class Captain extends CaptainBase implements Agent {
}
}

private detectMode(): CaptainMode {
if (this.explorBot.getExplorer().activeTest) return 'test';
if (this.explorBot.getExplorer().getStateManager().getCurrentState()) return 'web';
getMode(): CaptainMode {
const explorer = this.explorBot.getExplorer();
const activeTest = explorer.activeTest;
const page = explorer.playwrightHelper?.page;

if (activeTest && (!page || page.isClosed?.())) return 'heal';
if (activeTest) return 'test';
if (explorer.getStateManager().getCurrentState()) return 'web';
return 'idle';
}

private systemPrompt(): string {
const mode = this.detectMode();
const mode = this.getMode();
const currentUrl = this.explorBot.getExplorer().getStateManager().getCurrentState()?.url;
const customPrompt = this.explorBot.getProvider().getSystemPromptForAgent('captain', currentUrl);

Expand All @@ -101,18 +111,21 @@ export class Captain extends CaptainBase implements Agent {
- idle: plan management, file operations, knowledge. Always available.
- web: page interaction, navigation, browser diagnostics. When working with a web page.
- test: test analysis, state inspection. When a test is running or analyzing results.
- heal: browser/test recovery. When a test is running and browser state is broken or unavailable.
</modes>

${this.idleModePrompt()}
${mode === 'web' ? this.webModePrompt() : ''}
${mode === 'test' ? this.testModePrompt() : ''}
${mode === 'web' || mode === 'heal' ? this.webModePrompt() : ''}
${mode === 'test' || mode === 'heal' ? this.testModePrompt() : ''}

<rules>
- After a successful action, if the pageDiff confirms the goal, call done() immediately — do not verify with see() or context() unless the user explicitly asked for verification
- Prefer completing in fewer tool calls over thoroughness
- NEVER run tests unless the user explicitly asks
${mode === 'web' ? this.webModeRules() : ''}
${mode === 'test' ? this.testModeRules() : ''}
- If you are answering with information rather than completing a browser action, include the actual user-facing answer in done({ details }). Do not only say that it was shown or explained.
${mode === 'web' || mode === 'heal' ? this.webModeRules() : ''}
${mode === 'test' || mode === 'heal' ? this.testModeRules() : ''}
${mode === 'heal' ? '- First diagnose browser availability, then recover the browser/page before continuing test analysis.' : ''}
</rules>

${customPrompt || ''}
Expand Down Expand Up @@ -250,9 +263,20 @@ export class Captain extends CaptainBase implements Agent {
description: 'Call when the user request is fulfilled.',
inputSchema: z.object({
summary: z.string().describe('What was done'),
details: z.string().optional().describe('Actual user-facing content. Required when the user asked to show, display, explain, summarize, compare, or diagnose information.'),
}),
execute: async ({ summary }) => {
execute: async ({ summary, details }) => {
debugLog('done', summary);
if (!details?.trim() && !this.canCompleteWithoutDetails()) {
return {
success: false,
message: 'No user-facing result was provided. Call done() again with the actual answer in details, or complete a browser action first.',
};
}
if (details?.trim()) {
tag('details').log(details);
task.addNote(details);
}
task.addNote(summary);
onDone(summary);
return { success: true, summary };
Expand All @@ -261,6 +285,9 @@ export class Captain extends CaptainBase implements Agent {
runCommand: tool({
description: dedent`
Execute a TUI command. Returns log output from command execution.
Use only when the user explicitly asks to run a slash command.
Never use this to analyze files, reports, logs, plans, generated tests, knowledge, or experience.
Never run a slash command unless the user request itself starts with that slash command.
${this.commandDescriptions
.map((c) => {
const opts = c.options ? ` (${c.options})` : '';
Expand All @@ -274,6 +301,13 @@ export class Captain extends CaptainBase implements Agent {
execute: async ({ command }) => {
if (!this.commandExecutor) return { success: false, message: 'Command executor not available' };
const cmd = command.startsWith('/') ? command : `/${command}`;
if (!isExplicitSlashRequest(task.description, cmd)) {
return {
success: false,
command: cmd,
message: 'Command blocked: slash commands require an explicit matching slash-command request from the user.',
};
}
startLogCapture();
try {
await this.commandExecutor(cmd);
Expand All @@ -286,11 +320,12 @@ export class Captain extends CaptainBase implements Agent {
}

private async tools(task: Task, onDone: (summary: string) => void) {
const mode = this.detectMode();
const mode = this.getMode();
const ctx: ModeContext = { explorBot: this.explorBot, task };
const core = this.coreTools(task, onDone);
const idle = await this.idleModeTools(ctx);

if (mode === 'heal') return { ...core, ...idle, ...this.testModeTools(ctx), ...this.webModeTools(ctx) };
if (mode === 'test') return { ...core, ...idle, ...this.testModeTools(ctx) };
if (mode === 'web') return { ...core, ...idle, ...this.webModeTools(ctx) };
return { ...core, ...idle };
Expand Down Expand Up @@ -365,20 +400,28 @@ export class Captain extends CaptainBase implements Agent {
return result.object;
}

/**
 * Hands an execution error to the explorer and turns its result into a
 * recovery action for the active test.
 *
 * When the explorer reports `recovered`, the message is extended with an
 * instruction to continue the given test from the restored page; otherwise
 * the explorer's message is passed through unchanged.
 *
 * @param error The error raised during execution.
 * @param activeTest The test that was running; its `scenario` is quoted in the message.
 * @returns The explorer's result with the (possibly extended) message.
 */
async processExecutionError(error: Error, activeTest: Test): Promise<ExecutionRecoveryAction> {
  const outcome = await this.explorBot.getExplorer().handleExecutionError(error);

  if (!outcome.recovered) {
    return { ...outcome, message: outcome.message };
  }

  const message = `${outcome.message}\nContinue the test "${activeTest.scenario}" from the restored page.`;
  return { ...outcome, message };
}

/**
 * Tells whether `done()` may be accepted without `details`: true when at least
 * one recent tool call carries browser completion evidence.
 */
private canCompleteWithoutDetails(): boolean {
  const executions = this.recentToolCalls || [];
  for (const execution of executions) {
    if (hasBrowserCompletionEvidence(execution)) return true;
  }
  return false;
}

async handle(input: string, options: { reset?: boolean } = {}): Promise<string | null> {
const stateManager = this.explorBot.getExplorer().getStateManager();
const initialState = stateManager.getCurrentState();

if (!initialState) {
tag('warning').log('No page loaded. Use /navigate or I.amOnPage() first.');
return null;
}

const conversation = options.reset ? this.resetConversation() : this.ensureConversation();
let isDone = false;
let finalSummary: string | null = null;

const startUrl = initialState.url || '';
const startUrl = initialState?.url || '';
const task = new Task(input, startUrl);
const onDone = (summary: string) => {
isDone = true;
Expand Down Expand Up @@ -421,12 +464,14 @@ export class Captain extends CaptainBase implements Agent {
}

const currentState = stateManager.getCurrentState();
if (!currentState) {
if (!currentState && this.getMode() !== 'idle') {
stop();
return;
}

await this.reinjectContextIfNeeded(conversation, currentState);
if (currentState) {
await this.reinjectContextIfNeeded(conversation, currentState);
}

if (userInput) {
const newContext = await this.getPageContext();
Expand Down Expand Up @@ -463,7 +508,7 @@ export class Captain extends CaptainBase implements Agent {

if (result?.toolExecutions?.length) {
const lastExec = result.toolExecutions[result.toolExecutions.length - 1];
if (lastExec.wasSuccessful && this.ACTION_TOOLS.includes(lastExec.toolName)) {
if (hasBrowserCompletionEvidence(lastExec)) {
conversation.addUserText('Action succeeded. If the goal is achieved, call done() now with a brief summary.');
}
}
Expand Down Expand Up @@ -500,3 +545,32 @@ interface SupervisorAction {
action: 'inject' | 'stop' | 'pass' | 'skip';
message: string;
}

/**
 * Result shape returned by `Captain.processExecutionError`.
 */
interface ExecutionRecoveryAction {
  /** What should happen next — NOTE(review): presumably whether the test keeps running or halts; confirm against the caller. */
  action: 'continue' | 'stop';
  /** Human-readable text; `processExecutionError` appends a "Continue the test ..." instruction when `recovered` is truthy. */
  message: string;
  /** Optional flag read by `processExecutionError` to decide whether to extend `message`. */
  recovered?: boolean;
}

/**
 * Checks that the user's input starts with the same slash command that is
 * about to be executed.
 *
 * @param input Raw user request text.
 * @param command Command that is about to run.
 * @returns `true` only when both values yield a slash-command token and the
 *   two tokens are identical (case-sensitive).
 */
function isExplicitSlashRequest(input: string, command: string): boolean {
  const requestedToken = slashCommandToken(input);
  const commandToken = slashCommandToken(command);
  return Boolean(requestedToken) && Boolean(commandToken) && requestedToken === commandToken;
}

/**
 * Extracts the leading slash-command token from a string.
 *
 * The value is trimmed first. If it does not start with `/`, `null` is
 * returned. Otherwise the token runs up to (not including) the first character
 * whose code unit is at or below the space character (U+0020), or to the end
 * of the string when there is no such character.
 *
 * @param value Text that may begin with a slash command.
 * @returns The token including its leading `/`, or `null`.
 */
function slashCommandToken(value: string): string | null {
  const text = value.trim();
  if (text[0] !== '/') return null;

  // Index 0 is '/', which is above U+0020, so searching from the start is safe.
  const end = text.search(/[\u0000-\u0020]/);
  return end === -1 ? text : text.slice(0, end);
}

/**
 * Tells whether a tool execution succeeded and left browser-side evidence.
 *
 * Evidence is any truthy `pageDiff`, `code` or `playwrightGroupId` on the
 * execution's `output`.
 *
 * @param execution Tool execution record; may be null or undefined.
 * @returns `true` when `wasSuccessful` is truthy and at least one evidence
 *   field is truthy.
 */
function hasBrowserCompletionEvidence(execution: any): boolean {
  const succeeded = Boolean(execution?.wasSuccessful);
  if (!succeeded) return false;

  const result = execution.output || {};
  const evidenceKeys = ['pageDiff', 'code', 'playwrightGroupId'];
  return evidenceKeys.some((key) => Boolean(result[key]));
}
Loading
Loading