diff --git a/docs/agents.md b/docs/agents.md index a3793e8..957a91f 100644 --- a/docs/agents.md +++ b/docs/agents.md @@ -196,15 +196,28 @@ export default { The agent uses the default model unless overridden. The report file is always written to `output/reports/`; there is no opt-out for the file itself, but `enabled: false` disables the agent so nothing runs. -## Captain Agent *(coming soon)* +## Captain Agent -**Purpose:** Orchestrates the whole testing session. +**Purpose:** Supervises explicit user requests and non-standard recovery situations. + +**Modes:** +- `idle` - plan management, project inspection, knowledge and experience file work. Available even before a page is loaded. +- `web` - page interaction, navigation, browser diagnostics, visual/context checks. +- `test` - test timeline inspection, state inspection, generated code/log analysis. +- `heal` - browser and test recovery when an active test loses its page or browser context. **What it does:** -- Coordinates all agents intelligently -- Responds to user commands in real-time -- Adjusts strategy based on discoveries -- Manages conversation context efficiently +- Handles direct TUI requests that need more judgment than a slash command +- Explains current Explorbot configuration and suggests focused setup improvements +- Reads recent output artifacts before answering questions about previous sessions +- Inspects active tests, failed steps, page states, and Pilot analysis +- Recovers closed/crashed pages during test execution and tells Tester how to continue +- Can reload, recover, restart the browser, open a fresh tab, or close extra tabs when needed + +**When Captain runs:** +- On explicit user requests in the TUI +- During test interrupts where the user asks to stop, pass, skip, or redirect execution +- During fatal browser execution errors, where it first tries recovery before stopping the test ## Per-Agent Model Configuration diff --git a/src/action.ts b/src/action.ts index 0d1bbe2..09d35b6 100644 --- a/src/action.ts +++ b/src/action.ts @@ -23,9 +23,9 @@ import { htmlCombinedSnapshot, minifyHtml } from './utils/html.js'; import { createDebug, setStepSpanParent, tag } from './utils/logger.js'; import { safeFilename } from './utils/strings.ts'; import { throttle } from './utils/throttle.ts'; +import { isFatalBrowserError } from './utils/browser-errors.ts'; const debugLog = createDebug('explorbot:action'); -const FATAL_BROWSER_ERRORS = /Frame was detached|Target closed|Execution context was destroyed|Protocol error|Session closed/i; class Action { private actor: CodeceptJS.I; @@ -78,21 +78,26 @@ class Action { const page = this.playwrightHelper.page; const frame = this.playwrightHelper.frame; await page?.waitForLoadState('domcontentloaded', { timeout: 10000 })?.catch(() => {}); - const grabAll = () => Promise.all([(this.actor as any).grabSource(), (this.actor as any).grabTitle(), this.captureBrowserLogs()]); + await waitForUsablePageDom(page); + const grabAll = () => Promise.all([captureHtml(page, frame, this.actor), captureTitle(page, this.actor), this.captureBrowserLogs()]); const [html, title, browserLogs] = await grabAll().catch(async (err: Error) => { const msg = err instanceof Error ? err.message : String(err); if (!/navigating and changing the content/i.test(msg)) throw err; await page?.waitForLoadState('domcontentloaded', { timeout: 10000 })?.catch(() => {}); + await waitForUsablePageDom(page); return grabAll(); }); const url = page?.url() || (await (this.actor as any).grabCurrentUrl?.()); let screenshotFile: string | undefined = undefined; + const statesDir = outputPath('states'); + fs.mkdirSync(statesDir, { recursive: true }); if (includeScreenshot) { const filename = safeFilename(`${stateHash}_${timestamp}`, '.png'); - screenshotFile = await (this.actor as any) - .saveScreenshot(filename) + const screenshotPath = join(statesDir, filename); + screenshotFile = await page + ?.screenshot({ path: screenshotPath, fullPage: true }) .then(() => filename) .catch((err: Error) => { debugLog('Screenshot failed, continuing without it:', err); @@ -101,8 +106,6 @@ class Action { } // Save HTML to file - const statesDir = outputPath('states'); - fs.mkdirSync(statesDir, { recursive: true }); const htmlFile = safeFilename(`${stateHash}_${timestamp}`, '.html'); const htmlPath = join(statesDir, htmlFile); fs.writeFileSync(htmlPath, html, 'utf8'); @@ -158,7 +161,7 @@ class Action { return result; } catch (err) { const msg = err instanceof Error ? err.message : String(err); - if (FATAL_BROWSER_ERRORS.test(msg)) throw err; + if (isFatalBrowserError(err)) throw err; debugLog('capturePageState failed with non-fatal error:', msg); const url = this.playwrightHelper.page?.url?.() || ''; return new ActionResult({ url, error: msg }); @@ -375,6 +378,7 @@ class Action { return true; } catch (error) { this.lastError = error as Error; + if (isFatalBrowserError(error)) throw error; debugLog(`Attempt failed: ${codeBlock}: ${errorToString(error) || this.lastError?.toString()}`); return false; } @@ -406,6 +410,35 @@ function errorToString(error: any): string { return error.message || error.toString(); } +async function waitForUsablePageDom(page: any): Promise { + if (!page?.waitForFunction) return; + + await page + .waitForFunction( + () => { + const body = document.body; + if (!body) return false; + return body.children.length > 0 || body.textContent?.trim().length > 0; + }, + undefined, + { timeout: 5000 } + ) + .catch(() => {}); +} + +async function captureHtml(page: any, frame: any, actor: any): Promise { + if (frame?.content) return frame.content(); + if (page?.content) return page.content(); + if (actor?.grabSource) return actor.grabSource(); + throw new Error('Playwright page is unavailable for HTML capture'); +} + +async function captureTitle(page: any, actor: any): Promise { + if (page?.title) return page.title(); + if (actor?.grabTitle) return actor.grabTitle(); + return ''; +} + function sanitizeCodeBlock(code: string): string { return code .split('\n') diff --git a/src/ai/captain.ts b/src/ai/captain.ts index ddb5548..bb5756b 100644 --- a/src/ai/captain.ts +++ b/src/ai/captain.ts @@ -26,7 +26,6 @@ const MAX_STEPS = 15; const CaptainBase = WithTestMode(WithWebMode(WithIdleMode(TaskAgent as unknown as new (...args: any[]) => TaskAgent))); export class Captain extends CaptainBase implements Agent { - protected readonly ACTION_TOOLS = ['click', 'pressKey', 'form', 'navigate']; emoji = '🧑‍✈️'; private explorBot: ExplorBot; private conversation: Conversation | null = null; @@ -72,6 +71,12 @@ export class Captain extends CaptainBase implements Agent { protected trackToolExecutions(toolExecutions: any[]): void { super.trackToolExecutions(toolExecutions); + if (toolExecutions.length > 0) { + this.recentToolCalls.push(...toolExecutions); + if (this.recentToolCalls.length > 20) { + this.recentToolCalls = this.recentToolCalls.slice(-20); + } + } for (const exec of toolExecutions) { const label = toolExecutionLabel(exec.input); if (!label) continue; @@ -80,14 +85,19 @@ export class Captain extends CaptainBase implements Agent { } } - private detectMode(): CaptainMode { - if (this.explorBot.getExplorer().activeTest) return 'test'; - if (this.explorBot.getExplorer().getStateManager().getCurrentState()) return 'web'; + getMode(): CaptainMode { + const explorer = this.explorBot.getExplorer(); + const activeTest = explorer.activeTest; + const page = explorer.playwrightHelper?.page; + + if (activeTest && (!page || page.isClosed?.())) return 'heal'; + if (activeTest) return 'test'; + if (explorer.getStateManager().getCurrentState()) return 'web'; return 'idle'; } private systemPrompt(): string { - const mode = this.detectMode(); + const mode = this.getMode(); const currentUrl = this.explorBot.getExplorer().getStateManager().getCurrentState()?.url; const customPrompt = this.explorBot.getProvider().getSystemPromptForAgent('captain', currentUrl); @@ -101,18 +111,21 @@ export class Captain extends CaptainBase implements Agent { - idle: plan management, file operations, knowledge. Always available. - web: page interaction, navigation, browser diagnostics. When working with a web page. - test: test analysis, state inspection. When a test is running or analyzing results. + - heal: browser/test recovery. When a test is running and browser state is broken or unavailable. ${this.idleModePrompt()} - ${mode === 'web' ? this.webModePrompt() : ''} - ${mode === 'test' ? this.testModePrompt() : ''} + ${mode === 'web' || mode === 'heal' ? this.webModePrompt() : ''} + ${mode === 'test' || mode === 'heal' ? this.testModePrompt() : ''} - After a successful action, if the pageDiff confirms the goal, call done() immediately — do not verify with see() or context() unless the user explicitly asked for verification - Prefer completing in fewer tool calls over thoroughness - NEVER run tests unless the user explicitly asks - ${mode === 'web' ? this.webModeRules() : ''} - ${mode === 'test' ? this.testModeRules() : ''} + - If you are answering with information rather than completing a browser action, include the actual user-facing answer in done({ details }). Do not only say that it was shown or explained. + ${mode === 'web' || mode === 'heal' ? this.webModeRules() : ''} + ${mode === 'test' || mode === 'heal' ? this.testModeRules() : ''} + ${mode === 'heal' ? '- First diagnose browser availability, then recover the browser/page before continuing test analysis.' : ''} ${customPrompt || ''} @@ -250,9 +263,20 @@ export class Captain extends CaptainBase implements Agent { description: 'Call when the user request is fulfilled.', inputSchema: z.object({ summary: z.string().describe('What was done'), + details: z.string().optional().describe('Actual user-facing content. Required when the user asked to show, display, explain, summarize, compare, or diagnose information.'), }), - execute: async ({ summary }) => { + execute: async ({ summary, details }) => { debugLog('done', summary); + if (!details?.trim() && !this.canCompleteWithoutDetails()) { + return { + success: false, + message: 'No user-facing result was provided. Call done() again with the actual answer in details, or complete a browser action first.', + }; + } + if (details?.trim()) { + tag('details').log(details); + task.addNote(details); + } task.addNote(summary); onDone(summary); return { success: true, summary }; @@ -261,6 +285,9 @@ export class Captain extends CaptainBase implements Agent { runCommand: tool({ description: dedent` Execute a TUI command. Returns log output from command execution. + Use only when the user explicitly asks to run a slash command. + Never use this to analyze files, reports, logs, plans, generated tests, knowledge, or experience. + Never run a slash command unless the user request itself starts with that slash command. ${this.commandDescriptions .map((c) => { const opts = c.options ? ` (${c.options})` : ''; @@ -274,6 +301,13 @@ export class Captain extends CaptainBase implements Agent { execute: async ({ command }) => { if (!this.commandExecutor) return { success: false, message: 'Command executor not available' }; const cmd = command.startsWith('/') ? command : `/${command}`; + if (!isExplicitSlashRequest(task.description, cmd)) { + return { + success: false, + command: cmd, + message: 'Command blocked: slash commands require an explicit matching slash-command request from the user.', + }; + } startLogCapture(); try { await this.commandExecutor(cmd); @@ -286,11 +320,12 @@ export class Captain extends CaptainBase implements Agent { } private async tools(task: Task, onDone: (summary: string) => void) { - const mode = this.detectMode(); + const mode = this.getMode(); const ctx: ModeContext = { explorBot: this.explorBot, task }; const core = this.coreTools(task, onDone); const idle = await this.idleModeTools(ctx); + if (mode === 'heal') return { ...core, ...idle, ...this.testModeTools(ctx), ...this.webModeTools(ctx) }; if (mode === 'test') return { ...core, ...idle, ...this.testModeTools(ctx) }; if (mode === 'web') return { ...core, ...idle, ...this.webModeTools(ctx) }; return { ...core, ...idle }; @@ -365,20 +400,28 @@ export class Captain extends CaptainBase implements Agent { return result.object; } + async processExecutionError(error: Error, activeTest: Test): Promise { + const explorer = this.explorBot.getExplorer(); + const result = await explorer.handleExecutionError(error); + return { + ...result, + message: result.recovered ? `${result.message}\nContinue the test "${activeTest.scenario}" from the restored page.` : result.message, + }; + } + + private canCompleteWithoutDetails(): boolean { + return (this.recentToolCalls || []).some(hasBrowserCompletionEvidence); + } + async handle(input: string, options: { reset?: boolean } = {}): Promise { const stateManager = this.explorBot.getExplorer().getStateManager(); const initialState = stateManager.getCurrentState(); - if (!initialState) { - tag('warning').log('No page loaded. Use /navigate or I.amOnPage() first.'); - return null; - } - const conversation = options.reset ? this.resetConversation() : this.ensureConversation(); let isDone = false; let finalSummary: string | null = null; - const startUrl = initialState.url || ''; + const startUrl = initialState?.url || ''; const task = new Task(input, startUrl); const onDone = (summary: string) => { isDone = true; @@ -421,12 +464,14 @@ export class Captain extends CaptainBase implements Agent { } const currentState = stateManager.getCurrentState(); - if (!currentState) { + if (!currentState && this.getMode() !== 'idle') { stop(); return; } - await this.reinjectContextIfNeeded(conversation, currentState); + if (currentState) { + await this.reinjectContextIfNeeded(conversation, currentState); + } if (userInput) { const newContext = await this.getPageContext(); @@ -463,7 +508,7 @@ export class Captain extends CaptainBase implements Agent { if (result?.toolExecutions?.length) { const lastExec = result.toolExecutions[result.toolExecutions.length - 1]; - if (lastExec.wasSuccessful && this.ACTION_TOOLS.includes(lastExec.toolName)) { + if (hasBrowserCompletionEvidence(lastExec)) { conversation.addUserText('Action succeeded. If the goal is achieved, call done() now with a brief summary.'); } } @@ -500,3 +545,32 @@ interface SupervisorAction { action: 'inject' | 'stop' | 'pass' | 'skip'; message: string; } + +interface ExecutionRecoveryAction { + action: 'continue' | 'stop'; + message: string; + recovered?: boolean; +} + +function isExplicitSlashRequest(input: string, command: string): boolean { + const requested = slashCommandToken(input); + const actual = slashCommandToken(command); + if (!requested || !actual) return false; + return requested === actual; +} + +function slashCommandToken(value: string): string | null { + const trimmed = value.trim(); + if (!trimmed.startsWith('/')) return null; + + for (let i = 1; i < trimmed.length; i++) { + if (trimmed[i] <= ' ') return trimmed.slice(0, i); + } + return trimmed; +} + +function hasBrowserCompletionEvidence(execution: any): boolean { + if (!execution?.wasSuccessful) return false; + const output = execution.output || {}; + return Boolean(output.pageDiff || output.code || output.playwrightGroupId); +} diff --git a/src/ai/captain/file-tools.ts b/src/ai/captain/file-tools.ts new file mode 100644 index 0000000..89f6a8e --- /dev/null +++ b/src/ai/captain/file-tools.ts @@ -0,0 +1,126 @@ +import { existsSync, readFileSync, readdirSync, statSync } from 'node:fs'; +import { basename, isAbsolute, join, relative, resolve } from 'node:path'; + +export const CAPTAIN_ARTIFACT_DIRS = ['reports', 'plans', 'tests', 'states'] as const; +export const CAPTAIN_ALLOWED_READ_DIRS = ['output', 'knowledge', 'experience'] as const; +export const CAPTAIN_ARTIFACT_SCAN_LIMIT = 200; +export const CAPTAIN_ARTIFACT_LIST_LIMIT = 20; +export const CAPTAIN_READ_FILE_DEFAULT_LIMIT = 12000; +export const CAPTAIN_READ_FILE_MAX_LIMIT = 50000; +export const CAPTAIN_READ_FILE_MIN_LIMIT = 1000; + +export function listRecentArtifacts(outputDir: string): Array<{ path: string; size: number; modifiedAt: string }> { + const artifacts: Array<{ path: string; size: number; modifiedAt: string; timestamp: number }> = []; + + for (const dir of CAPTAIN_ARTIFACT_DIRS) { + if (artifacts.length >= CAPTAIN_ARTIFACT_SCAN_LIMIT) break; + const targetDir = join(outputDir, dir); + if (!existsSync(targetDir)) continue; + collectArtifacts(outputDir, targetDir, artifacts); + } + + return artifacts + .sort((a, b) => b.timestamp - a.timestamp) + .slice(0, CAPTAIN_ARTIFACT_LIST_LIMIT) + .map(({ timestamp, ...artifact }) => artifact); +} + +export function readCaptainFile(projectRoot: string | null, input: ReadCaptainFileInput, allowedDirs: readonly string[] = CAPTAIN_ALLOWED_READ_DIRS): ReadCaptainFileResult { + const resolved = resolveReadableFile(projectRoot, input.path, allowedDirs); + if (!resolved) { + return { success: false, message: 'File is outside allowed directories' }; + } + if (!existsSync(resolved)) { + return { success: false, message: `File not found: ${input.path}` }; + } + if (!statSync(resolved).isFile()) { + return { success: false, message: `Not a file: ${input.path}` }; + } + + const maxChars = normalizeMaxChars(input.maxChars); + const fullContent = readFileSync(resolved, 'utf8'); + const content = selectContent(fullContent, input); + return { + success: true, + path: relative(projectRoot || process.cwd(), resolved), + truncated: content.length > maxChars, + content: content.slice(0, maxChars), + }; +} + +function collectArtifacts(outputDir: string, targetDir: string, artifacts: Array<{ path: string; size: number; modifiedAt: string; timestamp: number }>): void { + for (const entry of readdirSync(targetDir, { withFileTypes: true })) { + if (artifacts.length >= CAPTAIN_ARTIFACT_SCAN_LIMIT) return; + const entryPath = join(targetDir, entry.name); + if (entry.isDirectory()) { + collectArtifacts(outputDir, entryPath, artifacts); + continue; + } + + const stats = statSync(entryPath); + artifacts.push({ + path: relative(outputDir, entryPath), + size: stats.size, + modifiedAt: stats.mtime.toISOString(), + timestamp: stats.mtimeMs, + }); + } +} + +function resolveReadableFile(projectRoot: string | null, requestedPath: string, allowedDirs: readonly string[]): string | null { + if (!projectRoot) return null; + + let cleanPath = requestedPath.trim(); + const projectName = basename(projectRoot); + if (cleanPath.startsWith(`${projectName}/`) || cleanPath.startsWith(`${projectName}\\`)) { + cleanPath = cleanPath.slice(projectName.length + 1); + } + + const resolved = isAbsolute(cleanPath) ? resolve(cleanPath) : resolve(projectRoot, cleanPath); + const allowedRoots = allowedDirs.map((dir) => resolve(projectRoot, dir)); + for (const root of allowedRoots) { + const rel = relative(root, resolved); + if (!rel || (!rel.startsWith('..') && !isAbsolute(rel))) return resolved; + } + + return null; +} + +function selectContent(content: string, input: ReadCaptainFileInput): string { + if (!input.startLine && !input.endLine) return content; + + const lines = content.split(/\r?\n/); + const startIndex = resolveLineIndex(input.startLine, lines.length, 1); + const endIndex = resolveLineIndex(input.endLine, lines.length, lines.length); + if (endIndex < startIndex) return ''; + return lines.slice(startIndex - 1, endIndex).join('\n'); +} + +function resolveLineIndex(line: number | undefined, totalLines: number, fallback: number): number { + if (!line) return fallback; + if (line < 0) return Math.max(1, totalLines + line + 1); + return Math.min(Math.max(1, line), totalLines); +} + +function normalizeMaxChars(maxChars?: number): number { + return Math.max(CAPTAIN_READ_FILE_MIN_LIMIT, Math.min(maxChars || CAPTAIN_READ_FILE_DEFAULT_LIMIT, CAPTAIN_READ_FILE_MAX_LIMIT)); +} + +export interface ReadCaptainFileInput { + path: string; + startLine?: number; + endLine?: number; + maxChars?: number; +} + +export type ReadCaptainFileResult = + | { + success: true; + path: string; + truncated: boolean; + content: string; + } + | { + success: false; + message: string; + }; diff --git a/src/ai/captain/idle-mode.ts b/src/ai/captain/idle-mode.ts index 215ca99..a49abb2 100644 --- a/src/ai/captain/idle-mode.ts +++ b/src/ai/captain/idle-mode.ts @@ -4,6 +4,7 @@ import dedent from 'dedent'; import { z } from 'zod'; import { ConfigParser } from '../../config.ts'; import { Test } from '../../test-plan.ts'; +import { listRecentArtifacts, readCaptainFile } from './file-tools.ts'; import { type Constructor, type ModeContext, resolveProjectRoot } from './mixin.ts'; let cachedBashTool: Awaited> | null = null; @@ -15,6 +16,8 @@ export function WithIdleMode(Base: T) { const config = ConfigParser.getInstance().getConfig(); const knowledgeDir = config.dirs?.knowledge || 'knowledge'; const experienceDir = config.dirs?.experience || 'experience'; + const outputDir = config.dirs?.output || 'output'; + const readableDirs = [outputDir, knowledgeDir, experienceDir]; if (!cachedBashTool && projectRoot) { cachedBashTool = await createBashTool({ @@ -75,6 +78,55 @@ export function WithIdleMode(Base: T) { return { success: true, tests: plan.tests.length }; }, }), + project: tool({ + description: dedent` + Inspect Explorbot project configuration and recent generated artifacts. + Use this before answering questions about setup, previous sessions, reports, saved plans, or output files. + `, + inputSchema: z.object({ + view: z.enum(['config', 'artifacts']).optional().describe('config shows setup summary; artifacts lists recent generated files'), + }), + execute: async ({ view }) => { + const parser = ConfigParser.getInstance(); + const config = parser.getConfig(); + const outputDir = parser.getOutputDir(); + + if (view === 'artifacts') { + return { + success: true, + outputDir, + artifacts: listRecentArtifacts(outputDir), + suggestion: 'Use readFile to inspect specific reports, plans, logs, generated tests, knowledge, or experience files.', + }; + } + + return { + success: true, + configPath: parser.getConfigPath(), + baseUrl: config.playwright?.url, + browser: config.playwright?.browser, + headed: config.playwright?.show === true, + dirs: config.dirs, + agents: Object.fromEntries(Object.entries(config.ai?.agents || {}).map(([name, agentConfig]: [string, any]) => [name, { enabled: agentConfig?.enabled !== false, hasModelOverride: !!agentConfig?.model }])), + reporterEnabled: config.reporter?.enabled === true, + apiEnabled: !!config.api, + }; + }, + }), + readFile: tool({ + description: dedent` + Read a specific Explorbot project file for analysis. + Use this for explicit user questions about reports, plans, logs, generated tests, knowledge, or experience files. + Prefer this over bash() for reading file contents after bash has found the file. + `, + inputSchema: z.object({ + path: z.string().describe('Path inside output, knowledge, or experience directories'), + startLine: z.number().optional().describe('First line to read, 1-based. Negative values count from the end of the file'), + endLine: z.number().optional().describe('Last line to read, 1-based and inclusive. Negative values count from the end of the file'), + maxChars: z.number().optional().describe('Maximum characters to return, default 12000'), + }), + execute: async (input) => readCaptainFile(projectRoot, input, readableDirs), + }), }; if (cachedBashTool) { @@ -88,18 +140,32 @@ export function WithIdleMode(Base: T) { const config = ConfigParser.getInstance().getConfig(); const knowledgeDir = config.dirs?.knowledge || 'knowledge'; const experienceDir = config.dirs?.experience || 'experience'; + const outputDir = config.dirs?.output || 'output'; return dedent` - Plan management: updatePlan() — replace or append tests in the current plan - - bash() — run shell commands for file operations - - READ from: ${knowledgeDir}/, ${experienceDir}/, output/ - - WRITE to: ${knowledgeDir}/, ${experienceDir}/ only (NOT output/) - - Use ls to list files, cat to read small files - - Use head/tail for large files to avoid excessive output - - Use grep to search file contents + - readFile() — read specific report, plan, log, generated test, knowledge, or experience file content + - bash() — discover files and inspect file metadata + - READ from: ${knowledgeDir}/, ${experienceDir}/, ${outputDir}/ + - WRITE to: ${knowledgeDir}/, ${experienceDir}/ only (NOT ${outputDir}/) + - Use wc -l -c file.txt to inspect size + - Use file file.txt to inspect type + - Use find . -name "*.md" to discover files + - Use grep -n "keyword" file.txt to find matching lines + - Use ls -lh to list files + + Use bash() for file discovery and search. Once the needed file and line range are known, + use readFile() to read its contents. Do not use bash() to print file contents. + + + + Use project({ view: "config" }) before explaining Explorbot setup or suggesting config improvements. + Use project({ view: "artifacts" }) before answering questions about previous sessions, reports, plans, generated tests, or logs. + + When user shares credentials, selectors, or important domain info during conversation, suggest saving it to a knowledge file using bash tool. diff --git a/src/ai/captain/mixin.ts b/src/ai/captain/mixin.ts index a86afa9..342c12f 100644 --- a/src/ai/captain/mixin.ts +++ b/src/ai/captain/mixin.ts @@ -8,7 +8,7 @@ export type Constructor = new (...args: any[]) => T; export const debugLog = createDebug('explorbot:captain'); -export type CaptainMode = 'idle' | 'web' | 'test'; +export type CaptainMode = 'idle' | 'web' | 'test' | 'heal'; export interface ModeContext { explorBot: ExplorBot; diff --git a/src/ai/captain/web-mode.ts b/src/ai/captain/web-mode.ts index c8f6b0b..e71dc65 100644 --- a/src/ai/captain/web-mode.ts +++ b/src/ai/captain/web-mode.ts @@ -47,18 +47,53 @@ export function WithWebMode(Base: T) { description: dedent` Direct browser access via Playwright. Use for diagnostics and browser management. Actions: + - status: Inspect browser/page availability, URL, title, tab count - evaluate: Run JavaScript in browser context (localStorage, cookies, DOM, console) - closeTabs: Close all browser tabs except the current one - - screenshot: Take a screenshot of current page - reload: Reload the current page + - screenshot: Take a screenshot of current page + - recover: Recover from a closed/crashed page using Explorer recovery + - restart: Restart the browser when page/context recovery is not enough + - openFreshTab: Open a fresh tab in the current browser context `, inputSchema: z.object({ - action: z.enum(['evaluate', 'closeTabs', 'screenshot', 'reload']).describe('Browser action to perform'), + action: z.enum(['status', 'evaluate', 'closeTabs', 'reload', 'screenshot', 'recover', 'restart', 'openFreshTab']).describe('Browser action to perform'), code: z.string().optional().describe('JavaScript code for evaluate action'), }), execute: async ({ action, code }) => { - const page = ctx.explorBot.getExplorer().playwrightHelper?.page; - if (!page) return { success: false, message: 'No browser page available' }; + const explorer = ctx.explorBot.getExplorer(); + + if (action === 'status') { + const page = explorer.playwrightHelper?.page; + const pages = page?.context?.().pages?.() || []; + return { + success: true, + hasPage: !!page, + isClosed: page?.isClosed?.() || false, + url: page && !page.isClosed?.() ? await page.url() : null, + title: page && !page.isClosed?.() ? await page.title().catch(() => null) : null, + tabs: pages.length, + }; + } + + if (action === 'recover') { + const recovered = await explorer.recoverFromBrowserError(); + return { success: recovered, message: recovered ? 'Browser page recovered' : 'Browser recovery failed' }; + } + + if (action === 'restart') { + const restarted = await explorer.restartBrowser(); + return { success: restarted, message: restarted ? 'Browser restarted' : 'Browser restart failed' }; + } + + if (action === 'openFreshTab') { + await ctx.explorBot.openFreshTab(); + const state = explorer.getStateManager().getCurrentState(); + return { success: true, url: state?.url, title: state?.title }; + } + + const page = explorer.playwrightHelper?.page; + if (!page || page.isClosed?.()) return { success: false, message: 'No browser page available. Try browser({ action: "recover" }) first.' }; if (action === 'evaluate') { if (!code) return { success: false, message: 'Code required for evaluate action' }; @@ -112,7 +147,7 @@ export function WithWebMode(Base: T) { - Page actions: click, pressKey, form (CodeceptJS tools) - Navigation: navigate() — AI-powered navigation to URLs or page descriptions - - Browser diagnostics: browser() — evaluate JS, close tabs, screenshot, reload + - Browser diagnostics: browser() — inspect status, evaluate JS, close tabs, screenshot, reload, recover closed/crashed pages, restart browser, open a fresh tab - Visual analysis: see() — screenshot-based page verification - Context refresh: context() — get fresh HTML/ARIA snapshot - Visual fallback: visualClick() — coordinate-based click when locators fail diff --git a/src/ai/historian/screencast.ts b/src/ai/historian/screencast.ts index eda1d87..1384345 100644 --- a/src/ai/historian/screencast.ts +++ b/src/ai/historian/screencast.ts @@ -10,6 +10,8 @@ import { relativeToCwd } from '../../utils/next-steps.ts'; import { safeFilename } from '../../utils/strings.ts'; import { type Constructor, debugLog } from './mixin.ts'; +const FATAL_SCREENCAST_STOP_ERRORS = /Target page, context or browser has been closed|Target closed|Session closed|Protocol error/i; + export interface ScreencastMethods { attachScreencast(): void; isScreencastActive(): boolean; @@ -113,17 +115,24 @@ export function WithScreencast(Base: T) { if (!this.screencastActive) return; const path = this.screencastPath; const task = this.screencastTask; + let stopped = false; try { await this.screencastPage.screencast.stop(); + stopped = true; } catch (err) { - tag('operation').log(`Screencast stop failed: ${(err as Error).message}`); + const message = (err as Error).message; + if (FATAL_SCREENCAST_STOP_ERRORS.test(message)) { + tag('operation').log('Screencast skipped: browser was closed before recording could be finalized'); + } else { + tag('operation').log(`Screencast stop failed: ${message}`); + } } this.screencastActive = false; this.screencastPage = null; this.screencastPath = null; this.screencastTask = null; this.screencastLastChapter = null; - if (path) { + if (path && stopped) { this.savedFiles.add(path); task?.addArtifact?.(path); tag('operation').log(`Saved screencast: ${relativeToCwd(path)}`); diff --git a/src/ai/navigator.ts b/src/ai/navigator.ts index ba78398..35cba2f 100644 --- a/src/ai/navigator.ts +++ b/src/ai/navigator.ts @@ -136,6 +136,10 @@ class Navigator implements Agent { } async visit(url: string): Promise { + return this.explorer.runWithBrowserRecovery('navigator.visit', () => this.visitOnce(url)); + } + + private async visitOnce(url: string): Promise { try { const action = this.explorer.createAction(); @@ -170,7 +174,7 @@ class Navigator implements Agent { throw new Error(`Navigation to ${url} failed: ${action.lastError?.message}`); } } - await action.caputrePageWithScreenshot(); + await this.explorer.capturePageWithScreenshot(); await this.hooksRunner.runAfterHook('navigator', url); } catch (error) { const errorMessage = error instanceof Error ? error.message : String(error); @@ -382,7 +386,7 @@ class Navigator implements Agent { // URL did not transition to expectedUrl within timeout } } - const freshState = await action.capturePageState(); + const freshState = await this.explorer.capturePageState(); const currentUrl = /^https?:\/\//i.test(expectedUrl) ? freshState.fullUrl || freshState.url || '' : freshState.url || ''; const urlMatches = this.isSameExpectedOrigin(expectedUrl, action.stateManager) && normalizeUrl(currentUrl) === normalizeUrl(expectedUrl); const stateChanged = freshState.getStateHash() !== actionResult.getStateHash(); diff --git a/src/ai/pilot.ts b/src/ai/pilot.ts index e92f4be..ac0fb3b 100644 --- a/src/ai/pilot.ts +++ b/src/ai/pilot.ts @@ -67,7 +67,7 @@ export class Pilot implements Agent { } async reviewCompletion(task: Test, currentState: ActionResult, testerConversation: Conversation, navigator?: Navigator): Promise { - const verdictType = task.hasAchievedAny() ? 'finish' : 'stop'; + const verdictType = this.hasCompletionEvidence(task, currentState, testerConversation) ? 'finish' : 'stop'; return this.reviewDecision(verdictType, task, currentState, testerConversation, navigator); } @@ -86,14 +86,14 @@ export class Pilot implements Agent { const sessionLog = this.formatSessionLog(testerConversation); const stateContext = this.buildStateContext(currentState); + const successfulAssertions = this.formatSuccessfulAssertions(currentState, testerConversation); const notes = task.notesToString() || 'No notes recorded.'; let visualAnalysis = ''; let screenshotState: ActionResult | null = null; if (this.provider.hasVision()) { try { - const action = this.explorer.createAction(); - screenshotState = await action.caputrePageWithScreenshot(); + screenshotState = await this.explorer.capturePageWithScreenshot(); if (screenshotState.screenshot) { visualAnalysis = (await this.researcher.answerQuestionAboutScreenshot(screenshotState, `Describe current page state relevant to: ${task.scenario}`)) || ''; } @@ -125,6 +125,10 @@ export class Pilot implements Agent { ${this.formatExpectations(task)} + + ${successfulAssertions || 'None'} + + ${notes} @@ -658,8 +662,7 @@ export class Pilot implements Agent { private async checkDataAvailability(task: Test, requestedData: string, fishermanReason: string | undefined): Promise { if (!this.provider.hasVision()) return null; - const action = this.explorer.createAction(); - const screenshotState = await action.caputrePageWithScreenshot().catch(() => null); + const screenshotState = await this.explorer.capturePageWithScreenshot().catch(() => null); if (!screenshotState?.screenshot) return null; const question = dedent` @@ -882,6 +885,32 @@ export class Pilot implements Agent { return parts.join('\n\n'); } + private hasCompletionEvidence(task: Test, currentState: ActionResult, testerConversation: Conversation): boolean { + if (task.hasAchievedAny()) return true; + return this.hasSuccessfulCheckEvidence(currentState, testerConversation); + } + + private hasSuccessfulCheckEvidence(currentState: ActionResult, testerConversation: Conversation): boolean { + if (Object.values(currentState.verifications ?? {}).some(Boolean)) return true; + return testerConversation.getToolExecutions().some((t) => CHECK_TOOLS.includes(t.toolName) && t.wasSuccessful); + } + + private formatSuccessfulAssertions(currentState: ActionResult, testerConversation: Conversation): string { + const lines: string[] = []; + for (const [assertion, passed] of Object.entries(currentState.verifications ?? {})) { + if (passed) lines.push(`PASS state verification: ${assertion}`); + } + + for (const exec of testerConversation.getToolExecutions()) { + if (!CHECK_TOOLS.includes(exec.toolName) || !exec.wasSuccessful) continue; + const description = exec.input?.assertion || exec.input?.request || truncateJson(exec.input); + const result = exec.output?.message || exec.output?.analysis || exec.output?.result; + lines.push(`PASS ${exec.toolName}: ${description}${result ? ` -> ${result}` : ''}`); + } + + return [...new Set(lines)].join('\n'); + } + private formatActions(toolCalls: any[]): string { return toolCalls .map((t) => { diff --git a/src/ai/researcher.ts b/src/ai/researcher.ts index 1db107e..6df455d 100644 --- a/src/ai/researcher.ts +++ b/src/ai/researcher.ts @@ -130,7 +130,7 @@ export class Researcher extends ResearcherBase implements Agent { const annotatedElements = await this.explorer.annotateElements(); debugLog(`Annotated ${annotatedElements.length} interactive elements with eidx`); - this.actionResult = await this.explorer.createAction().capturePageState({ includeScreenshot: screenshot && this.provider.hasVision() }); + this.actionResult = await this.explorer.capturePageState({ includeScreenshot: screenshot && this.provider.hasVision() }); const condition = detectPageCondition(this.actionResult!); if (condition === 'error') { @@ -239,7 +239,7 @@ export class Researcher extends ResearcherBase implements Agent { // Must run BEFORE visuallyAnnotateContainers — annotation overlays inject z-index 99998+ which would pollute the scoring. if (!interrupted() && this.hasScreenshotToAnalyze) { const sections = parseResearchSections(result.text); - const focused = await detectFocusedSection(this.explorer.playwrightHelper.page, sections); + const focused = await this.explorer.runWithBrowserRecovery('detectFocusedSection', () => detectFocusedSection(this.explorer.playwrightHelper.page, sections)); if (focused) markSectionAsFocused(result, focused); } @@ -252,7 +252,7 @@ export class Researcher extends ResearcherBase implements Agent { const freshBroken = freshContainerLocs.filter((l) => l.valid === false).map((l) => l.locator); const containers = validContainers.filter((c) => !freshBroken.includes(c.css)); await this.visuallyAnnotateElements({ containers }); - this.actionResult = await this.explorer.createAction().caputrePageWithScreenshot(); + this.actionResult = await this.explorer.capturePageWithScreenshot(); const visualResult = await this.analyzeScreenshotForVisualProps(); if (visualResult.elements.size > 0) { await this.mergeVisualData(result, visualResult.elements); @@ -331,7 +331,7 @@ export class Researcher extends ResearcherBase implements Agent { if (!this.actionResult) { debugLog('No action result, navigating to URL'); await this.explorer.visit(url); - this.actionResult = await this.explorer.createAction().capturePageState({ includeScreenshot: screenshot }); + this.actionResult = await this.explorer.capturePageState({ includeScreenshot: screenshot }); return; } @@ -341,7 +341,7 @@ export class Researcher extends ResearcherBase implements Agent { if (!isEmpty && isOnCurrentState) { if ((!this.actionResult.screenshot && screenshot) || !this.actionResult.ariaSnapshot) { - this.actionResult = await this.explorer.createAction().capturePageState({ includeScreenshot: screenshot }); + this.actionResult = await this.explorer.capturePageState({ includeScreenshot: screenshot }); } return; } @@ -349,6 +349,8 @@ export class Researcher extends ResearcherBase implements Agent { if (isEmpty && isOnCurrentState) { debugLog('HTML body empty on current URL, waiting for content'); tag('step').log('Page body is empty, waiting for content...'); + await this.explorer.visit(url); + this.actionResult = await this.explorer.capturePageState({ includeScreenshot: screenshot ?? false }); await this.waitUntilSettled(screenshot ?? false); return; } @@ -357,22 +359,21 @@ export class Researcher extends ResearcherBase implements Agent { tag('step').log('Navigating to URL...'); await this.explorer.visit(url); - this.actionResult = await this.explorer.createAction().capturePageState({ includeScreenshot: screenshot ?? false }); + this.actionResult = await this.explorer.capturePageState({ includeScreenshot: screenshot ?? false }); } private async waitUntilSettled(screenshot: boolean): Promise { const errorPageTimeout = (this.explorer.getConfig().ai?.agents?.researcher as any)?.errorPageTimeout ?? 10; if (errorPageTimeout <= 0) return false; - const page = this.explorer.playwrightHelper.page; const includeScreenshot = screenshot && this.provider.hasVision(); try { - await page?.waitForLoadState('networkidle', { timeout: errorPageTimeout * 1000 }); + await this.explorer.runWithBrowserRecovery('waitUntilSettled', () => this.explorer.playwrightHelper.page?.waitForLoadState('networkidle', { timeout: errorPageTimeout * 1000 })); } catch {} await this.explorer.annotateElements(); - this.actionResult = await this.explorer.createAction().capturePageState({ includeScreenshot }); + this.actionResult = await this.explorer.capturePageState({ includeScreenshot }); let condition = detectPageCondition(this.actionResult!); if (condition === 'error') { @@ -383,7 +384,7 @@ export class Researcher extends ResearcherBase implements Agent { for (let i = 0; i < 3; i++) { await new Promise((r) => setTimeout(r, 1000)); await this.explorer.annotateElements(); - this.actionResult = await this.explorer.createAction().capturePageState({ includeScreenshot }); + this.actionResult = await this.explorer.capturePageState({ includeScreenshot }); condition = detectPageCondition(this.actionResult!); if (condition === 'error') { throw new ErrorPageError(this.actionResult!.url, this.actionResult!.title); @@ -762,17 +763,15 @@ export class Researcher extends ResearcherBase implements Agent { } async navigateTo(url: string): Promise { - const action = this.explorer.createAction(); - await action.execute(`I.amOnPage("${url}")`); + await this.explorer.visit(url); } async cancelInUi() { const beforeAria = this.stateManager.getCurrentState()?.ariaSnapshot || null; - const action = this.explorer.createAction(); - await action.execute('I.clickXY(0, 0)'); + await this.explorer.executeAction('I.clickXY(0, 0)'); if (diffAriaSnapshots(beforeAria, this.stateManager.getCurrentState()?.ariaSnapshot || null)) return; - await action.execute(`I.pressKey('Escape')`); + await this.explorer.executeAction(`I.pressKey('Escape')`); } } diff --git a/src/ai/researcher/coordinates.ts b/src/ai/researcher/coordinates.ts index 9a45f04..3893fca 100644 --- a/src/ai/researcher/coordinates.ts +++ b/src/ai/researcher/coordinates.ts @@ -81,7 +81,7 @@ export function WithCoordinates(Base: T) { } async visuallyAnnotateElements(opts?: { containers?: Array<{ css: string; label: string }> }): Promise { - return visuallyAnnotateContainers(this.explorer.playwrightHelper.page, opts?.containers || []); + return this.explorer.visuallyAnnotateElements(opts); } private async _analyzeScreenshotForVisualProps(): Promise { @@ -193,7 +193,6 @@ export function WithCoordinates(Base: T) { } async backfillCoordinates(result: ResearchResult): Promise { - const page = this.explorer.playwrightHelper.page; const sections = parseResearchSections(result.text); const eidxWithoutCoords: string[] = []; for (const section of sections) { @@ -203,7 +202,7 @@ export function WithCoordinates(Base: T) { } if (eidxWithoutCoords.length === 0) return; - const webElements = await WebElement.fromEidxList(page, eidxWithoutCoords); + const webElements = await this.explorer.runWithBrowserRecovery('backfillCoordinates', () => WebElement.fromEidxList(this.explorer.playwrightHelper.page, eidxWithoutCoords)); if (webElements.length === 0) return; const rectMap = new Map(webElements.map((w) => [w.eidx!, w])); diff --git a/src/ai/researcher/deep-analysis.ts b/src/ai/researcher/deep-analysis.ts index 1f7296b..cebe84b 100644 --- a/src/ai/researcher/deep-analysis.ts +++ b/src/ai/researcher/deep-analysis.ts @@ -279,11 +279,10 @@ export function WithDeepAnalysis(Base: T) { const isCoordinateClick = el.commands[0].startsWith('I.clickXY('); if (!isCoordinateClick) { const hoverCmd = el.commands[0].replace('I.click(', 'I.moveCursorTo('); - const hoverAction = this.explorer.createAction(); - await hoverAction.attempt(hoverCmd, undefined, false); + await this.explorer.attemptAction(hoverCmd, undefined, false); await new Promise((r) => setTimeout(r, 500)); - await this.explorer.createAction().capturePageState(); + await this.explorer.capturePageState(); const hoverAR = ActionResult.fromState(this.stateManager.getCurrentState()!); const hoverDiff = await hoverAR.diff(previousState); await hoverDiff.calculate(); @@ -303,9 +302,8 @@ export function WithDeepAnalysis(Base: T) { } let clickCode: string | null = null; - const action = this.explorer.createAction(); for (const cmd of el.commands) { - if (await action.attempt(cmd, undefined, false)) { + if (await this.explorer.attemptAction(cmd, undefined, false)) { clickCode = cmd; break; } @@ -319,7 +317,7 @@ export function WithDeepAnalysis(Base: T) { let diff: Diff; try { - await this.explorer.createAction().capturePageState(); + await this.explorer.capturePageState(); const currAR = ActionResult.fromState(this.stateManager.getCurrentState()!); diff = await currAR.diff(previousState); await diff.calculate(); @@ -361,7 +359,7 @@ export function WithDeepAnalysis(Base: T) { private async _restorePageState(url: string, originalAria: string): Promise { try { await (this as any).cancelInUi(); - await this.explorer.createAction().capturePageState(); + await this.explorer.capturePageState(); const currentAria = this.stateManager.getCurrentState()?.ariaSnapshot || ''; if (!diffAriaSnapshots(originalAria, currentAria)) return; } catch (err) { diff --git a/src/ai/researcher/locators.ts b/src/ai/researcher/locators.ts index 1c1fae6..62fa733 100644 --- a/src/ai/researcher/locators.ts +++ b/src/ai/researcher/locators.ts @@ -194,8 +194,7 @@ export function WithLocators(Base: T) { } if (needsXpath.length > 0) { - const page = this.explorer.playwrightHelper.page; - const webElements = await WebElement.fromEidxList(page, needsXpath); + const webElements = await this.explorer.runWithBrowserRecovery('backfillBrokenLocators', () => WebElement.fromEidxList(this.explorer.playwrightHelper.page, needsXpath)); const changedSections = new Set<(typeof sections)[0]>(); for (const w of webElements) { const entry = needsXpathEls.get(w.eidx!); diff --git a/src/ai/task-agent.ts b/src/ai/task-agent.ts index 69c3090..4474e71 100644 --- a/src/ai/task-agent.ts +++ b/src/ai/task-agent.ts @@ -24,7 +24,7 @@ export abstract class TaskAgent { protected consecutiveFailures = 0; protected consecutiveEmptyResults = 0; protected recentToolCalls: any[] = []; - protected abstract readonly ACTION_TOOLS: string[]; + protected readonly ACTION_TOOLS: string[] = []; private _historian: Historian | null = null; private _quartermaster: Quartermaster | null = null; diff --git a/src/ai/tester.ts b/src/ai/tester.ts index 4b63990..a02ed09 100644 --- a/src/ai/tester.ts +++ b/src/ai/tester.ts @@ -136,17 +136,6 @@ export class Tester extends TaskAgent implements Agent { task.addNote(`Network error: ${r.method} ${r.path} → ${r.status}`, TestResult.FAILED); }); - const page = this.explorer.playwrightHelper?.page; - const onPageError = (err: Error) => { - task.addNote(`Console error: ${err.message}`, TestResult.FAILED); - }; - const onConsoleMessage = (msg: any) => { - if (msg.type() !== 'error') return; - task.addNote(`Console error: ${msg.text()}`, TestResult.FAILED); - }; - page?.on('pageerror', onPageError); - page?.on('console', onConsoleMessage); - const initialState = ActionResult.fromState(state); const conversation = this.provider.startConversation(this.getSystemMessage(), 'tester'); @@ -176,20 +165,18 @@ export class Tester extends TaskAgent implements Agent { expected: task.expected, }, }, - async () => this.runTestSession(task, initialState, conversation, { offFailedRequest, page, onPageError, onConsoleMessage }) + async () => this.runTestSession(task, initialState, conversation, { offFailedRequest }) ); } - private async runTestSession(task: Test, initialState: ActionResult, conversation: Conversation, handlers: { offFailedRequest?: () => void; page: any; onPageError: (err: Error) => void; onConsoleMessage: (msg: any) => void }): Promise<{ success: boolean }> { - const { offFailedRequest, page, onPageError, onConsoleMessage } = handlers; + private async runTestSession(task: Test, initialState: ActionResult, conversation: Conversation, handlers: TestSessionHandlers): Promise<{ success: boolean }> { + const { offFailedRequest } = handlers; if (this.pilot) { try { const plan = await this.pilot.planTest(task, initialState); if (task.hasFinished) { offFailedRequest?.(); - page?.off('pageerror', onPageError); - page?.off('console', onConsoleMessage); return { success: task.isSuccessful }; } if (plan) { @@ -201,19 +188,29 @@ export class Tester extends TaskAgent implements Agent { task.addNote(`Planning failed: ${message}`, TestResult.FAILED); task.finish(TestResult.FAILED); offFailedRequest?.(); - page?.off('pageerror', onPageError); - page?.off('console', onConsoleMessage); return { success: false }; } } debugLog('Starting test execution with tools'); - task.start(); - await this.explorer.startTest(task); + if (!(await this.explorer.startTest(task))) { + offFailedRequest?.(); + await this.cleanupStartedTest(task); + return { success: task.isSuccessful }; + } debugLog(`Navigating to ${task.startUrl}`); - await this.explorer.visit(task.startUrl!); + try { + await this.explorer.visit(task.startUrl!); + } catch (error) { + const result = await this.handleLoopError(task, error); + if (result === 'stop') { + offFailedRequest?.(); + await this.cleanupStartedTest(task); + return { success: task.isSuccessful }; + } + } const startState = this.explorer.getStateManager().getCurrentState(); if (startState) task.addUrlNote(startState); @@ -238,6 +235,12 @@ export class Tester extends TaskAgent implements Agent { await loop( async ({ stop, pause, iteration, userInput }) => { debugLog('iteration', iteration); + if (!(await this.explorer.ensurePageAvailable())) { + task.addNote('Browser page is unavailable'); + task.finish(TestResult.FAILED); + stop(); + return; + } const currentState = this.getCurrentState(); const tools = { @@ -385,22 +388,16 @@ export class Tester extends TaskAgent implements Agent { } : undefined, catch: async ({ error, stop }) => { - tag('error').log(`Test execution error: ${error}`); - const message = error instanceof Error ? error.message : String(error); - if (!task.hasFinished) { - task.addNote(`Execution error: ${message}`); - } - if (error instanceof Error && error.name === 'AbortError') { - stop(); - return; - } - conversation.addUserText(`Previous AI call failed: ${message}. Take a different approach on the next step.`); + const result = await this.handleLoopError(task, error); + if (result === 'stop') stop(); }, } ); if (task.hasFinished) break; + if (!(await this.explorer.ensurePageAvailable())) break; + const finalState = this.getCurrentState(); const wantsContinue = await this.pilot!.finalReview(task, finalState, conversation, this.navigator); @@ -429,8 +426,6 @@ export class Tester extends TaskAgent implements Agent { offStateChange(); offFailedRequest?.(); - page?.off('pageerror', onPageError); - page?.off('console', onConsoleMessage); await this.finishTest(task); await this.explorer.stopTest(task, { startUrl: task.startUrl, @@ -1093,4 +1088,40 @@ export class Tester extends TaskAgent implements Agent { }), }; } + + private async handleLoopError(task: Test, error: unknown): Promise<'continue' | 'stop'> { + const message = error instanceof Error ? error.message : String(error); + if (!task.hasFinished) task.addNote(`Execution error: ${message}`); + + const result = await this.explorer.handleExecutionError(error); + tag('info').log(`Browser supervisor: ${result.action} - ${result.message}`); + task.addNote(result.message); + + if (result.action === 'stop') { + task.finish(TestResult.FAILED); + return 'stop'; + } + + if (result.recovered) { + this.resetFailureCount(); + this.previousUrl = null; + this.previousStateHash = null; + } + + this.currentConversation?.addUserText(result.message); + return 'continue'; + } + + private async cleanupStartedTest(task: Test): Promise { + await this.finishTest(task); + await this.explorer.stopTest(task, { + startUrl: task.startUrl, + style: task.style, + sessionName: task.sessionName, + }); + } +} + +interface TestSessionHandlers { + offFailedRequest?: () => void; } diff --git a/src/ai/tools.ts b/src/ai/tools.ts index 961cd49..4fadc26 100644 --- a/src/ai/tools.ts +++ b/src/ai/tools.ts @@ -6,6 +6,7 @@ import type { ExperienceTracker } from '../experience-tracker.ts'; import type Explorer from '../explorer.ts'; import { type Task, TestResult } from '../test-plan.js'; import { extractFocusedElement } from '../utils/aria.ts'; +import { isFatalBrowserError } from '../utils/browser-errors.ts'; import { createDebug, tag } from '../utils/logger.js'; import { pause } from '../utils/loop.js'; import { WebElement } from '../utils/web-element.ts'; @@ -287,6 +288,7 @@ export function createCodeceptJSTools(explorer: Explorer, task: Task) { suggestion: 'Verify the key name is correct. For typing text, use form() tool instead.', }); } catch (error) { + throwIfFatalBrowserError(error); activeNote.commit(TestResult.FAILED); const errorMessage = error instanceof Error ? error.toString() : 'Unknown error occurred'; return failedToolResult('pressKey', `PressKey tool failed: ${errorMessage}`); @@ -405,6 +407,7 @@ export function createCodeceptJSTools(explorer: Explorer, task: Task) { action ); } catch (error) { + throwIfFatalBrowserError(error); activeNote.commit(TestResult.FAILED); const errorMessage = error instanceof Error ? error.toString() : 'Unknown error occurred'; return failedToolResult('form', `Form tool failed: ${errorMessage}`); @@ -444,8 +447,7 @@ export function createSpecialContextTools(explorer: Explorer, context: 'iframe') await explorer.switchToMainFrame(); - const action = explorer.createAction(); - const nextState = await action.capturePageState(); + const nextState = await explorer.capturePageState(); const toolResult = await nextState.toToolResult(previousState, 'I.switchTo()'); return successToolResult('exitIframe', { @@ -454,6 +456,7 @@ export function createSpecialContextTools(explorer: Explorer, context: 'iframe') code: 'I.switchTo()', }); } catch (error) { + throwIfFatalBrowserError(error); const errorMessage = error instanceof Error ? error.toString() : 'Unknown error occurred'; return failedToolResult('exitIframe', `Failed to exit iframe: ${errorMessage}`); } @@ -500,8 +503,7 @@ export function createAgentTools({ } try { - const action = explorer.createAction(); - const actionResult = await action.caputrePageWithScreenshot(); + const actionResult = await explorer.capturePageWithScreenshot(); if (!actionResult.screenshot) { return failedToolResult('see', 'Failed to capture screenshot for analysis'); @@ -519,6 +521,7 @@ export function createAgentTools({ suggestion: 'Visual confirmation is valid evidence for test results. Use record() to note the visual findings.', }); } catch (error) { + throwIfFatalBrowserError(error); const errorMessage = error instanceof Error ? error.toString() : 'Unknown error occurred'; visionDisabled = true; tag('warning').log('⚠️ Vision model is not available. Visual checks are disabled for this session.'); @@ -600,8 +603,7 @@ export function createAgentTools({ }); } - const action = explorer.createAction(); - const actionResult = await action.capturePageState(); + const actionResult = await explorer.capturePageState(); const result = await navigator.verifyState(assertion, actionResult); if (result.verified) { @@ -619,6 +621,7 @@ export function createAgentTools({ suggestion: 'The assertion could not be verified. Check if the condition is actually present on the page or try a different assertion.', }); } catch (error) { + throwIfFatalBrowserError(error); const errorMessage = error instanceof Error ? error.toString() : 'Unknown error occurred'; return failedToolResult('verify', `Verify tool failed: ${errorMessage}`, { error: errorMessage, @@ -674,6 +677,7 @@ export function createAgentTools({ `, }); } catch (error) { + throwIfFatalBrowserError(error); const errorMessage = error instanceof Error ? error.toString() : 'Unknown error occurred'; return failedToolResult('research', `Research tool failed: ${errorMessage}`, { error: errorMessage, @@ -718,6 +722,7 @@ export function createAgentTools({ suggestion: 'The action could not be completed. Try a different instruction or use more specific element descriptions.', }); } catch (error) { + throwIfFatalBrowserError(error); const errorMessage = error instanceof Error ? error.toString() : 'Unknown error occurred'; return failedToolResult('interact', `Interact tool failed: ${errorMessage}`, { error: errorMessage, @@ -756,7 +761,7 @@ export function createAgentTools({ const previousState = ActionResult.fromState(currentState); const action = explorer.createAction(); - const actionResult = await action.caputrePageWithScreenshot(); + const actionResult = await explorer.capturePageWithScreenshot(); if (!actionResult.screenshot) { return failedToolResult('visualClick', 'Failed to capture screenshot for visual analysis'); @@ -797,6 +802,7 @@ export function createAgentTools({ analysis: locationResult, }); } catch (error) { + throwIfFatalBrowserError(error); const errorMessage = error instanceof Error ? error.toString() : 'Unknown error occurred'; visionDisabled = true; tag('warning').log('⚠️ Vision model is not available. Visual clicks are disabled for this session.'); @@ -1018,6 +1024,10 @@ function cap(text: string | undefined | null, max: number): string { return `${text.slice(0, max)}\n[...truncated; ${text.length - max} chars omitted...]`; } +function throwIfFatalBrowserError(error: unknown): void { + if (isFatalBrowserError(error)) throw error; +} + function transformContainsCommand(command: string): string { if (!command.includes(':contains(')) return command; diff --git a/src/components/LogPane.tsx b/src/components/LogPane.tsx index d22fb7b..c9cb091 100644 --- a/src/components/LogPane.tsx +++ b/src/components/LogPane.tsx @@ -121,6 +121,7 @@ const LogPane: React.FC = React.memo(({ verboseMode }) => { case 'step': return { color: 'cyan' as const, dimColor: true }; case 'multiline': + case 'details': return { color: 'gray' as const, dimColor: true }; case 'html': return { color: 'gray' as const }; @@ -143,16 +144,16 @@ const LogPane: React.FC = React.memo(({ verboseMode }) => { } const styles = getLogStyles(log.type); - if (log.type === 'multiline') { + if (log.type === 'multiline' || log.type === 'details') { const cleaned = stripAnsi(dedent(log.content)); const parsed = parseMarkdownToTerminal(cleaned); const lines = parsed.split('\n'); const maxLines = log.maxLines || 16; - const truncated = lines.length > maxLines ? `${lines.slice(0, maxLines).join('\n')}\n... (${lines.length - maxLines} more lines)` : parsed; + const content = log.type === 'details' ? parsed : lines.length > maxLines ? `${lines.slice(0, maxLines).join('\n')}\n... (${lines.length - maxLines} more lines)` : parsed; return ( - {truncated} + {content} ); diff --git a/src/explorer.ts b/src/explorer.ts index d93e045..7838381 100644 --- a/src/explorer.ts +++ b/src/explorer.ts @@ -2,13 +2,14 @@ import { existsSync, mkdirSync } from 'node:fs'; import path, { join } from 'node:path'; // @ts-ignore import * as codeceptjs from 'codeceptjs'; +import dedent from 'dedent'; import stepsListener from 'codeceptjs/lib/listener/steps'; import storeListener from 'codeceptjs/lib/listener/store'; import { createTest } from 'codeceptjs/lib/mocha/test'; +import type { BrowserContextOptions } from 'playwright'; import { ActionResult } from './action-result.ts'; import Action from './action.js'; import { AIProvider } from './ai/provider.js'; -import type { BrowserContextOptions } from 'playwright'; import { visuallyAnnotateContainers } from './ai/researcher/coordinates.ts'; import { RequestStore } from './api/request-store.ts'; import { XhrCapture } from './api/xhr-capture.ts'; @@ -19,10 +20,11 @@ import { KnowledgeTracker } from './knowledge-tracker.js'; import { PlaywrightRecorder } from './playwright-recorder.ts'; import { Reporter } from './reporter.ts'; import { StateManager } from './state-manager.js'; -import { Test } from './test-plan.ts'; +import { Test, TestResult } from './test-plan.ts'; import { ELEMENT_EXTRACTION_CONFIG, getElementDataExtractorSource } from './utils/html.ts'; import { createDebug, log, tag } from './utils/logger.js'; import { WebElement } from './utils/web-element.ts'; +import { BrowserRecoveryError, isFatalBrowserError } from './utils/browser-errors.ts'; declare global { namespace NodeJS { @@ -39,7 +41,6 @@ declare namespace CodeceptJS { } const debugLog = createDebug('explorbot:explorer'); -const FATAL_BROWSER_ERRORS = /Frame was detached|Target closed|Execution context was destroyed|Protocol error|Session closed/i; const RECOVERABLE_NAVIGATION_ERRORS = /net::ERR_ABORTED|page\.screenshot.*Timeout|waiting for fonts to load/i; interface TabInfo { @@ -47,6 +48,12 @@ interface TabInfo { title: string; } +interface BrowserExecutionErrorResult { + action: 'continue' | 'stop'; + message: string; + recovered?: boolean; +} + class Explorer { private aiProvider: AIProvider; playwrightHelper: any; @@ -64,6 +71,10 @@ class Explorer { private xhrCapture: XhrCapture | null = null; private requestStore: RequestStore | null = null; private playwrightRecorder: PlaywrightRecorder = new PlaywrightRecorder(); + private observedTestPages = new Set(); + private testPageErrorHandler: ((error: Error) => void) | null = null; + private testConsoleHandler: ((message: any) => void) | null = null; + private testDialogHandler: ((dialog: any) => void) | null = null; constructor(config: ExplorbotConfig, aiProvider: AIProvider, options?: { show?: boolean; headless?: boolean; incognito?: boolean; session?: string }) { this.config = config; @@ -199,12 +210,14 @@ class Explorer { } } - private setupXhrCapture(): void { + private setupXhrCapture(reuseRequestStore = false): void { const configParser = ConfigParser.getInstance(); const outputDir = configParser.getOutputDir(); - this.requestStore = new RequestStore(outputDir); + if (!reuseRequestStore || !this.requestStore) { + this.requestStore = new RequestStore(outputDir); + } const baseUrl = this.config.playwright.url; - this.xhrCapture = new XhrCapture(this.requestStore, baseUrl); + this.xhrCapture = new XhrCapture(this.requestStore!, baseUrl); this.xhrCapture.attach(this.playwrightHelper.page); } @@ -239,18 +252,7 @@ class Explorer { } await this.connectOrLaunchBrowser(); const hasSession = this.options?.session && existsSync(this.options.session); - const helperOptions = this.playwrightHelper.options || {}; - // CodeceptJS skips _createContextPage when sessions/storageState are involved, so we - // build contextOptions ourselves. Most keys share a name with Playwright's - // BrowserContextOptions and are copied as-is; `emulate` must be flattened, `basicAuth` - // renamed to `httpCredentials`, and `storageState` comes from the --session flag. - const contextOptions: BrowserContextOptions = { - ...helperOptions, - }; - if (helperOptions.emulate) Object.assign(contextOptions, helperOptions.emulate); - if (helperOptions.basicAuth) contextOptions.httpCredentials = helperOptions.basicAuth; - if (hasSession) contextOptions.storageState = this.options!.session; - await this.playwrightHelper._createContextPage(contextOptions); + await this.playwrightHelper._createContextPage(this.createBrowserContextOptions()); await this.playwrightRecorder.start(this.playwrightHelper.browserContext); this.setupXhrCapture(); if (hasSession) { @@ -287,15 +289,75 @@ class Explorer { await this.playwrightHelper._startBrowser(); } + private createBrowserContextOptions(): BrowserContextOptions { + const helperOptions = this.playwrightHelper.options || {}; + const contextOptions: BrowserContextOptions = { + ...helperOptions, + }; + + if (helperOptions.emulate) Object.assign(contextOptions, helperOptions.emulate); + if (helperOptions.basicAuth) contextOptions.httpCredentials = helperOptions.basicAuth; + if (this.options?.session && existsSync(this.options.session)) contextOptions.storageState = this.options.session; + + return contextOptions; + } + createAction() { return new Action(this.actor, this.stateManager, this.playwrightRecorder); } + async runWithBrowserRecovery(label: string, operation: () => Promise): Promise { + if (!(await this.ensurePageAvailable())) { + throw new Error(`Browser page is unavailable before ${label}`); + } + + try { + return await operation(); + } catch (error) { + if (!this.isFatalBrowserError(error)) throw error; + + tag('warning').log(`${label}: browser page is unavailable, recovering...`); + let recovered = await this.recoverFromBrowserError(); + if (!recovered) recovered = await this.restartBrowser(); + if (!recovered) throw new BrowserRecoveryError(label, error, false); + if (!(await this.waitForUsablePageDom())) throw new BrowserRecoveryError(label, error, true); + + try { + return await operation(); + } catch (retryError) { + if (this.isFatalBrowserError(retryError)) { + throw new BrowserRecoveryError(label, retryError, true); + } + throw retryError; + } + } + } + + async capturePageState(opts: { includeScreenshot?: boolean } = {}): Promise { + return this.runWithBrowserRecovery('capturePageState', () => this.createAction().capturePageState(opts)); + } + + async capturePageWithScreenshot(): Promise { + return this.capturePageState({ includeScreenshot: true }); + } + + async executeAction(code: string): Promise { + return this.runWithBrowserRecovery('executeAction', () => this.createAction().execute(code)); + } + + async attemptAction(code: string, originalMessage?: string, experience = true): Promise { + return this.runWithBrowserRecovery('attemptAction', () => this.createAction().attempt(code, originalMessage, experience)); + } + getPlaywrightRecorder(): PlaywrightRecorder { return this.playwrightRecorder; } async visit(url: string) { + return this.runWithBrowserRecovery('visit', () => this.visitOnce(url)); + } + + private async visitOnce(url: string) { await this.closeOtherTabs(); const serializedUrl = JSON.stringify(url); @@ -338,12 +400,14 @@ class Explorer { } async annotateElements(): Promise { - const { elements } = await annotatePageElements(this.playwrightHelper.page); - return elements; + return this.runWithBrowserRecovery('annotateElements', async () => { + const { elements } = await annotatePageElements(this.playwrightHelper.page); + return elements; + }); } async visuallyAnnotateElements(opts?: { containers?: Array<{ css: string; label: string }> }): Promise { - return visuallyAnnotateContainers(this.playwrightHelper.page, opts?.containers || []); + return this.runWithBrowserRecovery('visuallyAnnotateElements', () => visuallyAnnotateContainers(this.playwrightHelper.page, opts?.containers || [])); } async getEidxInContainer(containerCss: string | null): Promise { @@ -386,28 +450,99 @@ class Explorer { await this.playwrightHelper.page.reload(); } + private resolveBrowserUrl(url?: string): string | null { + if (!url) return null; + try { + return new URL(url).toString(); + } catch {} + + const baseUrl = this.config.playwright?.url || this.config.web?.url; + if (!baseUrl) return null; + + try { + return new URL(url, baseUrl).toString(); + } catch { + return null; + } + } + isFatalBrowserError(error: unknown): boolean { - const msg = error instanceof Error ? error.message : String(error); - return FATAL_BROWSER_ERRORS.test(msg); + return isFatalBrowserError(error); } async recoverFromBrowserError(): Promise { try { - const url = this.stateManager.getCurrentState()?.url; + if (!this.playwrightHelper?.page || this.playwrightHelper.page.isClosed?.()) { + const context = this.playwrightHelper?.browserContext; + if (!context) return await this.restartBrowser(); + const page = await context.newPage(); + await page.bringToFront(); + await this.playwrightHelper._setPage(page); + this.bindFrameNavigated(page); + if (this.xhrCapture) { + this.xhrCapture.attach(this.playwrightHelper.page); + } + } + + const url = this.resolveBrowserUrl(this.stateManager.getCurrentState()?.url); if (url) { tag('warning').log(`Browser error detected, recovering by navigating to ${url}`); await this.playwrightHelper.page.goto(url, { waitUntil: 'domcontentloaded', timeout: 10000 }); - return true; + return this.waitForUsablePageDom(); } tag('warning').log('Browser error detected, reloading page'); await this.playwrightHelper.page.reload({ waitUntil: 'domcontentloaded', timeout: 10000 }); - return true; + return this.waitForUsablePageDom(); } catch (err) { tag('error').log(`Browser recovery failed: ${err instanceof Error ? err.message : err}`); return false; } } + async restartBrowser(): Promise { + if (!this.playwrightHelper) return false; + + const url = this.resolveBrowserUrl(this.stateManager.getCurrentState()?.url); + + try { + if (this.xhrCapture && this.playwrightHelper.page) { + this.xhrCapture.detach(this.playwrightHelper.page); + } + + await this.playwrightRecorder.stop(); + + if (this.playwrightHelper.browserContext) { + await this.playwrightHelper.browserContext.close().catch((err: unknown) => { + debugLog('Failed to close browser context before restart:', err); + }); + this.playwrightHelper.browserContext = null; + } + + if (!this.isSharedBrowser) { + await this.playwrightHelper._stopBrowser().catch((err: unknown) => { + debugLog('Failed to stop browser before restart:', err); + }); + } + + await this.connectOrLaunchBrowser(); + await this.playwrightHelper._createContextPage(this.createBrowserContextOptions()); + await this.playwrightRecorder.start(this.playwrightHelper.browserContext); + this.setupXhrCapture(true); + this.listenToStateChanged(); + + if (url) { + await this.playwrightHelper.page.goto(url, { waitUntil: 'domcontentloaded', timeout: 10000 }); + if (!(await this.waitForUsablePageDom())) return false; + } + + tag('success').log('Browser restarted'); + return true; + } catch (err) { + tag('error').log(`Browser restart failed: ${err instanceof Error ? err.message : err}`); + return false; + } + } + async switchToMainFrame() { if (this.playwrightHelper.frame) { debugLog('Switching to main frame'); @@ -415,6 +550,30 @@ class Explorer { } } + private async waitForUsablePageDom(): Promise { + const page = this.playwrightHelper?.page; + if (!page) return false; + + await page.waitForLoadState?.('domcontentloaded', { timeout: 5000 }).catch(() => {}); + if (page.waitForFunction) { + const hasUsableDom = await page + .waitForFunction( + () => { + const body = document.body; + if (!body) return false; + return body.children.length > 0 || body.textContent?.trim().length > 0; + }, + undefined, + { timeout: 5000 } + ) + .then(() => true) + .catch(() => false); + if (!hasUsableDom) return false; + } + await page.waitForLoadState?.('networkidle', { timeout: 3000 }).catch(() => {}); + return true; + } + async isInsideIframe(): Promise { if (this.playwrightHelper.frame) return true; @@ -542,11 +701,13 @@ class Explorer { return this._activeTest; } - async startTest(test: Test) { + async startTest(test: Test): Promise { this._activeTest = test; + test.start(); await this.reporter.reportTestStart(test); await this.closeOtherTabs(); this.otherTabs = []; + if (!(await this.ensurePageAvailable())) return false; const codeceptjsTest = toCodeceptjsTest(test); @@ -563,13 +724,7 @@ class Explorer { test.setActiveNoteScreenshot(lastScreenshot); }; - const dialogHandler = (dialog: any) => { - const dialogType = dialog.type(); - const dialogMessage = dialog.message(); - test.addNote(`Native dialog ${dialogType} appeared: ${dialogMessage}. Accepted automatically`); - }; - - this.playwrightHelper?.page?.on('dialog', dialogHandler); + this.watchActiveTestPage(); codeceptjs.event.dispatcher.emit('test.before', codeceptjsTest); codeceptjs.event.dispatcher.emit('test.start', codeceptjsTest); @@ -580,11 +735,105 @@ class Explorer { codeceptjs.event.dispatcher.on('test.after', () => { codeceptjs.event.dispatcher.off('step.passed', stepHandler); codeceptjs.event.dispatcher.off('step.failed', stepHandler); - this.playwrightHelper?.page?.off('dialog', dialogHandler); + this.unwatchActiveTestPages(); }); + + return true; + } + + async ensurePageAvailable(): Promise { + const page = this.playwrightHelper?.page; + if (page && !page.isClosed?.()) { + this.watchActiveTestPage(page); + return true; + } + + const recovered = await this.recoverFromBrowserError(); + if (!recovered) return false; + this.watchActiveTestPage(); + return true; + } + + async ensureActiveTestPageAvailable(): Promise { + return this.ensurePageAvailable(); + } + + async handleExecutionError(error: unknown): Promise { + const message = error instanceof Error ? error.message : String(error); + tag('error').log(`Browser execution error: ${message}`); + + if (error instanceof Error && error.name === 'AbortError') { + return { + action: 'stop', + message, + }; + } + + if (error instanceof BrowserRecoveryError) { + return { + action: 'stop', + recovered: error.recovered, + message: error.message, + }; + } + + if (!this.isFatalBrowserError(error)) { + return { + action: 'continue', + message: `Previous execution error: ${message}. Investigate the current state and choose a different approach.`, + }; + } + + let recovered = await this.recoverFromBrowserError(); + if (!recovered) recovered = await this.restartBrowser(); + + if (!recovered) { + return { + action: 'stop', + recovered: false, + message: `Browser could not be recovered after fatal error: ${message}`, + }; + } + + this.watchActiveTestPage(); + return { + action: 'continue', + recovered: true, + message: dedent` + Browser was recovered after a fatal page error. + Continue from the restored page. + The interrupted browser action is not product evidence. + Inspect the restored page and retry the current step when it is still required. + `, + }; + } + + watchActiveTestPage(page = this.playwrightHelper?.page): void { + if (!this._activeTest) return; + if (!page) return; + if (this.observedTestPages.has(page)) return; + + this.testPageErrorHandler ||= (err: Error) => { + this._activeTest?.addNote(`Console error: ${err.message}`, TestResult.FAILED); + }; + this.testConsoleHandler ||= (msg: any) => { + if (msg.type() !== 'error') return; + this._activeTest?.addNote(`Console error: ${msg.text()}`, TestResult.FAILED); + }; + this.testDialogHandler ||= (dialog: any) => { + const dialogType = dialog.type(); + const dialogMessage = dialog.message(); + this._activeTest?.addNote(`Native dialog ${dialogType} appeared: ${dialogMessage}. Accepted automatically`); + }; + + page.on('pageerror', this.testPageErrorHandler); + page.on('console', this.testConsoleHandler); + page.on('dialog', this.testDialogHandler); + this.observedTestPages.add(page); } async stopTest(test: Test, meta?: Record) { + this.unwatchActiveTestPages(); this._activeTest = null; const lastScreenshot = this.stateManager.getCurrentState()?.screenshotFile; if (lastScreenshot) { @@ -609,6 +858,15 @@ class Explorer { codeceptjs.event.dispatcher.emit('test.after', codeceptjsTest); } + private unwatchActiveTestPages(): void { + for (const page of this.observedTestPages) { + if (this.testPageErrorHandler) page.off('pageerror', this.testPageErrorHandler); + if (this.testConsoleHandler) page.off('console', this.testConsoleHandler); + if (this.testDialogHandler) page.off('dialog', this.testDialogHandler); + } + this.observedTestPages.clear(); + } + async hasPlaywrightLocator(locatorFn: (page: any) => any, opts: { multiple?: boolean; contents?: boolean; success?: (locator: any) => Promise | void } = {}): Promise { try { const pwLocator = locatorFn(this.playwrightHelper.page); @@ -671,7 +929,7 @@ class Explorer { await oldPage.close(); await newPage.bringToFront(); - this.playwrightHelper.page = newPage; + await this.playwrightHelper._setPage(newPage); this.otherTabs = []; this.bindFrameNavigated(newPage); @@ -705,8 +963,7 @@ class Explorer { } await firstPage.bringToFront(); - - this.playwrightHelper.page = firstPage; + await this.playwrightHelper._setPage(firstPage); debugLog(`Cleaned up tabs, now focused on: ${await firstPage.url()}`); } diff --git a/src/utils/browser-errors.ts b/src/utils/browser-errors.ts new file mode 100644 index 0000000..1966722 --- /dev/null +++ b/src/utils/browser-errors.ts @@ -0,0 +1,25 @@ +// Playwright and CodeceptJS surface browser/page disposal as plain Error objects, +// not typed exceptions. Keep those external message markers in one adapter so +// recovery decisions are not duplicated across agents/actions. +const FATAL_BROWSER_ERROR_MARKERS = ['Frame was detached', 'Target closed', 'Target page, context or browser has been closed', 'Execution context was destroyed', 'Protocol error', 'Session closed']; + +export class BrowserRecoveryError extends Error { + constructor( + label: string, + public originalError: unknown, + public recovered: boolean + ) { + super(`${label} failed ${recovered ? 'after browser recovery' : 'because browser could not be recovered'}: ${browserErrorMessage(originalError)}`); + this.name = 'BrowserRecoveryError'; + } +} + +export function isFatalBrowserError(error: unknown): boolean { + if (error instanceof BrowserRecoveryError) return true; + const message = (error instanceof Error ? error.message : String(error)).toLowerCase(); + return FATAL_BROWSER_ERROR_MARKERS.some((marker) => message.includes(marker.toLowerCase())); +} + +export function browserErrorMessage(error: unknown): string { + return error instanceof Error ? error.message : String(error); +} diff --git a/src/utils/logger.ts b/src/utils/logger.ts index 503bee7..e7e4173 100644 --- a/src/utils/logger.ts +++ b/src/utils/logger.ts @@ -11,7 +11,7 @@ import { Observability } from '../observability.ts'; import { RecentStepFilter } from './log-filters.ts'; import { parseMarkdownToTerminal } from './markdown-terminal.ts'; -export type LogType = 'info' | 'success' | 'error' | 'warning' | 'debug' | 'substep' | 'operation' | 'step' | 'multiline' | 'html' | 'input'; +export type LogType = 'info' | 'success' | 'error' | 'warning' | 'debug' | 'substep' | 'operation' | 'step' | 'multiline' | 'details' | 'html' | 'input'; export interface TaggedLogEntry { type: LogType; @@ -99,7 +99,7 @@ class ConsoleDestination implements LogDestination { if (entry.type === 'operation' && !this.verboseMode) return; if (entry.type === 'step' && !this.verboseMode && this.recentSteps.shouldSuppress(entry.content)) return; let content = entry.content; - if (entry.type === 'multiline') { + if (entry.type === 'multiline' || entry.type === 'details') { const cleaned = stripAnsi(dedent(entry.content)); const parsed = parseMarkdownToTerminal(cleaned); content = parsed; @@ -314,7 +314,7 @@ class CaptainDestination implements LogDestination { stopCapture(): string[] { this.capturing = false; - const logs = this.entries.filter((e) => e.type !== 'debug' && e.type !== 'html' && e.type !== 'multiline' && e.type !== 'operation').map((e) => `[${e.type}] ${e.content}`); + const logs = this.entries.filter((e) => e.type !== 'debug' && e.type !== 'html' && e.type !== 'multiline' && e.type !== 'details' && e.type !== 'operation').map((e) => `[${e.type}] ${e.content}`); this.entries = []; return logs; } diff --git a/tests/integration/researcher-browser.test.ts b/tests/integration/researcher-browser.test.ts index 3af2d9e..b4d9a8c 100644 --- a/tests/integration/researcher-browser.test.ts +++ b/tests/integration/researcher-browser.test.ts @@ -98,6 +98,9 @@ describe('Researcher with real browser + aimock', () => { getConfig: () => ConfigParser.getInstance().getConfig(), visit: async () => {}, annotateElements: async () => (await annotatePageElements(page)).elements, + capturePageState: async () => ActionResult.fromState(state), + capturePageWithScreenshot: async () => ActionResult.fromState(state), + runWithBrowserRecovery: async (_label: string, operation: () => Promise) => operation(), createAction: () => ({ capturePageState: async () => ActionResult.fromState(state), caputrePageWithScreenshot: async () => ActionResult.fromState(state), diff --git a/tests/integration/researcher-sections.test.ts b/tests/integration/researcher-sections.test.ts index c1ac258..bf0eece 100644 --- a/tests/integration/researcher-sections.test.ts +++ b/tests/integration/researcher-sections.test.ts @@ -42,6 +42,9 @@ function createMockExplorer(configOverrides: Record = {}, playw getConfig: () => config, visit: async () => {}, annotateElements: async () => [], + capturePageState: async () => ActionResult.fromState(fakeState), + capturePageWithScreenshot: async () => ActionResult.fromState(fakeState), + runWithBrowserRecovery: async (_label: string, operation: () => Promise) => operation(), createAction: () => ({ capturePageState: async () => ActionResult.fromState(fakeState), caputrePageWithScreenshot: async () => ActionResult.fromState(fakeState), diff --git a/tests/integration/researcher.test.ts b/tests/integration/researcher.test.ts index fc178ad..79da34b 100644 --- a/tests/integration/researcher.test.ts +++ b/tests/integration/researcher.test.ts @@ -68,6 +68,9 @@ function createMockExplorer(state = fakeState) { getConfig: () => ConfigParser.getInstance().getConfig(), visit: async () => {}, annotateElements: async () => [], + capturePageState: async () => ActionResult.fromState(state), + capturePageWithScreenshot: async () => ActionResult.fromState(state), + runWithBrowserRecovery: async (_label: string, operation: () => Promise) => operation(), createAction: () => ({ capturePageState: async () => ActionResult.fromState(state), caputrePageWithScreenshot: async () => ActionResult.fromState(state), diff --git a/tests/unit/captain-artifacts.test.ts b/tests/unit/captain-artifacts.test.ts new file mode 100644 index 0000000..fa7f068 --- /dev/null +++ b/tests/unit/captain-artifacts.test.ts @@ -0,0 +1,187 @@ +import { describe, expect, it } from 'bun:test'; +import { mkdirSync, rmSync, writeFileSync } from 'node:fs'; +import { basename, dirname, join } from 'node:path'; +import { Captain } from '../../src/ai/captain.ts'; +import { readCaptainFile } from '../../src/ai/captain/file-tools.ts'; +import { ConfigParser } from '../../src/config.ts'; +import type { Task } from '../../src/test-plan.ts'; + +function buildCaptain(commandExecutor?: (cmd: string) => Promise) { + return Object.assign(Object.create(Captain.prototype), { + commandExecutor, + commandDescriptions: [], + }) as Captain; +} + +function task(description: string, notes: string[] = []) { + return { + description, + addNote: (note: string) => notes.push(note), + } as unknown as Task; +} + +describe('Captain artifact analysis tools', () => { + it('keeps done details as the user-facing answer', async () => { + const notes: string[] = []; + const captain = buildCaptain(); + const tools = (captain as any).coreTools(task('show config', notes), () => {}); + + const result = await tools.done.execute({ summary: 'Displayed config details', details: 'baseUrl: https://example.test\nbrowser: chromium' }); + + expect(result.success).toBe(true); + expect(notes).toEqual(['baseUrl: https://example.test\nbrowser: chromium', 'Displayed config details']); + }); + + it('rejects completion without details before any successful action', async () => { + const captain = buildCaptain(); + const tools = (captain as any).coreTools(task('inspect the current state'), () => {}); + + const result = await tools.done.execute({ summary: 'Inspected current state' }); + + expect(result.success).toBe(false); + expect(result.message).toContain('actual answer in details'); + }); + + it('allows completion without details after successful browser evidence', async () => { + const captain = buildCaptain(); + (captain as any).recentToolCalls = [{ wasSuccessful: true, output: { code: 'I.click("Submit")' } }]; + const tools = (captain as any).coreTools(task('click the submit button'), () => {}); + + const result = await tools.done.execute({ summary: 'Clicked submit' }); + + expect(result.success).toBe(true); + }); + + it('rejects completion without details after read-only tool output', async () => { + const captain = buildCaptain(); + (captain as any).recentToolCalls = [{ wasSuccessful: true, output: { content: 'report details' } }]; + const tools = (captain as any).coreTools(task('show report'), () => {}); + + const result = await tools.done.execute({ summary: 'Read report' }); + + expect(result.success).toBe(false); + }); + + it('reads explicit report artifact paths without shell commands', async () => { + ConfigParser.resetForTesting(); + ConfigParser.setupTestConfig(); + const parser = ConfigParser.getInstance(); + const outputDir = join(dirname(parser.getConfigPath()!), 'output'); + const reportDir = join(outputDir, 'reports'); + mkdirSync(reportDir, { recursive: true }); + writeFileSync(join(reportDir, 'session-demo-tests.md'), '# Failed run\n\nExpected button was missing.'); + + const captain = buildCaptain(); + const tools = await (captain as any).idleModeTools({ explorBot: {}, task: task('analyze report') }); + const result = await tools.readFile.execute({ path: 'output/reports/session-demo-tests.md' }); + + expect(result.success).toBe(true); + expect(result.content).toContain('Expected button was missing'); + + rmSync(join(outputDir, '..'), { recursive: true, force: true }); + }); + + it('accepts paths prefixed with the project directory name', async () => { + ConfigParser.resetForTesting(); + ConfigParser.setupTestConfig(); + const parser = ConfigParser.getInstance(); + const outputDir = join(dirname(parser.getConfigPath()!), 'output'); + const reportDir = join(outputDir, 'reports'); + mkdirSync(reportDir, { recursive: true }); + writeFileSync(join(reportDir, 'session-demo-tests.md'), '# Failed run\n\nWrong expectation.'); + + const captain = buildCaptain(); + const tools = await (captain as any).idleModeTools({ explorBot: {}, task: task('analyze report') }); + const projectName = basename(dirname(parser.getConfigPath()!)); + const result = await tools.readFile.execute({ path: `${projectName}/output/reports/session-demo-tests.md` }); + + expect(result.success).toBe(true); + expect(result.content).toContain('Wrong expectation'); + + rmSync(join(outputDir, '..'), { recursive: true, force: true }); + }); + + it('reads a requested line range from file contents', async () => { + ConfigParser.resetForTesting(); + ConfigParser.setupTestConfig(); + const parser = ConfigParser.getInstance(); + const outputDir = join(dirname(parser.getConfigPath()!), 'output'); + const reportDir = join(outputDir, 'reports'); + mkdirSync(reportDir, { recursive: true }); + writeFileSync(join(reportDir, 'session-demo-tests.md'), ['line 1', 'line 2', 'line 3', 'line 4'].join('\n')); + + const captain = buildCaptain(); + const tools = await (captain as any).idleModeTools({ explorBot: {}, task: task('analyze report') }); + const result = await tools.readFile.execute({ path: 'output/reports/session-demo-tests.md', startLine: 2, endLine: 3 }); + + expect(result.success).toBe(true); + expect(result.content).toBe('line 2\nline 3'); + + rmSync(join(outputDir, '..'), { recursive: true, force: true }); + }); + + it('reads line ranges from the end of file', async () => { + ConfigParser.resetForTesting(); + ConfigParser.setupTestConfig(); + const parser = ConfigParser.getInstance(); + const outputDir = join(dirname(parser.getConfigPath()!), 'output'); + const reportDir = join(outputDir, 'reports'); + mkdirSync(reportDir, { recursive: true }); + writeFileSync(join(reportDir, 'session-demo-tests.md'), ['line 1', 'line 2', 'line 3', 'line 4'].join('\n')); + + const captain = buildCaptain(); + const tools = await (captain as any).idleModeTools({ explorBot: {}, task: task('analyze report') }); + const result = await tools.readFile.execute({ path: 'output/reports/session-demo-tests.md', startLine: -2 }); + + expect(result.success).toBe(true); + expect(result.content).toBe('line 3\nline 4'); + + rmSync(join(outputDir, '..'), { recursive: true, force: true }); + }); + + it('uses caller-provided readable directories', () => { + ConfigParser.resetForTesting(); + ConfigParser.setupTestConfig(); + const parser = ConfigParser.getInstance(); + const projectRoot = dirname(parser.getConfigPath()!); + const customDir = join(projectRoot, 'custom-knowledge'); + mkdirSync(customDir, { recursive: true }); + writeFileSync(join(customDir, 'hint.md'), 'custom directory content'); + + const result = readCaptainFile(projectRoot, { path: 'custom-knowledge/hint.md' }, ['custom-knowledge']); + + expect(result.success).toBe(true); + if (result.success) { + expect(result.content).toContain('custom directory content'); + } + + rmSync(projectRoot, { recursive: true, force: true }); + }); +}); + +describe('Captain command guard', () => { + it('blocks slash commands for natural-language analysis requests', async () => { + let called = false; + const captain = buildCaptain(async () => { + called = true; + }); + const tools = (captain as any).coreTools(task('analyze the latest report'), () => {}); + const result = await tools.runCommand.execute({ command: '/anything' }); + + expect(result.success).toBe(false); + expect(called).toBe(false); + expect(result.message).toContain('Command blocked'); + }); + + it('allows slash commands when the user explicitly typed that slash command', async () => { + let called = false; + const captain = buildCaptain(async () => { + called = true; + }); + const tools = (captain as any).coreTools(task('/anything value'), () => {}); + const result = await tools.runCommand.execute({ command: '/anything value' }); + + expect(result.success).toBe(true); + expect(called).toBe(true); + }); +}); diff --git a/tests/unit/captain-mode.test.ts b/tests/unit/captain-mode.test.ts new file mode 100644 index 0000000..d5e37cf --- /dev/null +++ b/tests/unit/captain-mode.test.ts @@ -0,0 +1,125 @@ +import { describe, expect, it } from 'bun:test'; +import { Captain } from '../../src/ai/captain.ts'; + +function buildCaptain(opts: { state?: any; activeTest?: any; page?: any }) { + const explorer = { + activeTest: opts.activeTest || null, + playwrightHelper: { + page: opts.page, + }, + getStateManager: () => ({ + getCurrentState: () => opts.state || null, + }), + }; + + const explorBot = { + getExplorer: () => explorer, + }; + + return Object.assign(Object.create(Captain.prototype), { explorBot }) as Captain; +} + +function buildCaptainWithExplorer(explorer: any) { + return Object.assign(Object.create(Captain.prototype), { + explorBot: { + getExplorer: () => explorer, + }, + }) as Captain; +} + +describe('Captain modes', () => { + it('uses idle mode without a loaded page', () => { + const captain = buildCaptain({}); + + expect(captain.getMode()).toBe('idle'); + }); + + it('uses web mode when a page state exists', () => { + const captain = buildCaptain({ state: { url: '/dashboard' } }); + + expect(captain.getMode()).toBe('web'); + }); + + it('uses test mode while a test is active', () => { + const captain = buildCaptain({ + activeTest: { sessionName: 'test-session' }, + page: { isClosed: () => false }, + state: { url: '/dashboard' }, + }); + + expect(captain.getMode()).toBe('test'); + }); + + it('uses heal mode when active test has no usable browser page', () => { + const captain = buildCaptain({ + activeTest: { sessionName: 'test-session' }, + page: { isClosed: () => true }, + state: { url: '/dashboard' }, + }); + + expect(captain.getMode()).toBe('heal'); + }); +}); + +describe('Captain execution recovery', () => { + it('continues after a fatal browser error is recovered', async () => { + const captain = buildCaptainWithExplorer({ + handleExecutionError: async () => ({ + action: 'continue', + recovered: true, + message: 'Browser was recovered after a fatal page error.', + }), + }); + + const recovery = await captain.processExecutionError(new Error('Target closed'), { scenario: 'create project' } as any); + + expect(recovery.action).toBe('continue'); + expect(recovery.recovered).toBe(true); + expect(recovery.message).toContain('Browser was recovered'); + }); + + it('stops when a fatal browser error cannot be recovered', async () => { + const captain = buildCaptainWithExplorer({ + handleExecutionError: async () => ({ + action: 'stop', + recovered: false, + message: 'Browser could not be recovered', + }), + }); + + const recovery = await captain.processExecutionError(new Error('Target closed'), { scenario: 'create project' } as any); + + expect(recovery.action).toBe('stop'); + expect(recovery.recovered).toBe(false); + }); + + it('continues when browser restart recovers after page recovery fails', async () => { + const captain = buildCaptainWithExplorer({ + handleExecutionError: async () => ({ + action: 'continue', + recovered: true, + message: 'Browser was recovered after a fatal page error.', + }), + }); + + const recovery = await captain.processExecutionError(new Error('Target closed'), { scenario: 'create project' } as any); + + expect(recovery.action).toBe('continue'); + expect(recovery.recovered).toBe(true); + }); + + it('continues with guidance for non-fatal execution errors', async () => { + const captain = buildCaptainWithExplorer({ + handleExecutionError: async () => ({ + action: 'continue', + message: 'Previous execution error: Locator not found. Investigate the current state and choose a different approach.', + }), + }); + + const recovery = await captain.processExecutionError(new Error('Locator not found'), { scenario: 'create project' } as any); + + expect(recovery.action).toBe('continue'); + expect(recovery.recovered).toBeUndefined(); + expect(recovery.message).toContain('Previous execution error'); + }); +}); diff --git a/tests/unit/explorer-recovery-url.test.ts b/tests/unit/explorer-recovery-url.test.ts new file mode 100644 index 0000000..d91c0be --- /dev/null +++ b/tests/unit/explorer-recovery-url.test.ts @@ -0,0 +1,154 @@ +import { describe, expect, it } from 'bun:test'; +import Explorer from '../../src/explorer.ts'; + +function buildExplorer(baseUrl: string) { + return Object.assign(Object.create(Explorer.prototype), { + config: { + playwright: { url: baseUrl }, + web: { url: baseUrl }, + }, + }) as Explorer; +} + +describe('Explorer recovery URL resolution', () => { + it('resolves path-only state URLs against the configured base URL', () => { + const explorer = buildExplorer('https://the-internet.herokuapp.com'); + + expect((explorer as any).resolveBrowserUrl('/')).toBe('https://the-internet.herokuapp.com/'); + expect((explorer as any).resolveBrowserUrl('/add_remove_elements/')).toBe('https://the-internet.herokuapp.com/add_remove_elements/'); + }); + + it('keeps absolute state URLs unchanged', () => { + const explorer = buildExplorer('https://the-internet.herokuapp.com'); + + expect((explorer as any).resolveBrowserUrl('https://example.test/page')).toBe('https://example.test/page'); + }); + + it('creates a fresh active page when recovering a closed page', async () => { + const explorer = buildExplorer('https://the-internet.herokuapp.com'); + const navigated: string[] = []; + const boundEvents: string[] = []; + const newPage = { + goto: async (url: string) => { + navigated.push(url); + }, + bringToFront: async () => {}, + on: (event: string) => { + boundEvents.push(event); + }, + mainFrame: () => ({}), + }; + (explorer as any).playwrightHelper = { + page: { isClosed: () => true }, + _setPage: async (page: any) => { + (explorer as any).playwrightHelper.page = page; + }, + browserContext: { + newPage: async () => newPage, + }, + }; + (explorer as any).stateManager = { + getCurrentState: () => ({ url: '/' }), + updateStateFromBasic: () => {}, + }; + + const recovered = await explorer.recoverFromBrowserError(); + + expect(recovered).toBe(true); + expect((explorer as any).playwrightHelper.page).toBe(newPage); + expect(navigated).toEqual(['https://the-internet.herokuapp.com/']); + expect(boundEvents).toContain('framenavigated'); + }); + + it('recovers and retries browser operations in Explorer', async () => { + const explorer = buildExplorer('https://the-internet.herokuapp.com'); + let attempts = 0; + let recoveries = 0; + (explorer as any).playwrightHelper = { + page: { isClosed: () => false }, + }; + (explorer as any).recoverFromBrowserError = async () => { + recoveries++; + return true; + }; + + const result = await explorer.runWithBrowserRecovery('test operation', async () => { + attempts++; + if (attempts === 1) throw new Error('Target closed'); + return 'recovered'; + }); + + expect(result).toBe('recovered'); + expect(attempts).toBe(2); + expect(recoveries).toBe(1); + }); + + it('recovers and retries action attempts through Explorer', async () => { + const explorer = buildExplorer('https://the-internet.herokuapp.com'); + let attempts = 0; + let recoveries = 0; + (explorer as any).playwrightHelper = { + page: { isClosed: () => false }, + }; + (explorer as any).recoverFromBrowserError = async () => { + recoveries++; + return true; + }; + (explorer as any).createAction = () => ({ + attempt: async () => { + attempts++; + if (attempts === 1) throw new Error('Target page, context or browser has been closed'); + return true; + }, + }); + + const result = await explorer.attemptAction('I.click("Menu")', undefined, false); + + expect(result).toBe(true); + expect(attempts).toBe(2); + expect(recoveries).toBe(1); + }); + + it('stops when an operation fails again after browser recovery', async () => { + const explorer = buildExplorer('https://the-internet.herokuapp.com'); + (explorer as any).playwrightHelper = { + page: { isClosed: () => false }, + }; + (explorer as any).recoverFromBrowserError = async () => true; + + let error: unknown; + try { + await explorer.runWithBrowserRecovery('capturePageState', async () => { + throw new Error('Target page, context or browser has been closed'); + }); + } catch (err) { + error = err; + } + + const result = await explorer.handleExecutionError(error); + + expect(result.action).toBe('stop'); + expect(result.message).toContain('failed after browser recovery'); + }); + + it('returns a stop decision when browser recovery fails', async () => { + const explorer = buildExplorer('https://the-internet.herokuapp.com'); + (explorer as any).recoverFromBrowserError = async () => false; + (explorer as any).restartBrowser = async () => false; + + const result = await explorer.handleExecutionError(new Error('Target closed')); + + expect(result.action).toBe('stop'); + expect(result.recovered).toBe(false); + }); + + it('returns guidance for non-browser execution errors', async () => { + const explorer = buildExplorer('https://the-internet.herokuapp.com'); + + const result = await explorer.handleExecutionError(new Error('Locator not found')); + + expect(result.action).toBe('continue'); + expect(result.recovered).toBeUndefined(); + expect(result.message).toContain('Previous execution error'); + }); +}); diff --git a/tests/unit/historian-screencast.test.ts b/tests/unit/historian-screencast.test.ts new file mode 100644 index 0000000..3c0d5ce --- /dev/null +++ b/tests/unit/historian-screencast.test.ts @@ -0,0 +1,41 @@ +import { describe, expect, it } from 'bun:test'; +import { WithScreencast } from '../../src/ai/historian/screencast.ts'; + +function buildScreencastHost(stop: () => Promise) { + const Host = WithScreencast(Object as unknown as new () => object); + const host: any = new Host(); + host.savedFiles = new Set(); + host.screencastActive = true; + host.screencastPath = 'output/screencasts/test.webm'; + host.screencastPage = { + screencast: { stop }, + }; + const artifacts: string[] = []; + host.screencastTask = { + addArtifact: (path: string) => artifacts.push(path), + }; + return { host, artifacts }; +} + +describe('Historian screencast cleanup', () => { + it('does not save screencast artifact when browser was closed before stop', async () => { + const { host, artifacts } = buildScreencastHost(async () => { + throw new Error('stop: Target page, context or browser has been closed'); + }); + + await host.stopScreencast(); + + expect(host.savedFiles.size).toBe(0); + expect(artifacts).toHaveLength(0); + expect(host.isScreencastActive()).toBe(false); + }); + + it('saves screencast artifact after a clean stop', async () => { + const { host, artifacts } = buildScreencastHost(async () => {}); + + await host.stopScreencast(); + + expect(host.savedFiles.has('output/screencasts/test.webm')).toBe(true); + expect(artifacts).toEqual(['output/screencasts/test.webm']); + }); +}); diff --git a/tests/unit/pilot-evidence.test.ts b/tests/unit/pilot-evidence.test.ts new file mode 100644 index 0000000..82aa10a --- /dev/null +++ b/tests/unit/pilot-evidence.test.ts @@ -0,0 +1,44 @@ +import { describe, expect, it } from 'bun:test'; +import { Pilot } from '../../src/ai/pilot.ts'; + +function buildPilot(): Pilot { + return Object.assign(Object.create(Pilot.prototype), {}) as Pilot; +} + +describe('Pilot evidence', () => { + it('treats passed state verifications as successful assertion evidence', () => { + const pilot = buildPilot(); + const state = { verifications: { 'Heading is visible': true } }; + const conversation = { getToolExecutions: () => [] }; + + expect((pilot as any).hasSuccessfulCheckEvidence(state, conversation)).toBe(true); + expect((pilot as any).formatSuccessfulAssertions(state, conversation)).toContain('PASS state verification'); + }); + + it('treats successful check tools as assertion evidence', () => { + const pilot = buildPilot(); + const state = {}; + const conversation = { + getToolExecutions: () => [ + { + toolName: 'verify', + wasSuccessful: true, + input: { assertion: 'Heading is visible' }, + output: { message: 'Verification passed: Heading is visible' }, + }, + ], + }; + + expect((pilot as any).hasSuccessfulCheckEvidence(state, conversation)).toBe(true); + expect((pilot as any).formatSuccessfulAssertions(state, conversation)).toContain('PASS verify'); + }); + + it('treats achieved task notes as completion evidence', () => { + const pilot = buildPilot(); + const task = { hasAchievedAny: () => true }; + const state = {}; + const conversation = { getToolExecutions: () => [] }; + + expect((pilot as any).hasCompletionEvidence(task, state, conversation)).toBe(true); + }); +});