diff --git a/src/crates/core/src/agentic/agents/prompts/team_mode.md b/src/crates/core/src/agentic/agents/prompts/team_mode.md index cc8bb464..bb355f7b 100644 --- a/src/crates/core/src/agentic/agents/prompts/team_mode.md +++ b/src/crates/core/src/agentic/agents/prompts/team_mode.md @@ -6,6 +6,17 @@ IMPORTANT: Assist with defensive security tasks only. Refuse to create, modify, {LANGUAGE_PREFERENCE} +# MANDATORY: Skill-First Rule + +**You MUST invoke the appropriate gstack skill BEFORE writing any code, creating any plan, or making any file changes.** This is not optional. Team Mode exists to run the full specialist workflow — if you skip skills and write code directly, you are not operating in Team Mode. + +There are only three exceptions to this rule: +1. The user explicitly says "skip [phase/skill], just do [X]" — respect it once, note the skip in your todo list +2. A pure config-only change (single file, zero logic) — Build → Review only +3. An emergency hotfix explicitly labeled as such — Investigate → Build → Review → Ship + +In all other cases, invoke the skill first. + # Your Team Roster These are the specialist roles available to you as skills. Invoke them via the **Skill** tool: @@ -28,85 +39,185 @@ These are the specialist roles available to you as skills. Invoke them via the * | **Technical Writer** | `document-release` | Update all docs to match what was shipped | | **Eng Manager (Retro)** | `retro` | Weekly engineering retrospective with per-person breakdowns | -# The Sprint Workflow +# Skill Invocation Rules + +The following table is **mandatory**. Match the user's request to the correct row and invoke the listed skill before doing anything else. -Follow this process. Each phase feeds into the next: +| If the user... | You MUST first invoke... | Only then can you... 
| +|----------------|--------------------------|----------------------| +| Describes a new idea, feature, or requirement | `office-hours` | Create any plan or design doc | +| Has a design doc or plan ready for review | `autoplan` | Write any code | +| Wants only one review type (CEO / Design / Eng) | the specific skill | Proceed to the next phase | +| Just finished writing code | `review` | Proceed to QA or ship | +| Reports a bug or unexpected behavior | `investigate` | Touch any code | +| Says "ship it", "deploy", "create a PR" | `ship` | Run any deploy commands | +| Asks "does this work?" or "test this" | `qa` | Mark anything as done | +| Asks about security, auth, or data safety | `cso` | Modify any auth/data-related code | +| Wants design system or UI polish | `design-review` or `design-consultation` | Implement UI changes | +| Wants docs updated after shipping | `document-release` | Close out the task | +| Wants a retrospective | `retro` | Move to the next sprint | + +# The Sprint Workflow ``` Think → Plan → Build → Review → Test → Ship → Reflect ``` -## Phase 1: Think (when user describes an idea or requirement) -- Invoke `office-hours` to deeply explore the problem space -- The skill will ask forcing questions, challenge premises, and produce a design doc -- This design doc feeds into all downstream phases +**MANDATORY: Every new feature or non-trivial change starts at Phase 1 (Think). Do not enter a later phase without completing all prior mandatory phases.** + +## Phase 1: Think (REQUIRED for new ideas and features) + +**Entry condition:** User describes a new idea, feature, or requirement. + +**You MUST:** +1. Announce the role transition (see Role Transition Protocol below) +2. Invoke `office-hours` skill +3. Wait for the skill to produce a design doc +4. 
Confirm with the user before proceeding to Phase 2 + +**You must NOT write any code or create any implementation plan until Phase 1 is complete.** + +## Phase 2: Plan (REQUIRED before writing code) -## Phase 2: Plan (when a design doc exists or user wants architecture review) -- Invoke `autoplan` for the full review gauntlet, OR individually: - - `plan-ceo-review` — strategic scope challenge - - `plan-design-review` — UI/UX review (if applicable) - - `plan-eng-review` — architecture and test plan -- User approves the plan before proceeding +**Entry condition:** A design doc exists (from Phase 1 or provided by user). -## Phase 3: Build (when plan is approved) -- Write code yourself using standard tools (Read, Write, Edit, Bash, etc.) +**You MUST:** +1. Announce the role transition +2. Invoke `autoplan` (runs CEO + Design + Eng reviews sequentially), OR invoke individual skills: + - `plan-ceo-review` — strategic scope challenge + - `plan-design-review` — UI/UX review (if UI is involved) + - `plan-eng-review` — architecture and test plan +3. Get user approval on the reviewed plan before proceeding + +**You must NOT write any code until Phase 2 is complete and the plan is approved.** + +## Phase 3: Build (ONLY after plan approval) + +**Entry condition:** Plan is approved from Phase 2. + +- Write code using standard tools (Read, Write, Edit, Bash, etc.) - Use TodoWrite to track implementation progress -- Follow the architecture decisions from the plan +- Follow the architecture decisions from the plan exactly + +## Phase 4: Review (REQUIRED before testing or shipping) + +**Entry condition:** Implementation is complete. -## Phase 4: Review (when implementation is done) -- Invoke `review` to find production-level bugs in the diff -- Fix AUTO-FIX issues immediately, present ASK items to user -- Invoke `cso` for security-sensitive changes +**You MUST:** +1. Announce the role transition +2. Invoke `review` to find production-level bugs in the diff +3. 
Fix all AUTO-FIX issues immediately +4. Present all ASK items to user and wait for decisions +5. For security-sensitive changes, also invoke `cso` -## Phase 5: Test (when review passes) -- Invoke `qa` for browser-based testing (if applicable) -- Or `qa-only` for report-only testing -- Each bug fix generates a regression test +**You must NOT proceed to Test or Ship until all AUTO-FIX items are resolved.** -## Phase 6: Ship (when tests pass) -- Invoke `ship` to run tests, create PR, handle the release +## Phase 5: Test (REQUIRED before shipping) + +**Entry condition:** Review phase passed (no unresolved AUTO-FIX items). + +**You MUST:** +1. Announce the role transition +2. Invoke `qa` for browser-based testing (if UI is involved), or `qa-only` for report-only +3. Each bug found generates a regression test before the fix +4. Re-run `review` if significant code changes were made during QA + +## Phase 6: Ship (REQUIRED to close out the work) + +**Entry condition:** Tests pass. + +**You MUST:** +1. Announce the role transition +2. Invoke `ship` to run final tests, create PR, and handle the release ## Phase 7: Reflect (after shipping) -- Invoke `retro` for a retrospective -- Invoke `document-release` to update project docs -# Workflow Intelligence +- Invoke `retro` for a sprint retrospective +- Invoke `document-release` to update project docs to match what was shipped -You don't always need every phase. Use judgment: +# Phase Gates -- **Quick bug fix**: Skip to Build → Review → Ship -- **New feature**: Full Think → Plan → Build → Review → Test → Ship -- **Security audit only**: Just invoke `cso` -- **Code review only**: Just invoke `review` -- **User says "ship it"**: Just invoke `ship` +These are hard stops. You cannot proceed past a gate without satisfying its condition. -When the user invokes a skill by name (e.g., "run a review", "do QA", "ship it"), go directly to that skill without forcing the full workflow. 
+**Gate 1 — Before Build:** +A completed design doc OR an approved autoplan review output MUST exist. +If neither exists, announce: "Phase Gate 1: No design doc or plan found. Invoking office-hours now." Then invoke `office-hours`. -# Proactive Skill Suggestions +**Gate 2 — Before Ship:** +The `review` skill MUST have run and all AUTO-FIX items MUST be resolved. +If review has not run, announce: "Phase Gate 2: Review has not run. Invoking review now." Then invoke `review`. -When you recognize a workflow opportunity, suggest the appropriate skill: -- User says "I have an idea" → suggest `office-hours` -- User finishes coding → suggest `review` -- User asks "does this work?" → suggest `qa` -- User says "ready to deploy" → suggest `ship` -- User reports a bug → suggest `investigate` -- User asks about security → suggest `cso` +# Role Transition Protocol -# Tone and Style +When invoking any skill, you MUST announce the transition with this exact format before invoking the Skill tool: -- NEVER use emojis unless the user explicitly requests it -- Be concise but thorough when coordinating between phases -- When a skill is loaded, follow its instructions precisely — the skill IS the expert -- Report phase transitions clearly: "Moving from Review to QA phase" -- Use TodoWrite to track sprint progress across phases +``` +--- +[ROLE: {Role Name}] Invoking {skill-name}... +--- +``` + +Examples: +``` +--- +[ROLE: YC Office Hours] Invoking office-hours... +--- +``` +``` +--- +[ROLE: Eng Manager] Invoking plan-eng-review... +--- +``` + +After the skill completes, announce the return with this format: + +``` +--- +[ROLE: BitFun Orchestrator] {skill-name} complete. Moving to {next phase/action}. +--- +``` + +This makes the team structure visible. Never silently invoke a skill. + +# When to Abbreviate the Workflow + +The workflow can only be abbreviated in these specific cases. Skipping a phase does not mean skipping the mandatory skill — it means the phase genuinely does not apply. 
+ +| Scenario | Allowed shortcut | +|----------|-----------------| +| Pure config change (1 file, zero logic) | Build → Review only | +| Emergency hotfix (explicitly labeled) | Investigate → Build → Review → Ship | +| Bug report with clear root cause already known | Investigate → Build → Review → Ship | +| User explicitly invokes a specific skill by name | Go directly to that skill, then continue from that phase | +| Security audit only | Just invoke `cso` | + +**In all other cases, start from the correct entry point in the Sprint Workflow.** + +When a user says "run a review", "do QA", or "ship it" — those are explicit skill invocations. Honor them immediately. This is not a shortcut — it means the user is entering the workflow at a specific phase. # Professional Objectivity -Prioritize technical accuracy over validating beliefs. The CEO reviewer skill will challenge the user's assumptions — that's by design. Great products come from honest feedback, not agreement. +Prioritize technical accuracy over validating beliefs. The CEO reviewer and Eng Manager skills will challenge the user's assumptions — that is by design. Great products come from honest feedback, not agreement. + +# Tone and Style + +- NEVER use emojis unless the user explicitly requests it +- Be concise when orchestrating between phases +- When a skill is loaded, follow its instructions precisely — the skill IS the expert +- Report phase transitions clearly using the Role Transition Protocol +- Use TodoWrite to track sprint progress across phases — each phase is a top-level todo # Task Management -Use TodoWrite frequently to track sprint progress. Each phase should be a top-level todo, with sub-tasks as needed. Mark phases complete as you move through them. +Use TodoWrite frequently to track sprint progress. 
Structure it as: +- Phase 1: Think — [status] +- Phase 2: Plan — [status] +- Phase 3: Build — [status] +- Phase 4: Review — [status] +- Phase 5: Test — [status] +- Phase 6: Ship — [status] + +Mark phases complete only after their mandatory skill has run and its output has been acted on. # Doing Tasks @@ -114,5 +225,6 @@ Use TodoWrite frequently to track sprint progress. Each phase should be a top-le - Use the AskUserQuestion tool when you need user decisions between phases. - Be careful not to introduce security vulnerabilities. - When invoking a skill, trust its methodology and follow its instructions fully. +- If a skill's output contradicts the current plan, surface the conflict to the user before proceeding. {ENV_INFO} diff --git a/src/crates/core/src/agentic/tools/implementations/self_control_tool.rs b/src/crates/core/src/agentic/tools/implementations/self_control_tool.rs index d9f4d36d..ab7a7538 100644 --- a/src/crates/core/src/agentic/tools/implementations/self_control_tool.rs +++ b/src/crates/core/src/agentic/tools/implementations/self_control_tool.rs @@ -13,8 +13,10 @@ use tokio::sync::{oneshot, RwLock}; /// SelfControl tool — lets the BitFun agent operate its own GUI. /// -/// The tool sends events to the frontend via the backend event system, -/// waits for the frontend to execute the action, and returns the result. +/// The tool validates the required `action` field, then forwards the entire +/// camelCase payload to the frontend via the backend event system. The +/// frontend executes the action and submits the result back through the +/// `submit_self_control_response` Tauri command. 
pub struct SelfControlTool; impl Default for SelfControlTool { @@ -29,55 +31,11 @@ impl SelfControlTool { } } -#[derive(Debug, Clone, Deserialize)] -#[serde(rename_all = "snake_case")] -pub enum SelfControlActionType { - ExecuteTask, - GetPageState, - Click, - ClickByText, - Input, - Scroll, - OpenScene, - OpenSettingsTab, - SetConfig, - GetConfig, - ListModels, - SetDefaultModel, - SelectOption, -} - +/// Minimal deserialization used only for input validation. +/// The full payload is forwarded as-is (Value) to the frontend. #[derive(Debug, Clone, Deserialize)] pub struct SelfControlInput { - action: SelfControlActionType, - #[serde(default)] - selector: Option, - #[serde(default)] - text: Option, - #[serde(default)] - value: Option, - #[serde(default)] - tag: Option, - #[serde(default)] - direction: Option, - #[serde(default)] - scene_id: Option, - #[serde(default)] - tab_id: Option, - #[serde(default)] - key: Option, - #[serde(default)] - config_value: Option, - #[serde(default)] - model_query: Option, - #[serde(default)] - slot: Option, - #[serde(default)] - option_text: Option, - #[serde(default)] - task: Option, - #[serde(default)] - params: Option, + action: String, } #[derive(Debug, Clone, Serialize, Deserialize)] @@ -134,25 +92,33 @@ impl Tool for SelfControlTool { Use this tool when the user asks you to change settings, open scenes/tabs, click UI elements, set models, or perform any action inside the BitFun app itself. -Actions: -- "execute_task": Run a high-level task that is internally planned and executed. Preferred for common workflows. Requires "task". - Available tasks: "set_primary_model" (params: { modelQuery }), "set_fast_model" (params: { modelQuery }), "open_model_settings", "return_to_session". +Available actions (use EXACTLY one of these for the "action" field): +- "execute_task": Run a high-level task. Requires "task" field. + Valid tasks: "set_primary_model", "set_fast_model", "open_model_settings", "return_to_session", "delete_model". 
+ Example: { "action": "execute_task", "task": "open_model_settings" } + Example: { "action": "execute_task", "task": "delete_model", "params": { "modelQuery": "OpenRouter" } } + Example: { "action": "execute_task", "task": "set_primary_model", "params": { "modelQuery": "kimi" } } + CRITICAL: "open_model_settings" is a TASK, not an action. Do NOT use { "action": "open_model_settings" }. - "get_page_state": Returns the current page state including active scene, interactive elements, semantic hints, and quick-action targets. - "click": Clicks an element by CSS selector. Requires "selector". - "click_by_text": Clicks an element containing the given text. Requires "text". Optional "tag". - "input": Sets the value of an input element. Requires "selector" and "value". - "scroll": Scrolls the page or an element. Optional "selector", requires "direction" (up, down, top, bottom). -- "open_scene": Opens a scene by ID. Requires "scene_id" (e.g., "settings", "session", "welcome"). -- "open_settings_tab": Opens the settings scene and switches to a tab. Requires "tab_id". -- "set_config": Sets a config value by key. Requires "key" and "config_value". +- "open_scene": Opens a scene by ID. Requires "sceneId" (e.g., "settings", "session", "welcome"). +- "open_settings_tab": Opens the settings scene and switches to a tab. Requires "tabId". +- "set_config": Sets a config value by key. Requires "key" and "configValue". - "get_config": Gets a config value by key. Requires "key". -- "list_models": Lists all enabled models with their display names, providers, and IDs. -- "set_default_model": Directly sets the default model by config search. Falls back to UI if not found. Requires "model_query". -- "select_option": Opens a custom Select dropdown and clicks an option by text. Requires "selector" and "option_text". +- "list_models": Lists all configured models with their display names, providers, and IDs. Optional "includeDisabled" (boolean). 
+- "set_default_model": Directly sets the default model by config search. Falls back to UI if not found. Requires "modelQuery". Optional "slot" ("primary" or "fast"). +- "select_option": Opens a custom Select dropdown and clicks an option by text. Requires "selector" and "optionText". +- "wait": Pauses for a given duration. Requires "durationMs" (e.g., 500). +- "press_key": Simulates a key press. Requires "key" (e.g., "Enter", "Escape"). +- "read_text": Reads the visible text of an element. Requires "selector". +- "delete_model": Deletes a model from ai.models by matching name, model_name, or provider. Requires "modelQuery". Guidelines: 1. For well-known requests (e.g., "set Kimi as the main model"), ALWAYS prefer "execute_task" with "set_primary_model". -2. For model requests, use "list_models" only when the user explicitly asks to see available models. +2. When a page changes, use "wait" with ~300-500ms before the next action to let UI settle. 3. For unknown UI tasks, use "get_page_state" first, read the "semanticHints" field, then decide. 4. After completing the user's request, return to the session scene with "return_to_session" task or open_scene "session"."# .to_string(), @@ -178,22 +144,26 @@ Guidelines: "get_config", "list_models", "set_default_model", - "select_option" + "select_option", + "wait", + "press_key", + "read_text", + "delete_model" ], - "description": "The self-control action to perform. Prefer execute_task for common workflows." + "description": "The self-control action to perform. MUST be one of the enum values. For open_model_settings or delete_model, use execute_task with the task field, NOT the action field." }, "task": { "type": "string", - "enum": ["set_primary_model", "set_fast_model", "open_model_settings", "return_to_session"], + "enum": ["set_primary_model", "set_fast_model", "open_model_settings", "return_to_session", "delete_model"], "description": "Task name when using execute_task." 
}, "params": { "type": "object", - "description": "Task parameters when using execute_task (e.g., { modelQuery: \"kimi\" })." + "description": "Task parameters when using execute_task (e.g., { \"modelQuery\": \"kimi\" })." }, "selector": { "type": "string", - "description": "CSS selector for click, input, or select_option actions." + "description": "CSS selector for click, input, select_option, or read_text actions." }, "text": { "type": "string", @@ -212,11 +182,11 @@ Guidelines: "enum": ["up", "down", "top", "bottom"], "description": "Scroll direction." }, - "scene_id": { + "sceneId": { "type": "string", "description": "Scene ID for open_scene (e.g., settings, session, welcome)." }, - "tab_id": { + "tabId": { "type": "string", "description": "Settings tab ID for open_settings_tab (e.g., models, basics, session-config)." }, @@ -224,21 +194,29 @@ Guidelines: "type": "string", "description": "Config key for get_config / set_config." }, - "config_value": { + "configValue": { "description": "Config value for set_config." }, - "model_query": { + "modelQuery": { "type": "string", - "description": "Model name or ID to search for when using set_default_model (e.g., \"doubao pro\", \"gpt-4o\")." + "description": "Model name or ID to search for when using set_default_model, delete_model, or delete_model task (e.g., \"doubao pro\", \"gpt-4o\")." }, "slot": { "type": "string", "enum": ["primary", "fast"], "description": "Which default model slot to set (primary or fast). Defaults to primary." }, - "option_text": { + "optionText": { "type": "string", "description": "Text of the dropdown option to select. Used with select_option." + }, + "durationMs": { + "type": "integer", + "description": "Duration in milliseconds to wait when using wait action." + }, + "includeDisabled": { + "type": "boolean", + "description": "Whether to include disabled models when using list_models. Defaults to false." 
} }, "required": ["action"] @@ -260,9 +238,17 @@ Guidelines: async fn validate_input( &self, - _input: &Value, + input: &Value, _context: Option<&ToolUseContext>, ) -> ValidationResult { + if input.get("action").and_then(|v| v.as_str()).unwrap_or("").is_empty() { + return ValidationResult { + result: false, + message: Some("Missing required field: action".to_string()), + error_code: None, + meta: None, + }; + } ValidationResult::default() } @@ -271,8 +257,10 @@ Guidelines: input: &Value, _context: &ToolUseContext, ) -> BitFunResult> { - let params: SelfControlInput = serde_json::from_value(input.clone()) + // Validate action field — full payload is forwarded as-is to the frontend + let validated: SelfControlInput = serde_json::from_value(input.clone()) .map_err(|e| BitFunError::tool(format!("Invalid SelfControl input: {}", e)))?; + let action_name = validated.action; let request_id = format!("selfcontrol_{}", uuid::Uuid::new_v4()); let (tx, rx) = oneshot::channel(); @@ -286,46 +274,13 @@ Guidelines: ); } - let mut action_payload = json!({ - "type": match params.action { - SelfControlActionType::ExecuteTask => "execute_task", - SelfControlActionType::GetPageState => "get_page_state", - SelfControlActionType::Click => "click", - SelfControlActionType::ClickByText => "click_by_text", - SelfControlActionType::Input => "input", - SelfControlActionType::Scroll => "scroll", - SelfControlActionType::OpenScene => "open_scene", - SelfControlActionType::OpenSettingsTab => "open_settings_tab", - SelfControlActionType::SetConfig => "set_config", - SelfControlActionType::GetConfig => "get_config", - SelfControlActionType::ListModels => "list_models", - SelfControlActionType::SetDefaultModel => "set_default_model", - SelfControlActionType::SelectOption => "select_option", - }, - "selector": params.selector, - "text": params.text, - "value": params.value, - "tag": params.tag, - "direction": params.direction, - "scene_id": params.scene_id, - "tab_id": params.tab_id, - "key": 
params.key, - "config_value": params.config_value, - "model_query": params.model_query, - "slot": params.slot, - "option_text": params.option_text, - }); - - if let Some(task) = &params.task { - action_payload["task"] = json!(task); - } - if let Some(params_val) = &params.params { - action_payload["params"] = params_val.clone(); - } - + // Forward the entire input payload directly — no field re-mapping needed. + // The LLM fills fields using the camelCase names from input_schema, so the + // frontend receives them in the correct format without any normalization. let event_payload = json!({ "requestId": request_id, - "action": action_payload, + "actionType": action_name, + "action": input, }); let event_system = get_global_event_system(); @@ -336,7 +291,14 @@ Guidelines: }) .await { - log::warn!("Failed to emit self-control request event: {}", e); + // Emit failed — clean up the pending entry and return immediately + // rather than blocking until timeout. + let pending_requests = get_pending_requests(); + pending_requests.write().await.remove(&request_id); + return Err(BitFunError::tool(format!( + "Failed to emit self-control request: {}", + e + ))); } let wait_timeout = Duration::from_secs(30); diff --git a/src/web-ui/src/infrastructure/self-control/SelfControlEventListener.ts b/src/web-ui/src/infrastructure/self-control/SelfControlEventListener.ts index 71588b6a..cc407974 100644 --- a/src/web-ui/src/infrastructure/self-control/SelfControlEventListener.ts +++ b/src/web-ui/src/infrastructure/self-control/SelfControlEventListener.ts @@ -6,13 +6,17 @@ import { api } from '@/infrastructure/api/service-api/ApiClient'; import { createLogger } from '@/shared/utils/logger'; import { SelfControlAPI } from '@/infrastructure/api/service-api/SelfControlAPI'; -import { selfControlService, type SelfControlAction } from './SelfControlService'; +import { + selfControlService, + type SelfControlAction, + type SelfControlIncomingAction, +} from './SelfControlService'; const logger = 
createLogger('SelfControlEventListener'); export interface SelfControlRequestEvent { requestId: string; - action: SelfControlAction; + action: SelfControlIncomingAction | SelfControlAction; } let unlistenFn: (() => void) | null = null; @@ -27,7 +31,10 @@ export function startSelfControlEventListener(): void { unlistenFn = api.listen('selfcontrol://request', (event) => { const { requestId, action } = event; - logger.info('Received self-control request', { requestId, actionType: action.type }); + logger.info('Received self-control request', { + requestId, + actionType: action.type ?? action.action ?? 'unknown', + }); void (async () => { try { diff --git a/src/web-ui/src/infrastructure/self-control/SelfControlService.test.ts b/src/web-ui/src/infrastructure/self-control/SelfControlService.test.ts new file mode 100644 index 00000000..99144f84 --- /dev/null +++ b/src/web-ui/src/infrastructure/self-control/SelfControlService.test.ts @@ -0,0 +1,135 @@ +import { beforeEach, describe, expect, it, vi } from 'vitest'; + +const { + openSceneMock, + setActiveTabMock, + getConfigMock, + setConfigMock, + resetConfigState, +} = vi.hoisted(() => { + const openSceneMock = vi.fn(); + const setActiveTabMock = vi.fn(); + let configState: Record = {}; + + const getConfigMock = vi.fn(async (key: string) => configState[key]); + const setConfigMock = vi.fn(async (key: string, value: unknown) => { + configState[key] = value; + }); + + return { + openSceneMock, + setActiveTabMock, + getConfigMock, + setConfigMock, + resetConfigState(nextState: Record) { + configState = structuredClone(nextState); + }, + }; +}); + +vi.mock('@/app/stores/sceneStore', () => ({ + useSceneStore: { + getState: () => ({ + activeTabId: 'session', + openScene: openSceneMock, + }), + }, +})); + +vi.mock('@/app/scenes/settings/settingsStore', () => ({ + useSettingsStore: { + getState: () => ({ + activeTab: 'models', + setActiveTab: setActiveTabMock, + }), + }, +})); + +vi.mock('@/infrastructure/config', () => ({ + 
configManager: { + getConfig: getConfigMock, + setConfig: setConfigMock, + }, +})); + +vi.mock('@/infrastructure/config/services/modelConfigs', () => ({ + getModelDisplayName: ({ name, model_name }: { name?: string; model_name?: string }) => + model_name || name || 'Unknown', +})); + +vi.mock('@/infrastructure/config/services/providerCatalog', () => ({ + matchProviderCatalogItemByBaseUrl: () => null, +})); + +vi.mock('@/shared/utils/logger', () => ({ + createLogger: () => ({ + info: vi.fn(), + debug: vi.fn(), + warn: vi.fn(), + error: vi.fn(), + }), +})); + +import { SelfControlService } from './SelfControlService'; + +describe('SelfControlService', () => { + beforeEach(() => { + vi.clearAllMocks(); + resetConfigState({}); + }); + + it('accepts raw Rust payloads that use action as the discriminator', async () => { + const service = new SelfControlService(); + + await expect(service.executeAction({ action: 'open_scene', sceneId: 'settings' })).resolves.toBe( + 'Opened scene: settings', + ); + + expect(openSceneMock).toHaveBeenCalledWith('settings'); + }); + + it('repairs default model references after deleting the current default model', async () => { + resetConfigState({ + 'ai.models': [ + { + id: 'model-primary', + name: 'Target', + model_name: 'target-v1', + provider: 'provider-a', + enabled: true, + }, + { + id: 'model-fallback', + name: 'Fallback', + model_name: 'fallback-v1', + provider: 'provider-b', + enabled: true, + }, + ], + 'ai.default_models': { + primary: 'model-primary', + fast: 'model-primary', + }, + }); + + const service = new SelfControlService(); + + await expect(service.executeAction({ action: 'delete_model', modelQuery: 'Target' })).resolves.toContain( + 'Default model updates: primary fallback -> model-fallback; fast fallback -> model-fallback.', + ); + + expect(setConfigMock).toHaveBeenCalledWith('ai.models', [ + { + id: 'model-fallback', + name: 'Fallback', + model_name: 'fallback-v1', + provider: 'provider-b', + enabled: true, + }, + ]); + 
expect(setConfigMock).toHaveBeenCalledWith('ai.default_models', { + primary: 'model-fallback', + fast: 'model-fallback', + }); + }); +}); diff --git a/src/web-ui/src/infrastructure/self-control/SelfControlService.ts b/src/web-ui/src/infrastructure/self-control/SelfControlService.ts index 7fa94f40..ed84cdf0 100644 --- a/src/web-ui/src/infrastructure/self-control/SelfControlService.ts +++ b/src/web-ui/src/infrastructure/self-control/SelfControlService.ts @@ -1,8 +1,16 @@ /** - * SelfControlService — lets BitFun agent operate its own GUI with task-level orchestration. + * SelfControlService — lets BitFun agent operate its own GUI. * - * Supports both atomic actions (click, input) and semantic tasks (set_primary_model, - * open_model_settings) that are automatically planned and executed internally. + * Architecture: four responsibility regions inside one class. + * + * Region 1 – DOM Primitives : click / input / scroll / pressKey / readText / wait + * Region 2 – App State : openScene / openSettingsTab / getPageState + * Region 3 – Config & Models : setConfig / getConfig / listModels / setDefaultModel / deleteModel + * Region 4 – Task Orchestration: executeTask — composes Regions 1-3 + * + * The backend forwards the LLM's camelCase payload directly without any + * field remapping, so all action types here use camelCase field names that + * match the Rust input_schema exactly. */ import { useSceneStore } from '@/app/stores/sceneStore'; @@ -14,6 +22,15 @@ import { createLogger } from '@/shared/utils/logger'; const logger = createLogger('SelfControlService'); +// Option selectors tried in order when looking for dropdown items +const DROPDOWN_OPTION_SELECTORS = [ + '.select__option', + '[role="option"]', + '.dropdown__item', + '.menu__item', + 'li', +] as const; + export interface SimplifiedElement { tag: string; id?: string; @@ -38,6 +55,7 @@ export interface PageState { semanticHints: string[]; } +/** Internal normalized action shape used by the dispatcher. 
*/ export type SelfControlAction = | { type: 'execute_task'; task: string; params?: Record } | { type: 'click'; selector: string } @@ -46,12 +64,41 @@ export type SelfControlAction = | { type: 'scroll'; selector?: string; direction: 'up' | 'down' | 'top' | 'bottom' } | { type: 'open_scene'; sceneId: string } | { type: 'open_settings_tab'; tabId: string } - | { type: 'set_config'; key: string; value: unknown } + | { type: 'set_config'; key: string; configValue: unknown } | { type: 'get_config'; key: string } - | { type: 'list_models' } + | { type: 'list_models'; includeDisabled?: boolean } | { type: 'set_default_model'; modelQuery: string; slot?: 'primary' | 'fast' } | { type: 'select_option'; selector: string; optionText: string } - | { type: 'get_page_state' }; + | { type: 'get_page_state' } + | { type: 'wait'; durationMs: number } + | { type: 'press_key'; key: string } + | { type: 'read_text'; selector: string } + | { type: 'delete_model'; modelQuery: string }; + +/** + * Raw action payload received from Rust. + * The tool schema uses `action` as the discriminator; we normalize it to `type` + * before dispatch so the frontend can accept both the new direct passthrough + * payload and older internal callers that already send `type`. 
+ */ +export type SelfControlIncomingAction = Partial & { + action?: SelfControlAction['type']; + type?: SelfControlAction['type']; + sceneId?: string; + scene_id?: string; + tabId?: string; + tab_id?: string; + configValue?: unknown; + config_value?: unknown; + modelQuery?: string; + model_query?: string; + optionText?: string; + option_text?: string; + durationMs?: number; + duration_ms?: number; + includeDisabled?: boolean; + include_disabled?: boolean; +}; interface ModelInfo { id: string; @@ -65,13 +112,20 @@ interface ModelInfo { export class SelfControlService { private highlightOverlay: HTMLDivElement | null = null; - getPageState(): PageState { + // ── Region 2: App State ────────────────────────────────────────────────── + + async getPageState(): Promise { const activeScene = useSceneStore.getState().activeTabId; - const activeSettingsTab = activeScene === 'settings' ? useSettingsStore.getState().activeTab : undefined; + const activeSettingsTab = + activeScene === 'settings' ? 
useSettingsStore.getState().activeTab : undefined; const elements = this.collectInteractiveElements(); const targets = this.buildTargetIndex(elements); const semanticHints = this.buildSemanticHints(activeScene, activeSettingsTab, elements, targets); + if (activeScene === 'settings' && activeSettingsTab === 'models') { + await this.maybeAppendModelSummary(semanticHints); + } + return { title: document.title, activeScene, @@ -82,7 +136,9 @@ export class SelfControlService { }; } - async executeAction(rawAction: SelfControlAction): Promise { + // ── Dispatcher ─────────────────────────────────────────────────────────── + + async executeAction(rawAction: SelfControlIncomingAction | SelfControlAction): Promise { const action = this.normalizeAction(rawAction); logger.info('Executing self-control action', { type: action.type }); @@ -91,55 +147,51 @@ export class SelfControlService { return this.executeTask(action.task, action.params); case 'get_page_state': - return JSON.stringify(this.getPageState(), null, 2); + return JSON.stringify(await this.getPageState(), null, 2); + // Region 2: App State case 'open_scene': - useSceneStore.getState().openScene(action.sceneId as any); - return `Opened scene: ${action.sceneId}`; - + return this.openScene(action.sceneId); case 'open_settings_tab': - useSceneStore.getState().openScene('settings'); - useSettingsStore.getState().setActiveTab(action.tabId as any); - return `Opened settings tab: ${action.tabId}`; + return this.openSettingsTab(action.tabId); + // Region 3: Config & Models case 'set_config': - await configManager.setConfig(action.key, action.value); - return `Set config ${action.key} = ${JSON.stringify(action.value)}`; - - case 'get_config': { - const value = await configManager.getConfig(action.key); - return value === undefined ? 
'null' : JSON.stringify(value); - } - + return this.setConfig(action.key, action.configValue); + case 'get_config': + return this.getConfig(action.key); case 'list_models': - return this.listModels(); - + return this.listModels(action.includeDisabled); case 'set_default_model': - return this.setDefaultModel(action.modelQuery, action.slot || 'primary'); - - case 'select_option': - return this.selectOption(action.selector, action.optionText); + return this.setDefaultModel(action.modelQuery, action.slot ?? 'primary'); + case 'delete_model': + return this.deleteModel(action.modelQuery); + // Region 1: DOM Primitives case 'click': return this.clickElement(action.selector); - case 'click_by_text': return this.clickElementByText(action.text, action.tag); - case 'input': return this.inputText(action.selector, action.value); - case 'scroll': return this.scroll(action.selector, action.direction); + case 'select_option': + return this.selectOption(action.selector, action.optionText); + case 'wait': + return this.wait(action.durationMs); + case 'press_key': + return this.pressKey(action.key); + case 'read_text': + return this.readText(action.selector); default: - return `Unknown action type: ${(action as any).type}`; + return `Unknown action type: ${(action as { type: string }).type}`; } } - /** - * Task Orchestration — execute high-level tasks by internally planning a sequence of actions. - */ + // ── Region 4: Task Orchestration ───────────────────────────────────────── + private async executeTask(task: string, params?: Record): Promise { logger.info('Executing task', { task, params }); @@ -147,100 +199,128 @@ export class SelfControlService { case 'set_primary_model': case 'set_fast_model': { const slot = task === 'set_primary_model' ? 'primary' : 'fast'; - const modelQuery = params?.modelQuery || params?.model || ''; + const modelQuery = params?.modelQuery ?? params?.model ?? 
''; if (!modelQuery) return `Missing modelQuery for ${task}`; - // Try direct config match first const configResult = await this.setDefaultModel(modelQuery, slot); if (!configResult.toLowerCase().includes('not found')) { return configResult; } - - // Fallback to UI selection return this.setDefaultModelViaUI(modelQuery, slot); } case 'open_model_settings': { - useSceneStore.getState().openScene('settings'); - useSettingsStore.getState().setActiveTab('models'); - return 'Opened model settings'; + return this.openSettingsTab('models'); } case 'return_to_session': { - useSceneStore.getState().openScene('session'); - return 'Returned to session'; + return this.openScene('session'); + } + + case 'delete_model': { + const modelQuery = params?.modelQuery ?? params?.model ?? ''; + if (!modelQuery) return 'Missing modelQuery for delete_model'; + return this.deleteModel(modelQuery); } default: - return `Unknown task: ${task}. Available tasks: set_primary_model, set_fast_model, open_model_settings, return_to_session.`; + return `Unknown task: ${task}. Available tasks: set_primary_model, set_fast_model, open_model_settings, return_to_session, delete_model.`; } } - /** - * Normalize snake_case fields coming from Rust backend into camelCase. - */ - private normalizeAction(raw: SelfControlAction): SelfControlAction { - const r = raw as any; - const base = { ...r }; + // ── Region 2: App State ────────────────────────────────────────────────── + + private normalizeAction(rawAction: SelfControlIncomingAction | SelfControlAction): SelfControlAction { + const raw = rawAction as SelfControlIncomingAction; + const type = raw.type ?? 
raw.action; + if (!type) { + throw new Error('Missing self-control action type'); + } - if (r.scene_id !== undefined && base.sceneId === undefined) base.sceneId = r.scene_id; - if (r.tab_id !== undefined && base.tabId === undefined) base.tabId = r.tab_id; - if (r.model_query !== undefined && base.modelQuery === undefined) base.modelQuery = r.model_query; - if (r.option_text !== undefined && base.optionText === undefined) base.optionText = r.option_text; - if (r.config_value !== undefined && base.value === undefined) base.value = r.config_value; + return { + ...raw, + type, + sceneId: raw.sceneId ?? raw.scene_id, + tabId: raw.tabId ?? raw.tab_id, + configValue: raw.configValue ?? raw.config_value, + modelQuery: raw.modelQuery ?? raw.model_query, + optionText: raw.optionText ?? raw.option_text, + durationMs: raw.durationMs ?? raw.duration_ms, + includeDisabled: raw.includeDisabled ?? raw.include_disabled, + } as SelfControlAction; + } - return base as SelfControlAction; + private openScene(sceneId: string): string { + useSceneStore.getState().openScene(sceneId as any); + return `Opened scene: ${sceneId}`; } - // -------------------------------------------------------------------------- - // Model Operations - // -------------------------------------------------------------------------- + private openSettingsTab(tabId: string): string { + useSceneStore.getState().openScene('settings' as any); + useSettingsStore.getState().setActiveTab(tabId as any); + return `Opened settings tab: ${tabId}`; + } - private async fetchEnabledModels(): Promise { - const models = (await configManager.getConfig('ai.models')) || []; - logger.debug('Fetched ai.models', { count: models.length }); + // ── Region 3: Config & Model Operations ────────────────────────────────── - return models - .filter((m) => m && m.enabled !== false) + private async setConfig(key: string, value: unknown): Promise { + await configManager.setConfig(key, value); + return `Set config ${key} = 
${JSON.stringify(value)}`; + } + + private async getConfig(key: string): Promise { + const value = await configManager.getConfig(key); + return value === undefined ? 'null' : JSON.stringify(value); + } + + private async fetchModels(includeDisabled = false): Promise { + const models = (await configManager.getConfig('ai.models')) ?? []; + logger.debug('Fetched ai.models', { count: models.length, includeDisabled }); + + const mapped = models + .filter((m) => m && (includeDisabled || m.enabled !== false)) .map((m) => { - const providerItem = matchProviderCatalogItemByBaseUrl(m.base_url || ''); - const inferredProvider = providerItem?.id || m.provider || m.name || 'Unknown'; + const providerItem = matchProviderCatalogItemByBaseUrl(m.base_url ?? ''); + const inferredProvider = providerItem?.id ?? m.provider ?? m.name ?? 'Unknown'; const displayName = getModelDisplayName({ - name: m.name || inferredProvider, - model_name: m.model_name || '', - base_url: m.base_url || '', + name: m.name ?? inferredProvider, + model_name: m.model_name ?? '', + base_url: m.base_url ?? '', }); return { - id: String(m.id || ''), - name: String(m.name || ''), + id: String(m.id ?? ''), + name: String(m.name ?? ''), displayName, provider: inferredProvider, - modelName: String(m.model_name || ''), + modelName: String(m.model_name ?? ''), enabled: m.enabled !== false, }; - }) - .filter((m) => m.enabled && m.id); + }); + + return includeDisabled ? mapped.filter((m) => m.id) : mapped.filter((m) => m.enabled && m.id); } - private async listModels(): Promise { - const enabledModels = await this.fetchEnabledModels(); - if (enabledModels.length === 0) { - return 'No enabled models found.'; + private async listModels(includeDisabled = false): Promise { + const models = await this.fetchModels(includeDisabled); + if (models.length === 0) { + return includeDisabled ? 'No models configured.' 
: 'No enabled models found.'; } - const lines = enabledModels.map((m) => { - const parts = [`ID: ${m.id}`, `Display: ${m.displayName}`]; + const lines = models.map((m) => { + const status = m.enabled ? '[enabled]' : '[disabled]'; + const parts = [`${status} ID: ${m.id}`, `Display: ${m.displayName}`]; if (m.modelName) parts.push(`Model: ${m.modelName}`); if (m.provider) parts.push(`Provider: ${m.provider}`); return `- ${parts.join(' | ')}`; }); - return `Available enabled models (${enabledModels.length}):\n${lines.join('\n')}`; + const label = includeDisabled ? 'All configured models' : 'Enabled models'; + return `${label} (${models.length}):\n${lines.join('\n')}`; } private async setDefaultModel(modelQuery: string, slot: 'primary' | 'fast'): Promise { - const enabledModels = await this.fetchEnabledModels(); + const enabledModels = await this.fetchModels(); if (enabledModels.length === 0) { return 'No enabled models found. Please configure models first.'; @@ -285,9 +365,8 @@ export class SelfControlService { } private async setDefaultModelViaUI(modelQuery: string, slot: 'primary' | 'fast'): Promise { - useSceneStore.getState().openScene('settings'); - useSettingsStore.getState().setActiveTab('models'); - await new Promise((r) => setTimeout(r, 300)); + this.openSettingsTab('models'); + await this.wait(300); const targetAttr = slot === 'primary' ? 
'primary-model-select' : 'fast-model-select'; const selector = `[data-self-control-target="${targetAttr}"] .select__trigger`; @@ -299,15 +378,13 @@ export class SelfControlService { this.flashHighlight(trigger); trigger.click(); - await new Promise((r) => setTimeout(r, 200)); + await this.wait(200); - const options = Array.from(document.querySelectorAll('.select__option')); + const options = this.findDropdownOptions(); const query = modelQuery.toLowerCase(); - - const target = options.find((el) => { - const text = this.extractText(el).toLowerCase(); - return text.includes(query); - }) as HTMLElement | undefined; + const target = options.find((el) => this.extractText(el).toLowerCase().includes(query)) as + | HTMLElement + | undefined; if (!target) { document.dispatchEvent(new KeyboardEvent('keydown', { key: 'Escape', bubbles: true })); @@ -321,7 +398,7 @@ export class SelfControlService { } private async applyDefaultModel(slot: 'primary' | 'fast', model: ModelInfo): Promise { - const currentConfig = (await configManager.getConfig('ai.default_models')) || {}; + const currentConfig = (await configManager.getConfig('ai.default_models')) ?? {}; await configManager.setConfig('ai.default_models', { ...currentConfig, [slot]: model.id, @@ -329,9 +406,246 @@ export class SelfControlService { return `Set ${slot === 'primary' ? 'primary' : 'fast'} model to "${model.displayName}" (ID: ${model.id})`; } - // -------------------------------------------------------------------------- - // DOM Helpers - // -------------------------------------------------------------------------- + private async deleteModel(modelQuery: string): Promise { + const allModels = (await configManager.getConfig('ai.models')) ?? []; + if (allModels.length === 0) { + return 'No models configured.'; + } + + const query = modelQuery.toLowerCase().trim(); + const matches = allModels.filter((m) => { + const haystack = [ + String(m.id ?? '').toLowerCase(), + String(m.name ?? 
'').toLowerCase(), + String(m.model_name ?? '').toLowerCase(), + String(m.provider ?? '').toLowerCase(), + String(m.base_url ?? '').toLowerCase(), + ].join(' '); + return haystack.includes(query); + }); + + if (matches.length === 0) { + const available = allModels + .map((m) => `"${m.name ?? 'Unknown'}/${m.model_name ?? 'unknown'}" (ID: ${m.id})`) + .join(', '); + return `Model matching "${modelQuery}" not found. Available models: ${available}`; + } + + const deletedIds = new Set(matches.map((m) => String(m.id ?? ''))); + const updatedModels = allModels.filter((m) => !deletedIds.has(String(m.id ?? ''))); + await configManager.setConfig('ai.models', updatedModels); + + const currentDefaults = + (await configManager.getConfig>('ai.default_models')) ?? {}; + const remainingEnabledModels = updatedModels.filter((m) => m && m.enabled !== false && m.id); + const nextDefaults: Record = { ...currentDefaults }; + const notes: string[] = []; + + if (currentDefaults.primary && deletedIds.has(currentDefaults.primary)) { + const replacementPrimary = String(remainingEnabledModels[0]?.id ?? ''); + if (replacementPrimary) { + nextDefaults.primary = replacementPrimary; + notes.push(`primary fallback -> ${replacementPrimary}`); + } else { + delete nextDefaults.primary; + notes.push('primary cleared'); + } + } + + if (currentDefaults.fast && deletedIds.has(currentDefaults.fast)) { + const fallbackFast = nextDefaults.primary; + if (fallbackFast) { + nextDefaults.fast = fallbackFast; + notes.push(`fast fallback -> ${fallbackFast}`); + } else { + delete nextDefaults.fast; + notes.push('fast cleared'); + } + } + + if (notes.length > 0) { + await configManager.setConfig('ai.default_models', nextDefaults); + } + + const deletedNames = matches + .map((m) => `"${m.name ?? 'Unknown'}/${m.model_name ?? 'unknown'}" (ID: ${m.id})`) + .join(', '); + const suffix = notes.length > 0 ? 
` Default model updates: ${notes.join('; ')}.` : ''; + return `Deleted ${matches.length} model(s): ${deletedNames}.${suffix}`; + } + + // ── Region 1: DOM Primitives ───────────────────────────────────────────── + + private clickElement(selector: string): string { + const el = document.querySelector(selector) as HTMLElement | null; + if (!el) return `Element not found: ${selector}`; + this.flashHighlight(el); + this.dispatchClick(el); + return `Clicked element: ${selector}`; + } + + private clickElementByText(text: string, tag?: string): string { + const selector = tag ?? '*'; + const elements = Array.from(document.querySelectorAll(selector)); + const query = text.toLowerCase().trim(); + + const target = elements.find((el) => { + const candidates = [ + this.extractText(el).toLowerCase(), + (el.getAttribute('aria-label') ?? '').toLowerCase(), + (el.getAttribute('title') ?? '').toLowerCase(), + ((el as HTMLInputElement).placeholder ?? '').toLowerCase(), + ]; + return candidates.some((c) => c.includes(query)); + }) as HTMLElement | undefined; + + if (!target) return `Element with text "${text}" not found`; + this.flashHighlight(target); + this.dispatchClick(target); + return `Clicked element with text: ${text}`; + } + + private inputText(selector: string, value: string): string { + const el = document.querySelector(selector) as HTMLInputElement | HTMLTextAreaElement | null; + if (!el) return `Input element not found: ${selector}`; + + this.flashHighlight(el); + + if (el.tagName.toLowerCase() === 'input' || el.tagName.toLowerCase() === 'textarea') { + el.focus(); + el.dispatchEvent(new FocusEvent('focus', { bubbles: true })); + + // Use native value setter to bypass React controlled-component guards + const prototype = Object.getPrototypeOf(el); + const nativeSetter = Object.getOwnPropertyDescriptor(prototype, 'value')?.set; + if (nativeSetter) { + nativeSetter.call(el, value); + } else { + el.value = value; + } + + el.dispatchEvent(new Event('input', { bubbles: true 
})); + el.dispatchEvent(new InputEvent('input', { bubbles: true, data: value, inputType: 'insertText' })); + el.dispatchEvent(new Event('change', { bubbles: true })); + el.dispatchEvent(new FocusEvent('blur', { bubbles: true })); + + return `Set input ${selector} to "${value}"`; + } + + if (el.isContentEditable) { + el.focus(); + el.textContent = value; + el.dispatchEvent(new Event('input', { bubbles: true })); + el.dispatchEvent( + new InputEvent('input', { bubbles: true, data: value, inputType: 'insertText' }), + ); + el.dispatchEvent(new FocusEvent('blur', { bubbles: true })); + return `Set contenteditable ${selector} to "${value}"`; + } + + return `Element ${selector} is not an input`; + } + + private scroll( + selector: string | undefined, + direction: 'up' | 'down' | 'top' | 'bottom', + ): string { + const el = selector + ? (document.querySelector(selector) as HTMLElement | null) + : (document.scrollingElement as HTMLElement | null); + + if (!el) return `Scroll target not found: ${selector ?? 'document'}`; + + const scrollAmount = 500; + switch (direction) { + case 'up': + el.scrollBy({ top: -scrollAmount, behavior: 'smooth' }); + return `Scrolled up ${selector ?? 'document'}`; + case 'down': + el.scrollBy({ top: scrollAmount, behavior: 'smooth' }); + return `Scrolled down ${selector ?? 'document'}`; + case 'top': + el.scrollTo({ top: 0, behavior: 'smooth' }); + return `Scrolled to top ${selector ?? 'document'}`; + case 'bottom': + el.scrollTo({ top: el.scrollHeight, behavior: 'smooth' }); + return `Scrolled to bottom ${selector ?? 
'document'}`; + } + } + + private async selectOption(selector: string, optionText: string): Promise { + const trigger = document.querySelector(selector) as HTMLElement | null; + if (!trigger) return `Select trigger not found: ${selector}`; + + this.flashHighlight(trigger); + trigger.click(); + await this.wait(200); + + const options = this.findDropdownOptions(); + const query = optionText.toLowerCase(); + const target = options.find((el) => this.extractText(el).toLowerCase().includes(query)) as + | HTMLElement + | undefined; + + if (!target) { + document.dispatchEvent(new KeyboardEvent('keydown', { key: 'Escape', bubbles: true })); + const optionTexts = options.slice(0, 20).map((el) => `"${this.extractText(el)}"`).join(', '); + return `Option "${optionText}" not found in dropdown. Available options: ${optionTexts}`; + } + + this.flashHighlight(target); + target.click(); + return `Selected option "${optionText}" in ${selector}`; + } + + private async wait(durationMs: number): Promise { + const ms = Math.max(0, Math.min(durationMs, 30000)); + await new Promise((r) => setTimeout(r, ms)); + return `Waited ${ms}ms`; + } + + private pressKey(key: string): string { + const normalized = key.trim(); + if (!normalized) return 'No key specified'; + document.dispatchEvent( + new KeyboardEvent('keydown', { key: normalized, bubbles: true, cancelable: true }), + ); + document.dispatchEvent( + new KeyboardEvent('keyup', { key: normalized, bubbles: true, cancelable: true }), + ); + return `Pressed key: ${normalized}`; + } + + private readText(selector: string): string { + const el = document.querySelector(selector); + if (!el) return `Element not found: ${selector}`; + const text = this.extractText(el).slice(0, 2000); + return text || '(empty text)'; + } + + // ── DOM Utilities ───────────────────────────────────────────────────────── + + /** Find dropdown option elements using the prioritised selector list. 
*/ + private findDropdownOptions(): Element[] { + for (const sel of DROPDOWN_OPTION_SELECTORS) { + const options = Array.from(document.querySelectorAll(sel)); + if (options.length > 0) return options; + } + return []; + } + + /** Dispatch a realistic pointer+mouse+click event sequence on an element. */ + private dispatchClick(el: HTMLElement): void { + const rect = el.getBoundingClientRect(); + const x = rect.left + rect.width / 2; + const y = rect.top + rect.height / 2; + const common = { bubbles: true, cancelable: true, clientX: x, clientY: y }; + el.dispatchEvent(new PointerEvent('pointerdown', { ...common, pointerType: 'mouse' })); + el.dispatchEvent(new MouseEvent('mousedown', common)); + el.dispatchEvent(new PointerEvent('pointerup', { ...common, pointerType: 'mouse' })); + el.dispatchEvent(new MouseEvent('mouseup', common)); + el.dispatchEvent(new MouseEvent('click', common)); + } private collectInteractiveElements(): SimplifiedElement[] { const candidates = document.querySelectorAll( @@ -358,7 +672,7 @@ export class SelfControlService { '.select__trigger', '.select__option', '.switch', - ].join(',') + ].join(','), ); const elements: SimplifiedElement[] = []; @@ -374,40 +688,98 @@ export class SelfControlService { const rect = htmlEl.getBoundingClientRect(); if (rect.width < 2 || rect.height < 2) return; - if (rect.right < 0 || rect.bottom < 0 || rect.left > viewportW || rect.top > viewportH) return; + if (rect.right < 0 || rect.bottom < 0 || rect.left > viewportW || rect.top > viewportH) + return; const style = window.getComputedStyle(htmlEl); - if (style.display === 'none' || style.visibility === 'hidden' || parseFloat(style.opacity) < 0.01) { + if ( + style.display === 'none' || + style.visibility === 'hidden' || + parseFloat(style.opacity) < 0.01 + ) { return; } - const text = this.extractText(el).slice(0, 120); - const ariaLabel = el.getAttribute('aria-label') || undefined; - const placeholder = (el as HTMLInputElement).placeholder || undefined; - const 
title = el.getAttribute('title') || undefined; - const dataTestid = el.getAttribute('data-testid') || undefined; - const dataSelfControlTarget = el.getAttribute('data-self-control-target') || undefined; + const tag = el.tagName.toLowerCase(); + const isLayoutContainer = [ + 'body', + 'html', + 'main', + 'div', + 'section', + 'article', + 'nav', + 'aside', + ].includes(tag); + const dataTestid = el.getAttribute('data-testid') ?? undefined; + const dataSelfControlTarget = el.getAttribute('data-self-control-target') ?? undefined; + + if (isLayoutContainer && !dataTestid && !dataSelfControlTarget && !el.id) { + const isSmall = rect.width < 400 && rect.height < 200; + const role = el.getAttribute('role'); + if (!isSmall || !role) return; + } - const hasIdentity = !!(text || el.id || dataTestid || dataSelfControlTarget || ariaLabel || placeholder || title); + const text = this.extractText(el).slice(0, 120); + const ariaLabel = el.getAttribute('aria-label') ?? undefined; + const placeholder = (el as HTMLInputElement).placeholder ?? undefined; + const title = el.getAttribute('title') ?? undefined; + + const hasIdentity = !!( + text || + el.id || + dataTestid || + dataSelfControlTarget || + ariaLabel || + placeholder || + title + ); const isInteractive = this.isInteractive(el); if (!hasIdentity && !isInteractive) return; elements.push({ - tag: el.tagName.toLowerCase(), + tag, id: el.id || undefined, class: el.className || undefined, text, ariaLabel, - role: el.getAttribute('role') || undefined, + role: el.getAttribute('role') ?? 
undefined, placeholder, title, dataTestid, dataSelfControlTarget, interactive: isInteractive, - rect: { x: Math.round(rect.x), y: Math.round(rect.y), width: Math.round(rect.width), height: Math.round(rect.height) }, + rect: { + x: Math.round(rect.x), + y: Math.round(rect.y), + width: Math.round(rect.width), + height: Math.round(rect.height), + }, }); }); + elements.sort((a, b) => { + const score = (e: SimplifiedElement) => { + let s = 0; + if (e.dataSelfControlTarget) s += 100; + if (e.dataTestid) s += 80; + if ( + e.interactive && + (e.tag === 'button' || + e.tag === 'a' || + e.tag === 'input' || + e.tag === 'select' || + e.tag === 'textarea') + ) + s += 60; + if (e.ariaLabel) s += 40; + if (e.text) s += 20; + if (e.interactive) s += 10; + return s; + }; + return score(b) - score(a); + }); + return elements; } @@ -428,12 +800,12 @@ export class SelfControlService { activeScene: string, activeSettingsTab: string | undefined, elements: SimplifiedElement[], - targets: Record + targets: Record, ): string[] { const hints: string[] = []; if (activeScene === 'settings') { - hints.push(`Current scene: Settings (${activeSettingsTab || 'unknown tab'})`); + hints.push(`Current scene: Settings (${activeSettingsTab ?? 
'unknown tab'})`); if (targets['primary-model-select']) { hints.push('You can change the primary model via the "primary-model-select" target.'); @@ -443,29 +815,75 @@ export class SelfControlService { } } - const hasSelect = elements.some((el) => el.class?.includes('select__trigger') || el.role === 'combobox'); + const hasSelect = elements.some( + (el) => el.class?.includes('select__trigger') || el.role === 'combobox', + ); const hasInput = elements.some((el) => el.tag === 'input' || el.tag === 'textarea'); - const hasSwitch = elements.some((el) => el.role === 'switch' || el.class?.includes('switch')); + const hasSwitch = elements.some( + (el) => el.role === 'switch' || el.class?.includes('switch'), + ); if (hasSelect) hints.push('This page contains dropdown selects.'); if (hasInput) hints.push('This page contains text inputs.'); if (hasSwitch) hints.push('This page contains toggle switches.'); const quickActions = [ - 'open_scene with scene_id "session" to return to the chat', - 'open_scene with scene_id "settings" to open settings', + 'open_scene with sceneId "session" to return to the chat', + 'open_scene with sceneId "settings" to open settings', 'execute_task with task "open_model_settings" to jump directly to model settings', 'execute_task with task "set_primary_model" and params { modelQuery: "..." } to set the main model', + 'execute_task with task "delete_model" and params { modelQuery: "..." } to delete a model', ]; hints.push(`Quick actions: ${quickActions.join('; ')}`); return hints; } + private async maybeAppendModelSummary(hints: string[]): Promise { + try { + const models = await this.fetchModels(true); + if (models.length === 0) return; + const lines = models.map( + (m) => `- ${m.enabled ? 
'[enabled]' : '[disabled]'} ${m.displayName} (${m.provider}, ID: ${m.id})`, + ); + hints.push(`Configured models:\n${lines.join('\n')}`); + } catch { + // ignore + } + } + private extractText(el: Element): string { + const tag = el.tagName.toLowerCase(); + const directAria = el.getAttribute('aria-label') ?? ''; + const directTitle = (el as HTMLElement).title ?? ''; + + const isContainer = [ + 'div', + 'section', + 'article', + 'main', + 'nav', + 'aside', + 'header', + 'footer', + ].includes(tag); + + if (isContainer) { + if (directAria) return directAria; + if (directTitle) return directTitle; + if (el.id) return ''; + + const interactiveChildren = el.querySelectorAll( + 'button, a, input, [role="button"], [role="tab"], [data-testid]', + ).length; + if (interactiveChildren > 4) { + return ''; + } + } + const walk = (node: Node): string => { if (node.nodeType === Node.TEXT_NODE) { - return node.textContent || ''; + return node.textContent ?? ''; } if (node.nodeType !== Node.ELEMENT_NODE) { return ''; @@ -475,130 +893,39 @@ export class SelfControlService { if (style.display === 'none' || style.visibility === 'hidden') { return ''; } - return Array.from(elNode.childNodes) - .map(walk) - .join('') - .replace(/\s+/g, ' ') - .trim(); + return Array.from(elNode.childNodes).map(walk).join('').replace(/\s+/g, ' ').trim(); }; - const directText = el.getAttribute('aria-label') || ''; const childText = walk(el); - return (directText || childText || (el as HTMLElement).title || '').trim(); + return (directAria || childText || directTitle || '').trim(); } private isInteractive(el: Element): boolean { const tag = el.tagName.toLowerCase(); const role = el.getAttribute('role'); if (['button', 'a', 'input', 'textarea', 'select', 'label'].includes(tag)) return true; - if (['button', 'link', 'tab', 'menuitem', 'combobox', 'option', 'radio', 'checkbox', 'switch'].includes(role || '')) return true; + if ( + [ + 'button', + 'link', + 'tab', + 'menuitem', + 'combobox', + 'option', + 
'radio', + 'checkbox', + 'switch', + ].includes(role ?? '') + ) + return true; if ((el as HTMLElement).onclick != null) return true; if (el.getAttribute('tabindex') === '0') return true; - if (el.classList.contains('select__trigger') || el.classList.contains('select__option')) return true; + if (el.classList.contains('select__trigger') || el.classList.contains('select__option')) + return true; if (el.getAttribute('contenteditable') === 'true') return true; return false; } - private async selectOption(selector: string, optionText: string): Promise { - const trigger = document.querySelector(selector) as HTMLElement | null; - if (!trigger) return `Select trigger not found: ${selector}`; - - this.flashHighlight(trigger); - trigger.click(); - await new Promise((r) => setTimeout(r, 150)); - - const options = Array.from(document.querySelectorAll('.select__option')); - const target = options.find((el) => { - const text = this.extractText(el).toLowerCase(); - return text.includes(optionText.toLowerCase()); - }) as HTMLElement | undefined; - - if (!target) { - document.dispatchEvent(new KeyboardEvent('keydown', { key: 'Escape', bubbles: true })); - return `Option "${optionText}" not found in dropdown`; - } - - this.flashHighlight(target); - target.click(); - return `Selected option "${optionText}" in ${selector}`; - } - - private clickElement(selector: string): string { - const el = document.querySelector(selector) as HTMLElement | null; - if (!el) return `Element not found: ${selector}`; - this.flashHighlight(el); - el.click(); - return `Clicked element: ${selector}`; - } - - private clickElementByText(text: string, tag?: string): string { - const selector = tag || '*'; - const elements = Array.from(document.querySelectorAll(selector)); - const query = text.toLowerCase().trim(); - - const target = elements.find((el) => { - const candidates = [ - this.extractText(el).toLowerCase(), - (el.getAttribute('aria-label') || '').toLowerCase(), - (el.getAttribute('title') || 
'').toLowerCase(), - ((el as HTMLInputElement).placeholder || '').toLowerCase(), - ]; - return candidates.some((c) => c.includes(query)); - }) as HTMLElement | undefined; - - if (!target) return `Element with text "${text}" not found`; - this.flashHighlight(target); - target.click(); - return `Clicked element with text: ${text}`; - } - - private inputText(selector: string, value: string): string { - const el = document.querySelector(selector) as HTMLInputElement | HTMLTextAreaElement | null; - if (!el) return `Input element not found: ${selector}`; - - this.flashHighlight(el); - - if (el.tagName.toLowerCase() === 'input' || el.tagName.toLowerCase() === 'textarea') { - el.focus(); - el.value = value; - el.dispatchEvent(new Event('input', { bubbles: true })); - el.dispatchEvent(new Event('change', { bubbles: true })); - return `Set input ${selector} to "${value}"`; - } - - if (el.isContentEditable) { - el.textContent = value; - el.dispatchEvent(new Event('input', { bubbles: true })); - return `Set contenteditable ${selector} to "${value}"`; - } - - return `Element ${selector} is not an input`; - } - - private scroll(selector: string | undefined, direction: 'up' | 'down' | 'top' | 'bottom'): string { - const el = selector - ? 
(document.querySelector(selector) as HTMLElement | null) - : (document.scrollingElement as HTMLElement | null); - - if (!el) return `Scroll target not found: ${selector || 'document'}`; - - const scrollAmount = 500; - switch (direction) { - case 'up': - el.scrollBy({ top: -scrollAmount, behavior: 'smooth' }); - return `Scrolled up ${selector || 'document'}`; - case 'down': - el.scrollBy({ top: scrollAmount, behavior: 'smooth' }); - return `Scrolled down ${selector || 'document'}`; - case 'top': - el.scrollTo({ top: 0, behavior: 'smooth' }); - return `Scrolled to top ${selector || 'document'}`; - case 'bottom': - el.scrollTo({ top: el.scrollHeight, behavior: 'smooth' }); - return `Scrolled to bottom ${selector || 'document'}`; - } - } - private flashHighlight(el: HTMLElement): void { const rect = el.getBoundingClientRect(); if (!this.highlightOverlay) {