From 98910723e95a46c68d00500f3a15c9f50ed54045 Mon Sep 17 00:00:00 2001 From: Chris Busillo Date: Mon, 1 Jun 2026 19:33:37 -0400 Subject: [PATCH] docs(test-tui): add iTerm2 visibility helper --- .codex/skills/test-tui/SKILL.md | 209 ++++++++- .../test-tui/scripts/iterm2_tui_visibility.py | 399 ++++++++++++++++++ 2 files changed, 596 insertions(+), 12 deletions(-) create mode 100644 .codex/skills/test-tui/scripts/iterm2_tui_visibility.py diff --git a/.codex/skills/test-tui/SKILL.md b/.codex/skills/test-tui/SKILL.md index ca6d769ea95..c3aaff5ad88 100644 --- a/.codex/skills/test-tui/SKILL.md +++ b/.codex/skills/test-tui/SKILL.md @@ -1,26 +1,211 @@ --- name: test-tui -description: Guide for testing Every Code TUI interactively +description: >- + Repo-local guide for testing the Every Code TUI interactively, including + live-terminal visibility limits, VT100 snapshot evidence, restart dogfooding, + and how to avoid overclaiming visual verification. commands: - name: start-tui source: repo - example_argv: ["just", "codex", "-c", "log_dir=<some_temp_dir>"] - purpose: Start the TUI interactively from the repo recipe with logs directed to a temp directory. + example_argv: + - cargo + - run + - --manifest-path + - code-rs/Cargo.toml + - -p + - code-tui + - -- + - -c + - log_dir=<some_temp_dir> + purpose: >- + Start the Every Code TUI interactively from the code-rs workspace with logs + directed to a temp directory. + - name: vt100-snapshots + source: repo + example_argv: + - cargo + - test + - --manifest-path + - code-rs/Cargo.toml + - -p + - code-tui + - --test + - vt100_chatwidget_snapshot + - --features + - test-helpers + - -- + - --nocapture + purpose: Render deterministic terminal frames through the VT100 snapshot harness. + - name: iterm2-tui-list + source: skill + resource_path: scripts/iterm2_tui_visibility.py + example_argv: + - uv + - run + - scripts/iterm2_tui_visibility.py + - list + purpose: List iTerm2 sessions with metadata only before selecting a TUI session.
+ - name: iterm2-tui-text + source: skill + resource_path: scripts/iterm2_tui_visibility.py + example_argv: + - uv + - run + - scripts/iterm2_tui_visibility.py + - text + - <session-id> + purpose: Capture visible text from one selected iTerm2 session. + - name: iterm2-tui-windows + source: skill + resource_path: scripts/iterm2_tui_visibility.py + example_argv: + - uv + - run + - scripts/iterm2_tui_visibility.py + - windows + purpose: List visible iTerm2 macOS window ids for targeted screenshots. + - name: iterm2-tui-screenshot + source: skill + resource_path: scripts/iterm2_tui_visibility.py + example_argv: + - uv + - run + - scripts/iterm2_tui_visibility.py + - screenshot + - <session-id> + - --window-id + - <window-id> + purpose: Capture pixels for one selected iTerm2/macOS window. +resources: + - path: scripts/iterm2_tui_visibility.py + kind: script + description: Read-only macOS/iTerm2 SDK helper for TUI session text and window capture. workflow_defaults: - name: rust_log value: RUST_LOG=trace - description: Always set trace logging when starting the TUI for interactive testing. + description: >- + Always set trace logging when starting the TUI for interactive testing. - name: input_delivery value: text_then_enter - description: Send text first, then Enter as a separate write when driving the TUI programmatically. + description: >- + Send text first, then Enter as a separate write when driving the TUI + programmatically. --- -You can start and use Every Code TUI to verify changes. +# Test TUI + +This is a repo-local Every Code skill, not a generic TUI testing guide. Use it +for the Every Code TUI, the `code` command, and this repository's Rust +workspace. The `just codex` recipe name is a compatibility-era repo target, so +prefer direct `code-rs` commands when validating Every Code TUI changes. + +You can start and use the Every Code TUI to verify changes. + +## Important Notes + +- Start interactively. +- Always set `RUST_LOG="trace"` when starting the process.
+- Pass `-c log_dir=<some_temp_dir>` so logs are written to a known location. +- When sending a test message programmatically, send text first, then send Enter + in a separate write. Do not send text plus Enter in one burst. +- Use the Every Code TUI binary from the repo root: + `cargo run --manifest-path code-rs/Cargo.toml -p code-tui -- -c log_dir=<some_temp_dir>`. + +## Visibility Contract + +Do not claim that you visually verified the live TUI unless you actually used a +live terminal surface that exposed the rendered UI. A rebuilt `code` binary, +passing tests, log inspection, Code Bridge events, and embedded string checks are +useful evidence, but they are not live visual verification by themselves. + +Code Bridge may capture browser/app surfaces, console logs, pageviews, or bridge +screenshots when a client is connected. It does not prove that the current +terminal TUI pixels were visible unless a screenshot or control response clearly +comes from the TUI surface being tested. + +Use precise language in closeout: + +- `live TUI visually verified` only after direct TUI capture/interaction. +- `iTerm2 session verified` after reading the relevant iTerm2 session's live + terminal contents through an explicit iTerm2 capture helper. +- `macOS window screenshot verified` after capturing the targeted window or + rectangle pixels and inspecting the image. +- `VT100-render verified` after the snapshot harness renders the relevant frame. +- `binary/restart verified` after `code --version`, PATH target, and process + restart checks pass. +- `not visually verified` when you only have tests/logs/binary evidence. + +## Evidence Ladder + +Prefer the strongest practical evidence for the change: + +1. Live interactive TUI: start with `RUST_LOG=trace cargo run --manifest-path +code-rs/Cargo.toml -p code-tui -- -c log_dir=<some_temp_dir>`, drive the workflow, + and inspect the rendered terminal state directly. +2.
iTerm2 session capture on macOS: use a read-only helper to list iTerm2 + windows/tabs/sessions, choose the Every Code session by title, tty, process, + cwd, or visible text, and capture that session's live terminal contents. +3. Targeted macOS screenshot: use a window-id or rectangle capture when pixels, + colors, cropping, or non-text rendering matter. +4. VT100 harness: use `render_chat_widget_to_vt100` or the + `vt100_chatwidget_snapshot` test when the scenario can be seeded + deterministically without a live process. +5. Focused unit/integration tests: acceptable for state transitions, event + routing, identity matching, and nonvisual behavior. +6. Binary/restart checks: verify PATH, symlink target, `code --version`, and + relevant strings or logs, but report them as binary evidence only. + +If a change is specifically about footer text, ordering, clipping, scroll state, +focus, cursor position, or terminal chrome, prefer live TUI or VT100-rendered +evidence over ordinary unit assertions. + +## macOS And iTerm2 Capture + +On macOS with iTerm2, prefer iTerm2 session text capture for TUI text/cell +evidence and macOS window screenshots for pixel evidence. Use the PEP 723 repo +helper through `uv run scripts/iterm2_tui_visibility.py` from this skill's +directory, or use the repo-root path shown below. Dependencies resolve without a +manually created virtual environment. + +The helper is read-only by default and supports these operations: + +- List iTerm2 windows, tabs, and sessions with stable identifiers and enough + metadata to choose the intended Every Code session: + `uv run .codex/skills/test-tui/scripts/iterm2_tui_visibility.py list`. +- Capture one selected session's visible terminal contents: + `uv run .codex/skills/test-tui/scripts/iterm2_tui_visibility.py text + <session-id>`. +- List visible iTerm2 macOS windows for screenshot window ids: + `uv run .codex/skills/test-tui/scripts/iterm2_tui_visibility.py windows`.
+- Capture one targeted macOS window: + `uv run .codex/skills/test-tui/scripts/iterm2_tui_visibility.py screenshot + <session-id> --window-id <window-id>`. +- Label every capture with the source window/tab/session so closeout can say + exactly what was inspected. + +Use targeted screenshots only after resolving a specific window id. The helper +uses macOS `screencapture -l <window-id>` for one window. It intentionally avoids +whole-screen screenshots and x/y rectangle capture by default. + +If the helper returns `llm_advice`, follow it: tell the user which macOS +permission or optional dependency is missing and suggest the install command it +reports. The helper uses the iTerm2 Python SDK for session capture and Quartz +Python bindings (`pyobjc-framework-Quartz`) for window-id listing; both are +declared in the script's inline dependency metadata for `uv run`. + +Do not silently inspect unrelated terminal sessions. Start with metadata/listing, +then capture the selected session or the set of sessions the user explicitly +asked to inspect. + +## Restart Dogfooding + +After rebuilding the PATH-resolved binary with `just local-code-rebuild`, verify: -Important notes: +- `which code` +- `readlink "$(which code)"` when the PATH entry is a symlink +- `code --version` +- `git status --short --branch` -Start interactively. -Always set RUST_LOG="trace" when starting the process. -Pass `-c log_dir=<some_temp_dir>` argument to have logs written to a specific directory to help with debugging. -When sending a test message programmatically, send text first, then send Enter in a separate write (do not send text + Enter in one burst). -Use `just codex` target to run - `just codex -c ...` +This proves the restarted harness can pick up the rebuilt binary. It still does +not prove the live TUI rendered a particular visual state unless paired with live +TUI or VT100 evidence.
# Patch provenance: new file (mode 100644)
#   .codex/skills/test-tui/scripts/iterm2_tui_visibility.py

# /// script
# requires-python = ">=3.11"
# dependencies = [
#   "iterm2>=2.9",
#   "pyobjc-framework-Quartz>=10.0",
# ]
# ///
"""Read-only macOS/iTerm2 visibility helper for Every Code TUI dogfooding.

Subcommands (each prints exactly one JSON document on stdout):

* ``list``       -- metadata for every iTerm2 window/tab/session.
* ``text``       -- visible text of one session selected by session id.
* ``windows``    -- on-screen iTerm2 macOS windows and their window ids.
* ``screenshot`` -- PNG capture of one macOS window selected by window id.

Failures the caller can act on are raised as ``UserFacingError`` and are
reported by ``main`` as ``{"ok": false, "error": ..., "llm_advice": ...}``
with exit status 2.
"""

import argparse
import asyncio
import json
import re
import shutil
import subprocess
import sys
from dataclasses import asdict, dataclass
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Awaitable, Callable, TypeVar


T = TypeVar("T")


@dataclass
class Advice:
    """Guidance attached to an error so the calling agent can tell the user what to do."""

    # One-sentence description of what is wrong.
    summary: str
    # What the agent should advise the user to do next.
    action: str
    # Optional command that installs or exercises the missing piece.
    install: str | None = None


class UserFacingError(RuntimeError):
    """Error that ``main`` turns into a JSON failure payload instead of a traceback."""

    def __init__(self, message: str, advice: Advice | None = None) -> None:
        super().__init__(message)
        self.advice = advice


def repo_root() -> Path:
    """Return the repository root that contains this script.

    The nearest ancestor holding a ``.git`` entry wins.  Without one, the
    expected layout ``<root>/.codex/skills/test-tui/scripts/<file>`` is
    assumed; if the script lives at a shallower path than that layout, the
    script's own directory is returned instead of raising ``IndexError``.
    """
    path = Path(__file__).resolve()
    for parent in path.parents:
        if (parent / ".git").exists():
            return parent
    ancestors = path.parents
    return ancestors[4] if len(ancestors) > 4 else path.parent


def default_capture_dir() -> Path:
    """Return the default screenshot directory, ``<repo root>/.code/tui-captures``."""
    return repo_root() / ".code" / "tui-captures"


def utc_stamp() -> str:
    """Return the current UTC time formatted as ``YYYYMMDDTHHMMSSZ``."""
    return datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%SZ")


def json_print(payload: Any) -> None:
    """Print ``payload`` as indented, key-sorted JSON without ASCII escaping."""
    print(json.dumps(payload, ensure_ascii=False, indent=2, sort_keys=True))


def advice_payload(error: UserFacingError) -> dict[str, Any]:
    """Build the JSON failure payload for ``error``.

    ``llm_advice`` is included only when the error carries an ``Advice``.
    """
    payload: dict[str, Any] = {"ok": False, "error": str(error)}
    if error.advice is not None:
        payload["llm_advice"] = asdict(error.advice)
    return payload


def ensure_macos(feature: str) -> None:
    """Raise ``UserFacingError`` unless running on macOS (``sys.platform == "darwin"``)."""
    if sys.platform == "darwin":
        return
    raise UserFacingError(
        f"{feature} requires macOS",
        Advice(
            summary="This helper targets iTerm2 on macOS.",
            action="Advise the user to use VT100 snapshot tests or a platform-specific terminal capture helper on this system.",
        ),
    )


def import_iterm2() -> Any:
    """Import and return the ``iterm2`` module, or raise ``UserFacingError`` with install advice."""
    try:
        import iterm2  # type: ignore[import-not-found]
    except Exception as exc:
        raise UserFacingError(
            "the iTerm2 Python SDK is not available",
            Advice(
                summary="The helper needs the iTerm2 Python SDK to list and capture sessions.",
                action="Advise the user to run this script through uv so PEP 723 dependencies are installed automatically.",
                install="uv run .codex/skills/test-tui/scripts/iterm2_tui_visibility.py list",
            ),
        ) from exc
    return iterm2


def import_quartz() -> tuple[Any, int, int]:
    """Import the Quartz window-list API.

    Returns:
        ``(CGWindowListCopyWindowInfo, kCGWindowListOptionOnScreenOnly, kCGNullWindowID)``.

    Raises:
        UserFacingError: when the Quartz bindings cannot be imported.
    """
    try:
        from Quartz import CGWindowListCopyWindowInfo  # type: ignore[import-not-found]
        from Quartz import kCGNullWindowID  # type: ignore[import-not-found]
        from Quartz import kCGWindowListOptionOnScreenOnly  # type: ignore[import-not-found]
    except Exception as exc:
        raise UserFacingError(
            "Quartz Python bindings are not available",
            Advice(
                summary="Window-id screenshots need pyobjc Quartz bindings.",
                action="Advise the user to run this script through uv so PEP 723 dependencies are installed automatically.",
                install="uv run .codex/skills/test-tui/scripts/iterm2_tui_visibility.py windows",
            ),
        ) from exc
    return CGWindowListCopyWindowInfo, kCGWindowListOptionOnScreenOnly, kCGNullWindowID


def run_command(argv: list[str]) -> subprocess.CompletedProcess[str]:
    """Run ``argv`` without a shell, capturing stdout/stderr as text.

    A non-zero exit status is returned to the caller, not raised.

    Raises:
        UserFacingError: when the executable ``argv[0]`` does not exist.
    """
    try:
        return subprocess.run(argv, check=False, text=True, capture_output=True)
    except FileNotFoundError as exc:
        raise UserFacingError(
            f"required command is missing: {argv[0]}",
            Advice(
                summary=f"{argv[0]} is not available on PATH.",
                action="Advise the user to install the missing macOS command or use the iTerm2 text capture path instead.",
            ),
        ) from exc


def process_for_tty(tty: str | None) -> str | None:
    """Return the last ``ps`` row (``comm`` then ``args``) for ``tty``.

    Returns ``None`` when ``tty`` is empty, when ``ps`` exits non-zero, or
    when it prints no rows.
    """
    if not tty:
        return None
    proc = run_command(["ps", "-t", tty.replace("/dev/", ""), "-o", "comm=", "-o", "args="])
    if proc.returncode != 0:
        return None
    lines = [line.strip() for line in proc.stdout.splitlines() if line.strip()]
    return lines[-1] if lines else None


async def session_variable(session: Any, name: str) -> Any:
    """Best-effort read of an iTerm2 session variable; any failure yields ``None``."""
    try:
        return await session.async_get_variable(name)
    except Exception:
        # Deliberate best-effort: a missing variable must not abort the listing.
        return None


def likely_every_code_score(session: dict[str, Any]) -> int:
    """Score how likely a session-metadata dict describes an Every Code TUI.

    Heuristic only.  Points are awarded for:

    * ``+3`` -- the whole word ``code`` or ``codex`` in the name, tty,
      foreground process, session id, or job name;
    * ``+5`` -- the phrase ``every code`` in those same fields;
    * ``+2`` -- ``code-rs`` in those fields or in the working directory.

    The working directory is consulted only for the ``code-rs`` marker so a
    shell that merely sits in a directory named ``code`` does not score.
    """
    haystack = " ".join(
        str(session.get(key) or "")
        for key in ("name", "tty", "foreground_process", "session_id", "job_name")
    ).lower()
    cwd = str(session.get("cwd") or "").lower()
    score = 0
    if re.search(r"\b(code|codex)\b", haystack):
        score += 3
    if "every code" in haystack:
        score += 5
    if "code-rs" in haystack or "code-rs" in cwd:
        score += 2
    return score


def normalize_window_id(value: Any) -> int:
    """Validate a ``--window-id`` value and return it as a positive integer.

    The id is later interpolated into the screenshot filename and passed to
    ``screencapture -l``, so anything other than a plain positive decimal
    integer is rejected.

    Raises:
        UserFacingError: when the value is missing, or is not a positive
            decimal integer.
    """
    text = "" if value is None else str(value).strip()
    if not text:
        raise UserFacingError(
            "screenshot requires --window-id",
            Advice(
                summary="The helper captures windows, not x/y rectangles by default.",
                action="Advise the user to run the windows command, choose the intended iTerm2 window id, then rerun screenshot with --window-id.",
                install="uv run .codex/skills/test-tui/scripts/iterm2_tui_visibility.py windows",
            ),
        )
    if not (text.isascii() and text.isdigit()) or int(text) == 0:
        raise UserFacingError(
            f"invalid --window-id: {text!r}",
            Advice(
                summary="The window id must be a positive integer as reported by the windows command.",
                action="Advise the user to run the windows command, choose the intended iTerm2 window id, then rerun screenshot with --window-id.",
                install="uv run .codex/skills/test-tui/scripts/iterm2_tui_visibility.py windows",
            ),
        )
    return int(text)


async def session_metadata(
    session: Any,
    window_index: int,
    tab_index: int,
    session_index: int,
) -> dict[str, Any]:
    """Collect listing metadata for one iTerm2 session.

    The indices are the caller's positions for the window, tab, and session.
    The result also carries ``likely_every_code_score`` computed from the
    collected fields.
    """
    grid_size = getattr(session, "grid_size", None)
    tty = await session_variable(session, "tty")
    cwd = await session_variable(session, "path")
    job_name = await session_variable(session, "jobName")
    metadata = {
        "window_index": window_index,
        "tab_index": tab_index,
        "session_index": session_index,
        "session_id": getattr(session, "session_id", None),
        "name": getattr(session, "name", None),
        "tty": tty,
        "cwd": cwd,
        "job_name": job_name,
        "foreground_process": process_for_tty(tty),
        "grid_size": {
            "width": getattr(grid_size, "width", None),
            "height": getattr(grid_size, "height", None),
        },
    }
    metadata["likely_every_code_score"] = likely_every_code_score(metadata)
    return metadata


async def list_sessions(connection: Any) -> list[dict[str, Any]]:
    """Return metadata for every session, with 1-based window/tab/session indices."""
    app = await import_iterm2().async_get_app(connection)
    sessions: list[dict[str, Any]] = []
    for window_index, window in enumerate(app.windows, start=1):
        for tab_index, tab in enumerate(window.tabs, start=1):
            for session_index, session in enumerate(tab.sessions, start=1):
                sessions.append(
                    await session_metadata(session, window_index, tab_index, session_index)
                )
    return sessions


async def find_session(connection: Any, session_id: str) -> Any:
    """Return the session whose ``session_id`` equals ``session_id``.

    Raises:
        UserFacingError: when no window/tab holds such a session.
    """
    app = await import_iterm2().async_get_app(connection)
    for window in app.windows:
        for tab in window.tabs:
            for session in tab.sessions:
                if getattr(session, "session_id", None) == session_id:
                    return session
    raise UserFacingError(
        f"iTerm2 session not found: {session_id}",
        Advice(
            summary="The requested session id is not currently visible to iTerm2.",
            action="Advise the user to rerun the list command and select a current session id.",
        ),
    )


async def run_with_iterm2(callback: Callable[[Any], Awaitable[T]]) -> T:
    """Open an iTerm2 connection, await ``callback(connection)``, and return its result.

    ``UserFacingError`` raised by the callback propagates unchanged; any other
    exception is wrapped in a ``UserFacingError`` with connection advice.
    """
    ensure_macos("iTerm2 session capture")
    iterm2 = import_iterm2()
    try:
        connection = await iterm2.Connection.async_create()
        return await callback(connection)
    except UserFacingError:
        raise
    except Exception as exc:
        raise UserFacingError(
            f"iTerm2 SDK request failed: {exc}",
            Advice(
                summary="The helper could not connect to iTerm2 through the Python SDK.",
                action="Advise the user to ensure iTerm2 is running and enable iTerm2's Python API/Scripting support, then retry with uv run .codex/skills/test-tui/scripts/iterm2_tui_visibility.py list.",
            ),
        ) from exc


async def command_list_async(_args: argparse.Namespace) -> int:
    """Implement ``list``: print metadata for all sessions and return 0."""
    sessions = await run_with_iterm2(list_sessions)
    json_print({"ok": True, "sessions": sessions})
    return 0


async def command_text_async(args: argparse.Namespace) -> int:
    """Implement ``text``: print the visible screen text of ``args.session_id`` and return 0."""

    async def capture(connection: Any) -> dict[str, Any]:
        session = await find_session(connection, args.session_id)
        iterm2 = import_iterm2()
        response = await iterm2.rpc.async_get_screen_contents(
            connection,
            session.session_id,
            None,
            False,
        )
        status = response.get_buffer_response.status
        ok_status = iterm2.api_pb2.GetBufferResponse.Status.Value("OK")
        if status != ok_status:
            status_name = iterm2.api_pb2.GetBufferResponse.Status.Name(status)
            raise UserFacingError(
                f"iTerm2 screen contents request failed: {status_name}",
                Advice(
                    summary="The iTerm2 SDK connected but could not read the selected session's screen.",
                    action="Advise the user to confirm the session is still visible and rerun the list command.",
                ),
            )
        contents = iterm2.screen.ScreenContents(response.get_buffer_response)
        lines = [contents.line(index).string for index in range(contents.number_of_lines)]
        return {
            "session_id": getattr(session, "session_id", None),
            "name": getattr(session, "name", None),
            "cursor": {
                "x": getattr(contents.cursor_coord, "x", None),
                "y": getattr(contents.cursor_coord, "y", None),
            },
            "rows": contents.number_of_lines,
            "contents": "\n".join(lines),
        }

    payload = await run_with_iterm2(capture)
    json_print(
        {
            "ok": True,
            "capture": {
                "kind": "iterm2_session_text",
                "timestamp": utc_stamp(),
                **payload,
            },
        }
    )
    return 0


def command_windows(_args: argparse.Namespace) -> int:
    """Implement ``windows``: print on-screen windows owned by ``iTerm2``/``iTerm`` and return 0."""
    ensure_macos("macOS window listing")
    copy_window_info, on_screen_only, null_window_id = import_quartz()
    windows = copy_window_info(on_screen_only, null_window_id)
    iterm_windows: list[dict[str, Any]] = []
    for window in windows or []:
        owner = window.get("kCGWindowOwnerName")
        if owner not in {"iTerm2", "iTerm"}:
            continue
        bounds = window.get("kCGWindowBounds") or {}
        iterm_windows.append(
            {
                "window_id": window.get("kCGWindowNumber"),
                "owner": owner,
                "title": window.get("kCGWindowName"),
                "bounds": {
                    "x": bounds.get("X"),
                    "y": bounds.get("Y"),
                    "width": bounds.get("Width"),
                    "height": bounds.get("Height"),
                },
            }
        )
    json_print({"ok": True, "windows": iterm_windows})
    return 0


def command_screenshot(args: argparse.Namespace) -> int:
    """Implement ``screenshot``: capture one macOS window to a PNG and return 0.

    The PNG is written to ``args.output_dir`` (default: ``default_capture_dir()``)
    as ``iterm2-window-<window id>-<UTC stamp>.png``.  ``args.session_id`` is
    only echoed back as a label; it does not select what is captured.

    Raises:
        UserFacingError: when not on macOS, when ``screencapture`` is missing,
            when ``--window-id`` is missing or not a positive integer, or when
            the capture fails or produces no file.
    """
    ensure_macos("macOS window screenshots")
    if shutil.which("screencapture") is None:
        raise UserFacingError(
            "screencapture is not available",
            Advice(
                summary="The macOS screencapture command is required for pixel evidence.",
                action="Advise the user to use iTerm2 text capture until screencapture is available.",
            ),
        )
    window_id = normalize_window_id(args.window_id)
    output_dir = Path(args.output_dir) if args.output_dir else default_capture_dir()
    output_dir.mkdir(parents=True, exist_ok=True)
    # One stamp for both the filename and the payload so they always agree.
    stamp = utc_stamp()
    output = output_dir / f"iterm2-window-{window_id}-{stamp}.png"
    proc = run_command(["screencapture", "-x", "-l", str(window_id), str(output)])
    # Also require the file: do not report a capture that was never written.
    if proc.returncode != 0 or not output.is_file():
        detail = proc.stderr.strip() or proc.stdout.strip() or "no image was written"
        raise UserFacingError(
            f"screencapture failed: {detail}",
            Advice(
                summary="macOS refused or failed the targeted window screenshot.",
                action="Advise the user to grant Screen Recording permission to the app running this helper and retry.",
            ),
        )
    json_print(
        {
            "ok": True,
            "capture": {
                "kind": "macos_window_screenshot",
                "timestamp": stamp,
                "session_id": args.session_id,
                "window_id": str(window_id),
                "output": str(output),
            },
        }
    )
    return 0


def build_parser() -> argparse.ArgumentParser:
    """Build the CLI parser; each subcommand stores its handler in ``func``."""
    parser = argparse.ArgumentParser(
        description="Read-only iTerm2/macOS visibility helper for Every Code TUI sessions."
    )
    sub = parser.add_subparsers(dest="command", required=True)

    list_parser = sub.add_parser("list", help="List iTerm2 sessions with metadata only.")
    list_parser.set_defaults(func=lambda args: asyncio.run(command_list_async(args)))

    windows_parser = sub.add_parser(
        "windows", help="List visible iTerm2 macOS windows and screenshot window ids."
    )
    windows_parser.set_defaults(func=command_windows)

    text_parser = sub.add_parser("text", help="Capture one iTerm2 session's visible text.")
    text_parser.add_argument("session_id", help="iTerm2 session unique id from the list command.")
    text_parser.set_defaults(func=lambda args: asyncio.run(command_text_async(args)))

    screenshot_parser = sub.add_parser("screenshot", help="Capture a targeted macOS window.")
    screenshot_parser.add_argument("session_id", help="iTerm2 session id for labeling context.")
    screenshot_parser.add_argument("--window-id", help="Known macOS CGWindowID to capture.")
    screenshot_parser.add_argument(
        "--output-dir", help="Directory for screenshot output. Defaults to .code/tui-captures/."
    )
    screenshot_parser.set_defaults(func=command_screenshot)

    return parser


def main(argv: list[str] | None = None) -> int:
    """Parse ``argv`` and run the selected subcommand.

    Returns the handler's status, or 2 after printing the JSON failure payload
    when the handler raises ``UserFacingError``.
    """
    parser = build_parser()
    args = parser.parse_args(argv)
    try:
        return args.func(args)
    except UserFacingError as exc:
        json_print(advice_payload(exc))
        return 2


if __name__ == "__main__":
    raise SystemExit(main())