jmjava · jmjava · Apr 16, 2026 · Apr 16, 2026 · Apr 16, 2026 · Apr 16, 2026
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -32,7 +32,29 @@ jobs:
         with:
           python-version: ${{ matrix.python-version }}
       - name: Install system dependencies
-        run: sudo apt-get update && sudo apt-get install -y --no-install-recommends ffmpeg tesseract-ocr
+        run: |
+          set -euo pipefail
+          attempt=1
+          max_attempts=4
+          backoff=4  # seconds to wait before the first retry; doubled after each failed attempt
+
+          while [ "$attempt" -le "$max_attempts" ]; do
+            echo "apt attempt $attempt/$max_attempts"
+            if timeout 600s sudo apt-get -o Acquire::Retries=3 update \
+              && timeout 600s sudo apt-get -o Acquire::Retries=3 install -y --no-install-recommends ffmpeg tesseract-ocr; then
+              exit 0  # update and install both succeeded: finish the step
+            fi
+
+            if [ "$attempt" -eq "$max_attempts" ]; then
+              echo "apt failed after $max_attempts attempts"
+              exit 1  # retries exhausted: fail the step
+            fi
+
+            echo "apt failed, retrying in ${backoff}s..."
+            sleep "$backoff"
+            backoff=$((backoff * 2))  # exponential backoff: 4s, 8s, 16s
+            attempt=$((attempt + 1))
+          done
       - run: pip install ".[dev]"
       - run: pytest tests/ --ignore=tests/e2e -v --tb=short
 

diff --git a/README.md b/README.md
@@ -54,7 +54,8 @@ docgen validate --pre-push  # validate all outputs before committing
 | `docgen wizard [--port 8501]` | Launch narration setup wizard (local web GUI) |
 | `docgen tts [--segment 01] [--dry-run]` | Generate TTS audio |
 | `docgen manim [--scene StackDAGScene]` | Render Manim animations |
-| `docgen vhs [--tape 02-quickstart.tape] [--strict]` | Render VHS terminal recordings |
+| `docgen vhs [--tape 02-quickstart.tape] [--strict] [--timeout 120]` | Render VHS terminal recordings |
+| `docgen playwright --script scripts/capture.py --url http://localhost:3000 --source demo.mp4` | Capture browser demo video with Playwright script |
 | `docgen tape-lint [--tape 02-quickstart.tape]` | Lint tapes for commands likely to hang in VHS |
 | `docgen sync-vhs [--segment 01] [--dry-run]` | Rewrite VHS `Sleep` values from `animations/timing.json` |
 | `docgen compose [01 02 03] [--ffmpeg-timeout 900]` | Compose segments (audio + video) |
@@ -83,6 +84,14 @@ vhs:
   min_sleep_sec: 0.05       # floor for rewritten Sleep values
   render_timeout_sec: 120   # per-tape timeout for `docgen vhs`
 
+playwright:
+  python_path: ""           # optional python executable for capture scripts
+  timeout_sec: 120          # capture timeout in seconds
+  default_url: ""           # fallback URL when visual_map entry omits url
+  default_viewport:         # fallback viewport when visual_map entry omits viewport
+    width: 1920
+    height: 1080
+
 pipeline:
   sync_vhs_after_timestamps: false  # opt-in: run sync-vhs automatically in generate-all/rebuild-after-audio
 
@@ -93,6 +102,36 @@ compose:
 
 If you edit a `.tape` file, run `docgen vhs` before `docgen compose` so compose does not use stale rendered terminal video.
 
+### Playwright visual source (`type: playwright`)
+
+`visual_map` entries can now use a Playwright capture script:
+
+```yaml
+visual_map:
+  "04":
+    type: playwright
+    source: 04-browser-flow.mp4
+    script: scripts/demo_capture.py
+    url: http://localhost:3300
+    viewport:
+      width: 1920
+      height: 1080
+```
+
+During `docgen compose`, docgen runs the capture script first (whenever `script` is set; without one, an existing `source` file is reused),
+then muxes the generated MP4 with narration audio.
+
+Manual capture (useful while iterating on scripts):
+
+```bash
+docgen playwright --script scripts/demo_capture.py --url http://localhost:3300 --source 04-browser-flow.mp4
+```
+
+Script contract:
+- receives env vars `DOCGEN_PLAYWRIGHT_OUTPUT`, `DOCGEN_PLAYWRIGHT_WIDTH`, `DOCGEN_PLAYWRIGHT_HEIGHT`, `DOCGEN_PLAYWRIGHT_VIEWPORT` (`WIDTHxHEIGHT`), `DOCGEN_PLAYWRIGHT_TIMEOUT_SEC`, plus `DOCGEN_PLAYWRIGHT_URL` and `DOCGEN_PLAYWRIGHT_SEGMENT` when a URL / segment id is known
+- must write an MP4 to the requested output path
+- should use headless Playwright for CI compatibility
+
 ### VHS safety: avoid real long-running commands in tapes
 
 VHS executes commands in a real shell session. For demos, prefer simulated output with `echo`

diff --git a/src/docgen/cli.py b/src/docgen/cli.py
@@ -134,6 +134,43 @@ def vhs(
             click.echo(f"    {e}")
 
 
+@main.command()
+@click.option(
+    "--script",
+    "script_path",
+    default=None,
+    help="Python script to execute for browser actions (required for standalone mode).",
+)
+@click.option("--url", default=None, help="Target URL for browser capture.")
+@click.option("--source", default="playwright-capture.mp4", help="Output filename under terminal/rendered/.")
+@click.option("--width", default=1920, type=int, help="Browser viewport width.")
+@click.option("--height", default=1080, type=int, help="Browser viewport height.")
+@click.option("--timeout", "timeout_sec", default=120, type=int, help="Capture timeout in seconds.")
+@click.pass_context
+def playwright(
+    ctx: click.Context,
+    script_path: str | None,
+    url: str | None,
+    source: str,
+    width: int,
+    height: int,
+    timeout_sec: int,
+) -> None:
+    """Capture a browser demo video using Playwright."""
+    from docgen.playwright_runner import PlaywrightRunner
+
+    cfg = ctx.obj["config"]
+    runner = PlaywrightRunner(cfg)
+    video = runner.capture(
+        script=script_path,
+        output=source,
+        url=url,
+        viewport={"width": width, "height": height},
+        timeout_sec=timeout_sec,
+    )
+    click.echo(f"[playwright] captured: {video}")
+
+
 @main.command("tape-lint")
 @click.option("--tape", default=None, help="Lint a single tape name or pattern.")
 @click.pass_context

diff --git a/src/docgen/compose.py b/src/docgen/compose.py
@@ -38,6 +38,15 @@ def compose_segments(self, segment_ids: list[str], *, strict: bool = True) -> in
                 video_path = self._vhs_path(vmap)
                 self._warn_if_stale_vhs(vmap, video_path)
                 ok = self._compose_simple(seg_id, video_path, strict=strict)
+            elif vtype == "playwright":
+                from docgen.playwright_runner import PlaywrightError, PlaywrightRunner
+
+                try:
+                    video_path = PlaywrightRunner(self.config).capture_segment(seg_id, vmap)
+                except PlaywrightError as exc:
+                    print(f"    SKIP: playwright capture failed ({exc})")
+                    video_path = Path("")
+                ok = video_path.exists() and self._compose_simple(seg_id, video_path, strict=strict)
             elif vtype == "mixed":
                 sources = [self._resolve_source(s) for s in vmap.get("sources", [])]
                 ok = self._compose_mixed(seg_id, sources)
@@ -245,6 +254,12 @@ def _vhs_path(self, vmap: dict[str, Any]) -> Path:
         src = vmap.get("source", "")
         return self.config.terminal_dir / "rendered" / src
 
+    def _playwright_path(self, vmap: dict[str, Any]) -> Path:
+        src = str(vmap.get("source", "")).strip()
+        if not src:
+            return self.config.terminal_dir / "rendered" / "playwright.mp4"
+        return self.config.terminal_dir / "rendered" / src
+
     def _resolve_source(self, source: str) -> Path:
         for base in self._manim_video_dirs():
             manim_path = base / source

diff --git a/src/docgen/config.py b/src/docgen/config.py
@@ -139,6 +139,40 @@ def sync_vhs_after_timestamps(self) -> bool:
         if "sync_vhs_after_timestamps" in pipeline_cfg:
             return bool(pipeline_cfg.get("sync_vhs_after_timestamps"))
         return self.sync_from_timing
+
+    # -- Playwright ------------------------------------------------------------
+
+    @property
+    def playwright_config(self) -> dict[str, Any]:
+        defaults: dict[str, Any] = {
+            "python_path": "",
+            "timeout_sec": 120,
+            "default_url": "",
+            "default_viewport": {"width": 1920, "height": 1080},
+        }
+        defaults.update(self.raw.get("playwright", {}))
+        return defaults
+
+    @property
+    def playwright_python_path(self) -> str | None:
+        value = self.playwright_config.get("python_path")
+        return str(value) if value else None
+
+    @property
+    def playwright_timeout_sec(self) -> int:
+        return int(self.playwright_config.get("timeout_sec", 120))
+
+    @property
+    def playwright_default_url(self) -> str | None:
+        value = str(self.playwright_config.get("default_url", "")).strip()
+        return value or None
+
+    @property
+    def playwright_default_viewport(self) -> tuple[int, int]:
+        raw = self.playwright_config.get("default_viewport", {}) or {}
+        width = int(raw.get("width", 1920))
+        height = int(raw.get("height", 1080))
+        return width, height
     # -- Compose ----------------------------------------------------------------
 
     @property

diff --git a/src/docgen/playwright_runner.py b/src/docgen/playwright_runner.py
@@ -0,0 +1,152 @@
+"""Playwright visual source runner via external capture scripts."""
+
+from __future__ import annotations
+
+import os
+import subprocess
+import sys
+from pathlib import Path
+from typing import Any, TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from docgen.config import Config
+
+
+class PlaywrightError(RuntimeError):
+    """Raised when a Playwright capture cannot be started, fails, or yields no output file."""
+
+
+class PlaywrightRunner:
+    """Runs user-provided browser capture scripts for docgen segments."""
+
+    def __init__(self, config: Config, timeout_sec: int | None = None) -> None:
+        self.config = config
+        self.timeout_sec = (
+            int(timeout_sec)
+            if timeout_sec is not None
+            else int(self.config.playwright_timeout_sec)
+        )
+
+    def capture_segment(self, seg_id: str, vmap: dict[str, Any]) -> Path:
+        """Capture (or resolve) segment video for `type: playwright` visual map."""
+        source = str(vmap.get("source", "")).strip()
+        if not source:
+            raise PlaywrightError(
+                f"visual_map[{seg_id}] type=playwright requires a 'source' output path"
+            )
+        output_path = self._resolve_output_path(source)
+
+        script = str(vmap.get("script", "")).strip()
+        if not script:
+            if output_path.exists():
+                return output_path
+            raise PlaywrightError(
+                f"type=playwright source missing and no script configured: {output_path}"
+            )
+
+        script_path = self._resolve_path(script)
+        if not script_path.exists():
+            raise PlaywrightError(f"Playwright script not found: {script_path}")
+
+        url = str(vmap.get("url", "")).strip() or None
+        viewport = vmap.get("viewport", {}) or {}
+        width = int(viewport.get("width", 1920))
+        height = int(viewport.get("height", 1080))
+        args = [str(a) for a in (vmap.get("args", []) or [])]
+
+        return self.capture(
+            script=script_path,
+            output=output_path,
+            url=url,
+            viewport={"width": width, "height": height},
+            args=args,
+            segment_id=seg_id,
+        )
+
+    def capture(
+        self,
+        *,
+        script: Path | str | None,
+        output: Path | str | None = None,
+        source: str | None = None,
+        url: str | None = None,
+        viewport: dict[str, int] | None = None,
+        args: list[str] | None = None,
+        segment_id: str | None = None,
+        timeout_sec: int | None = None,
+    ) -> Path:
+        """Run one external capture script and return the output video path."""
+        if script is None and url is None:
+            raise PlaywrightError("capture requires --script or --url")
+        if script is None:
+            raise PlaywrightError("capture requires --script")
+
+        script_path = self._resolve_path(script)
+        output_value = output if output is not None else source
+        if output_value is None:
+            output_value = "playwright-capture.mp4"
+        output_path = self._resolve_output_path(output_value)
+        output_path.parent.mkdir(parents=True, exist_ok=True)
+
+        python_bin = self.config.playwright_python_path or sys.executable
+        env = os.environ.copy()
+        env["DOCGEN_PLAYWRIGHT_OUTPUT"] = str(output_path)
+        if url:
+            env["DOCGEN_PLAYWRIGHT_URL"] = url
+        if segment_id:
+            env["DOCGEN_PLAYWRIGHT_SEGMENT"] = segment_id
+        vp = viewport or {}
+        width = int(vp.get("width", 1920))
+        height = int(vp.get("height", 1080))
+        env["DOCGEN_PLAYWRIGHT_WIDTH"] = str(width)
+        env["DOCGEN_PLAYWRIGHT_HEIGHT"] = str(height)
+        env["DOCGEN_PLAYWRIGHT_VIEWPORT"] = f"{width}x{height}"
+
+        effective_timeout = max(1, int(timeout_sec if timeout_sec is not None else self.timeout_sec))
+        env["DOCGEN_PLAYWRIGHT_TIMEOUT_SEC"] = str(effective_timeout)
+
+        cmd = [python_bin, str(script_path), *(args or [])]
+        try:
+            result = subprocess.run(
+                cmd,
+                cwd=str(self.config.base_dir),
+                env=env,
+                capture_output=True,
+                text=True,
+                timeout=effective_timeout,
+                check=True,
+            )
+        except FileNotFoundError:
+            raise PlaywrightError(f"python executable not found: {python_bin}")
+        except subprocess.TimeoutExpired:
+            raise PlaywrightError(
+                f"Playwright capture timed out after {effective_timeout}s ({script_path.name})"
+            )
+        except subprocess.CalledProcessError as exc:
+            detail = (exc.stderr or exc.stdout or "")[:400]
+            raise PlaywrightError(
+                f"Playwright script failed ({script_path.name}): {detail}"
+            )
+
+        if not output_path.exists():
+            detail = (result.stderr or result.stdout or "").strip()
+            hint = f" ({detail[:200]})" if detail else ""
+            raise PlaywrightError(
+                f"Playwright script finished but output is missing: {output_path}{hint}"
+            )
+        return output_path
+
+    def _resolve_path(self, value: Path | str) -> Path:
+        path = Path(value)
+        if path.is_absolute():
+            return path
+        return (self.config.base_dir / path).resolve()
+
+    def _resolve_output_path(self, value: Path | str) -> Path:
+        path = Path(value)
+        if path.is_absolute():
+            return path
+        # Source values are normally relative to terminal/rendered.
+        if path.parent == Path("."):
+            return (self.config.terminal_dir / "rendered" / path).resolve()
+        return (self.config.base_dir / path).resolve()
diff --git a/src/docgen/wizard.py b/src/docgen/wizard.py
@@ -386,6 +386,25 @@ def api_run_step(step: str, segment_id: str):
                 comp.compose_segments([segment_id])
                 return jsonify({"ok": True, "step": "compose", "segment": segment_id})
 
+            elif step == "playwright":
+                from docgen.playwright_runner import PlaywrightRunner
+
+                vmap = cfg.visual_map.get(segment_id, {})
+                source = str(vmap.get("source", "")).strip()
+                if not source:
+                    return jsonify({"error": "visual_map source is required for playwright"}), 400
+
+                runner = PlaywrightRunner(cfg)
+                video = runner.capture_segment(segment_id, vmap)
+                return jsonify(
+                    {
+                        "ok": True,
+                        "step": "playwright",
+                        "segment": segment_id,
+                        "video": str(video.relative_to(cfg.base_dir)),
+                    }
+                )
+
             elif step == "validate":
                 from docgen.validate import Validator
                 v = Validator(cfg)