diff --git a/Cargo.lock b/Cargo.lock
index 01237be0..9ecbe581 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -258,6 +258,31 @@ dependencies = [
  "tokio",
 ]
 
+[[package]]
+name = "agentkeys-gate"
+version = "0.1.0"
+dependencies = [
+ "agentkeys-backend-client",
+ "agentkeys-core",
+ "agentkeys-memory-engine",
+ "agentkeys-protocol",
+ "anyhow",
+ "async-trait",
+ "axum",
+ "base64 0.22.1",
+ "clap",
+ "http-body-util",
+ "reqwest",
+ "rustls 0.23.37",
+ "serde",
+ "serde_json",
+ "thiserror 2.0.18",
+ "tokio",
+ "tower 0.4.13",
+ "tracing",
+ "tracing-subscriber",
+]
+
 [[package]]
 name = "agentkeys-mcp"
 version = "0.1.0"
diff --git a/Cargo.toml b/Cargo.toml
index 248379d5..2fbf1063 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -11,6 +11,7 @@ members = [
     "crates/agentkeys-daemon",
     "crates/agentkeys-mcp",
     "crates/agentkeys-mcp-server",
+    "crates/agentkeys-gate",
     "crates/agentkeys-provisioner",
     "crates/agentkeys-backend-client",
     "crates/agentkeys-protocol",
diff --git a/crates/agentkeys-core/src/audit/bodies.rs b/crates/agentkeys-core/src/audit/bodies.rs
index b86463a1..1813942c 100644
--- a/crates/agentkeys-core/src/audit/bodies.rs
+++ b/crates/agentkeys-core/src/audit/bodies.rs
@@ -236,6 +236,38 @@ pub struct ConfigTeardownBody {
     pub actor_target: String,
 }
 
+// ── 90..99 — gate family (Plan A in-path CustomLLM gate) ───────────────
+//
+// Emitted by `agentkeys-gate` once per LLM turn. The memory READS themselves
+// are audited worker-side (#229, op_kind MemoryGet); this row records the
+// turn-level control-plane facts: which engine answered, what was injected,
+// and the per-namespace authorization outcome.
+
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
+pub struct GateTurnNamespaceBody {
+    /// Memory namespace the gate attempted to inject (e.g. `"travel"`).
+    pub namespace: String,
+    /// Outcome of the in-path cap-check + read: `"injected"`, `"empty"`, or
+    /// `"denied"` (a 403 the actor had no scope for — recorded, non-fatal).
+    pub outcome: String,
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
+pub struct GateTurnBody {
+    /// The engine that produced the completion (`"openai-compatible"`,
+    /// `"echo"`, …) — provenance for "which model answered".
+    pub engine: String,
+    /// The upstream model id the turn ran against.
+    pub model: String,
+    /// Per-namespace cap-check + injection outcomes for this turn.
+    pub namespaces: Vec<GateTurnNamespaceBody>,
+    /// Total bytes of memory injected across all namespaces.
+    pub injected_bytes: u64,
+    /// Upstream-reported token usage (0 when the engine doesn't report it).
+    pub prompt_tokens: u64,
+    pub completion_tokens: u64,
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
@@ -448,6 +480,49 @@ mod tests {
         }
     }
 
+    /// Gate family (Plan A): canonical CBOR roundtrip + typed decode for the
+    /// per-turn gate audit row, including the nested per-namespace outcomes.
+    #[test]
+    fn gate_family_cbor_roundtrip_and_typed_decode() {
+        use crate::audit::{envelope_for, AuditEnvelope, AuditOpKind, AuditResult, TypedAuditBody};
+
+        let body = GateTurnBody {
+            engine: "openai-compatible".into(),
+            model: "doubao-pro".into(),
+            namespaces: vec![
+                GateTurnNamespaceBody {
+                    namespace: "personal".into(),
+                    outcome: "injected".into(),
+                },
+                GateTurnNamespaceBody {
+                    namespace: "travel".into(),
+                    outcome: "denied".into(),
+                },
+            ],
+            injected_bytes: 42,
+            prompt_tokens: 150,
+            completion_tokens: 30,
+        };
+        let env = envelope_for(
+            [0x44; 32],
+            [0x22; 32],
+            AuditOpKind::GateTurn,
+            body.clone(),
+            AuditResult::Success,
+            Some("where did I go in Chengdu?".to_string()),
+            None,
+        )
+        .unwrap();
+        let decoded =
+            AuditEnvelope::from_canonical_cbor(&env.to_canonical_cbor().unwrap()).unwrap();
+        assert_eq!(decoded.op_kind, AuditOpKind::GateTurn as u8);
+        assert_eq!(AuditOpKind::GateTurn.label(), "gate.turn");
+        match decoded.typed_body().unwrap() {
+            TypedAuditBody::GateTurn(b) => assert_eq!(b, body),
+            other => panic!("unexpected typed body: {other:?}"),
+        }
+    }
+
     #[test]
     fn payment_direct_body_uses_ref_as_field_name() {
         // Sanity check: `ref` is a Rust reserved word, so the field is
diff --git a/crates/agentkeys-core/src/audit/mod.rs b/crates/agentkeys-core/src/audit/mod.rs
index 31df0f26..3a0b95c4 100644
--- a/crates/agentkeys-core/src/audit/mod.rs
+++ b/crates/agentkeys-core/src/audit/mod.rs
@@ -56,9 +56,9 @@ use thiserror::Error;
 pub use bodies::{
     ConfigGetBody, ConfigPutBody, ConfigTeardownBody, CredFetchBody, CredStoreBody,
     CredTeardownBody, DeviceAddBody, DeviceRevokeBody, EmailReceiveBody, EmailSendBody,
-    K10RotateBody, K3EpochAdvanceBody, MemoryGetBody, MemoryPutBody, MemoryTeardownBody,
-    PaymentDirectBody, PaymentEscrowRedeemBody, ScopeGrantBody, ScopeRevokeBody, SignEip191Body,
-    SignEip712Body,
+    GateTurnBody, GateTurnNamespaceBody, K10RotateBody, K3EpochAdvanceBody, MemoryGetBody,
+    MemoryPutBody, MemoryTeardownBody, PaymentDirectBody, PaymentEscrowRedeemBody, ScopeGrantBody,
+    ScopeRevokeBody, SignEip191Body, SignEip712Body,
 };
 pub use op_kind::AuditOpKind;
 
@@ -238,6 +238,7 @@ pub enum TypedAuditBody {
     ConfigPut(ConfigPutBody),
     ConfigGet(ConfigGetBody),
     ConfigTeardown(ConfigTeardownBody),
+    GateTurn(GateTurnBody),
 }
 
 impl TypedAuditBody {
@@ -277,6 +278,7 @@ impl TypedAuditBody {
             AuditOpKind::ConfigTeardown => {
                 Self::ConfigTeardown(serde_json::from_value(value).ok()?)
             }
+            AuditOpKind::GateTurn => Self::GateTurn(serde_json::from_value(value).ok()?),
         })
     }
 }
diff --git a/crates/agentkeys-core/src/audit/op_kind.rs b/crates/agentkeys-core/src/audit/op_kind.rs
index 259b2c60..da7fbe98 100644
--- a/crates/agentkeys-core/src/audit/op_kind.rs
+++ b/crates/agentkeys-core/src/audit/op_kind.rs
@@ -15,7 +15,8 @@
 //! - 60-69 email family (EmailSend=60, EmailReceive=61; 62-69 reserved)
 //! - 70-79 K3 family (K3EpochAdvance=70; 71-79 reserved)
 //! - 80-89 config family (ConfigPut=80, ConfigGet=81, ConfigTeardown=82; 83-89 reserved)
-//! - 90-255 reserved for future families
+//! - 90-99 gate family (GateTurn=90; 91-99 reserved)
+//! - 100-255 reserved for future families
 
 /// Canonical op_kind enum. The byte value MUST match the row in arch.md
 /// §15.3a. The enum is `repr(u8)` so `as u8` gives the canonical byte.
@@ -47,6 +48,11 @@ pub enum AuditOpKind {
     ConfigPut = 80,
     ConfigGet = 81,
     ConfigTeardown = 82,
+    /// In-path CustomLLM gate turn (Plan A, `docs/plan/ai-device-platform.md`
+    /// §3): one cap-checked + memory-injected + audited LLM turn. Recorded by
+    /// `agentkeys-gate` so a gated turn is on the ledger even when it injected
+    /// no memory (the worker-side memory audit only covers turns that read).
+    GateTurn = 90,
 }
 
 impl AuditOpKind {
@@ -75,6 +81,7 @@ impl AuditOpKind {
             80 => Self::ConfigPut,
             81 => Self::ConfigGet,
             82 => Self::ConfigTeardown,
+            90 => Self::GateTurn,
             _ => return None,
         })
     }
@@ -105,6 +112,7 @@ impl AuditOpKind {
             Self::ConfigPut => "config.put",
             Self::ConfigGet => "config.get",
             Self::ConfigTeardown => "config.teardown",
+            Self::GateTurn => "gate.turn",
         }
     }
 }
@@ -140,6 +148,7 @@ mod tests {
             AuditOpKind::ConfigPut,
             AuditOpKind::ConfigGet,
             AuditOpKind::ConfigTeardown,
+            AuditOpKind::GateTurn,
         ];
         for k in all {
             let byte = k as u8;
@@ -156,7 +165,7 @@ mod tests {
     #[test]
     fn unknown_bytes_return_none() {
         for byte in [
-            3u8, 9, 13, 19, 22, 32, 42, 53, 62, 71, 83, 89, 90, 200, 250, 255,
+            3u8, 9, 13, 19, 22, 32, 42, 53, 62, 71, 83, 89, 91, 99, 200, 250, 255,
         ] {
             assert_eq!(
                 AuditOpKind::from_u8(byte),
@@ -193,6 +202,7 @@ mod tests {
             AuditOpKind::ConfigPut as u8,
             AuditOpKind::ConfigGet as u8,
             AuditOpKind::ConfigTeardown as u8,
+            AuditOpKind::GateTurn as u8,
         ];
         let s: HashSet<_> = all.iter().copied().collect();
         assert_eq!(s.len(), all.len(), "duplicate byte assignment");
diff --git a/crates/agentkeys-gate/Cargo.toml b/crates/agentkeys-gate/Cargo.toml
new file mode 100644
index 00000000..ecc31eff
--- /dev/null
+++ b/crates/agentkeys-gate/Cargo.toml
@@ -0,0 +1,53 @@
+[package]
+name = "agentkeys-gate"
+version = "0.1.0"
+edition = "2021"
+
+[[bin]]
+name = "agentkeys-gate"
+path = "src/main.rs"
+
+[lib]
+name = "agentkeys_gate"
+path = "src/lib.rs"
+
+[dependencies]
+# The shipped broker/worker chain (cap-mint -> per-actor STS -> worker put/get ->
+# audit). The gate is a THIN in-path wrapper over this; it never re-types a
+# cap/worker body (issue #203 — one owner).
+agentkeys-backend-client = { workspace = true }
+# Wire helpers shared with the browser host: service_memory(), normalize_omni_0x().
+agentkeys-protocol = { workspace = true }
+# Caller-side memory selection engine (Passthrough / Lexical) — deterministic,
+# no LLM in the gate path. The injection seam (plan §6a).
+agentkeys-memory-engine = { workspace = true }
+# Canonical audit op_kind table (AuditOpKind) + K10 device-key load for cap-PoP.
+agentkeys-core = { workspace = true }
+
+serde = { workspace = true }
+serde_json = { workspace = true }
+tokio = { workspace = true }
+async-trait = { workspace = true }
+thiserror = { workspace = true }
+anyhow = { workspace = true }
+
+axum = { version = "0.7", features = ["json"] }
+tower = "0.4"
+# reqwest pins rustls-tls so the upstream LLM call uses the same crypto provider
+# the broker host already ships; the `ring` provider is installed in main.
+reqwest = { version = "0.12", default-features = false, features = ["json", "rustls-tls"] }
+rustls = { version = "0.23", default-features = false, features = ["ring", "std", "tls12"] }
+clap = { version = "4", features = ["derive", "env"] }
+tracing = "0.1"
+tracing-subscriber = { version = "0.3", features = ["env-filter"] }
+base64 = "0.22"
+
+[dev-dependencies]
+tokio = { workspace = true }
+async-trait = { workspace = true }
+serde_json = { workspace = true }
+# AuditOpKind::GateTurn assertions in the gate-flow tests.
+agentkeys-core = { workspace = true }
+base64 = "0.22"
+tower = { version = "0.4", features = ["util"] }
+http-body-util = "0.1"
diff --git a/crates/agentkeys-gate/IMPLEMENTATION_LOG.md b/crates/agentkeys-gate/IMPLEMENTATION_LOG.md
new file mode 100644
index 00000000..c18544c8
--- /dev/null
+++ b/crates/agentkeys-gate/IMPLEMENTATION_LOG.md
@@ -0,0 +1,179 @@
+# Implementation log — `agentkeys-gate`
+
+A narrative of what this crate is, how it was built, and **why each decision was
+made** — so you can understand the work without reading every line. For *how to
+run/test it*, see [`docs/operator-runbook-gate.md`](../../docs/operator-runbook-gate.md);
+for the quick reference, the [README](README.md).
+
+> **Update (2026-06-16) — positioning + identity model (post-review).** Review of #308 + an identity-model dig (see [`docs/plan/shared-user-memory-delegation.md`](../../docs/plan/shared-user-memory-delegation.md)) corrected two things below:
+> 1. **This is an *adapter* above the worker enforcement boundary, not a security gate.** The **worker** independently re-verifies every cap against the chain — that is the safety boundary. The gate adds in-path memory-injection + a turn-level audit record; it does not *enforce* (so read "deterministic enforcement is structural" in §5 as "the gate cannot read memory the *worker* won't serve it a cap for" — the enforcement is the worker's).
+> 2. **Memory is the USER's; the agent is a delegated caller (DECIDED).** Owner = user/master (`memory:user:<id>:<ns>`); the gate runs **as the agent** (its own `J1_agent` + K10), authorized by a stored master grant. The delegated config shape (`operator = owner`, `actor = agent`) is correct, but the **real-backend shared-memory path is blocked on 3 backend gaps** (broker caller-auth, owner-keyed storage, grant-scoped STS — plan doc §3). Until they land, the gate runs end-to-end only with the `echo` engine. The gate also now **rejects** `tools`/`tool_choice` (it does not yet enforce tool-calling).
+
+---
+
+## 1. What was asked
+
+"Start to work on the **A plan in #305**." That PR
+([`docs/plan/ai-device-platform.md`](../../docs/plan/ai-device-platform.md)) is the
+design for an AI-device platform. **Plan A = the Volcano MVP** (§3) — a 5-piece
+build:
+
+1. Tuya T5 firmware → Volcano RTC (voice in/out) — *hardware*.
+2. **The in-path CustomLLM gate** — per turn: cap/permission-check + memory-inject + audit, then call the model.
+3. Reuse the shipped AgentKeys backend (broker, memory worker, cap-mint, audit).
+4. Management view (parent-control).
+5. **The adapter seam** — one engine behind a stable interface, so Plan B (self-host + more models) is "A + engines", not a rewrite (`A ⊂ B`).
+
+This crate is **pieces 2 + 5, reusing piece 3**. Pieces 1 and 4 are deferred (a
+hardware spike and a UI task); see §8 below.
+
+## 2. The one-sentence shape
+
+`agentkeys-gate` is the in-path twin of `agentkeys-mcp-server`. Where the MCP
+server lets an LLM *choose* to call AgentKeys tools, the gate sits **in the
+request path** and runs the gate logic on **every** turn, deterministically,
+then calls the model — which is what makes "central control + see-all-memory +
+audit" real rather than opt-in.
+
+## 3. The per-turn flow (the heart — [`gate.rs`](src/gate.rs))
+
+```
+POST /v1/chat/completions  (Volcano RTC CustomLLM → gate)
+        │
+        ▼
+1. resolve identity      actor_omni (X-AgentKeys-Actor header OR config default),
+                         operator_omni, device_key_hash — normalized to 0x form.
+        │
+        ▼
+2. cap-check + memory-inject   for each configured namespace:
+        ├─ mint a broker cap   CapMintOp::MemoryGet, service = service_memory(ns) = "memory:<ns>"
+        ├─ read the worker     BackendClient::memory_get  → base64 plaintext
+        ├─ select lines        agentkeys-memory-engine (passthrough/lexical), deterministic, no LLM
+        └─ classify outcome    injected / empty (404) / denied (403) / FATAL (anything else)
+        │   → prepend ONE system message with the selected lines
+        ▼
+3. engine completion     EngineAdapter::complete(request)   (echo, or openai-compatible → Doubao)
+        │
+        ▼
+4. audit                 one GateTurn (op_kind 90) row: {engine, model, per-namespace outcomes,
+                         injected_bytes, token usage}.  Non-fatal by default; --require-audit makes it block.
+        │
+        ▼
+OpenAI ChatCompletionResponse  (gate → Volcano → TTS → device)
+```
+
+## 4. Module map
+
+| File | Role |
+|---|---|
+| [`gate.rs`](src/gate.rs) | The orchestration above. `Gate::handle_chat_completion` + the memory-inject + audit helpers + the error `classify()`. |
+| [`engine.rs`](src/engine.rs) | The **swappable seam** (piece 5): the `EngineAdapter` trait + `OpenAiCompatibleEngine` (Plan A → Doubao) + `EchoEngine` (local). |
+| [`backend.rs`](src/backend.rs) | The `Backend` trait (cap_mint / memory_get / audit_append), impl'd on the shared `BackendClient`; the test seam. |
+| [`openai.rs`](src/openai.rs) | The OpenAI `/v1/chat/completions` wire types (the gate's public contract). |
+| [`config.rs`](src/config.rs) | `Cli` (clap) + `Config` (read once at startup) + `for_tests()`. |
+| [`auth.rs`](src/auth.rs) | Vendor `Bearer` auth (constant-time) + the per-device `X-AgentKeys-Actor` header + the anonymous dev path. |
+| [`error.rs`](src/error.rs) | `GateError` (thiserror) → HTTP status + OpenAI-shaped error body. |
+| [`server.rs`](src/server.rs) | The axum router (`/v1/chat/completions`, `/healthz`). |
+| [`main.rs`](src/main.rs) | Wire it up: build `BackendClient` + the chosen engine, run the server. |
+
+## 5. Key decisions (and why)
+
+**Reuse the shipped backend; own no new wire shape.** Every cap/memory/audit
+call goes through the shared `agentkeys-backend-client::BackendClient` and the
+shared protocol types — the same path the MCP server, daemon, and browser use.
+This is the #203 "one owner" rule: re-typing a cap/worker body in a second place
+is the exact drift-bug class the repo spent issues closing. The gate's *own* new
+types (the OpenAI request/response) are its public HTTP contract, not a
+broker/worker body, so they live here and don't touch the fixture gate.
+
+**The engine is the only swappable part (`A ⊂ B`).** [`engine.rs`](src/engine.rs)
+hands an adapter only the (already memory-injected, already authorized) request +
+a base URL + an API key. It never sees keys, identity, cap-tokens, or the audit
+trail — those stay at the broker/worker/gate. That **red line** is what keeps the
+platform sovereign and reversible: swapping `echo` → Doubao → Bedrock → a
+self-host runtime is config, never a rewrite, and a compromised/forked engine
+can't reach your memory or identity. (The engine *does* legitimately see the
+injected memory **in the prompt** — that's the whole point of the gate; the red
+line is about secrets/authority, not the content the user authorized for use.)
+
+**Why an `echo` engine exists.** It is not a test stub. It lets the entire
+in-path gate (device → Volcano RTC → gate → reply) run on **real hardware before
+Doubao credentials exist**, and it reflects the injected memory in its reply so
+an operator can *see* the gate read + inject. That directly serves Plan A §3's
+"prove the gate in-path on real hardware" goal.
+
+**The error-classification policy (no silent fallback).** In `classify()`, a
+per-namespace **403 → `denied`** (the actor has no scope; recorded, non-fatal —
+the turn answers degraded), a **404/empty → `empty`**. **Anything else**
+(transport, 5xx, parse, not-configured) is a **hard `Backend` error that fails
+the turn loud** (502). This honors the "fail loud, never hide a broken broker
+path behind a silent fallback" rule: a broken backend must be *visible*, not
+papered over as "memory was empty." The deterministic enforcement is structural —
+the gate physically cannot read memory it has no broker-minted cap for.
+
+**The audit leg = a new `GateTurn` op_kind (90).** The plan makes audit the top
+feature. Two facts pulled in different directions: (a) the memory worker already
+audits each read as `MemoryGet` (#229), so double-auditing reads would be noise;
+(b) a pure-chat turn that reads no memory would otherwise leave **no** trace on
+the ledger. Resolution: the gate emits **one turn-level `GateTurn` row** (engine,
+model, per-namespace outcomes, token usage) — distinct from the data-plane
+`MemoryGet` rows. Adding an op_kind is a canonical-registry change, so it was
+done to spec: the enum + decode + typed body + the arch.md §15.3a row + the
+frozen tests (family 90-99 = "gate"). It is **non-fatal by default**
+(`--require-audit` enforces) — the staged-rollout posture the worker audit uses,
+so an audit-worker hiccup doesn't take the gate down before the rollout is ready.
+
+**Identity: header-or-config.** The per-device actor comes from the
+`X-AgentKeys-Actor` header (one gate serving a fleet) **or** the configured
+default actor (one-gate-per-device). The cap-mint session JWT is the *configured
+device-actor JWT*, separate from the inbound *vendor* bearer that authenticates
+the caller — two different secrets, two different trust roles.
+
+## 6. The review finding that got fixed
+
+A 4-reviewer adversarial pass (+ a verify pass) flagged **one** real defect: the
+non-403/404 arm of `classify()` formatted the **full** `BackendError` — including
+the upstream HTTP **body** — into the error returned to the vendor caller. A 5xx
+broker/worker body could thus cross the trust boundary. **Fix** (in
+[`gate.rs`](src/gate.rs)): log the full error for the operator via `tracing`, but
+return only the safe **status category** (`backend HTTP 500`, never the body) to
+the caller — and the same hardening for the engine path. Regression test:
+`backend_5xx_body_is_not_echoed_to_caller` in
+[`tests/gate_flow.rs`](tests/gate_flow.rs). (The other 15 review findings were
+verified as not-real.)
+
+## 7. Test coverage
+
+| Test | Proves |
+|---|---|
+| `injects_memory_and_audits_the_turn` | the happy path: 2 namespaces minted + injected, one `GateTurn` (90) audit row with both outcomes `injected`. |
+| `denied_namespace_is_recorded_not_fatal` | a 403 namespace → `denied`, turn proceeds with the allowed one. |
+| `broken_backend_fails_the_turn` | a transport error fails the turn loud; the engine is never reached. |
+| `backend_5xx_body_is_not_echoed_to_caller` | the review-fix: status surfaces, the upstream body never leaks. |
+| `pure_chat_turn_is_still_audited` | no namespaces → no caps, but the turn is still on the ledger (the gap `GateTurn` closes). |
+| `actor_header_overrides_default` | `X-AgentKeys-Actor` overrides the config default actor. |
+| `tests/http.rs` (×4) | the real axum router: health, no-token 401, wrong-token 401, a full anonymous echo turn. |
+| `openai.rs` / `engine.rs` / `auth.rs` unit tests (×11) | wire round-trip + unknown-param passthrough, echo surfaces memory, engine-without-key, constant-time token compare. |
+| `agentkeys-core` `gate_family_cbor_roundtrip_and_typed_decode` | the `GateTurn` envelope CBOR-roundtrips + typed-decodes. |
+
+All offline (mock `Backend` + `EngineAdapter`); no env mutation; clippy `-D
+warnings`; fmt clean.
+
+## 8. What is deferred (and why)
+
+| Deferred | Why |
+|---|---|
+| Piece 1 — Tuya firmware → Volcano RTC | hardware spike; needs the board. |
+| Piece 4 — parent-control management view | a UI wiring task; sequenced after the endpoint. |
+| **Deploy wiring** — `setup-broker-host.sh` | the gate isn't a deploy surface yet (needs a systemd unit + nginx vhost for the CustomLLM endpoint). |
+| Memory **write-back** | the MVP reads memory; writing new memory from a turn is a later step. |
+| **Streaming** | turns are cap/audit-granular; `stream:true` is served as a single completion. |
+| **Spend** | plan §4 blocks it on single-use caps + mint-time budgeting (caps are replayable-until-TTL today). |
+
+## 9. Where the addresses/identity come from
+
+Nothing is hardcoded — every value is a flag/env read once at startup
+(`Config::from_cli`). The device actor, operator omni, device-key hash, session
+JWT, K10 key, and IAM role are the **same inputs** the in-sandbox daemon / MCP
+server use for a paired device; the gate just impersonates one device actor
+against the shipped broker.
diff --git a/crates/agentkeys-gate/README.md b/crates/agentkeys-gate/README.md
new file mode 100644
index 00000000..9147cf2f
--- /dev/null
+++ b/crates/agentkeys-gate/README.md
@@ -0,0 +1,87 @@
+# agentkeys-gate
+
+The **in-path CustomLLM gate** — Plan A pieces 2 + 5 of
+[`docs/plan/ai-device-platform.md`](../../docs/plan/ai-device-platform.md) §3.
+
+> **Testing it:** [`docs/operator-runbook-gate.md`](../../docs/operator-runbook-gate.md) (a 3-level runbook — local echo smoke test → real backend → Doubao).
+> **Understanding it:** [`IMPLEMENTATION_LOG.md`](IMPLEMENTATION_LOG.md) (what was built + why each decision).
+
+An OpenAI-compatible `POST /v1/chat/completions` endpoint that Volcano RTC's
+**CustomLLM mode** posts to. Per turn it does, in order:
+
+1. **cap-check + memory-inject** — mints a broker `memory-get` cap per configured
+   namespace (`service = memory:<ns>`, `CapMintOp::MemoryGet`), reads it through
+   the memory worker, runs the deterministic memory-selection engine
+   (`agentkeys-memory-engine`), and prepends the selected lines as a leading
+   `system` message;
+2. **engine completion** — forwards the (now memory-augmented) request to a
+   swappable [`EngineAdapter`](src/engine.rs);
+3. **audit** — appends one `GateTurn` (op_kind 90) row so every gated turn is on
+   the ledger, even one that injected no memory.
+
+It owns **no new wire shape** and **no persistent state**: identity · cap ·
+memory · audit all route through the shipped
+`agentkeys-backend-client::BackendClient`. That is the red line — none of those
+ever live inside the swappable engine.
+
+## A ⊂ B — the engine seam
+
+The [`EngineAdapter`](src/engine.rs) is the **only** swappable part of the
+platform, so Plan B (self-host runtime, more model vendors) is "A + engines",
+not a rewrite. Two engines ship today:
+
+| Engine (`--engine`) | What it calls | Use |
+|---|---|---|
+| `echo` (default) | nothing — reflects the turn + the injected memory | Hardware bring-up: run device → Volcano RTC → gate → reply with **no Doubao credentials**, and SEE the gate read + inject memory. This is Plan A §3's "prove the gate in-path on real hardware" path. |
+| `openai-compatible` | a `/v1/chat/completions` upstream (Volcano RTC CustomLLM → Doubao, or any OpenAI-compatible server) | Production Plan A. Needs `--engine-base-url` + `--engine-api-key`. |
+
+## Run
+
+```bash
+# Local / hardware bring-up — echo engine, no auth, no upstream:
+AGENTKEYS_GATE_ALLOW_ANONYMOUS=1 \
+AGENTKEYS_GATE_DEFAULT_ACTOR=0x<device_actor_omni> \
+AGENTKEYS_GATE_DEFAULT_OPERATOR_OMNI=0x<master_omni> \
+AGENTKEYS_GATE_DEFAULT_DEVICE_KEY_HASH=0x<k10_hash> \
+AGENTKEYS_BROKER_URL=https://broker.litentry.org \
+AGENTKEYS_MEMORY_URL=https://cred.litentry.org \
+AGENTKEYS_AUDIT_URL=https://audit.litentry.org \
+AGENTKEYS_DEVICE_KEY_FILE=~/.agentkeys/agent-device.key \
+  cargo run -p agentkeys-gate
+
+# Production — vendor-authenticated, Doubao via Volcano RTC CustomLLM:
+AGENTKEYS_GATE_VENDOR_TOKENS=folotoy:<bearer> \
+AGENTKEYS_GATE_ENGINE=openai-compatible \
+AGENTKEYS_GATE_ENGINE_BASE_URL=https://ark.cn-beijing.volces.com/api/v3 \
+AGENTKEYS_GATE_ENGINE_API_KEY_FILE=/etc/agentkeys/doubao.key \
+AGENTKEYS_GATE_ENGINE_MODEL=doubao-pro-32k \
+  ... (broker/memory/audit URLs + device key as above) \
+  cargo run -p agentkeys-gate
+```
+
+All config is `--flag` / env (see `--help`); nothing is hardcoded. The
+per-device actor can be supplied per request via the `X-AgentKeys-Actor` header
+(overrides `AGENTKEYS_GATE_DEFAULT_ACTOR`), so one gate instance can serve a
+fleet, or one-gate-per-device can bake the actor into config.
+
+## What is NOT here (deferred per the plan's own sequencing)
+
+- **Streaming** — turns are cap/audit-granular; `stream: true` is served as a
+  single non-streamed completion.
+- **Spend** — gated on single-use caps + mint-time budgeting (plan §4); the gate
+  does no payments.
+- **Deploy wiring** — the gate is not yet wired into `setup-broker-host.sh`; that
+  is a follow-up (a new systemd unit + nginx vhost for the CustomLLM endpoint).
+- **Memory WRITE-back**, the Tuya firmware spike (piece 1), and the
+  parent-control management view (piece 4) are separate follow-ups.
+
+## Tests
+
+```bash
+cargo test -p agentkeys-gate
+```
+
+`tests/gate_flow.rs` proves the four per-turn behaviours (inject + audit,
+denied-namespace non-fatal, broken-backend fails loud, pure-chat still audited)
+against offline doubles; `tests/http.rs` drives the real axum router (auth +
+health + a full echo turn).
diff --git a/crates/agentkeys-gate/src/auth.rs b/crates/agentkeys-gate/src/auth.rs
new file mode 100644
index 00000000..4e835048
--- /dev/null
+++ b/crates/agentkeys-gate/src/auth.rs
@@ -0,0 +1,136 @@
+//! Inbound auth for the HTTP transport.
+//!
+//! The vendor authenticates with a `Bearer <token>` (constant-time matched
+//! against the configured `<vendor_id>:<token>` registry, exactly like the MCP
+//! server). The per-device actor is conveyed in `X-AgentKeys-Actor`, or falls
+//! back to the gate's configured default actor (one-gate-per-device deploys).
+//!
+//! NOTE: this bearer authenticates the *vendor calling the gate*. The session
+//! JWT the gate forwards to the broker for cap-mint is the configured
+//! `agent_session_bearer` (the device actor's own JWT) — a separate secret.
+
+use crate::config::Config;
+use crate::error::{GateError, GateResult};
+
+/// Resolved caller identity for one request.
+#[derive(Debug, Clone)]
+pub struct CallerContext {
+    /// Which vendor token authenticated (or `"anonymous"` in dev mode).
+    pub vendor_id: String,
+    /// The device actor omni from `X-AgentKeys-Actor`, or `"*"` when the caller
+    /// didn't bind one (the gate then uses its configured default actor).
+    pub actor_omni: String,
+}
+
+/// Constant-time string compare — no early exit on the first differing byte, so
+/// token validation can't be timing-probed.
+fn constant_time_eq(a: &str, b: &str) -> bool {
+    let (a, b) = (a.as_bytes(), b.as_bytes());
+    if a.len() != b.len() {
+        return false;
+    }
+    let mut diff = 0u8;
+    for (x, y) in a.iter().zip(b.iter()) {
+        diff |= x ^ y;
+    }
+    diff == 0
+}
+
+/// Validate the `Authorization` header against the vendor-token registry and
+/// resolve the caller. When `allow_anonymous` is set AND no tokens are
+/// configured, the bearer check is skipped (dev / echo-engine hardware bring-up).
+pub fn authenticate(
+    config: &Config,
+    authorization: Option<&str>,
+    actor_header: Option<&str>,
+) -> GateResult<CallerContext> {
+    let actor_omni = match actor_header.map(str::trim).filter(|s| !s.is_empty()) {
+        Some(a) => a.to_string(),
+        None => "*".to_string(),
+    };
+
+    if config.vendor_tokens.is_empty() {
+        if config.allow_anonymous {
+            return Ok(CallerContext {
+                vendor_id: "anonymous".to_string(),
+                actor_omni,
+            });
+        }
+        return Err(GateError::Unauthorized(
+            "no vendor tokens configured — set AGENTKEYS_GATE_VENDOR_TOKENS, or \
+             AGENTKEYS_GATE_ALLOW_ANONYMOUS=1 for local/echo bring-up"
+                .into(),
+        ));
+    }
+
+    let token = authorization
+        .and_then(|h| h.strip_prefix("Bearer "))
+        .map(str::trim)
+        .filter(|t| !t.is_empty())
+        .ok_or_else(|| GateError::Unauthorized("missing or malformed Bearer token".into()))?;
+
+    // Check every entry (no early return) so a hit vs miss can't be timed.
+    let mut matched: Option<&str> = None;
+    for (vendor, expected) in &config.vendor_tokens {
+        if constant_time_eq(token, expected) {
+            matched = Some(vendor);
+        }
+    }
+
+    match matched {
+        Some(vendor) => Ok(CallerContext {
+            vendor_id: vendor.to_string(),
+            actor_omni,
+        }),
+        None => Err(GateError::Unauthorized("invalid vendor token".into())),
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    fn cfg() -> Config {
+        Config::for_tests().with_vendor_token("folotoy", "secret-abc")
+    }
+
+    #[test]
+    fn valid_bearer_resolves_vendor_and_actor() {
+        let ctx = authenticate(&cfg(), Some("Bearer secret-abc"), Some("0xdeadbeef")).unwrap();
+        assert_eq!(ctx.vendor_id, "folotoy");
+        assert_eq!(ctx.actor_omni, "0xdeadbeef");
+    }
+
+    #[test]
+    fn missing_actor_header_defaults_to_star() {
+        let ctx = authenticate(&cfg(), Some("Bearer secret-abc"), None).unwrap();
+        assert_eq!(ctx.actor_omni, "*");
+    }
+
+    #[test]
+    fn wrong_token_rejected() {
+        let err = authenticate(&cfg(), Some("Bearer nope"), None).unwrap_err();
+        assert!(matches!(err, GateError::Unauthorized(_)));
+    }
+
+    #[test]
+    fn missing_bearer_rejected() {
+        let err = authenticate(&cfg(), None, None).unwrap_err();
+        assert!(matches!(err, GateError::Unauthorized(_)));
+    }
+
+    #[test]
+    fn anonymous_allowed_when_configured_and_no_tokens() {
+        let mut c = Config::for_tests();
+        c.allow_anonymous = true;
+        let ctx = authenticate(&c, None, Some("0xabc")).unwrap();
+        assert_eq!(ctx.vendor_id, "anonymous");
+        assert_eq!(ctx.actor_omni, "0xabc");
+    }
+
+    #[test]
+    fn no_tokens_and_not_anonymous_is_unauthorized() {
+        let err = authenticate(&Config::for_tests(), Some("Bearer x"), None).unwrap_err();
+        assert!(matches!(err, GateError::Unauthorized(_)));
+    }
+}
diff --git a/crates/agentkeys-gate/src/backend.rs b/crates/agentkeys-gate/src/backend.rs
new file mode 100644
index 00000000..5669f721
--- /dev/null
+++ b/crates/agentkeys-gate/src/backend.rs
@@ -0,0 +1,64 @@
+//! Backend seam — the broker/worker RPCs the gate drives in-path.
+//!
+//! Like the MCP server (issue #203/#207), the gate owns NO persistent state and
+//! re-types NO wire body. The production backend IS the shared
+//! [`agentkeys_backend_client::BackendClient`]; the `Backend` trait is impl'd
+//! directly on it. The trait survives as the test seam — `tests/common`'s mock
+//! is its second impl, keeping the gate's unit tests offline + deterministic.
+
+use async_trait::async_trait;
+
+use agentkeys_backend_client::BackendClient;
+
+// One owner for every broker/worker wire shape (issue #203). Re-exported so the
+// gate's modules import `crate::backend::*`.
+pub use agentkeys_backend_client::{
+    AuditAppendInput, AuditAppendResult, BackendError, CapMintOp, CapMintRequest, CapToken,
+    MemoryGetInput, MemoryGetResult,
+};
+
+/// The three broker/worker calls the in-path gate makes per turn:
+/// mint a memory-read cap, read the namespace, append the gate-turn audit row.
+#[async_trait]
+pub trait Backend: Send + Sync {
+    async fn cap_mint(
+        &self,
+        op: CapMintOp,
+        req: CapMintRequest,
+        session_bearer: &str,
+    ) -> Result<CapToken, BackendError>;
+
+    async fn memory_get(&self, input: MemoryGetInput) -> Result<MemoryGetResult, BackendError>;
+
+    async fn audit_append(
+        &self,
+        input: AuditAppendInput,
+    ) -> Result<AuditAppendResult, BackendError>;
+}
+
+// The production backend IS the shared client (issue #203 — the ONE owner of the
+// cap-mint -> STS relay -> worker chain). Each method delegates to
+// `BackendClient`'s inherent method of the same name; inherent methods win in
+// resolution, so there is no recursion.
+#[async_trait]
+impl Backend for BackendClient {
+    async fn cap_mint(
+        &self,
+        op: CapMintOp,
+        req: CapMintRequest,
+        session_bearer: &str,
+    ) -> Result<CapToken, BackendError> {
+        self.cap_mint(op, req, session_bearer).await
+    }
+
+    async fn memory_get(&self, input: MemoryGetInput) -> Result<MemoryGetResult, BackendError> {
+        self.memory_get(input).await
+    }
+
+    async fn audit_append(
+        &self,
+        input: AuditAppendInput,
+    ) -> Result<AuditAppendResult, BackendError> {
+        self.audit_append(input).await
+    }
+}
diff --git a/crates/agentkeys-gate/src/config.rs b/crates/agentkeys-gate/src/config.rs
new file mode 100644
index 00000000..9649d871
--- /dev/null
+++ b/crates/agentkeys-gate/src/config.rs
@@ -0,0 +1,288 @@
+//! Runtime configuration — CLI flags + env vars, read ONCE at startup and
+//! treated as immutable (the `from_env`/inject pattern; AGENTS.md bans
+//! `std::env::set_var` in tests, so `Config::for_tests` builds the struct
+//! directly).
+
+use std::collections::HashMap;
+use std::net::SocketAddr;
+
+use clap::Parser;
+
+/// Default memory namespaces injected per turn — the canonical four
+/// (`apps/parent-control/lib/constants.ts`).
+const DEFAULT_NAMESPACES: &str = "personal,family,work,travel";
+const DEFAULT_CAP_TTL_SECONDS: u64 = 300;
+
+#[derive(Parser, Debug, Clone)]
+#[command(
+    name = "agentkeys-gate",
+    about = "AgentKeys in-path CustomLLM gate — cap-check + memory-inject + audit per LLM turn \
+             (Plan A: ai-device-platform.md §3)"
+)]
+pub struct Cli {
+    /// HTTP bind address. The OpenAI-compatible endpoint lives at
+    /// `POST /v1/chat/completions`; health at `GET /healthz`.
+    #[arg(long, env = "AGENTKEYS_GATE_LISTEN", default_value = "0.0.0.0:8077")]
+    pub listen: SocketAddr,
+
+    /// Broker base URL (cap-mint).
+    #[arg(long, env = "AGENTKEYS_BROKER_URL")]
+    pub broker_url: Option<String>,
+
+    /// Memory worker base URL.
+    #[arg(long, env = "AGENTKEYS_MEMORY_URL")]
+    pub memory_url: Option<String>,
+
+    /// Audit worker base URL.
+    #[arg(long, env = "AGENTKEYS_AUDIT_URL")]
+    pub audit_url: Option<String>,
+
+    /// Comma-separated `<vendor_id>:<bearer_token>` pairs the gate accepts.
+    /// Empty = the gate refuses every request (401) unless
+    /// `--allow-anonymous`.
+    #[arg(long, env = "AGENTKEYS_GATE_VENDOR_TOKENS", default_value = "")]
+    pub vendor_tokens: String,
+
+    /// Accept unauthenticated requests when no vendor tokens are configured.
+    /// For LOCAL / echo-engine hardware bring-up only — logged loudly at start.
+    #[arg(long, env = "AGENTKEYS_GATE_ALLOW_ANONYMOUS", default_value_t = false)]
+    pub allow_anonymous: bool,
+
+    /// Ambient device actor omni used when the request carries no
+    /// `X-AgentKeys-Actor` header (one-gate-per-device deploys).
+    #[arg(long, env = "AGENTKEYS_GATE_DEFAULT_ACTOR")]
+    pub default_actor: Option<String>,
+
+    /// Ambient operator (master) omni for cap-mint binding.
+    #[arg(long, env = "AGENTKEYS_GATE_DEFAULT_OPERATOR_OMNI")]
+    pub default_operator_omni: Option<String>,
+
+    /// Ambient device-key hash for cap-mint binding (used only when no K10
+    /// device key is loaded; see `AGENTKEYS_DEVICE_KEY_FILE`).
+    #[arg(long, env = "AGENTKEYS_GATE_DEFAULT_DEVICE_KEY_HASH")]
+    pub default_device_key_hash: Option<String>,
+
+    /// Agent session JWT (omni == the device actor) the gate forwards to the
+    /// broker for cap-mint + the per-actor STS relay.
+    #[arg(long, env = "AGENTKEYS_GATE_AGENT_SESSION_BEARER")]
+    pub agent_session_bearer: Option<String>,
+
+    /// Owner-only file (0600) holding the agent session JWT — preferred over the
+    /// inline flag so the JWT never rides the process list.
+    #[arg(long, env = "AGENTKEYS_GATE_AGENT_SESSION_BEARER_FILE")]
+    pub agent_session_bearer_file: Option<String>,
+
+    /// Memory IAM role ARN the memory worker assumes via web-identity (per-actor
+    /// S3 isolation).
+    #[arg(long, env = "AGENTKEYS_GATE_MEMORY_ROLE_ARN")]
+    pub memory_role_arn: Option<String>,
+
+    /// AWS region for the STS `AssumeRoleWithWebIdentity` call.
+    #[arg(long, env = "AWS_REGION", default_value = "us-east-1")]
+    pub aws_region: String,
+
+    /// Comma-separated memory namespaces injected per turn.
+    #[arg(long, env = "AGENTKEYS_GATE_MEMORY_NAMESPACES", default_value = DEFAULT_NAMESPACES)]
+    pub memory_namespaces: String,
+
+    /// Memory selection engine for injection: `passthrough` (default) or
+    /// `lexical`. Deterministic, no LLM in the gate path.
+    #[arg(long, env = "AGENTKEYS_MEMORY_ENGINE", default_value = "passthrough")]
+    pub memory_engine: String,
+
+    /// TTL (seconds) for the per-turn memory-read caps.
+    #[arg(long, env = "AGENTKEYS_GATE_CAP_TTL_SECONDS", default_value_t = DEFAULT_CAP_TTL_SECONDS)]
+    pub cap_ttl_seconds: u64,
+
+    /// Deny the turn if the gate-turn audit row fails to append (tamper-evident
+    /// audit is the product). Default off during staged rollout — matches the
+    /// `AGENTKEYS_WORKER_REQUIRE_AUDIT` posture.
+    #[arg(long, env = "AGENTKEYS_GATE_REQUIRE_AUDIT", default_value_t = false)]
+    pub require_audit: bool,
+
+    /// LLM engine: `echo` (local, no upstream — for hardware bring-up) or
+    /// `openai-compatible` (Volcano RTC CustomLLM → Doubao or any
+    /// `/v1/chat/completions` upstream).
+    #[arg(long, env = "AGENTKEYS_GATE_ENGINE", default_value = "echo")]
+    pub engine: String,
+
+    /// Upstream base URL for `--engine openai-compatible` (e.g.
+    /// `https://ark.cn-beijing.volces.com/api/v3`).
+    #[arg(long, env = "AGENTKEYS_GATE_ENGINE_BASE_URL")]
+    pub engine_base_url: Option<String>,
+
+    /// Upstream API key for `--engine openai-compatible`.
+    #[arg(long, env = "AGENTKEYS_GATE_ENGINE_API_KEY")]
+    pub engine_api_key: Option<String>,
+
+    /// Owner-only file (0600) holding the upstream API key — preferred over the
+    /// inline flag.
+    #[arg(long, env = "AGENTKEYS_GATE_ENGINE_API_KEY_FILE")]
+    pub engine_api_key_file: Option<String>,
+
+    /// Force a specific upstream model (Doubao SKU). When unset the caller's
+    /// `model` field is forwarded.
+    #[arg(long, env = "AGENTKEYS_GATE_ENGINE_MODEL")]
+    pub engine_model: Option<String>,
+}
+
+/// Which LLM engine the gate delegates the turn to.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub enum EngineKind {
+    Echo,
+    OpenAiCompatible {
+        base_url: String,
+        api_key: String,
+        model: Option<String>,
+    },
+}
+
+#[derive(Debug, Clone)]
+pub struct Config {
+    pub listen: SocketAddr,
+    pub broker_url: Option<String>,
+    pub memory_url: Option<String>,
+    pub audit_url: Option<String>,
+    /// vendor_id -> bearer_token
+    pub vendor_tokens: HashMap<String, String>,
+    pub allow_anonymous: bool,
+    pub default_actor: Option<String>,
+    pub default_operator_omni: Option<String>,
+    pub default_device_key_hash: Option<String>,
+    pub agent_session_bearer: Option<String>,
+    pub memory_role_arn: Option<String>,
+    pub aws_region: String,
+    pub memory_namespaces: Vec<String>,
+    pub memory_engine: String,
+    pub cap_ttl_seconds: u64,
+    pub require_audit: bool,
+    pub engine: EngineKind,
+}
+
+fn read_secret_file(path: &str, what: &str) -> anyhow::Result<Option<String>> {
+    let raw = std::fs::read_to_string(path)
+        .map_err(|e| anyhow::anyhow!("read {what} file {path}: {e}"))?;
+    let trimmed = raw.trim().to_string();
+    Ok(if trimmed.is_empty() {
+        None
+    } else {
+        Some(trimmed)
+    })
+}
+
+fn parse_csv(raw: &str) -> Vec<String> {
+    raw.split(',')
+        .map(str::trim)
+        .filter(|s| !s.is_empty())
+        .map(str::to_string)
+        .collect()
+}
+
+impl Config {
+    pub fn from_cli(cli: Cli) -> anyhow::Result<Self> {
+        let mut vendor_tokens = HashMap::new();
+        for pair in cli
+            .vendor_tokens
+            .split(',')
+            .filter(|s| !s.trim().is_empty())
+        {
+            let (vendor, token) = pair
+                .split_once(':')
+                .ok_or_else(|| anyhow::anyhow!("malformed vendor_token entry: {pair}"))?;
+            vendor_tokens.insert(vendor.trim().to_string(), token.trim().to_string());
+        }
+
+        // Prefer the owner-only bearer FILE over an inline bearer (keeps the JWT
+        // off the process list), same posture as the MCP server.
+        let agent_session_bearer = match cli.agent_session_bearer {
+            Some(b) => Some(b),
+            None => match cli.agent_session_bearer_file.as_deref() {
+                Some(path) => read_secret_file(path, "agent-session-bearer")?,
+                None => None,
+            },
+        };
+
+        let engine = match cli.engine.as_str() {
+            "echo" => EngineKind::Echo,
+            "openai-compatible" | "openai_compatible" => {
+                let base_url = cli.engine_base_url.ok_or_else(|| {
+                    anyhow::anyhow!(
+                        "--engine openai-compatible requires --engine-base-url \
+                         (env AGENTKEYS_GATE_ENGINE_BASE_URL)"
+                    )
+                })?;
+                let api_key = match cli.engine_api_key {
+                    Some(k) => Some(k),
+                    None => match cli.engine_api_key_file.as_deref() {
+                        Some(path) => read_secret_file(path, "engine-api-key")?,
+                        None => None,
+                    },
+                }
+                .ok_or_else(|| {
+                    anyhow::anyhow!(
+                        "--engine openai-compatible requires --engine-api-key or \
+                         --engine-api-key-file"
+                    )
+                })?;
+                EngineKind::OpenAiCompatible {
+                    base_url,
+                    api_key,
+                    model: cli.engine_model,
+                }
+            }
+            other => anyhow::bail!("unknown engine `{other}` (expected echo|openai-compatible)"),
+        };
+
+        let memory_namespaces = parse_csv(&cli.memory_namespaces);
+
+        Ok(Self {
+            listen: cli.listen,
+            broker_url: cli.broker_url,
+            memory_url: cli.memory_url,
+            audit_url: cli.audit_url,
+            vendor_tokens,
+            allow_anonymous: cli.allow_anonymous,
+            default_actor: cli.default_actor,
+            default_operator_omni: cli.default_operator_omni,
+            default_device_key_hash: cli.default_device_key_hash,
+            agent_session_bearer,
+            memory_role_arn: cli.memory_role_arn,
+            aws_region: cli.aws_region,
+            memory_namespaces,
+            memory_engine: cli.memory_engine,
+            cap_ttl_seconds: cli.cap_ttl_seconds,
+            require_audit: cli.require_audit,
+            engine,
+        })
+    }
+
+    /// Test builder — no parsing, no env reads. Echo engine, no namespaces,
+    /// caller injects the rest.
+    pub fn for_tests() -> Self {
+        Self {
+            listen: "127.0.0.1:0".parse().unwrap(),
+            broker_url: None,
+            memory_url: None,
+            audit_url: None,
+            vendor_tokens: HashMap::new(),
+            allow_anonymous: false,
+            default_actor: None,
+            default_operator_omni: None,
+            default_device_key_hash: None,
+            agent_session_bearer: None,
+            memory_role_arn: None,
+            aws_region: "us-east-1".to_string(),
+            memory_namespaces: Vec::new(),
+            memory_engine: "passthrough".to_string(),
+            cap_ttl_seconds: DEFAULT_CAP_TTL_SECONDS,
+            require_audit: false,
+            engine: EngineKind::Echo,
+        }
+    }
+
+    pub fn with_vendor_token(mut self, vendor: &str, token: &str) -> Self {
+        self.vendor_tokens
+            .insert(vendor.to_string(), token.to_string());
+        self
+    }
+}
diff --git a/crates/agentkeys-gate/src/engine.rs b/crates/agentkeys-gate/src/engine.rs
new file mode 100644
index 00000000..ec11f21b
--- /dev/null
+++ b/crates/agentkeys-gate/src/engine.rs
@@ -0,0 +1,214 @@
+//! The engine seam (Plan A piece 5) — the ONLY swappable part of the platform.
+//!
+//! The gate owns identity · cap · memory-inject · audit (the red line). The
+//! engine owns just the LLM turn: "given an OpenAI request, return an OpenAI
+//! response from the upstream." Plan A ships ONE engine — an OpenAI-compatible
+//! HTTP upstream (Volcano RTC CustomLLM → Doubao). Plan B adds Bedrock/Anthropic,
+//! BytePlus, Qwen, and the self-host runtime behind this SAME trait, so B = A +
+//! engines with no rewrite.
+//!
+//! The red-line invariant is structural: an `EngineAdapter` is handed only the
+//! (already memory-injected, already authorized) request + a base URL + an API
+//! key. It never sees K3/K10/K11, cap-tokens, the actor identity, or the audit
+//! trail — those live at the broker + worker + gate, unreachable from a swapped
+//! engine.
+
+use std::time::{SystemTime, UNIX_EPOCH};
+
+use async_trait::async_trait;
+
+use crate::openai::{ChatCompletionRequest, ChatCompletionResponse, ChatMessage, Choice, Usage};
+
+#[derive(thiserror::Error, Debug)]
+pub enum EngineError {
+    #[error("engine not configured: {0}")]
+    NotConfigured(&'static str),
+    #[error("upstream HTTP error ({status}): {body}")]
+    Http { status: u16, body: String },
+    #[error("upstream transport error: {0}")]
+    Transport(String),
+    #[error("upstream response parse error: {0}")]
+    Parse(String),
+}
+
+/// The pluggable LLM engine. Implementations call exactly one upstream and
+/// return its completion — no policy, no memory, no audit (those are the gate's).
+#[async_trait]
+pub trait EngineAdapter: Send + Sync {
+    /// Stable name for audit provenance (`"openai-compatible"`, `"echo"`, …).
+    fn name(&self) -> &'static str;
+
+    /// Run one completion. `req.messages` already carries any gate-injected
+    /// memory system message; the engine forwards it verbatim.
+    async fn complete(
+        &self,
+        req: ChatCompletionRequest,
+    ) -> Result<ChatCompletionResponse, EngineError>;
+}
+
+fn now_unix() -> u64 {
+    SystemTime::now()
+        .duration_since(UNIX_EPOCH)
+        .map(|d| d.as_secs())
+        .unwrap_or(0)
+}
+
+/// Plan-A engine: an OpenAI-compatible upstream (Volcano RTC CustomLLM → Doubao,
+/// or any `/v1/chat/completions` server). `base_url` is the upstream root (e.g.
+/// `https://ark.cn-beijing.volces.com/api/v3`); the adapter appends
+/// `/chat/completions`.
+pub struct OpenAiCompatibleEngine {
+    client: reqwest::Client,
+    base_url: String,
+    api_key: String,
+    /// Optional model override. When `None` the caller's `req.model` is used —
+    /// so the vendor picks the Doubao SKU.
+    model: Option<String>,
+}
+
+impl OpenAiCompatibleEngine {
+    pub fn new(base_url: String, api_key: String, model: Option<String>) -> Self {
+        Self {
+            client: reqwest::Client::new(),
+            base_url: base_url.trim_end_matches('/').to_string(),
+            api_key,
+            model,
+        }
+    }
+}
+
+#[async_trait]
+impl EngineAdapter for OpenAiCompatibleEngine {
+    fn name(&self) -> &'static str {
+        "openai-compatible"
+    }
+
+    async fn complete(
+        &self,
+        mut req: ChatCompletionRequest,
+    ) -> Result<ChatCompletionResponse, EngineError> {
+        if self.api_key.is_empty() {
+            return Err(EngineError::NotConfigured("engine api key"));
+        }
+        if let Some(model) = &self.model {
+            req.model = model.clone();
+        }
+        // The gate never streams to the upstream — the per-turn cap/audit work is
+        // turn-granular, so a single completion is requested even if the caller
+        // asked to stream.
+        req.stream = false;
+
+        let url = format!("{}/chat/completions", self.base_url);
+        let resp = self
+            .client
+            .post(&url)
+            .bearer_auth(&self.api_key)
+            .json(&req)
+            .send()
+            .await
+            .map_err(|e| EngineError::Transport(e.to_string()))?;
+
+        if !resp.status().is_success() {
+            let status = resp.status().as_u16();
+            let body = resp.text().await.unwrap_or_default();
+            return Err(EngineError::Http { status, body });
+        }
+
+        resp.json::<ChatCompletionResponse>()
+            .await
+            .map_err(|e| EngineError::Parse(e.to_string()))
+    }
+}
+
+/// Local engine that echoes the conversation instead of calling an upstream LLM.
+///
+/// This is NOT a test stub — it lets the whole in-path gate (device → Volcano
+/// RTC → gate → response) run on real hardware BEFORE Doubao credentials exist,
+/// which is exactly Plan A §3's "prove the gate in-path on real hardware" goal.
+/// It surfaces the injected memory in its reply so an operator can SEE that the
+/// gate read + injected the user's memory.
+pub struct EchoEngine;
+
+#[async_trait]
+impl EngineAdapter for EchoEngine {
+    fn name(&self) -> &'static str {
+        "echo"
+    }
+
+    async fn complete(
+        &self,
+        req: ChatCompletionRequest,
+    ) -> Result<ChatCompletionResponse, EngineError> {
+        let injected = req
+            .messages
+            .iter()
+            .find(|m| m.role == "system")
+            .map(ChatMessage::text)
+            .filter(|s| !s.is_empty());
+        let user = req
+            .messages
+            .iter()
+            .rev()
+            .find(|m| m.role == "user")
+            .map(ChatMessage::text)
+            .unwrap_or_default();
+
+        let reply = match injected {
+            Some(mem) => format!("[echo] You said: {user}\n[echo] Using your memory:\n{mem}"),
+            None => format!("[echo] You said: {user}"),
+        };
+
+        Ok(ChatCompletionResponse {
+            id: "echo-0".to_string(),
+            object: "chat.completion".to_string(),
+            created: now_unix(),
+            model: req.model,
+            choices: vec![Choice {
+                index: 0,
+                message: ChatMessage::new("assistant", reply),
+                finish_reason: Some("stop".to_string()),
+            }],
+            usage: Usage::default(),
+            extra: serde_json::Map::new(),
+        })
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::openai::ChatMessage;
+
+    #[tokio::test]
+    async fn echo_surfaces_injected_memory() {
+        let mut req = ChatCompletionRequest {
+            model: "m".into(),
+            messages: vec![ChatMessage::new("user", "what do you know about me?")],
+            stream: false,
+            extra: serde_json::Map::new(),
+        };
+        req.prepend_system("Allergic to peanuts.");
+        let resp = EchoEngine.complete(req).await.unwrap();
+        let text = resp.choices[0].message.text();
+        assert!(
+            text.contains("peanuts"),
+            "echo reflects injected memory: {text}"
+        );
+        assert!(text.contains("what do you know about me?"));
+    }
+
+    #[tokio::test]
+    async fn openai_engine_without_key_is_not_configured() {
+        let engine = OpenAiCompatibleEngine::new("http://localhost:1".into(), "".into(), None);
+        let req = ChatCompletionRequest {
+            model: "m".into(),
+            messages: vec![ChatMessage::new("user", "hi")],
+            stream: false,
+            extra: serde_json::Map::new(),
+        };
+        assert!(matches!(
+            engine.complete(req).await,
+            Err(EngineError::NotConfigured(_))
+        ));
+    }
+}
diff --git a/crates/agentkeys-gate/src/error.rs b/crates/agentkeys-gate/src/error.rs
new file mode 100644
index 00000000..036fcefe
--- /dev/null
+++ b/crates/agentkeys-gate/src/error.rs
@@ -0,0 +1,82 @@
+//! Gate error type. Library errors use `thiserror` (AGENTS.md convention); the
+//! HTTP layer maps each variant to an OpenAI-shaped error envelope + status.
+//!
+//! The split mirrors the "no silent fallback" rule: a broken backend path
+//! (broker/worker unreachable or 5xx) is a LOUD `Backend` error surfaced as 502,
+//! never a quiet "pretend memory was empty". A per-namespace authorization deny
+//! (HTTP 403) is NOT an error here — it is a recorded, non-fatal outcome handled
+//! in `gate::inject_memory`.
+
+use crate::openai::ApiError;
+
+#[derive(thiserror::Error, Debug)]
+pub enum GateError {
+    /// Inbound request rejected before any backend work (bad bearer / missing
+    /// vendor token).
+    #[error("unauthorized: {0}")]
+    Unauthorized(String),
+
+    /// The caller is authenticated but not allowed (e.g. actor mismatch).
+    #[error("forbidden: {0}")]
+    Forbidden(String),
+
+    /// Malformed OpenAI request the gate can't act on.
+    #[error("bad request: {0}")]
+    BadRequest(String),
+
+    /// A required identity field (actor / operator / device-key-hash) was
+    /// neither supplied per-request nor configured. A deploy/config error.
+    #[error("identity not resolved: {0}")]
+    Identity(String),
+
+    /// The broker/worker chain is broken (unreachable, 5xx, parse, or not
+    /// configured). Surfaced loud — we do NOT degrade silently past a broken
+    /// gate path.
+    #[error("backend error: {0}")]
+    Backend(String),
+
+    /// The upstream LLM engine failed (Doubao/Volcano unreachable or errored).
+    #[error("engine error: {0}")]
+    Engine(String),
+
+    /// Audit append failed AND `AGENTKEYS_GATE_REQUIRE_AUDIT=1` is set — the
+    /// turn is denied because it could not be recorded (tamper-evident audit is
+    /// the product).
+    #[error("audit error: {0}")]
+    Audit(String),
+
+    #[error("internal error: {0}")]
+    Internal(String),
+}
+
+impl GateError {
+    /// HTTP status for this error.
+    pub fn status(&self) -> u16 {
+        match self {
+            GateError::Unauthorized(_) => 401,
+            GateError::Forbidden(_) => 403,
+            GateError::BadRequest(_) => 400,
+            GateError::Identity(_) => 500,
+            GateError::Backend(_) | GateError::Engine(_) => 502,
+            GateError::Audit(_) | GateError::Internal(_) => 500,
+        }
+    }
+
+    /// The OpenAI `error.type` discriminator a vendor's OpenAI client expects.
+    fn openai_type(&self) -> &'static str {
+        match self {
+            GateError::Unauthorized(_) => "authentication_error",
+            GateError::Forbidden(_) => "permission_error",
+            GateError::BadRequest(_) => "invalid_request_error",
+            GateError::Backend(_) | GateError::Engine(_) => "upstream_error",
+            GateError::Identity(_) | GateError::Audit(_) | GateError::Internal(_) => "api_error",
+        }
+    }
+
+    /// Render as the OpenAI error envelope returned in the HTTP body.
+    pub fn to_api_error(&self) -> ApiError {
+        ApiError::new(self.openai_type(), self.to_string())
+    }
+}
+
+pub type GateResult<T> = Result<T, GateError>;
diff --git a/crates/agentkeys-gate/src/gate.rs b/crates/agentkeys-gate/src/gate.rs
new file mode 100644
index 00000000..7e1d9389
--- /dev/null
+++ b/crates/agentkeys-gate/src/gate.rs
@@ -0,0 +1,383 @@
+//! The in-path gate (Plan A piece 2). One LLM turn =
+//!
+//!   resolve identity -> cap-check + memory-inject (per namespace) ->
+//!   engine completion -> gate-turn audit.
+//!
+//! Reuses the shipped backend verbatim: each namespace is read by minting a
+//! broker `memory-get` cap (`CapMintOp::MemoryGet`, service `memory:<ns>`) and
+//! calling the memory worker — the SAME path the MCP `memory.get` tool uses, so
+//! per-namespace cap isolation + the worker-side memory audit (#229) hold
+//! unchanged. The gate then appends ONE additional `GateTurn` audit row so a
+//! turn is on the ledger even when it injected no memory.
+
+use std::sync::Arc;
+
+use agentkeys_core::audit::AuditOpKind;
+use agentkeys_memory_engine::{engine_from_name, select_blob, MemoryEngine, SelectionBudget};
+use agentkeys_protocol::{normalize_omni_0x, service_memory};
+use base64::Engine as _;
+use serde_json::json;
+
+use crate::auth::CallerContext;
+use crate::backend::{Backend, BackendError, CapMintOp, CapMintRequest, MemoryGetInput};
+use crate::config::Config;
+use crate::engine::EngineAdapter;
+use crate::error::{GateError, GateResult};
+use crate::openai::ChatCompletionRequest;
+
+/// Canonical audit result byte (`agentkeys_core::audit` — Success = 0).
+const AUDIT_RESULT_SUCCESS: u8 = 0;
+/// Cap the audited intent text so a huge prompt can't bloat the envelope.
+const MAX_INTENT_CHARS: usize = 1024;
+/// OpenAI tool-calling fields the gate refuses to forward (it does not yet
+/// enforce tool routing — forwarding them would bypass AgentKeys enforcement at
+/// the upstream). See `docs/plan/shared-user-memory-delegation.md` §4.
+const UNSUPPORTED_TOOL_FIELDS: &[&str] = &["tools", "tool_choice", "functions", "function_call"];
+
+/// Resolved per-turn identity (all normalized to `0x`-omni shape).
+struct Identity {
+    operator_omni: String,
+    actor_omni: String,
+    device_key_hash: String,
+}
+
+/// What happened for one namespace's memory read this turn — recorded in the
+/// audit row. `Denied`/`Empty` are non-fatal; a broken backend is a hard error
+/// raised before any outcome is recorded.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+enum NamespaceOutcome {
+    Injected(usize),
+    Empty,
+    Denied,
+}
+
+impl NamespaceOutcome {
+    fn label(self) -> &'static str {
+        match self {
+            NamespaceOutcome::Injected(_) => "injected",
+            NamespaceOutcome::Empty => "empty",
+            NamespaceOutcome::Denied => "denied",
+        }
+    }
+}
+
+/// The gate: shared, immutable, cloneable handles wired once at startup.
+pub struct Gate {
+    config: Config,
+    backend: Arc<dyn Backend>,
+    llm: Arc<dyn EngineAdapter>,
+    memory_selector: Box<dyn MemoryEngine>,
+    memory_budget: SelectionBudget,
+}
+
+impl Gate {
+    /// Production constructor — builds the memory-selection engine + budget from
+    /// config/env.
+    pub fn new(config: Config, backend: Arc<dyn Backend>, llm: Arc<dyn EngineAdapter>) -> Self {
+        let memory_selector = engine_from_name(&config.memory_engine);
+        let memory_budget = SelectionBudget::from_env();
+        Self {
+            config,
+            backend,
+            llm,
+            memory_selector,
+            memory_budget,
+        }
+    }
+
+    pub fn config(&self) -> &Config {
+        &self.config
+    }
+
+    /// Handle one OpenAI `/v1/chat/completions` turn through the gate.
+    pub async fn handle_chat_completion(
+        &self,
+        caller: &CallerContext,
+        mut req: ChatCompletionRequest,
+    ) -> GateResult<crate::openai::ChatCompletionResponse> {
+        if req.messages.is_empty() {
+            return Err(GateError::BadRequest("messages must not be empty".into()));
+        }
+        // Tool-calling is NOT gate-enforced in the MVP. Forwarding `tools` /
+        // `tool_choice` / `functions` to the upstream would let the model call
+        // tools DIRECTLY at the upstream, outside AgentKeys cap/audit enforcement
+        // (the "in-path" property would silently leak). Refuse rather than
+        // forward — gate-enforced, worker-backed tool routing is a follow-up.
+        if let Some(field) = UNSUPPORTED_TOOL_FIELDS
+            .iter()
+            .find(|f| req.extra.contains_key(**f))
+        {
+            return Err(GateError::BadRequest(format!(
+                "`{field}` is not supported: the gate does not yet enforce tool-calling, so it \
+                 refuses to forward it to the upstream (that would bypass AgentKeys enforcement). \
+                 Remove it, or use a worker-backed action path."
+            )));
+        }
+        let ident = self.resolve_identity(caller)?;
+        let intent = truncate(&req.last_user_text(), MAX_INTENT_CHARS);
+
+        // ── cap-check + memory-inject ───────────────────────────────────────
+        let (context, outcomes) = self.inject_memory(&ident).await?;
+        if let Some(ctx) = context {
+            req.prepend_system(ctx);
+        }
+
+        // ── engine turn ─────────────────────────────────────────────────────
+        // Log the full upstream error for the operator, but return only the
+        // safe category to the vendor caller — never echo an upstream response
+        // body across the trust boundary.
+        let response = self.llm.complete(req).await.map_err(|e| {
+            tracing::warn!(error = %e, "engine call failed");
+            GateError::Engine(safe_engine_message(&e))
+        })?;
+
+        // ── gate-turn audit ─────────────────────────────────────────────────
+        self.audit_turn(&ident, &outcomes, &response, intent)
+            .await?;
+
+        Ok(response)
+    }
+
+    fn resolve_identity(&self, caller: &CallerContext) -> GateResult<Identity> {
+        let actor = if caller.actor_omni != "*" {
+            caller.actor_omni.as_str()
+        } else {
+            self.config.default_actor.as_deref().ok_or_else(|| {
+                GateError::Identity(
+                    "no actor: request carried no X-AgentKeys-Actor and no \
+                     AGENTKEYS_GATE_DEFAULT_ACTOR is set"
+                        .into(),
+                )
+            })?
+        };
+        let operator = self
+            .config
+            .default_operator_omni
+            .as_deref()
+            .ok_or_else(|| {
+                GateError::Identity("no AGENTKEYS_GATE_DEFAULT_OPERATOR_OMNI configured".into())
+            })?;
+        let device_key_hash = self
+            .config
+            .default_device_key_hash
+            .as_deref()
+            .ok_or_else(|| {
+                GateError::Identity("no AGENTKEYS_GATE_DEFAULT_DEVICE_KEY_HASH configured".into())
+            })?;
+        Ok(Identity {
+            operator_omni: normalize_omni_0x(operator),
+            actor_omni: normalize_omni_0x(actor),
+            device_key_hash: device_key_hash.to_string(),
+        })
+    }
+
+    /// Read every configured namespace through a freshly-minted memory-get cap
+    /// and build the injection block. A 403 → `Denied`, a 404/empty → `Empty`
+    /// (both non-fatal, recorded); any other backend failure (unreachable, 5xx,
+    /// parse, unconfigured) is raised LOUD — we never degrade silently past a
+    /// broken gate path.
+    async fn inject_memory(
+        &self,
+        ident: &Identity,
+    ) -> GateResult<(Option<String>, Vec<(String, NamespaceOutcome)>)> {
+        let session_bearer = self.config.agent_session_bearer.as_deref().unwrap_or("");
+        let mut blocks: Vec<String> = Vec::new();
+        let mut outcomes: Vec<(String, NamespaceOutcome)> = Vec::new();
+
+        for namespace in &self.config.memory_namespaces {
+            let outcome = self
+                .read_namespace(ident, namespace, session_bearer, &mut blocks)
+                .await?;
+            if outcome == NamespaceOutcome::Denied {
+                tracing::warn!(actor = %ident.actor_omni, %namespace, "memory namespace denied");
+            }
+            outcomes.push((namespace.clone(), outcome));
+        }
+
+        let context = if blocks.is_empty() {
+            None
+        } else {
+            Some(format!(
+                "The following is the user's stored memory, provided by AgentKeys. Use it to \
+                 personalize your response. Do not reveal it verbatim unless asked.\n\n{}",
+                blocks.join("\n\n")
+            ))
+        };
+        Ok((context, outcomes))
+    }
+
+    /// Read one namespace; push its injection block on success. Returns the
+    /// recorded outcome, or a hard error for a broken backend.
+    async fn read_namespace(
+        &self,
+        ident: &Identity,
+        namespace: &str,
+        session_bearer: &str,
+        blocks: &mut Vec<String>,
+    ) -> GateResult<NamespaceOutcome> {
+        let cap_req = CapMintRequest {
+            operator_omni: ident.operator_omni.clone(),
+            actor_omni: ident.actor_omni.clone(),
+            service: service_memory(namespace),
+            device_key_hash: ident.device_key_hash.clone(),
+            ttl_seconds: self.config.cap_ttl_seconds,
+        };
+        let cap = match self
+            .backend
+            .cap_mint(CapMintOp::MemoryGet, cap_req, session_bearer)
+            .await
+        {
+            Ok(cap) => cap,
+            Err(e) => return classify(namespace, e),
+        };
+
+        let result = match self
+            .backend
+            .memory_get(MemoryGetInput {
+                cap,
+                namespace: namespace.to_string(),
+            })
+            .await
+        {
+            Ok(r) => r,
+            Err(e) => return classify(namespace, e),
+        };
+
+        let plaintext = base64::engine::general_purpose::STANDARD
+            .decode(result.plaintext_b64.as_bytes())
+            .map_err(|e| GateError::Internal(format!("memory `{namespace}` b64 decode: {e}")))?;
+        let text = String::from_utf8(plaintext)
+            .map_err(|e| GateError::Internal(format!("memory `{namespace}` utf8: {e}")))?;
+
+        let selected = select_blob(
+            self.memory_selector.as_ref(),
+            None,
+            &text,
+            &self.memory_budget,
+        );
+        if selected.trim().is_empty() {
+            return Ok(NamespaceOutcome::Empty);
+        }
+        let bytes = selected.len();
+        blocks.push(format!("## {}\n{}", service_memory(namespace), selected));
+        Ok(NamespaceOutcome::Injected(bytes))
+    }
+
+    /// Append the `GateTurn` audit row. Best-effort by default (logged on
+    /// failure); hard-fails only when `require_audit` is set.
+    async fn audit_turn(
+        &self,
+        ident: &Identity,
+        outcomes: &[(String, NamespaceOutcome)],
+        response: &crate::openai::ChatCompletionResponse,
+        intent: String,
+    ) -> GateResult<()> {
+        let injected_bytes: usize = outcomes
+            .iter()
+            .map(|(_, o)| match o {
+                NamespaceOutcome::Injected(b) => *b,
+                _ => 0,
+            })
+            .sum();
+        let namespaces: Vec<_> = outcomes
+            .iter()
+            .map(|(ns, o)| json!({ "namespace": ns, "outcome": o.label() }))
+            .collect();
+        let op_body = json!({
+            "engine": self.llm.name(),
+            "model": response.model,
+            "namespaces": namespaces,
+            "injected_bytes": injected_bytes,
+            "prompt_tokens": response.usage.prompt_tokens,
+            "completion_tokens": response.usage.completion_tokens,
+        });
+
+        let input = crate::backend::AuditAppendInput {
+            operator_omni: ident.operator_omni.clone(),
+            actor_omni: ident.actor_omni.clone(),
+            op_kind: AuditOpKind::GateTurn as u8,
+            op_body,
+            result: AUDIT_RESULT_SUCCESS,
+            intent_text: if intent.is_empty() {
+                None
+            } else {
+                Some(intent)
+            },
+        };
+
+        match self.backend.audit_append(input).await {
+            Ok(r) => {
+                tracing::info!(
+                    actor = %ident.actor_omni,
+                    envelope_hash = %r.envelope_hash,
+                    "gate-turn audited"
+                );
+                Ok(())
+            }
+            Err(e) if self.config.require_audit => {
+                Err(GateError::Audit(format!("gate-turn audit failed: {e}")))
+            }
+            Err(e) => {
+                tracing::warn!(
+                    actor = %ident.actor_omni,
+                    error = %e,
+                    "gate-turn audit append failed (non-fatal; set \
+                     AGENTKEYS_GATE_REQUIRE_AUDIT=1 to enforce)"
+                );
+                Ok(())
+            }
+        }
+    }
+}
+
+/// Map a per-namespace backend error to a recorded outcome (403 → denied,
+/// 404 → empty) or a hard `Backend` error for anything that signals the gate
+/// path itself is broken.
+fn classify(namespace: &str, err: BackendError) -> GateResult<NamespaceOutcome> {
+    match err {
+        BackendError::Http { status: 403, .. } => Ok(NamespaceOutcome::Denied),
+        BackendError::Http { status: 404, .. } => Ok(NamespaceOutcome::Empty),
+        other => {
+            // Log the full error (including any upstream body) for the operator,
+            // but return only the safe category to the vendor caller — never
+            // echo a broker/worker response body across the trust boundary.
+            tracing::warn!(%namespace, error = %other, "memory backend failure");
+            Err(GateError::Backend(format!(
+                "memory namespace `{namespace}`: {}",
+                safe_backend_message(&other)
+            )))
+        }
+    }
+}
+
+/// User-facing rendering of a backend error — status/category only, never the
+/// upstream response body (which may carry first-party internal detail). The
+/// full error is logged for the operator at the call site.
+fn safe_backend_message(err: &BackendError) -> String {
+    match err {
+        BackendError::Http { status, .. } => format!("backend HTTP {status}"),
+        BackendError::Transport(_) => "backend transport error".to_string(),
+        BackendError::Parse(_) => "backend response parse error".to_string(),
+        BackendError::NotConfigured(what) => format!("backend not configured: {what}"),
+    }
+}
+
+/// User-facing rendering of an engine error — status/category only, never the
+/// upstream LLM response body.
+fn safe_engine_message(err: &crate::engine::EngineError) -> String {
+    use crate::engine::EngineError;
+    match err {
+        EngineError::Http { status, .. } => format!("upstream HTTP {status}"),
+        EngineError::Transport(_) => "upstream transport error".to_string(),
+        EngineError::Parse(_) => "upstream response parse error".to_string(),
+        EngineError::NotConfigured(what) => format!("engine not configured: {what}"),
+    }
+}
+
+fn truncate(s: &str, max: usize) -> String {
+    if s.chars().count() <= max {
+        s.to_string()
+    } else {
+        s.chars().take(max).collect()
+    }
+}
diff --git a/crates/agentkeys-gate/src/lib.rs b/crates/agentkeys-gate/src/lib.rs
new file mode 100644
index 00000000..531accf3
--- /dev/null
+++ b/crates/agentkeys-gate/src/lib.rs
@@ -0,0 +1,33 @@
+//! `agentkeys-gate` — the in-path CustomLLM gate (Plan A, `ai-device-platform.md`
+//! §3, pieces 2 + 5).
+//!
+//! An OpenAI-compatible `/v1/chat/completions` endpoint that Volcano RTC's
+//! CustomLLM mode posts to. Per turn it does, in order:
+//!
+//!   1. **cap-check + memory-inject** — mint a broker `memory-get` cap per
+//!      namespace (`service = memory:<ns>`) and read it via the memory worker,
+//!      then inject the selected lines as a leading `system` message;
+//!   2. **engine completion** — forward the (now memory-augmented) request to a
+//!      swappable [`engine::EngineAdapter`] (Plan A: an OpenAI-compatible upstream
+//!      = Volcano → Doubao; or the local `echo` engine for hardware bring-up);
+//!   3. **audit** — append a `GateTurn` row so every gated turn is on the ledger.
+//!
+//! It owns NO new wire shape and NO persistent state: identity · cap · memory ·
+//! audit all route through the shipped [`agentkeys_backend_client::BackendClient`]
+//! (the red line — none of those ever live inside the swappable engine).
+//!
+//! `A ⊂ B`: the [`engine::EngineAdapter`] seam is the only swappable part, so
+//! Plan B (self-host runtime, more model vendors) is "A + engines", not a rewrite.
+
+pub mod auth;
+pub mod backend;
+pub mod config;
+pub mod engine;
+pub mod error;
+pub mod gate;
+pub mod openai;
+pub mod server;
+
+pub use config::{Config, EngineKind};
+pub use error::{GateError, GateResult};
+pub use gate::Gate;
diff --git a/crates/agentkeys-gate/src/main.rs b/crates/agentkeys-gate/src/main.rs
new file mode 100644
index 00000000..6df52d4a
--- /dev/null
+++ b/crates/agentkeys-gate/src/main.rs
@@ -0,0 +1,87 @@
+//! Entry point — parse CLI, build the backend + engine, run the gate's HTTP
+//! server.
+
+use std::sync::Arc;
+
+use clap::Parser;
+
+use agentkeys_backend_client::BackendClient;
+use agentkeys_gate::{
+    backend::Backend,
+    config::{Cli, Config, EngineKind},
+    engine::{EchoEngine, EngineAdapter, OpenAiCompatibleEngine},
+    gate::Gate,
+    server,
+};
+
+#[tokio::main]
+async fn main() -> anyhow::Result<()> {
+    // rustls 0.23 needs a process-level CryptoProvider before any HTTPS work
+    // (the upstream LLM call via reqwest rustls-tls). Install `ring` explicitly.
+    let _ = rustls::crypto::ring::default_provider().install_default();
+
+    tracing_subscriber::fmt()
+        .with_env_filter(
+            tracing_subscriber::EnvFilter::try_from_default_env()
+                .unwrap_or_else(|_| tracing_subscriber::EnvFilter::new("info")),
+        )
+        .with_writer(std::io::stderr)
+        .init();
+
+    let cli = Cli::parse();
+    let config = Config::from_cli(cli)?;
+
+    if config.allow_anonymous && config.vendor_tokens.is_empty() {
+        tracing::warn!(
+            "AGENTKEYS_GATE_ALLOW_ANONYMOUS is set with no vendor tokens — the gate accepts \
+             UNAUTHENTICATED requests. Use only for local / echo-engine bring-up."
+        );
+    }
+
+    // The gate never re-types the broker/worker chain: the backend IS the shared
+    // BackendClient (#203). Load the K10 device key so every cap-mint is
+    // proof-of-possession-signed (#76); without it cap-mint fails at the broker.
+    let mut client = BackendClient::new(
+        config.broker_url.clone(),
+        config.memory_url.clone(),
+        config.audit_url.clone(),
+        None, // cred_url — the gate does not fetch credentials in the MVP
+        config.agent_session_bearer.clone(),
+        config.memory_role_arn.clone(),
+        None, // vault_role_arn — no cred path
+        config.aws_region.clone(),
+    );
+    match agentkeys_core::device_crypto::load_device_key_from_env() {
+        Some(dk) => client = client.with_device_key(Arc::new(dk)),
+        None => tracing::warn!(
+            "no K10 device key loaded — cap-mint will fail (set AGENTKEYS_DEVICE_KEY_FILE to the \
+             device's registered key; issue #76)"
+        ),
+    }
+    let backend: Arc<dyn Backend> = Arc::new(client);
+
+    let llm: Arc<dyn EngineAdapter> = match config.engine.clone() {
+        EngineKind::Echo => {
+            tracing::info!("engine: echo (local — no upstream LLM)");
+            Arc::new(EchoEngine)
+        }
+        EngineKind::OpenAiCompatible {
+            base_url,
+            api_key,
+            model,
+        } => {
+            tracing::info!(%base_url, model = ?model, "engine: openai-compatible upstream");
+            Arc::new(OpenAiCompatibleEngine::new(base_url, api_key, model))
+        }
+    };
+
+    let listen = config.listen;
+    let gate = Arc::new(Gate::new(config, backend, llm));
+    let app = server::router(gate);
+
+    let listener = tokio::net::TcpListener::bind(&listen).await?;
+    tracing::info!(addr = %listen, "agentkeys-gate listening (HTTP, OpenAI-compatible)");
+    axum::serve(listener, app).await?;
+
+    Ok(())
+}
diff --git a/crates/agentkeys-gate/src/openai.rs b/crates/agentkeys-gate/src/openai.rs
new file mode 100644
index 00000000..12d17377
--- /dev/null
+++ b/crates/agentkeys-gate/src/openai.rs
@@ -0,0 +1,218 @@
+//! OpenAI-compatible `/v1/chat/completions` wire types — the gate's PUBLIC
+//! contract.
+//!
+//! This is the shape Volcano RTC's **CustomLLM mode** posts to the gate, and
+//! (because the Plan-A engine is itself an OpenAI-compatible upstream) the same
+//! shape the gate forwards to Doubao. Modelling it once here keeps both
+//! directions in lock-step.
+//!
+//! Unknown fields are preserved via `#[serde(flatten)]` so vendor-specific
+//! params (`temperature`, `top_p`, `tools`, …) pass straight through to the
+//! upstream — the gate only ever *adds* a memory system message, never drops
+//! caller fields.
+
+use serde::{Deserialize, Serialize};
+use serde_json::{Map, Value};
+
+/// One chat message. `role` + `content` are modelled explicitly (the gate reads
+/// the role to find the user's intent and injects a `system` message); every
+/// other field (`name`, `tool_calls`, `tool_call_id`, …) rides through `extra`.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct ChatMessage {
+    pub role: String,
+    /// OpenAI content is a string OR an array of typed parts (multimodal), so
+    /// it stays a `Value`. `null` for assistant tool-call stubs.
+    #[serde(default, skip_serializing_if = "Value::is_null")]
+    pub content: Value,
+    #[serde(flatten)]
+    pub extra: Map<String, Value>,
+}
+
+impl ChatMessage {
+    /// Build a plain `{role, content}` message.
+    pub fn new(role: &str, content: impl Into<String>) -> Self {
+        Self {
+            role: role.to_string(),
+            content: Value::String(content.into()),
+            extra: Map::new(),
+        }
+    }
+
+    /// Flatten `content` to text — handles both the string form and the
+    /// `[{type:"text", text:"…"}, …]` array form. Non-text parts are skipped.
+    pub fn text(&self) -> String {
+        match &self.content {
+            Value::String(s) => s.clone(),
+            Value::Array(parts) => parts
+                .iter()
+                .filter_map(|p| p.get("text").and_then(Value::as_str))
+                .collect::<Vec<_>>()
+                .join(" "),
+            _ => String::new(),
+        }
+    }
+}
+
+/// `/v1/chat/completions` request. `model` + `messages` are modelled; all other
+/// sampling params flow through `extra` to the upstream untouched.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct ChatCompletionRequest {
+    pub model: String,
+    pub messages: Vec<ChatMessage>,
+    /// Streaming is NOT supported in the MVP gate (the in-path cap/audit work is
+    /// per-turn, not per-token). A `stream: true` request is served as a single
+    /// non-streamed completion; the field is preserved here only so it is not
+    /// forwarded as `true` to the upstream.
+    #[serde(default, skip_serializing_if = "is_false")]
+    pub stream: bool,
+    #[serde(flatten)]
+    pub extra: Map<String, Value>,
+}
+
+fn is_false(b: &bool) -> bool {
+    !*b
+}
+
+impl ChatCompletionRequest {
+    /// Prepend a `system` message carrying the injected memory context. OpenAI
+    /// servers honour a leading system block; placing it first keeps it ahead of
+    /// any caller-supplied system prompt without clobbering it.
+    pub fn prepend_system(&mut self, content: impl Into<String>) {
+        self.messages.insert(0, ChatMessage::new("system", content));
+    }
+
+    /// The most recent user message's text — the turn's "intent" for the audit
+    /// row. Empty when the caller sent no user message.
+    pub fn last_user_text(&self) -> String {
+        self.messages
+            .iter()
+            .rev()
+            .find(|m| m.role == "user")
+            .map(ChatMessage::text)
+            .unwrap_or_default()
+    }
+}
+
+/// One completion choice.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct Choice {
+    #[serde(default)]
+    pub index: u32,
+    pub message: ChatMessage,
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub finish_reason: Option<String>,
+}
+
+/// Token accounting. Defaults to zero for engines (e.g. the echo engine) that
+/// don't report usage.
+#[derive(Debug, Clone, Default, Serialize, Deserialize)]
+pub struct Usage {
+    #[serde(default)]
+    pub prompt_tokens: u64,
+    #[serde(default)]
+    pub completion_tokens: u64,
+    #[serde(default)]
+    pub total_tokens: u64,
+}
+
+/// `/v1/chat/completions` response.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct ChatCompletionResponse {
+    pub id: String,
+    #[serde(default = "default_object")]
+    pub object: String,
+    pub created: u64,
+    pub model: String,
+    pub choices: Vec<Choice>,
+    #[serde(default)]
+    pub usage: Usage,
+    #[serde(flatten)]
+    pub extra: Map<String, Value>,
+}
+
+fn default_object() -> String {
+    "chat.completion".to_string()
+}
+
+/// OpenAI-shaped error envelope returned to the caller on failure, so a vendor's
+/// OpenAI client parses gate errors the same way it parses upstream errors.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct ApiError {
+    pub error: ApiErrorBody,
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct ApiErrorBody {
+    pub message: String,
+    #[serde(rename = "type")]
+    pub kind: String,
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub code: Option<String>,
+}
+
+impl ApiError {
+    pub fn new(kind: &str, message: impl Into<String>) -> Self {
+        Self {
+            error: ApiErrorBody {
+                message: message.into(),
+                kind: kind.to_string(),
+                code: None,
+            },
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn request_roundtrips_and_preserves_unknown_params() {
+        let raw = r#"{
+            "model": "doubao-pro",
+            "messages": [
+                {"role": "system", "content": "be brief"},
+                {"role": "user", "content": "where did I go in Chengdu?"}
+            ],
+            "temperature": 0.4,
+            "top_p": 0.9
+        }"#;
+        let req: ChatCompletionRequest = serde_json::from_str(raw).unwrap();
+        assert_eq!(req.model, "doubao-pro");
+        assert_eq!(req.messages.len(), 2);
+        assert_eq!(req.last_user_text(), "where did I go in Chengdu?");
+        // Unknown sampling params survive a round-trip into `extra`.
+        let back = serde_json::to_value(&req).unwrap();
+        assert_eq!(back["temperature"], 0.4);
+        assert_eq!(back["top_p"], 0.9);
+        assert!(back.get("stream").is_none(), "stream omitted when false");
+    }
+
+    #[test]
+    fn prepend_system_goes_first() {
+        let mut req = ChatCompletionRequest {
+            model: "m".into(),
+            messages: vec![ChatMessage::new("user", "hi")],
+            stream: false,
+            extra: Map::new(),
+        };
+        req.prepend_system("MEMORY: peanuts allergy");
+        assert_eq!(req.messages[0].role, "system");
+        assert_eq!(req.messages[0].text(), "MEMORY: peanuts allergy");
+        assert_eq!(req.messages[1].role, "user");
+    }
+
+    #[test]
+    fn multimodal_content_text_extraction() {
+        let m = ChatMessage {
+            role: "user".into(),
+            content: serde_json::json!([
+                {"type": "text", "text": "hello"},
+                {"type": "image_url", "image_url": {"url": "x"}},
+                {"type": "text", "text": "world"}
+            ]),
+            extra: Map::new(),
+        };
+        assert_eq!(m.text(), "hello world");
+    }
+}
diff --git a/crates/agentkeys-gate/src/server.rs b/crates/agentkeys-gate/src/server.rs
new file mode 100644
index 00000000..7ddc7f86
--- /dev/null
+++ b/crates/agentkeys-gate/src/server.rs
@@ -0,0 +1,74 @@
+//! HTTP transport — the OpenAI-compatible surface Volcano RTC CustomLLM posts
+//! to. One route does real work (`POST /v1/chat/completions`); `GET /healthz` is
+//! for the load balancer.
+
+use std::sync::Arc;
+
+use axum::{
+    body::Bytes,
+    extract::State,
+    http::{HeaderMap, StatusCode},
+    response::{IntoResponse, Response},
+    routing::{get, post},
+    Json, Router,
+};
+
+use crate::auth::authenticate;
+use crate::error::GateError;
+use crate::gate::Gate;
+use crate::openai::ChatCompletionRequest;
+
+/// Build the router with the gate as shared state.
+pub fn router(gate: Arc<Gate>) -> Router {
+    Router::new()
+        .route("/healthz", get(healthz))
+        .route("/v1/chat/completions", post(chat_completions))
+        .with_state(gate)
+}
+
+async fn healthz() -> &'static str {
+    "ok"
+}
+
+fn header<'a>(headers: &'a HeaderMap, name: &str) -> Option<&'a str> {
+    headers.get(name).and_then(|v| v.to_str().ok())
+}
+
+/// Turn any gate error into an OpenAI-shaped error response at the mapped
+/// status.
+fn error_response(err: GateError) -> Response {
+    let status = StatusCode::from_u16(err.status()).unwrap_or(StatusCode::INTERNAL_SERVER_ERROR);
+    (status, Json(err.to_api_error())).into_response()
+}
+
+async fn chat_completions(
+    State(gate): State<Arc<Gate>>,
+    headers: HeaderMap,
+    body: Bytes,
+) -> Response {
+    let caller = match authenticate(
+        gate.config(),
+        header(&headers, "authorization"),
+        header(&headers, "x-agentkeys-actor"),
+    ) {
+        Ok(c) => c,
+        Err(e) => return error_response(e),
+    };
+
+    let req: ChatCompletionRequest = match serde_json::from_slice(&body) {
+        Ok(r) => r,
+        Err(e) => {
+            return error_response(GateError::BadRequest(format!(
+                "invalid chat completion request: {e}"
+            )))
+        }
+    };
+
+    match gate.handle_chat_completion(&caller, req).await {
+        Ok(resp) => (StatusCode::OK, Json(resp)).into_response(),
+        Err(e) => {
+            tracing::warn!(vendor = %caller.vendor_id, error = %e, "gate turn failed");
+            error_response(e)
+        }
+    }
+}
diff --git a/crates/agentkeys-gate/tests/common/mod.rs b/crates/agentkeys-gate/tests/common/mod.rs
new file mode 100644
index 00000000..d576d17e
--- /dev/null
+++ b/crates/agentkeys-gate/tests/common/mod.rs
@@ -0,0 +1,192 @@
+//! Offline test doubles for the gate's `Backend` + `EngineAdapter` seams. No
+//! network, deterministic — the gate's unit/integration tests run with these.
+//!
+//! Shared across test binaries, each of which uses a different subset of the
+//! helpers — so dead-code is expected per-binary.
+#![allow(dead_code)]
+
+use std::collections::{HashMap, HashSet};
+use std::sync::Mutex;
+
+use async_trait::async_trait;
+use base64::Engine as _;
+use serde_json::json;
+
+use agentkeys_gate::backend::{
+    AuditAppendInput, AuditAppendResult, Backend, BackendError, CapMintOp, CapMintRequest,
+    CapToken, MemoryGetInput, MemoryGetResult,
+};
+use agentkeys_gate::engine::{EngineAdapter, EngineError};
+use agentkeys_gate::openai::{
+    ChatCompletionRequest, ChatCompletionResponse, ChatMessage, Choice, Usage,
+};
+
+/// In-memory broker + memory + audit. Seed namespaces, deny some, or break the
+/// whole backend; records cap-mints + audit appends for assertions.
+#[derive(Default)]
+pub struct MockBackend {
+    seeded: HashMap<(String, String), String>,
+    denied: HashSet<String>,
+    broken: bool,
+    http_fail: Option<(u16, String)>,
+    pub cap_mints: Mutex<Vec<String>>,
+    pub audits: Mutex<Vec<AuditAppendInput>>,
+}
+
+impl MockBackend {
+    pub fn new() -> Self {
+        Self::default()
+    }
+
+    /// Seed `content` for `(actor, namespace)`.
+    pub fn seed(mut self, actor: &str, namespace: &str, content: &str) -> Self {
+        self.seeded.insert(
+            (actor.to_string(), namespace.to_string()),
+            content.to_string(),
+        );
+        self
+    }
+
+    /// Make `cap_mint` for this namespace return HTTP 403 (deterministic deny).
+    pub fn deny(mut self, namespace: &str) -> Self {
+        self.denied.insert(namespace.to_string());
+        self
+    }
+
+    /// Make the whole backend unreachable (transport error on cap_mint).
+    pub fn broken(mut self) -> Self {
+        self.broken = true;
+        self
+    }
+
+    /// Make `cap_mint` return an HTTP error with this status + body (e.g. a 5xx
+    /// carrying internal detail) — to assert the body is NOT echoed to callers.
+    pub fn fail_http(mut self, status: u16, body: &str) -> Self {
+        self.http_fail = Some((status, body.to_string()));
+        self
+    }
+
+    pub fn cap_mint_count(&self) -> usize {
+        self.cap_mints.lock().unwrap().len()
+    }
+
+    pub fn audit_count(&self) -> usize {
+        self.audits.lock().unwrap().len()
+    }
+
+    pub fn last_audit(&self) -> Option<AuditAppendInput> {
+        self.audits.lock().unwrap().last().cloned()
+    }
+}
+
+#[async_trait]
+impl Backend for MockBackend {
+    async fn cap_mint(
+        &self,
+        _op: CapMintOp,
+        req: CapMintRequest,
+        _session_bearer: &str,
+    ) -> Result<CapToken, BackendError> {
+        if self.broken {
+            return Err(BackendError::Transport("mock broker unreachable".into()));
+        }
+        if let Some((status, body)) = &self.http_fail {
+            return Err(BackendError::Http {
+                status: *status,
+                body: body.clone(),
+            });
+        }
+        self.cap_mints.lock().unwrap().push(req.service.clone());
+        // service is `memory:<ns>`
+        let namespace = req.service.strip_prefix("memory:").unwrap_or(&req.service);
+        if self.denied.contains(namespace) {
+            return Err(BackendError::Http {
+                status: 403,
+                body: "out of scope".into(),
+            });
+        }
+        Ok(json!({ "payload": { "actor_omni": req.actor_omni, "service": req.service } }))
+    }
+
+    async fn memory_get(&self, input: MemoryGetInput) -> Result<MemoryGetResult, BackendError> {
+        let actor = input
+            .cap
+            .get("payload")
+            .and_then(|p| p.get("actor_omni"))
+            .and_then(|v| v.as_str())
+            .unwrap_or_default()
+            .to_string();
+        let content = self
+            .seeded
+            .get(&(actor, input.namespace.clone()))
+            .cloned()
+            .unwrap_or_default();
+        Ok(MemoryGetResult {
+            ok: true,
+            plaintext_b64: base64::engine::general_purpose::STANDARD.encode(content.as_bytes()),
+            namespace: input.namespace,
+        })
+    }
+
+    async fn audit_append(
+        &self,
+        input: AuditAppendInput,
+    ) -> Result<AuditAppendResult, BackendError> {
+        self.audits.lock().unwrap().push(input);
+        Ok(AuditAppendResult {
+            ok: true,
+            envelope_hash: "0xmockhash".into(),
+        })
+    }
+}
+
+/// Records the request it received (to assert what the gate forwarded) and
+/// returns a canned assistant reply.
+#[derive(Default)]
+pub struct RecordingEngine {
+    pub last_request: Mutex<Option<ChatCompletionRequest>>,
+}
+
+#[async_trait]
+impl EngineAdapter for RecordingEngine {
+    fn name(&self) -> &'static str {
+        "recording"
+    }
+
+    async fn complete(
+        &self,
+        req: ChatCompletionRequest,
+    ) -> Result<ChatCompletionResponse, EngineError> {
+        let model = req.model.clone();
+        *self.last_request.lock().unwrap() = Some(req);
+        Ok(ChatCompletionResponse {
+            id: "rec-0".into(),
+            object: "chat.completion".into(),
+            created: 0,
+            model,
+            choices: vec![Choice {
+                index: 0,
+                message: ChatMessage::new("assistant", "ok"),
+                finish_reason: Some("stop".into()),
+            }],
+            usage: Usage {
+                prompt_tokens: 7,
+                completion_tokens: 3,
+                total_tokens: 10,
+            },
+            extra: serde_json::Map::new(),
+        })
+    }
+}
+
+impl RecordingEngine {
+    /// The system message the gate injected (if any).
+    pub fn injected_system(&self) -> Option<String> {
+        self.last_request.lock().unwrap().as_ref().and_then(|r| {
+            r.messages
+                .iter()
+                .find(|m| m.role == "system")
+                .map(|m| m.text())
+        })
+    }
+}
diff --git a/crates/agentkeys-gate/tests/gate_flow.rs b/crates/agentkeys-gate/tests/gate_flow.rs
new file mode 100644
index 00000000..fdddad0b
--- /dev/null
+++ b/crates/agentkeys-gate/tests/gate_flow.rs
@@ -0,0 +1,255 @@
+//! The in-path gate, end-to-end, with offline doubles. Proves the four things
+//! Plan A §3 says the gate must do per turn: cap-check, memory-inject, engine
+//! call, audit — plus the deny + broken-backend behaviours.
+
+mod common;
+
+use std::sync::Arc;
+
+use agentkeys_core::audit::AuditOpKind;
+use agentkeys_gate::auth::CallerContext;
+use agentkeys_gate::backend::Backend;
+use agentkeys_gate::config::Config;
+use agentkeys_gate::engine::EngineAdapter;
+use agentkeys_gate::error::GateError;
+use agentkeys_gate::gate::Gate;
+use agentkeys_gate::openai::{ChatCompletionRequest, ChatMessage};
+
+use common::{MockBackend, RecordingEngine};
+
+const ACTOR: &str = "0xkevin";
+
+fn config(namespaces: &[&str]) -> Config {
+    let mut c = Config::for_tests();
+    c.default_actor = Some(ACTOR.to_string());
+    c.default_operator_omni = Some("0xmaster".to_string());
+    c.default_device_key_hash = Some("0xdevhash".to_string());
+    c.memory_namespaces = namespaces.iter().map(|s| s.to_string()).collect();
+    c
+}
+
+fn caller() -> CallerContext {
+    CallerContext {
+        vendor_id: "test".to_string(),
+        actor_omni: "*".to_string(),
+    }
+}
+
+fn turn(user: &str) -> ChatCompletionRequest {
+    ChatCompletionRequest {
+        model: "doubao-pro".to_string(),
+        messages: vec![ChatMessage::new("user", user)],
+        stream: false,
+        extra: serde_json::Map::new(),
+    }
+}
+
+/// Act 1 — permissioned memory is injected in-path and the turn is audited.
+#[tokio::test]
+async fn injects_memory_and_audits_the_turn() {
+    let backend = Arc::new(
+        MockBackend::new()
+            .seed(ACTOR, "personal", "Allergic to peanuts.")
+            .seed(ACTOR, "travel", "Chengdu trip Apr 12-16."),
+    );
+    let engine = Arc::new(RecordingEngine::default());
+    let gate = Gate::new(
+        config(&["personal", "travel"]),
+        backend.clone() as Arc<dyn Backend>,
+        engine.clone() as Arc<dyn EngineAdapter>,
+    );
+
+    let resp = gate
+        .handle_chat_completion(&caller(), turn("what do you know about me?"))
+        .await
+        .expect("turn succeeds");
+
+    // One cap minted per namespace; both injected.
+    assert_eq!(backend.cap_mint_count(), 2);
+    let injected = engine
+        .injected_system()
+        .expect("a system message was injected");
+    assert!(injected.contains("Allergic to peanuts."), "{injected}");
+    assert!(injected.contains("Chengdu trip"), "{injected}");
+    assert!(
+        injected.contains("memory:personal"),
+        "namespace header: {injected}"
+    );
+
+    // Exactly one GateTurn audit row, recording both namespaces as injected.
+    assert_eq!(backend.audit_count(), 1);
+    let audit = backend.last_audit().unwrap();
+    assert_eq!(audit.op_kind, AuditOpKind::GateTurn as u8);
+    assert_eq!(audit.actor_omni, ACTOR); // normalized 0x form preserved
+    assert_eq!(
+        audit.intent_text.as_deref(),
+        Some("what do you know about me?")
+    );
+    let outcomes = audit.op_body["namespaces"].as_array().unwrap();
+    assert_eq!(outcomes.len(), 2);
+    assert!(outcomes.iter().all(|o| o["outcome"] == "injected"));
+    assert_eq!(audit.op_body["engine"], "recording");
+
+    // The response is OpenAI-shaped.
+    assert_eq!(resp.choices.len(), 1);
+    assert_eq!(resp.model, "doubao-pro");
+}
+
+/// Act 2 — a namespace the actor has no scope for is DENIED (non-fatal): the
+/// turn proceeds with the allowed namespace, and the denial is on the audit row.
+#[tokio::test]
+async fn denied_namespace_is_recorded_not_fatal() {
+    let backend = Arc::new(
+        MockBackend::new()
+            .seed(ACTOR, "personal", "Allergic to peanuts.")
+            .deny("travel"),
+    );
+    let engine = Arc::new(RecordingEngine::default());
+    let gate = Gate::new(
+        config(&["personal", "travel"]),
+        backend.clone() as Arc<dyn Backend>,
+        engine.clone() as Arc<dyn EngineAdapter>,
+    );
+
+    gate.handle_chat_completion(&caller(), turn("hi"))
+        .await
+        .expect("turn proceeds despite one denied namespace");
+
+    let injected = engine.injected_system().unwrap();
+    assert!(injected.contains("Allergic to peanuts."));
+    assert!(!injected.contains("memory:travel"));
+
+    let audit = backend.last_audit().unwrap();
+    let outcomes = audit.op_body["namespaces"].as_array().unwrap();
+    let travel = outcomes
+        .iter()
+        .find(|o| o["namespace"] == "travel")
+        .unwrap();
+    assert_eq!(travel["outcome"], "denied");
+}
+
+/// Act 3 — a broken backend is surfaced LOUD, never hidden behind an empty-memory
+/// fallback (the "no silent fallback" rule).
+#[tokio::test]
+async fn broken_backend_fails_the_turn() {
+    let backend = Arc::new(MockBackend::new().broken());
+    let engine = Arc::new(RecordingEngine::default());
+    let gate = Gate::new(
+        config(&["personal"]),
+        backend.clone() as Arc<dyn Backend>,
+        engine.clone() as Arc<dyn EngineAdapter>,
+    );
+
+    let err = gate
+        .handle_chat_completion(&caller(), turn("hi"))
+        .await
+        .expect_err("broken broker must fail the turn");
+    assert!(matches!(err, GateError::Backend(_)), "got {err:?}");
+    // The engine was never reached.
+    assert!(engine.last_request.lock().unwrap().is_none());
+}
+
+/// Act 4 — a pure-chat turn with no namespaces configured still lands on the
+/// audit ledger (the gap the worker-side memory audit can't cover).
+#[tokio::test]
+async fn pure_chat_turn_is_still_audited() {
+    let backend = Arc::new(MockBackend::new());
+    let engine = Arc::new(RecordingEngine::default());
+    let gate = Gate::new(
+        config(&[]),
+        backend.clone() as Arc<dyn Backend>,
+        engine.clone() as Arc<dyn EngineAdapter>,
+    );
+
+    gate.handle_chat_completion(&caller(), turn("hello"))
+        .await
+        .expect("turn succeeds");
+
+    assert_eq!(
+        backend.cap_mint_count(),
+        0,
+        "no namespaces → no caps minted"
+    );
+    assert_eq!(backend.audit_count(), 1, "turn is still audited");
+    assert!(
+        engine.injected_system().is_none(),
+        "no memory system message"
+    );
+}
+
+/// A broker/worker 5xx body is NEVER echoed across the trust boundary to the
+/// vendor caller — only the safe status category surfaces.
+#[tokio::test]
+async fn backend_5xx_body_is_not_echoed_to_caller() {
+    let secret = "INTERNAL stacktrace user=root token=SHHH-do-not-leak";
+    let backend = Arc::new(MockBackend::new().fail_http(500, secret));
+    let engine = Arc::new(RecordingEngine::default());
+    let gate = Gate::new(
+        config(&["personal"]),
+        backend.clone() as Arc<dyn Backend>,
+        engine.clone() as Arc<dyn EngineAdapter>,
+    );
+
+    let err = gate
+        .handle_chat_completion(&caller(), turn("hi"))
+        .await
+        .expect_err("backend 5xx fails the turn");
+    let msg = err.to_string();
+    assert!(matches!(err, GateError::Backend(_)), "got {err:?}");
+    assert!(msg.contains("HTTP 500"), "status category surfaced: {msg}");
+    assert!(!msg.contains("SHHH"), "upstream body must NOT leak: {msg}");
+    assert!(
+        !msg.contains("stacktrace"),
+        "upstream body must NOT leak: {msg}"
+    );
+}
+
+/// The gate refuses to forward tool-calling fields to the upstream (they would
+/// bypass AgentKeys enforcement) — it is NOT a transparent proxy for them.
+#[tokio::test]
+async fn tool_calling_fields_are_rejected() {
+    let backend = Arc::new(MockBackend::new());
+    let engine = Arc::new(RecordingEngine::default());
+    let gate = Gate::new(
+        config(&[]),
+        backend.clone() as Arc<dyn Backend>,
+        engine.clone() as Arc<dyn EngineAdapter>,
+    );
+
+    let mut req = turn("call a tool");
+    req.extra.insert(
+        "tools".to_string(),
+        serde_json::json!([{"type": "function", "function": {"name": "turn_on_light"}}]),
+    );
+    let err = gate
+        .handle_chat_completion(&caller(), req)
+        .await
+        .expect_err("tools must be rejected");
+    assert!(matches!(err, GateError::BadRequest(_)), "got {err:?}");
+    // The engine was never reached — nothing forwarded upstream.
+    assert!(engine.last_request.lock().unwrap().is_none());
+}
+
+/// A per-request actor header overrides the configured default actor.
+#[tokio::test]
+async fn actor_header_overrides_default() {
+    let backend = Arc::new(MockBackend::new().seed("0xother", "personal", "other's data"));
+    let engine = Arc::new(RecordingEngine::default());
+    let gate = Gate::new(
+        config(&["personal"]),
+        backend.clone() as Arc<dyn Backend>,
+        engine.clone() as Arc<dyn EngineAdapter>,
+    );
+
+    let caller = CallerContext {
+        vendor_id: "test".to_string(),
+        actor_omni: "0xother".to_string(),
+    };
+    gate.handle_chat_completion(&caller, turn("hi"))
+        .await
+        .unwrap();
+
+    let injected = engine.injected_system().unwrap();
+    assert!(injected.contains("other's data"));
+    assert_eq!(backend.last_audit().unwrap().actor_omni, "0xother");
+}
diff --git a/crates/agentkeys-gate/tests/http.rs b/crates/agentkeys-gate/tests/http.rs
new file mode 100644
index 00000000..70872f9f
--- /dev/null
+++ b/crates/agentkeys-gate/tests/http.rs
@@ -0,0 +1,121 @@
+//! HTTP transport — auth gating, health, and a full anonymous echo turn driven
+//! through the real axum router.
+
+mod common;
+
+use std::sync::Arc;
+
+use axum::body::Body;
+use axum::http::Request;
+use http_body_util::BodyExt;
+use tower::ServiceExt;
+
+use agentkeys_gate::backend::Backend;
+use agentkeys_gate::config::Config;
+use agentkeys_gate::engine::{EchoEngine, EngineAdapter};
+use agentkeys_gate::gate::Gate;
+use agentkeys_gate::server::router;
+
+use common::MockBackend;
+
+fn echo_gate(config: Config) -> Arc<Gate> {
+    Arc::new(Gate::new(
+        config,
+        Arc::new(MockBackend::new()) as Arc<dyn Backend>,
+        Arc::new(EchoEngine) as Arc<dyn EngineAdapter>,
+    ))
+}
+
+fn chat_body() -> Body {
+    Body::from(
+        serde_json::to_vec(&serde_json::json!({
+            "model": "doubao-pro",
+            "messages": [{"role": "user", "content": "hello"}]
+        }))
+        .unwrap(),
+    )
+}
+
+#[tokio::test]
+async fn healthz_ok() {
+    let app = router(echo_gate(Config::for_tests()));
+    let resp = app
+        .oneshot(
+            Request::builder()
+                .uri("/healthz")
+                .body(Body::empty())
+                .unwrap(),
+        )
+        .await
+        .unwrap();
+    assert_eq!(resp.status(), 200);
+}
+
+#[tokio::test]
+async fn no_token_and_not_anonymous_is_401() {
+    let app = router(echo_gate(Config::for_tests()));
+    let resp = app
+        .oneshot(
+            Request::builder()
+                .method("POST")
+                .uri("/v1/chat/completions")
+                .header("content-type", "application/json")
+                .body(chat_body())
+                .unwrap(),
+        )
+        .await
+        .unwrap();
+    assert_eq!(resp.status(), 401);
+    let body = resp.into_body().collect().await.unwrap().to_bytes();
+    let json: serde_json::Value = serde_json::from_slice(&body).unwrap();
+    // OpenAI-shaped error envelope.
+    assert_eq!(json["error"]["type"], "authentication_error");
+}
+
+#[tokio::test]
+async fn wrong_vendor_token_is_401() {
+    let config = Config::for_tests().with_vendor_token("folotoy", "secret");
+    let app = router(echo_gate(config));
+    let resp = app
+        .oneshot(
+            Request::builder()
+                .method("POST")
+                .uri("/v1/chat/completions")
+                .header("content-type", "application/json")
+                .header("authorization", "Bearer wrong")
+                .body(chat_body())
+                .unwrap(),
+        )
+        .await
+        .unwrap();
+    assert_eq!(resp.status(), 401);
+}
+
+#[tokio::test]
+async fn anonymous_echo_turn_returns_openai_response() {
+    let mut config = Config::for_tests();
+    config.allow_anonymous = true;
+    config.default_actor = Some("0xkevin".into());
+    config.default_operator_omni = Some("0xmaster".into());
+    config.default_device_key_hash = Some("0xdev".into());
+    let app = router(echo_gate(config));
+
+    let resp = app
+        .oneshot(
+            Request::builder()
+                .method("POST")
+                .uri("/v1/chat/completions")
+                .header("content-type", "application/json")
+                .body(chat_body())
+                .unwrap(),
+        )
+        .await
+        .unwrap();
+
+    assert_eq!(resp.status(), 200);
+    let body = resp.into_body().collect().await.unwrap().to_bytes();
+    let json: serde_json::Value = serde_json::from_slice(&body).unwrap();
+    assert_eq!(json["object"], "chat.completion");
+    let reply = json["choices"][0]["message"]["content"].as_str().unwrap();
+    assert!(reply.contains("hello"), "echo reflects the user: {reply}");
+}
diff --git a/docs/arch.md b/docs/arch.md
index 1e641e5f..fd76db3f 100644
--- a/docs/arch.md
+++ b/docs/arch.md
@@ -1153,8 +1153,11 @@ and never reordered**. Grouped by 10s leaves room for related ops.
 | `ConfigPut` | 80 | `{key: string, payload_hash: [u8;32]}` | config-service (#201, #229) |
 | `ConfigGet` | 81 | `{key: string, cap_hash: [u8;32]}` | config-service (#201, #229) |
 | `ConfigTeardown` | 82 | `{actor_target: [u8;32]}` | config-service (#201, #229) |
+| `GateTurn` | 90 | `{engine: string, model: string, namespaces: [{namespace: string, outcome: string}], injected_bytes: u64, prompt_tokens: u64, completion_tokens: u64}` | `agentkeys-gate` (Plan A in-path CustomLLM gate; [`ai-device-platform.md`](plan/ai-device-platform.md) §3) |
 
-Byte ranges `3-9`, `13-19`, `22-29`, `32-39`, `42-49`, `53-59`, `62-69`, `71-79`, `83-89`, `90-255` are reserved for future extensions in the same family (config claimed `80-89` per #229).
+Byte ranges `3-9`, `13-19`, `22-29`, `32-39`, `42-49`, `53-59`, `62-69`, `71-79`, `83-89`, `91-99`, `100-255` are reserved for future extensions in the same family (config claimed `80-89` per #229; the gate claimed `90-99`).
+
+The `GateTurn` row records the **turn-level** control-plane fact (which engine answered, what memory was injected, the per-namespace authorization outcome). The memory READS themselves are still audited data-plane-side as `MemoryGet` (#229) by the memory worker — so a turn that injects memory produces both rows; a pure-chat turn produces only the `GateTurn` row (closing the gap where a no-memory turn would otherwise be invisible on the ledger).
 
 **Data-plane emit sites are LIVE (#229).** The cred / memory / config workers
 emit one envelope per store / fetch / teardown — after cap-verify, before the
@@ -2378,6 +2381,10 @@ agentkeys/                                  # repo root
 │   │                                       #   process exposing AgentKeys tools to
 │   │                                       #   LLM hosts over stdio / HTTP / xiaozhi
 │   │                                       #   mcp-endpoint WS relay (issue #107)
+│   ├── agentkeys-gate/                      # in-path CustomLLM gate binary — OpenAI-
+│   │                                       #   compatible /v1/chat/completions; per turn:
+│   │                                       #   cap-check + memory-inject + audit (GateTurn),
+│   │                                       #   swappable EngineAdapter (Plan A §3, A ⊂ B)
 │   ├── agentkeys-provisioner/              # Rust orchestrator that spawns TS scrapers
 │   ├── agentkeys-protocol/                 # The wire TYPES half of the #203 one-owner
 │   │                                       #   split (#215 re-land): pure serde, wasm-safe
@@ -2418,6 +2425,7 @@ agentkeys/                                  # repo root
 | `agentkeys-daemon` | Sidecar daemon (master / agent role per init); localhost proxy |
 | `agentkeys-mcp` | Legacy in-process MCP adapter library — used by `agentkeys-daemon`'s sidecar stdio loop (M0). |
 | `agentkeys-mcp-server` | Standalone Rust MCP server binary (issue #107). Three transports: stdio (Claude Desktop / Claude Code / Codex / Cursor / Cline / Roo / Windsurf / Gemini CLI), HTTP (broker-direct), xiaozhi `mcp-endpoint` WS relay. **One backend: `http`** (real broker + memory + audit workers — the production backend IS the shared `agentkeys-backend-client::BackendClient`). The `in-memory` fixture backend was **removed in #207 (real-data-only)**; transport/protocol conformance is now proven by the Rust `tests/transport_conformance.rs` (subprocess MCP client over HTTP + stdio against the real backend). Installed via `cargo install --git https://github.com/litentry/agentKeys agentkeys-mcp-server`. |
+| `agentkeys-gate` | Standalone Rust **in-path CustomLLM gate** binary (Plan A, [`ai-device-platform.md`](plan/ai-device-platform.md) §3). OpenAI-compatible `POST /v1/chat/completions` that Volcano RTC's CustomLLM mode posts to; per turn it does cap-check + memory-inject (`CapMintOp::MemoryGet` per `memory:<ns>`) → swappable [`EngineAdapter`](../crates/agentkeys-gate/src/engine.rs) (Plan-A impl = an OpenAI-compatible upstream → Doubao; `echo` engine for hardware bring-up) → `GateTurn` (90) audit. Owns no new wire shape — reuses the shared `agentkeys-backend-client::BackendClient`. The engine seam is the only swappable part (A ⊂ B). |
 | `agentkeys-provisioner` | Spawns TS scraper, encrypts obtained creds, submits via cap-store |
 | `agentkeys-chain` | Solidity contracts + Rust ABI bindings |
 
diff --git a/docs/operator-runbook-gate.md b/docs/operator-runbook-gate.md
new file mode 100644
index 00000000..32eb1ee3
--- /dev/null
+++ b/docs/operator-runbook-gate.md
@@ -0,0 +1,233 @@
+# Operator runbook — testing the in-path CustomLLM gate (`agentkeys-gate`)
+
+How to run and test the Plan-A gate ([`docs/plan/ai-device-platform.md`](plan/ai-device-platform.md) §3; crate [`crates/agentkeys-gate`](../crates/agentkeys-gate/)). The gate is an OpenAI-compatible `POST /v1/chat/completions` endpoint that, per LLM turn, does **cap-check → memory-inject → engine completion → audit**.
+
+There are three test levels, each adding one real dependency:
+
+| Part | Engine | Backend (broker/memory/audit) | What it proves | Needs |
+|---|---|---|---|---|
+| **1. Echo smoke test** | `echo` (local) | none | the gate runs, the OpenAI round-trip works, the gate is in-path | nothing — just `cargo` |
+| **2. Real backend** | `echo` | live broker + memory + audit workers | real per-namespace cap-check + memory-inject + a `GateTurn` audit row | a registered device actor + its K10 key + agent-session JWT |
+| **3. Doubao** | `openai-compatible` | live backend | the full Plan-A path (Volcano RTC CustomLLM → gate → Doubao) | a Volcano Ark / Doubao API key |
+
+> **Status of this runbook:** Part 1 is **verified** (the outputs below are real). Parts 2–3 require live credentials / a deployed broker and document the commands + the verification you should see.
+
+---
+
+## Part 1 — local echo smoke test (no dependencies)
+
+The `echo` engine answers locally (no upstream LLM), and with **no memory namespaces** the gate makes **no broker calls** — so this runs with zero external services.
+
+### 1.1 Build
+
+```bash
+cargo build -p agentkeys-gate
+```
+
+### 1.2 Run
+
+```bash
+RUST_LOG=info ./target/debug/agentkeys-gate \
+  --allow-anonymous --engine echo \
+  --default-actor 0xkevin \
+  --default-operator-omni 0xmaster \
+  --default-device-key-hash 0xdev \
+  --memory-namespaces "" \
+  --listen 127.0.0.1:8077
+```
+
+> **The `--memory-namespaces ""` is required for the no-broker test.** With the default namespaces (`personal,family,work,travel`) the gate tries to mint a memory-read cap against the broker; with no `--broker-url` that fails **loud** (a `backend not configured` 502 — by design, see Part 2). Empty namespaces skip memory entirely.
+
+You should see, on stderr:
+
+```
+WARN agentkeys_gate: AGENTKEYS_GATE_ALLOW_ANONYMOUS is set with no vendor tokens — the gate accepts UNAUTHENTICATED requests. Use only for local / echo-engine bring-up.
+WARN agentkeys_gate: no K10 device key loaded — cap-mint will fail (set AGENTKEYS_DEVICE_KEY_FILE ...)
+INFO agentkeys_gate: engine: echo (local — no upstream LLM)
+INFO agentkeys_gate: agentkeys-gate listening (HTTP, OpenAI-compatible) addr=127.0.0.1:8077
+```
+
+Both WARNs are expected for the local echo test (no auth, no device key).
+
+### 1.3 Test (in another terminal)
+
+**Health:**
+```bash
+curl -s http://127.0.0.1:8077/healthz
+```
+→ `ok`
+
+**A plain turn** (proves the OpenAI round-trip + the gate is in-path):
+```bash
+curl -s -X POST http://127.0.0.1:8077/v1/chat/completions \
+  -H 'content-type: application/json' \
+  --data '{"model":"doubao-pro","messages":[{"role":"user","content":"hello, who am I?"}]}'
+```
+→
+```json
+{"id":"echo-0","object":"chat.completion","created":1781581314,"model":"doubao-pro",
+ "choices":[{"index":0,"message":{"role":"assistant","content":"[echo] You said: hello, who am I?"},
+ "finish_reason":"stop"}],"usage":{"prompt_tokens":0,"completion_tokens":0,"total_tokens":0}}
+```
+
+**A turn with a system message** — the echo engine reflects any `system` block back, which is exactly how **injected memory** will surface once a real backend feeds it (in Part 2 the gate produces that `system` block from your stored memory instead of you supplying it):
+```bash
+curl -s -X POST http://127.0.0.1:8077/v1/chat/completions \
+  -H 'content-type: application/json' \
+  --data '{"model":"doubao-pro","messages":[{"role":"system","content":"Allergic to peanuts."},{"role":"user","content":"what should I avoid?"}]}'
+```
+→ assistant content:
+```
+[echo] You said: what should I avoid?
+[echo] Using your memory:
+Allergic to peanuts.
+```
+
+**A malformed request** (empty messages) → OpenAI-shaped 400:
+```bash
+curl -s -o /dev/null -w "%{http_code}\n" -X POST http://127.0.0.1:8077/v1/chat/completions \
+  -H 'content-type: application/json' --data '{"model":"m","messages":[]}'
+```
+→ `400`
+
+**Auth** — restart without `--allow-anonymous` and with a vendor token to see the gate reject unauthenticated callers:
+```bash
+./target/debug/agentkeys-gate --engine echo --memory-namespaces "" \
+  --vendor-tokens 'folotoy:secret-abc' --default-actor 0xkevin \
+  --default-operator-omni 0xmaster --default-device-key-hash 0xdev --listen 127.0.0.1:8077 &
+
+curl -s -o /dev/null -w "no-token: %{http_code}\n" -X POST http://127.0.0.1:8077/v1/chat/completions \
+  -H 'content-type: application/json' --data '{"model":"m","messages":[{"role":"user","content":"hi"}]}'
+# -> no-token: 401
+
+curl -s -o /dev/null -w "good-token: %{http_code}\n" -X POST http://127.0.0.1:8077/v1/chat/completions \
+  -H 'content-type: application/json' -H 'authorization: Bearer secret-abc' \
+  --data '{"model":"m","messages":[{"role":"user","content":"hi"}]}'
+# -> good-token: 200
+```
+
+---
+
+## Part 2 — against a real backend (memory-inject + audit)
+
+This exercises the **actual** product: the gate mints a `memory-get` cap per namespace, reads your stored memory through the memory worker, injects it, and writes a `GateTurn` audit row.
+
+### 2.1 Prerequisites (the device actor must already exist on the backend)
+
+The gate impersonates one **device actor** against the broker. You need:
+
+- a running **broker + memory worker + audit worker** (the prod set, or your `--ci` stack) — their base URLs;
+- a **device actor omni** (`O_agent_X`) that is registered + scoped for memory on-chain, its **master/operator omni**, and its **device-key hash**;
+- the device's **K10 device key file** (`AGENTKEYS_DEVICE_KEY_FILE`) so every cap-mint is proof-of-possession-signed (#76);
+- the device's **agent-session JWT** (omni == the device actor) for the cap-mint + per-actor STS relay;
+- the **memory IAM role ARN** for the per-actor S3 relay.
+
+> These are the same inputs the in-sandbox daemon / MCP server use. If you've run the harness or paired a device, you already have them. Seeding memory for the actor: use `agentkeys` / the parent-control plant flow, or the MCP `memory.put` tool, before testing reads here.
+
+### 2.2 Run
+
+```bash
+RUST_LOG=info ./target/debug/agentkeys-gate \
+  --vendor-tokens 'folotoy:<vendor-bearer>' \
+  --broker-url   https://broker.litentry.org \
+  --memory-url   https://cred.litentry.org \
+  --audit-url    https://audit.litentry.org \
+  --memory-role-arn 'arn:aws:iam::<acct>:role/<memory-role>' \
+  --default-actor          0x<device_actor_omni> \
+  --default-operator-omni  0x<master_omni> \
+  --default-device-key-hash 0x<k10_hash> \
+  --agent-session-bearer-file /etc/agentkeys/<device>-session.jwt \
+  --memory-namespaces 'personal,travel' \
+  --engine echo \
+  --require-audit \
+  --listen 127.0.0.1:8077
+```
+
+`AGENTKEYS_DEVICE_KEY_FILE=~/.agentkeys/agent-device.key` must point at the device's registered K10 key (or the same env var the daemon uses). Every flag also has an env var (see [Config reference](#config-reference)); `--require-audit` makes a failed audit append **fail the turn** (recommended once the audit worker is healthy).
+
+### 2.3 Test + verify
+
+```bash
+curl -s -X POST http://127.0.0.1:8077/v1/chat/completions \
+  -H 'authorization: Bearer <vendor-bearer>' \
+  -H 'content-type: application/json' \
+  --data '{"model":"doubao-pro","messages":[{"role":"user","content":"what do you know about me?"}]}'
+```
+
+Expected, end-to-end:
+1. The assistant reply (echo) includes a `[echo] Using your memory:` block containing the lines stored under `memory:personal` / `memory:travel` for the actor — **proving the in-path cap-check + memory-inject worked**.
+2. The gate stderr logs `gate-turn audited envelope_hash=0x…`.
+3. **Two audit rows land** for the turn: the worker-side `MemoryGet` (op_kind 11) row(s) for the reads (#229), and the gate's `GateTurn` (op_kind 90) row for the turn. Confirm via the audit feed (`/v1/audit/recent` on the daemon, or the parent-control audit view); the `GateTurn` row's `op_body` carries `{engine, model, namespaces:[{namespace,outcome}], injected_bytes, prompt_tokens, completion_tokens}`.
+
+**Negative checks (the gate's enforcement):**
+- Point `--default-actor` at an actor with **no memory scope** → the per-namespace outcome is `denied` (HTTP 403 from the broker), the turn still answers (degraded), and the denial is recorded in the `GateTurn` row. Memory it has no cap for is **never** read.
+- Stop the memory worker (or give a bad `--broker-url`) → the turn fails **loud** with a 502 `backend HTTP …` (the "no silent fallback" rule); the gate does **not** pretend memory was empty.
+
+---
+
+## Part 3 — Doubao via the OpenAI-compatible engine
+
+Swap the `echo` engine for the real upstream. This is the production Plan-A path: Volcano RTC CustomLLM posts to the gate, the gate injects memory + audits, then forwards to Doubao.
+
+```bash
+RUST_LOG=info ./target/debug/agentkeys-gate \
+  ... (all the Part-2 backend + identity flags) ... \
+  --engine openai-compatible \
+  --engine-base-url 'https://ark.cn-beijing.volces.com/api/v3' \
+  --engine-api-key-file /etc/agentkeys/doubao.key \
+  --engine-model 'doubao-pro-32k' \
+  --listen 127.0.0.1:8077
+```
+
+Same `curl` as Part 2.3; the reply is now a **real Doubao completion** that has the user's memory in its context, and `usage` carries real token counts. In Volcano's agent console, set the agent's **CustomLLM endpoint** to this gate's URL (behind TLS) instead of calling Doubao directly — that is the in-path insertion point.
+
+---
+
+## Verifying the audit trail (the gate's top feature)
+
+Every gated turn is on the ledger. After any Part-2/3 turn:
+
+- **`GateTurn` (op_kind 90)** — one per turn, even a pure-chat turn with no memory (the gap the gate closes). Carries the engine, model, per-namespace outcomes, injected bytes, token usage.
+- **`MemoryGet` (op_kind 11)** — emitted worker-side (#229) for each namespace actually read.
+
+Decode a row with the daemon's `/v1/audit/<id>/decode` or the parent-control audit view; the `GateTurn` label is `gate.turn`.
+
+---
+
+## Troubleshooting
+
+| Symptom | Cause | Fix |
+|---|---|---|
+| Every turn 502s with `backend not configured: broker_url` (or `audit_url`) | A namespace is configured but no `--broker-url` (or audit append ran with no `--audit-url`) | For a no-backend smoke test set `--memory-namespaces ""`. Otherwise supply the broker/memory/audit URLs. |
+| Turn 502s with `backend HTTP 401`/`403` on cap-mint | The agent-session JWT is missing/expired, or the actor isn't scoped for that memory namespace on-chain | Refresh the session JWT; grant the namespace scope (parent-control / `agentkeys scope`). A 403 on ONE namespace is non-fatal (`denied`); a 401 is fatal. |
+| `WARN no K10 device key loaded — cap-mint will fail` | `AGENTKEYS_DEVICE_KEY_FILE` not set | Point it at the device's registered K10 key file. Harmless for the Part-1 echo test (no cap-mint). |
+| Reply has no `Using your memory` block | The namespace is empty for that actor, or all namespaces returned `denied`/`empty` | Seed memory for the actor (plant / `memory.put`), confirm scope; check the `GateTurn` row's `namespaces[].outcome`. |
+| `401` on every request | Vendor tokens are configured but the caller sent the wrong/no `Authorization: Bearer` | Send the right vendor bearer, or use `--allow-anonymous` for local bring-up only. |
+| `engine not configured: engine api key` | `--engine openai-compatible` with no `--engine-api-key[-file]` | Supply the Doubao/Volcano key. |
+
+---
+
+## Config reference
+
+Every flag has an env var (read **once** at startup; nothing is hardcoded):
+
+| Flag | Env | Default | Notes |
+|---|---|---|---|
+| `--listen` | `AGENTKEYS_GATE_LISTEN` | `0.0.0.0:8077` | |
+| `--broker-url` / `--memory-url` / `--audit-url` | `AGENTKEYS_BROKER_URL` / `AGENTKEYS_MEMORY_URL` / `AGENTKEYS_AUDIT_URL` | — | the shipped backend chain |
+| `--vendor-tokens` | `AGENTKEYS_GATE_VENDOR_TOKENS` | empty | `vendor:token,vendor2:token2`; empty ⇒ 401 unless `--allow-anonymous` |
+| `--allow-anonymous` | `AGENTKEYS_GATE_ALLOW_ANONYMOUS` | `false` | local / echo bring-up only |
+| `--default-actor` | `AGENTKEYS_GATE_DEFAULT_ACTOR` | — | overridden per-request by the `X-AgentKeys-Actor` header |
+| `--default-operator-omni` | `AGENTKEYS_GATE_DEFAULT_OPERATOR_OMNI` | — | master omni for cap binding |
+| `--default-device-key-hash` | `AGENTKEYS_GATE_DEFAULT_DEVICE_KEY_HASH` | — | |
+| `--agent-session-bearer[-file]` | `AGENTKEYS_GATE_AGENT_SESSION_BEARER[_FILE]` | — | device-actor JWT for cap-mint + STS |
+| `--memory-role-arn` | `AGENTKEYS_GATE_MEMORY_ROLE_ARN` | — | per-actor S3 relay role |
+| `--memory-namespaces` | `AGENTKEYS_GATE_MEMORY_NAMESPACES` | `personal,family,work,travel` | `""` = no memory (no broker calls) |
+| `--memory-engine` | `AGENTKEYS_MEMORY_ENGINE` | `passthrough` | `passthrough` or `lexical` (selection; budget via `AGENTKEYS_MEMORY_MAX_LINES`/`_BYTES`) |
+| `--cap-ttl-seconds` | `AGENTKEYS_GATE_CAP_TTL_SECONDS` | `300` | memory-read cap TTL |
+| `--require-audit` | `AGENTKEYS_GATE_REQUIRE_AUDIT` | `false` | deny the turn if the `GateTurn` audit append fails |
+| `--engine` | `AGENTKEYS_GATE_ENGINE` | `echo` | `echo` or `openai-compatible` |
+| `--engine-base-url` / `--engine-api-key[-file]` / `--engine-model` | `AGENTKEYS_GATE_ENGINE_*` | — | required for `openai-compatible` |
+| (device key) | `AGENTKEYS_DEVICE_KEY_FILE` | — | the K10 key for cap-PoP (#76) |
+
+See also: the crate [README](../crates/agentkeys-gate/README.md) and the [implementation log](../crates/agentkeys-gate/IMPLEMENTATION_LOG.md).
diff --git a/docs/plan/ai-device-platform.md b/docs/plan/ai-device-platform.md
index e01c44f7..acc25464 100644
--- a/docs/plan/ai-device-platform.md
+++ b/docs/plan/ai-device-platform.md
@@ -42,6 +42,13 @@ Compliance (China kids'-data, minors' mode, content moderation) is the **vendor'
 
 **Effort:** small — roughly the #112 MCP-server effort (the CustomLLM gate) + the parent-control wiring + the device firmware spike. Volcano carries all the heavy infra (voice, runtime, multi-tenancy).
 
+**Status (2026-06-16):**
+- ✅ **Pieces 2 + 5 landed** — the [`agentkeys-gate`](../../crates/agentkeys-gate/) crate: an OpenAI-compatible in-path `/v1/chat/completions` gate that per turn does cap-check + memory-inject + audit (new `GateTurn` op_kind 90, arch.md §15.3a) behind a swappable `EngineAdapter` (the A ⊂ B seam). Plan-A engine = an OpenAI-compatible upstream (Volcano→Doubao); an `echo` engine runs the gate on real hardware **before** Doubao creds exist (directly serving the "prove the gate in-path on real hardware" goal). Reuses piece 3 (the shipped `BackendClient` / memory-worker / cap-mint / audit) verbatim — owns no new wire shape.
+- ⏳ **Piece 1** (Tuya T5 firmware → Volcano RTC) — hardware spike, not started.
+- ⏳ **Piece 4** (parent-control management view wired to the gate's real backend) — follow-up.
+- ⏳ **Deploy wiring** — the gate is not yet a `setup-broker-host.sh` surface (new systemd unit + nginx vhost for the CustomLLM endpoint) — follow-up.
+- ⏳ **Fast-follow** (memory-import) and **spend** (§4 prerequisite) — unchanged, deferred.
+
 ## 4. Enforcement model (3 layers + the spend prerequisite)
 - **Layer 1 — universal cap-gate** (broker mint + worker verify, deterministic, §17.5): works on **any** runtime incl. hooks-less Doubao. **The floor; the MVP runs on this.**
 - **Layer 2 — hosted MCP** ([#112](https://github.com/litentry/agentKeys/issues/112)): broad reach across MCP hosts; same guarantee + automatic audit.
@@ -57,7 +64,7 @@ Compliance (China kids'-data, minors' mode, content moderation) is the **vendor'
 
 ## 6. Cold-start
 - **Lead with the gate** (N=1 value: central control + compliance) **+ memory-import** (day-one magic). **Not portability** (empty at N=1).
-- **Memory accumulates passively** through device usage — *no AgentKeys app required.* Portability **switches on automatically at the 2nd device** (the memory is already in the user's namespace; zero migration).
+- **Memory accumulates passively** through device usage — *no AgentKeys app required.* Portability **switches on automatically at the 2nd device** (the memory is already in the user's namespace; zero migration). **This is the shared-user-memory model (DECIDED — [`shared-user-memory-delegation.md`](shared-user-memory-delegation.md)):** memory is owned by the **user/master** (one `memory:user:<id>:<ns>` namespace), and each agent device is a **delegated caller** granted access at pair-time — so every device reads the same memory (a rule set once is honored everywhere). NOT per-agent silos. Portability "switching on" at device #2 is exactly device #2 being granted the *same* user namespace device #1 already reads.
 - **Memory-import from incumbents** — ChatGPT: a session-bookmarklet (à la [gpt2claude.com](https://gpt2claude.com/) / [GPT2Claude Migration Kit](https://github.com/Siamsnus/GPT2Claude-Migration-Kit)) · verbatim-dump prompt · official data-export; Doubao: prompt-dump / memory-panel copy (no official export). Normalize the unstructured text into the memory schema (profile/semantic/episodic + namespaces) via an LLM pass → the user's per-actor namespace. **Frictions:** one-time manual step · ToS gray-area (keep the prompt-dump as the unblockable fallback) · sparse coverage (don't oversell) · structuring work.
 - **Concentrate early vendors in one category** (kids' toys) for per-user device overlap so portability has a home.
 
diff --git a/docs/plan/shared-user-memory-delegation.md b/docs/plan/shared-user-memory-delegation.md
new file mode 100644
index 00000000..d6a2a2e5
--- /dev/null
+++ b/docs/plan/shared-user-memory-delegation.md
@@ -0,0 +1,113 @@
+# Plan — Shared user memory + delegated agent access (the identity model for AI devices)
+
+**Status:** DECIDED (product model) 2026-06-16, IMPLEMENTATION PENDING. Resolves the namespace/identity tension flagged while reviewing the `agentkeys-gate` PR (#308) and in [`ai-device-platform.md`](ai-device-platform.md) §6 vs [`spawn-sandbox-on-pair.md`](spawn-sandbox-on-pair.md).
+**Relates to:** [`arch.md`](../arch.md) §6 (identity), §15.2 (storage keying), §17.5 (four-layer per-actor isolation), §22c (master/agent binding), §22c.5/[#295](https://github.com/litentry/agentKeys/issues/295) (master hub — master never in the data path); the broker cap-mint ([`handlers/cap.rs`](../../crates/agentkeys-broker-server/src/handlers/cap.rs)), the worker scope re-check ([`worker-creds/src/verify.rs`](../../crates/agentkeys-worker-creds/src/verify.rs)), the memory worker S3 keying ([`worker-memory/src/handlers.rs`](../../crates/agentkeys-worker-memory/src/handlers.rs)).
+
+## 1. The decision
+
+**Memory belongs to the user (master), not to each agent device.** Agents are **delegated callers**.
+
+```
+Alice master      = user:alice            (owns the memory)
+Kitchen ESP32     = agent:kitchen         (delegated caller)
+Bedroom Tuya      = agent:bedroom         (delegated caller)
+Shared memory     = memory:user:alice:home   (one namespace, all of Alice's devices read it)
+```
+
+Both devices read the *same* user memory — e.g. *"After 10pm, do not turn on the main bedroom light; use the night light."* — so a rule set once is honored everywhere.
+
+**The runtime model (master never in the data path):**
+
+```
+Pair time   (control plane, master present, biometric-gated):
+  Alice grants agent:bedroom access to memory:user:alice:home   (stored on-chain grant)
+
+Runtime     (data plane, master ABSENT):
+  agent:bedroom asks broker for memory.get on memory:user:alice:home,
+    proving itself with J1_agent (session) + K10 (cap-PoP).
+  Broker verifies: caller IS agent:bedroom · Alice granted this agent this namespace ·
+    grant valid & not revoked · TTL/limits OK  → mints a SHORT-LIVED cap.
+  Worker re-verifies the cap independently, reads the user-owned shared memory.
+  Audit records: agent:bedroom read Alice's shared memory.
+```
+
+**One sentence:** the user owns the memory, the agent is a delegated caller, and runtime access is authorized by **stored master grants + agent self-refresh** — never by carrying the master session in the data path.
+
+### Why NOT "agent owns the memory" (operator == actor == agent)
+
+Per-agent memory (`memory:agent:bedroom:home`, each device owns its own namespace) is the simplest fit for the shipped `operator == actor` self-skip, but it is the **wrong product**: it silos memory per device, so a rule set on the bedroom device is invisible to the kitchen device. It also blurs "agent reading its own data" with "agent delegated to the user's shared data" — user-owned memory must still require a master grant, which the self-skip path deliberately bypasses. **Shared user memory ⇒ delegated, not self.**
+
+## 2. What already supports this (don't rebuild)
+
+The delegated runtime model is ~70% shipped — the verification machinery exists:
+
+| Capability | Where | Status |
+|---|---|---|
+| **Worker enforces delegated scope** (`operator ≠ actor` → on-chain `isServiceInScope(operator, actor, service)`; positive/negative/revocation tests) | [`worker-creds/src/verify.rs:611`](../../crates/agentkeys-worker-creds/src/verify.rs) ("POSITIVE delegation: master GRANTED the agent scope") | ✅ shipped |
+| **K10 PoP proves the caller IS the agent** (`keccak(ecrecover(client_sig)) == device_key_hash`, device bound on-chain to the actor) | [`handlers/cap.rs`](../../crates/agentkeys-broker-server/src/handlers/cap.rs) `verify_cap_pop` (#76) | ✅ shipped |
+| **Stored delegation grant** (`setScope(operator, actor, services)` at pair-time accept) | [`handlers/accept.rs`](../../crates/agentkeys-broker-server/src/handlers/accept.rs), `handlers/scope.rs` | ✅ shipped |
+| **Audit carries actor + operator** (a delegated read records agent + owner) | `GateTurn` (90) + `MemoryGet` (11) envelopes | ✅ shipped |
+| **Agent session token** (`J1_agent`, omni == the agent) | [`handlers/oidc.rs`](../../crates/agentkeys-broker-server/src/handlers/oidc.rs) | ✅ shipped |
+
+So the "broker verifies the agent + the grant, worker enforces, audit records" steps already exist for `operator ≠ actor`. Three gaps remain.
+
+## 3. The three gaps to close
+
+### Gap 1 — broker cap-mint authenticates the OWNER, not the agent caller
+
+[`cap.rs:393`](../../crates/agentkeys-broker-server/src/handlers/cap.rs): `if session_omni != req.operator_omni → OperatorMismatch`. The session JWT must be the **operator's (owner's)**. So an agent cannot mint a cap for the user's namespace with its **own** `J1_agent` — it would need the master's session, putting the master in the data path (§22c.5 violation).
+
+**Change:** authenticate the session as the **caller = actor** (`session_omni == actor_omni`), and authorize delegation via the existing on-chain grant + K10 PoP. Concretely:
+- `session_omni == req.actor_omni` (the agent proves itself; for master self-ops `operator == actor == master`, unchanged).
+- device binding becomes `device.operator_omni == req.operator_omni` (the owner that registered the device) and `device.actor_omni == req.actor_omni == session` (already the agent's device).
+- **Preserve `enforce_cred_store_master_self`** (#228): cred-STORE must stay `operator == actor` (master-self), gated on `operator == actor`, NOT on `session`. This is the one subtlety that must not regress.
+- The K10 PoP + the scope grant now carry the authz weight (they already exist). The session check shifts from "is the owner" to "is the caller" — a *more* correct model (caller proves identity; grant proves permission).
+
+Bounded, but it touches the cap-mint security path (#76/#90/#225 layered there) — **needs its own reviewed PR + the full negative-isolation test matrix** (§17.5), not a drive-by edit.
+
+### Gap 2 — memory is keyed by ACTOR, so there is no shared user namespace
+
+[`handlers.rs:373`](../../crates/agentkeys-worker-memory/src/handlers.rs): `s3_key(actor_omni, service) = bots/<actor_omni>/memory/<service>.enc`. Both put and get key off `cap.payload.actor_omni`. So a delegated cap (`operator=master, actor=agent`) reads `bots/<agent>/memory/` — the **agent's** prefix, not the user's. There is no `bots/<user>/memory/` that multiple agents share.
+
+**Change (recommended):** introduce **owner-keyed shared memory**, mirroring the **config data class** precedent (arch.md §17.2: config is keyed `bots/<operator_omni_hex>/config/`, master-owned, agents hold no cap → the master-self skip). Options:
+- **(a) Shared namespace keyed by operator/owner.** A `memory:user:<id>:<ns>` namespace stored at `bots/<operator_omni>/memory/<ns>.enc`; the worker keys *shared* reads off `operator_omni` (the owner), per-actor reads stay off `actor_omni`. Distinguish by a namespace convention or a cap field.
+- **(b) A distinct `shared-memory` data class** (own bucket + IAM role, like config), owner-keyed, with delegated agent reads via grant. Cleanest isolation, more infra.
+
+Either way, "shared user memory" must be addressable independent of which agent reads it.
+
+### Gap 3 — per-actor S3/IAM isolation blocks a granted agent from the owner's prefix
+
+[`apply-memory-bucket-policy.sh:104`](../../scripts/apply-memory-bucket-policy.sh): S3 access is scoped to `bots/${aws:PrincipalTag/agentkeys_actor_omni}/memory/*`, and the OIDC web-identity token is tagged with `agentkeys_actor_omni = session.omni` ([`oidc.rs:104`](../../crates/agentkeys-broker-server/src/handlers/oidc.rs)). So the agent's STS creds can only reach `bots/<agent>/` — **not** `bots/<user>/`. IAM cannot read the on-chain grant, so it can't conditionally widen this.
+
+**Change (options):**
+- Pair with Gap 2(a): the broker, having verified the grant at cap-mint, mints the STS web-identity token tagged with the **owner** omni for a granted shared read (the broker is the authority that already checked the grant) — so the agent's *cap* is its own, but the *S3 principal* for a shared read is owner-scoped. Tightly coupled to the broker change.
+- Or Gap 2(b): a `shared-memory` bucket whose policy grants read to the household's agents (a coarser, owner-level boundary), accepting that S3-layer isolation is at the *user* granularity for shared memory while the cap + worker layers stay per-grant.
+
+This is the layer that needs the most design care — it changes the §17.5 layer-3 assumption (per-actor) for the shared-memory case.
+
+## 4. Implications for `agentkeys-gate` (#308)
+
+The decision **vindicates the gate's delegated config shape** (`operator = owner, actor = agent`) — that was right; my prior "switch to self" lean is **overruled** by this product decision. What changes:
+
+- **Framing:** reposition as the **cloud agent-runtime adapter** (the hosted-LLM sibling of the in-sandbox agent daemon) — a **delegated caller above the worker enforcement boundary**, not a security gate. Drop the "deterministic enforcement is structural" over-claim from the docs.
+- **Identity:** `operator_omni` = the memory **owner** (user/master); `actor_omni` = the **agent**; the session bearer + K10 = the **agent's** (`J1_agent`). Once Gap 1 lands, the single agent bearer becomes correct (it authenticates the caller for cap-mint, and — with Gap 3 — the broker mints the owner-scoped STS for the shared read).
+- **Honesty:** the real-backend memory path is **blocked on Gaps 1–3**; until then the gate runs end-to-end only with the `echo` engine (hardware bring-up) and per-actor (non-shared) reads. The runbook's Part 2 must say so.
+- **Independent hardening (do now, no backend dep):** reject `tools`/`tool_choice`/`functions` (close the upstream tool-call enforcement bypass), and run-as-one-agent-identity (drop the loose `X-AgentKeys-Actor` multi-actor promise the single bearer can't honor).
+
+## 5. Sequencing
+
+1. **Agree this doc** (the namespace decision + the 3-gap shape).
+2. **Gate hardening + reframing** (no backend dep) — `agentkeys-gate` PR follow-up.
+3. **Gap 1 — broker cap-mint caller-auth** — own PR, full §17.5 negative-isolation matrix, preserve cred-store master-self.
+4. **Gap 2 — owner-keyed shared memory** (recommend the config-data-class-style approach) — worker + cap-payload.
+5. **Gap 3 — STS/bucket-policy for grant-scoped shared reads** — paired with 4; the hardest, design first.
+6. **Then** the gate's real-backend shared-memory path works end-to-end; re-do the runbook Part 2 against a live broker.
+
+## 6. Test plan (the invariants to prove)
+
+- **Positive delegation:** agent with a grant reads the user's shared namespace (`operator=owner, actor=agent`, agent's own session + K10) → success, audited as agent reading owner's memory.
+- **Negative — no grant:** agent WITHOUT a grant → broker `ServiceNotInScope` / worker re-check denies. Never reads.
+- **Negative — revoked:** master revokes the grant → next read denied (online-immediate; cached cap bounded by short TTL).
+- **Master self unchanged:** `operator == actor == master` still skips scope, reads `bots/<master>/`.
+- **Cred-store still master-self-only:** the Gap-1 change must NOT open delegated cred-store (#228 regression guard).
+- **Cross-agent isolation:** agent:bedroom cannot read agent:kitchen's *per-actor* memory (only the *shared* namespace both are granted).
diff --git a/docs/plan/spawn-sandbox-on-pair.md b/docs/plan/spawn-sandbox-on-pair.md
index 99bee87b..ccf80f90 100644
--- a/docs/plan/spawn-sandbox-on-pair.md
+++ b/docs/plan/spawn-sandbox-on-pair.md
@@ -67,6 +67,8 @@ RUN-TIME  (data plane, every interaction)
 | **Broker** (always-on) | K1 / signing authority | validates the master's signed intent, **spawns/tears down** sandboxes, mints cap-tokens, records on-chain binding |
 | **Sandbox** (per-device, ephemeral) | actor-scoped cap-token (not the master key) | runs hermes + daemon + that actor's memory; torn down on unpair/idle/revoke |
 
+> **Memory ownership (DECIDED 2026-06-16 — see [`shared-user-memory-delegation.md`](shared-user-memory-delegation.md)):** "that actor's memory" above is **delegated access to the user's shared memory**, NOT a per-agent silo. Memory belongs to the **user/master** (`memory:user:<id>:<ns>`); each agent is a **delegated caller** granted access at pair-time, so all of a user's devices read the same memory. The sandbox/daemon acts **as the agent** (its own `J1_agent` + K10) and self-refreshes short-lived caps at runtime — the master authorizes at pair-time but is never in the data path. (The runtime caller is the agent; the memory's owner is the user.)
+
 ## Why this beats the static sandbox
 
 1. **Isolation** — one sandbox per `O_agent_X` extends §17.5 four-layer isolation to the *compute* layer, not just storage.