diff --git a/Cargo.lock b/Cargo.lock index 01237be0..9ecbe581 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -258,6 +258,31 @@ dependencies = [ "tokio", ] +[[package]] +name = "agentkeys-gate" +version = "0.1.0" +dependencies = [ + "agentkeys-backend-client", + "agentkeys-core", + "agentkeys-memory-engine", + "agentkeys-protocol", + "anyhow", + "async-trait", + "axum", + "base64 0.22.1", + "clap", + "http-body-util", + "reqwest", + "rustls 0.23.37", + "serde", + "serde_json", + "thiserror 2.0.18", + "tokio", + "tower 0.4.13", + "tracing", + "tracing-subscriber", +] + [[package]] name = "agentkeys-mcp" version = "0.1.0" diff --git a/Cargo.toml b/Cargo.toml index 248379d5..2fbf1063 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -11,6 +11,7 @@ members = [ "crates/agentkeys-daemon", "crates/agentkeys-mcp", "crates/agentkeys-mcp-server", + "crates/agentkeys-gate", "crates/agentkeys-provisioner", "crates/agentkeys-backend-client", "crates/agentkeys-protocol", diff --git a/crates/agentkeys-core/src/audit/bodies.rs b/crates/agentkeys-core/src/audit/bodies.rs index b86463a1..1813942c 100644 --- a/crates/agentkeys-core/src/audit/bodies.rs +++ b/crates/agentkeys-core/src/audit/bodies.rs @@ -236,6 +236,38 @@ pub struct ConfigTeardownBody { pub actor_target: String, } +// ── 90..99 — gate family (Plan A in-path CustomLLM gate) ─────────────── +// +// Emitted by `agentkeys-gate` once per LLM turn. The memory READS themselves +// are audited worker-side (#229, op_kind MemoryGet); this row records the +// turn-level control-plane facts: which engine answered, what was injected, +// and the per-namespace authorization outcome. + +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +pub struct GateTurnNamespaceBody { + /// Memory namespace the gate attempted to inject (e.g. `"travel"`). + pub namespace: String, + /// Outcome of the in-path cap-check + read: `"injected"`, `"empty"`, or + /// `"denied"` (a 403 the actor had no scope for — recorded, non-fatal). + pub outcome: String, +} + +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +pub struct GateTurnBody { + /// The engine that produced the completion (`"openai-compatible"`, + /// `"echo"`, …) — provenance for "which model answered". + pub engine: String, + /// The upstream model id the turn ran against. + pub model: String, + /// Per-namespace cap-check + injection outcomes for this turn. + pub namespaces: Vec, + /// Total bytes of memory injected across all namespaces. + pub injected_bytes: u64, + /// Upstream-reported token usage (0 when the engine doesn't report it). + pub prompt_tokens: u64, + pub completion_tokens: u64, +} + #[cfg(test)] mod tests { use super::*; @@ -448,6 +480,49 @@ mod tests { } } + /// Gate family (Plan A): canonical CBOR roundtrip + typed decode for the + /// per-turn gate audit row, including the nested per-namespace outcomes. + #[test] + fn gate_family_cbor_roundtrip_and_typed_decode() { + use crate::audit::{envelope_for, AuditEnvelope, AuditOpKind, AuditResult, TypedAuditBody}; + + let body = GateTurnBody { + engine: "openai-compatible".into(), + model: "doubao-pro".into(), + namespaces: vec![ + GateTurnNamespaceBody { + namespace: "personal".into(), + outcome: "injected".into(), + }, + GateTurnNamespaceBody { + namespace: "travel".into(), + outcome: "denied".into(), + }, + ], + injected_bytes: 42, + prompt_tokens: 150, + completion_tokens: 30, + }; + let env = envelope_for( + [0x44; 32], + [0x22; 32], + AuditOpKind::GateTurn, + body.clone(), + AuditResult::Success, + Some("where did I go in Chengdu?".to_string()), + None, + ) + .unwrap(); + let decoded = + AuditEnvelope::from_canonical_cbor(&env.to_canonical_cbor().unwrap()).unwrap(); + assert_eq!(decoded.op_kind, AuditOpKind::GateTurn as u8); + assert_eq!(AuditOpKind::GateTurn.label(), "gate.turn"); + match decoded.typed_body().unwrap() { + TypedAuditBody::GateTurn(b) => assert_eq!(b, body), + other => panic!("unexpected typed body: {other:?}"), + } + } + #[test] fn payment_direct_body_uses_ref_as_field_name() { // Sanity check: `ref` is a Rust reserved word, so the field is diff --git a/crates/agentkeys-core/src/audit/mod.rs b/crates/agentkeys-core/src/audit/mod.rs index 31df0f26..3a0b95c4 100644 --- a/crates/agentkeys-core/src/audit/mod.rs +++ b/crates/agentkeys-core/src/audit/mod.rs @@ -56,9 +56,9 @@ use thiserror::Error; pub use bodies::{ ConfigGetBody, ConfigPutBody, ConfigTeardownBody, CredFetchBody, CredStoreBody, CredTeardownBody, DeviceAddBody, DeviceRevokeBody, EmailReceiveBody, EmailSendBody, - K10RotateBody, K3EpochAdvanceBody, MemoryGetBody, MemoryPutBody, MemoryTeardownBody, - PaymentDirectBody, PaymentEscrowRedeemBody, ScopeGrantBody, ScopeRevokeBody, SignEip191Body, - SignEip712Body, + GateTurnBody, GateTurnNamespaceBody, K10RotateBody, K3EpochAdvanceBody, MemoryGetBody, + MemoryPutBody, MemoryTeardownBody, PaymentDirectBody, PaymentEscrowRedeemBody, ScopeGrantBody, + ScopeRevokeBody, SignEip191Body, SignEip712Body, }; pub use op_kind::AuditOpKind; @@ -238,6 +238,7 @@ pub enum TypedAuditBody { ConfigPut(ConfigPutBody), ConfigGet(ConfigGetBody), ConfigTeardown(ConfigTeardownBody), + GateTurn(GateTurnBody), } impl TypedAuditBody { @@ -277,6 +278,7 @@ impl TypedAuditBody { AuditOpKind::ConfigTeardown => { Self::ConfigTeardown(serde_json::from_value(value).ok()?) } + AuditOpKind::GateTurn => Self::GateTurn(serde_json::from_value(value).ok()?), }) } } diff --git a/crates/agentkeys-core/src/audit/op_kind.rs b/crates/agentkeys-core/src/audit/op_kind.rs index 259b2c60..da7fbe98 100644 --- a/crates/agentkeys-core/src/audit/op_kind.rs +++ b/crates/agentkeys-core/src/audit/op_kind.rs @@ -15,7 +15,8 @@ //! - 60-69 email family (EmailSend=60, EmailReceive=61; 62-69 reserved) //! - 70-79 K3 family (K3EpochAdvance=70; 71-79 reserved) //! - 80-89 config family (ConfigPut=80, ConfigGet=81, ConfigTeardown=82; 83-89 reserved) -//! - 90-255 reserved for future families +//! - 90-99 gate family (GateTurn=90; 91-99 reserved) +//! - 100-255 reserved for future families /// Canonical op_kind enum. The byte value MUST match the row in arch.md /// §15.3a. The enum is `repr(u8)` so `as u8` gives the canonical byte. @@ -47,6 +48,11 @@ pub enum AuditOpKind { ConfigPut = 80, ConfigGet = 81, ConfigTeardown = 82, + /// In-path CustomLLM gate turn (Plan A, `docs/plan/ai-device-platform.md` + /// §3): one cap-checked + memory-injected + audited LLM turn. Recorded by + /// `agentkeys-gate` so a gated turn is on the ledger even when it injected + /// no memory (the worker-side memory audit only covers turns that read). + GateTurn = 90, } impl AuditOpKind { @@ -75,6 +81,7 @@ impl AuditOpKind { 80 => Self::ConfigPut, 81 => Self::ConfigGet, 82 => Self::ConfigTeardown, + 90 => Self::GateTurn, _ => return None, }) } @@ -105,6 +112,7 @@ impl AuditOpKind { Self::ConfigPut => "config.put", Self::ConfigGet => "config.get", Self::ConfigTeardown => "config.teardown", + Self::GateTurn => "gate.turn", } } } @@ -140,6 +148,7 @@ mod tests { AuditOpKind::ConfigPut, AuditOpKind::ConfigGet, AuditOpKind::ConfigTeardown, + AuditOpKind::GateTurn, ]; for k in all { let byte = k as u8; @@ -156,7 +165,7 @@ mod tests { #[test] fn unknown_bytes_return_none() { for byte in [ - 3u8, 9, 13, 19, 22, 32, 42, 53, 62, 71, 83, 89, 90, 200, 250, 255, + 3u8, 9, 13, 19, 22, 32, 42, 53, 62, 71, 83, 89, 91, 99, 200, 250, 255, ] { assert_eq!( AuditOpKind::from_u8(byte), @@ -193,6 +202,7 @@ mod tests { AuditOpKind::ConfigPut as u8, AuditOpKind::ConfigGet as u8, AuditOpKind::ConfigTeardown as u8, + AuditOpKind::GateTurn as u8, ]; let s: HashSet<_> = all.iter().copied().collect(); assert_eq!(s.len(), all.len(), "duplicate byte assignment"); diff --git a/crates/agentkeys-gate/Cargo.toml b/crates/agentkeys-gate/Cargo.toml new file mode 100644 index 00000000..ecc31eff --- /dev/null +++ b/crates/agentkeys-gate/Cargo.toml @@ -0,0 +1,53 @@ +[package] +name = "agentkeys-gate" +version = "0.1.0" +edition = "2021" + +[[bin]] +name = "agentkeys-gate" +path = "src/main.rs" + +[lib] +name = "agentkeys_gate" +path = "src/lib.rs" + +[dependencies] +# The shipped broker/worker chain (cap-mint -> per-actor STS -> worker put/get -> +# audit). The gate is a THIN in-path wrapper over this; it never re-types a +# cap/worker body (issue #203 — one owner). +agentkeys-backend-client = { workspace = true } +# Wire helpers shared with the browser host: service_memory(), normalize_omni_0x(). +agentkeys-protocol = { workspace = true } +# Caller-side memory selection engine (Passthrough / Lexical) — deterministic, +# no LLM in the gate path. The injection seam (plan §6a). +agentkeys-memory-engine = { workspace = true } +# Canonical audit op_kind table (AuditOpKind) + K10 device-key load for cap-PoP. +agentkeys-core = { workspace = true } + +serde = { workspace = true } +serde_json = { workspace = true } +tokio = { workspace = true } +async-trait = { workspace = true } +thiserror = { workspace = true } +anyhow = { workspace = true } + +axum = { version = "0.7", features = ["json"] } +tower = "0.4" +# reqwest pins rustls-tls so the upstream LLM call uses the same crypto provider +# the broker host already ships; the `ring` provider is installed in main. +reqwest = { version = "0.12", default-features = false, features = ["json", "rustls-tls"] } +rustls = { version = "0.23", default-features = false, features = ["ring", "std", "tls12"] } +clap = { version = "4", features = ["derive", "env"] } +tracing = "0.1" +tracing-subscriber = { version = "0.3", features = ["env-filter"] } +base64 = "0.22" + +[dev-dependencies] +tokio = { workspace = true } +async-trait = { workspace = true } +serde_json = { workspace = true } +# AuditOpKind::GateTurn assertions in the gate-flow tests. +agentkeys-core = { workspace = true } +base64 = "0.22" +tower = { version = "0.4", features = ["util"] } +http-body-util = "0.1" diff --git a/crates/agentkeys-gate/IMPLEMENTATION_LOG.md b/crates/agentkeys-gate/IMPLEMENTATION_LOG.md new file mode 100644 index 00000000..c18544c8 --- /dev/null +++ b/crates/agentkeys-gate/IMPLEMENTATION_LOG.md @@ -0,0 +1,179 @@ +# Implementation log — `agentkeys-gate` + +A narrative of what this crate is, how it was built, and **why each decision was +made** — so you can understand the work without reading every line. For *how to +run/test it*, see [`docs/operator-runbook-gate.md`](../../docs/operator-runbook-gate.md); +for the quick reference, the [README](README.md). + +> **Update (2026-06-16) — positioning + identity model (post-review).** Review of #308 + an identity-model dig (see [`docs/plan/shared-user-memory-delegation.md`](../../docs/plan/shared-user-memory-delegation.md)) corrected two things below: +> 1. **This is an *adapter* above the worker enforcement boundary, not a security gate.** The **worker** independently re-verifies every cap against the chain — that is the safety boundary. The gate adds in-path memory-injection + a turn-level audit record; it does not *enforce* (so read "deterministic enforcement is structural" in §5 as "the gate cannot read memory the *worker* won't serve it a cap for" — the enforcement is the worker's). +> 2. **Memory is the USER's; the agent is a delegated caller (DECIDED).** Owner = user/master (`memory:user::`); the gate runs **as the agent** (its own `J1_agent` + K10), authorized by a stored master grant. The delegated config shape (`operator = owner`, `actor = agent`) is correct, but the **real-backend shared-memory path is blocked on 3 backend gaps** (broker caller-auth, owner-keyed storage, grant-scoped STS — plan doc §3). Until they land, the gate runs end-to-end only with the `echo` engine. The gate also now **rejects** `tools`/`tool_choice` (it does not yet enforce tool-calling). + +--- + +## 1. What was asked + +"Start to work on the **A plan in #305**." That PR +([`docs/plan/ai-device-platform.md`](../../docs/plan/ai-device-platform.md)) is the +design for an AI-device platform. **Plan A = the Volcano MVP** (§3) — a 5-piece +build: + +1. Tuya T5 firmware → Volcano RTC (voice in/out) — *hardware*. +2. **The in-path CustomLLM gate** — per turn: cap/permission-check + memory-inject + audit, then call the model. +3. Reuse the shipped AgentKeys backend (broker, memory worker, cap-mint, audit). +4. Management view (parent-control). +5. **The adapter seam** — one engine behind a stable interface, so Plan B (self-host + more models) is "A + engines", not a rewrite (`A ⊂ B`). + +This crate is **pieces 2 + 5, reusing piece 3**. Pieces 1 and 4 are deferred (a +hardware spike and a UI task); see §8 below. + +## 2. The one-sentence shape + +`agentkeys-gate` is the in-path twin of `agentkeys-mcp-server`. Where the MCP +server lets an LLM *choose* to call AgentKeys tools, the gate sits **in the +request path** and runs the gate logic on **every** turn, deterministically, +then calls the model — which is what makes "central control + see-all-memory + +audit" real rather than opt-in. + +## 3. The per-turn flow (the heart — [`gate.rs`](src/gate.rs)) + +``` +POST /v1/chat/completions (Volcano RTC CustomLLM → gate) + │ + ▼ +1. resolve identity actor_omni (X-AgentKeys-Actor header OR config default), + operator_omni, device_key_hash — normalized to 0x form. + │ + ▼ +2. cap-check + memory-inject for each configured namespace: + ├─ mint a broker cap CapMintOp::MemoryGet, service = service_memory(ns) = "memory:" + ├─ read the worker BackendClient::memory_get → base64 plaintext + ├─ select lines agentkeys-memory-engine (passthrough/lexical), deterministic, no LLM + └─ classify outcome injected / empty (404) / denied (403) / FATAL (anything else) + │ → prepend ONE system message with the selected lines + ▼ +3. engine completion EngineAdapter::complete(request) (echo, or openai-compatible → Doubao) + │ + ▼ +4. audit one GateTurn (op_kind 90) row: {engine, model, per-namespace outcomes, + injected_bytes, token usage}. Non-fatal by default; --require-audit makes it block. + │ + ▼ +OpenAI ChatCompletionResponse (gate → Volcano → TTS → device) +``` + +## 4. Module map + +| File | Role | +|---|---| +| [`gate.rs`](src/gate.rs) | The orchestration above. `Gate::handle_chat_completion` + the memory-inject + audit helpers + the error `classify()`. | +| [`engine.rs`](src/engine.rs) | The **swappable seam** (piece 5): the `EngineAdapter` trait + `OpenAiCompatibleEngine` (Plan A → Doubao) + `EchoEngine` (local). | +| [`backend.rs`](src/backend.rs) | The `Backend` trait (cap_mint / memory_get / audit_append), impl'd on the shared `BackendClient`; the test seam. | +| [`openai.rs`](src/openai.rs) | The OpenAI `/v1/chat/completions` wire types (the gate's public contract). | +| [`config.rs`](src/config.rs) | `Cli` (clap) + `Config` (read once at startup) + `for_tests()`. | +| [`auth.rs`](src/auth.rs) | Vendor `Bearer` auth (constant-time) + the per-device `X-AgentKeys-Actor` header + the anonymous dev path. | +| [`error.rs`](src/error.rs) | `GateError` (thiserror) → HTTP status + OpenAI-shaped error body. | +| [`server.rs`](src/server.rs) | The axum router (`/v1/chat/completions`, `/healthz`). | +| [`main.rs`](src/main.rs) | Wire it up: build `BackendClient` + the chosen engine, run the server. | + +## 5. Key decisions (and why) + +**Reuse the shipped backend; own no new wire shape.** Every cap/memory/audit +call goes through the shared `agentkeys-backend-client::BackendClient` and the +shared protocol types — the same path the MCP server, daemon, and browser use. +This is the #203 "one owner" rule: re-typing a cap/worker body in a second place +is the exact drift-bug class the repo spent issues closing. The gate's *own* new +types (the OpenAI request/response) are its public HTTP contract, not a +broker/worker body, so they live here and don't touch the fixture gate. + +**The engine is the only swappable part (`A ⊂ B`).** [`engine.rs`](src/engine.rs) +hands an adapter only the (already memory-injected, already authorized) request + +a base URL + an API key. It never sees keys, identity, cap-tokens, or the audit +trail — those stay at the broker/worker/gate. That **red line** is what keeps the +platform sovereign and reversible: swapping `echo` → Doubao → Bedrock → a +self-host runtime is config, never a rewrite, and a compromised/forked engine +can't reach your memory or identity. (The engine *does* legitimately see the +injected memory **in the prompt** — that's the whole point of the gate; the red +line is about secrets/authority, not the content the user authorized for use.) + +**Why an `echo` engine exists.** It is not a test stub. It lets the entire +in-path gate (device → Volcano RTC → gate → reply) run on **real hardware before +Doubao credentials exist**, and it reflects the injected memory in its reply so +an operator can *see* the gate read + inject. That directly serves Plan A §3's +"prove the gate in-path on real hardware" goal. + +**The error-classification policy (no silent fallback).** In `classify()`, a +per-namespace **403 → `denied`** (the actor has no scope; recorded, non-fatal — +the turn answers degraded), a **404/empty → `empty`**. **Anything else** +(transport, 5xx, parse, not-configured) is a **hard `Backend` error that fails +the turn loud** (502). This honors the "fail loud, never hide a broken broker +path behind a silent fallback" rule: a broken backend must be *visible*, not +papered over as "memory was empty." The deterministic enforcement is structural — +the gate physically cannot read memory it has no broker-minted cap for. + +**The audit leg = a new `GateTurn` op_kind (90).** The plan makes audit the top +feature. Two facts pulled in different directions: (a) the memory worker already +audits each read as `MemoryGet` (#229), so double-auditing reads would be noise; +(b) a pure-chat turn that reads no memory would otherwise leave **no** trace on +the ledger. Resolution: the gate emits **one turn-level `GateTurn` row** (engine, +model, per-namespace outcomes, token usage) — distinct from the data-plane +`MemoryGet` rows. Adding an op_kind is a canonical-registry change, so it was +done to spec: the enum + decode + typed body + the arch.md §15.3a row + the +frozen tests (family 90-99 = "gate"). It is **non-fatal by default** +(`--require-audit` enforces) — the staged-rollout posture the worker audit uses, +so an audit-worker hiccup doesn't take the gate down before the rollout is ready. + +**Identity: header-or-config.** The per-device actor comes from the +`X-AgentKeys-Actor` header (one gate serving a fleet) **or** the configured +default actor (one-gate-per-device). The cap-mint session JWT is the *configured +device-actor JWT*, separate from the inbound *vendor* bearer that authenticates +the caller — two different secrets, two different trust roles. + +## 6. The review finding that got fixed + +A 4-reviewer adversarial pass (+ a verify pass) flagged **one** real defect: the +non-403/404 arm of `classify()` formatted the **full** `BackendError` — including +the upstream HTTP **body** — into the error returned to the vendor caller. A 5xx +broker/worker body could thus cross the trust boundary. **Fix** (in +[`gate.rs`](src/gate.rs)): log the full error for the operator via `tracing`, but +return only the safe **status category** (`backend HTTP 500`, never the body) to +the caller — and the same hardening for the engine path. Regression test: +`backend_5xx_body_is_not_echoed_to_caller` in +[`tests/gate_flow.rs`](tests/gate_flow.rs). (The other 15 review findings were +verified as not-real.) + +## 7. Test coverage + +| Test | Proves | +|---|---| +| `injects_memory_and_audits_the_turn` | the happy path: 2 namespaces minted + injected, one `GateTurn` (90) audit row with both outcomes `injected`. | +| `denied_namespace_is_recorded_not_fatal` | a 403 namespace → `denied`, turn proceeds with the allowed one. | +| `broken_backend_fails_the_turn` | a transport error fails the turn loud; the engine is never reached. | +| `backend_5xx_body_is_not_echoed_to_caller` | the review-fix: status surfaces, the upstream body never leaks. | +| `pure_chat_turn_is_still_audited` | no namespaces → no caps, but the turn is still on the ledger (the gap `GateTurn` closes). | +| `actor_header_overrides_default` | `X-AgentKeys-Actor` overrides the config default actor. | +| `tests/http.rs` (×4) | the real axum router: health, no-token 401, wrong-token 401, a full anonymous echo turn. | +| `openai.rs` / `engine.rs` / `auth.rs` unit tests (×11) | wire round-trip + unknown-param passthrough, echo surfaces memory, engine-without-key, constant-time token compare. | +| `agentkeys-core` `gate_family_cbor_roundtrip_and_typed_decode` | the `GateTurn` envelope CBOR-roundtrips + typed-decodes. | + +All offline (mock `Backend` + `EngineAdapter`); no env mutation; clippy `-D +warnings`; fmt clean. + +## 8. What is deferred (and why) + +| Deferred | Why | +|---|---| +| Piece 1 — Tuya firmware → Volcano RTC | hardware spike; needs the board. | +| Piece 4 — parent-control management view | a UI wiring task; sequenced after the endpoint. | +| **Deploy wiring** — `setup-broker-host.sh` | the gate isn't a deploy surface yet (needs a systemd unit + nginx vhost for the CustomLLM endpoint). | +| Memory **write-back** | the MVP reads memory; writing new memory from a turn is a later step. | +| **Streaming** | turns are cap/audit-granular; `stream:true` is served as a single completion. | +| **Spend** | plan §4 blocks it on single-use caps + mint-time budgeting (caps are replayable-until-TTL today). | + +## 9. Where the addresses/identity come from + +Nothing is hardcoded — every value is a flag/env read once at startup +(`Config::from_cli`). The device actor, operator omni, device-key hash, session +JWT, K10 key, and IAM role are the **same inputs** the in-sandbox daemon / MCP +server use for a paired device; the gate just impersonates one device actor +against the shipped broker. diff --git a/crates/agentkeys-gate/README.md b/crates/agentkeys-gate/README.md new file mode 100644 index 00000000..9147cf2f --- /dev/null +++ b/crates/agentkeys-gate/README.md @@ -0,0 +1,87 @@ +# agentkeys-gate + +The **in-path CustomLLM gate** — Plan A pieces 2 + 5 of +[`docs/plan/ai-device-platform.md`](../../docs/plan/ai-device-platform.md) §3. + +> **Testing it:** [`docs/operator-runbook-gate.md`](../../docs/operator-runbook-gate.md) (a 3-level runbook — local echo smoke test → real backend → Doubao). +> **Understanding it:** [`IMPLEMENTATION_LOG.md`](IMPLEMENTATION_LOG.md) (what was built + why each decision). + +An OpenAI-compatible `POST /v1/chat/completions` endpoint that Volcano RTC's +**CustomLLM mode** posts to. Per turn it does, in order: + +1. **cap-check + memory-inject** — mints a broker `memory-get` cap per configured + namespace (`service = memory:`, `CapMintOp::MemoryGet`), reads it through + the memory worker, runs the deterministic memory-selection engine + (`agentkeys-memory-engine`), and prepends the selected lines as a leading + `system` message; +2. **engine completion** — forwards the (now memory-augmented) request to a + swappable [`EngineAdapter`](src/engine.rs); +3. **audit** — appends one `GateTurn` (op_kind 90) row so every gated turn is on + the ledger, even one that injected no memory. + +It owns **no new wire shape** and **no persistent state**: identity · cap · +memory · audit all route through the shipped +`agentkeys-backend-client::BackendClient`. That is the red line — none of those +ever live inside the swappable engine. + +## A ⊂ B — the engine seam + +The [`EngineAdapter`](src/engine.rs) is the **only** swappable part of the +platform, so Plan B (self-host runtime, more model vendors) is "A + engines", +not a rewrite. Two engines ship today: + +| Engine (`--engine`) | What it calls | Use | +|---|---|---| +| `echo` (default) | nothing — reflects the turn + the injected memory | Hardware bring-up: run device → Volcano RTC → gate → reply with **no Doubao credentials**, and SEE the gate read + inject memory. This is Plan A §3's "prove the gate in-path on real hardware" path. | +| `openai-compatible` | a `/v1/chat/completions` upstream (Volcano RTC CustomLLM → Doubao, or any OpenAI-compatible server) | Production Plan A. Needs `--engine-base-url` + `--engine-api-key`. | + +## Run + +```bash +# Local / hardware bring-up — echo engine, no auth, no upstream: +AGENTKEYS_GATE_ALLOW_ANONYMOUS=1 \ +AGENTKEYS_GATE_DEFAULT_ACTOR=0x \ +AGENTKEYS_GATE_DEFAULT_OPERATOR_OMNI=0x \ +AGENTKEYS_GATE_DEFAULT_DEVICE_KEY_HASH=0x \ +AGENTKEYS_BROKER_URL=https://broker.litentry.org \ +AGENTKEYS_MEMORY_URL=https://cred.litentry.org \ +AGENTKEYS_AUDIT_URL=https://audit.litentry.org \ +AGENTKEYS_DEVICE_KEY_FILE=~/.agentkeys/agent-device.key \ + cargo run -p agentkeys-gate + +# Production — vendor-authenticated, Doubao via Volcano RTC CustomLLM: +AGENTKEYS_GATE_VENDOR_TOKENS=folotoy: \ +AGENTKEYS_GATE_ENGINE=openai-compatible \ +AGENTKEYS_GATE_ENGINE_BASE_URL=https://ark.cn-beijing.volces.com/api/v3 \ +AGENTKEYS_GATE_ENGINE_API_KEY_FILE=/etc/agentkeys/doubao.key \ +AGENTKEYS_GATE_ENGINE_MODEL=doubao-pro-32k \ + ... (broker/memory/audit URLs + device key as above) \ + cargo run -p agentkeys-gate +``` + +All config is `--flag` / env (see `--help`); nothing is hardcoded. The +per-device actor can be supplied per request via the `X-AgentKeys-Actor` header +(overrides `AGENTKEYS_GATE_DEFAULT_ACTOR`), so one gate instance can serve a +fleet, or one-gate-per-device can bake the actor into config. + +## What is NOT here (deferred per the plan's own sequencing) + +- **Streaming** — turns are cap/audit-granular; `stream: true` is served as a + single non-streamed completion. +- **Spend** — gated on single-use caps + mint-time budgeting (plan §4); the gate + does no payments. +- **Deploy wiring** — the gate is not yet wired into `setup-broker-host.sh`; that + is a follow-up (a new systemd unit + nginx vhost for the CustomLLM endpoint). +- **Memory WRITE-back**, the Tuya firmware spike (piece 1), and the + parent-control management view (piece 4) are separate follow-ups. + +## Tests + +```bash +cargo test -p agentkeys-gate +``` + +`tests/gate_flow.rs` proves the four per-turn behaviours (inject + audit, +denied-namespace non-fatal, broken-backend fails loud, pure-chat still audited) +against offline doubles; `tests/http.rs` drives the real axum router (auth + +health + a full echo turn). diff --git a/crates/agentkeys-gate/src/auth.rs b/crates/agentkeys-gate/src/auth.rs new file mode 100644 index 00000000..4e835048 --- /dev/null +++ b/crates/agentkeys-gate/src/auth.rs @@ -0,0 +1,136 @@ +//! Inbound auth for the HTTP transport. +//! +//! The vendor authenticates with a `Bearer ` (constant-time matched +//! against the configured `:` registry, exactly like the MCP +//! server). The per-device actor is conveyed in `X-AgentKeys-Actor`, or falls +//! back to the gate's configured default actor (one-gate-per-device deploys). +//! +//! NOTE: this bearer authenticates the *vendor calling the gate*. The session +//! JWT the gate forwards to the broker for cap-mint is the configured +//! `agent_session_bearer` (the device actor's own JWT) — a separate secret. + +use crate::config::Config; +use crate::error::{GateError, GateResult}; + +/// Resolved caller identity for one request. +#[derive(Debug, Clone)] +pub struct CallerContext { + /// Which vendor token authenticated (or `"anonymous"` in dev mode). + pub vendor_id: String, + /// The device actor omni from `X-AgentKeys-Actor`, or `"*"` when the caller + /// didn't bind one (the gate then uses its configured default actor). + pub actor_omni: String, +} + +/// Constant-time string compare — no early exit on the first differing byte, so +/// token validation can't be timing-probed. +fn constant_time_eq(a: &str, b: &str) -> bool { + let (a, b) = (a.as_bytes(), b.as_bytes()); + if a.len() != b.len() { + return false; + } + let mut diff = 0u8; + for (x, y) in a.iter().zip(b.iter()) { + diff |= x ^ y; + } + diff == 0 +} + +/// Validate the `Authorization` header against the vendor-token registry and +/// resolve the caller. When `allow_anonymous` is set AND no tokens are +/// configured, the bearer check is skipped (dev / echo-engine hardware bring-up). +pub fn authenticate( + config: &Config, + authorization: Option<&str>, + actor_header: Option<&str>, +) -> GateResult { + let actor_omni = match actor_header.map(str::trim).filter(|s| !s.is_empty()) { + Some(a) => a.to_string(), + None => "*".to_string(), + }; + + if config.vendor_tokens.is_empty() { + if config.allow_anonymous { + return Ok(CallerContext { + vendor_id: "anonymous".to_string(), + actor_omni, + }); + } + return Err(GateError::Unauthorized( + "no vendor tokens configured — set AGENTKEYS_GATE_VENDOR_TOKENS, or \ + AGENTKEYS_GATE_ALLOW_ANONYMOUS=1 for local/echo bring-up" + .into(), + )); + } + + let token = authorization + .and_then(|h| h.strip_prefix("Bearer ")) + .map(str::trim) + .filter(|t| !t.is_empty()) + .ok_or_else(|| GateError::Unauthorized("missing or malformed Bearer token".into()))?; + + // Check every entry (no early return) so a hit vs miss can't be timed. + let mut matched: Option<&str> = None; + for (vendor, expected) in &config.vendor_tokens { + if constant_time_eq(token, expected) { + matched = Some(vendor); + } + } + + match matched { + Some(vendor) => Ok(CallerContext { + vendor_id: vendor.to_string(), + actor_omni, + }), + None => Err(GateError::Unauthorized("invalid vendor token".into())), + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn cfg() -> Config { + Config::for_tests().with_vendor_token("folotoy", "secret-abc") + } + + #[test] + fn valid_bearer_resolves_vendor_and_actor() { + let ctx = authenticate(&cfg(), Some("Bearer secret-abc"), Some("0xdeadbeef")).unwrap(); + assert_eq!(ctx.vendor_id, "folotoy"); + assert_eq!(ctx.actor_omni, "0xdeadbeef"); + } + + #[test] + fn missing_actor_header_defaults_to_star() { + let ctx = authenticate(&cfg(), Some("Bearer secret-abc"), None).unwrap(); + assert_eq!(ctx.actor_omni, "*"); + } + + #[test] + fn wrong_token_rejected() { + let err = authenticate(&cfg(), Some("Bearer nope"), None).unwrap_err(); + assert!(matches!(err, GateError::Unauthorized(_))); + } + + #[test] + fn missing_bearer_rejected() { + let err = authenticate(&cfg(), None, None).unwrap_err(); + assert!(matches!(err, GateError::Unauthorized(_))); + } + + #[test] + fn anonymous_allowed_when_configured_and_no_tokens() { + let mut c = Config::for_tests(); + c.allow_anonymous = true; + let ctx = authenticate(&c, None, Some("0xabc")).unwrap(); + assert_eq!(ctx.vendor_id, "anonymous"); + assert_eq!(ctx.actor_omni, "0xabc"); + } + + #[test] + fn no_tokens_and_not_anonymous_is_unauthorized() { + let err = authenticate(&Config::for_tests(), Some("Bearer x"), None).unwrap_err(); + assert!(matches!(err, GateError::Unauthorized(_))); + } +} diff --git a/crates/agentkeys-gate/src/backend.rs b/crates/agentkeys-gate/src/backend.rs new file mode 100644 index 00000000..5669f721 --- /dev/null +++ b/crates/agentkeys-gate/src/backend.rs @@ -0,0 +1,64 @@ +//! Backend seam — the broker/worker RPCs the gate drives in-path. +//! +//! Like the MCP server (issue #203/#207), the gate owns NO persistent state and +//! re-types NO wire body. The production backend IS the shared +//! [`agentkeys_backend_client::BackendClient`]; the `Backend` trait is impl'd +//! directly on it. The trait survives as the test seam — `tests/common`'s mock +//! is its second impl, keeping the gate's unit tests offline + deterministic. + +use async_trait::async_trait; + +use agentkeys_backend_client::BackendClient; + +// One owner for every broker/worker wire shape (issue #203). Re-exported so the +// gate's modules import `crate::backend::*`. +pub use agentkeys_backend_client::{ + AuditAppendInput, AuditAppendResult, BackendError, CapMintOp, CapMintRequest, CapToken, + MemoryGetInput, MemoryGetResult, +}; + +/// The three broker/worker calls the in-path gate makes per turn: +/// mint a memory-read cap, read the namespace, append the gate-turn audit row. +#[async_trait] +pub trait Backend: Send + Sync { + async fn cap_mint( + &self, + op: CapMintOp, + req: CapMintRequest, + session_bearer: &str, + ) -> Result; + + async fn memory_get(&self, input: MemoryGetInput) -> Result; + + async fn audit_append( + &self, + input: AuditAppendInput, + ) -> Result; +} + +// The production backend IS the shared client (issue #203 — the ONE owner of the +// cap-mint -> STS relay -> worker chain). Each method delegates to +// `BackendClient`'s inherent method of the same name; inherent methods win in +// resolution, so there is no recursion. +#[async_trait] +impl Backend for BackendClient { + async fn cap_mint( + &self, + op: CapMintOp, + req: CapMintRequest, + session_bearer: &str, + ) -> Result { + self.cap_mint(op, req, session_bearer).await + } + + async fn memory_get(&self, input: MemoryGetInput) -> Result { + self.memory_get(input).await + } + + async fn audit_append( + &self, + input: AuditAppendInput, + ) -> Result { + self.audit_append(input).await + } +} diff --git a/crates/agentkeys-gate/src/config.rs b/crates/agentkeys-gate/src/config.rs new file mode 100644 index 00000000..9649d871 --- /dev/null +++ b/crates/agentkeys-gate/src/config.rs @@ -0,0 +1,288 @@ +//! Runtime configuration — CLI flags + env vars, read ONCE at startup and +//! treated as immutable (the `from_env`/inject pattern; AGENTS.md bans +//! `std::env::set_var` in tests, so `Config::for_tests` builds the struct +//! directly). + +use std::collections::HashMap; +use std::net::SocketAddr; + +use clap::Parser; + +/// Default memory namespaces injected per turn — the canonical four +/// (`apps/parent-control/lib/constants.ts`). +const DEFAULT_NAMESPACES: &str = "personal,family,work,travel"; +const DEFAULT_CAP_TTL_SECONDS: u64 = 300; + +#[derive(Parser, Debug, Clone)] +#[command( + name = "agentkeys-gate", + about = "AgentKeys in-path CustomLLM gate — cap-check + memory-inject + audit per LLM turn \ + (Plan A: ai-device-platform.md §3)" +)] +pub struct Cli { + /// HTTP bind address. The OpenAI-compatible endpoint lives at + /// `POST /v1/chat/completions`; health at `GET /healthz`. + #[arg(long, env = "AGENTKEYS_GATE_LISTEN", default_value = "0.0.0.0:8077")] + pub listen: SocketAddr, + + /// Broker base URL (cap-mint). + #[arg(long, env = "AGENTKEYS_BROKER_URL")] + pub broker_url: Option, + + /// Memory worker base URL. + #[arg(long, env = "AGENTKEYS_MEMORY_URL")] + pub memory_url: Option, + + /// Audit worker base URL. + #[arg(long, env = "AGENTKEYS_AUDIT_URL")] + pub audit_url: Option, + + /// Comma-separated `:` pairs the gate accepts. + /// Empty = the gate refuses every request (401) unless + /// `--allow-anonymous`. + #[arg(long, env = "AGENTKEYS_GATE_VENDOR_TOKENS", default_value = "")] + pub vendor_tokens: String, + + /// Accept unauthenticated requests when no vendor tokens are configured. + /// For LOCAL / echo-engine hardware bring-up only — logged loudly at start. + #[arg(long, env = "AGENTKEYS_GATE_ALLOW_ANONYMOUS", default_value_t = false)] + pub allow_anonymous: bool, + + /// Ambient device actor omni used when the request carries no + /// `X-AgentKeys-Actor` header (one-gate-per-device deploys). + #[arg(long, env = "AGENTKEYS_GATE_DEFAULT_ACTOR")] + pub default_actor: Option, + + /// Ambient operator (master) omni for cap-mint binding. + #[arg(long, env = "AGENTKEYS_GATE_DEFAULT_OPERATOR_OMNI")] + pub default_operator_omni: Option, + + /// Ambient device-key hash for cap-mint binding (used only when no K10 + /// device key is loaded; see `AGENTKEYS_DEVICE_KEY_FILE`). + #[arg(long, env = "AGENTKEYS_GATE_DEFAULT_DEVICE_KEY_HASH")] + pub default_device_key_hash: Option, + + /// Agent session JWT (omni == the device actor) the gate forwards to the + /// broker for cap-mint + the per-actor STS relay. + #[arg(long, env = "AGENTKEYS_GATE_AGENT_SESSION_BEARER")] + pub agent_session_bearer: Option, + + /// Owner-only file (0600) holding the agent session JWT — preferred over the + /// inline flag so the JWT never rides the process list. + #[arg(long, env = "AGENTKEYS_GATE_AGENT_SESSION_BEARER_FILE")] + pub agent_session_bearer_file: Option, + + /// Memory IAM role ARN the memory worker assumes via web-identity (per-actor + /// S3 isolation). + #[arg(long, env = "AGENTKEYS_GATE_MEMORY_ROLE_ARN")] + pub memory_role_arn: Option, + + /// AWS region for the STS `AssumeRoleWithWebIdentity` call. + #[arg(long, env = "AWS_REGION", default_value = "us-east-1")] + pub aws_region: String, + + /// Comma-separated memory namespaces injected per turn. + #[arg(long, env = "AGENTKEYS_GATE_MEMORY_NAMESPACES", default_value = DEFAULT_NAMESPACES)] + pub memory_namespaces: String, + + /// Memory selection engine for injection: `passthrough` (default) or + /// `lexical`. Deterministic, no LLM in the gate path. + #[arg(long, env = "AGENTKEYS_MEMORY_ENGINE", default_value = "passthrough")] + pub memory_engine: String, + + /// TTL (seconds) for the per-turn memory-read caps. + #[arg(long, env = "AGENTKEYS_GATE_CAP_TTL_SECONDS", default_value_t = DEFAULT_CAP_TTL_SECONDS)] + pub cap_ttl_seconds: u64, + + /// Deny the turn if the gate-turn audit row fails to append (tamper-evident + /// audit is the product). Default off during staged rollout — matches the + /// `AGENTKEYS_WORKER_REQUIRE_AUDIT` posture. + #[arg(long, env = "AGENTKEYS_GATE_REQUIRE_AUDIT", default_value_t = false)] + pub require_audit: bool, + + /// LLM engine: `echo` (local, no upstream — for hardware bring-up) or + /// `openai-compatible` (Volcano RTC CustomLLM → Doubao or any + /// `/v1/chat/completions` upstream). + #[arg(long, env = "AGENTKEYS_GATE_ENGINE", default_value = "echo")] + pub engine: String, + + /// Upstream base URL for `--engine openai-compatible` (e.g. + /// `https://ark.cn-beijing.volces.com/api/v3`). + #[arg(long, env = "AGENTKEYS_GATE_ENGINE_BASE_URL")] + pub engine_base_url: Option, + + /// Upstream API key for `--engine openai-compatible`. + #[arg(long, env = "AGENTKEYS_GATE_ENGINE_API_KEY")] + pub engine_api_key: Option, + + /// Owner-only file (0600) holding the upstream API key — preferred over the + /// inline flag. + #[arg(long, env = "AGENTKEYS_GATE_ENGINE_API_KEY_FILE")] + pub engine_api_key_file: Option, + + /// Force a specific upstream model (Doubao SKU). When unset the caller's + /// `model` field is forwarded. + #[arg(long, env = "AGENTKEYS_GATE_ENGINE_MODEL")] + pub engine_model: Option, +} + +/// Which LLM engine the gate delegates the turn to. +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum EngineKind { + Echo, + OpenAiCompatible { + base_url: String, + api_key: String, + model: Option, + }, +} + +#[derive(Debug, Clone)] +pub struct Config { + pub listen: SocketAddr, + pub broker_url: Option, + pub memory_url: Option, + pub audit_url: Option, + /// vendor_id -> bearer_token + pub vendor_tokens: HashMap, + pub allow_anonymous: bool, + pub default_actor: Option, + pub default_operator_omni: Option, + pub default_device_key_hash: Option, + pub agent_session_bearer: Option, + pub memory_role_arn: Option, + pub aws_region: String, + pub memory_namespaces: Vec, + pub memory_engine: String, + pub cap_ttl_seconds: u64, + pub require_audit: bool, + pub engine: EngineKind, +} + +fn read_secret_file(path: &str, what: &str) -> anyhow::Result> { + let raw = std::fs::read_to_string(path) + .map_err(|e| anyhow::anyhow!("read {what} file {path}: {e}"))?; + let trimmed = raw.trim().to_string(); + Ok(if trimmed.is_empty() { + None + } else { + Some(trimmed) + }) +} + +fn parse_csv(raw: &str) -> Vec { + raw.split(',') + .map(str::trim) + .filter(|s| !s.is_empty()) + .map(str::to_string) + .collect() +} + +impl Config { + pub fn from_cli(cli: Cli) -> anyhow::Result { + let mut vendor_tokens = HashMap::new(); + for pair in cli + .vendor_tokens + .split(',') + .filter(|s| !s.trim().is_empty()) + { + let (vendor, token) = pair + .split_once(':') + .ok_or_else(|| anyhow::anyhow!("malformed vendor_token entry: {pair}"))?; + vendor_tokens.insert(vendor.trim().to_string(), token.trim().to_string()); + } + + // Prefer the owner-only bearer FILE over an inline bearer (keeps the JWT + // off the process list), same posture as the MCP server. + let agent_session_bearer = match cli.agent_session_bearer { + Some(b) => Some(b), + None => match cli.agent_session_bearer_file.as_deref() { + Some(path) => read_secret_file(path, "agent-session-bearer")?, + None => None, + }, + }; + + let engine = match cli.engine.as_str() { + "echo" => EngineKind::Echo, + "openai-compatible" | "openai_compatible" => { + let base_url = cli.engine_base_url.ok_or_else(|| { + anyhow::anyhow!( + "--engine openai-compatible requires --engine-base-url \ + (env AGENTKEYS_GATE_ENGINE_BASE_URL)" + ) + })?; + let api_key = match cli.engine_api_key { + Some(k) => Some(k), + None => match cli.engine_api_key_file.as_deref() { + Some(path) => read_secret_file(path, "engine-api-key")?, + None => None, + }, + } + .ok_or_else(|| { + anyhow::anyhow!( + "--engine openai-compatible requires --engine-api-key or \ + --engine-api-key-file" + ) + })?; + EngineKind::OpenAiCompatible { + base_url, + api_key, + model: cli.engine_model, + } + } + other => anyhow::bail!("unknown engine `{other}` (expected echo|openai-compatible)"), + }; + + let memory_namespaces = parse_csv(&cli.memory_namespaces); + + Ok(Self { + listen: cli.listen, + broker_url: cli.broker_url, + memory_url: cli.memory_url, + audit_url: cli.audit_url, + vendor_tokens, + allow_anonymous: cli.allow_anonymous, + default_actor: cli.default_actor, + default_operator_omni: cli.default_operator_omni, + default_device_key_hash: cli.default_device_key_hash, + agent_session_bearer, + memory_role_arn: cli.memory_role_arn, + aws_region: cli.aws_region, + memory_namespaces, + memory_engine: cli.memory_engine, + cap_ttl_seconds: cli.cap_ttl_seconds, + require_audit: cli.require_audit, + engine, + }) + } + + /// Test builder — no parsing, no env reads. Echo engine, no namespaces, + /// caller injects the rest. + pub fn for_tests() -> Self { + Self { + listen: "127.0.0.1:0".parse().unwrap(), + broker_url: None, + memory_url: None, + audit_url: None, + vendor_tokens: HashMap::new(), + allow_anonymous: false, + default_actor: None, + default_operator_omni: None, + default_device_key_hash: None, + agent_session_bearer: None, + memory_role_arn: None, + aws_region: "us-east-1".to_string(), + memory_namespaces: Vec::new(), + memory_engine: "passthrough".to_string(), + cap_ttl_seconds: DEFAULT_CAP_TTL_SECONDS, + require_audit: false, + engine: EngineKind::Echo, + } + } + + pub fn with_vendor_token(mut self, vendor: &str, token: &str) -> Self { + self.vendor_tokens + .insert(vendor.to_string(), token.to_string()); + self + } +} diff --git a/crates/agentkeys-gate/src/engine.rs b/crates/agentkeys-gate/src/engine.rs new file mode 100644 index 00000000..ec11f21b --- /dev/null +++ b/crates/agentkeys-gate/src/engine.rs @@ -0,0 +1,214 @@ +//! The engine seam (Plan A piece 5) — the ONLY swappable part of the platform. +//! +//! The gate owns identity · cap · memory-inject · audit (the red line). The +//! engine owns just the LLM turn: "given an OpenAI request, return an OpenAI +//! response from the upstream." Plan A ships ONE engine — an OpenAI-compatible +//! HTTP upstream (Volcano RTC CustomLLM → Doubao). Plan B adds Bedrock/Anthropic, +//! BytePlus, Qwen, and the self-host runtime behind this SAME trait, so B = A + +//! engines with no rewrite. +//! +//! The red-line invariant is structural: an `EngineAdapter` is handed only the +//! (already memory-injected, already authorized) request + a base URL + an API +//! key. It never sees K3/K10/K11, cap-tokens, the actor identity, or the audit +//! trail — those live at the broker + worker + gate, unreachable from a swapped +//! engine. + +use std::time::{SystemTime, UNIX_EPOCH}; + +use async_trait::async_trait; + +use crate::openai::{ChatCompletionRequest, ChatCompletionResponse, ChatMessage, Choice, Usage}; + +#[derive(thiserror::Error, Debug)] +pub enum EngineError { + #[error("engine not configured: {0}")] + NotConfigured(&'static str), + #[error("upstream HTTP error ({status}): {body}")] + Http { status: u16, body: String }, + #[error("upstream transport error: {0}")] + Transport(String), + #[error("upstream response parse error: {0}")] + Parse(String), +} + +/// The pluggable LLM engine. Implementations call exactly one upstream and +/// return its completion — no policy, no memory, no audit (those are the gate's). +#[async_trait] +pub trait EngineAdapter: Send + Sync { + /// Stable name for audit provenance (`"openai-compatible"`, `"echo"`, …). + fn name(&self) -> &'static str; + + /// Run one completion. `req.messages` already carries any gate-injected + /// memory system message; the engine forwards it verbatim. + async fn complete( + &self, + req: ChatCompletionRequest, + ) -> Result; +} + +fn now_unix() -> u64 { + SystemTime::now() + .duration_since(UNIX_EPOCH) + .map(|d| d.as_secs()) + .unwrap_or(0) +} + +/// Plan-A engine: an OpenAI-compatible upstream (Volcano RTC CustomLLM → Doubao, +/// or any `/v1/chat/completions` server). `base_url` is the upstream root (e.g. +/// `https://ark.cn-beijing.volces.com/api/v3`); the adapter appends +/// `/chat/completions`. +pub struct OpenAiCompatibleEngine { + client: reqwest::Client, + base_url: String, + api_key: String, + /// Optional model override. When `None` the caller's `req.model` is used — + /// so the vendor picks the Doubao SKU. + model: Option, +} + +impl OpenAiCompatibleEngine { + pub fn new(base_url: String, api_key: String, model: Option) -> Self { + Self { + client: reqwest::Client::new(), + base_url: base_url.trim_end_matches('/').to_string(), + api_key, + model, + } + } +} + +#[async_trait] +impl EngineAdapter for OpenAiCompatibleEngine { + fn name(&self) -> &'static str { + "openai-compatible" + } + + async fn complete( + &self, + mut req: ChatCompletionRequest, + ) -> Result { + if self.api_key.is_empty() { + return Err(EngineError::NotConfigured("engine api key")); + } + if let Some(model) = &self.model { + req.model = model.clone(); + } + // The gate never streams to the upstream — the per-turn cap/audit work is + // turn-granular, so a single completion is requested even if the caller + // asked to stream. + req.stream = false; + + let url = format!("{}/chat/completions", self.base_url); + let resp = self + .client + .post(&url) + .bearer_auth(&self.api_key) + .json(&req) + .send() + .await + .map_err(|e| EngineError::Transport(e.to_string()))?; + + if !resp.status().is_success() { + let status = resp.status().as_u16(); + let body = resp.text().await.unwrap_or_default(); + return Err(EngineError::Http { status, body }); + } + + resp.json::() + .await + .map_err(|e| EngineError::Parse(e.to_string())) + } +} + +/// Local engine that echoes the conversation instead of calling an upstream LLM. +/// +/// This is NOT a test stub — it lets the whole in-path gate (device → Volcano +/// RTC → gate → response) run on real hardware BEFORE Doubao credentials exist, +/// which is exactly Plan A §3's "prove the gate in-path on real hardware" goal. +/// It surfaces the injected memory in its reply so an operator can SEE that the +/// gate read + injected the user's memory. +pub struct EchoEngine; + +#[async_trait] +impl EngineAdapter for EchoEngine { + fn name(&self) -> &'static str { + "echo" + } + + async fn complete( + &self, + req: ChatCompletionRequest, + ) -> Result { + let injected = req + .messages + .iter() + .find(|m| m.role == "system") + .map(ChatMessage::text) + .filter(|s| !s.is_empty()); + let user = req + .messages + .iter() + .rev() + .find(|m| m.role == "user") + .map(ChatMessage::text) + .unwrap_or_default(); + + let reply = match injected { + Some(mem) => format!("[echo] You said: {user}\n[echo] Using your memory:\n{mem}"), + None => format!("[echo] You said: {user}"), + }; + + Ok(ChatCompletionResponse { + id: "echo-0".to_string(), + object: "chat.completion".to_string(), + created: now_unix(), + model: req.model, + choices: vec![Choice { + index: 0, + message: ChatMessage::new("assistant", reply), + finish_reason: Some("stop".to_string()), + }], + usage: Usage::default(), + extra: serde_json::Map::new(), + }) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::openai::ChatMessage; + + #[tokio::test] + async fn echo_surfaces_injected_memory() { + let mut req = ChatCompletionRequest { + model: "m".into(), + messages: vec![ChatMessage::new("user", "what do you know about me?")], + stream: false, + extra: serde_json::Map::new(), + }; + req.prepend_system("Allergic to peanuts."); + let resp = EchoEngine.complete(req).await.unwrap(); + let text = resp.choices[0].message.text(); + assert!( + text.contains("peanuts"), + "echo reflects injected memory: {text}" + ); + assert!(text.contains("what do you know about me?")); + } + + #[tokio::test] + async fn openai_engine_without_key_is_not_configured() { + let engine = OpenAiCompatibleEngine::new("http://localhost:1".into(), "".into(), None); + let req = ChatCompletionRequest { + model: "m".into(), + messages: vec![ChatMessage::new("user", "hi")], + stream: false, + extra: serde_json::Map::new(), + }; + assert!(matches!( + engine.complete(req).await, + Err(EngineError::NotConfigured(_)) + )); + } +} diff --git a/crates/agentkeys-gate/src/error.rs b/crates/agentkeys-gate/src/error.rs new file mode 100644 index 00000000..036fcefe --- /dev/null +++ b/crates/agentkeys-gate/src/error.rs @@ -0,0 +1,82 @@ +//! Gate error type. Library errors use `thiserror` (AGENTS.md convention); the +//! HTTP layer maps each variant to an OpenAI-shaped error envelope + status. +//! +//! The split mirrors the "no silent fallback" rule: a broken backend path +//! (broker/worker unreachable or 5xx) is a LOUD `Backend` error surfaced as 502, +//! never a quiet "pretend memory was empty". A per-namespace authorization deny +//! (HTTP 403) is NOT an error here — it is a recorded, non-fatal outcome handled +//! in `gate::inject_memory`. + +use crate::openai::ApiError; + +#[derive(thiserror::Error, Debug)] +pub enum GateError { + /// Inbound request rejected before any backend work (bad bearer / missing + /// vendor token). + #[error("unauthorized: {0}")] + Unauthorized(String), + + /// The caller is authenticated but not allowed (e.g. actor mismatch). + #[error("forbidden: {0}")] + Forbidden(String), + + /// Malformed OpenAI request the gate can't act on. + #[error("bad request: {0}")] + BadRequest(String), + + /// A required identity field (actor / operator / device-key-hash) was + /// neither supplied per-request nor configured. A deploy/config error. + #[error("identity not resolved: {0}")] + Identity(String), + + /// The broker/worker chain is broken (unreachable, 5xx, parse, or not + /// configured). Surfaced loud — we do NOT degrade silently past a broken + /// gate path. + #[error("backend error: {0}")] + Backend(String), + + /// The upstream LLM engine failed (Doubao/Volcano unreachable or errored). + #[error("engine error: {0}")] + Engine(String), + + /// Audit append failed AND `AGENTKEYS_GATE_REQUIRE_AUDIT=1` is set — the + /// turn is denied because it could not be recorded (tamper-evident audit is + /// the product). + #[error("audit error: {0}")] + Audit(String), + + #[error("internal error: {0}")] + Internal(String), +} + +impl GateError { + /// HTTP status for this error. + pub fn status(&self) -> u16 { + match self { + GateError::Unauthorized(_) => 401, + GateError::Forbidden(_) => 403, + GateError::BadRequest(_) => 400, + GateError::Identity(_) => 500, + GateError::Backend(_) | GateError::Engine(_) => 502, + GateError::Audit(_) | GateError::Internal(_) => 500, + } + } + + /// The OpenAI `error.type` discriminator a vendor's OpenAI client expects. + fn openai_type(&self) -> &'static str { + match self { + GateError::Unauthorized(_) => "authentication_error", + GateError::Forbidden(_) => "permission_error", + GateError::BadRequest(_) => "invalid_request_error", + GateError::Backend(_) | GateError::Engine(_) => "upstream_error", + GateError::Identity(_) | GateError::Audit(_) | GateError::Internal(_) => "api_error", + } + } + + /// Render as the OpenAI error envelope returned in the HTTP body. + pub fn to_api_error(&self) -> ApiError { + ApiError::new(self.openai_type(), self.to_string()) + } +} + +pub type GateResult = Result; diff --git a/crates/agentkeys-gate/src/gate.rs b/crates/agentkeys-gate/src/gate.rs new file mode 100644 index 00000000..7e1d9389 --- /dev/null +++ b/crates/agentkeys-gate/src/gate.rs @@ -0,0 +1,383 @@ +//! The in-path gate (Plan A piece 2). One LLM turn = +//! +//! resolve identity -> cap-check + memory-inject (per namespace) -> +//! engine completion -> gate-turn audit. +//! +//! Reuses the shipped backend verbatim: each namespace is read by minting a +//! broker `memory-get` cap (`CapMintOp::MemoryGet`, service `memory:`) and +//! calling the memory worker — the SAME path the MCP `memory.get` tool uses, so +//! per-namespace cap isolation + the worker-side memory audit (#229) hold +//! unchanged. The gate then appends ONE additional `GateTurn` audit row so a +//! turn is on the ledger even when it injected no memory. + +use std::sync::Arc; + +use agentkeys_core::audit::AuditOpKind; +use agentkeys_memory_engine::{engine_from_name, select_blob, MemoryEngine, SelectionBudget}; +use agentkeys_protocol::{normalize_omni_0x, service_memory}; +use base64::Engine as _; +use serde_json::json; + +use crate::auth::CallerContext; +use crate::backend::{Backend, BackendError, CapMintOp, CapMintRequest, MemoryGetInput}; +use crate::config::Config; +use crate::engine::EngineAdapter; +use crate::error::{GateError, GateResult}; +use crate::openai::ChatCompletionRequest; + +/// Canonical audit result byte (`agentkeys_core::audit` — Success = 0). +const AUDIT_RESULT_SUCCESS: u8 = 0; +/// Cap the audited intent text so a huge prompt can't bloat the envelope. +const MAX_INTENT_CHARS: usize = 1024; +/// OpenAI tool-calling fields the gate refuses to forward (it does not yet +/// enforce tool routing — forwarding them would bypass AgentKeys enforcement at +/// the upstream). See `docs/plan/shared-user-memory-delegation.md` §4. +const UNSUPPORTED_TOOL_FIELDS: &[&str] = &["tools", "tool_choice", "functions", "function_call"]; + +/// Resolved per-turn identity (all normalized to `0x`-omni shape). +struct Identity { + operator_omni: String, + actor_omni: String, + device_key_hash: String, +} + +/// What happened for one namespace's memory read this turn — recorded in the +/// audit row. `Denied`/`Empty` are non-fatal; a broken backend is a hard error +/// raised before any outcome is recorded. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum NamespaceOutcome { + Injected(usize), + Empty, + Denied, +} + +impl NamespaceOutcome { + fn label(self) -> &'static str { + match self { + NamespaceOutcome::Injected(_) => "injected", + NamespaceOutcome::Empty => "empty", + NamespaceOutcome::Denied => "denied", + } + } +} + +/// The gate: shared, immutable, cloneable handles wired once at startup. +pub struct Gate { + config: Config, + backend: Arc, + llm: Arc, + memory_selector: Box, + memory_budget: SelectionBudget, +} + +impl Gate { + /// Production constructor — builds the memory-selection engine + budget from + /// config/env. + pub fn new(config: Config, backend: Arc, llm: Arc) -> Self { + let memory_selector = engine_from_name(&config.memory_engine); + let memory_budget = SelectionBudget::from_env(); + Self { + config, + backend, + llm, + memory_selector, + memory_budget, + } + } + + pub fn config(&self) -> &Config { + &self.config + } + + /// Handle one OpenAI `/v1/chat/completions` turn through the gate. + pub async fn handle_chat_completion( + &self, + caller: &CallerContext, + mut req: ChatCompletionRequest, + ) -> GateResult { + if req.messages.is_empty() { + return Err(GateError::BadRequest("messages must not be empty".into())); + } + // Tool-calling is NOT gate-enforced in the MVP. Forwarding `tools` / + // `tool_choice` / `functions` to the upstream would let the model call + // tools DIRECTLY at the upstream, outside AgentKeys cap/audit enforcement + // (the "in-path" property would silently leak). Refuse rather than + // forward — gate-enforced, worker-backed tool routing is a follow-up. + if let Some(field) = UNSUPPORTED_TOOL_FIELDS + .iter() + .find(|f| req.extra.contains_key(**f)) + { + return Err(GateError::BadRequest(format!( + "`{field}` is not supported: the gate does not yet enforce tool-calling, so it \ + refuses to forward it to the upstream (that would bypass AgentKeys enforcement). \ + Remove it, or use a worker-backed action path." + ))); + } + let ident = self.resolve_identity(caller)?; + let intent = truncate(&req.last_user_text(), MAX_INTENT_CHARS); + + // ── cap-check + memory-inject ─────────────────────────────────────── + let (context, outcomes) = self.inject_memory(&ident).await?; + if let Some(ctx) = context { + req.prepend_system(ctx); + } + + // ── engine turn ───────────────────────────────────────────────────── + // Log the full upstream error for the operator, but return only the + // safe category to the vendor caller — never echo an upstream response + // body across the trust boundary. + let response = self.llm.complete(req).await.map_err(|e| { + tracing::warn!(error = %e, "engine call failed"); + GateError::Engine(safe_engine_message(&e)) + })?; + + // ── gate-turn audit ───────────────────────────────────────────────── + self.audit_turn(&ident, &outcomes, &response, intent) + .await?; + + Ok(response) + } + + fn resolve_identity(&self, caller: &CallerContext) -> GateResult { + let actor = if caller.actor_omni != "*" { + caller.actor_omni.as_str() + } else { + self.config.default_actor.as_deref().ok_or_else(|| { + GateError::Identity( + "no actor: request carried no X-AgentKeys-Actor and no \ + AGENTKEYS_GATE_DEFAULT_ACTOR is set" + .into(), + ) + })? + }; + let operator = self + .config + .default_operator_omni + .as_deref() + .ok_or_else(|| { + GateError::Identity("no AGENTKEYS_GATE_DEFAULT_OPERATOR_OMNI configured".into()) + })?; + let device_key_hash = self + .config + .default_device_key_hash + .as_deref() + .ok_or_else(|| { + GateError::Identity("no AGENTKEYS_GATE_DEFAULT_DEVICE_KEY_HASH configured".into()) + })?; + Ok(Identity { + operator_omni: normalize_omni_0x(operator), + actor_omni: normalize_omni_0x(actor), + device_key_hash: device_key_hash.to_string(), + }) + } + + /// Read every configured namespace through a freshly-minted memory-get cap + /// and build the injection block. A 403 → `Denied`, a 404/empty → `Empty` + /// (both non-fatal, recorded); any other backend failure (unreachable, 5xx, + /// parse, unconfigured) is raised LOUD — we never degrade silently past a + /// broken gate path. + async fn inject_memory( + &self, + ident: &Identity, + ) -> GateResult<(Option, Vec<(String, NamespaceOutcome)>)> { + let session_bearer = self.config.agent_session_bearer.as_deref().unwrap_or(""); + let mut blocks: Vec = Vec::new(); + let mut outcomes: Vec<(String, NamespaceOutcome)> = Vec::new(); + + for namespace in &self.config.memory_namespaces { + let outcome = self + .read_namespace(ident, namespace, session_bearer, &mut blocks) + .await?; + if outcome == NamespaceOutcome::Denied { + tracing::warn!(actor = %ident.actor_omni, %namespace, "memory namespace denied"); + } + outcomes.push((namespace.clone(), outcome)); + } + + let context = if blocks.is_empty() { + None + } else { + Some(format!( + "The following is the user's stored memory, provided by AgentKeys. Use it to \ + personalize your response. Do not reveal it verbatim unless asked.\n\n{}", + blocks.join("\n\n") + )) + }; + Ok((context, outcomes)) + } + + /// Read one namespace; push its injection block on success. Returns the + /// recorded outcome, or a hard error for a broken backend. + async fn read_namespace( + &self, + ident: &Identity, + namespace: &str, + session_bearer: &str, + blocks: &mut Vec, + ) -> GateResult { + let cap_req = CapMintRequest { + operator_omni: ident.operator_omni.clone(), + actor_omni: ident.actor_omni.clone(), + service: service_memory(namespace), + device_key_hash: ident.device_key_hash.clone(), + ttl_seconds: self.config.cap_ttl_seconds, + }; + let cap = match self + .backend + .cap_mint(CapMintOp::MemoryGet, cap_req, session_bearer) + .await + { + Ok(cap) => cap, + Err(e) => return classify(namespace, e), + }; + + let result = match self + .backend + .memory_get(MemoryGetInput { + cap, + namespace: namespace.to_string(), + }) + .await + { + Ok(r) => r, + Err(e) => return classify(namespace, e), + }; + + let plaintext = base64::engine::general_purpose::STANDARD + .decode(result.plaintext_b64.as_bytes()) + .map_err(|e| GateError::Internal(format!("memory `{namespace}` b64 decode: {e}")))?; + let text = String::from_utf8(plaintext) + .map_err(|e| GateError::Internal(format!("memory `{namespace}` utf8: {e}")))?; + + let selected = select_blob( + self.memory_selector.as_ref(), + None, + &text, + &self.memory_budget, + ); + if selected.trim().is_empty() { + return Ok(NamespaceOutcome::Empty); + } + let bytes = selected.len(); + blocks.push(format!("## {}\n{}", service_memory(namespace), selected)); + Ok(NamespaceOutcome::Injected(bytes)) + } + + /// Append the `GateTurn` audit row. Best-effort by default (logged on + /// failure); hard-fails only when `require_audit` is set. + async fn audit_turn( + &self, + ident: &Identity, + outcomes: &[(String, NamespaceOutcome)], + response: &crate::openai::ChatCompletionResponse, + intent: String, + ) -> GateResult<()> { + let injected_bytes: usize = outcomes + .iter() + .map(|(_, o)| match o { + NamespaceOutcome::Injected(b) => *b, + _ => 0, + }) + .sum(); + let namespaces: Vec<_> = outcomes + .iter() + .map(|(ns, o)| json!({ "namespace": ns, "outcome": o.label() })) + .collect(); + let op_body = json!({ + "engine": self.llm.name(), + "model": response.model, + "namespaces": namespaces, + "injected_bytes": injected_bytes, + "prompt_tokens": response.usage.prompt_tokens, + "completion_tokens": response.usage.completion_tokens, + }); + + let input = crate::backend::AuditAppendInput { + operator_omni: ident.operator_omni.clone(), + actor_omni: ident.actor_omni.clone(), + op_kind: AuditOpKind::GateTurn as u8, + op_body, + result: AUDIT_RESULT_SUCCESS, + intent_text: if intent.is_empty() { + None + } else { + Some(intent) + }, + }; + + match self.backend.audit_append(input).await { + Ok(r) => { + tracing::info!( + actor = %ident.actor_omni, + envelope_hash = %r.envelope_hash, + "gate-turn audited" + ); + Ok(()) + } + Err(e) if self.config.require_audit => { + Err(GateError::Audit(format!("gate-turn audit failed: {e}"))) + } + Err(e) => { + tracing::warn!( + actor = %ident.actor_omni, + error = %e, + "gate-turn audit append failed (non-fatal; set \ + AGENTKEYS_GATE_REQUIRE_AUDIT=1 to enforce)" + ); + Ok(()) + } + } + } +} + +/// Map a per-namespace backend error to a recorded outcome (403 → denied, +/// 404 → empty) or a hard `Backend` error for anything that signals the gate +/// path itself is broken. +fn classify(namespace: &str, err: BackendError) -> GateResult { + match err { + BackendError::Http { status: 403, .. } => Ok(NamespaceOutcome::Denied), + BackendError::Http { status: 404, .. } => Ok(NamespaceOutcome::Empty), + other => { + // Log the full error (including any upstream body) for the operator, + // but return only the safe category to the vendor caller — never + // echo a broker/worker response body across the trust boundary. + tracing::warn!(%namespace, error = %other, "memory backend failure"); + Err(GateError::Backend(format!( + "memory namespace `{namespace}`: {}", + safe_backend_message(&other) + ))) + } + } +} + +/// User-facing rendering of a backend error — status/category only, never the +/// upstream response body (which may carry first-party internal detail). The +/// full error is logged for the operator at the call site. +fn safe_backend_message(err: &BackendError) -> String { + match err { + BackendError::Http { status, .. } => format!("backend HTTP {status}"), + BackendError::Transport(_) => "backend transport error".to_string(), + BackendError::Parse(_) => "backend response parse error".to_string(), + BackendError::NotConfigured(what) => format!("backend not configured: {what}"), + } +} + +/// User-facing rendering of an engine error — status/category only, never the +/// upstream LLM response body. +fn safe_engine_message(err: &crate::engine::EngineError) -> String { + use crate::engine::EngineError; + match err { + EngineError::Http { status, .. } => format!("upstream HTTP {status}"), + EngineError::Transport(_) => "upstream transport error".to_string(), + EngineError::Parse(_) => "upstream response parse error".to_string(), + EngineError::NotConfigured(what) => format!("engine not configured: {what}"), + } +} + +fn truncate(s: &str, max: usize) -> String { + if s.chars().count() <= max { + s.to_string() + } else { + s.chars().take(max).collect() + } +} diff --git a/crates/agentkeys-gate/src/lib.rs b/crates/agentkeys-gate/src/lib.rs new file mode 100644 index 00000000..531accf3 --- /dev/null +++ b/crates/agentkeys-gate/src/lib.rs @@ -0,0 +1,33 @@ +//! `agentkeys-gate` — the in-path CustomLLM gate (Plan A, `ai-device-platform.md` +//! §3, pieces 2 + 5). +//! +//! An OpenAI-compatible `/v1/chat/completions` endpoint that Volcano RTC's +//! CustomLLM mode posts to. Per turn it does, in order: +//! +//! 1. **cap-check + memory-inject** — mint a broker `memory-get` cap per +//! namespace (`service = memory:`) and read it via the memory worker, +//! then inject the selected lines as a leading `system` message; +//! 2. **engine completion** — forward the (now memory-augmented) request to a +//! swappable [`engine::EngineAdapter`] (Plan A: an OpenAI-compatible upstream +//! = Volcano → Doubao; or the local `echo` engine for hardware bring-up); +//! 3. **audit** — append a `GateTurn` row so every gated turn is on the ledger. +//! +//! It owns NO new wire shape and NO persistent state: identity · cap · memory · +//! audit all route through the shipped [`agentkeys_backend_client::BackendClient`] +//! (the red line — none of those ever live inside the swappable engine). +//! +//! `A ⊂ B`: the [`engine::EngineAdapter`] seam is the only swappable part, so +//! Plan B (self-host runtime, more model vendors) is "A + engines", not a rewrite. + +pub mod auth; +pub mod backend; +pub mod config; +pub mod engine; +pub mod error; +pub mod gate; +pub mod openai; +pub mod server; + +pub use config::{Config, EngineKind}; +pub use error::{GateError, GateResult}; +pub use gate::Gate; diff --git a/crates/agentkeys-gate/src/main.rs b/crates/agentkeys-gate/src/main.rs new file mode 100644 index 00000000..6df52d4a --- /dev/null +++ b/crates/agentkeys-gate/src/main.rs @@ -0,0 +1,87 @@ +//! Entry point — parse CLI, build the backend + engine, run the gate's HTTP +//! server. + +use std::sync::Arc; + +use clap::Parser; + +use agentkeys_backend_client::BackendClient; +use agentkeys_gate::{ + backend::Backend, + config::{Cli, Config, EngineKind}, + engine::{EchoEngine, EngineAdapter, OpenAiCompatibleEngine}, + gate::Gate, + server, +}; + +#[tokio::main] +async fn main() -> anyhow::Result<()> { + // rustls 0.23 needs a process-level CryptoProvider before any HTTPS work + // (the upstream LLM call via reqwest rustls-tls). Install `ring` explicitly. + let _ = rustls::crypto::ring::default_provider().install_default(); + + tracing_subscriber::fmt() + .with_env_filter( + tracing_subscriber::EnvFilter::try_from_default_env() + .unwrap_or_else(|_| tracing_subscriber::EnvFilter::new("info")), + ) + .with_writer(std::io::stderr) + .init(); + + let cli = Cli::parse(); + let config = Config::from_cli(cli)?; + + if config.allow_anonymous && config.vendor_tokens.is_empty() { + tracing::warn!( + "AGENTKEYS_GATE_ALLOW_ANONYMOUS is set with no vendor tokens — the gate accepts \ + UNAUTHENTICATED requests. Use only for local / echo-engine bring-up." + ); + } + + // The gate never re-types the broker/worker chain: the backend IS the shared + // BackendClient (#203). Load the K10 device key so every cap-mint is + // proof-of-possession-signed (#76); without it cap-mint fails at the broker. + let mut client = BackendClient::new( + config.broker_url.clone(), + config.memory_url.clone(), + config.audit_url.clone(), + None, // cred_url — the gate does not fetch credentials in the MVP + config.agent_session_bearer.clone(), + config.memory_role_arn.clone(), + None, // vault_role_arn — no cred path + config.aws_region.clone(), + ); + match agentkeys_core::device_crypto::load_device_key_from_env() { + Some(dk) => client = client.with_device_key(Arc::new(dk)), + None => tracing::warn!( + "no K10 device key loaded — cap-mint will fail (set AGENTKEYS_DEVICE_KEY_FILE to the \ + device's registered key; issue #76)" + ), + } + let backend: Arc = Arc::new(client); + + let llm: Arc = match config.engine.clone() { + EngineKind::Echo => { + tracing::info!("engine: echo (local — no upstream LLM)"); + Arc::new(EchoEngine) + } + EngineKind::OpenAiCompatible { + base_url, + api_key, + model, + } => { + tracing::info!(%base_url, model = ?model, "engine: openai-compatible upstream"); + Arc::new(OpenAiCompatibleEngine::new(base_url, api_key, model)) + } + }; + + let listen = config.listen; + let gate = Arc::new(Gate::new(config, backend, llm)); + let app = server::router(gate); + + let listener = tokio::net::TcpListener::bind(&listen).await?; + tracing::info!(addr = %listen, "agentkeys-gate listening (HTTP, OpenAI-compatible)"); + axum::serve(listener, app).await?; + + Ok(()) +} diff --git a/crates/agentkeys-gate/src/openai.rs b/crates/agentkeys-gate/src/openai.rs new file mode 100644 index 00000000..12d17377 --- /dev/null +++ b/crates/agentkeys-gate/src/openai.rs @@ -0,0 +1,218 @@ +//! OpenAI-compatible `/v1/chat/completions` wire types — the gate's PUBLIC +//! contract. +//! +//! This is the shape Volcano RTC's **CustomLLM mode** posts to the gate, and +//! (because the Plan-A engine is itself an OpenAI-compatible upstream) the same +//! shape the gate forwards to Doubao. Modelling it once here keeps both +//! directions in lock-step. +//! +//! Unknown fields are preserved via `#[serde(flatten)]` so vendor-specific +//! params (`temperature`, `top_p`, `tools`, …) pass straight through to the +//! upstream — the gate only ever *adds* a memory system message, never drops +//! caller fields. + +use serde::{Deserialize, Serialize}; +use serde_json::{Map, Value}; + +/// One chat message. `role` + `content` are modelled explicitly (the gate reads +/// the role to find the user's intent and injects a `system` message); every +/// other field (`name`, `tool_calls`, `tool_call_id`, …) rides through `extra`. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ChatMessage { + pub role: String, + /// OpenAI content is a string OR an array of typed parts (multimodal), so + /// it stays a `Value`. `null` for assistant tool-call stubs. + #[serde(default, skip_serializing_if = "Value::is_null")] + pub content: Value, + #[serde(flatten)] + pub extra: Map, +} + +impl ChatMessage { + /// Build a plain `{role, content}` message. + pub fn new(role: &str, content: impl Into) -> Self { + Self { + role: role.to_string(), + content: Value::String(content.into()), + extra: Map::new(), + } + } + + /// Flatten `content` to text — handles both the string form and the + /// `[{type:"text", text:"…"}, …]` array form. Non-text parts are skipped. + pub fn text(&self) -> String { + match &self.content { + Value::String(s) => s.clone(), + Value::Array(parts) => parts + .iter() + .filter_map(|p| p.get("text").and_then(Value::as_str)) + .collect::>() + .join(" "), + _ => String::new(), + } + } +} + +/// `/v1/chat/completions` request. `model` + `messages` are modelled; all other +/// sampling params flow through `extra` to the upstream untouched. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ChatCompletionRequest { + pub model: String, + pub messages: Vec, + /// Streaming is NOT supported in the MVP gate (the in-path cap/audit work is + /// per-turn, not per-token). A `stream: true` request is served as a single + /// non-streamed completion; the field is preserved here only so it is not + /// forwarded as `true` to the upstream. + #[serde(default, skip_serializing_if = "is_false")] + pub stream: bool, + #[serde(flatten)] + pub extra: Map, +} + +fn is_false(b: &bool) -> bool { + !*b +} + +impl ChatCompletionRequest { + /// Prepend a `system` message carrying the injected memory context. OpenAI + /// servers honour a leading system block; placing it first keeps it ahead of + /// any caller-supplied system prompt without clobbering it. + pub fn prepend_system(&mut self, content: impl Into) { + self.messages.insert(0, ChatMessage::new("system", content)); + } + + /// The most recent user message's text — the turn's "intent" for the audit + /// row. Empty when the caller sent no user message. + pub fn last_user_text(&self) -> String { + self.messages + .iter() + .rev() + .find(|m| m.role == "user") + .map(ChatMessage::text) + .unwrap_or_default() + } +} + +/// One completion choice. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct Choice { + #[serde(default)] + pub index: u32, + pub message: ChatMessage, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub finish_reason: Option, +} + +/// Token accounting. Defaults to zero for engines (e.g. the echo engine) that +/// don't report usage. +#[derive(Debug, Clone, Default, Serialize, Deserialize)] +pub struct Usage { + #[serde(default)] + pub prompt_tokens: u64, + #[serde(default)] + pub completion_tokens: u64, + #[serde(default)] + pub total_tokens: u64, +} + +/// `/v1/chat/completions` response. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ChatCompletionResponse { + pub id: String, + #[serde(default = "default_object")] + pub object: String, + pub created: u64, + pub model: String, + pub choices: Vec, + #[serde(default)] + pub usage: Usage, + #[serde(flatten)] + pub extra: Map, +} + +fn default_object() -> String { + "chat.completion".to_string() +} + +/// OpenAI-shaped error envelope returned to the caller on failure, so a vendor's +/// OpenAI client parses gate errors the same way it parses upstream errors. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ApiError { + pub error: ApiErrorBody, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ApiErrorBody { + pub message: String, + #[serde(rename = "type")] + pub kind: String, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub code: Option, +} + +impl ApiError { + pub fn new(kind: &str, message: impl Into) -> Self { + Self { + error: ApiErrorBody { + message: message.into(), + kind: kind.to_string(), + code: None, + }, + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn request_roundtrips_and_preserves_unknown_params() { + let raw = r#"{ + "model": "doubao-pro", + "messages": [ + {"role": "system", "content": "be brief"}, + {"role": "user", "content": "where did I go in Chengdu?"} + ], + "temperature": 0.4, + "top_p": 0.9 + }"#; + let req: ChatCompletionRequest = serde_json::from_str(raw).unwrap(); + assert_eq!(req.model, "doubao-pro"); + assert_eq!(req.messages.len(), 2); + assert_eq!(req.last_user_text(), "where did I go in Chengdu?"); + // Unknown sampling params survive a round-trip into `extra`. + let back = serde_json::to_value(&req).unwrap(); + assert_eq!(back["temperature"], 0.4); + assert_eq!(back["top_p"], 0.9); + assert!(back.get("stream").is_none(), "stream omitted when false"); + } + + #[test] + fn prepend_system_goes_first() { + let mut req = ChatCompletionRequest { + model: "m".into(), + messages: vec![ChatMessage::new("user", "hi")], + stream: false, + extra: Map::new(), + }; + req.prepend_system("MEMORY: peanuts allergy"); + assert_eq!(req.messages[0].role, "system"); + assert_eq!(req.messages[0].text(), "MEMORY: peanuts allergy"); + assert_eq!(req.messages[1].role, "user"); + } + + #[test] + fn multimodal_content_text_extraction() { + let m = ChatMessage { + role: "user".into(), + content: serde_json::json!([ + {"type": "text", "text": "hello"}, + {"type": "image_url", "image_url": {"url": "x"}}, + {"type": "text", "text": "world"} + ]), + extra: Map::new(), + }; + assert_eq!(m.text(), "hello world"); + } +} diff --git a/crates/agentkeys-gate/src/server.rs b/crates/agentkeys-gate/src/server.rs new file mode 100644 index 00000000..7ddc7f86 --- /dev/null +++ b/crates/agentkeys-gate/src/server.rs @@ -0,0 +1,74 @@ +//! HTTP transport — the OpenAI-compatible surface Volcano RTC CustomLLM posts +//! to. One route does real work (`POST /v1/chat/completions`); `GET /healthz` is +//! for the load balancer. + +use std::sync::Arc; + +use axum::{ + body::Bytes, + extract::State, + http::{HeaderMap, StatusCode}, + response::{IntoResponse, Response}, + routing::{get, post}, + Json, Router, +}; + +use crate::auth::authenticate; +use crate::error::GateError; +use crate::gate::Gate; +use crate::openai::ChatCompletionRequest; + +/// Build the router with the gate as shared state. +pub fn router(gate: Arc) -> Router { + Router::new() + .route("/healthz", get(healthz)) + .route("/v1/chat/completions", post(chat_completions)) + .with_state(gate) +} + +async fn healthz() -> &'static str { + "ok" +} + +fn header<'a>(headers: &'a HeaderMap, name: &str) -> Option<&'a str> { + headers.get(name).and_then(|v| v.to_str().ok()) +} + +/// Turn any gate error into an OpenAI-shaped error response at the mapped +/// status. +fn error_response(err: GateError) -> Response { + let status = StatusCode::from_u16(err.status()).unwrap_or(StatusCode::INTERNAL_SERVER_ERROR); + (status, Json(err.to_api_error())).into_response() +} + +async fn chat_completions( + State(gate): State>, + headers: HeaderMap, + body: Bytes, +) -> Response { + let caller = match authenticate( + gate.config(), + header(&headers, "authorization"), + header(&headers, "x-agentkeys-actor"), + ) { + Ok(c) => c, + Err(e) => return error_response(e), + }; + + let req: ChatCompletionRequest = match serde_json::from_slice(&body) { + Ok(r) => r, + Err(e) => { + return error_response(GateError::BadRequest(format!( + "invalid chat completion request: {e}" + ))) + } + }; + + match gate.handle_chat_completion(&caller, req).await { + Ok(resp) => (StatusCode::OK, Json(resp)).into_response(), + Err(e) => { + tracing::warn!(vendor = %caller.vendor_id, error = %e, "gate turn failed"); + error_response(e) + } + } +} diff --git a/crates/agentkeys-gate/tests/common/mod.rs b/crates/agentkeys-gate/tests/common/mod.rs new file mode 100644 index 00000000..d576d17e --- /dev/null +++ b/crates/agentkeys-gate/tests/common/mod.rs @@ -0,0 +1,192 @@ +//! Offline test doubles for the gate's `Backend` + `EngineAdapter` seams. No +//! network, deterministic — the gate's unit/integration tests run with these. +//! +//! Shared across test binaries, each of which uses a different subset of the +//! helpers — so dead-code is expected per-binary. +#![allow(dead_code)] + +use std::collections::{HashMap, HashSet}; +use std::sync::Mutex; + +use async_trait::async_trait; +use base64::Engine as _; +use serde_json::json; + +use agentkeys_gate::backend::{ + AuditAppendInput, AuditAppendResult, Backend, BackendError, CapMintOp, CapMintRequest, + CapToken, MemoryGetInput, MemoryGetResult, +}; +use agentkeys_gate::engine::{EngineAdapter, EngineError}; +use agentkeys_gate::openai::{ + ChatCompletionRequest, ChatCompletionResponse, ChatMessage, Choice, Usage, +}; + +/// In-memory broker + memory + audit. Seed namespaces, deny some, or break the +/// whole backend; records cap-mints + audit appends for assertions. +#[derive(Default)] +pub struct MockBackend { + seeded: HashMap<(String, String), String>, + denied: HashSet, + broken: bool, + http_fail: Option<(u16, String)>, + pub cap_mints: Mutex>, + pub audits: Mutex>, +} + +impl MockBackend { + pub fn new() -> Self { + Self::default() + } + + /// Seed `content` for `(actor, namespace)`. + pub fn seed(mut self, actor: &str, namespace: &str, content: &str) -> Self { + self.seeded.insert( + (actor.to_string(), namespace.to_string()), + content.to_string(), + ); + self + } + + /// Make `cap_mint` for this namespace return HTTP 403 (deterministic deny). + pub fn deny(mut self, namespace: &str) -> Self { + self.denied.insert(namespace.to_string()); + self + } + + /// Make the whole backend unreachable (transport error on cap_mint). + pub fn broken(mut self) -> Self { + self.broken = true; + self + } + + /// Make `cap_mint` return an HTTP error with this status + body (e.g. a 5xx + /// carrying internal detail) — to assert the body is NOT echoed to callers. + pub fn fail_http(mut self, status: u16, body: &str) -> Self { + self.http_fail = Some((status, body.to_string())); + self + } + + pub fn cap_mint_count(&self) -> usize { + self.cap_mints.lock().unwrap().len() + } + + pub fn audit_count(&self) -> usize { + self.audits.lock().unwrap().len() + } + + pub fn last_audit(&self) -> Option { + self.audits.lock().unwrap().last().cloned() + } +} + +#[async_trait] +impl Backend for MockBackend { + async fn cap_mint( + &self, + _op: CapMintOp, + req: CapMintRequest, + _session_bearer: &str, + ) -> Result { + if self.broken { + return Err(BackendError::Transport("mock broker unreachable".into())); + } + if let Some((status, body)) = &self.http_fail { + return Err(BackendError::Http { + status: *status, + body: body.clone(), + }); + } + self.cap_mints.lock().unwrap().push(req.service.clone()); + // service is `memory:` + let namespace = req.service.strip_prefix("memory:").unwrap_or(&req.service); + if self.denied.contains(namespace) { + return Err(BackendError::Http { + status: 403, + body: "out of scope".into(), + }); + } + Ok(json!({ "payload": { "actor_omni": req.actor_omni, "service": req.service } })) + } + + async fn memory_get(&self, input: MemoryGetInput) -> Result { + let actor = input + .cap + .get("payload") + .and_then(|p| p.get("actor_omni")) + .and_then(|v| v.as_str()) + .unwrap_or_default() + .to_string(); + let content = self + .seeded + .get(&(actor, input.namespace.clone())) + .cloned() + .unwrap_or_default(); + Ok(MemoryGetResult { + ok: true, + plaintext_b64: base64::engine::general_purpose::STANDARD.encode(content.as_bytes()), + namespace: input.namespace, + }) + } + + async fn audit_append( + &self, + input: AuditAppendInput, + ) -> Result { + self.audits.lock().unwrap().push(input); + Ok(AuditAppendResult { + ok: true, + envelope_hash: "0xmockhash".into(), + }) + } +} + +/// Records the request it received (to assert what the gate forwarded) and +/// returns a canned assistant reply. +#[derive(Default)] +pub struct RecordingEngine { + pub last_request: Mutex>, +} + +#[async_trait] +impl EngineAdapter for RecordingEngine { + fn name(&self) -> &'static str { + "recording" + } + + async fn complete( + &self, + req: ChatCompletionRequest, + ) -> Result { + let model = req.model.clone(); + *self.last_request.lock().unwrap() = Some(req); + Ok(ChatCompletionResponse { + id: "rec-0".into(), + object: "chat.completion".into(), + created: 0, + model, + choices: vec![Choice { + index: 0, + message: ChatMessage::new("assistant", "ok"), + finish_reason: Some("stop".into()), + }], + usage: Usage { + prompt_tokens: 7, + completion_tokens: 3, + total_tokens: 10, + }, + extra: serde_json::Map::new(), + }) + } +} + +impl RecordingEngine { + /// The system message the gate injected (if any). + pub fn injected_system(&self) -> Option { + self.last_request.lock().unwrap().as_ref().and_then(|r| { + r.messages + .iter() + .find(|m| m.role == "system") + .map(|m| m.text()) + }) + } +} diff --git a/crates/agentkeys-gate/tests/gate_flow.rs b/crates/agentkeys-gate/tests/gate_flow.rs new file mode 100644 index 00000000..fdddad0b --- /dev/null +++ b/crates/agentkeys-gate/tests/gate_flow.rs @@ -0,0 +1,255 @@ +//! The in-path gate, end-to-end, with offline doubles. Proves the four things +//! Plan A §3 says the gate must do per turn: cap-check, memory-inject, engine +//! call, audit — plus the deny + broken-backend behaviours. + +mod common; + +use std::sync::Arc; + +use agentkeys_core::audit::AuditOpKind; +use agentkeys_gate::auth::CallerContext; +use agentkeys_gate::backend::Backend; +use agentkeys_gate::config::Config; +use agentkeys_gate::engine::EngineAdapter; +use agentkeys_gate::error::GateError; +use agentkeys_gate::gate::Gate; +use agentkeys_gate::openai::{ChatCompletionRequest, ChatMessage}; + +use common::{MockBackend, RecordingEngine}; + +const ACTOR: &str = "0xkevin"; + +fn config(namespaces: &[&str]) -> Config { + let mut c = Config::for_tests(); + c.default_actor = Some(ACTOR.to_string()); + c.default_operator_omni = Some("0xmaster".to_string()); + c.default_device_key_hash = Some("0xdevhash".to_string()); + c.memory_namespaces = namespaces.iter().map(|s| s.to_string()).collect(); + c +} + +fn caller() -> CallerContext { + CallerContext { + vendor_id: "test".to_string(), + actor_omni: "*".to_string(), + } +} + +fn turn(user: &str) -> ChatCompletionRequest { + ChatCompletionRequest { + model: "doubao-pro".to_string(), + messages: vec![ChatMessage::new("user", user)], + stream: false, + extra: serde_json::Map::new(), + } +} + +/// Act 1 — permissioned memory is injected in-path and the turn is audited. +#[tokio::test] +async fn injects_memory_and_audits_the_turn() { + let backend = Arc::new( + MockBackend::new() + .seed(ACTOR, "personal", "Allergic to peanuts.") + .seed(ACTOR, "travel", "Chengdu trip Apr 12-16."), + ); + let engine = Arc::new(RecordingEngine::default()); + let gate = Gate::new( + config(&["personal", "travel"]), + backend.clone() as Arc, + engine.clone() as Arc, + ); + + let resp = gate + .handle_chat_completion(&caller(), turn("what do you know about me?")) + .await + .expect("turn succeeds"); + + // One cap minted per namespace; both injected. + assert_eq!(backend.cap_mint_count(), 2); + let injected = engine + .injected_system() + .expect("a system message was injected"); + assert!(injected.contains("Allergic to peanuts."), "{injected}"); + assert!(injected.contains("Chengdu trip"), "{injected}"); + assert!( + injected.contains("memory:personal"), + "namespace header: {injected}" + ); + + // Exactly one GateTurn audit row, recording both namespaces as injected. + assert_eq!(backend.audit_count(), 1); + let audit = backend.last_audit().unwrap(); + assert_eq!(audit.op_kind, AuditOpKind::GateTurn as u8); + assert_eq!(audit.actor_omni, ACTOR); // normalized 0x form preserved + assert_eq!( + audit.intent_text.as_deref(), + Some("what do you know about me?") + ); + let outcomes = audit.op_body["namespaces"].as_array().unwrap(); + assert_eq!(outcomes.len(), 2); + assert!(outcomes.iter().all(|o| o["outcome"] == "injected")); + assert_eq!(audit.op_body["engine"], "recording"); + + // The response is OpenAI-shaped. + assert_eq!(resp.choices.len(), 1); + assert_eq!(resp.model, "doubao-pro"); +} + +/// Act 2 — a namespace the actor has no scope for is DENIED (non-fatal): the +/// turn proceeds with the allowed namespace, and the denial is on the audit row. +#[tokio::test] +async fn denied_namespace_is_recorded_not_fatal() { + let backend = Arc::new( + MockBackend::new() + .seed(ACTOR, "personal", "Allergic to peanuts.") + .deny("travel"), + ); + let engine = Arc::new(RecordingEngine::default()); + let gate = Gate::new( + config(&["personal", "travel"]), + backend.clone() as Arc, + engine.clone() as Arc, + ); + + gate.handle_chat_completion(&caller(), turn("hi")) + .await + .expect("turn proceeds despite one denied namespace"); + + let injected = engine.injected_system().unwrap(); + assert!(injected.contains("Allergic to peanuts.")); + assert!(!injected.contains("memory:travel")); + + let audit = backend.last_audit().unwrap(); + let outcomes = audit.op_body["namespaces"].as_array().unwrap(); + let travel = outcomes + .iter() + .find(|o| o["namespace"] == "travel") + .unwrap(); + assert_eq!(travel["outcome"], "denied"); +} + +/// Act 3 — a broken backend is surfaced LOUD, never hidden behind an empty-memory +/// fallback (the "no silent fallback" rule). +#[tokio::test] +async fn broken_backend_fails_the_turn() { + let backend = Arc::new(MockBackend::new().broken()); + let engine = Arc::new(RecordingEngine::default()); + let gate = Gate::new( + config(&["personal"]), + backend.clone() as Arc, + engine.clone() as Arc, + ); + + let err = gate + .handle_chat_completion(&caller(), turn("hi")) + .await + .expect_err("broken broker must fail the turn"); + assert!(matches!(err, GateError::Backend(_)), "got {err:?}"); + // The engine was never reached. + assert!(engine.last_request.lock().unwrap().is_none()); +} + +/// Act 4 — a pure-chat turn with no namespaces configured still lands on the +/// audit ledger (the gap the worker-side memory audit can't cover). +#[tokio::test] +async fn pure_chat_turn_is_still_audited() { + let backend = Arc::new(MockBackend::new()); + let engine = Arc::new(RecordingEngine::default()); + let gate = Gate::new( + config(&[]), + backend.clone() as Arc, + engine.clone() as Arc, + ); + + gate.handle_chat_completion(&caller(), turn("hello")) + .await + .expect("turn succeeds"); + + assert_eq!( + backend.cap_mint_count(), + 0, + "no namespaces → no caps minted" + ); + assert_eq!(backend.audit_count(), 1, "turn is still audited"); + assert!( + engine.injected_system().is_none(), + "no memory system message" + ); +} + +/// A broker/worker 5xx body is NEVER echoed across the trust boundary to the +/// vendor caller — only the safe status category surfaces. +#[tokio::test] +async fn backend_5xx_body_is_not_echoed_to_caller() { + let secret = "INTERNAL stacktrace user=root token=SHHH-do-not-leak"; + let backend = Arc::new(MockBackend::new().fail_http(500, secret)); + let engine = Arc::new(RecordingEngine::default()); + let gate = Gate::new( + config(&["personal"]), + backend.clone() as Arc, + engine.clone() as Arc, + ); + + let err = gate + .handle_chat_completion(&caller(), turn("hi")) + .await + .expect_err("backend 5xx fails the turn"); + let msg = err.to_string(); + assert!(matches!(err, GateError::Backend(_)), "got {err:?}"); + assert!(msg.contains("HTTP 500"), "status category surfaced: {msg}"); + assert!(!msg.contains("SHHH"), "upstream body must NOT leak: {msg}"); + assert!( + !msg.contains("stacktrace"), + "upstream body must NOT leak: {msg}" + ); +} + +/// The gate refuses to forward tool-calling fields to the upstream (they would +/// bypass AgentKeys enforcement) — it is NOT a transparent proxy for them. +#[tokio::test] +async fn tool_calling_fields_are_rejected() { + let backend = Arc::new(MockBackend::new()); + let engine = Arc::new(RecordingEngine::default()); + let gate = Gate::new( + config(&[]), + backend.clone() as Arc, + engine.clone() as Arc, + ); + + let mut req = turn("call a tool"); + req.extra.insert( + "tools".to_string(), + serde_json::json!([{"type": "function", "function": {"name": "turn_on_light"}}]), + ); + let err = gate + .handle_chat_completion(&caller(), req) + .await + .expect_err("tools must be rejected"); + assert!(matches!(err, GateError::BadRequest(_)), "got {err:?}"); + // The engine was never reached — nothing forwarded upstream. + assert!(engine.last_request.lock().unwrap().is_none()); +} + +/// A per-request actor header overrides the configured default actor. +#[tokio::test] +async fn actor_header_overrides_default() { + let backend = Arc::new(MockBackend::new().seed("0xother", "personal", "other's data")); + let engine = Arc::new(RecordingEngine::default()); + let gate = Gate::new( + config(&["personal"]), + backend.clone() as Arc, + engine.clone() as Arc, + ); + + let caller = CallerContext { + vendor_id: "test".to_string(), + actor_omni: "0xother".to_string(), + }; + gate.handle_chat_completion(&caller, turn("hi")) + .await + .unwrap(); + + let injected = engine.injected_system().unwrap(); + assert!(injected.contains("other's data")); + assert_eq!(backend.last_audit().unwrap().actor_omni, "0xother"); +} diff --git a/crates/agentkeys-gate/tests/http.rs b/crates/agentkeys-gate/tests/http.rs new file mode 100644 index 00000000..70872f9f --- /dev/null +++ b/crates/agentkeys-gate/tests/http.rs @@ -0,0 +1,121 @@ +//! HTTP transport — auth gating, health, and a full anonymous echo turn driven +//! through the real axum router. + +mod common; + +use std::sync::Arc; + +use axum::body::Body; +use axum::http::Request; +use http_body_util::BodyExt; +use tower::ServiceExt; + +use agentkeys_gate::backend::Backend; +use agentkeys_gate::config::Config; +use agentkeys_gate::engine::{EchoEngine, EngineAdapter}; +use agentkeys_gate::gate::Gate; +use agentkeys_gate::server::router; + +use common::MockBackend; + +fn echo_gate(config: Config) -> Arc { + Arc::new(Gate::new( + config, + Arc::new(MockBackend::new()) as Arc, + Arc::new(EchoEngine) as Arc, + )) +} + +fn chat_body() -> Body { + Body::from( + serde_json::to_vec(&serde_json::json!({ + "model": "doubao-pro", + "messages": [{"role": "user", "content": "hello"}] + })) + .unwrap(), + ) +} + +#[tokio::test] +async fn healthz_ok() { + let app = router(echo_gate(Config::for_tests())); + let resp = app + .oneshot( + Request::builder() + .uri("/healthz") + .body(Body::empty()) + .unwrap(), + ) + .await + .unwrap(); + assert_eq!(resp.status(), 200); +} + +#[tokio::test] +async fn no_token_and_not_anonymous_is_401() { + let app = router(echo_gate(Config::for_tests())); + let resp = app + .oneshot( + Request::builder() + .method("POST") + .uri("/v1/chat/completions") + .header("content-type", "application/json") + .body(chat_body()) + .unwrap(), + ) + .await + .unwrap(); + assert_eq!(resp.status(), 401); + let body = resp.into_body().collect().await.unwrap().to_bytes(); + let json: serde_json::Value = serde_json::from_slice(&body).unwrap(); + // OpenAI-shaped error envelope. + assert_eq!(json["error"]["type"], "authentication_error"); +} + +#[tokio::test] +async fn wrong_vendor_token_is_401() { + let config = Config::for_tests().with_vendor_token("folotoy", "secret"); + let app = router(echo_gate(config)); + let resp = app + .oneshot( + Request::builder() + .method("POST") + .uri("/v1/chat/completions") + .header("content-type", "application/json") + .header("authorization", "Bearer wrong") + .body(chat_body()) + .unwrap(), + ) + .await + .unwrap(); + assert_eq!(resp.status(), 401); +} + +#[tokio::test] +async fn anonymous_echo_turn_returns_openai_response() { + let mut config = Config::for_tests(); + config.allow_anonymous = true; + config.default_actor = Some("0xkevin".into()); + config.default_operator_omni = Some("0xmaster".into()); + config.default_device_key_hash = Some("0xdev".into()); + let app = router(echo_gate(config)); + + let resp = app + .oneshot( + Request::builder() + .method("POST") + .uri("/v1/chat/completions") + .header("content-type", "application/json") + .body(chat_body()) + .unwrap(), + ) + .await + .unwrap(); + + assert_eq!(resp.status(), 200); + let body = resp.into_body().collect().await.unwrap().to_bytes(); + let json: serde_json::Value = serde_json::from_slice(&body).unwrap(); + assert_eq!(json["object"], "chat.completion"); + let reply = json["choices"][0]["message"]["content"].as_str().unwrap(); + assert!(reply.contains("hello"), "echo reflects the user: {reply}"); +} diff --git a/docs/arch.md b/docs/arch.md index 1e641e5f..fd76db3f 100644 --- a/docs/arch.md +++ b/docs/arch.md @@ -1153,8 +1153,11 @@ and never reordered**. Grouped by 10s leaves room for related ops. | `ConfigPut` | 80 | `{key: string, payload_hash: [u8;32]}` | config-service (#201, #229) | | `ConfigGet` | 81 | `{key: string, cap_hash: [u8;32]}` | config-service (#201, #229) | | `ConfigTeardown` | 82 | `{actor_target: [u8;32]}` | config-service (#201, #229) | +| `GateTurn` | 90 | `{engine: string, model: string, namespaces: [{namespace: string, outcome: string}], injected_bytes: u64, prompt_tokens: u64, completion_tokens: u64}` | `agentkeys-gate` (Plan A in-path CustomLLM gate; [`ai-device-platform.md`](plan/ai-device-platform.md) §3) | -Byte ranges `3-9`, `13-19`, `22-29`, `32-39`, `42-49`, `53-59`, `62-69`, `71-79`, `83-89`, `90-255` are reserved for future extensions in the same family (config claimed `80-89` per #229). +Byte ranges `3-9`, `13-19`, `22-29`, `32-39`, `42-49`, `53-59`, `62-69`, `71-79`, `83-89`, `91-99`, `100-255` are reserved for future extensions in the same family (config claimed `80-89` per #229; the gate claimed `90-99`). + +The `GateTurn` row records the **turn-level** control-plane fact (which engine answered, what memory was injected, the per-namespace authorization outcome). The memory READS themselves are still audited data-plane-side as `MemoryGet` (#229) by the memory worker — so a turn that injects memory produces both rows; a pure-chat turn produces only the `GateTurn` row (closing the gap where a no-memory turn would otherwise be invisible on the ledger). **Data-plane emit sites are LIVE (#229).** The cred / memory / config workers emit one envelope per store / fetch / teardown — after cap-verify, before the @@ -2378,6 +2381,10 @@ agentkeys/ # repo root │ │ # process exposing AgentKeys tools to │ │ # LLM hosts over stdio / HTTP / xiaozhi │ │ # mcp-endpoint WS relay (issue #107) +│ ├── agentkeys-gate/ # in-path CustomLLM gate binary — OpenAI- +│ │ # compatible /v1/chat/completions; per turn: +│ │ # cap-check + memory-inject + audit (GateTurn), +│ │ # swappable EngineAdapter (Plan A §3, A ⊂ B) │ ├── agentkeys-provisioner/ # Rust orchestrator that spawns TS scrapers │ ├── agentkeys-protocol/ # The wire TYPES half of the #203 one-owner │ │ # split (#215 re-land): pure serde, wasm-safe @@ -2418,6 +2425,7 @@ agentkeys/ # repo root | `agentkeys-daemon` | Sidecar daemon (master / agent role per init); localhost proxy | | `agentkeys-mcp` | Legacy in-process MCP adapter library — used by `agentkeys-daemon`'s sidecar stdio loop (M0). | | `agentkeys-mcp-server` | Standalone Rust MCP server binary (issue #107). Three transports: stdio (Claude Desktop / Claude Code / Codex / Cursor / Cline / Roo / Windsurf / Gemini CLI), HTTP (broker-direct), xiaozhi `mcp-endpoint` WS relay. **One backend: `http`** (real broker + memory + audit workers — the production backend IS the shared `agentkeys-backend-client::BackendClient`). The `in-memory` fixture backend was **removed in #207 (real-data-only)**; transport/protocol conformance is now proven by the Rust `tests/transport_conformance.rs` (subprocess MCP client over HTTP + stdio against the real backend). Installed via `cargo install --git https://github.com/litentry/agentKeys agentkeys-mcp-server`. | +| `agentkeys-gate` | Standalone Rust **in-path CustomLLM gate** binary (Plan A, [`ai-device-platform.md`](plan/ai-device-platform.md) §3). OpenAI-compatible `POST /v1/chat/completions` that Volcano RTC's CustomLLM mode posts to; per turn it does cap-check + memory-inject (`CapMintOp::MemoryGet` per `memory:`) → swappable [`EngineAdapter`](../crates/agentkeys-gate/src/engine.rs) (Plan-A impl = an OpenAI-compatible upstream → Doubao; `echo` engine for hardware bring-up) → `GateTurn` (90) audit. Owns no new wire shape — reuses the shared `agentkeys-backend-client::BackendClient`. The engine seam is the only swappable part (A ⊂ B). | | `agentkeys-provisioner` | Spawns TS scraper, encrypts obtained creds, submits via cap-store | | `agentkeys-chain` | Solidity contracts + Rust ABI bindings | diff --git a/docs/operator-runbook-gate.md b/docs/operator-runbook-gate.md new file mode 100644 index 00000000..32eb1ee3 --- /dev/null +++ b/docs/operator-runbook-gate.md @@ -0,0 +1,233 @@ +# Operator runbook — testing the in-path CustomLLM gate (`agentkeys-gate`) + +How to run and test the Plan-A gate ([`docs/plan/ai-device-platform.md`](plan/ai-device-platform.md) §3; crate [`crates/agentkeys-gate`](../crates/agentkeys-gate/)). The gate is an OpenAI-compatible `POST /v1/chat/completions` endpoint that, per LLM turn, does **cap-check → memory-inject → engine completion → audit**. + +There are three test levels, each adding one real dependency: + +| Part | Engine | Backend (broker/memory/audit) | What it proves | Needs | +|---|---|---|---|---| +| **1. Echo smoke test** | `echo` (local) | none | the gate runs, the OpenAI round-trip works, the gate is in-path | nothing — just `cargo` | +| **2. Real backend** | `echo` | live broker + memory + audit workers | real per-namespace cap-check + memory-inject + a `GateTurn` audit row | a registered device actor + its K10 key + agent-session JWT | +| **3. Doubao** | `openai-compatible` | live backend | the full Plan-A path (Volcano RTC CustomLLM → gate → Doubao) | a Volcano Ark / Doubao API key | + +> **Status of this runbook:** Part 1 is **verified** (the outputs below are real). Parts 2–3 require live credentials / a deployed broker and document the commands + the verification you should see. + +--- + +## Part 1 — local echo smoke test (no dependencies) + +The `echo` engine answers locally (no upstream LLM), and with **no memory namespaces** the gate makes **no broker calls** — so this runs with zero external services. + +### 1.1 Build + +```bash +cargo build -p agentkeys-gate +``` + +### 1.2 Run + +```bash +RUST_LOG=info ./target/debug/agentkeys-gate \ + --allow-anonymous --engine echo \ + --default-actor 0xkevin \ + --default-operator-omni 0xmaster \ + --default-device-key-hash 0xdev \ + --memory-namespaces "" \ + --listen 127.0.0.1:8077 +``` + +> **The `--memory-namespaces ""` is required for the no-broker test.** With the default namespaces (`personal,family,work,travel`) the gate tries to mint a memory-read cap against the broker; with no `--broker-url` that fails **loud** (a `backend not configured` 502 — by design, see Part 2). Empty namespaces skip memory entirely. + +You should see, on stderr: + +``` +WARN agentkeys_gate: AGENTKEYS_GATE_ALLOW_ANONYMOUS is set with no vendor tokens — the gate accepts UNAUTHENTICATED requests. Use only for local / echo-engine bring-up. +WARN agentkeys_gate: no K10 device key loaded — cap-mint will fail (set AGENTKEYS_DEVICE_KEY_FILE ...) +INFO agentkeys_gate: engine: echo (local — no upstream LLM) +INFO agentkeys_gate: agentkeys-gate listening (HTTP, OpenAI-compatible) addr=127.0.0.1:8077 +``` + +Both WARNs are expected for the local echo test (no auth, no device key). + +### 1.3 Test (in another terminal) + +**Health:** +```bash +curl -s http://127.0.0.1:8077/healthz +``` +→ `ok` + +**A plain turn** (proves the OpenAI round-trip + the gate is in-path): +```bash +curl -s -X POST http://127.0.0.1:8077/v1/chat/completions \ + -H 'content-type: application/json' \ + --data '{"model":"doubao-pro","messages":[{"role":"user","content":"hello, who am I?"}]}' +``` +→ +```json +{"id":"echo-0","object":"chat.completion","created":1781581314,"model":"doubao-pro", + "choices":[{"index":0,"message":{"role":"assistant","content":"[echo] You said: hello, who am I?"}, + "finish_reason":"stop"}],"usage":{"prompt_tokens":0,"completion_tokens":0,"total_tokens":0}} +``` + +**A turn with a system message** — the echo engine reflects any `system` block back, which is exactly how **injected memory** will surface once a real backend feeds it (in Part 2 the gate produces that `system` block from your stored memory instead of you supplying it): +```bash +curl -s -X POST http://127.0.0.1:8077/v1/chat/completions \ + -H 'content-type: application/json' \ + --data '{"model":"doubao-pro","messages":[{"role":"system","content":"Allergic to peanuts."},{"role":"user","content":"what should I avoid?"}]}' +``` +→ assistant content: +``` +[echo] You said: what should I avoid? +[echo] Using your memory: +Allergic to peanuts. +``` + +**A malformed request** (empty messages) → OpenAI-shaped 400: +```bash +curl -s -o /dev/null -w "%{http_code}\n" -X POST http://127.0.0.1:8077/v1/chat/completions \ + -H 'content-type: application/json' --data '{"model":"m","messages":[]}' +``` +→ `400` + +**Auth** — restart without `--allow-anonymous` and with a vendor token to see the gate reject unauthenticated callers: +```bash +./target/debug/agentkeys-gate --engine echo --memory-namespaces "" \ + --vendor-tokens 'folotoy:secret-abc' --default-actor 0xkevin \ + --default-operator-omni 0xmaster --default-device-key-hash 0xdev --listen 127.0.0.1:8077 & + +curl -s -o /dev/null -w "no-token: %{http_code}\n" -X POST http://127.0.0.1:8077/v1/chat/completions \ + -H 'content-type: application/json' --data '{"model":"m","messages":[{"role":"user","content":"hi"}]}' +# -> no-token: 401 + +curl -s -o /dev/null -w "good-token: %{http_code}\n" -X POST http://127.0.0.1:8077/v1/chat/completions \ + -H 'content-type: application/json' -H 'authorization: Bearer secret-abc' \ + --data '{"model":"m","messages":[{"role":"user","content":"hi"}]}' +# -> good-token: 200 +``` + +--- + +## Part 2 — against a real backend (memory-inject + audit) + +This exercises the **actual** product: the gate mints a `memory-get` cap per namespace, reads your stored memory through the memory worker, injects it, and writes a `GateTurn` audit row. + +### 2.1 Prerequisites (the device actor must already exist on the backend) + +The gate impersonates one **device actor** against the broker. You need: + +- a running **broker + memory worker + audit worker** (the prod set, or your `--ci` stack) — their base URLs; +- a **device actor omni** (`O_agent_X`) that is registered + scoped for memory on-chain, its **master/operator omni**, and its **device-key hash**; +- the device's **K10 device key file** (`AGENTKEYS_DEVICE_KEY_FILE`) so every cap-mint is proof-of-possession-signed (#76); +- the device's **agent-session JWT** (omni == the device actor) for the cap-mint + per-actor STS relay; +- the **memory IAM role ARN** for the per-actor S3 relay. + +> These are the same inputs the in-sandbox daemon / MCP server use. If you've run the harness or paired a device, you already have them. Seeding memory for the actor: use `agentkeys` / the parent-control plant flow, or the MCP `memory.put` tool, before testing reads here. + +### 2.2 Run + +```bash +RUST_LOG=info ./target/debug/agentkeys-gate \ + --vendor-tokens 'folotoy:' \ + --broker-url https://broker.litentry.org \ + --memory-url https://cred.litentry.org \ + --audit-url https://audit.litentry.org \ + --memory-role-arn 'arn:aws:iam:::role/' \ + --default-actor 0x \ + --default-operator-omni 0x \ + --default-device-key-hash 0x \ + --agent-session-bearer-file /etc/agentkeys/-session.jwt \ + --memory-namespaces 'personal,travel' \ + --engine echo \ + --require-audit \ + --listen 127.0.0.1:8077 +``` + +`AGENTKEYS_DEVICE_KEY_FILE=~/.agentkeys/agent-device.key` must point at the device's registered K10 key (or the same env var the daemon uses). Every flag also has an env var (see [Config reference](#config-reference)); `--require-audit` makes a failed audit append **fail the turn** (recommended once the audit worker is healthy). + +### 2.3 Test + verify + +```bash +curl -s -X POST http://127.0.0.1:8077/v1/chat/completions \ + -H 'authorization: Bearer ' \ + -H 'content-type: application/json' \ + --data '{"model":"doubao-pro","messages":[{"role":"user","content":"what do you know about me?"}]}' +``` + +Expected, end-to-end: +1. The assistant reply (echo) includes a `[echo] Using your memory:` block containing the lines stored under `memory:personal` / `memory:travel` for the actor — **proving the in-path cap-check + memory-inject worked**. +2. The gate stderr logs `gate-turn audited envelope_hash=0x…`. +3. **Two audit rows land** for the turn: the worker-side `MemoryGet` (op_kind 11) row(s) for the reads (#229), and the gate's `GateTurn` (op_kind 90) row for the turn. Confirm via the audit feed (`/v1/audit/recent` on the daemon, or the parent-control audit view); the `GateTurn` row's `op_body` carries `{engine, model, namespaces:[{namespace,outcome}], injected_bytes, prompt_tokens, completion_tokens}`. + +**Negative checks (the gate's enforcement):** +- Point `--default-actor` at an actor with **no memory scope** → the per-namespace outcome is `denied` (HTTP 403 from the broker), the turn still answers (degraded), and the denial is recorded in the `GateTurn` row. Memory it has no cap for is **never** read. +- Stop the memory worker (or give a bad `--broker-url`) → the turn fails **loud** with a 502 `backend HTTP …` (the "no silent fallback" rule); the gate does **not** pretend memory was empty. + +--- + +## Part 3 — Doubao via the OpenAI-compatible engine + +Swap the `echo` engine for the real upstream. This is the production Plan-A path: Volcano RTC CustomLLM posts to the gate, the gate injects memory + audits, then forwards to Doubao. + +```bash +RUST_LOG=info ./target/debug/agentkeys-gate \ + ... (all the Part-2 backend + identity flags) ... \ + --engine openai-compatible \ + --engine-base-url 'https://ark.cn-beijing.volces.com/api/v3' \ + --engine-api-key-file /etc/agentkeys/doubao.key \ + --engine-model 'doubao-pro-32k' \ + --listen 127.0.0.1:8077 +``` + +Same `curl` as Part 2.3; the reply is now a **real Doubao completion** that has the user's memory in its context, and `usage` carries real token counts. In Volcano's agent console, set the agent's **CustomLLM endpoint** to this gate's URL (behind TLS) instead of calling Doubao directly — that is the in-path insertion point. + +--- + +## Verifying the audit trail (the gate's top feature) + +Every gated turn is on the ledger. After any Part-2/3 turn: + +- **`GateTurn` (op_kind 90)** — one per turn, even a pure-chat turn with no memory (the gap the gate closes). Carries the engine, model, per-namespace outcomes, injected bytes, token usage. +- **`MemoryGet` (op_kind 11)** — emitted worker-side (#229) for each namespace actually read. + +Decode a row with the daemon's `/v1/audit//decode` or the parent-control audit view; the `GateTurn` label is `gate.turn`. + +--- + +## Troubleshooting + +| Symptom | Cause | Fix | +|---|---|---| +| Every turn 502s with `backend not configured: broker_url` (or `audit_url`) | A namespace is configured but no `--broker-url` (or audit append ran with no `--audit-url`) | For a no-backend smoke test set `--memory-namespaces ""`. Otherwise supply the broker/memory/audit URLs. | +| Turn 502s with `backend HTTP 401`/`403` on cap-mint | The agent-session JWT is missing/expired, or the actor isn't scoped for that memory namespace on-chain | Refresh the session JWT; grant the namespace scope (parent-control / `agentkeys scope`). A 403 on ONE namespace is non-fatal (`denied`); a 401 is fatal. | +| `WARN no K10 device key loaded — cap-mint will fail` | `AGENTKEYS_DEVICE_KEY_FILE` not set | Point it at the device's registered K10 key file. Harmless for the Part-1 echo test (no cap-mint). | +| Reply has no `Using your memory` block | The namespace is empty for that actor, or all namespaces returned `denied`/`empty` | Seed memory for the actor (plant / `memory.put`), confirm scope; check the `GateTurn` row's `namespaces[].outcome`. | +| `401` on every request | Vendor tokens are configured but the caller sent the wrong/no `Authorization: Bearer` | Send the right vendor bearer, or use `--allow-anonymous` for local bring-up only. | +| `engine not configured: engine api key` | `--engine openai-compatible` with no `--engine-api-key[-file]` | Supply the Doubao/Volcano key. | + +--- + +## Config reference + +Every flag has an env var (read **once** at startup; nothing is hardcoded): + +| Flag | Env | Default | Notes | +|---|---|---|---| +| `--listen` | `AGENTKEYS_GATE_LISTEN` | `0.0.0.0:8077` | | +| `--broker-url` / `--memory-url` / `--audit-url` | `AGENTKEYS_BROKER_URL` / `AGENTKEYS_MEMORY_URL` / `AGENTKEYS_AUDIT_URL` | — | the shipped backend chain | +| `--vendor-tokens` | `AGENTKEYS_GATE_VENDOR_TOKENS` | empty | `vendor:token,vendor2:token2`; empty ⇒ 401 unless `--allow-anonymous` | +| `--allow-anonymous` | `AGENTKEYS_GATE_ALLOW_ANONYMOUS` | `false` | local / echo bring-up only | +| `--default-actor` | `AGENTKEYS_GATE_DEFAULT_ACTOR` | — | overridden per-request by the `X-AgentKeys-Actor` header | +| `--default-operator-omni` | `AGENTKEYS_GATE_DEFAULT_OPERATOR_OMNI` | — | master omni for cap binding | +| `--default-device-key-hash` | `AGENTKEYS_GATE_DEFAULT_DEVICE_KEY_HASH` | — | | +| `--agent-session-bearer[-file]` | `AGENTKEYS_GATE_AGENT_SESSION_BEARER[_FILE]` | — | device-actor JWT for cap-mint + STS | +| `--memory-role-arn` | `AGENTKEYS_GATE_MEMORY_ROLE_ARN` | — | per-actor S3 relay role | +| `--memory-namespaces` | `AGENTKEYS_GATE_MEMORY_NAMESPACES` | `personal,family,work,travel` | `""` = no memory (no broker calls) | +| `--memory-engine` | `AGENTKEYS_MEMORY_ENGINE` | `passthrough` | `passthrough` or `lexical` (selection; budget via `AGENTKEYS_MEMORY_MAX_LINES`/`_BYTES`) | +| `--cap-ttl-seconds` | `AGENTKEYS_GATE_CAP_TTL_SECONDS` | `300` | memory-read cap TTL | +| `--require-audit` | `AGENTKEYS_GATE_REQUIRE_AUDIT` | `false` | deny the turn if the `GateTurn` audit append fails | +| `--engine` | `AGENTKEYS_GATE_ENGINE` | `echo` | `echo` or `openai-compatible` | +| `--engine-base-url` / `--engine-api-key[-file]` / `--engine-model` | `AGENTKEYS_GATE_ENGINE_*` | — | required for `openai-compatible` | +| (device key) | `AGENTKEYS_DEVICE_KEY_FILE` | — | the K10 key for cap-PoP (#76) | + +See also: the crate [README](../crates/agentkeys-gate/README.md) and the [implementation log](../crates/agentkeys-gate/IMPLEMENTATION_LOG.md). diff --git a/docs/plan/ai-device-platform.md b/docs/plan/ai-device-platform.md index e01c44f7..acc25464 100644 --- a/docs/plan/ai-device-platform.md +++ b/docs/plan/ai-device-platform.md @@ -42,6 +42,13 @@ Compliance (China kids'-data, minors' mode, content moderation) is the **vendor' **Effort:** small — roughly the #112 MCP-server effort (the CustomLLM gate) + the parent-control wiring + the device firmware spike. Volcano carries all the heavy infra (voice, runtime, multi-tenancy). +**Status (2026-06-16):** +- ✅ **Pieces 2 + 5 landed** — the [`agentkeys-gate`](../../crates/agentkeys-gate/) crate: an OpenAI-compatible in-path `/v1/chat/completions` gate that per turn does cap-check + memory-inject + audit (new `GateTurn` op_kind 90, arch.md §15.3a) behind a swappable `EngineAdapter` (the A ⊂ B seam). Plan-A engine = an OpenAI-compatible upstream (Volcano→Doubao); an `echo` engine runs the gate on real hardware **before** Doubao creds exist (directly serving the "prove the gate in-path on real hardware" goal). Reuses piece 3 (the shipped `BackendClient` / memory-worker / cap-mint / audit) verbatim — owns no new wire shape. +- ⏳ **Piece 1** (Tuya T5 firmware → Volcano RTC) — hardware spike, not started. +- ⏳ **Piece 4** (parent-control management view wired to the gate's real backend) — follow-up. +- ⏳ **Deploy wiring** — the gate is not yet a `setup-broker-host.sh` surface (new systemd unit + nginx vhost for the CustomLLM endpoint) — follow-up. +- ⏳ **Fast-follow** (memory-import) and **spend** (§4 prerequisite) — unchanged, deferred. + ## 4. Enforcement model (3 layers + the spend prerequisite) - **Layer 1 — universal cap-gate** (broker mint + worker verify, deterministic, §17.5): works on **any** runtime incl. hooks-less Doubao. **The floor; the MVP runs on this.** - **Layer 2 — hosted MCP** ([#112](https://github.com/litentry/agentKeys/issues/112)): broad reach across MCP hosts; same guarantee + automatic audit. @@ -57,7 +64,7 @@ Compliance (China kids'-data, minors' mode, content moderation) is the **vendor' ## 6. Cold-start - **Lead with the gate** (N=1 value: central control + compliance) **+ memory-import** (day-one magic). **Not portability** (empty at N=1). -- **Memory accumulates passively** through device usage — *no AgentKeys app required.* Portability **switches on automatically at the 2nd device** (the memory is already in the user's namespace; zero migration). +- **Memory accumulates passively** through device usage — *no AgentKeys app required.* Portability **switches on automatically at the 2nd device** (the memory is already in the user's namespace; zero migration). **This is the shared-user-memory model (DECIDED — [`shared-user-memory-delegation.md`](shared-user-memory-delegation.md)):** memory is owned by the **user/master** (one `memory:user::` namespace), and each agent device is a **delegated caller** granted access at pair-time — so every device reads the same memory (a rule set once is honored everywhere). NOT per-agent silos. Portability "switching on" at device #2 is exactly device #2 being granted the *same* user namespace device #1 already reads. - **Memory-import from incumbents** — ChatGPT: a session-bookmarklet (à la [gpt2claude.com](https://gpt2claude.com/) / [GPT2Claude Migration Kit](https://github.com/Siamsnus/GPT2Claude-Migration-Kit)) · verbatim-dump prompt · official data-export; Doubao: prompt-dump / memory-panel copy (no official export). Normalize the unstructured text into the memory schema (profile/semantic/episodic + namespaces) via an LLM pass → the user's per-actor namespace. **Frictions:** one-time manual step · ToS gray-area (keep the prompt-dump as the unblockable fallback) · sparse coverage (don't oversell) · structuring work. - **Concentrate early vendors in one category** (kids' toys) for per-user device overlap so portability has a home. diff --git a/docs/plan/shared-user-memory-delegation.md b/docs/plan/shared-user-memory-delegation.md new file mode 100644 index 00000000..d6a2a2e5 --- /dev/null +++ b/docs/plan/shared-user-memory-delegation.md @@ -0,0 +1,113 @@ +# Plan — Shared user memory + delegated agent access (the identity model for AI devices) + +**Status:** DECIDED (product model) 2026-06-16, IMPLEMENTATION PENDING. Resolves the namespace/identity tension flagged while reviewing the `agentkeys-gate` PR (#308) and in [`ai-device-platform.md`](ai-device-platform.md) §6 vs [`spawn-sandbox-on-pair.md`](spawn-sandbox-on-pair.md). +**Relates to:** [`arch.md`](../arch.md) §6 (identity), §15.2 (storage keying), §17.5 (four-layer per-actor isolation), §22c (master/agent binding), §22c.5/[#295](https://github.com/litentry/agentKeys/issues/295) (master hub — master never in the data path); the broker cap-mint ([`handlers/cap.rs`](../../crates/agentkeys-broker-server/src/handlers/cap.rs)), the worker scope re-check ([`worker-creds/src/verify.rs`](../../crates/agentkeys-worker-creds/src/verify.rs)), the memory worker S3 keying ([`worker-memory/src/handlers.rs`](../../crates/agentkeys-worker-memory/src/handlers.rs)). + +## 1. The decision + +**Memory belongs to the user (master), not to each agent device.** Agents are **delegated callers**. + +``` +Alice master = user:alice (owns the memory) +Kitchen ESP32 = agent:kitchen (delegated caller) +Bedroom Tuya = agent:bedroom (delegated caller) +Shared memory = memory:user:alice:home (one namespace, all of Alice's devices read it) +``` + +Both devices read the *same* user memory — e.g. *"After 10pm, do not turn on the main bedroom light; use the night light."* — so a rule set once is honored everywhere. + +**The runtime model (master never in the data path):** + +``` +Pair time (control plane, master present, biometric-gated): + Alice grants agent:bedroom access to memory:user:alice:home (stored on-chain grant) + +Runtime (data plane, master ABSENT): + agent:bedroom asks broker for memory.get on memory:user:alice:home, + proving itself with J1_agent (session) + K10 (cap-PoP). + Broker verifies: caller IS agent:bedroom · Alice granted this agent this namespace · + grant valid & not revoked · TTL/limits OK → mints a SHORT-LIVED cap. + Worker re-verifies the cap independently, reads the user-owned shared memory. + Audit records: agent:bedroom read Alice's shared memory. +``` + +**One sentence:** the user owns the memory, the agent is a delegated caller, and runtime access is authorized by **stored master grants + agent self-refresh** — never by carrying the master session in the data path. + +### Why NOT "agent owns the memory" (operator == actor == agent) + +Per-agent memory (`memory:agent:bedroom:home`, each device owns its own namespace) is the simplest fit for the shipped `operator == actor` self-skip, but it is the **wrong product**: it silos memory per device, so a rule set on the bedroom device is invisible to the kitchen device. It also blurs "agent reading its own data" with "agent delegated to the user's shared data" — user-owned memory must still require a master grant, which the self-skip path deliberately bypasses. **Shared user memory ⇒ delegated, not self.** + +## 2. What already supports this (don't rebuild) + +The delegated runtime model is ~70% shipped — the verification machinery exists: + +| Capability | Where | Status | +|---|---|---| +| **Worker enforces delegated scope** (`operator ≠ actor` → on-chain `isServiceInScope(operator, actor, service)`; positive/negative/revocation tests) | [`worker-creds/src/verify.rs:611`](../../crates/agentkeys-worker-creds/src/verify.rs) ("POSITIVE delegation: master GRANTED the agent scope") | ✅ shipped | +| **K10 PoP proves the caller IS the agent** (`keccak(ecrecover(client_sig)) == device_key_hash`, device bound on-chain to the actor) | [`handlers/cap.rs`](../../crates/agentkeys-broker-server/src/handlers/cap.rs) `verify_cap_pop` (#76) | ✅ shipped | +| **Stored delegation grant** (`setScope(operator, actor, services)` at pair-time accept) | [`handlers/accept.rs`](../../crates/agentkeys-broker-server/src/handlers/accept.rs), `handlers/scope.rs` | ✅ shipped | +| **Audit carries actor + operator** (a delegated read records agent + owner) | `GateTurn` (90) + `MemoryGet` (11) envelopes | ✅ shipped | +| **Agent session token** (`J1_agent`, omni == the agent) | [`handlers/oidc.rs`](../../crates/agentkeys-broker-server/src/handlers/oidc.rs) | ✅ shipped | + +So the "broker verifies the agent + the grant, worker enforces, audit records" steps already exist for `operator ≠ actor`. Three gaps remain. + +## 3. The three gaps to close + +### Gap 1 — broker cap-mint authenticates the OWNER, not the agent caller + +[`cap.rs:393`](../../crates/agentkeys-broker-server/src/handlers/cap.rs): `if session_omni != req.operator_omni → OperatorMismatch`. The session JWT must be the **operator's (owner's)**. So an agent cannot mint a cap for the user's namespace with its **own** `J1_agent` — it would need the master's session, putting the master in the data path (§22c.5 violation). + +**Change:** authenticate the session as the **caller = actor** (`session_omni == actor_omni`), and authorize delegation via the existing on-chain grant + K10 PoP. Concretely: +- `session_omni == req.actor_omni` (the agent proves itself; for master self-ops `operator == actor == master`, unchanged). +- device binding becomes `device.operator_omni == req.operator_omni` (the owner that registered the device) and `device.actor_omni == req.actor_omni == session` (already the agent's device). +- **Preserve `enforce_cred_store_master_self`** (#228): cred-STORE must stay `operator == actor` (master-self), gated on `operator == actor`, NOT on `session`. This is the one subtlety that must not regress. +- The K10 PoP + the scope grant now carry the authz weight (they already exist). The session check shifts from "is the owner" to "is the caller" — a *more* correct model (caller proves identity; grant proves permission). + +Bounded, but it touches the cap-mint security path (#76/#90/#225 layered there) — **needs its own reviewed PR + the full negative-isolation test matrix** (§17.5), not a drive-by edit. + +### Gap 2 — memory is keyed by ACTOR, so there is no shared user namespace + +[`handlers.rs:373`](../../crates/agentkeys-worker-memory/src/handlers.rs): `s3_key(actor_omni, service) = bots//memory/.enc`. Both put and get key off `cap.payload.actor_omni`. So a delegated cap (`operator=master, actor=agent`) reads `bots//memory/` — the **agent's** prefix, not the user's. There is no `bots//memory/` that multiple agents share. + +**Change (recommended):** introduce **owner-keyed shared memory**, mirroring the **config data class** precedent (arch.md §17.2: config is keyed `bots//config/`, master-owned, agents hold no cap → the master-self skip). Options: +- **(a) Shared namespace keyed by operator/owner.** A `memory:user::` namespace stored at `bots//memory/.enc`; the worker keys *shared* reads off `operator_omni` (the owner), per-actor reads stay off `actor_omni`. Distinguish by a namespace convention or a cap field. +- **(b) A distinct `shared-memory` data class** (own bucket + IAM role, like config), owner-keyed, with delegated agent reads via grant. Cleanest isolation, more infra. + +Either way, "shared user memory" must be addressable independent of which agent reads it. + +### Gap 3 — per-actor S3/IAM isolation blocks a granted agent from the owner's prefix + +[`apply-memory-bucket-policy.sh:104`](../../scripts/apply-memory-bucket-policy.sh): S3 access is scoped to `bots/${aws:PrincipalTag/agentkeys_actor_omni}/memory/*`, and the OIDC web-identity token is tagged with `agentkeys_actor_omni = session.omni` ([`oidc.rs:104`](../../crates/agentkeys-broker-server/src/handlers/oidc.rs)). So the agent's STS creds can only reach `bots//` — **not** `bots//`. IAM cannot read the on-chain grant, so it can't conditionally widen this. + +**Change (options):** +- Pair with Gap 2(a): the broker, having verified the grant at cap-mint, mints the STS web-identity token tagged with the **owner** omni for a granted shared read (the broker is the authority that already checked the grant) — so the agent's *cap* is its own, but the *S3 principal* for a shared read is owner-scoped. Tightly coupled to the broker change. +- Or Gap 2(b): a `shared-memory` bucket whose policy grants read to the household's agents (a coarser, owner-level boundary), accepting that S3-layer isolation is at the *user* granularity for shared memory while the cap + worker layers stay per-grant. + +This is the layer that needs the most design care — it changes the §17.5 layer-3 assumption (per-actor) for the shared-memory case. + +## 4. Implications for `agentkeys-gate` (#308) + +The decision **vindicates the gate's delegated config shape** (`operator = owner, actor = agent`) — that was right; my prior "switch to self" lean is **overruled** by this product decision. What changes: + +- **Framing:** reposition as the **cloud agent-runtime adapter** (the hosted-LLM sibling of the in-sandbox agent daemon) — a **delegated caller above the worker enforcement boundary**, not a security gate. Drop the "deterministic enforcement is structural" over-claim from the docs. +- **Identity:** `operator_omni` = the memory **owner** (user/master); `actor_omni` = the **agent**; the session bearer + K10 = the **agent's** (`J1_agent`). Once Gap 1 lands, the single agent bearer becomes correct (it authenticates the caller for cap-mint, and — with Gap 3 — the broker mints the owner-scoped STS for the shared read). +- **Honesty:** the real-backend memory path is **blocked on Gaps 1–3**; until then the gate runs end-to-end only with the `echo` engine (hardware bring-up) and per-actor (non-shared) reads. The runbook's Part 2 must say so. +- **Independent hardening (do now, no backend dep):** reject `tools`/`tool_choice`/`functions` (close the upstream tool-call enforcement bypass), and run-as-one-agent-identity (drop the loose `X-AgentKeys-Actor` multi-actor promise the single bearer can't honor). + +## 5. Sequencing + +1. **Agree this doc** (the namespace decision + the 3-gap shape). +2. **Gate hardening + reframing** (no backend dep) — `agentkeys-gate` PR follow-up. +3. **Gap 1 — broker cap-mint caller-auth** — own PR, full §17.5 negative-isolation matrix, preserve cred-store master-self. +4. **Gap 2 — owner-keyed shared memory** (recommend the config-data-class-style approach) — worker + cap-payload. +5. **Gap 3 — STS/bucket-policy for grant-scoped shared reads** — paired with 4; the hardest, design first. +6. **Then** the gate's real-backend shared-memory path works end-to-end; re-do the runbook Part 2 against a live broker. + +## 6. Test plan (the invariants to prove) + +- **Positive delegation:** agent with a grant reads the user's shared namespace (`operator=owner, actor=agent`, agent's own session + K10) → success, audited as agent reading owner's memory. +- **Negative — no grant:** agent WITHOUT a grant → broker `ServiceNotInScope` / worker re-check denies. Never reads. +- **Negative — revoked:** master revokes the grant → next read denied (online-immediate; cached cap bounded by short TTL). +- **Master self unchanged:** `operator == actor == master` still skips scope, reads `bots//`. +- **Cred-store still master-self-only:** the Gap-1 change must NOT open delegated cred-store (#228 regression guard). +- **Cross-agent isolation:** agent:bedroom cannot read agent:kitchen's *per-actor* memory (only the *shared* namespace both are granted). diff --git a/docs/plan/spawn-sandbox-on-pair.md b/docs/plan/spawn-sandbox-on-pair.md index 99bee87b..ccf80f90 100644 --- a/docs/plan/spawn-sandbox-on-pair.md +++ b/docs/plan/spawn-sandbox-on-pair.md @@ -67,6 +67,8 @@ RUN-TIME (data plane, every interaction) | **Broker** (always-on) | K1 / signing authority | validates the master's signed intent, **spawns/tears down** sandboxes, mints cap-tokens, records on-chain binding | | **Sandbox** (per-device, ephemeral) | actor-scoped cap-token (not the master key) | runs hermes + daemon + that actor's memory; torn down on unpair/idle/revoke | +> **Memory ownership (DECIDED 2026-06-16 — see [`shared-user-memory-delegation.md`](shared-user-memory-delegation.md)):** "that actor's memory" above is **delegated access to the user's shared memory**, NOT a per-agent silo. Memory belongs to the **user/master** (`memory:user::`); each agent is a **delegated caller** granted access at pair-time, so all of a user's devices read the same memory. The sandbox/daemon acts **as the agent** (its own `J1_agent` + K10) and self-refreshes short-lived caps at runtime — the master authorizes at pair-time but is never in the data path. (The runtime caller is the agent; the memory's owner is the user.) + ## Why this beats the static sandbox 1. **Isolation** — one sandbox per `O_agent_X` extends §17.5 four-layer isolation to the *compute* layer, not just storage.