diff --git a/.github/workflows/harness-ci.yml b/.github/workflows/harness-ci.yml index 6915488d..59e4e30d 100644 --- a/.github/workflows/harness-ci.yml +++ b/.github/workflows/harness-ci.yml @@ -864,6 +864,7 @@ jobs: # config bucket/role aren't provisioned yet (setup-cloud.sh --ci). CONFIG_BUCKET=agentkeys-config-test-$ACCOUNT_ID CONFIG_ROLE_ARN=arn:aws:iam::$ACCOUNT_ID:role/agentkeys-config-role-test + AUDIT_BUCKET=agentkeys-audit-test-$ACCOUNT_ID AGENTKEYS_SIGNER_URL=https://signer-test.$TEST_BROKER_ZONE # Worker URLs derived from TEST_BROKER_ZONE → byte-for-byte match # setup-broker-host.sh --test's derive_companion() output. @@ -968,12 +969,12 @@ jobs: # every OTHER prereq still fails closed): # - scope-not-set: setScopeWithWebauthn needs a real WebAuthn assertion # (heima-scope-set.sh L172) a no-Touch-ID runner cannot produce. - # - config-role-missing (#201): the test config bucket+role is still an - # operator one-shot (setup-cloud.sh --ci) → step 19 (config WRITE) - # skips. config-worker-unreachable was dropped — broker-deploy §7b - # auto-issued config-test's cert, so step 21 (cross-class cap - # rejection, role-independent) runs as a live gate (#209 closed; its - # self-dissolving guard step was removed). + # - (#209/#201 config allowances RESOLVED 2026-06-11: config-worker- + # unreachable dropped when broker-deploy §7b auto-issued config-test's + # cert (#209 closed; self-dissolving guard removed); config-role-missing + # dropped too — the test config bucket+role is provisioned and step 19 + # (config WRITE + cross-bucket denials) ran LIVE in PR #281's green + # runs. Steps 19+21 are both live gates now; CI fails closed on drift.) # # The workflow_dispatch `stage` input still selects a single phase # (`--stage N`); push/PR (stage='' / 'all') runs the full 1-4 + 6 sequence. 
@@ -981,7 +982,7 @@ jobs: STAGE: ${{ inputs.stage }} run: | set -euo pipefail - ARGS=(--ci --allow-skip=scope-not-set,config-role-missing,classify-not-configured,classify-worker-unavailable) + ARGS=(--ci --allow-skip=scope-not-set,classify-not-configured,classify-worker-unavailable) case "${STAGE:-}" in 1|2|3) ARGS+=(--stage "$STAGE") ;; *) ;; # all / empty → full phases 1-6 (phase 5 = the mock-sandbox wire) diff --git a/Cargo.lock b/Cargo.lock index bfd3dbe9..c0ea731b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -389,18 +389,23 @@ name = "agentkeys-worker-audit" version = "0.1.0" dependencies = [ "agentkeys-core", + "agentkeys-types", "anyhow", + "aws-config", + "aws-sdk-s3", "axum", "ciborium", "clap", "hex", "http-body-util", + "k256", "reqwest", "serde", "serde_json", "sha3", "thiserror 2.0.18", "tokio", + "tokio-stream", "tower 0.4.13", "tracing", "tracing-subscriber", @@ -3909,12 +3914,14 @@ dependencies = [ "tokio", "tokio-native-tls", "tokio-rustls 0.26.4", + "tokio-util", "tower 0.5.3", "tower-http 0.6.8", "tower-service", "url", "wasm-bindgen", "wasm-bindgen-futures", + "wasm-streams", "web-sys", "webpki-roots 1.0.7", ] @@ -5244,6 +5251,19 @@ dependencies = [ "wasmparser", ] +[[package]] +name = "wasm-streams" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "15053d8d85c7eccdbefef60f06769760a563c7f0a9d6902a13d35c7800b0ad65" +dependencies = [ + "futures-util", + "js-sys", + "wasm-bindgen", + "wasm-bindgen-futures", + "web-sys", +] + [[package]] name = "wasmparser" version = "0.244.0" diff --git a/apps/parent-control/app/_components/dashboard.tsx b/apps/parent-control/app/_components/dashboard.tsx index c8a99ff7..ec867745 100644 --- a/apps/parent-control/app/_components/dashboard.tsx +++ b/apps/parent-control/app/_components/dashboard.tsx @@ -297,7 +297,7 @@ export function AuditFeed({ }) { const [filter, setFilter] = useState('all'); const filtered = filter === 'all' ? 
events : events.filter((e) => e.chip === filter); - const filters: (ChipKind | 'all')[] = ['all', 'memory', 'creds', 'payment', 'audit', 'chain', 'broker']; + const filters: (ChipKind | 'all')[] = ['all', 'memory', 'creds', 'payment', 'audit', 'chain', 'broker', 'worker', 'anchor']; if (events.length === 0) { return ( diff --git a/apps/parent-control/app/_components/types.ts b/apps/parent-control/app/_components/types.ts index 7f7ed515..3f48dd4e 100644 --- a/apps/parent-control/app/_components/types.ts +++ b/apps/parent-control/app/_components/types.ts @@ -50,6 +50,8 @@ export type ChipKind = | 'audit' | 'broker' | 'chain' + | 'worker' + | 'anchor' | 'payment' | 'revoke' | 'scope' diff --git a/apps/parent-control/lib/constants.ts b/apps/parent-control/lib/constants.ts index 21756e46..3d18447d 100644 --- a/apps/parent-control/lib/constants.ts +++ b/apps/parent-control/lib/constants.ts @@ -23,6 +23,8 @@ export const CHIP_STYLES: Record = { audit: 'chip', broker: 'chip', chain: 'chip ok', + worker: 'chip', + anchor: 'chip ok', payment: 'chip warn', revoke: 'chip bad', scope: 'chip', diff --git a/crates/agentkeys-bundler/src/lib.rs b/crates/agentkeys-bundler/src/lib.rs index b47f64e2..c7600eb7 100644 --- a/crates/agentkeys-bundler/src/lib.rs +++ b/crates/agentkeys-bundler/src/lib.rs @@ -16,5 +16,8 @@ //! all reads here are raw JSON). This bundler is PRIVATE: bound to loopback, //! fed only by the broker — not a public alt-mempool. -pub mod legacy_tx; +// `legacy_tx` moved to core (#109) — both the bundler and the audit worker's +// tier-A anchor relay sign legacy EOA txs; one implementation, re-exported +// here so existing `agentkeys_bundler::legacy_tx` paths keep working. 
+pub use agentkeys_core::legacy_tx; pub mod server; diff --git a/crates/agentkeys-core/examples/export_audit_vectors.rs b/crates/agentkeys-core/examples/export_audit_vectors.rs index 7eb25be5..1b94ecf6 100644 --- a/crates/agentkeys-core/examples/export_audit_vectors.rs +++ b/crates/agentkeys-core/examples/export_audit_vectors.rs @@ -32,9 +32,10 @@ //! ``` use agentkeys_core::audit::{ - AuditEnvelope, AuditOpKind, AuditResult, CredFetchBody, CredStoreBody, DeviceAddBody, - K3EpochAdvanceBody, MemoryPutBody, PaymentDirectBody, PaymentEscrowRedeemBody, ScopeGrantBody, - SignEip191Body, SignEip712Body, ENVELOPE_VERSION, + AuditBatchFailedBody, AuditEnvelope, AuditOpKind, AuditResult, AuditRootAnchorBody, + CredFetchBody, CredStoreBody, DeviceAddBody, K3EpochAdvanceBody, MemoryPutBody, + PaymentDirectBody, PaymentEscrowRedeemBody, ScopeGrantBody, SignEip191Body, SignEip712Body, + ENVELOPE_VERSION, }; use serde::Serialize; use serde_json::{json, Value}; @@ -226,6 +227,30 @@ fn main() { }, None, ), + vector( + AuditOpKind::AuditRootAnchor, + AuditRootAnchorBody { + merkle_root: hex0x(&[0x90; 32]), + op_kind_bitmap: hex0x(&{ + let mut bm = [0u8; 32]; + bm[31] = 0x03; // op_kinds 0+1 present + bm + }), + entry_count: 7, + relay_address: "0x4444444444444444444444444444444444444444".into(), + }, + None, + ), + vector( + AuditOpKind::AuditBatchFailed, + AuditBatchFailedBody { + merkle_root: hex0x(&[0x91; 32]), + entry_count: 3, + attempts: 3, + last_error: "eth_sendRawTransaction HTTP 503".into(), + }, + None, + ), unknown_vector(250), ]; diff --git a/crates/agentkeys-core/src/audit/bodies.rs b/crates/agentkeys-core/src/audit/bodies.rs index b86463a1..87ca96fb 100644 --- a/crates/agentkeys-core/src/audit/bodies.rs +++ b/crates/agentkeys-core/src/audit/bodies.rs @@ -236,6 +236,45 @@ pub struct ConfigTeardownBody { pub actor_target: String, } +// ── 90..99 — audit-service meta family (#109 tier-A hosted anchor) ───── +// +// The hosted relay anchors each per-operator Merkle 
batch by emitting an +// `AuditRootAnchor` envelope and committing ITS hash on-chain via the +// ungated `CredentialAudit.appendV2(operatorOmni, relayActorOmni, 90, +// envelopeHash)` — one tx per batch, real operator omni in the indexed +// topic, no contract change. Genuine anchors are distinguished from +// third-party spam by `tx.from == relay_address` (published at the +// worker's `GET /v1/audit/relay-info`). The master-gated `appendRootV2` +// remains the sovereign tier-B/C route. + +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +pub struct AuditRootAnchorBody { + /// 32-byte hex — Merkle root over the batch's envelope-hash leaves + /// (domain-separated scheme per `CredentialAudit.verifyEntryInRoot`). + pub merkle_root: String, + /// 32-byte hex — bit N set when the batch contains op_kind N (the + /// `appendRootV2` bitmap convention, carried in-body here). + pub op_kind_bitmap: String, + pub entry_count: u64, + /// 20-byte hex — the tier-A relay EOA that signed the anchor tx. + /// Verifiers match it against the anchor tx's `from`. + pub relay_address: String, +} + +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +pub struct AuditBatchFailedBody { + /// 32-byte hex — the root of the batch that failed to anchor. Its + /// entries are re-queued, so a later `AuditRootAnchor` (with a fresh + /// root superset) eventually covers them. + pub merkle_root: String, + pub entry_count: u64, + /// How many submission attempts were made before giving up. + pub attempts: u8, + /// Last submission error, truncated — diagnostic only, not consumed + /// programmatically. + pub last_error: String, +} + #[cfg(test)] mod tests { use super::*; @@ -448,6 +487,62 @@ mod tests { } } + /// §15.3b step-5 worker test for the audit-service meta family (#109): + /// canonical CBOR roundtrip + typed decode for the tier-A anchor and + /// the batch-failed alert shapes. 
+ #[test] + fn audit_meta_family_cbor_roundtrip_and_typed_decode() { + use crate::audit::{envelope_for, AuditEnvelope, AuditOpKind, AuditResult, TypedAuditBody}; + + let anchor = AuditRootAnchorBody { + merkle_root: format!("0x{}", "aa".repeat(32)), + op_kind_bitmap: format!("0x{}", "00".repeat(31)) + "03", + entry_count: 7, + relay_address: format!("0x{}", "ee".repeat(20)), + }; + let env = envelope_for( + [0x44; 32], // relay's derived actor omni + [0x22; 32], // the REAL operator whose batch was anchored + AuditOpKind::AuditRootAnchor, + anchor.clone(), + AuditResult::Success, + None, + None, + ) + .unwrap(); + let decoded = + AuditEnvelope::from_canonical_cbor(&env.to_canonical_cbor().unwrap()).unwrap(); + assert_eq!(AuditOpKind::AuditRootAnchor.label(), "audit.root_anchor"); + match decoded.typed_body().unwrap() { + TypedAuditBody::AuditRootAnchor(b) => assert_eq!(b, anchor), + other => panic!("unexpected typed body: {other:?}"), + } + + let failed = AuditBatchFailedBody { + merkle_root: format!("0x{}", "bb".repeat(32)), + entry_count: 3, + attempts: 3, + last_error: "eth_sendRawTransaction HTTP 503".into(), + }; + let env = envelope_for( + [0x44; 32], + [0x22; 32], + AuditOpKind::AuditBatchFailed, + failed.clone(), + AuditResult::Failure, + None, + None, + ) + .unwrap(); + let decoded = + AuditEnvelope::from_canonical_cbor(&env.to_canonical_cbor().unwrap()).unwrap(); + assert_eq!(AuditOpKind::AuditBatchFailed.label(), "audit.batch_failed"); + match decoded.typed_body().unwrap() { + TypedAuditBody::AuditBatchFailed(b) => assert_eq!(b, failed), + other => panic!("unexpected typed body: {other:?}"), + } + } + #[test] fn payment_direct_body_uses_ref_as_field_name() { // Sanity check: `ref` is a Rust reserved word, so the field is diff --git a/crates/agentkeys-core/src/audit/mod.rs b/crates/agentkeys-core/src/audit/mod.rs index 31df0f26..7240321c 100644 --- a/crates/agentkeys-core/src/audit/mod.rs +++ b/crates/agentkeys-core/src/audit/mod.rs @@ -54,11 +54,11 @@ 
use sha3::{Digest, Keccak256}; use thiserror::Error; pub use bodies::{ - ConfigGetBody, ConfigPutBody, ConfigTeardownBody, CredFetchBody, CredStoreBody, - CredTeardownBody, DeviceAddBody, DeviceRevokeBody, EmailReceiveBody, EmailSendBody, - K10RotateBody, K3EpochAdvanceBody, MemoryGetBody, MemoryPutBody, MemoryTeardownBody, - PaymentDirectBody, PaymentEscrowRedeemBody, ScopeGrantBody, ScopeRevokeBody, SignEip191Body, - SignEip712Body, + AuditBatchFailedBody, AuditRootAnchorBody, ConfigGetBody, ConfigPutBody, ConfigTeardownBody, + CredFetchBody, CredStoreBody, CredTeardownBody, DeviceAddBody, DeviceRevokeBody, + EmailReceiveBody, EmailSendBody, K10RotateBody, K3EpochAdvanceBody, MemoryGetBody, + MemoryPutBody, MemoryTeardownBody, PaymentDirectBody, PaymentEscrowRedeemBody, ScopeGrantBody, + ScopeRevokeBody, SignEip191Body, SignEip712Body, }; pub use op_kind::AuditOpKind; @@ -238,6 +238,8 @@ pub enum TypedAuditBody { ConfigPut(ConfigPutBody), ConfigGet(ConfigGetBody), ConfigTeardown(ConfigTeardownBody), + AuditRootAnchor(AuditRootAnchorBody), + AuditBatchFailed(AuditBatchFailedBody), } impl TypedAuditBody { @@ -277,6 +279,12 @@ impl TypedAuditBody { AuditOpKind::ConfigTeardown => { Self::ConfigTeardown(serde_json::from_value(value).ok()?) } + AuditOpKind::AuditRootAnchor => { + Self::AuditRootAnchor(serde_json::from_value(value).ok()?) + } + AuditOpKind::AuditBatchFailed => { + Self::AuditBatchFailed(serde_json::from_value(value).ok()?) + } }) } } diff --git a/crates/agentkeys-core/src/audit/op_kind.rs b/crates/agentkeys-core/src/audit/op_kind.rs index 259b2c60..9e04eb4a 100644 --- a/crates/agentkeys-core/src/audit/op_kind.rs +++ b/crates/agentkeys-core/src/audit/op_kind.rs @@ -15,7 +15,8 @@ //! - 60-69 email family (EmailSend=60, EmailReceive=61; 62-69 reserved) //! - 70-79 K3 family (K3EpochAdvance=70; 71-79 reserved) //! - 80-89 config family (ConfigPut=80, ConfigGet=81, ConfigTeardown=82; 83-89 reserved) -//! - 90-255 reserved for future families +//! 
- 90-99 audit-service meta family (AuditRootAnchor=90, AuditBatchFailed=91; 92-99 reserved) — issue #109 +//! - 100-255 reserved for future families /// Canonical op_kind enum. The byte value MUST match the row in arch.md /// §15.3a. The enum is `repr(u8)` so `as u8` gives the canonical byte. @@ -47,6 +48,8 @@ pub enum AuditOpKind { ConfigPut = 80, ConfigGet = 81, ConfigTeardown = 82, + AuditRootAnchor = 90, + AuditBatchFailed = 91, } impl AuditOpKind { @@ -75,6 +78,8 @@ impl AuditOpKind { 80 => Self::ConfigPut, 81 => Self::ConfigGet, 82 => Self::ConfigTeardown, + 90 => Self::AuditRootAnchor, + 91 => Self::AuditBatchFailed, _ => return None, }) } @@ -105,6 +110,8 @@ impl AuditOpKind { Self::ConfigPut => "config.put", Self::ConfigGet => "config.get", Self::ConfigTeardown => "config.teardown", + Self::AuditRootAnchor => "audit.root_anchor", + Self::AuditBatchFailed => "audit.batch_failed", } } } @@ -140,6 +147,8 @@ mod tests { AuditOpKind::ConfigPut, AuditOpKind::ConfigGet, AuditOpKind::ConfigTeardown, + AuditOpKind::AuditRootAnchor, + AuditOpKind::AuditBatchFailed, ]; for k in all { let byte = k as u8; @@ -156,7 +165,7 @@ mod tests { #[test] fn unknown_bytes_return_none() { for byte in [ - 3u8, 9, 13, 19, 22, 32, 42, 53, 62, 71, 83, 89, 90, 200, 250, 255, + 3u8, 9, 13, 19, 22, 32, 42, 53, 62, 71, 83, 89, 92, 99, 200, 250, 255, ] { assert_eq!( AuditOpKind::from_u8(byte), @@ -193,6 +202,8 @@ mod tests { AuditOpKind::ConfigPut as u8, AuditOpKind::ConfigGet as u8, AuditOpKind::ConfigTeardown as u8, + AuditOpKind::AuditRootAnchor as u8, + AuditOpKind::AuditBatchFailed as u8, ]; let s: HashSet<_> = all.iter().copied().collect(); assert_eq!(s.len(), all.len(), "duplicate byte assignment"); diff --git a/crates/agentkeys-bundler/src/legacy_tx.rs b/crates/agentkeys-core/src/legacy_tx.rs similarity index 92% rename from crates/agentkeys-bundler/src/legacy_tx.rs rename to crates/agentkeys-core/src/legacy_tx.rs index 0e20bfdb..eef01b38 100644 --- 
a/crates/agentkeys-bundler/src/legacy_tx.rs +++ b/crates/agentkeys-core/src/legacy_tx.rs @@ -1,12 +1,16 @@ //! Minimal legacy (pre-EIP-1559) transaction RLP encoding + EIP-155 signing. //! //! Heima accepts legacy txs and its `eth_estimateGas` reverts on `handleOps` -//! (see `docs/spec/heima-eth-gap.md`), so the bundler signs a fixed-gas-limit -//! legacy tx and submits it via `eth_sendRawTransaction` — no alloy/ethers +//! (see `docs/spec/heima-eth-gap.md`), so callers sign a fixed-gas-limit +//! legacy tx and submit it via `eth_sendRawTransaction` — no alloy/ethers //! (their receipt/header parsers crash on Heima's mixHash-less responses). //! Hand-rolled RLP, golden-tested against the EIP-155 reference vector. +//! +//! Lives in core (moved from `agentkeys-bundler`, #109) so both EOA tx +//! emitters — the bundler's `handleOps` carrier and the audit worker's +//! tier-A anchor relay — share one implementation. -use agentkeys_core::device_crypto::keccak256; +use crate::device_crypto::keccak256; use anyhow::{anyhow, Result}; use k256::ecdsa::SigningKey; diff --git a/crates/agentkeys-core/src/lib.rs b/crates/agentkeys-core/src/lib.rs index b3e544a7..3fb06a14 100644 --- a/crates/agentkeys-core/src/lib.rs +++ b/crates/agentkeys-core/src/lib.rs @@ -7,6 +7,7 @@ pub mod clear_signing; pub mod device_crypto; pub mod erc4337; pub mod init_flow; +pub mod legacy_tx; pub mod mock_client; pub mod otp; pub mod payment; diff --git a/crates/agentkeys-daemon/Cargo.toml b/crates/agentkeys-daemon/Cargo.toml index df852c3e..c82908f2 100644 --- a/crates/agentkeys-daemon/Cargo.toml +++ b/crates/agentkeys-daemon/Cargo.toml @@ -36,7 +36,7 @@ tracing-subscriber = { version = "0.3", features = ["env-filter"] } ed25519-dalek = { version = "2", features = ["rand_core"] } rand = "0.8" base64 = "0.22" -reqwest = { version = "0.12", features = ["json"] } +reqwest = { version = "0.12", features = ["json", "stream"] } # Parse `Retry-After` HTTP-date form (RFC 7231) for the §10.2 pairing poll # 
backoff; tiny, zero-dep, the same parser hyper uses internally. httpdate = "1" diff --git a/crates/agentkeys-daemon/src/main.rs b/crates/agentkeys-daemon/src/main.rs index 9330bba0..d0594b1c 100644 --- a/crates/agentkeys-daemon/src/main.rs +++ b/crates/agentkeys-daemon/src/main.rs @@ -1217,6 +1217,10 @@ async fn run_ui_bridge_mode(args: Args) -> anyhow::Result<()> { // exactly one passkey re-auth. ui_bridge::rehydrate_master_session(&state).await; } + // #109: fold the audit worker's Tier-1 SSE feed into the web app's + // audit stream (worker-side ops + on-chain anchor status, live). No-op + // when --audit-worker-url is empty. + ui_bridge::spawn_audit_feed_bridge(state.clone()); let app = ui_bridge::build_router(state, &args.ui_bridge_origin); let listener = tokio::net::TcpListener::bind(&args.ui_bridge_bind) diff --git a/crates/agentkeys-daemon/src/ui_bridge.rs b/crates/agentkeys-daemon/src/ui_bridge.rs index 33d6ece6..dcc9dcd6 100644 --- a/crates/agentkeys-daemon/src/ui_bridge.rs +++ b/crates/agentkeys-daemon/src/ui_bridge.rs @@ -77,6 +77,11 @@ pub struct UiBridgeState { pub caps: RwLock>>, pub audit: RwLock>, pub audit_tx: broadcast::Sender, + /// #109: envelope hashes already surfaced in the feed — dedups the two + /// delivery paths for the same op (the local submit-flow push vs the + /// audit-worker SSE bridge), whichever lands first. (VecDeque = FIFO + /// eviction order, HashSet = O(1) membership.) 
+ pub seen_envelope_hashes: RwLock<(VecDeque, std::collections::HashSet)>, pub workers: RwLock>, pub anchor: RwLock, /// Master-actor memory entries, keyed by content_hash for idempotent @@ -841,6 +846,7 @@ pub fn build_state( caps: RwLock::new(HashMap::new()), audit: RwLock::new(VecDeque::with_capacity(AUDIT_BUFFER_CAP)), audit_tx, + seen_envelope_hashes: RwLock::new((VecDeque::new(), std::collections::HashSet::new())), workers: RwLock::new(HashMap::new()), anchor: RwLock::new(ApiAnchorStatus::default()), master_memory: RwLock::new(HashMap::new()), @@ -2370,11 +2376,7 @@ fn now_unix() -> u64 { fn now_ts_hms() -> String { // HH:MM:SS in UTC for audit event timestamps. Operator-facing only — // chain timestamps are independent. - let now = now_unix(); - let h = (now / 3600) % 24; - let m = (now / 60) % 60; - let s = now % 60; - format!("{:02}:{:02}:{:02}", h, m, s) + ts_hms_from_unix(now_unix()) } // ─── Read endpoints ──────────────────────────────────────────────────── @@ -6168,6 +6170,32 @@ async fn store_master_credential_inner( } async fn push_audit(state: &SharedUiBridgeState, evt: ApiAuditEvent) { + // #109 dedup: the same op can reach the feed twice — once from the + // local submit flow (which carries its envelope-hash receipts) and once + // from the audit-worker SSE bridge (one event per envelope). Whichever + // path delivers an envelope hash first wins; a later event whose hashes + // were ALL seen already is dropped. 
+ if let Some(hashes) = &evt.audit_envelope_hashes { + if !hashes.is_empty() { + let mut seen = state.seen_envelope_hashes.write().await; + let any_new = hashes.iter().any(|h| !seen.1.contains(&h.to_lowercase())); + if !any_new { + return; + } + const SEEN_CAP: usize = 4096; + for h in hashes { + let h = h.to_lowercase(); + if seen.1.insert(h.clone()) { + seen.0.push_back(h); + if seen.0.len() > SEEN_CAP { + if let Some(evicted) = seen.0.pop_front() { + seen.1.remove(&evicted); + } + } + } + } + } + } let mut buf = state.audit.write().await; if buf.len() == AUDIT_BUFFER_CAP { buf.pop_front(); @@ -6179,6 +6207,239 @@ async fn push_audit(state: &SharedUiBridgeState, evt: ApiAuditEvent) { let _ = state.audit_tx.send(evt); } +// ─── #109: audit-worker feed bridge ───────────────────────────────────── +// +// The daemon subscribes to the audit worker's Tier-1 SSE +// (`GET /v1/audit/stream?operator=`) and folds every event +// into the EXISTING ApiAuditEvent feed the web app already streams — so +// worker-side ops the daemon never sees locally (agent cred fetches, +// memory reads, denials, the relay's on-chain anchors) appear live in the +// parent UI. Anchor events additionally flip `state.anchor` to REAL (the +// "Anchored ✓" badge was a synthesized placeholder before this). + +/// The operator omni the bridge filters on — same 3-source resolution the +/// fleet reconstruction uses (registered master, live session, onboarding). 
async fn current_operator_omni(state: &SharedUiBridgeState) -> Option<String> {
    // Each source is read under its own short-lived read lock; the guards
    // are dropped before the next lock is taken.
    let from_registered = state
        .registered_master
        .read()
        .await
        .as_ref()
        .map(|rm| rm.operator_omni.clone());
    let from_session = state
        .master_session
        .read()
        .await
        .as_ref()
        .map(|ms| ms.operator_omni.clone());
    let from_onboarding = state
        .onboarding_session
        .read()
        .await
        .as_ref()
        .map(|s| s.omni.clone());
    // Priority order: registered master, then live session, then onboarding.
    // The first NON-EMPTY omni wins and is normalized before being returned.
    [from_registered, from_session, from_onboarding]
        .into_iter()
        .flatten()
        .find(|o| !o.is_empty())
        .map(|o| agentkeys_backend_client::normalize_omni_0x(&o))
}

/// Abbreviate a hex string for display: `0x` + first 6 + `…` + last 4
/// characters. Inputs of 10 characters or fewer (after stripping leading
/// `0x`) are returned whole, re-prefixed with `0x`.
///
/// Truncation is done per `char`, not per byte: the input comes from a
/// network-deserialized feed event, and byte-index slicing would panic if a
/// malformed value put a multi-byte character on a cut point. For ASCII hex
/// the output is identical to byte slicing.
fn short_hex(s: &str) -> String {
    let h = s.trim_start_matches("0x");
    let char_count = h.chars().count();
    if char_count <= 10 {
        return format!("0x{h}");
    }
    let head: String = h.chars().take(6).collect();
    let tail: String = h.chars().skip(char_count - 4).collect();
    format!("0x{head}…{tail}")
}

/// Format a unix timestamp (seconds) as `HH:MM:SS`, wrapping the hour at 24.
/// The date part of the timestamp is discarded.
fn ts_hms_from_unix(ts: u64) -> String {
    let h = (ts / 3600) % 24;
    let m = (ts / 60) % 60;
    let s = ts % 60;
    format!("{:02}:{:02}:{:02}", h, m, s)
}

/// Map one worker feed event into the web app's audit-event shape. Pure —
/// unit-tested below.
+fn worker_feed_event_to_api(evt: &agentkeys_types::audit_feed::AuditFeedEvent) -> ApiAuditEvent { + let result_str = match evt.result { + 0 => "ok", + 1 => "failure", + 2 => "NOT PERMITTED", + _ => "unknown", + }; + let (actor, chip, detail) = match evt.kind.as_str() { + "anchor" => ( + "audit-relay".to_string(), + "anchor".to_string(), + format!( + "anchored {} event(s) on-chain · root {} · tx {}", + evt.entry_count.unwrap_or(0), + short_hex(evt.merkle_root.as_deref().unwrap_or("")), + short_hex(evt.tx_hash.as_deref().unwrap_or("")), + ), + ), + "batch_failed" => ( + "audit-relay".to_string(), + "anchor".to_string(), + format!( + "batch anchor FAILED after retries · {} event(s) re-queued · root {}", + evt.entry_count.unwrap_or(0), + short_hex(evt.merkle_root.as_deref().unwrap_or("")), + ), + ), + _ => { + let mut d = format!( + "{} · actor {} · {}", + evt.op_kind_label, + short_hex(&evt.actor_omni), + result_str + ); + if let Some(intent) = &evt.intent_text { + d.push_str(" · "); + d.push_str(intent); + } + (short_hex(&evt.actor_omni), "worker".to_string(), d) + } + }; + let sev = if evt.result == 0 { "ok" } else { "bad" }; + let hash_tail = evt.envelope_hash.trim_start_matches("0x"); + ApiAuditEvent { + id: format!("e-worker-{}", &hash_tail[..hash_tail.len().min(12)]), + ts: ts_hms_from_unix(evt.ts_unix), + actor_id: actor.clone(), + actor, + kind: evt.op_kind_label.clone(), + detail, + chip, + sev: sev.into(), + tx_hash: evt.tx_hash.clone(), + audit_envelope_hashes: Some(vec![evt.envelope_hash.clone()]), + } +} + +/// Drain complete SSE frames (`event:`/`data:` blocks terminated by a blank +/// line) out of `buf`, leaving any partial frame in place. Comments and +/// keep-alives are dropped. Returns `(event_name, data)` pairs. 
+fn drain_sse_frames(buf: &mut String) -> Vec<(String, String)> { + let mut frames = Vec::new(); + while let Some(pos) = buf.find("\n\n") { + let frame: String = buf[..pos].to_string(); + buf.drain(..pos + 2); + let mut event_name = "message".to_string(); + let mut data_lines: Vec<&str> = Vec::new(); + for line in frame.lines() { + if let Some(rest) = line.strip_prefix("event:") { + event_name = rest.trim().to_string(); + } else if let Some(rest) = line.strip_prefix("data:") { + data_lines.push(rest.trim_start()); + } + // ":" comments (keep-alives) and other fields are ignored. + } + if !data_lines.is_empty() { + frames.push((event_name, data_lines.join("\n"))); + } + } + frames +} + +/// Spawn the long-running bridge task. No-op (returns false) when the +/// daemon was started without an audit-worker URL — hermetic tests and +/// no-infra dev runs stay silent. +pub fn spawn_audit_feed_bridge(state: SharedUiBridgeState) -> bool { + let Some(base) = state + .audit_worker_url + .clone() + .filter(|u| !u.trim().is_empty()) + else { + tracing::info!("audit feed bridge disabled (no audit worker url)"); + return false; + }; + tokio::spawn(async move { + let client = reqwest::Client::new(); + let mut backoff_secs = 2u64; + loop { + let Some(omni) = current_operator_omni(&state).await else { + // No master session yet — poll cheaply until one exists. 
+ tokio::time::sleep(std::time::Duration::from_secs(5)).await; + continue; + }; + match pump_worker_feed(&state, &client, &base, &omni).await { + Ok(delivered) => { + if delivered > 0 { + backoff_secs = 2; // healthy run — reset backoff + } + tracing::info!(delivered, "audit feed bridge stream ended; reconnecting"); + } + Err(e) => { + tracing::warn!(error = %e, backoff_secs, "audit feed bridge connect/pump failed"); + } + } + tokio::time::sleep(std::time::Duration::from_secs(backoff_secs)).await; + backoff_secs = (backoff_secs * 2).min(60); + } + }); + true +} + +/// One SSE connection lifetime: backfill + live events until the stream +/// closes or the session's operator changes. Returns how many events were +/// folded into the feed. +async fn pump_worker_feed( + state: &SharedUiBridgeState, + client: &reqwest::Client, + base: &str, + operator_omni: &str, +) -> anyhow::Result { + use tokio_stream::StreamExt as _; + let url = format!( + "{}/v1/audit/stream?operator={}&backfill=200", + base.trim_end_matches('/'), + operator_omni + ); + let resp = client + .get(&url) + .header("accept", "text/event-stream") + .send() + .await?; + if !resp.status().is_success() { + anyhow::bail!("audit worker stream HTTP {}", resp.status()); + } + tracing::info!(operator = %operator_omni, "audit feed bridge connected"); + let mut stream = resp.bytes_stream(); + let mut buf = String::new(); + let mut delivered = 0usize; + while let Some(chunk) = stream.next().await { + let chunk = chunk?; + buf.push_str(&String::from_utf8_lossy(&chunk)); + for (event_name, data) in drain_sse_frames(&mut buf) { + let Ok(evt) = + serde_json::from_str::(&data) + else { + tracing::warn!(event_name, "audit feed bridge: undecodable event skipped"); + continue; + }; + if evt.kind == "anchor" { + let mut anchor = state.anchor.write().await; + anchor.last_anchor_at = evt.ts_unix; + } + delivered += 1; + push_audit(state, worker_feed_event_to_api(&evt)).await; + } + // Session switched (logout / reset / 
different master)? Reconnect + // with the new filter. Keep-alive comments tick this check even + // when no events flow. + let current = current_operator_omni(state).await; + if current.as_deref() != Some(operator_omni) { + tracing::info!("audit feed bridge: operator changed — reconnecting"); + break; + } + } + Ok(delivered) +} + // ─── Tests ───────────────────────────────────────────────────────────── // // These tests exercise the begin/finish state machine without a real @@ -8838,6 +9099,159 @@ mod tests { assert_eq!(received.id, "e-stream-1"); } + // ─── #109: audit-worker feed bridge units ─────────────────────────── + + fn worker_evt(kind: &str, hash: u8, result: u8) -> agentkeys_types::audit_feed::AuditFeedEvent { + agentkeys_types::audit_feed::AuditFeedEvent { + kind: kind.into(), + envelope_hash: format!("0x{}", hex::encode([hash; 32])), + ts_unix: 45_296, // 12:34:56 UTC + actor_omni: format!("0x{}", "aa".repeat(32)), + operator_omni: format!("0x{}", "bb".repeat(32)), + op_kind: 1, + op_kind_label: "cred.fetch".into(), + result, + intent_text: None, + tx_hash: if kind == "anchor" { + Some("0xfeedtx".into()) + } else { + None + }, + merkle_root: Some(format!("0x{}", "cc".repeat(32))), + entry_count: Some(3), + } + } + + #[test] + fn drain_sse_frames_handles_partials_events_and_comments() { + let mut buf = String::new(); + buf.push_str(": keep-alive\n\nevent: audit\ndata: {\"a\":1}\n\nevent: anch"); + let frames = drain_sse_frames(&mut buf); + assert_eq!(frames, vec![("audit".to_string(), "{\"a\":1}".to_string())]); + assert_eq!(buf, "event: anch", "partial frame stays buffered"); + buf.push_str("or\ndata: {\"b\":2}\n\n"); + let frames = drain_sse_frames(&mut buf); + assert_eq!( + frames, + vec![("anchor".to_string(), "{\"b\":2}".to_string())] + ); + assert!(buf.is_empty()); + } + + #[test] + fn worker_feed_event_maps_to_api_shape() { + let api = worker_feed_event_to_api(&worker_evt("event", 0x11, 2)); + assert_eq!(api.kind, "cred.fetch"); + 
assert_eq!(api.sev, "bad", "NotPermitted renders as bad"); + assert_eq!(api.ts, "12:34:56"); + assert_eq!(api.chip, "worker"); + assert!(api.detail.contains("NOT PERMITTED")); + assert_eq!( + api.audit_envelope_hashes, + Some(vec![format!("0x{}", hex::encode([0x11; 32]))]), + "decode page can fetch the real envelope" + ); + + let anchor = worker_feed_event_to_api(&worker_evt("anchor", 0x22, 0)); + assert_eq!(anchor.chip, "anchor"); + assert_eq!(anchor.actor, "audit-relay"); + assert_eq!(anchor.tx_hash.as_deref(), Some("0xfeedtx")); + assert!(anchor.detail.contains("anchored 3 event(s)")); + } + + #[tokio::test] + async fn push_audit_dedups_by_envelope_hash_either_order() { + let state = make_state(); + // Bridge delivers the worker envelope first… + push_audit( + &state, + worker_feed_event_to_api(&worker_evt("event", 0x33, 0)), + ) + .await; + assert_eq!(state.audit.read().await.len(), 1); + // …then the local submit flow pushes its own event carrying the SAME + // receipt hash → dropped as a duplicate. + let local = ApiAuditEvent { + id: "e-scope-1".into(), + ts: "00:00:01".into(), + actor_id: "master".into(), + actor: "master".into(), + kind: "scope.granted".into(), + detail: "dup of the worker event".into(), + chip: "broker".into(), + sev: "ok".into(), + tx_hash: Some("0xabc".into()), + audit_envelope_hashes: Some(vec![format!("0x{}", hex::encode([0x33; 32]))]), + }; + push_audit(&state, local.clone()).await; + assert_eq!(state.audit.read().await.len(), 1, "duplicate dropped"); + // An event with a FRESH hash still lands. + let mut fresh = local; + fresh.audit_envelope_hashes = Some(vec![format!("0x{}", hex::encode([0x44; 32]))]); + push_audit(&state, fresh).await; + assert_eq!(state.audit.read().await.len(), 2); + // Hash-less events are never deduped. 
+ push_audit( + &state, + ApiAuditEvent { + id: "e-local-2".into(), + ts: "00:00:02".into(), + actor_id: "master".into(), + actor: "master".into(), + kind: "memory.updated".into(), + detail: "no receipt".into(), + chip: "broker".into(), + sev: "ok".into(), + tx_hash: None, + audit_envelope_hashes: None, + }, + ) + .await; + assert_eq!(state.audit.read().await.len(), 3); + } + + #[tokio::test] + async fn bridge_anchor_event_flips_anchor_status_real() { + // End-to-end through a real SSE socket: a fake worker serves one + // anchor event; the pump folds it into the feed AND updates + // /v1/anchor/status's last_anchor_at. + use axum::response::sse::{Event as SseEvent, Sse}; + let evt = worker_evt("anchor", 0x55, 0); + let payload = serde_json::to_string(&evt).unwrap(); + let app = axum::Router::new().route( + "/v1/audit/stream", + axum::routing::get(move || { + let payload = payload.clone(); + async move { + let stream = tokio_stream::once(Ok::<_, std::convert::Infallible>( + SseEvent::default().event("anchor").data(payload), + )); + Sse::new(stream) + } + }), + ); + let listener = tokio::net::TcpListener::bind("127.0.0.1:0").await.unwrap(); + let addr = listener.local_addr().unwrap(); + tokio::spawn(async move { + axum::serve(listener, app).await.ok(); + }); + + let state = make_state(); + let delivered = pump_worker_feed( + &state, + &reqwest::Client::new(), + &format!("http://{addr}"), + &format!("0x{}", "bb".repeat(32)), + ) + .await + .expect("pump"); + assert_eq!(delivered, 1); + assert_eq!(state.anchor.read().await.last_anchor_at, 45_296); + let feed = state.audit.read().await; + assert_eq!(feed.len(), 1); + assert_eq!(feed[0].chip, "anchor"); + } + #[tokio::test] async fn decode_audit_event_returns_real_calldata_and_envelope() { // #153: GET /v1/audit/:id/decode wires the real decoder. 
Assert the diff --git a/crates/agentkeys-types/src/audit_feed.rs b/crates/agentkeys-types/src/audit_feed.rs new file mode 100644 index 00000000..4c917856 --- /dev/null +++ b/crates/agentkeys-types/src/audit_feed.rs @@ -0,0 +1,42 @@ +//! Tier-1 audit feed wire shape (issue #109). +//! +//! ONE owner for the event JSON the audit worker's `GET /v1/audit/stream` +//! SSE emits and its ring buffers / S3 archive persist — consumed by the +//! worker (`agentkeys-worker-audit`), the daemon's feed bridge +//! (`ui_bridge`), and any future explorer poller. Re-typing this shape in +//! a consumer is the #200/#203 drift bug class; depend on this struct +//! instead. + +use serde::{Deserialize, Serialize}; + +/// One feed entry. `kind` distinguishes ordinary envelope events from the +/// tier-A relay's meta events (which carry the trailing optional fields). +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct AuditFeedEvent { + /// "event" | "anchor" | "batch_failed". + pub kind: String, + /// 0x-hex `keccak256(canonical_cbor(envelope))`. + pub envelope_hash: String, + pub ts_unix: u64, + /// 0x-hex 32-byte omni of the acting identity (for anchor / + /// batch_failed: the relay's derived omni). + pub actor_omni: String, + /// 0x-hex 32-byte omni of the operator whose boundary the op touched. + pub operator_omni: String, + pub op_kind: u8, + /// `AuditOpKind::label()` or `"unknown()"` — render directly so + /// new op_kinds degrade gracefully (non-break invariant #4). + pub op_kind_label: String, + /// 0=Success, 1=Failure, 2=NotPermitted. + pub result: u8, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub intent_text: Option, + /// Anchor events: the confirmed `appendV2` tx. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub tx_hash: Option, + /// Anchor / batch_failed events: the batch's Merkle root. 
+ #[serde(default, skip_serializing_if = "Option::is_none")] + pub merkle_root: Option, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub entry_count: Option, +} diff --git a/crates/agentkeys-types/src/lib.rs b/crates/agentkeys-types/src/lib.rs index c01c343a..92061f36 100644 --- a/crates/agentkeys-types/src/lib.rs +++ b/crates/agentkeys-types/src/lib.rs @@ -2,6 +2,7 @@ use std::fmt; use serde::{Deserialize, Serialize}; +pub mod audit_feed; pub mod cred_manifest; pub mod provision; diff --git a/crates/agentkeys-worker-audit/Cargo.toml b/crates/agentkeys-worker-audit/Cargo.toml index ff576d12..0c3adc47 100644 --- a/crates/agentkeys-worker-audit/Cargo.toml +++ b/crates/agentkeys-worker-audit/Cargo.toml @@ -27,6 +27,13 @@ sha3 = "0.10" hex = "0.4" ciborium = "0.2" clap = { version = "4", features = ["derive", "env"] } +# #109 tier-A anchor relay: legacy-tx signing key + SSE stream plumbing. +agentkeys-types = { workspace = true } +k256 = { version = "0.13", features = ["ecdsa", "sha2"] } +tokio-stream = { version = "0.1", features = ["sync"] } +# #109 S3 cold archive (instance-profile creds, same as the email worker). +aws-config = { version = "1", features = ["behavior-version-latest"] } +aws-sdk-s3 = "1" [dev-dependencies] tokio = { workspace = true, features = ["full", "test-util"] } diff --git a/crates/agentkeys-worker-audit/src/anchor.rs b/crates/agentkeys-worker-audit/src/anchor.rs new file mode 100644 index 00000000..e1d62ae8 --- /dev/null +++ b/crates/agentkeys-worker-audit/src/anchor.rs @@ -0,0 +1,614 @@ +//! Tier-A on-chain anchor relay (issue #109) — the audit worker submits each +//! flushed V2 batch to `CredentialAudit` autonomously, on the +//! `AGENTKEYS_AUDIT_BATCH_SECONDS` cadence. +//! +//! ## Why `appendV2` + op_kind 90, not `appendRootV2` +//! +//! `appendRootV2` is gated `msg.sender == registry.operatorMasterWallet(omni)` +//! and the registry rejects EOA masters (`MasterMustBeAccount`), so a hosted +//! 
relay EOA can never pass it — and a prod master is a Touch-ID passkey that +//! cannot sign on a 2-minute cadence. Instead the relay wraps the batch root +//! in an honest `AuditRootAnchor` envelope (op_kind 90, body carries +//! `{merkle_root, op_kind_bitmap, entry_count, relay_address}`) and commits +//! THAT envelope's hash via the ungated +//! `appendV2(operatorOmni, relayActorOmni, 90, envelopeHash)` — one legacy tx +//! per batch, the REAL operator omni stays an indexed topic, zero contract +//! change (§15.3b invariant #6). Genuine anchors are distinguished from +//! third-party spam by `tx.from == relay_address` (published at +//! `GET /v1/audit/relay-info`). The master-gated `appendRootV2` remains the +//! sovereign tier-B/C route (`heima-worker-smoke.sh`). +//! +//! All chain reads are raw JSON-RPC (no alloy/ethers — Heima's mixHash-less +//! responses crash their parsers; see `docs/spec/heima-eth-gap.md`), and the +//! tx is a fixed-gas-limit legacy tx via `agentkeys_core::legacy_tx`. + +use std::time::Duration; + +use agentkeys_core::device_crypto::keccak256; +use agentkeys_core::legacy_tx::LegacyTx; +use anyhow::{anyhow, bail, Context, Result}; +use k256::ecdsa::SigningKey; +use serde_json::{json, Value}; +use tracing::{info, warn}; + +/// Everything the relay needs to sign + submit an anchor tx. Built once at +/// boot via `from_env` (workers inject a hand-built config in tests — never +/// mutate process env, per the #258/#259 rule). +pub struct RelayConfig { + pub rpc_url: String, + pub chain_id: u64, + /// `CredentialAudit` contract address (20 bytes). + pub credential_audit: [u8; 20], + /// `SidecarRegistry` address — the anti-spam gate: anchors are only + /// submitted for operators with a registered on-chain master + /// (`operatorMasterWallet(omni) != 0`). Without this, the open + /// `append/v2` endpoint would let a spammer mint arbitrary operator + /// omnis and drain the relay one anchor tx per fake operator per tick. 
+ pub sidecar_registry: [u8; 20], + pub signing_key: SigningKey, + /// 20-byte EVM address derived from `signing_key`. + pub relay_address: [u8; 20], + /// `actor_omni_from_wallet(relay_address)` — the anchor envelopes' + /// actor identity. Deterministic; no on-chain registration needed. + pub relay_omni: [u8; 32], + /// Pinned gas limit (Heima `eth_estimateGas` is unreliable on some + /// shapes; `appendV2` is emit-only so 200k is generous headroom). + pub gas_limit: u128, + /// Submission attempts per batch before declaring the batch failed. + pub attempts: u32, + /// Base for exponential backoff between attempts (`base * 4^(n-1)`). + /// Tests set 0. + pub backoff_base: Duration, + /// How long to poll for the tx receipt before treating the attempt as + /// failed. A timed-out tx may still land later — re-anchoring the same + /// envelope hashes is benign (duplicate `AuditAppendedV2` events; + /// explorers dedup by envelope hash). + pub receipt_timeout: Duration, +} + +impl RelayConfig { + /// Resolve from env + the pinned chain profile. Returns `Ok(None)` when + /// no relay key is configured — the worker then runs in the pre-#109 + /// degraded mode (flush logs `appendRootV2` inputs, anchors nothing). + /// + /// Env surface (all optional except the key): + /// - `AGENTKEYS_AUDIT_RELAY_KEY_FILE` — path to a hex private key file + /// (preferred; generated 0600 by `setup-broker-host.sh`), or + /// `AGENTKEYS_AUDIT_RELAY_KEY` — inline hex (CI). + /// - `AGENTKEYS_CHAIN` / `AGENTKEYS_CHAIN_PROFILE_FILE` — profile pick. + /// - `AGENTKEYS_AUDIT_RPC_URL` — override the profile's `rpc.http`. + /// - `AGENTKEYS_AUDIT_CREDENTIAL_AUDIT_ADDRESS` — override the + /// profile's `CredentialAudit` address (isolated test stacks). + /// - `AGENTKEYS_AUDIT_ANCHOR_GAS_LIMIT` (default 200000), + /// `AGENTKEYS_AUDIT_ANCHOR_ATTEMPTS` (default 3), + /// `AGENTKEYS_AUDIT_ANCHOR_RECEIPT_TIMEOUT_SECS` (default 60). 
+    pub fn from_env() -> Result<Option<Self>> {
+        // An env var that is set but empty is treated the same as unset.
+        let env_nonempty = |key: &str| std::env::var(key).ok().filter(|v| !v.is_empty());
+
+        // Key source precedence: the key-file path wins over the inline
+        // variable; with neither present the relay is disabled (`Ok(None)`).
+        let key_hex = match env_nonempty("AGENTKEYS_AUDIT_RELAY_KEY_FILE") {
+            Some(path) => std::fs::read_to_string(&path)
+                .with_context(|| format!("read AGENTKEYS_AUDIT_RELAY_KEY_FILE={path}"))?
+                .trim()
+                .to_string(),
+            None => match env_nonempty("AGENTKEYS_AUDIT_RELAY_KEY") {
+                Some(k) => k.trim().to_string(),
+                None => return Ok(None),
+            },
+        };
+
+        let (profile, picked) = agentkeys_core::chain_profile::ChainProfile::resolve(
+            None,
+            std::env::var("AGENTKEYS_CHAIN").ok().as_deref(),
+            std::env::var("AGENTKEYS_CHAIN_PROFILE_FILE")
+                .ok()
+                .as_deref(),
+        )?;
+        info!(profile = %profile.name, %picked, "anchor relay chain profile");
+
+        // Each override falls back to the value pinned in the chain profile.
+        let rpc_url =
+            env_nonempty("AGENTKEYS_AUDIT_RPC_URL").unwrap_or_else(|| profile.rpc.http.clone());
+        let audit_addr_hex = match env_nonempty("AGENTKEYS_AUDIT_CREDENTIAL_AUDIT_ADDRESS") {
+            Some(a) => a,
+            None => profile
+                .contract("CredentialAudit")
+                .ok_or_else(|| anyhow!("chain profile {} has no CredentialAudit", profile.name))?
+                .address
+                .clone(),
+        };
+        // `AGENTKEYS_AUDIT_REGISTRY_ADDRESS` overrides the profile's
+        // `SidecarRegistry` address (the anti-spam gate target).
+        let registry_addr_hex = match env_nonempty("AGENTKEYS_AUDIT_REGISTRY_ADDRESS") {
+            Some(a) => a,
+            None => profile
+                .contract("SidecarRegistry")
+                .ok_or_else(|| anyhow!("chain profile {} has no SidecarRegistry", profile.name))?
+                .address
+                .clone(),
+        };
+
+        let gas_limit = env_u128("AGENTKEYS_AUDIT_ANCHOR_GAS_LIMIT", 200_000)?;
+        // Checked narrowing: an out-of-range value is a configuration error,
+        // not something to wrap around silently (an `as` cast would truncate).
+        let attempts = u32::try_from(env_u128("AGENTKEYS_AUDIT_ANCHOR_ATTEMPTS", 3)?)
+            .context("AGENTKEYS_AUDIT_ANCHOR_ATTEMPTS does not fit in u32")?;
+        let receipt_secs =
+            u64::try_from(env_u128("AGENTKEYS_AUDIT_ANCHOR_RECEIPT_TIMEOUT_SECS", 60)?)
+                .context("AGENTKEYS_AUDIT_ANCHOR_RECEIPT_TIMEOUT_SECS does not fit in u64")?;
+
+        Ok(Some(Self::build(
+            rpc_url,
+            profile.chain_id,
+            &audit_addr_hex,
+            &registry_addr_hex,
+            &key_hex,
+            gas_limit,
+            attempts,
+            // Backoff base between attempts is fixed at 2s on the env path.
+            Duration::from_secs(2),
+            Duration::from_secs(receipt_secs),
+        )?))
+    }
+
+    /// Assemble a config from explicit values (the test path — no env reads).
+    #[allow(clippy::too_many_arguments)]
+    pub fn build(
+        rpc_url: String,
+        chain_id: u64,
+        credential_audit_hex: &str,
+        sidecar_registry_hex: &str,
+        relay_key_hex: &str,
+        gas_limit: u128,
+        attempts: u32,
+        backoff_base: Duration,
+        receipt_timeout: Duration,
+    ) -> Result<Self> {
+        // Both contract addresses must decode to exactly 20 bytes.
+        let credential_audit = decode20(credential_audit_hex)
+            .ok_or_else(|| anyhow!("CredentialAudit address must be 20-byte hex"))?;
+        let sidecar_registry = decode20(sidecar_registry_hex)
+            .ok_or_else(|| anyhow!("SidecarRegistry address must be 20-byte hex"))?;
+
+        // The key may carry surrounding whitespace and an optional 0x prefix.
+        let key_bytes =
+            hex::decode(relay_key_hex.trim().trim_start_matches("0x")).context("relay key hex")?;
+        let signing_key = SigningKey::from_slice(&key_bytes).context("relay key")?;
+
+        // The relay identity is derived from the key: address first, then the
+        // actor omni computed from that address's 0x-hex wallet form.
+        let relay_address = eth_address(&signing_key);
+        let relay_wallet =
+            agentkeys_types::WalletAddress(format!("0x{}", hex::encode(relay_address)));
+        let relay_omni = agentkeys_core::actor_omni::actor_omni_from_wallet(&relay_wallet);
+
+        Ok(Self {
+            rpc_url,
+            chain_id,
+            credential_audit,
+            sidecar_registry,
+            signing_key,
+            relay_address,
+            relay_omni,
+            gas_limit,
+            attempts,
+            backoff_base,
+            receipt_timeout,
+        })
+    }
+
+    /// The relay's 20-byte address as lowercase 0x-prefixed hex.
+    pub fn relay_address_hex(&self) -> String {
+        format!("0x{}", hex::encode(self.relay_address))
+    }
+
+    /// The relay's 32-byte actor omni as lowercase 0x-prefixed hex.
+    pub fn relay_omni_hex(&self) -> String {
+        format!("0x{}", hex::encode(self.relay_omni))
+    }
+}
+
+/// `appendV2(bytes32 operatorOmni, bytes32 actorOmni, uint8 opKind, bytes32
+/// envelopeHash)` calldata. Selector is computed from the signature at call
+/// time (golden-tested below) — no hardcoded magic bytes.
+pub fn encode_append_v2_calldata(
+    operator_omni: [u8; 32],
+    actor_omni: [u8; 32],
+    op_kind: u8,
+    envelope_hash: [u8; 32],
+) -> Vec<u8> {
+    let selector = &keccak256(b"appendV2(bytes32,bytes32,uint8,bytes32)")[..4];
+    // The uint8 argument occupies a full 32-byte word with the value in the
+    // final byte; the other three arguments are already 32-byte words.
+    let mut kind_padded = [0u8; 32];
+    kind_padded[31] = op_kind;
+
+    let mut data = Vec::with_capacity(4 + 32 * 4);
+    data.extend_from_slice(selector);
+    data.extend_from_slice(&operator_omni);
+    data.extend_from_slice(&actor_omni);
+    data.extend_from_slice(&kind_padded);
+    data.extend_from_slice(&envelope_hash);
+    data
+}
+
+/// Outcome of one anchored batch.
+#[derive(Debug, Clone)]
+pub struct AnchorReceipt {
+    /// Hash of the confirmed anchor transaction.
+    pub tx_hash: String,
+    /// 1-based index of the attempt that succeeded.
+    pub attempts_used: u32,
+}
+
+/// Submit one anchor tx (calldata already encoded) with the configured
+/// retry/backoff policy. Returns the tx hash on the first confirmed
+/// attempt; aggregates the last error when every attempt fails.
+///
+/// No sleep precedes the first attempt; before attempt `n >= 2` the task
+/// sleeps `backoff_base * 4^(n-2)`. The multiplier and the product both
+/// saturate, so a very large configured `attempts` cannot overflow or panic.
+pub async fn submit_anchor_with_retries(
+    cfg: &RelayConfig,
+    http: &reqwest::Client,
+    calldata: Vec<u8>,
+) -> std::result::Result<AnchorReceipt, AnchorFailure> {
+    // Only observable when `cfg.attempts == 0` (the loop body never runs);
+    // any real failure overwrites it below.
+    let mut last_error = String::from("no submission attempts were made (attempts = 0)");
+    for attempt in 1..=cfg.attempts {
+        if attempt > 1 {
+            let factor = 4u32.saturating_pow(attempt - 2);
+            let backoff = cfg.backoff_base.saturating_mul(factor);
+            tokio::time::sleep(backoff).await;
+        }
+        match submit_once(cfg, http, &calldata).await {
+            Ok(tx_hash) => {
+                return Ok(AnchorReceipt {
+                    tx_hash,
+                    attempts_used: attempt,
+                })
+            }
+            Err(e) => {
+                // `{:#}` renders the full anyhow context chain on one line.
+                last_error = format!("{e:#}");
+                warn!(attempt, error = %last_error, "anchor submission attempt failed");
+            }
+        }
+    }
+    Err(AnchorFailure {
+        attempts: cfg.attempts,
+        last_error,
+    })
+}
+
+/// All attempts exhausted — the caller re-queues the batch entries and
+/// emits the `AuditBatchFailed` (op_kind 91) envelope.
+#[derive(Debug, Clone)] +pub struct AnchorFailure { + pub attempts: u32, + pub last_error: String, +} + +async fn submit_once(cfg: &RelayConfig, http: &reqwest::Client, calldata: &[u8]) -> Result { + let submitter = cfg.relay_address_hex(); + let nonce = parse_qty( + &rpc_call( + http, + &cfg.rpc_url, + "eth_getTransactionCount", + json!([submitter, "pending"]), + ) + .await?, + "eth_getTransactionCount", + )?; + // +25% headroom over the node's quote so a base-fee tick doesn't strand + // the tx (same policy as the bundler's handleOps carrier). + let gas_price = parse_qty( + &rpc_call(http, &cfg.rpc_url, "eth_gasPrice", json!([])).await?, + "eth_gasPrice", + )? * 125 + / 100; + + let tx = LegacyTx { + nonce, + gas_price, + gas_limit: cfg.gas_limit, + to: cfg.credential_audit, + value: 0, + data: calldata.to_vec(), + chain_id: cfg.chain_id, + }; + let (raw, _) = tx.sign(&cfg.signing_key)?; + let tx_hash = rpc_call( + http, + &cfg.rpc_url, + "eth_sendRawTransaction", + json!([format!("0x{}", hex::encode(raw))]), + ) + .await? + .as_str() + .ok_or_else(|| anyhow!("eth_sendRawTransaction returned non-string"))? + .to_string(); + + // Poll the raw receipt (NEVER a typed parser — Heima receipts carry no + // mixHash). status 0x1 = success; 0x0 = reverted (appendV2 is ungated + + // emit-only, so a revert means out-of-gas or a wrong address — both + // operator errors worth failing loudly on). 
+ let deadline = tokio::time::Instant::now() + cfg.receipt_timeout; + loop { + let receipt = rpc_call( + http, + &cfg.rpc_url, + "eth_getTransactionReceipt", + json!([tx_hash]), + ) + .await?; + if !receipt.is_null() { + let status = receipt + .get("status") + .and_then(|s| s.as_str()) + .unwrap_or("0x0"); + if status == "0x1" { + info!(%tx_hash, "anchor tx confirmed"); + return Ok(tx_hash); + } + bail!("anchor tx {tx_hash} reverted (status {status})"); + } + if tokio::time::Instant::now() >= deadline { + bail!( + "anchor tx {tx_hash} unconfirmed after {:?} (it may still land; re-anchoring is benign)", + cfg.receipt_timeout + ); + } + tokio::time::sleep(Duration::from_secs(2)).await; + } +} + +/// Raw JSON-RPC POST with transient-failure retries (the Heima public RPC +/// intermittently 500s — same posture as the workers' `eth_call` helper). +async fn rpc_call( + http: &reqwest::Client, + rpc_url: &str, + method: &str, + params: Value, +) -> Result { + let body = json!({"jsonrpc": "2.0", "id": 1, "method": method, "params": params}); + let mut last = String::new(); + for _ in 0..3 { + match http.post(rpc_url).json(&body).send().await { + Ok(resp) if resp.status().is_success() => { + let v: Value = resp + .json() + .await + .map_err(|e| anyhow!("{method} json: {e}"))?; + if let Some(err) = v.get("error") { + // RPC-level errors are NOT transient (bad tx, low funds) — + // surface immediately. + bail!("{method} rpc error: {err}"); + } + return Ok(v.get("result").cloned().unwrap_or(Value::Null)); + } + Ok(resp) => last = format!("{method} HTTP {}", resp.status()), + Err(e) => last = format!("{method} POST: {e}"), + } + tokio::time::sleep(Duration::from_millis(300)).await; + } + bail!("{last} (after 3 tries)") +} + +/// Anti-spam anchor gate (#109): does this operator have a registered +/// on-chain master? `eth_call SidecarRegistry.operatorMasterWallet(omni)` +/// → non-zero address. 
Spammers minting fake operator omnis through the +/// open `append/v2` endpoint fail this gate, so they can fill queues but +/// never burn relay gas. RPC errors return `Err` (the caller re-queues — +/// never drop real events on a transient flake, never burn gas blind). +pub async fn operator_has_master( + cfg: &RelayConfig, + http: &reqwest::Client, + operator_omni: [u8; 32], +) -> Result { + let selector = &keccak256(b"operatorMasterWallet(bytes32)")[..4]; + let mut data = Vec::with_capacity(4 + 32); + data.extend_from_slice(selector); + data.extend_from_slice(&operator_omni); + let result = rpc_call( + http, + &cfg.rpc_url, + "eth_call", + json!([{ + "to": format!("0x{}", hex::encode(cfg.sidecar_registry)), + "data": format!("0x{}", hex::encode(data)), + }, "latest"]), + ) + .await?; + let raw = result + .as_str() + .ok_or_else(|| anyhow!("eth_call returned non-string"))?; + let bytes = hex::decode(raw.trim_start_matches("0x")).unwrap_or_default(); + Ok(bytes.iter().any(|b| *b != 0)) +} + +/// Current relay balance in wei (`eth_getBalance`), `None` on any RPC +/// trouble — diagnostics only, never load-bearing. 
+pub async fn relay_balance_wei(cfg: &RelayConfig, http: &reqwest::Client) -> Option { + let v = rpc_call( + http, + &cfg.rpc_url, + "eth_getBalance", + json!([cfg.relay_address_hex(), "latest"]), + ) + .await + .ok()?; + parse_qty(&v, "eth_getBalance").ok() +} + +fn parse_qty(v: &Value, what: &str) -> Result { + let s = v + .as_str() + .ok_or_else(|| anyhow!("{what} returned non-string"))?; + u128::from_str_radix(s.trim_start_matches("0x"), 16).with_context(|| format!("{what}: {s}")) +} + +fn env_u128(key: &str, default: u128) -> Result { + match std::env::var(key) { + Ok(v) if !v.is_empty() => v.parse::().with_context(|| format!("{key}={v}")), + _ => Ok(default), + } +} + +fn decode20(s: &str) -> Option<[u8; 20]> { + let v = hex::decode(s.trim_start_matches("0x")).ok()?; + if v.len() != 20 { + return None; + } + let mut out = [0u8; 20]; + out.copy_from_slice(&v); + Some(out) +} + +/// Keccak-derived EVM address of a secp256k1 signing key. +fn eth_address(sk: &SigningKey) -> [u8; 20] { + let pubkey = sk.verifying_key().to_encoded_point(false); + let digest = keccak256(&pubkey.as_bytes()[1..]); + let mut out = [0u8; 20]; + out.copy_from_slice(&digest[12..]); + out +} + +#[cfg(test)] +mod tests { + use super::*; + use axum::{routing::post, Json, Router}; + use std::sync::atomic::{AtomicU32, Ordering}; + use std::sync::Arc; + + #[test] + fn append_v2_calldata_layout() { + let data = encode_append_v2_calldata([0x22; 32], [0x44; 32], 90, [0x90; 32]); + assert_eq!(data.len(), 4 + 32 * 4); + // Selector matches `cast sig "appendV2(bytes32,bytes32,uint8,bytes32)"`. 
+ assert_eq!( + &data[..4], + &keccak256(b"appendV2(bytes32,bytes32,uint8,bytes32)")[..4] + ); + assert_eq!(&data[4..36], &[0x22; 32]); + assert_eq!(&data[36..68], &[0x44; 32]); + let mut kind = [0u8; 32]; + kind[31] = 90; + assert_eq!(&data[68..100], &kind); + assert_eq!(&data[100..132], &[0x90; 32]); + } + + #[test] + fn relay_identity_is_deterministic() { + let cfg = RelayConfig::build( + "http://127.0.0.1:1".into(), + 212013, + &format!("0x{}", "11".repeat(20)), + &format!("0x{}", "22".repeat(20)), + &format!("0x{}", "46".repeat(32)), + 200_000, + 3, + Duration::ZERO, + Duration::from_secs(1), + ) + .unwrap(); + // The EIP-155 reference key (0x46×32) → its well-known address. + assert_eq!( + cfg.relay_address_hex(), + "0x9d8a62f656a8d1615c1294fd71e9cfb3e4855a4f" + ); + // relay_omni = actor_omni(relay address) — the agentkeysevm digest. + let expected = agentkeys_core::actor_omni::actor_omni_from_wallet( + &agentkeys_types::WalletAddress(cfg.relay_address_hex()), + ); + assert_eq!(cfg.relay_omni, expected); + } + + /// Fake JSON-RPC node: counts calls; `fail_first` requests 503 before + /// recovering. Drives the full submit path over real HTTP. + async fn spawn_fake_rpc(fail_first: u32) -> (String, Arc) { + let calls = Arc::new(AtomicU32::new(0)); + let calls_in = calls.clone(); + let app = Router::new().route( + "/", + post(move |Json(req): Json| { + let calls = calls_in.clone(); + async move { + let n = calls.fetch_add(1, Ordering::SeqCst); + if n < fail_first { + return Err(axum::http::StatusCode::SERVICE_UNAVAILABLE); + } + let method = req.get("method").and_then(|m| m.as_str()).unwrap_or(""); + let result = match method { + "eth_getTransactionCount" => json!("0x0"), + "eth_gasPrice" => json!("0x3b9aca00"), + "eth_sendRawTransaction" => json!(format!("0x{}", "ab".repeat(32))), + "eth_getTransactionReceipt" => json!({"status": "0x1"}), + // Anti-spam gate: operator omni 0x22… is "registered" + // (non-zero master), everything else isn't. 
+ "eth_call" => { + let data = req["params"][0]["data"].as_str().unwrap_or(""); + if data.ends_with(&"22".repeat(32)) { + json!(format!( + "0x{:0>64}", + "9d8a62f656a8d1615c1294fd71e9cfb3e4855a4f" + )) + } else { + json!(format!("0x{}", "00".repeat(32))) + } + } + other => panic!("unexpected method {other}"), + }; + Ok(Json(json!({"jsonrpc": "2.0", "id": 1, "result": result}))) + } + }), + ); + let listener = tokio::net::TcpListener::bind("127.0.0.1:0").await.unwrap(); + let addr = listener.local_addr().unwrap(); + tokio::spawn(async move { + axum::serve(listener, app).await.ok(); + }); + (format!("http://{addr}/"), calls) + } + + fn test_cfg(rpc_url: String) -> RelayConfig { + RelayConfig::build( + rpc_url, + 212013, + &format!("0x{}", "11".repeat(20)), + &format!("0x{}", "22".repeat(20)), + &format!("0x{}", "46".repeat(32)), + 200_000, + 3, + Duration::ZERO, + Duration::from_secs(5), + ) + .unwrap() + } + + #[tokio::test] + async fn submits_anchor_end_to_end_against_fake_rpc() { + let (url, _) = spawn_fake_rpc(0).await; + let cfg = test_cfg(url); + let http = reqwest::Client::new(); + let calldata = encode_append_v2_calldata([0x22; 32], cfg.relay_omni, 90, [0x90; 32]); + let receipt = submit_anchor_with_retries(&cfg, &http, calldata) + .await + .expect("anchored"); + assert_eq!(receipt.tx_hash, format!("0x{}", "ab".repeat(32))); + assert_eq!(receipt.attempts_used, 1); + } + + #[tokio::test] + async fn transient_rpc_failures_recover_within_one_attempt() { + // 2 leading 503s are absorbed by rpc_call's own 3-try transport + // retry — still attempt #1 from the batch-retry perspective. 
+ let (url, _) = spawn_fake_rpc(2).await; + let cfg = test_cfg(url); + let http = reqwest::Client::new(); + let calldata = encode_append_v2_calldata([0x22; 32], cfg.relay_omni, 90, [0x90; 32]); + let receipt = submit_anchor_with_retries(&cfg, &http, calldata) + .await + .expect("anchored"); + assert_eq!(receipt.attempts_used, 1); + } + + #[tokio::test] + async fn operator_gate_distinguishes_registered_from_spam() { + let (url, _) = spawn_fake_rpc(0).await; + let cfg = test_cfg(url); + let http = reqwest::Client::new(); + assert!(operator_has_master(&cfg, &http, [0x22; 32]).await.unwrap()); + assert!(!operator_has_master(&cfg, &http, [0x99; 32]).await.unwrap()); + } + + #[tokio::test] + async fn exhausted_attempts_return_failure_with_last_error() { + // Every request 503s: 3 batch attempts × 3 transport tries all fail. + let (url, calls) = spawn_fake_rpc(u32::MAX).await; + let cfg = test_cfg(url); + let http = reqwest::Client::new(); + let calldata = encode_append_v2_calldata([0x22; 32], cfg.relay_omni, 90, [0x90; 32]); + let failure = submit_anchor_with_retries(&cfg, &http, calldata) + .await + .expect_err("must fail"); + assert_eq!(failure.attempts, 3); + assert!( + failure.last_error.contains("503"), + "got: {}", + failure.last_error + ); + // 3 attempts × 3 transport tries on the FIRST rpc (nonce fetch). + assert_eq!(calls.load(Ordering::SeqCst), 9); + } +} diff --git a/crates/agentkeys-worker-audit/src/archive.rs b/crates/agentkeys-worker-audit/src/archive.rs new file mode 100644 index 00000000..89619182 --- /dev/null +++ b/crates/agentkeys-worker-audit/src/archive.rs @@ -0,0 +1,260 @@ +//! S3 cold archive for the Tier-1 feed (issue #109). +//! +//! Layout under `s3://$AGENTKEYS_AUDIT_S3_BUCKET/$AGENTKEYS_AUDIT_S3_PREFIX`: +//! +//! - `feed//-.json` — one `FeedEvent` +//! per object. Zero-padded unix seconds make lexical key order +//! chronological, so "the last N events" is the listing tail. +//! - `envelopes/.cbor` — canonical envelope bytes (the +//! 
durable layer behind `GET /v1/audit/envelope/:hash`; pre-#109 the +//! by-hash store was in-memory only and restarts lost it). +//! +//! Every operation is best-effort with a loud WARN: the chain commitment is +//! the tamper-evidence layer; S3 is retention. Writes are spawned off the +//! append path so a slow S3 never blocks an emit. On boot, +//! `restore_rings` rebuilds the per-actor ring buffers ("last 1000 events +//! per actor survive a restart" — the #109 acceptance criterion). +//! +//! Credentials come from the default AWS provider chain (the broker host's +//! EC2 instance profile — same posture as the email worker's inbox client). + +use aws_sdk_s3::Client as S3Client; +use tracing::{info, warn}; + +use crate::state::{FeedEvent, State}; + +#[derive(Clone)] +pub struct Archive { + s3: S3Client, + bucket: String, + /// Normalized to either "" or "…/" so key joins are always clean. + prefix: String, +} + +impl Archive { + /// `None` when `AGENTKEYS_AUDIT_S3_BUCKET` is unset/empty — the worker + /// then runs in-memory only (rings still work, restarts lose them). 
+    pub async fn from_env() -> Option<Self> {
+        // Unset and empty both mean "no archive configured".
+        let bucket = std::env::var("AGENTKEYS_AUDIT_S3_BUCKET")
+            .ok()
+            .filter(|b| !b.is_empty())?;
+        let prefix = std::env::var("AGENTKEYS_AUDIT_S3_PREFIX").unwrap_or_else(|_| "audit/".into());
+        let cfg = aws_config::defaults(aws_config::BehaviorVersion::latest())
+            .load()
+            .await;
+        Some(Self::new(S3Client::new(&cfg), bucket, prefix))
+    }
+
+    /// Build an archive over an existing client. Leading and trailing `/`
+    /// are stripped from `prefix`; a non-empty result gets exactly one
+    /// trailing `/`, an empty one stays empty.
+    pub fn new(s3: S3Client, bucket: String, prefix: String) -> Self {
+        let trimmed = prefix.trim_matches('/');
+        let prefix = if trimmed.is_empty() {
+            String::new()
+        } else {
+            format!("{trimmed}/")
+        };
+        Self { s3, bucket, prefix }
+    }
+
+    /// Object key for one feed event: actor folder, then the timestamp
+    /// zero-padded to 12 digits, then the envelope hash (hex parts are
+    /// lowercased with the `0x` prefix removed).
+    fn feed_key(&self, evt: &FeedEvent) -> String {
+        let actor = evt.actor_omni.trim_start_matches("0x").to_lowercase();
+        let hash = evt.envelope_hash.trim_start_matches("0x").to_lowercase();
+        format!(
+            "{}feed/{}/{:012}-{}.json",
+            self.prefix, actor, evt.ts_unix, hash,
+        )
+    }
+
+    /// Object key for one canonical envelope, addressed by its hash
+    /// (lowercased, `0x` prefix removed).
+    fn envelope_key(&self, envelope_hash: &str) -> String {
+        let hash = envelope_hash.trim_start_matches("0x").to_lowercase();
+        format!("{}envelopes/{}.cbor", self.prefix, hash)
+    }
+
+    /// Fire-and-forget archive of one feed event (spawned — never blocks
+    /// the append path).
+    pub fn archive_feed_event(&self, evt: FeedEvent) {
+        let this = self.clone();
+        tokio::spawn(async move {
+            let key = this.feed_key(&evt);
+            let body = match serde_json::to_vec(&evt) {
+                Ok(b) => b,
+                Err(e) => {
+                    warn!(error = %e, "archive: serialize feed event");
+                    return;
+                }
+            };
+            let put = this
+                .s3
+                .put_object()
+                .bucket(&this.bucket)
+                .key(&key)
+                .body(body.into())
+                .content_type("application/json")
+                .send()
+                .await;
+            // Failures are logged and dropped; nothing is retried here.
+            if let Err(e) = put {
+                warn!(key, error = %e, "archive: feed event PUT failed");
+            }
+        });
+    }
+
+    /// Fire-and-forget archive of one canonical envelope.
+    pub fn archive_envelope(&self, envelope_hash: String, cbor: Vec<u8>) {
+        let this = self.clone();
+        // Spawned so the caller never waits on S3; failures are logged only.
+        tokio::spawn(async move {
+            let key = this.envelope_key(&envelope_hash);
+            if let Err(e) = this
+                .s3
+                .put_object()
+                .bucket(&this.bucket)
+                .key(&key)
+                .body(cbor.into())
+                .content_type("application/cbor")
+                .send()
+                .await
+            {
+                warn!(key, error = %e, "archive: envelope PUT failed");
+            }
+        });
+    }
+
+    /// Cold-read an envelope by hash (the `GET /v1/audit/envelope/:hash`
+    /// fallback when the in-memory map misses, e.g. after a restart).
+    ///
+    /// Returns `None` when the object cannot be fetched or its body cannot
+    /// be read. A plain "no such key" miss stays quiet; every other GET
+    /// failure is logged at WARN so it is not mistaken for a missing
+    /// envelope.
+    pub async fn fetch_envelope(&self, envelope_hash: &str) -> Option<Vec<u8>> {
+        let key = self.envelope_key(envelope_hash);
+        match self
+            .s3
+            .get_object()
+            .bucket(&self.bucket)
+            .key(&key)
+            .send()
+            .await
+        {
+            Ok(out) => match out.body.collect().await {
+                Ok(bytes) => Some(bytes.into_bytes().to_vec()),
+                Err(e) => {
+                    warn!(key, error = %e, "archive: envelope body read failed");
+                    None
+                }
+            },
+            Err(e) => {
+                let missing = e
+                    .as_service_error()
+                    .map(|se| se.is_no_such_key())
+                    .unwrap_or(false);
+                // NOTE(review): S3 may report a missing key as an access
+                // error when the role cannot list the bucket; confirm the
+                // role's permissions if this WARN turns out to be noisy.
+                if !missing {
+                    warn!(key, error = %e, "archive: envelope GET failed");
+                }
+                None
+            }
+        }
+    }
+
+    /// Boot-time recovery: rebuild every actor's ring buffer from the
+    /// archive tail (last `ring_cap` events per actor).
+    pub async fn restore_rings(&self, state: &State, ring_cap: usize) {
+        let feed_root = format!("{}feed/", self.prefix);
+        let actors = match self.list_actor_prefixes(&feed_root).await {
+            Ok(a) => a,
+            Err(e) => {
+                // Without the actor listing there is nothing to restore from.
+                warn!(error = %e, "archive: restore skipped (list actors failed)");
+                return;
+            }
+        };
+
+        let mut restored_actors = 0usize;
+        let mut restored_events = 0usize;
+        for actor_prefix in actors {
+            // One actor failing to list does not abort the others.
+            let keys = match self.list_keys_tail(&actor_prefix, ring_cap).await {
+                Ok(k) => k,
+                Err(e) => {
+                    warn!(actor_prefix, error = %e, "archive: actor listing failed");
+                    continue;
+                }
+            };
+
+            let mut events = Vec::with_capacity(keys.len());
+            for key in keys {
+                // Unreadable or unparsable objects are skipped with a WARN.
+                match self.get_feed_event(&key).await {
+                    Some(evt) => events.push(evt),
+                    None => warn!(key, "archive: feed event GET/parse failed during restore"),
+                }
+            }
+            if events.is_empty() {
+                continue;
+            }
+
+            // Oldest first; the ring is keyed by the actor recorded in the
+            // events themselves rather than by the folder name.
+            events.sort_by_key(|e| e.ts_unix);
+            let actor = events[0].actor_omni.clone();
+            restored_events += events.len();
+            restored_actors += 1;
+            state.restore_ring(actor, events).await;
+        }
+        info!(
+            restored_actors,
+            restored_events, "archive: ring buffers restored"
+        );
+    }
+
+    /// Fetch and JSON-decode one archived feed event. Any failure (GET,
+    /// body read, or parse) collapses to `None`; the caller does the logging.
+    async fn get_feed_event(&self, key: &str) -> Option<FeedEvent> {
+        let out = self
+            .s3
+            .get_object()
+            .bucket(&self.bucket)
+            .key(key)
+            .send()
+            .await
+            .ok()?;
+        let bytes = out.body.collect().await.ok()?.into_bytes();
+        serde_json::from_slice(&bytes).ok()
+    }
+
+    /// One level of common prefixes under `feed/` — the per-actor folders.
+ async fn list_actor_prefixes(&self, feed_root: &str) -> anyhow::Result> { + let mut prefixes = Vec::new(); + let mut token: Option = None; + loop { + let resp = self + .s3 + .list_objects_v2() + .bucket(&self.bucket) + .prefix(feed_root) + .delimiter("/") + .set_continuation_token(token.take()) + .send() + .await?; + for p in resp.common_prefixes() { + if let Some(pfx) = p.prefix() { + prefixes.push(pfx.to_string()); + } + } + match resp.next_continuation_token() { + Some(t) => token = Some(t.to_string()), + None => break, + } + } + Ok(prefixes) + } + + /// The lexical tail (= chronological tail, keys are ts-prefixed) of one + /// actor's feed listing, capped to `n`. + async fn list_keys_tail(&self, actor_prefix: &str, n: usize) -> anyhow::Result> { + let mut tail: Vec = Vec::new(); + let mut token: Option = None; + loop { + let resp = self + .s3 + .list_objects_v2() + .bucket(&self.bucket) + .prefix(actor_prefix) + .set_continuation_token(token.take()) + .send() + .await?; + for o in resp.contents() { + if let Some(k) = o.key() { + tail.push(k.to_string()); + } + } + // Keep only the newest `n` as we page — bounds memory on large + // archives (listing is ascending, so retain the tail). + if tail.len() > n { + tail.drain(..tail.len() - n); + } + match resp.next_continuation_token() { + Some(t) => token = Some(t.to_string()), + None => break, + } + } + Ok(tail) + } +} diff --git a/crates/agentkeys-worker-audit/src/handlers.rs b/crates/agentkeys-worker-audit/src/handlers.rs index ff996ad5..9596f0f1 100644 --- a/crates/agentkeys-worker-audit/src/handlers.rs +++ b/crates/agentkeys-worker-audit/src/handlers.rs @@ -10,19 +10,31 @@ //! POST /v1/audit/append/v2 — store an envelope + return its `envelope_hash` //! GET /v1/audit/envelope/:hash — fetch the canonical CBOR for an envelope hash //! +//! Endpoints (Tier-1 feed + tier-A anchor, issue #109): +//! GET /v1/audit/stream — SSE live feed (?operator=&actor=&backfill=N) +//! 
GET /v1/audit/anchors/:operator — recent anchored batches w/ Merkle proofs +//! GET /v1/audit/relay-info — relay address/omni + anchor config +//! //! Per arch.md §15.3a, V1 + V2 coexist for one migration cycle. +use std::convert::Infallible; + use axum::{ body::Body, - extract::{Path, State}, + extract::{Path, Query, State}, http::{header, HeaderValue, StatusCode}, + response::sse::{Event, KeepAlive, Sse}, response::{IntoResponse, Response}, Json, }; use serde::{Deserialize, Serialize}; use serde_json::json; +use tokio_stream::wrappers::BroadcastStream; +use tokio_stream::{Stream, StreamExt}; -use crate::state::{AuditEvent, FlushResult, FlushV2Result, SharedState, V2QueueEntry}; +use crate::state::{ + AnchorRecord, AuditEvent, FeedEvent, FlushResult, FlushV2Result, SharedState, V2QueueEntry, +}; #[derive(Deserialize)] pub struct AppendRequest { @@ -56,6 +68,11 @@ pub struct FlushResponse { /// the `appendRootV2(operatorOmni, merkleRoot, opKindBitmap, entryCount)` /// inputs for the on-chain anchor. pub flushed_v2: Vec, + /// #109: whether the flushed V2 batches were handed to the tier-A + /// anchor task (poll `GET /v1/audit/anchors/:operator` for the + /// confirmed records — a flush response never waits out a chain + /// confirmation). + pub anchor_scheduled: bool, } pub async fn flush_one( @@ -66,14 +83,18 @@ pub async fn flush_one( .flush(&operator_omni) .await .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?; - let r2 = state + let r2: Vec = state .flush_v2(&operator_omni.to_lowercase()) .await - .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?; + .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))? 
+ .into_iter() + .collect(); + let anchor_scheduled = spawn_anchor(&state, &r2); Ok(Json(FlushResponse { ok: true, flushed: r.into_iter().collect(), - flushed_v2: r2.into_iter().collect(), + flushed_v2: r2, + anchor_scheduled, })) } @@ -88,13 +109,30 @@ pub async fn flush_all( .flush_v2_all() .await .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?; + let anchor_scheduled = spawn_anchor(&state, &r2); Ok(Json(FlushResponse { ok: true, flushed: r, flushed_v2: r2, + anchor_scheduled, })) } +/// Hand flushed V2 batches to the background anchor task (#109). Returns +/// whether anchoring was actually scheduled (relay configured + batches +/// non-empty). +fn spawn_anchor(state: &SharedState, flushed: &[FlushV2Result]) -> bool { + if flushed.is_empty() || state.relay.is_none() { + return false; + } + let state = state.clone(); + let flushed = flushed.to_vec(); + tokio::spawn(async move { + crate::service::anchor_flushed(&state, &flushed).await; + }); + true +} + #[derive(Serialize)] pub struct QueueSizeResponse { pub operator_omni: String, @@ -210,9 +248,9 @@ pub async fn append_v2( .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("hash: {e}")))?; let hash_hex = format!("0x{}", hex::encode(envelope_hash)); - state.store_envelope(hash_hex.clone(), cbor).await; + state.store_envelope(hash_hex.clone(), cbor.clone()).await; // Tier-A anchor feed (#229): queue the envelope hash for the next - // `appendRootV2` Merkle batch alongside the by-hash store above. + // anchor batch alongside the by-hash store above. state .queue_v2( format!("0x{}", hex::encode(operator_omni)), @@ -225,12 +263,175 @@ pub async fn append_v2( ) .await; + // Tier-1 real-time feed (#109): ring buffer + SSE fan-out + cold + // archive. ~100ms event-to-UI comes from this in-process push (no + // polling anywhere on the path). 
+ let op_kind_label = agentkeys_core::audit::AuditOpKind::from_u8(req.op_kind) + .map(|k| k.label().to_string()) + .unwrap_or_else(|| format!("unknown({})", req.op_kind)); + let evt = state + .push_feed(FeedEvent { + kind: "event".into(), + envelope_hash: hash_hex.clone(), + ts_unix: envelope.ts_unix, + actor_omni: format!("0x{}", hex::encode(actor_omni)), + operator_omni: format!("0x{}", hex::encode(operator_omni)), + op_kind: req.op_kind, + op_kind_label, + result: req.result, + intent_text: envelope.intent_text.clone(), + tx_hash: None, + merkle_root: None, + entry_count: None, + }) + .await; + if let Some(archive) = &state.archive { + archive.archive_feed_event(evt); + archive.archive_envelope(hash_hex.clone(), cbor); + } + Ok(Json(AppendV2Response { ok: true, envelope_hash: hash_hex, })) } +// ─── Tier-1 feed + tier-A anchor endpoints (issue #109) ────────────────── + +#[derive(Deserialize)] +pub struct StreamQuery { + /// Filter to one operator's events (0x-hex omni). Strongly recommended + /// — the worker is multi-tenant. + pub operator: Option, + /// Further filter to one actor. + pub actor: Option, + /// Ring-buffer events replayed on connect before going live. Default + /// 0; capped at the ring size. + #[serde(default)] + pub backfill: usize, +} + +/// `GET /v1/audit/stream` — Server-Sent Events: `backfill` ring events, +/// then every matching feed event as it happens. Event types: `audit` +/// (ordinary envelope), `anchor`, `batch_failed`. Heartbeats via SSE +/// keep-alive comments. +pub async fn stream( + State(state): State, + Query(q): Query, +) -> Sse>> { + // Subscribe BEFORE snapshotting the backfill so no event falls between + // the two (duplicates are possible at the seam; consumers dedup by + // envelope_hash). 
+ let live = state.subscribe_feed(); + let backfill = if q.backfill > 0 { + state + .backfill(q.operator.as_deref(), q.actor.as_deref(), q.backfill) + .await + } else { + Vec::new() + }; + let operator = q.operator.clone(); + let actor = q.actor.clone(); + + let matches = move |e: &FeedEvent| -> bool { + if let Some(op) = &operator { + if !op.eq_ignore_ascii_case(&e.operator_omni) { + return false; + } + } + if let Some(a) = &actor { + // Anchor/batch_failed meta events carry the relay's actor omni; + // still deliver them to actor-filtered streams for the operator + // they belong to (the anchor badge needs them). + if e.kind == "event" && !a.eq_ignore_ascii_case(&e.actor_omni) { + return false; + } + } + true + }; + + let backfill_stream = tokio_stream::iter(backfill.into_iter().map(Ok::<_, Infallible>)); + let live_stream = BroadcastStream::new(live).filter_map(move |msg| match msg { + Ok(evt) if matches(&evt) => Some(Ok::<_, Infallible>(evt)), + _ => None, // lagged receivers skip; clients re-sync via backfill + }); + + let stream = backfill_stream + .chain(live_stream) + .map(|evt: Result| { + let evt = evt.expect("infallible"); + let name = match evt.kind.as_str() { + "anchor" => "anchor", + "batch_failed" => "batch_failed", + _ => "audit", + }; + Ok(Event::default() + .event(name) + .data(serde_json::to_string(&evt).unwrap_or_else(|_| "{}".into()))) + }); + Sse::new(stream).keep_alive(KeepAlive::default()) +} + +#[derive(Serialize)] +pub struct AnchorsResponse { + pub operator_omni: String, + pub anchors: Vec, +} + +/// `GET /v1/audit/anchors/:operator` — recent anchored batches (newest +/// last) with per-entry Merkle proofs. The tamper test recomputes each +/// leaf from the fetched envelope and verifies it against +/// `merkle_root_hex` via the proof; any modified event no longer matches. 
+pub async fn anchors_for( + State(state): State, + Path(operator_omni): Path, +) -> Json { + let anchors = state.anchors_for(&operator_omni).await; + Json(AnchorsResponse { + operator_omni, + anchors, + }) +} + +#[derive(Serialize)] +pub struct RelayInfoResponse { + pub anchor_enabled: bool, + /// 0x-hex EVM address of the tier-A relay — verifiers match anchor + /// txs' `from` against this. `null` in degraded mode. + #[serde(skip_serializing_if = "Option::is_none")] + pub relay_address: Option, + /// The relay's derived actor omni (`actor_omni(relay_address)`). + #[serde(skip_serializing_if = "Option::is_none")] + pub relay_omni: Option, + /// Current relay balance in wei (string — u128 range), when the RPC + /// answered. The funding helper reads this. + #[serde(skip_serializing_if = "Option::is_none")] + pub balance_wei: Option, +} + +/// `GET /v1/audit/relay-info` — the relay's public identity + funding +/// state. `heima-fund-audit-relay.sh` reads `relay_address` + `balance_wei` +/// to idempotently top up from the deploy wallet. +pub async fn relay_info(State(state): State) -> Json { + let Some(relay) = state.relay.as_ref() else { + return Json(RelayInfoResponse { + anchor_enabled: false, + relay_address: None, + relay_omni: None, + balance_wei: None, + }); + }; + let balance_wei = crate::anchor::relay_balance_wei(relay, &state.http) + .await + .map(|b| b.to_string()); + Json(RelayInfoResponse { + anchor_enabled: true, + relay_address: Some(relay.relay_address_hex()), + relay_omni: Some(relay.relay_omni_hex()), + balance_wei, + }) +} + /// `GET /v1/audit/envelope/:hash` — return the canonical CBOR for the /// envelope identified by `envelope_hash` (a 0x-prefixed 64-hex string). /// Returns 404 if unknown. @@ -239,7 +440,17 @@ pub async fn append_v2( /// matches by re-running `keccak256(body)`. 
pub async fn get_envelope(State(state): State, Path(hash): Path) -> Response { let key = hash.to_lowercase(); - match state.get_envelope(&key).await { + let mut found = state.get_envelope(&key).await; + if found.is_none() { + // #109: cold-archive fallback — survives worker restarts. + if let Some(archive) = &state.archive { + found = archive.fetch_envelope(&key).await; + if let Some(cbor) = &found { + state.store_envelope(key.clone(), cbor.clone()).await; + } + } + } + match found { Some(cbor) => Response::builder() .status(StatusCode::OK) .header( diff --git a/crates/agentkeys-worker-audit/src/lib.rs b/crates/agentkeys-worker-audit/src/lib.rs index 4cc8b442..4541e7ff 100644 --- a/crates/agentkeys-worker-audit/src/lib.rs +++ b/crates/agentkeys-worker-audit/src/lib.rs @@ -1,15 +1,21 @@ //! Audit-service worker — tier-A Merkle relay per arch.md §15.3. //! //! Accepts per-event audit appends over HTTP, batches them in memory per -//! operator, computes a Merkle tree on flush, and writes the root to the -//! on-chain CredentialAudit contract (one tx per batch — `appendRoot`). +//! operator, computes a Merkle tree on flush, and (#109) anchors each batch +//! on-chain autonomously via the relay EOA + the `AuditRootAnchor` +//! envelope (`CredentialAudit.appendV2`, op_kind 90). Also serves the +//! Tier-1 real-time feed: per-actor ring buffers + SSE + S3 cold archive. //! //! Tier-A vs tier-C (direct `append` per event): tier-A trades latency for //! gas — each batch is one tx regardless of size, but events aren't visible -//! on chain until the next flush. +//! on chain until the next flush (`AGENTKEYS_AUDIT_BATCH_SECONDS`, default +//! 120). 
+pub mod anchor; +pub mod archive; pub mod handlers; pub mod merkle; +pub mod service; pub mod state; use axum::{ @@ -32,5 +38,12 @@ pub fn create_router(state: state::SharedState) -> Router { ) .route("/v1/audit/append/v2", post(handlers::append_v2)) .route("/v1/audit/envelope/:hash", get(handlers::get_envelope)) + // Tier-1 feed + tier-A anchor surfaces (issue #109). + .route("/v1/audit/stream", get(handlers::stream)) + .route( + "/v1/audit/anchors/:operator_omni", + get(handlers::anchors_for), + ) + .route("/v1/audit/relay-info", get(handlers::relay_info)) .with_state(state) } diff --git a/crates/agentkeys-worker-audit/src/main.rs b/crates/agentkeys-worker-audit/src/main.rs index e1df03ee..e226ae39 100644 --- a/crates/agentkeys-worker-audit/src/main.rs +++ b/crates/agentkeys-worker-audit/src/main.rs @@ -1,14 +1,12 @@ use std::sync::Arc; -use axum::routing::{get, post}; -use axum::Router; use clap::Parser; -use tracing::info; +use tracing::{info, warn}; -use agentkeys_worker_audit::handlers; use agentkeys_worker_audit::state::State; +use agentkeys_worker_audit::{anchor, archive, create_router, service}; -/// Audit-service worker — tier-A Merkle relay (arch.md §15.3). +/// Audit-service worker — tier-A Merkle relay (arch.md §15.3, issue #109). #[derive(Parser)] #[command(name = "agentkeys-worker-audit", version)] struct Args { @@ -28,14 +26,16 @@ struct Args { )] leaves_dir: String, - /// Periodic flush interval, in seconds. Default 300 (5 min). Set to 0 to - /// disable the timer (manual flush via /v1/audit/flush-all only). - #[arg( - long, - env = "AGENTKEYS_WORKER_AUDIT_FLUSH_INTERVAL_SECS", - default_value_t = 300 - )] - flush_interval_secs: u64, + /// Tier-2 batch cadence in seconds (#109; the 2-minute anchor is a + /// PRODUCT decision — see the issue before relaxing it). 0 disables + /// the timer (manual flush via /v1/audit/flush-all only). 
+ #[arg(long, env = "AGENTKEYS_AUDIT_BATCH_SECONDS")] + batch_seconds: Option, + + /// DEPRECATED alias for --batch-seconds (pre-#109 name). Honored only + /// when AGENTKEYS_AUDIT_BATCH_SECONDS is unset. + #[arg(long, env = "AGENTKEYS_WORKER_AUDIT_FLUSH_INTERVAL_SECS")] + flush_interval_secs: Option, } #[tokio::main] @@ -49,68 +49,106 @@ async fn main() -> anyhow::Result<()> { .init(); let args = Args::parse(); - let state = Arc::new(State::new(args.leaves_dir.clone())); + let batch_seconds = match (args.batch_seconds, args.flush_interval_secs) { + (Some(s), _) => s, + (None, Some(legacy)) => { + warn!( + legacy, + "AGENTKEYS_WORKER_AUDIT_FLUSH_INTERVAL_SECS is deprecated — set AGENTKEYS_AUDIT_BATCH_SECONDS" + ); + legacy + } + (None, None) => 120, + }; + + let relay = match anchor::RelayConfig::from_env() { + Ok(Some(r)) => { + info!( + relay_address = %r.relay_address_hex(), + relay_omni = %r.relay_omni_hex(), + rpc = %r.rpc_url, + "tier-A anchor relay ENABLED" + ); + Some(r) + } + Ok(None) => { + warn!( + "tier-A anchor relay UNCONFIGURED (no AGENTKEYS_AUDIT_RELAY_KEY[_FILE]) — \ + flushes will log appendRootV2 inputs only (degraded mode)" + ); + None + } + Err(e) => { + // Boot anyway (the feed + store still work) but say why loudly — + // same degraded-boot posture as the #241 bundler. + warn!(error = %e, "tier-A anchor relay config INVALID — degraded mode"); + None + } + }; - // Spawn the periodic flusher if configured. - if args.flush_interval_secs > 0 { + let s3_archive = archive::Archive::from_env().await; + if s3_archive.is_none() { + warn!("S3 cold archive UNCONFIGURED (no AGENTKEYS_AUDIT_S3_BUCKET) — rings are in-memory only"); + } + + let state = Arc::new( + State::new(args.leaves_dir.clone()) + .with_relay(relay) + .with_archive(s3_archive), + ); + + // Boot-time ring recovery (#109): last 1000 events per actor. 
+ if let Some(archive) = state.archive.clone() { + archive + .restore_rings(&state, agentkeys_worker_audit::state::DEFAULT_RING_CAP) + .await; + } + + // The Tier-2 anchor timer (#109): flush + anchor every batch_seconds. + if batch_seconds > 0 { let state = state.clone(); - let interval = args.flush_interval_secs; tokio::spawn(async move { - let mut t = tokio::time::interval(std::time::Duration::from_secs(interval)); + let mut t = tokio::time::interval(std::time::Duration::from_secs(batch_seconds)); t.tick().await; // skip immediate fire loop { t.tick().await; + // V1 queues — legacy log-only flush (operator master commits + // appendRoot itself; see heima-worker-smoke.sh). match state.flush_all().await { - Ok(rs) if !rs.is_empty() => { + Ok(rs) => { for r in rs { info!( operator_omni = %r.operator_omni, entries = r.entry_count, root = %r.merkle_root_hex, leaves = %r.leaves_path, - "auto-flush: Merkle root ready for on-chain appendRoot" + "auto-flush: V1 Merkle root ready for on-chain appendRoot" ); } } - Ok(_) => {} - Err(e) => tracing::error!(error=%e, "flush failed"), + Err(e) => tracing::error!(error = %e, "v1 flush failed"), } - // V2 envelope batches (#229) — anchor inputs for appendRootV2. - match state.flush_v2_all().await { - Ok(rs) if !rs.is_empty() => { - for r in rs { + // V2 queues — flush AND anchor (the #109 tier-A default-on). 
+ match service::flush_v2_and_anchor(&state, None).await { + Ok((flushed, anchored)) => { + if !flushed.is_empty() { info!( - operator_omni = %r.operator_omni, - entries = r.entry_count, - root = %r.merkle_root_hex, - op_kind_bitmap = %r.op_kind_bitmap_hex, - leaves = %r.leaves_path, - "auto-flush: V2 Merkle root ready for on-chain appendRootV2" + batches = flushed.len(), + anchored = anchored.len(), + "auto-flush: V2 batches processed" ); } } - Ok(_) => {} - Err(e) => tracing::error!(error=%e, "v2 flush failed"), + Err(e) => tracing::error!(error = %e, "v2 flush failed"), } } }); + info!(batch_seconds, "tier-2 anchor timer running"); + } else { + warn!("batch timer DISABLED (batch_seconds=0) — manual flush only"); } - let app = Router::new() - .route("/healthz", get(|| async { "ok" })) - .route("/v1/audit/append", post(handlers::append)) - .route("/v1/audit/flush/:operator_omni", post(handlers::flush_one)) - .route("/v1/audit/flush-all", post(handlers::flush_all)) - .route( - "/v1/audit/queue-size/:operator_omni", - get(handlers::queue_size), - ) - // V2 endpoints (arch.md §15.3a, issue #97 phase B). V1 stays so - // existing callers keep working during the migration cycle. 
- .route("/v1/audit/append/v2", post(handlers::append_v2)) - .route("/v1/audit/envelope/:hash", get(handlers::get_envelope)) - .with_state(state); - + let app = create_router(state); let listener = tokio::net::TcpListener::bind(&args.bind).await?; info!(bind = %args.bind, "agentkeys-worker-audit listening"); axum::serve(listener, app).await?; diff --git a/crates/agentkeys-worker-audit/src/merkle.rs b/crates/agentkeys-worker-audit/src/merkle.rs index 4c758959..70471a4d 100644 --- a/crates/agentkeys-worker-audit/src/merkle.rs +++ b/crates/agentkeys-worker-audit/src/merkle.rs @@ -118,6 +118,18 @@ pub fn merkle_proof(raw_leaves: &[Bytes32], index: usize) -> Vec { proof } +/// Verify a sorted-pairs proof for a RAW (unprefixed) leaf against a root — +/// the off-chain mirror of `CredentialAudit.verifyEntryInRoot`. Used by the +/// anchor-record tests and any consumer of `GET /v1/audit/anchors` proofs +/// (#109 tamper-evidence check). +pub fn verify_proof(raw_leaf: Bytes32, proof: &[Bytes32], root: Bytes32) -> bool { + let mut computed = leaf_prefix(raw_leaf); + for sibling in proof { + computed = hash_pair(computed, *sibling); + } + computed == root +} + #[cfg(test)] mod tests { use super::*; diff --git a/crates/agentkeys-worker-audit/src/service.rs b/crates/agentkeys-worker-audit/src/service.rs new file mode 100644 index 00000000..a989eda4 --- /dev/null +++ b/crates/agentkeys-worker-audit/src/service.rs @@ -0,0 +1,487 @@ +//! Flush-and-anchor orchestration (issue #109) — the ONE V2 flush path, +//! shared by the periodic timer and the HTTP flush endpoints so a manual +//! flush can never silently drain a batch past the anchor. +//! +//! Per flushed batch: +//! - relay configured → build the `AuditRootAnchor` (op_kind 90) envelope, +//! commit its hash on-chain via `appendV2`, record the anchor (with +//! per-entry Merkle proofs) and surface it in the feed; +//! - submission exhausted its retries → re-queue the batch entries (the +//! 
next flush re-batches them under a fresh root), emit the +//! `AuditBatchFailed` (op_kind 91) envelope into the store + queue + +//! feed, and ERROR-log (journald is the operator alert path); +//! - no relay → pre-#109 degraded mode: log the `appendRootV2` inputs. + +use std::time::{SystemTime, UNIX_EPOCH}; + +use agentkeys_core::audit::{ + envelope_for, AuditBatchFailedBody, AuditOpKind, AuditResult, AuditRootAnchorBody, +}; +use tracing::{error, info, warn}; + +use crate::anchor::{encode_append_v2_calldata, submit_anchor_with_retries}; +use crate::state::{AnchorRecord, FeedEvent, FlushV2Result, SharedState, V2QueueEntry}; + +fn now_unix() -> u64 { + SystemTime::now() + .duration_since(UNIX_EPOCH) + .map(|d| d.as_secs()) + .unwrap_or(0) +} + +fn decode32_hex(s: &str) -> [u8; 32] { + let v = hex::decode(s.trim_start_matches("0x")).unwrap_or_default(); + let mut out = [0u8; 32]; + let n = v.len().min(32); + out[..n].copy_from_slice(&v[..n]); + out +} + +/// Drain V2 queues (one operator, or all when `None`) and anchor each +/// batch inline. The TIMER path — anchoring (chain receipt wait included) +/// happens before the next tick. Returns the flush results plus the +/// anchors that landed. +pub async fn flush_v2_and_anchor( + state: &SharedState, + operator_omni: Option<&str>, +) -> anyhow::Result<(Vec, Vec)> { + let flushed = match operator_omni { + Some(op) => state.flush_v2(op).await?.into_iter().collect(), + None => state.flush_v2_all().await?, + }; + let anchors = anchor_flushed(state, &flushed).await; + Ok((flushed, anchors)) +} + +/// Anchor a set of already-flushed batches. The HTTP flush handlers SPAWN +/// this (a flush response must not wait out a chain confirmation — callers +/// poll `GET /v1/audit/anchors/:operator` for the outcome, mirroring the +/// "anchored within 2 min" product contract); the timer awaits it inline. 
+pub async fn anchor_flushed(state: &SharedState, flushed: &[FlushV2Result]) -> Vec { + let mut anchors = Vec::new(); + for flush in flushed { + if let Some(record) = anchor_one_batch(state, flush).await { + anchors.push(record); + } + } + anchors +} + +/// Anchor a single flushed batch. `None` when the relay is unconfigured or +/// the batch failed (entries re-queued — nothing is lost either way). +async fn anchor_one_batch(state: &SharedState, flush: &FlushV2Result) -> Option { + let Some(relay) = state.relay.as_ref() else { + info!( + operator_omni = %flush.operator_omni, + entries = flush.entry_count, + root = %flush.merkle_root_hex, + op_kind_bitmap = %flush.op_kind_bitmap_hex, + leaves = %flush.leaves_path, + "anchor relay unconfigured — appendRootV2 inputs logged only (degraded mode)" + ); + return None; + }; + + let operator32 = decode32_hex(&flush.operator_omni); + let ts = now_unix(); + + // Anti-spam gate (#109): anchor only operators with a registered + // on-chain master. The open append/v2 endpoint otherwise lets a + // spammer mint fake operator omnis that each cost the relay one tx + // per tick. Unregistered → DROP (the envelopes stay fetchable by + // hash; re-queueing spam forever would grow the queue unboundedly). + // Transient RPC failure → re-queue and retry next tick (never drop + // real events on a flake, never burn gas blind). 
+ match operator_anchor_allowed(state, &flush.operator_omni, operator32).await { + Ok(true) => {} + Ok(false) => { + warn!( + operator_omni = %flush.operator_omni, + entries = flush.entry_count, + "anchor gate: operator has NO registered master — batch dropped (spam posture)" + ); + return None; + } + Err(e) => { + warn!( + operator_omni = %flush.operator_omni, + error = %e, + "anchor gate: registry check failed — batch re-queued for next tick" + ); + state + .requeue_v2(&flush.operator_omni, flush.entries.clone()) + .await; + return None; + } + } + + // The anchor envelope — an honest AuditEnvelope whose hash goes on + // chain. actor = the relay's derived omni; operator = the REAL + // operator whose batch this is (stays an indexed topic on-chain). + let body = AuditRootAnchorBody { + merkle_root: flush.merkle_root_hex.clone(), + op_kind_bitmap: flush.op_kind_bitmap_hex.clone(), + entry_count: flush.entry_count, + relay_address: relay.relay_address_hex(), + }; + let envelope = match envelope_for( + relay.relay_omni, + operator32, + AuditOpKind::AuditRootAnchor, + body, + AuditResult::Success, + None, + None, + ) { + Ok(mut e) => { + e.ts_unix = ts; + e + } + Err(e) => { + error!(error = %e, "anchor envelope build failed — re-queueing batch"); + state + .requeue_v2(&flush.operator_omni, flush.entries.clone()) + .await; + return None; + } + }; + let (cbor, env_hash) = match (envelope.to_canonical_cbor(), envelope.envelope_hash()) { + (Ok(c), Ok(h)) => (c, h), + (c, h) => { + error!(?c, ?h, "anchor envelope encode failed — re-queueing batch"); + state + .requeue_v2(&flush.operator_omni, flush.entries.clone()) + .await; + return None; + } + }; + let env_hash_hex = format!("0x{}", hex::encode(env_hash)); + + let calldata = encode_append_v2_calldata( + operator32, + relay.relay_omni, + AuditOpKind::AuditRootAnchor as u8, + env_hash, + ); + + match submit_anchor_with_retries(relay, &state.http, calldata).await { + Ok(receipt) => { + state + 
.store_envelope(env_hash_hex.clone(), cbor.clone()) + .await; + if let Some(archive) = &state.archive { + archive.archive_envelope(env_hash_hex.clone(), cbor); + } + let record = state + .record_anchor(flush, env_hash_hex.clone(), receipt.tx_hash.clone(), ts) + .await; + let evt = state + .push_feed(FeedEvent { + kind: "anchor".into(), + envelope_hash: env_hash_hex, + ts_unix: ts, + actor_omni: relay.relay_omni_hex(), + operator_omni: flush.operator_omni.clone(), + op_kind: AuditOpKind::AuditRootAnchor as u8, + op_kind_label: AuditOpKind::AuditRootAnchor.label().into(), + result: AuditResult::Success as u8, + intent_text: None, + tx_hash: Some(receipt.tx_hash.clone()), + merkle_root: Some(flush.merkle_root_hex.clone()), + entry_count: Some(flush.entry_count), + }) + .await; + if let Some(archive) = &state.archive { + archive.archive_feed_event(evt); + } + info!( + operator_omni = %flush.operator_omni, + entries = flush.entry_count, + root = %flush.merkle_root_hex, + tx_hash = %receipt.tx_hash, + attempts = receipt.attempts_used, + "tier-A batch anchored on-chain" + ); + Some(record) + } + Err(failure) => { + // Durability first: the entries go back on the queue so the + // next tick re-batches them under a fresh root. + state + .requeue_v2(&flush.operator_omni, flush.entries.clone()) + .await; + emit_batch_failed(state, flush, failure.attempts, &failure.last_error).await; + error!( + operator_omni = %flush.operator_omni, + entries = flush.entry_count, + root = %flush.merkle_root_hex, + attempts = failure.attempts, + last_error = %failure.last_error, + "tier-A anchor FAILED after retries — entries re-queued, audit.batch_failed emitted" + ); + None + } + } +} + +/// TTL-cached `operatorMasterWallet(omni) != 0` check (the #109 anti-spam +/// anchor gate). Positive answers cache 10 min; negative 60 s. 
+async fn operator_anchor_allowed( + state: &SharedState, + operator_hex: &str, + operator32: [u8; 32], +) -> anyhow::Result { + const POSITIVE_TTL: std::time::Duration = std::time::Duration::from_secs(600); + const NEGATIVE_TTL: std::time::Duration = std::time::Duration::from_secs(60); + let key = operator_hex.to_lowercase(); + { + let cache = state.anchor_gate_cache.lock().await; + if let Some((allowed, at)) = cache.get(&key) { + let ttl = if *allowed { POSITIVE_TTL } else { NEGATIVE_TTL }; + if at.elapsed() < ttl { + return Ok(*allowed); + } + } + } + let relay = state + .relay + .as_ref() + .ok_or_else(|| anyhow::anyhow!("gate called without relay"))?; + let allowed = crate::anchor::operator_has_master(relay, &state.http, operator32).await?; + state + .anchor_gate_cache + .lock() + .await + .insert(key, (allowed, std::time::Instant::now())); + Ok(allowed) +} + +/// Emit the `AuditBatchFailed` envelope: stored by hash, queued for a +/// future anchor (so the failure itself lands on-chain once the chain +/// recovers), and pushed to the live feed. 
+async fn emit_batch_failed( + state: &SharedState, + flush: &FlushV2Result, + attempts: u32, + last_error: &str, +) { + let Some(relay) = state.relay.as_ref() else { + return; + }; + let ts = now_unix(); + let mut truncated = last_error.to_string(); + truncated.truncate(512); + let body = AuditBatchFailedBody { + merkle_root: flush.merkle_root_hex.clone(), + entry_count: flush.entry_count, + attempts: attempts.min(u8::MAX as u32) as u8, + last_error: truncated, + }; + let envelope = match envelope_for( + relay.relay_omni, + decode32_hex(&flush.operator_omni), + AuditOpKind::AuditBatchFailed, + body, + AuditResult::Failure, + None, + None, + ) { + Ok(mut e) => { + e.ts_unix = ts; + e + } + Err(e) => { + warn!(error = %e, "batch_failed envelope build failed"); + return; + } + }; + let (cbor, env_hash) = match (envelope.to_canonical_cbor(), envelope.envelope_hash()) { + (Ok(c), Ok(h)) => (c, h), + _ => { + warn!("batch_failed envelope encode failed"); + return; + } + }; + let env_hash_hex = format!("0x{}", hex::encode(env_hash)); + state + .store_envelope(env_hash_hex.clone(), cbor.clone()) + .await; + if let Some(archive) = &state.archive { + archive.archive_envelope(env_hash_hex.clone(), cbor); + } + state + .queue_v2( + flush.operator_omni.clone(), + V2QueueEntry { + envelope_hash: env_hash_hex.clone(), + op_kind: AuditOpKind::AuditBatchFailed as u8, + actor_omni: relay.relay_omni_hex(), + ts_unix: ts, + }, + ) + .await; + let evt = state + .push_feed(FeedEvent { + kind: "batch_failed".into(), + envelope_hash: env_hash_hex, + ts_unix: ts, + actor_omni: relay.relay_omni_hex(), + operator_omni: flush.operator_omni.clone(), + op_kind: AuditOpKind::AuditBatchFailed as u8, + op_kind_label: AuditOpKind::AuditBatchFailed.label().into(), + result: AuditResult::Failure as u8, + intent_text: None, + tx_hash: None, + merkle_root: Some(flush.merkle_root_hex.clone()), + entry_count: Some(flush.entry_count), + }) + .await; + if let Some(archive) = &state.archive { + 
archive.archive_feed_event(evt); + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::anchor::RelayConfig; + use crate::state::{State, V2QueueEntry}; + use axum::{routing::post, Json, Router}; + use serde_json::{json, Value}; + use std::sync::Arc; + use std::time::Duration; + + /// Fake JSON-RPC node for the full flush→gate→anchor path. Operator + /// omni 0x22… is registered (non-zero master); anything else isn't. + async fn spawn_fake_rpc() -> String { + let app = Router::new().route( + "/", + post(move |Json(req): Json| async move { + let method = req.get("method").and_then(|m| m.as_str()).unwrap_or(""); + let result = match method { + "eth_getTransactionCount" => json!("0x0"), + "eth_gasPrice" => json!("0x3b9aca00"), + "eth_sendRawTransaction" => json!(format!("0x{}", "cd".repeat(32))), + "eth_getTransactionReceipt" => json!({"status": "0x1"}), + "eth_call" => { + let data = req["params"][0]["data"].as_str().unwrap_or(""); + if data.ends_with(&"22".repeat(32)) { + json!(format!( + "0x{:0>64}", + "9d8a62f656a8d1615c1294fd71e9cfb3e4855a4f" + )) + } else { + json!(format!("0x{}", "00".repeat(32))) + } + } + other => panic!("unexpected method {other}"), + }; + Json(json!({"jsonrpc": "2.0", "id": 1, "result": result})) + }), + ); + let listener = tokio::net::TcpListener::bind("127.0.0.1:0").await.unwrap(); + let addr = listener.local_addr().unwrap(); + tokio::spawn(async move { + axum::serve(listener, app).await.ok(); + }); + format!("http://{addr}/") + } + + fn relay_cfg(rpc_url: String) -> RelayConfig { + RelayConfig::build( + rpc_url, + 212013, + &format!("0x{}", "11".repeat(20)), + &format!("0x{}", "33".repeat(20)), + &format!("0x{}", "46".repeat(32)), + 200_000, + 3, + Duration::ZERO, + Duration::from_secs(5), + ) + .unwrap() + } + + fn entry(hash_byte: u8, op_kind: u8) -> V2QueueEntry { + V2QueueEntry { + envelope_hash: format!("0x{}", hex::encode([hash_byte; 32])), + op_kind, + actor_omni: format!("0x{}", "aa".repeat(32)), + ts_unix: 
1_700_000_000, + } + } + + #[tokio::test] + async fn registered_operator_batch_anchors_end_to_end() { + let rpc = spawn_fake_rpc().await; + let state = Arc::new(State::new("/tmp".into()).with_relay(Some(relay_cfg(rpc)))); + let registered_op = format!("0x{}", "22".repeat(32)); + state.queue_v2(registered_op.clone(), entry(0x01, 1)).await; + state.queue_v2(registered_op.clone(), entry(0x02, 11)).await; + + let mut feed = state.subscribe_feed(); + let (flushed, anchored) = flush_v2_and_anchor(&state, None).await.unwrap(); + std::fs::remove_file(&flushed[0].leaves_path).ok(); + assert_eq!(flushed.len(), 1); + assert_eq!(anchored.len(), 1); + assert_eq!(anchored[0].tx_hash, format!("0x{}", "cd".repeat(32))); + assert_eq!(anchored[0].entries.len(), 2); + + // The anchor surfaced in the live feed + the anchors endpoint state + // + the by-hash envelope store (typed back as AuditRootAnchor). + let evt = feed.try_recv().expect("anchor feed event"); + assert_eq!(evt.kind, "anchor"); + assert_eq!(evt.op_kind, 90); + assert_eq!(evt.tx_hash.as_deref(), Some(anchored[0].tx_hash.as_str())); + let records = state.anchors_for(®istered_op).await; + assert_eq!(records.len(), 1); + let cbor = state + .get_envelope(&anchored[0].anchor_envelope_hash) + .await + .expect("anchor envelope stored"); + let env = agentkeys_core::audit::AuditEnvelope::from_canonical_cbor(&cbor).unwrap(); + match env.typed_body().unwrap() { + agentkeys_core::audit::TypedAuditBody::AuditRootAnchor(b) => { + assert_eq!(b.entry_count, 2); + assert_eq!(b.merkle_root, anchored[0].merkle_root_hex); + } + other => panic!("unexpected body {other:?}"), + } + // Queue is empty — nothing re-queued on success. 
+ assert!(state.flush_v2(®istered_op).await.unwrap().is_none()); + } + + #[tokio::test] + async fn unregistered_operator_batch_is_dropped_not_anchored() { + let rpc = spawn_fake_rpc().await; + let state = Arc::new(State::new("/tmp".into()).with_relay(Some(relay_cfg(rpc)))); + let spam_op = format!("0x{}", "99".repeat(32)); + state.queue_v2(spam_op.clone(), entry(0x05, 1)).await; + + let (flushed, anchored) = flush_v2_and_anchor(&state, None).await.unwrap(); + std::fs::remove_file(&flushed[0].leaves_path).ok(); + assert_eq!(flushed.len(), 1, "flush still drains"); + assert!(anchored.is_empty(), "no gas burned for spam omnis"); + // Dropped, not re-queued — the queue stays empty. + assert!(state.flush_v2(&spam_op).await.unwrap().is_none()); + } + + #[tokio::test] + async fn rpc_outage_requeues_and_emits_nothing() { + // Relay points at a closed port: the GATE check itself fails → + // conservative re-queue (no drop, no batch_failed — transient). + let listener = tokio::net::TcpListener::bind("127.0.0.1:0").await.unwrap(); + let dead = format!("http://{}/", listener.local_addr().unwrap()); + drop(listener); + let state = Arc::new(State::new("/tmp".into()).with_relay(Some(relay_cfg(dead)))); + let op = format!("0x{}", "22".repeat(32)); + state.queue_v2(op.clone(), entry(0x07, 1)).await; + + let (flushed, anchored) = flush_v2_and_anchor(&state, None).await.unwrap(); + std::fs::remove_file(&flushed[0].leaves_path).ok(); + assert!(anchored.is_empty()); + let r = state.flush_v2(&op).await.unwrap().expect("re-queued"); + std::fs::remove_file(&r.leaves_path).ok(); + assert_eq!(r.entry_count, 1, "entries preserved for the next tick"); + } +} diff --git a/crates/agentkeys-worker-audit/src/state.rs b/crates/agentkeys-worker-audit/src/state.rs index 06a4448a..64f351b6 100644 --- a/crates/agentkeys-worker-audit/src/state.rs +++ b/crates/agentkeys-worker-audit/src/state.rs @@ -1,14 +1,25 @@ -//! Per-operator in-memory event queue + flush logic. +//! 
Per-operator in-memory event queue + flush logic, plus the #109 Tier-1 +//! real-time feed surfaces: per-actor ring buffers, the SSE broadcast +//! channel, and the anchored-batch history (with Merkle proofs). -use std::collections::HashMap; +use std::collections::{HashMap, VecDeque}; use std::sync::Arc; use std::time::{SystemTime, UNIX_EPOCH}; use serde::{Deserialize, Serialize}; -use tokio::sync::Mutex; +use tokio::sync::{broadcast, Mutex}; use crate::merkle::{keccak256, merkle_proof, merkle_root, Bytes32}; +/// Per-actor ring-buffer capacity (issue #109: "last 1000 events per +/// actor"). Tests shrink it via `State::with_caps`. +pub const DEFAULT_RING_CAP: usize = 1000; +/// Anchored-batch records retained per operator. +pub const DEFAULT_ANCHOR_CAP: usize = 50; +/// Broadcast fan-out capacity — slow SSE subscribers that lag past this +/// many events miss them in the live stream (they re-sync via backfill). +const FEED_CHANNEL_CAP: usize = 1024; + #[derive(Clone, Debug, Serialize, Deserialize)] pub struct AuditEvent { /// 0x-prefixed 32-byte hex. @@ -62,7 +73,36 @@ pub struct FlushV2Result { pub entries: Vec, } -#[derive(Default)] +/// One entry in the Tier-1 real-time feed (issue #109): the JSON shape the +/// SSE stream emits, the ring buffers hold, and the S3 archive persists. +/// The shape has ONE owner — `agentkeys_types::audit_feed::AuditFeedEvent` +/// — shared with the daemon's feed bridge (the #203 one-owner rule). +pub use agentkeys_types::audit_feed::AuditFeedEvent as FeedEvent; + +/// Merkle membership proof for one batch entry — what the tamper test and +/// external verifiers consume from `GET /v1/audit/anchors`. +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct AnchorEntryProof { + pub envelope_hash: String, + pub leaf_index: usize, + pub proof: Vec, +} + +/// One anchored batch: the root + the on-chain commitment + per-entry +/// proofs. Retained per operator (last `DEFAULT_ANCHOR_CAP`). 
+#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct AnchorRecord { + pub operator_omni: String, + pub merkle_root_hex: String, + pub op_kind_bitmap_hex: String, + pub entry_count: u64, + /// Hash of the `AuditRootAnchor` envelope committed on-chain. + pub anchor_envelope_hash: String, + pub tx_hash: String, + pub anchored_ts_unix: u64, + pub entries: Vec, +} + pub struct State { /// operator_omni (0x...) → queue of pending events. queues: Mutex>>, @@ -71,27 +111,201 @@ pub struct State { /// `envelope_hash` (lowercased 0x-hex) → canonical CBOR bytes. /// Populated by `POST /v1/audit/append/v2`; read by `GET /// /v1/audit/envelope/`. Per arch.md §15.3a issue #97 phase B. - /// - /// In-memory for v0 — the chain commitment is the durability - /// mechanism; if the worker restarts before a chain `appendV2` lands, - /// callers re-emit. Persistent storage (e.g., S3 - /// `s3:///audit/envelopes/.cbor`) is tracked as a - /// follow-up alongside the contract redeploy. + /// The S3 cold archive (#109, `archive.rs`) is the durable layer; this + /// map is the hot cache. envelopes: Mutex>>, /// operator_omni (0x...) → V2 envelopes awaiting the tier-A on-chain - /// anchor (`appendRootV2`). Fed by `POST /v1/audit/append/v2` (#229); - /// drained by the same flush endpoints/timer as the V1 queues. + /// anchor. Fed by `POST /v1/audit/append/v2` (#229); drained by the + /// flush endpoints/timer (#109: anchored on-chain when the relay is + /// configured). v2_queues: Mutex>>, + /// actor_omni (0x...) → last `ring_cap` feed events (#109 Tier 1). + rings: Mutex>>, + /// operator_omni (0x...) → recent anchored batches with proofs. + anchors: Mutex>>, + /// Live fan-out to SSE subscribers. + feed_tx: broadcast::Sender, + ring_cap: usize, + anchor_cap: usize, + /// Tier-A anchor relay (#109). `None` = degraded mode: flushes log the + /// `appendRootV2` inputs (pre-#109 behavior) and anchor nothing. + pub relay: Option, + /// Shared HTTP client for chain RPC. 
+ pub http: reqwest::Client, + /// S3 cold archive (#109). `None` = in-memory only. + pub archive: Option, + /// Anti-spam anchor-gate cache (#109): operator_omni → (has_master, + /// checked_at). Positive entries valid 10 min, negative 60 s (so a + /// freshly-registered operator isn't held back long). + pub anchor_gate_cache: Mutex>, } impl State { pub fn new(leaves_dir: String) -> Self { + Self::with_caps(leaves_dir, DEFAULT_RING_CAP, DEFAULT_ANCHOR_CAP) + } + + pub fn with_caps(leaves_dir: String, ring_cap: usize, anchor_cap: usize) -> Self { + let (feed_tx, _) = broadcast::channel(FEED_CHANNEL_CAP); Self { queues: Mutex::new(HashMap::new()), leaves_dir, envelopes: Mutex::new(HashMap::new()), v2_queues: Mutex::new(HashMap::new()), + rings: Mutex::new(HashMap::new()), + anchors: Mutex::new(HashMap::new()), + feed_tx, + ring_cap, + anchor_cap, + relay: None, + http: reqwest::Client::new(), + archive: None, + anchor_gate_cache: Mutex::new(HashMap::new()), + } + } + + /// Attach the tier-A anchor relay (builder style — boot path only). + pub fn with_relay(mut self, relay: Option) -> Self { + self.relay = relay; + self + } + + /// Attach the S3 cold archive (builder style — boot path only). + pub fn with_archive(mut self, archive: Option) -> Self { + self.archive = archive; + self + } + + /// Subscribe to the live feed (SSE handler + the daemon bridge). + pub fn subscribe_feed(&self) -> broadcast::Receiver { + self.feed_tx.subscribe() + } + + /// Push a feed event: append to the actor's ring buffer (evicting past + /// `ring_cap`) and fan out to live subscribers. Returns the event back + /// so callers can archive it. + pub async fn push_feed(&self, evt: FeedEvent) -> FeedEvent { + { + let mut rings = self.rings.lock().await; + let ring = rings.entry(evt.actor_omni.clone()).or_default(); + ring.push_back(evt.clone()); + while ring.len() > self.ring_cap { + ring.pop_front(); + } + } + // Send fails only when no subscriber exists — that's fine. 
+ let _ = self.feed_tx.send(evt.clone()); + evt + } + + /// Restore one actor's ring from the cold archive (boot-time, #109). + /// Events are assumed chronologically sorted; caps at `ring_cap`. + pub async fn restore_ring(&self, actor_omni: String, events: Vec) { + let mut ring: VecDeque = events.into(); + while ring.len() > self.ring_cap { + ring.pop_front(); + } + let mut rings = self.rings.lock().await; + rings.insert(actor_omni, ring); + } + + /// Recent feed events, optionally filtered by operator and/or actor, + /// chronologically sorted, capped to the most recent `limit`. Powers + /// SSE backfill on connect. + pub async fn backfill( + &self, + operator_omni: Option<&str>, + actor_omni: Option<&str>, + limit: usize, + ) -> Vec { + let rings = self.rings.lock().await; + let mut out: Vec = Vec::new(); + for (actor, ring) in rings.iter() { + if let Some(a) = actor_omni { + if !a.eq_ignore_ascii_case(actor) { + continue; + } + } + for e in ring.iter() { + if let Some(op) = operator_omni { + if !op.eq_ignore_ascii_case(&e.operator_omni) { + continue; + } + } + out.push(e.clone()); + } + } + out.sort_by_key(|e| e.ts_unix); + if out.len() > limit { + out.drain(..out.len() - limit); + } + out + } + + /// Record an anchored batch (computing per-entry Merkle proofs) in the + /// operator's history, capped at `anchor_cap`; returns the stored record.
+ pub async fn record_anchor( + &self, + flush: &FlushV2Result, + anchor_envelope_hash: String, + tx_hash: String, + anchored_ts_unix: u64, + ) -> AnchorRecord { + let leaves: Vec = flush + .entries + .iter() + .map(|e| decode32(&e.envelope_hash)) + .collect(); + let entries = flush + .entries + .iter() + .enumerate() + .map(|(i, e)| AnchorEntryProof { + envelope_hash: e.envelope_hash.clone(), + leaf_index: i, + proof: merkle_proof(&leaves, i) + .iter() + .map(|p| format!("0x{}", hex::encode(p))) + .collect(), + }) + .collect(); + let record = AnchorRecord { + operator_omni: flush.operator_omni.clone(), + merkle_root_hex: flush.merkle_root_hex.clone(), + op_kind_bitmap_hex: flush.op_kind_bitmap_hex.clone(), + entry_count: flush.entry_count, + anchor_envelope_hash, + tx_hash, + anchored_ts_unix, + entries, + }; + let mut anchors = self.anchors.lock().await; + let v = anchors.entry(flush.operator_omni.clone()).or_default(); + v.push(record.clone()); + while v.len() > self.anchor_cap { + v.remove(0); } + record + } + + /// Recent anchored batches for one operator (newest last). + pub async fn anchors_for(&self, operator_omni: &str) -> Vec { + let anchors = self.anchors.lock().await; + anchors + .iter() + .find(|(op, _)| op.eq_ignore_ascii_case(operator_omni)) + .map(|(_, v)| v.clone()) + .unwrap_or_default() + } + + /// Put entries BACK at the head of an operator's V2 queue after a + /// failed anchor — the next flush re-batches them (fresh root).
+ pub async fn requeue_v2(&self, operator_omni: &str, entries: Vec) { + let mut q = self.v2_queues.lock().await; + let v = q.entry(operator_omni.to_string()).or_default(); + let mut merged = entries; + merged.append(v); + *v = merged; } /// Store a canonical-CBOR-encoded `AuditEnvelope` keyed by its @@ -377,6 +591,114 @@ mod tests { std::fs::remove_file(&r.leaves_path).ok(); } + fn feed(actor: u8, operator: u8, ts: u64, hash: u8) -> FeedEvent { + FeedEvent { + kind: "event".into(), + envelope_hash: format!("0x{}", hex::encode([hash; 32])), + ts_unix: ts, + actor_omni: format!("0x{}", hex::encode([actor; 32])), + operator_omni: format!("0x{}", hex::encode([operator; 32])), + op_kind: 1, + op_kind_label: "cred.fetch".into(), + result: 0, + intent_text: None, + tx_hash: None, + merkle_root: None, + entry_count: None, + } + } + + #[tokio::test] + async fn ring_caps_per_actor_and_backfill_filters() { + let s = State::with_caps("/tmp".into(), 3, 2); + for i in 0..5u8 { + s.push_feed(feed(0xA1, 0xB1, 100 + i as u64, i)).await; + } + s.push_feed(feed(0xA2, 0xB2, 200, 9)).await; + // Actor 0xA1's ring evicted down to the cap (last 3 of 5). + let all = s.backfill(None, None, 100).await; + assert_eq!(all.len(), 4, "3 capped + 1 other actor"); + let a1 = s + .backfill(None, Some(&format!("0x{}", hex::encode([0xA1; 32]))), 100) + .await; + assert_eq!(a1.len(), 3); + assert_eq!(a1[0].ts_unix, 102, "oldest two evicted"); + // Operator filter. + let b2 = s + .backfill(Some(&format!("0x{}", hex::encode([0xB2; 32]))), None, 100) + .await; + assert_eq!(b2.len(), 1); + // Limit takes the most recent. 
+ let limited = s.backfill(None, None, 2).await; + assert_eq!(limited.len(), 2); + assert_eq!(limited[1].ts_unix, 200); + } + + #[tokio::test] + async fn push_feed_fans_out_to_subscribers() { + let s = State::with_caps("/tmp".into(), 10, 2); + let mut rx = s.subscribe_feed(); + s.push_feed(feed(0xA1, 0xB1, 100, 1)).await; + let got = rx.recv().await.expect("live event"); + assert_eq!(got.ts_unix, 100); + } + + #[tokio::test] + async fn requeue_v2_puts_entries_back_at_the_head() { + let s = State::new("/tmp".to_string()); + s.queue_v2("0xop".into(), v2(0x01, 1)).await; + let r = s.flush_v2("0xop").await.unwrap().expect("non-empty"); + std::fs::remove_file(&r.leaves_path).ok(); + // Anchor failed → entries go back; a new event arrives meanwhile. + s.queue_v2("0xop".into(), v2(0x02, 11)).await; + s.requeue_v2("0xop", r.entries.clone()).await; + let r2 = s.flush_v2("0xop").await.unwrap().expect("non-empty"); + std::fs::remove_file(&r2.leaves_path).ok(); + assert_eq!(r2.entry_count, 2); + assert_eq!( + r2.entries[0].envelope_hash, + format!("0x{}", hex::encode([0x01; 32])), + "re-queued entry batches FIRST (oldest preserved)" + ); + } + + #[tokio::test] + async fn record_anchor_proofs_verify_and_tampered_leaf_fails() { + let s = State::new("/tmp".to_string()); + s.queue_v2("0xop".into(), v2(0x01, 1)).await; + s.queue_v2("0xop".into(), v2(0x02, 11)).await; + s.queue_v2("0xop".into(), v2(0x03, 81)).await; + let flush = s.flush_v2("0xop").await.unwrap().expect("non-empty"); + std::fs::remove_file(&flush.leaves_path).ok(); + let record = s + .record_anchor(&flush, "0xanchorhash".into(), "0xtx".into(), 1_700_000_100) + .await; + assert_eq!(record.entries.len(), 3); + let root = decode32(&record.merkle_root_hex); + for entry in &record.entries { + let leaf = decode32(&entry.envelope_hash); + let proof: Vec = entry.proof.iter().map(|p| decode32(p)).collect(); + assert!( + crate::merkle::verify_proof(leaf, &proof, root), + "genuine leaf {} verifies", + entry.leaf_index + ); + 
// The #109 tamper test: flip one byte of the event → the + // recomputed leaf no longer matches the anchored root. + let mut tampered = leaf; + tampered[0] ^= 0xFF; + assert!( + !crate::merkle::verify_proof(tampered, &proof, root), + "tampered leaf {} must fail", + entry.leaf_index + ); + } + // Anchors are retrievable per operator (case-insensitive). + let got = s.anchors_for("0xOP").await; + assert_eq!(got.len(), 1); + assert_eq!(got[0].tx_hash, "0xtx"); + } + #[test] fn op_kind_bitmap_lsb_is_op_kind_zero() { let hexmap = op_kind_bitmap_hex([0u8].into_iter()); diff --git a/docs/arch.md b/docs/arch.md index ff6e036f..dd083dfc 100644 --- a/docs/arch.md +++ b/docs/arch.md @@ -1142,8 +1142,10 @@ and never reordered**. Grouped by 10s leaves room for related ops. | `ConfigPut` | 80 | `{key: string, payload_hash: [u8;32]}` | config-service (#201, #229) | | `ConfigGet` | 81 | `{key: string, cap_hash: [u8;32]}` | config-service (#201, #229) | | `ConfigTeardown` | 82 | `{actor_target: [u8;32]}` | config-service (#201, #229) | +| `AuditRootAnchor` | 90 | `{merkle_root: [u8;32], op_kind_bitmap: [u8;32], entry_count: u64, relay_address: [u8;20]}` | audit-service tier-A relay (#109) | +| `AuditBatchFailed` | 91 | `{merkle_root: [u8;32], entry_count: u64, attempts: u8, last_error: string}` | audit-service tier-A relay (#109) | -Byte ranges `3-9`, `13-19`, `22-29`, `32-39`, `42-49`, `53-59`, `62-69`, `71-79`, `83-89`, `90-255` are reserved for future extensions in the same family (config claimed `80-89` per #229). +Byte ranges `3-9`, `13-19`, `22-29`, `32-39`, `42-49`, `53-59`, `62-69`, `71-79`, `83-89`, `92-99`, `100-255` are reserved for future extensions in the same family (config claimed `80-89` per #229; audit-service meta claimed `90-99` per #109). 
**Data-plane emit sites are LIVE (#229).** The cred / memory / config workers emit one envelope per store / fetch / teardown — after cap-verify, before the @@ -1214,6 +1216,57 @@ per-service draft shape, so this is a pre-first-emit schema fix, not a break (invariant #7 forbids reusing/reordering *numbers*; it does not freeze a never-emitted body draft). +**Two-tier audit is LIVE (#109): the real-time feed + the autonomous tier-A +anchor.** The audit worker is the aggregation point for every emit site +above, and serves both tiers from the same `AuditEnvelope` store: + +- **Tier 1 — off-chain real-time feed.** Every `append/v2` fans out + in-process to `GET /v1/audit/stream` (SSE; `?operator=`/`?actor=` filters + + `?backfill=N` ring-buffer replay) and lands in a per-actor ring buffer + (last 1000 events/actor). The shape has ONE owner — + [`agentkeys_types::audit_feed::AuditFeedEvent`](../crates/agentkeys-types/src/audit_feed.rs) + (#203 one-owner rule). The **daemon bridges** the stream (filtered to the + session operator) into its existing `ApiAuditEvent` web feed, deduping by + envelope hash against the locally-pushed submit events — so worker-side + ops (agent cred fetches, memory reads, denials) and anchor events appear + live in the parent UI with no new web socket. An `AGENTKEYS_AUDIT_S3_BUCKET` + cold archive (metadata + envelope CBOR only, never plaintext; bucket + + EC2-instance-role grant provisioned by + [`scripts/provision-audit-archive.sh`](../scripts/provision-audit-archive.sh)) + restores the rings on worker restart and backs `GET /v1/audit/envelope/:hash` + across restarts. 
+- **Tier 2 — autonomous on-chain anchor (default-on, 2-min cadence).** + Every `AGENTKEYS_AUDIT_BATCH_SECONDS` (default 120 — a PRODUCT decision, + don't relax without checking the demo storyboard) the worker drains each + operator's V2 queue, Merkle-roots the envelope hashes, wraps the root in + an `AuditRootAnchor` (90) envelope and commits THAT envelope's hash via + the ungated `CredentialAudit.appendV2(operatorOmni, relayActorOmni, 90, + envelopeHash)` — signed by the **tier-A relay EOA** (key generated on the + broker host by `setup-broker-host.sh`, 0600, never leaves the host; + funded idempotently by + [`scripts/heima-fund-audit-relay.sh`](../scripts/heima-fund-audit-relay.sh) + via `setup-heima.sh` step 14 reading `GET /v1/audit/relay-info`). + **Why not `appendRootV2`:** that gate requires the operator master, the + registry rejects EOA masters (`MasterMustBeAccount`), and a prod master + is a Touch-ID passkey that can't sign on a timer — while `appendV2` is + open-by-design with the REAL operator omni as an indexed topic, and the + honest anchor envelope (committed by hash) is exactly what the open-enum + §15.3b design exists for (zero contract change). Genuine anchors are + distinguished from third-party spam by `tx.from == relay_address` + (published at `relay-info`) — matching the tier-A trust row above ("only + shared service-relay-wallet" appears on chain). The master-gated + `appendRoot`/`appendRootV2` path REMAINS the sovereign tier-B/C route + (`heima-worker-smoke.sh` exercises it). Failed submissions retry ×3 with + exponential backoff; a persistently-failed batch is **re-queued** (the + next tick re-batches it under a fresh root) and an `AuditBatchFailed` + (91) envelope is emitted into the store + queue + feed with an ERROR log + (journald = the operator alert path). 
`GET /v1/audit/anchors/:operator` + returns recent anchors WITH per-entry Merkle proofs — the tamper check + (modify any served event → its recomputed leaf fails the proof against + the anchored root) and the parent UI's "Anchored ✓" badge both consume + it. With no relay key configured the worker boots in the pre-#109 + degraded mode: flushes log the `appendRootV2` inputs and anchor nothing. + #### Forward-compat / non-break design The trade-off when a new op_kind lands is **"uglier UI temporarily for old diff --git a/docs/operator-runbook-harness.md b/docs/operator-runbook-harness.md index abd643ac..23abccd4 100644 --- a/docs/operator-runbook-harness.md +++ b/docs/operator-runbook-harness.md @@ -233,7 +233,18 @@ commitment), and the envelope must NOT contain the roundtrip plaintext. Skip rea `audit-receipt-missing` (worker predates #229 — redeploy the broker host — or the emit dropped in best-effort mode) and `audit-url-unset` (stale env file). The tier-A on-chain anchor itself is exercised by `scripts/heima-worker-smoke.sh` (stage-2 step -10), which now also flushes the V2 envelope queue and submits `appendRootV2`. +10), which flushes the V2 envelope queue, submits the master-gated `appendRoot` +(tier-B/C sovereign path), and — since #109 — additionally asserts the **two-tier +wiring**: the appended envelope must appear on the worker's Tier-1 SSE backfill +(`GET /v1/audit/stream?backfill=N`), and the worker's autonomous tier-A relay must +anchor the batch on-chain (poll `GET /v1/audit/anchors/:operator` ≤90 s for the +record, `cast receipt` confirms the `appendV2` tx, then a local Merkle-proof walk +verifies the genuine envelope AND proves a tampered one fails — the #109 +tamper-evidence check). 
Tolerated smoke skips: `relay-not-configured` (host has no +`/etc/agentkeys/audit-relay.key` — re-run `setup-broker-host.sh`) and +`anchor-not-recorded` (operator unregistered on the anti-spam gate, or relay +unfunded — `bash scripts/heima-fund-audit-relay.sh`, auto-run by `setup-heima.sh` +step 14). ### CI flag reference diff --git a/docs/plan/issue-109-two-tier-audit.md b/docs/plan/issue-109-two-tier-audit.md new file mode 100644 index 00000000..373c7bc8 --- /dev/null +++ b/docs/plan/issue-109-two-tier-audit.md @@ -0,0 +1,75 @@ +# Issue #109 — Two-tier audit wiring (real-time off-chain feed + 2-min on-chain anchor) + +**Status:** shipped in PR #281 (all 10 steps; step 9's assertions live in +`heima-worker-smoke.sh` rather than a new stage-3 step — no renumbering of the +23-step demo). Live anchor-loop verify: DONE in PR #281's CI (Heima mainnet test stack — +SSE backfill + on-chain anchor tx + Merkle proof + tamper-fail all green). +Deferred: prod-host redeploy (operator); subscan renderers for op_kinds 90/91. Builds on #229 (data-plane emits + V2 queues) and #97/#270 +(control-plane emits + daemon/web audit receipts). Closes the #229-deferred open +design item "audit-worker-initiated `appendRootV2` chain submission (tier-A relay +wallet)". + +## Design decisions + +### Tier 2 anchor — `appendV2` + `AuditRootAnchor` op_kind, NOT `appendRootV2` + +`CredentialAudit.appendRootV2` is gated `msg.sender == registry.operatorMasterWallet(operatorOmni)`, +and `SidecarRegistry.registerFirstMasterDevice` rejects EOA masters +(`MasterMustBeAccount`) — so a hosted relay EOA can never pass that gate, and a +prod master is a Touch-ID passkey that cannot sign on a 2-minute cadence. 
Three +options considered: + +| Option | Verdict | +|---|---| +| Contract change (relay allowlist on `appendRootV2`) | Rejected — mainnet redeploy ceremony for something the open-enum envelope design already solves (§15.3b invariant #6: "new op_kinds need ZERO contract redeploys") | +| Relay as software-P256Account master (CI-style) registered per-operator | Rejected — heavy ceremony, couples audit anchoring to the bundler's liveness (#241 worked to REMOVE that coupling) | +| **Anchor via ungated `appendV2` with a new `AuditRootAnchor` op_kind (90)** | **Chosen** — zero contract change, real operator omni stays an indexed topic, plain funded EOA relay, one tx per operator-batch | + +The anchor is itself an honest `AuditEnvelope`: op_kind 90, body +`{merkle_root, op_kind_bitmap, entry_count, relay_address}` over the batch's +envelope-hash leaves. The chain event commits the anchor envelope's hash; the +envelope commits the root; the leaves verify against the root (existing +domain-separated Merkle). Genuine anchors are distinguished from third-party +spam by `tx.from == relay_address` (published at `GET /v1/audit/relay-info`), +matching arch.md §15.3 tier A verbatim: "only shared service-relay-wallet" +appears on chain. The master-gated `appendRootV2` path REMAINS the sovereign +tier-B/C route (`heima-worker-smoke.sh` still exercises it). + +### Tier 1 feed — worker SSE, daemon bridges into the EXISTING UI feed + +The parent-control UI already consumes the daemon's `/v1/audit/stream` SSE + +`/v1/anchor/status` (synthetic today). Rather than a second UI socket to the +hosted worker, the daemon subscribes to the worker's new SSE (filtered to the +session operator omni), maps envelopes into the existing `ApiAuditEvent` feed +(dedup by envelope hash — the broker submit-relay events would otherwise appear +twice), and flips `state.anchor` to REAL on anchor feed events.
+ +## Implementation order + +| # | Step | Files | +|---|---|---| +| 1 | core: op_kinds 90 `AuditRootAnchor` + 91 `AuditBatchFailed` (ritual §15.3b: variants, bodies, typed arms, roundtrip tests, vectors) | `crates/agentkeys-core/src/audit/{op_kind,bodies,mod}.rs` | +| 2 | core: move `legacy_tx.rs` bundler → core (bundler re-imports) | `crates/agentkeys-core/src/legacy_tx.rs`, `crates/agentkeys-bundler/` | +| 3 | worker-audit: anchor module — relay key load, raw JSON-RPC (Heima-safe), `appendV2` calldata, retry ×3 exp backoff, re-queue + `AuditBatchFailed` on persistent failure; `AGENTKEYS_AUDIT_BATCH_SECONDS` (default 120, legacy var honored); degraded log-only mode when unconfigured; `GET /v1/audit/relay-info` | `crates/agentkeys-worker-audit/src/{anchor,main,state}.rs` | +| 4 | worker-audit: Tier-1 feed — per-actor ring buffer (1000), broadcast, `GET /v1/audit/stream` SSE (operator/actor filter + backfill), anchors history `GET /v1/audit/anchors` with per-entry Merkle proofs | `crates/agentkeys-worker-audit/src/{state,handlers,lib,main}.rs` | +| 5 | worker-audit: S3 cold archive (env-gated bucket+prefix) — async PUT of feed events + envelope CBOR, boot-time ring restore, `get_envelope` S3 fallback | `crates/agentkeys-worker-audit/src/archive.rs` | +| 6 | daemon: worker-feed bridge → `ApiAuditEvent` push (dedup) + real `state.anchor`; reconnect w/ backoff; hermetic-test seam | `crates/agentkeys-daemon/src/ui_bridge.rs` | +| 7 | deploy: setup-broker-host.sh — relay-key gen (skip-if-exists), worker-audit env block (batch seconds, chain RPC/profile, relay key, S3 bucket/prefix), nginx SSE-friendly location for `/v1/audit/stream` | `scripts/setup-broker-host.sh` | +| 8 | chain: `scripts/heima-fund-audit-relay.sh` (idempotent fund from deploy wallet via relay-info) wired into `setup-heima.sh` | `scripts/` | +| 9 | harness: stage-3 audit-feed + anchor + tamper-evidence assertions; runbook + harness/CLAUDE.md sync | `harness/v2-stage3-demo.sh`, 
`docs/operator-runbook-harness.md`, `harness/CLAUDE.md` | +| 10 | docs: arch.md §15.3/§15.3a (tier-A hosted anchor semantics, new rows, Tier-1 feed surface), user-manual.md (live feed + anchored badge) | `docs/` | + +## Acceptance criteria mapping (issue #109) + +- Denial/revocation in parent UI ≤200ms → steps 4+6 (worker broadcast → daemon SSE; in-process fan-out, no polling) +- On-chain anchor ≤2min → step 3 (default 120s cadence) +- `AGENTKEYS_AUDIT_BATCH_SECONDS` default 120 → step 3 +- Restart recovery of last 1000/actor from S3 → step 5 +- Tamper test → step 9 (modify fetched event → recomputed leaf fails Merkle proof from `/v1/audit/anchors`) +- Retry + `audit.batch_failed` + alert → step 3 (op_kind 91 envelope + ERROR log; the envelope itself is queued so it anchors once the chain recovers) + +## Out of scope (unchanged from issue) + +Real-time on-chain audit; audit replay UX (M4); cross-actor regulator views; +per-vendor retention. Also out: explorer renderers for op_kinds 90/91 +(subscan-essentials#12 — `Unknown(byte)` fallback per invariant #4). diff --git a/docs/user-manual.md b/docs/user-manual.md index cb2067ea..d9812537 100644 --- a/docs/user-manual.md +++ b/docs/user-manual.md @@ -246,3 +246,38 @@ banner instead of failing. Scope grants are **set-replace**: the envelope's `service_ids` list is the FULL replacement grant (an empty set is the revoke-all), so compare two consecutive grant envelopes to see what changed. + +## Live audit feed + on-chain anchor badge (#109) + +The **audit** page is now a **live feed**, not just a log of your own +clicks: the daemon subscribes to the hosted audit worker's event stream +(filtered to your operator identity) and folds in **worker-side events you +never triggered from this app** — an agent fetching a credential in its +sandbox, memory reads/writes, and **denials** (`NOT PERMITTED` rows, shown +red). Events typically appear within a fraction of a second of the +operation. 
The same event never shows twice: rows you triggered locally and +the worker's copy are deduplicated by their envelope-hash receipt. + +Two feed-only row types come from the audit service itself (chip `anchor`): + +- **`audit.root_anchor`** — every ~2 minutes the audit service batches your + recent events into a Merkle tree and commits the batch on-chain. The row + carries the batch's root and the **transaction hash** (click through to + the explorer). This is what the **"Anchored ✓ HH:MM"** badge reflects — + it is REAL chain state, updated from these events. Expect up to a + 2-minute lag between an event appearing in the feed (real-time) and its + batch anchoring (the deliberate batching cadence); the feed row is your + instant view, the anchor is the tamper-evidence. +- **`audit.batch_failed`** — the service could not land an anchor after + retries (chain outage, relay out of gas). **No events are lost**: the + batch re-queues and anchors with the next successful tick; the failure + itself is recorded (and later anchored too). If these persist, the + operator funds the relay: `bash scripts/heima-fund-audit-relay.sh`. + +To verify tamper-evidence yourself: +`curl https://audit.litentry.org/v1/audit/anchors/{operator_omni}` returns +recent anchored batches with a Merkle proof per event — recompute any +served envelope's hash and check it against the anchored root; a modified +event fails its proof. If the daemon was started without +`--audit-worker-url` (no-infra dev), the feed shows only local events and +the anchor badge stays at its placeholder — nothing errors. diff --git a/scripts/heima-fund-audit-relay.sh b/scripts/heima-fund-audit-relay.sh new file mode 100755 index 00000000..22bffef5 --- /dev/null +++ b/scripts/heima-fund-audit-relay.sh @@ -0,0 +1,87 @@ +#!/usr/bin/env bash +# scripts/heima-fund-audit-relay.sh — fund the audit worker's tier-A anchor +# relay EOA from the deploy wallet (issue #109).
+# +# The relay key is generated ON the broker host by setup-broker-host.sh +# (/etc/agentkeys/audit-relay.key, 0600, never leaves the host). This +# laptop-side helper discovers its PUBLIC address via the worker's +# `GET /v1/audit/relay-info` and tops it up so the 2-minute `appendV2` +# anchor txs have gas. Anchor txs are emit-only (~50-100k gas), so the +# default 0.5 HEI covers thousands of batches. +# +# DELIBERATELY operator-run + idempotent — funding delegates to +# scripts/heima-fund-account.sh (skips when balance >= --amount-hei). +# Folded into setup-heima.sh step 14 (the tier-A smoke step); callable +# directly for surgical re-runs. +# +# Tolerated prereq gaps (exit 0 + "skipped" JSON, so setup-heima.sh stays +# green before the broker host is deployed): +# - audit worker unreachable → relay-worker-unreachable +# - worker runs in degraded mode (no relay key) → relay-not-configured +# +# Usage: +# bash scripts/heima-fund-audit-relay.sh [--amount-hei 0.5] [--relay-addr 0x..] [--dry-run] +# +# Env: +# AGENTKEYS_WORKER_AUDIT_URL (from operator-workstation.env; the relay-info source) +# AUDIT_RELAY_FUND_HEI (default 0.5 — override the top-up threshold) +# + the deployer-key resolution heima-fund-account.sh documents. 
+ +set -euo pipefail + +AMOUNT_HEI="${AUDIT_RELAY_FUND_HEI:-0.5}" +RELAY_ADDR="" +DRY_RUN=0 + +while [ $# -gt 0 ]; do + case "$1" in + --amount-hei) [ $# -lt 2 ] && { echo "--amount-hei requires a value" >&2; exit 1; }; AMOUNT_HEI="$2"; shift 2 ;; + --amount-hei=*) AMOUNT_HEI="${1#*=}"; shift ;; + --relay-addr) [ $# -lt 2 ] && { echo "--relay-addr requires a value" >&2; exit 1; }; RELAY_ADDR="$2"; shift 2 ;; + --relay-addr=*) RELAY_ADDR="${1#*=}"; shift ;; + --dry-run) DRY_RUN=1; shift ;; + --help|-h) + sed -n '2,/^set -euo/p' "$0" | sed 's/^# \{0,1\}//' | sed '$d'; exit 0 ;; + *) echo "unknown flag: $1 (try --help)" >&2; exit 1 ;; + esac +done + +if [ -t 2 ]; then + C_HEAD='\033[1;36m'; C_OK='\033[1;32m'; C_SKIP='\033[1;33m'; C_ERR='\033[1;31m'; C_RESET='\033[0m' +else + C_HEAD=''; C_OK=''; C_SKIP=''; C_ERR=''; C_RESET='' +fi +log() { printf "${C_HEAD}==>${C_RESET} %s\n" "$*" >&2; } +ok() { printf " ${C_OK}ok${C_RESET} %s\n" "$*" >&2; } +skip() { printf " ${C_SKIP}skip${C_RESET} %s\n" "$*" >&2; } +die() { printf " ${C_ERR}fail${C_RESET} %s\n" "$*" >&2; exit 1; } + +REPO_ROOT="$(cd "$(dirname "$0")/.." && pwd)" +ENV_FILE="${ENV_FILE:-$REPO_ROOT/scripts/operator-workstation.env}" +[ -f "$ENV_FILE" ] || die "missing $ENV_FILE" +set -a; . 
"$ENV_FILE"; set +a + +if [ -z "$RELAY_ADDR" ]; then + AUDIT_URL="${AGENTKEYS_WORKER_AUDIT_URL:?AGENTKEYS_WORKER_AUDIT_URL required (or pass --relay-addr)}" + log "Discovering relay address: $AUDIT_URL/v1/audit/relay-info" + info_json=$(curl -fsS --max-time 10 "$AUDIT_URL/v1/audit/relay-info" 2>/dev/null) || { + skip "audit worker unreachable at $AUDIT_URL — deploy the broker host first (setup-broker-host.sh), then re-run" + printf '{"ok":true,"skipped":"relay-worker-unreachable"}\n' + exit 0 + } + enabled=$(printf '%s' "$info_json" | jq -r '.anchor_enabled // false') + if [ "$enabled" != "true" ]; then + skip "worker reports anchor_enabled=false (no relay key on host) — re-run setup-broker-host.sh, then re-run" + printf '{"ok":true,"skipped":"relay-not-configured"}\n' + exit 0 + fi + RELAY_ADDR=$(printf '%s' "$info_json" | jq -r '.relay_address // empty') + [ -n "$RELAY_ADDR" ] || die "relay-info carried no relay_address: $info_json" + balance=$(printf '%s' "$info_json" | jq -r '.balance_wei // "unknown"') + ok "relay address $RELAY_ADDR (balance ${balance} wei)" +fi + +log "Funding relay $RELAY_ADDR to >= $AMOUNT_HEI HEI (idempotent)" +fund_args=(--to "$RELAY_ADDR" --amount-hei "$AMOUNT_HEI") +[ "$DRY_RUN" = "1" ] && fund_args+=(--dry-run) +bash "$REPO_ROOT/scripts/heima-fund-account.sh" "${fund_args[@]}" diff --git a/scripts/heima-worker-smoke.sh b/scripts/heima-worker-smoke.sh index 82213411..c072a848 100755 --- a/scripts/heima-worker-smoke.sh +++ b/scripts/heima-worker-smoke.sh @@ -198,12 +198,30 @@ else [ -z "$V2_ENVELOPE_HASH" ] && die "append/v2 returned no envelope_hash — body: $V2_OUT" ok "queued V2 envelope (op_kind=1 cred.fetch) — envelope_hash=$V2_ENVELOPE_HASH" + # ─── #109 Tier-1 feed: the appended envelope must be visible on the SSE + # stream's ring-buffer backfill (read-only; deterministic — no live-race). 
+ log "Tier-1 SSE feed (#109) — envelope visible via backfill" + SSE_OUT=$(curl -sN --max-time 5 \ + "$AUDIT_URL/v1/audit/stream?operator=0x$OPERATOR_OMNI&backfill=50" \ + 2>/dev/null | head -c 65536 || true) + if printf '%s' "$SSE_OUT" | grep -q "$V2_ENVELOPE_HASH"; then + ok "SSE backfill carries the appended envelope" + else + die "appended envelope $V2_ENVELOPE_HASH missing from the SSE backfill (is the worker pre-#109?)" + fi + if [ "$DRY_RUN" = "1" ]; then log "DRY RUN — would flush + appendRoot + appendRootV2 now" echo "{\"ok\":true,\"dry_run\":true,\"audit_queued\":2,\"audit_v2_queued\":1}" exit 0 fi + # #109: top up the tier-A anchor relay BEFORE flushing (idempotent + # skip-if-funded; tolerated skip when the relay/worker isn't deployed) so + # the background anchor task this flush schedules has gas on first runs. + bash "$REPO_ROOT/scripts/heima-fund-audit-relay.sh" >/dev/null \ + || info "audit-relay funding failed — the anchor leg below may skip" + log "Flushing queue → Merkle root" FLUSH_OUT=$(curl -sf --max-time 10 -X POST "$AUDIT_URL/v1/audit/flush/0x$OPERATOR_OMNI" 2>&1) \ || die "flush failed: $FLUSH_OUT" @@ -274,6 +292,69 @@ else [ "$STORED_ROOT_LC" = "$ROOT_LC" ] || die "stored root $STORED_ROOT != flushed root $ROOT" ok "on-chain root matches flushed root (idx $LAST_IDX)" + # ─── #109 Tier-A relay anchor + tamper-evidence ──────────────────────────── + # The flush above handed the V2 batch to the worker's background anchor + # task (appendV2 + AuditRootAnchor envelope, signed by the relay EOA). + # Poll /v1/audit/anchors for the confirmed record, then verify the Merkle + # proof walk locally — and that a TAMPERED leaf fails it (the #109 + # acceptance check). Tolerated skips: relay unconfigured (degraded host) + # or the operator unregistered on the anti-spam gate. 
+ log "Tier-A relay anchor (#109) — poll for the confirmed anchor record" + RELAY_INFO=$(curl -sf --max-time 10 "$AUDIT_URL/v1/audit/relay-info" 2>/dev/null || echo '{}') + if [ "$(echo "$RELAY_INFO" | jq -r '.anchor_enabled // false')" != "true" ]; then + info "skip relay-not-configured — worker in degraded mode (no AGENTKEYS_AUDIT_RELAY_KEY_FILE on the host)" + else + ANCHOR_RECORD="" + for _i in $(seq 1 30); do + ANCHORS=$(curl -sf --max-time 10 "$AUDIT_URL/v1/audit/anchors/0x$OPERATOR_OMNI" 2>/dev/null || echo '{}') + ANCHOR_RECORD=$(echo "$ANCHORS" | jq -c --arg h "$V2_ENVELOPE_HASH" \ + '[.anchors[]? | select([.entries[].envelope_hash] | index($h))] | last // empty') + [ -n "$ANCHOR_RECORD" ] && break + sleep 3 + done + if [ -z "$ANCHOR_RECORD" ]; then + info "skip anchor-not-recorded after 90s — operator likely unregistered on the anti-spam gate (registered master required), or the relay is unfunded (run scripts/heima-fund-audit-relay.sh); check worker logs" + else + ANCHOR_TX=$(echo "$ANCHOR_RECORD" | jq -r '.tx_hash') + ANCHOR_ROOT=$(echo "$ANCHOR_RECORD" | jq -r '.merkle_root_hex') + TX_STATUS=$(cast receipt "$ANCHOR_TX" status --rpc-url "$RPC_HTTP" 2>/dev/null || echo "") + # cast prints the success status as "true" (newer) / "1"/"0x1" (older) — + # match all three; failure ("false"/"0x0") falls through to die. + case "$TX_STATUS" in + *true*|*1*) ok "anchor tx confirmed on-chain: $ANCHOR_TX" ;; + *) die "anchor tx $ANCHOR_TX not confirmed (status: ${TX_STATUS:-unreadable})" ;; + esac + # Merkle-proof walk (mirrors CredentialAudit.verifyEntryInRoot: leaf + # prefixed 0x00, internal nodes 0x01 over the sorted pair). 
+ PROOF_LINES=$(echo "$ANCHOR_RECORD" | jq -r --arg h "$V2_ENVELOPE_HASH" \ + '.entries[] | select(.envelope_hash == $h) | .proof[]') + walk_proof() { + local computed sibling lo hi + computed=$(cast keccak "0x00${1#0x}") + for sibling in $PROOF_LINES; do + if [ "$(printf '%s\n%s\n' "${computed#0x}" "${sibling#0x}" | LC_ALL=C sort | head -1)" = "${computed#0x}" ]; then + lo=$computed; hi=$sibling + else + lo=$sibling; hi=$computed + fi + computed=$(cast keccak "0x01${lo#0x}${hi#0x}") + done + printf '%s' "$computed" + } + GENUINE_ROOT=$(walk_proof "$V2_ENVELOPE_HASH") + [ "$GENUINE_ROOT" = "$ANCHOR_ROOT" ] \ + || die "genuine envelope failed its Merkle proof (walked $GENUINE_ROOT, anchored $ANCHOR_ROOT)" + ok "genuine envelope verifies against the anchored root" + FIRST_BYTE=$(printf '%s' "${V2_ENVELOPE_HASH#0x}" | cut -c1-2) + FLIP=ff; [ "$FIRST_BYTE" = "ff" ] && FLIP=00 + TAMPERED_HASH="0x${FLIP}$(printf '%s' "${V2_ENVELOPE_HASH#0x}" | cut -c3-)" + TAMPERED_ROOT=$(walk_proof "$TAMPERED_HASH") + [ "$TAMPERED_ROOT" != "$ANCHOR_ROOT" ] \ + || die "TAMPERED envelope still verified — tamper-evidence broken" + ok "tamper-evidence: a modified event fails its proof against the anchored root" + fi + fi + # ─── V2 envelope batch → appendRootV2 (#229 two-tier anchor) ────────────── # The same flush above also drained the V2 envelope queue (the durable # audit feed the data-plane workers emit into). Commit its Merkle root + diff --git a/scripts/operator-workstation.env b/scripts/operator-workstation.env index 971cd6df..dcf83dec 100644 --- a/scripts/operator-workstation.env +++ b/scripts/operator-workstation.env @@ -105,6 +105,11 @@ MEMORY_BUCKET=agentkeys-memory-${ACCOUNT_ID} # Provisioned by scripts/provision-config-bucket.sh. CONFIG_BUCKET=agentkeys-config-${ACCOUNT_ID} +# #109 audit cold archive — Tier-1 feed events + envelope CBOR (metadata only, +# never plaintext). 
#!/usr/bin/env bash
# scripts/provision-audit-archive.sh — idempotent creation of the audit
# cold-archive bucket ($AUDIT_BUCKET) + the broker EC2 instance-role grant
# that lets the co-located audit worker write/read it (issue #109).
#
# Mirror of scripts/provision-config-bucket.sh for the bucket half. The
# archive holds Tier-1 feed events + canonical envelope CBOR — envelope
# HASHES and op metadata only, never plaintext payloads (#229 rule) — but
# per arch.md §17.2 it still gets its OWN bucket: folding it into vault/
# memory/config would collapse the per-data-class blast radius.
#
# Unlike the OIDC-assumed per-actor roles (provision-*-role.sh), the
# archive's writer is the audit WORKER itself via the broker host's EC2
# instance profile (same credential path the email worker uses) — so the
# grant is an inline policy on the instance role, not a federated role.
# The instance role is discovered from the env-keyed broker EIP tag
# (agentkeys-broker-eip[-test]) per the CLAUDE.md prod-vs-test rule.
#
# What it does (each step idempotent via "check first, then act"):
#   1. head-bucket with --expected-bucket-owner $ACCOUNT_ID — if 200, skip
#      create; anything other than "not found" (e.g. the name is held by
#      another account) is a hard failure, never a grant.
#   2. create-bucket if missing (LocationConstraint only for non-us-east-1).
#   3. put-public-access-block (idempotent overwrite).
#   4. put-bucket-encryption with SSE-S3 AES-256 default.
#   5. resolve broker EIP (by Name tag) → instance → instance-profile role.
#   6. put-role-policy AuditArchiveS3 (skip when the doc already matches).
#
# Required env (sourced from scripts/operator-workstation.env):
#   ACCOUNT_ID, REGION, AUDIT_BUCKET
#
# Required AWS profile: agentkeys-admin
#
# Usage:
#   bash scripts/provision-audit-archive.sh
#   bash scripts/provision-audit-archive.sh --dry-run
#   ENV_FILE=scripts/operator-workstation.test.env bash scripts/provision-audit-archive.sh

set -euo pipefail

DRY_RUN=0
while [ $# -gt 0 ]; do
  case "$1" in
    --dry-run) DRY_RUN=1; shift ;;
    --help|-h)
      # Print the header comment above (line 2 up to the `set -euo` line).
      sed -n '2,/^set -euo/p' "$0" | sed 's/^# \{0,1\}//' | sed '$d'; exit 0 ;;
    *) echo "unknown flag: $1 (try --help)" >&2; exit 1 ;;
  esac
done

REPO_ROOT="$(cd "$(dirname "$0")/.." && pwd)"
ENV_FILE="${ENV_FILE:-$REPO_ROOT/scripts/operator-workstation.env}"

# Colour only when stderr is a terminal; all progress output goes to stderr.
if [ -t 2 ]; then
  C_HEAD='\033[1;36m'; C_OK='\033[1;32m'; C_SKIP='\033[1;33m'
  C_WARN='\033[1;33m'; C_ERR='\033[1;31m'; C_RESET='\033[0m'
else
  C_HEAD=''; C_OK=''; C_SKIP=''; C_WARN=''; C_ERR=''; C_RESET=''
fi
log()  { printf "${C_HEAD}==>${C_RESET} %s\n" "$*" >&2; }
ok()   { printf "    ${C_OK}ok${C_RESET}    %s\n" "$*" >&2; }
skip() { printf "    ${C_SKIP}skip${C_RESET}  %s\n" "$*" >&2; }
warn() { printf "    ${C_WARN}warn${C_RESET}  %s\n" "$*" >&2; }
die()  { printf "    ${C_ERR}fail${C_RESET}  %s\n" "$*" >&2; exit 1; }

[ -f "$ENV_FILE" ] || die "missing $ENV_FILE"
set -a; . "$ENV_FILE"; set +a

ACCOUNT_ID="${ACCOUNT_ID:?ACCOUNT_ID required}"
REGION="${REGION:?REGION required}"
AUDIT_BUCKET="${AUDIT_BUCKET:?AUDIT_BUCKET required — add it to operator-workstation.env}"

# prod vs CI/test broker EIP selection (CLAUDE.md: keyed on the env file).
EIP_TAG="agentkeys-broker-eip"
case "$(basename "$ENV_FILE")" in
  *test*) EIP_TAG="agentkeys-broker-eip-test" ;;
esac

log "Preflight: AWS caller identity"
caller_arn=$(aws sts get-caller-identity --query Arn --output text 2>&1) \
  || die "aws sts get-caller-identity failed: $caller_arn"
arn_lc=$(printf '%s' "$caller_arn" | tr '[:upper:]' '[:lower:]')
case "$arn_lc" in
  *":user/agentkeys-admin"*) ok "caller is admin: $caller_arn" ;;
  *) die "caller is $caller_arn — needs agentkeys-admin. Run: awsp agentkeys-admin" ;;
esac

# Step 1+2: bucket existence. The bucket name is predictable, so ownership is
# pinned with --expected-bucket-owner: a same-named bucket in a foreign account
# must never be configured, nor have the instance role granted write access.
log "Bucket existence: s3://$AUDIT_BUCKET"
if head_err=$(aws s3api head-bucket --bucket "$AUDIT_BUCKET" --region "$REGION" \
      --expected-bucket-owner "$ACCOUNT_ID" 2>&1 >/dev/null); then
  skip "bucket already exists"
else
  # Only a genuine "not found" may fall through to creation; 403 / owner
  # mismatch / transport errors stop here with the underlying AWS message.
  case "$head_err" in
    *"(404)"*|*"Not Found"*|*NoSuchBucket*) ;;
    *) die "s3://$AUDIT_BUCKET is not usable as a bucket owned by account $ACCOUNT_ID (name held by another account, or head-bucket failed): $head_err" ;;
  esac
  if [ "$DRY_RUN" = "1" ]; then
    log "DRY RUN — would create-bucket $AUDIT_BUCKET in $REGION"
  else
    log "Creating bucket"
    # us-east-1 rejects an explicit LocationConstraint; other regions need it.
    if [ "$REGION" = "us-east-1" ]; then
      aws s3api create-bucket --bucket "$AUDIT_BUCKET" --region "$REGION" \
        || die "create-bucket failed"
    else
      aws s3api create-bucket --bucket "$AUDIT_BUCKET" --region "$REGION" \
        --create-bucket-configuration "LocationConstraint=$REGION" \
        || die "create-bucket failed"
    fi
    ok "bucket created"
  fi
fi

# Step 3: block public access
log "Public access block"
pab_target=$(jq -n '{
  BlockPublicAcls: true, IgnorePublicAcls: true,
  BlockPublicPolicy: true, RestrictPublicBuckets: true
}')
if [ "$DRY_RUN" = "1" ]; then
  log "DRY RUN — would put-public-access-block: $pab_target"
else
  aws s3api put-public-access-block --bucket "$AUDIT_BUCKET" --region "$REGION" \
    --expected-bucket-owner "$ACCOUNT_ID" \
    --public-access-block-configuration "$pab_target" \
    || die "put-public-access-block failed"
  ok "block-public-access applied (all four flags = true)"
fi

# Step 4: default encryption SSE-S3
log "Default encryption (SSE-S3 AES-256)"
enc_target=$(jq -n '{
  Rules: [ { ApplyServerSideEncryptionByDefault: { SSEAlgorithm: "AES256" } } ]
}')
if [ "$DRY_RUN" = "1" ]; then
  log "DRY RUN — would put-bucket-encryption: $enc_target"
else
  aws s3api put-bucket-encryption --bucket "$AUDIT_BUCKET" --region "$REGION" \
    --expected-bucket-owner "$ACCOUNT_ID" \
    --server-side-encryption-configuration "$enc_target" \
    || die "put-bucket-encryption failed"
  ok "default SSE-S3 applied (feed events are metadata-only; this is belt-and-braces)"
fi

# Step 5: resolve the broker host's instance role (env-keyed EIP tag — never
# a first-match describe-addresses, per the #201 incident rule).
# Chain: EIP (by Name tag) → instance (by public IP) → instance profile → role.
# The CLI prints the literal "None" for a null --query result, hence the
# paired empty/"None" guards.
log "Resolving broker instance role via EIP tag $EIP_TAG"
EIP=$(aws ec2 describe-addresses --region "$REGION" \
  --filters "Name=tag:Name,Values=$EIP_TAG" \
  --query 'Addresses[0].PublicIp' --output text 2>/dev/null || true)
[ -n "$EIP" ] && [ "$EIP" != "None" ] || die "no EIP with tag $EIP_TAG in $REGION"
INSTANCE_ID=$(aws ec2 describe-instances --region "$REGION" \
  --filters "Name=ip-address,Values=$EIP" \
  --query 'Reservations[0].Instances[0].InstanceId' --output text 2>/dev/null || true)
[ -n "$INSTANCE_ID" ] && [ "$INSTANCE_ID" != "None" ] || die "no instance behind EIP $EIP"
PROFILE_ARN=$(aws ec2 describe-instances --region "$REGION" --instance-ids "$INSTANCE_ID" \
  --query 'Reservations[0].Instances[0].IamInstanceProfile.Arn' --output text 2>/dev/null || true)
[ -n "$PROFILE_ARN" ] && [ "$PROFILE_ARN" != "None" ] \
  || die "instance $INSTANCE_ID has no instance profile (docs/cloud-bootstrap.md §6)"
# ${PROFILE_ARN##*/} keeps only the profile name after the last "/".
ROLE_NAME=$(aws iam get-instance-profile --instance-profile-name "${PROFILE_ARN##*/}" \
  --query 'InstanceProfile.Roles[0].RoleName' --output text 2>/dev/null || true)
[ -n "$ROLE_NAME" ] && [ "$ROLE_NAME" != "None" ] || die "instance profile ${PROFILE_ARN##*/} has no role"
ok "instance role: $ROLE_NAME (instance $INSTANCE_ID @ $EIP)"

# Step 6: inline role policy (skip when the live doc already matches).
# Object read/write plus bucket listing only — no delete is granted.
POLICY_NAME="AuditArchiveS3"
policy_target=$(jq -n --arg bucket "$AUDIT_BUCKET" '{
  Version: "2012-10-17",
  Statement: [
    {Sid: "AuditArchiveObjects", Effect: "Allow",
     Action: ["s3:PutObject", "s3:GetObject"],
     Resource: "arn:aws:s3:::\($bucket)/*"},
    {Sid: "AuditArchiveList", Effect: "Allow",
     Action: ["s3:ListBucket"],
     Resource: "arn:aws:s3:::\($bucket)"}
  ]
}')
log "Instance-role inline policy $POLICY_NAME"
# A missing policy reads as "{}" so it compares unequal and gets written.
current=$(aws iam get-role-policy --role-name "$ROLE_NAME" --policy-name "$POLICY_NAME" \
  --query 'PolicyDocument' --output json 2>/dev/null || echo "{}")
# jq -S sorts keys so the comparison ignores key order and whitespace.
if [ "$(echo "$current" | jq -S .)" = "$(echo "$policy_target" | jq -S .)" ]; then
  skip "policy already matches"
elif [ "$DRY_RUN" = "1" ]; then
  log "DRY RUN — would put-role-policy $POLICY_NAME on $ROLE_NAME: $policy_target"
else
  aws iam put-role-policy --role-name "$ROLE_NAME" --policy-name "$POLICY_NAME" \
    --policy-document "$policy_target" \
    || die "put-role-policy failed"
  ok "policy applied"
fi

ok "audit archive provisioning complete: s3://$AUDIT_BUCKET (writer: $ROLE_NAME)"
[[ -z "$AUDIT_BUCKET" ]]; then + AUDIT_BUCKET="$(read_envfile_var /etc/agentkeys/worker-audit.env AGENTKEYS_AUDIT_S3_BUCKET)" +fi # Contract addresses (SCOPE/REGISTRY/K3) are NOT read from the host worker env # here — unlike buckets/RPC (operator overrides that should stick across re-runs), # contract addresses are DEPLOY OUTPUTS that change on every redeploy. Reading the @@ -546,6 +551,7 @@ if [[ -z "$CLASSIFY_HOST" ]]; then CLASSIFY_HOST="$(derive_companion classify)"; [[ -z "$VAULT_BUCKET" ]] && VAULT_BUCKET="agentkeys-vault${SUFFIX}-${ACCOUNT_ID}" [[ -z "$MEMORY_BUCKET" ]] && MEMORY_BUCKET="agentkeys-memory${SUFFIX}-${ACCOUNT_ID}" [[ -z "$CONFIG_BUCKET" ]] && CONFIG_BUCKET="agentkeys-config${SUFFIX}-${ACCOUNT_ID}" +[[ -z "$AUDIT_BUCKET" ]] && AUDIT_BUCKET="agentkeys-audit${SUFFIX}-${ACCOUNT_ID}" # Test mode flips the email-from default to the -test subdomain too # (operator can still override via --email-from). Slot N>=2 uses the # slot's own SES identity (noreply-test-N@bots-test-N.). @@ -586,6 +592,7 @@ unset _env_file_to_source [[ -z "$SCOPE_ADDR" ]] && SCOPE_ADDR="${SCOPE_CONTRACT_ADDRESS_HEIMA:-}" [[ -z "$REGISTRY_ADDR" ]] && REGISTRY_ADDR="${SIDECAR_REGISTRY_ADDRESS_HEIMA:-}" [[ -z "$K3_COUNTER_ADDR" ]] && K3_COUNTER_ADDR="${K3_EPOCH_COUNTER_ADDRESS_HEIMA:-}" +AUDIT_CONTRACT_ADDR="${CREDENTIAL_AUDIT_ADDRESS_HEIMA:-}" # #109 anchor target (env-aware: test stack has its own) # Last-resort fallback to the host's worker env — ONLY when neither a CLI flag nor # operator-workstation.env supplied the address (e.g. a host without a sourced # env file). A redeploy's fresh operator-workstation.env addresses always win over @@ -1237,18 +1244,49 @@ WORKER_CONFIG_ENV_FILE=$DEV_KEY_SERVICE_ENV_DIR/worker-config.env WORKER_CLASSIFY_ENV_FILE=$DEV_KEY_SERVICE_ENV_DIR/worker-classify.env if [[ "$WITH_WORKERS" == "yes" ]]; then - # audit + email: no secrets. Mode 0644 is fine; the values are public - # config (bucket name, leaves dir). 
Rewrite on every run so bucket / - # region overrides via --vault-bucket / --region take effect. + # audit + email env FILES: no secrets (the audit relay key is a separate + # 0600 file). Mode 0644 is fine; the values are public config. Rewrite on + # every run so bucket / region overrides take effect. + # #109 two-tier audit: the relay PRIVATE key lives in a separate 0600 file + # (generated below, preserved across re-runs); the env file itself stays + # secret-free. Empty RPC/contract values fall back to the compiled-in chain + # profile inside the worker. log "Writing $WORKER_AUDIT_ENV_FILE" sudo tee "$WORKER_AUDIT_ENV_FILE" >/dev/null </dev/null + fi + sudo chown agentkeys:agentkeys /etc/agentkeys/audit-relay.key + sudo chmod 0600 /etc/agentkeys/audit-relay.key + log "Writing $WORKER_EMAIL_ENV_FILE" sudo tee "$WORKER_EMAIL_ENV_FILE" >/dev/null </dev/null <