From 8f32ef1a112f5fd486a9b9383a70f4e00a2eb1d8 Mon Sep 17 00:00:00 2001 From: Samuel Laferriere <9342524+samlaf@users.noreply.github.com> Date: Thu, 2 Jul 2026 21:52:15 +0800 Subject: [PATCH] feat(deploy_tee): configure founds the whole cohort in parallel MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit seismic-tee-bootstrap configure now takes --genesis plus repeatable --join descriptors: it assigns exactly one genesis node (peers = [], mints root_key) and points every joiner at http://<genesis-ip>:7878, POSTs all configs and watches all first-boot wipes concurrently (in-place multi-line dashboard on a TTY; change-only lines otherwise), then reports per-node results and exits non-zero if any node failed. Assigning roles in a single invocation makes a double-genesis split unrepresentable, and duplicate descriptors (same name or IP) are rejected up front. ctrl-C stops only the watching — POSTed nodes keep provisioning server-side. Also points the dev Pulumi env at the seismic-dev_2026-07-02 image (seismic-images#38 joiner-cascade fix) in eastus / DC4es_v6. Verified live at N=2: one command configured genesis + joiner, watched both wipes in parallel, node 2 fetched root_key from node 1, and both nodes' RPC endpoints came up. 
--- deploy_tee/bootstrap_cli.py | 2 +- deploy_tee/cohort_configure.py | 300 +++++++++++++++--- .../pulumi/seismic_node/Pulumi.dev.yaml | 16 +- deploy_tee/tests/test_cohort_configure.py | 71 +++++ 4 files changed, 334 insertions(+), 55 deletions(-) create mode 100644 deploy_tee/tests/test_cohort_configure.py diff --git a/deploy_tee/bootstrap_cli.py b/deploy_tee/bootstrap_cli.py index f235ff8..3d226d2 100644 --- a/deploy_tee/bootstrap_cli.py +++ b/deploy_tee/bootstrap_cli.py @@ -43,7 +43,7 @@ def down(argv: tuple[str, ...]) -> None: @app.command(name="configure", context_settings=PASSTHROUGH, add_help_option=False) @click.argument("argv", nargs=-1, type=click.UNPROCESSED) def configure(argv: tuple[str, ...]) -> None: - """Configure the network's genesis node (mints root_key locally).""" + """Configure a cohort in parallel: one genesis + N joiners, one command.""" from deploy_tee import cohort_configure forward(cohort_configure.main, "seismic-tee-bootstrap configure", argv) diff --git a/deploy_tee/cohort_configure.py b/deploy_tee/cohort_configure.py index e219818..badbc41 100644 --- a/deploy_tee/cohort_configure.py +++ b/deploy_tee/cohort_configure.py @@ -1,32 +1,232 @@ -"""`seismic-tee-bootstrap configure` — found a network's genesis node. - -Configures the one node that *founds* a network: it mints `root_key` locally -with OsRng (`genesis_node = true`, no peers) instead of fetching it from a -peer. Designating the genesis node is a founding act, so it lives on the -internal bootstrap CLI — not the operator `seismic-tee configure`, which only -ever joins an existing network. - -Reuses the shared config-delivery primitive (`deploy_tee.configure`), so the -POSTed config, the tdx-init contract, and the LUKS-wipe watch are identical to -a joining node's — only `genesis_node`/`peers` differ. - -Today this configures a single genesis node. 
The parallel cohort founder — -assign exactly one genesis + N joiners, POST all in parallel, and watch every -node's disk wipe at once — is the planned extension of this command (so a -4-node bootstrap is one invocation, not four terminals). +"""`seismic-tee-bootstrap configure` — found a network in one command. + +Configures a whole cohort at once: the one genesis node (`--genesis`, mints +`root_key` locally) plus every joining node (`--join`, fetches `root_key` from +genesis via `getWrappedRootKey`). All nodes are POSTed and their first-boot +LUKS wipes watched **in parallel**, so an N-node bootstrap is one command +instead of N terminals. Exactly one node is genesis — assigned here, not left +to a per-node flag — so a double-genesis network split is unrepresentable. + +Founding is an internal act, so this lives on the bootstrap CLI; joining an +already-live network is the operator `seismic-tee configure`. Both go through +the same `build_config` / `post_config_to_tdx_init` primitives and +`status.poll_provisioning`, so each node's POSTed config and wipe-watch are +identical — only the role (`genesis_node`/`peers`) differs. 
""" import argparse +import logging +import shutil +import sys +import threading +import time +from collections import Counter +from concurrent.futures import Future, ThreadPoolExecutor +from dataclasses import dataclass, field from pathlib import Path -from deploy_tee.configure import deliver_config +from deploy_tee import manifest as manifest_mod +from deploy_tee.configure import ( + ENCLAVE_PEER_PORT, + TDX_INIT_PORT, + build_config, + post_config_to_tdx_init, +) +from deploy_tee.descriptor import load_descriptor, require +from deploy_tee.status import poll_provisioning from deploy_tee.utils.logging_setup import setup_logging +logger = logging.getLogger(__name__) + + +@dataclass +class Node: + """One cohort member, resolved from its descriptor + assigned role.""" + + name: str # short label (the descriptor filename stem) + public_ip: str + fqdn: str + genesis: bool + peers: list[str] = field(default_factory=list) + + +def _load_node(descriptor_path: Path, *, genesis: bool, peers: list[str]) -> Node: + descriptor = load_descriptor(descriptor_path) + return Node( + name=descriptor_path.stem, + public_ip=require(descriptor, "public_ip", descriptor_path), + fqdn=require(descriptor, "fqdn", descriptor_path), + genesis=genesis, + peers=peers, + ) + + +def build_cohort(genesis_path: Path, join_paths: list[Path]) -> list[Node]: + """Resolve the cohort: exactly one genesis (peers empty — it mints), and + every joiner pointed at the genesis node's enclave endpoint + (`http://:7878`), so joiners fetch `root_key` from it. Role + assignment lives here, not in a per-node flag, so there is exactly one + genesis by construction. 
+ """ + genesis = _load_node(genesis_path, genesis=True, peers=[]) + genesis_peer = f"http://{genesis.public_ip}:{ENCLAVE_PEER_PORT}" + joiners = [_load_node(p, genesis=False, peers=[genesis_peer]) for p in join_paths] + nodes = [genesis, *joiners] + + # A descriptor passed twice (--genesis reused as --join, or a copy-pasted + # --join) would race two conflicting POSTs against one node and silently + # collide on the name-keyed dashboard/result dicts — refuse instead. + for what, counts in ( + ("name", Counter(n.name for n in nodes)), + ("public_ip", Counter(n.public_ip for n in nodes)), + ): + dupes = sorted(k for k, c in counts.items() if c > 1) + if dupes: + raise SystemExit( + f"duplicate node {what}(s) in cohort: {', '.join(dupes)} — " + "was the same descriptor passed more than once?" + ) + return nodes + + +def _configure_node( + node: Node, + manifest_path: Path, + email: str, + no_wait: bool, + states: dict[str, str], + stop: threading.Event, +) -> bool: + """Build + POST one node's config, then (unless `no_wait`) poll its LUKS + wipe, writing the latest status line into `states[node.name]` for the + dashboard. Returns whether the node reached a ready state. Never raises — + a failure is recorded in `states` and reflected in the return, so one bad + node doesn't abort the rest of the cohort. `stop` (set on ctrl-C) ends the + wipe watch early so the worker joins promptly. 
+ """ + try: + states[node.name] = "building config…" + config = build_config( + manifest_path, node.fqdn, email, genesis_node=node.genesis, peers=node.peers + ) + states[node.name] = f"POSTing config to tdx-init :{TDX_INIT_PORT}…" + post_config_to_tdx_init(node.public_ip, config) + if no_wait: + states[node.name] = "config delivered (not waiting)" + return True + for update in poll_provisioning(node.public_ip, stop=stop): + states[node.name] = update.line + if update.done: + return update.ok + return False # stopped early, or defensive against a silent generator end + except Exception as e: # noqa: BLE001 — surface per node, keep the cohort going + states[node.name] = f"ERROR: {e}" + return False + + +class _Dashboard: + """Render N nodes' live status: an in-place multi-line block on a TTY, + else one line per node printed only when it changes (readable in CI logs). + """ + + def __init__(self, nodes: list[Node]) -> None: + self.isatty = sys.stdout.isatty() + self.order = [n.name for n in nodes] + self.labels = { + n.name: n.name + (" (genesis)" if n.genesis else "") for n in nodes + } + self._width = max(len(label) for label in self.labels.values()) + self._painted = False + self._last: dict[str, str] = {} + + def render(self, states: dict[str, str]) -> None: + if self.isatty: + cols = shutil.get_terminal_size((100, 24)).columns + if self._painted: + sys.stdout.write(f"\033[{len(self.order)}A") # cursor up N lines + for name in self.order: + text = ( + f"{self.labels[name].rjust(self._width)} {states.get(name, '…')}" + ) + if len(text) >= cols: + text = text[: cols - 1] + "…" + sys.stdout.write(f"\033[2K{text}\n") # clear line + write + sys.stdout.flush() + self._painted = True + else: + for name in self.order: + line = states.get(name, "…") + if self._last.get(name) != line: + print(f"{self.labels[name]}: {line}", flush=True) + self._last[name] = line + + +def _run_cohort( + nodes: list[Node], manifest_path: Path, email: str, no_wait: bool +) -> dict[str, bool]: 
+ """Configure every node concurrently, refreshing the dashboard until all + workers finish. Returns {node name: ok}. Threads suit this — the work is + blocking HTTP (POST + status polling), and N is small. + """ + # The shared primitives log at INFO; that would corrupt the in-place + # dashboard, and the per-node status lines convey the same progress. Quiet + # them for the dashboard's duration (the process exits after, so no restore). + logging.getLogger("deploy_tee").setLevel(logging.WARNING) + + states: dict[str, str] = {n.name: "queued…" for n in nodes} + dashboard = _Dashboard(nodes) + stop = threading.Event() + futures: dict[str, Future[bool]] = {} + with ThreadPoolExecutor(max_workers=len(nodes)) as pool: + for node in nodes: + futures[node.name] = pool.submit( + _configure_node, node, manifest_path, email, no_wait, states, stop + ) + try: + # Refresh while workers block on POST/poll. + while not all(f.done() for f in futures.values()): + dashboard.render(states) + time.sleep(1) + except KeyboardInterrupt: + # Must set `stop` before the pool's context exit joins the workers + # — otherwise a wipe watch blocks that join for up to 1h+. With it, + # workers exit within a poll interval (a worker still inside the + # POST's listener wait is bounded at ~3min). The POSTs that landed + # keep provisioning server-side either way. 
+ stop.set() + print("\nStopped watching — configured nodes keep provisioning.") + raise SystemExit(130) from None + dashboard.render(states) # final paint of terminal states + return {name: f.result() for name, f in futures.items()} + + +def _report(nodes: list[Node], results: dict[str, bool]) -> None: + print("\n" + "=" * 80) + print("COHORT CONFIGURED") + print("=" * 80) + for node in nodes: + ok = results.get(node.name, False) + role = "genesis" if node.genesis else "join" + print(f" {'✓' if ok else '✗'} {node.name} ({role}) — https://{node.fqdn}/rpc") + print("=" * 80 + "\n") + + genesis_failed = any(n.genesis and not results.get(n.name, False) for n in nodes) + if genesis_failed: + print( + "Genesis node did not come up — joiners cannot fetch root_key until " + "it does; they will keep retrying. Fix genesis first." + ) + failed = [n.name for n in nodes if not results.get(n.name, False)] + if failed: + raise SystemExit( + f"{len(failed)}/{len(nodes)} node(s) failed: {', '.join(failed)}" + ) + def parse_args() -> argparse.Namespace: parser = argparse.ArgumentParser( prog="seismic-tee-bootstrap configure", - description="Configure a network's genesis node (mints root_key locally).", + description="Configure a network cohort in parallel: one genesis + N joiners.", ) parser.add_argument( "--genesis", @@ -34,9 +234,20 @@ def parse_args() -> argparse.Namespace: required=True, metavar="DESCRIPTOR", help=( - "Node descriptor JSON for the genesis node (from `up`, or " - "`pulumi stack output --json`). Exactly one node per network is " - "genesis; every other node joins via `seismic-tee configure`." + "Descriptor for the one genesis node (mints root_key locally). " + "Exactly one node per network is genesis; assigning it here (not a " + "per-node flag) makes a double-genesis split impossible." 
+ ), + ) + parser.add_argument( + "--join", + type=Path, + action="append", + default=[], + metavar="DESCRIPTOR", + help=( + "Descriptor for a joining node (fetches root_key from genesis via " + "getWrappedRootKey). Repeatable; omit for a genesis-only bring-up." ), ) parser.add_argument( @@ -44,50 +255,47 @@ def parse_args() -> argparse.Namespace: type=Path, required=True, metavar="FILE", - help=( - "Network manifest JSON (from `manifest assemble`). Merged into the " - "POSTed config as [network].manifest_base64; the same bytes every " - "node in the network uses." - ), + help="Network manifest JSON (from `manifest assemble`); → [network].", ) parser.add_argument( "--email", default="ops@seismic.systems", - help=( - "Contact email for the node's certbot registration; goes into " - "[domain].email. Default: ops@seismic.systems." - ), + help="certbot contact email → [domain].email (default: ops@seismic.systems).", ) parser.add_argument( "--no-wait", action="store_true", default=False, - help=( - "Don't watch first-boot LUKS provisioning after POSTing. Use for " - "CI/headless runs where there's no TTY and the wipe can take 1h+." - ), + help="Don't watch first-boot LUKS provisioning after POSTing.", ) args = parser.parse_args() - if not args.genesis.is_file(): - raise SystemExit(f"--genesis descriptor not found: {args.genesis}") - if not args.manifest.is_file(): - raise SystemExit(f"--manifest file not found: {args.manifest}") + for path in [args.genesis, *args.join, args.manifest]: + if not path.is_file(): + raise SystemExit(f"file not found: {path}") return args def main() -> None: setup_logging() args = parse_args() - # The founding node: genesis_node=True, no peers (it mints, never fetches). - deliver_config( - args.genesis, - args.manifest, - args.email, - genesis_node=True, - peers=[], - no_wait=args.no_wait, + + # Validate the shared manifest once, so a bad one fails fast here rather + # than as N identical per-worker errors mid-dashboard. 
+ try: + manifest_mod.validate_manifest_schema(args.manifest.read_bytes()) + except manifest_mod.ManifestSchemaError as e: + raise SystemExit(f"--manifest {args.manifest}: invalid manifest: {e}") from None + + nodes = build_cohort(args.genesis, args.join) + joiners = [n.name for n in nodes if not n.genesis] + print( + f"Configuring {len(nodes)} node(s): genesis={nodes[0].name}" + + (f", joining={joiners}" if joiners else " (genesis-only)") ) + results = _run_cohort(nodes, args.manifest, args.email, args.no_wait) + _report(nodes, results) + if __name__ == "__main__": main() diff --git a/deploy_tee/pulumi/seismic_node/Pulumi.dev.yaml b/deploy_tee/pulumi/seismic_node/Pulumi.dev.yaml index ac363f3..ec751a4 100644 --- a/deploy_tee/pulumi/seismic_node/Pulumi.dev.yaml +++ b/deploy_tee/pulumi/seismic_node/Pulumi.dev.yaml @@ -4,28 +4,28 @@ # (resource_group / vm_name / dns_record_name → -). Copy to a sibling # Pulumi..yaml (e.g. testnet) for non-dev cohorts. config: - # DCedsv6 TDX SKUs with local NVMe are currently available in westus3, - # which is useful for local/dev testing. + # TDX SKU availability varies by region: DCesv6 (no local disk, used below) + # is available in eastus; the DCedsv6 variants with local NVMe live in westus3. # See https://techcommunity.microsoft.com/blog/azureconfidentialcomputingblog/announcing-general-availability-of-azure-intel%C2%AE-tdx-confidential-vms/4495693 - azure-native:location: westus3 + azure-native:location: eastus # Resources that must already exist and are re-used across stacks. seismic-tee-deploy:dns_zone_resource_group: devnet2 seismic-tee-deploy:dns_zone_name: seismicdev.net # ARM ID of the storage account hosting the VHD. # Required so Azure can authorize the cross-RG blob read for managed disk Import. 
- # Get via: az storage account show -n seismicimageswus3 -g seismic-images --query id -o tsv - seismic-tee-deploy:vhd_storage_account_id: /subscriptions/214887ea-51a7-4ca7-9cec-29b3cf3d311c/resourceGroups/seismic-images/providers/Microsoft.Storage/storageAccounts/seismicimageswus3 - seismic-tee-deploy:vhd_blob_url: https://seismicimageswus3.blob.core.windows.net/dev/seismic-dev_2026-05-29.2bfa8f.vhd + # Get via: az storage account show -n seismicimages -g seismic-images --query id -o tsv + seismic-tee-deploy:vhd_storage_account_id: /subscriptions/214887ea-51a7-4ca7-9cec-29b3cf3d311c/resourceGroups/seismic-images/providers/Microsoft.Storage/storageAccounts/seismicimages + seismic-tee-deploy:vhd_blob_url: https://seismicimages.blob.core.windows.net/dev/seismic-dev_2026-07-02.7b7342.vhd # Stack-specific deploy config. seismic-tee-deploy:dns_record_name: tee-dev-az-1 # → tee-dev-az-1.seismicdev.net seismic-tee-deploy:resource_group: seismic-tee-dev seismic-tee-deploy:vm_name: tee-dev-az-1 # D = general-purpose, C = confidential, e = Intel TDX, d = local disk, # s = premium storage capable. - seismic-tee-deploy:vm_size: Standard_DC4eds_v6 + seismic-tee-deploy:vm_size: Standard_DC4es_v6 # Keep small for dev deploys: first-boot dm-integrity initialization is slow # on large disks. See README "Local testing: data-disk formatting time". - seismic-tee-deploy:data_disk_size_gb: "32" + seismic-tee-deploy:data_disk_size_gb: "8" # Source IP (CIDR) allowed to SSH. Use your public IP to lock down :22. seismic-tee-deploy:source_ip_cidr: 0.0.0.0/0 encryptionsalt: v1:cgAomGIsWCk=:v1:/sXIE2a9HDrpnOBC:cctZJQ9Ww80lgIXuoQaJ+RJ8ewzu2A== diff --git a/deploy_tee/tests/test_cohort_configure.py b/deploy_tee/tests/test_cohort_configure.py new file mode 100644 index 0000000..95f6762 --- /dev/null +++ b/deploy_tee/tests/test_cohort_configure.py @@ -0,0 +1,71 @@ +"""Tests for deploy_tee.cohort_configure role/peer assignment (stdlib unittest). 
+ +Covers only the pure logic worth pinning — how the cohort is assembled from +descriptors (exactly one genesis, joiners pointed at genesis:7878). The +parallel driver + dashboard are verified by hand against live nodes. + +Run with: + uv run python -m unittest discover -s deploy_tee/tests -v +""" + +import json +import tempfile +import unittest +from pathlib import Path + +from deploy_tee.cohort_configure import build_cohort +from deploy_tee.configure import ENCLAVE_PEER_PORT + + +def _descriptor(name: str, public_ip: str, fqdn: str) -> Path: + d = tempfile.mkdtemp() + path = Path(d) / f"{name}.json" + path.write_text(json.dumps({"public_ip": public_ip, "fqdn": fqdn})) + return path + + +class BuildCohortTests(unittest.TestCase): + def test_genesis_and_joiners(self): + g = _descriptor("node-1", "1.1.1.1", "n1.example.com") + j2 = _descriptor("node-2", "2.2.2.2", "n2.example.com") + j3 = _descriptor("node-3", "3.3.3.3", "n3.example.com") + + nodes = build_cohort(g, [j2, j3]) + + # Genesis first, exactly one, no peers (it mints root_key). + self.assertEqual(len(nodes), 3) + self.assertTrue(nodes[0].genesis) + self.assertEqual(nodes[0].name, "node-1") + self.assertEqual(nodes[0].peers, []) + self.assertEqual(sum(n.genesis for n in nodes), 1) + + # Every joiner points only at the genesis node's enclave endpoint. + genesis_peer = f"http://1.1.1.1:{ENCLAVE_PEER_PORT}" + for joiner in nodes[1:]: + self.assertFalse(joiner.genesis) + self.assertEqual(joiner.peers, [genesis_peer]) + + def test_genesis_only(self): + g = _descriptor("node-1", "1.1.1.1", "n1.example.com") + nodes = build_cohort(g, []) + self.assertEqual(len(nodes), 1) + self.assertTrue(nodes[0].genesis) + self.assertEqual(nodes[0].peers, []) + + def test_duplicate_descriptor_rejected(self): + # Same descriptor as --genesis and --join would race conflicting POSTs + # (genesis_node=true and =false) against one node. 
+ g = _descriptor("node-1", "1.1.1.1", "n1.example.com") + with self.assertRaises(SystemExit): + build_cohort(g, [g]) + + def test_duplicate_ip_rejected(self): + # Distinct descriptor files can still point at the same node. + g = _descriptor("node-1", "1.1.1.1", "n1.example.com") + j = _descriptor("node-2", "1.1.1.1", "n2.example.com") + with self.assertRaises(SystemExit): + build_cohort(g, [j]) + + +if __name__ == "__main__": + unittest.main()