Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 15 additions & 12 deletions deploy_tee/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -150,25 +150,28 @@ uv run seismic-tee-bootstrap genesis --node /tmp/dev-bootstrap-node-1.json /tmp/
After step 3 each node is up; after step 4 they produce blocks. RPC is at
`https://<fqdn>/rpc` (the `fqdn` from each descriptor).

## The TOML config (`--config`) and manifest (`--manifest`)
## The TOML config (`--config`), manifest (`--manifest`), and email (`--email`)

The per-node TOML carries `[domain]` + `[enclave]`. The network-wide
manifest is a separate file passed via `--manifest`; `configure` merges it
into the POST as `[network].manifest_base64`, so the manifest is never
copy-pasted into each node.toml. The combined schema is owned by `tdx-init`
(`#[serde(deny_unknown_fields)]`):
The per-node TOML carries **`[enclave]` only**. `configure` assembles the
full POST by appending two tables it owns, so neither is copy-pasted into
each node.toml (and can't drift):
- `[domain]` — `name` from the descriptor's `fqdn` (the cert domain), `email`
from `--email` (default `ops@seismic.systems`).
- `[network].manifest_base64` — from `--manifest`.

```toml
[domain]
name = "az-1.seismicdev.net" # FQDN clients reach this VM at
email = "ops@seismic.systems" # Let's Encrypt registration
`configure` rejects a node.toml that carries `[domain]` or `[network]`. The
combined schema is owned by `tdx-init` (`#[serde(deny_unknown_fields)]`):

```toml
# node.toml — [enclave] only
[enclave] # optional; defaults applied if absent
genesis_node = false # exactly one VM per network sets true
peers = ["http://az-1.seismicdev.net:7878"] # required when genesis_node = false

# [network].manifest_base64 is added by `configure` from --manifest — do NOT
# put it in the node.toml. tdx-init requires it and 400s a POST without it.
# configure appends, before POSTing:
# [domain] name = <descriptor fqdn>, email = <--email>
# [network] manifest_base64 = <--manifest>
# tdx-init requires both and rejects a POST missing either with HTTP 400.
```

Authoritative reference:
Expand Down
77 changes: 52 additions & 25 deletions deploy_tee/configure.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
"""

import argparse
import json
import logging
import tempfile
import time
Expand All @@ -41,28 +42,34 @@
TDX_INIT_RETRY_INTERVAL_SECONDS = 5


def build_merged_config(config_path: Path, manifest_path: Path) -> Path:
"""Merge the per-node config with the network manifest into a throwaway
temp config for POSTing, without mutating either source file.

The manifest is a network-wide artifact shared by every node; the
node.toml is per-node (domain + enclave). Keeping them separate on disk
and merging only here avoids copy-pasting the manifest into each node's
config (and the drift that invites). enclave-server requires the manifest
at startup, so tdx-init now rejects a config without [network] — this is
where that section gets added. The merge is a text append, so the
operator's node.toml comments survive.
def build_merged_config(
config_path: Path, manifest_path: Path, fqdn: str, email: str
) -> Path:
"""Assemble the config POSTed to tdx-init, mutating no source. The fields
come from four inputs: the operator's per-node node.toml (`[enclave]`),
the descriptor's fqdn (→ `[domain].name`, the cert domain), CLI flags
(`--email` → `[domain].email`; more node fields may become flags later),
and the network manifest (`--manifest` → `[network]`).

node.toml carries `[enclave]` only; `[domain]` and `[network]` are appended
here as fresh tables, a plain text append that preserves node.toml comments
and never reopens an existing table. That is also why node.toml must NOT
carry `[domain]`/`[network]` itself — a second copy of the cert domain or
network identity is exactly the drift that breaks cert issuance / forks the
network.
"""
config_text = config_path.read_text(encoding="utf-8")
try:
parsed = tomllib.loads(config_text)
except tomllib.TOMLDecodeError as e:
raise SystemExit(f"--config {config_path}: could not parse TOML: {e}") from None
if "network" in parsed:
raise SystemExit(
f"--config {config_path} already has a [network] section; the "
"manifest must come from --manifest. Remove it from the node.toml."
)
for owned in ("domain", "network"):
if owned in parsed:
raise SystemExit(
f"--config {config_path} must not contain [{owned}] — configure "
"supplies it ([domain] from the descriptor fqdn + --email, "
f"[network] from --manifest). Remove [{owned}] from the node.toml."
)

manifest_bytes = manifest_path.read_bytes()
try:
Expand All @@ -71,9 +78,15 @@ def build_merged_config(config_path: Path, manifest_path: Path) -> Path:
except manifest_mod.ManifestSchemaError as e:
raise SystemExit(f"--manifest {manifest_path}: invalid manifest: {e}") from None

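# json.dumps quotes and escapes these values in a form that is also a valid
# TOML basic string. That holds for the plain-ASCII fqdn/email expected here;
# it is not a general TOML serializer (e.g. non-BMP characters are emitted as
# surrogate-pair \u escapes, which TOML rejects).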
domain_section = (
f"[domain]\nname = {json.dumps(fqdn)}\nemail = {json.dumps(email)}\n"
)
merged = (
config_text.rstrip("\n")
+ "\n\n"
+ domain_section
+ "\n"
+ manifest_mod.render_network_section(manifest_bytes)
)
with tempfile.NamedTemporaryFile(
Expand Down Expand Up @@ -159,9 +172,10 @@ def parse_args() -> argparse.Namespace:
type=Path,
required=True,
help=(
"Path to the per-node TOML ([domain] + [enclave]). Merged with "
"--manifest at POST time; must not itself carry a [network] "
"section. See SeismicSystems/enclave crates/tdx-init for the schema."
"Path to the per-node TOML ([enclave] only: genesis_node, peers). "
"configure supplies [domain] (from the descriptor fqdn + --email) "
"and [network] (from --manifest), so the file must not carry them. "
"See SeismicSystems/enclave crates/tdx-init for the schema."
),
)
parser.add_argument(
Expand All @@ -175,6 +189,15 @@ def parse_args() -> argparse.Namespace:
"shared across every node, so it lives outside the node.toml."
),
)
parser.add_argument(
"--email",
default="ops@seismic.systems",
help=(
"Contact email for the node's Let's Encrypt registration (certbot); "
"goes into [domain].email of the POSTed config. Same across a "
"cohort. Default: ops@seismic.systems."
),
)
parser.add_argument(
"--measurements",
type=Path,
Expand Down Expand Up @@ -233,14 +256,18 @@ def main() -> None:
"attested-tls). Re-run without --measurements to POST config only."
)

# Merge node config + manifest before contacting the node, so bad local
# input fails fast (and a manifest-less POST can't crash-loop the node).
merged_config = build_merged_config(args.config, args.manifest)
logger.info(f"Merged {args.config} + {args.manifest} -> {merged_config}")

descriptor = load_descriptor(args.node)
public_ip = require(descriptor, "public_ip", args.node)
fqdn = descriptor.get("fqdn", public_ip)
# fqdn is the cert domain (→ [domain].name) and must resolve to this node,
# so it's required (no public_ip fallback): a wrong/absent name fails certbot
# at boot. configure injects it from the descriptor rather than the operator
# retyping it in node.toml, so the cert domain and the DNS record can't drift.
fqdn = require(descriptor, "fqdn", args.node)

# Assemble the POST config (node.toml [enclave] + injected [domain]/[network])
# before contacting the node, so bad local input fails fast.
merged_config = build_merged_config(args.config, args.manifest, fqdn, args.email)
logger.info(f"Built config for {fqdn} -> {merged_config}")

logger.info(f"Configuring node {fqdn} ({public_ip})...")
post_config_to_tdx_init(public_ip, merged_config)
Expand Down
6 changes: 3 additions & 3 deletions deploy_tee/pulumi/seismic_node/node.bootstrap.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[domain]
name = "tee-dev-az-1.seismicdev.net"
email = "ops@seismic.systems"
# Per-node config POSTed via `seismic-tee configure`, which supplies the rest:
# [domain] (name from the descriptor fqdn, email from --email) and [network]
# (from --manifest). This file carries [enclave] only.

[enclave]
# Only use this for the first boot of the first node in a brand-new network.
Expand Down
6 changes: 3 additions & 3 deletions deploy_tee/pulumi/seismic_node/node.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[domain]
name = "tee-dev-az-1.seismicdev.net"
email = "ops@seismic.systems"
# Per-node config POSTed via `seismic-tee configure`, which supplies the rest:
# [domain] (name from the descriptor fqdn, email from --email) and [network]
# (from --manifest). This file carries [enclave] only.

[enclave]
# Normal steady-state config. After the initial bootstrap, nodes should fetch
Expand Down
28 changes: 17 additions & 11 deletions deploy_tee/tests/test_configure.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,15 +16,14 @@
# duplicate the schema here; build_merged_config validates it before merging.
from deploy_tee.tests.test_manifest import FIXTURE_MANIFEST

# node.toml carries [enclave] only; configure supplies [domain] and [network].
NODE_TOML = """\
[domain]
name = "node1.example.com"
email = "ops@example.com"

[enclave]
genesis_node = true
peers = []
"""
FQDN = "node1.example.com"
EMAIL = "ops@example.com"


def _write(suffix: str, data) -> Path:
Expand All @@ -49,25 +48,32 @@ def _node(self, text: str) -> Path:
self._tmp.append(p)
return p

def test_merges_manifest_into_network_section(self):
out = build_merged_config(self._node(NODE_TOML), self.manifest)
def test_injects_domain_and_network_keeps_enclave(self):
out = build_merged_config(self._node(NODE_TOML), self.manifest, FQDN, EMAIL)
self._tmp.append(out)
merged = tomllib.loads(out.read_text())
# Manifest landed as [network], and the per-node content is preserved.
# configure supplies [domain] (fqdn + email) and [network]; the
# operator's [enclave] survives untouched.
self.assertEqual(merged["domain"]["name"], FQDN)
self.assertEqual(merged["domain"]["email"], EMAIL)
self.assertTrue(merged["network"]["manifest_base64"])
self.assertTrue(merged["enclave"]["genesis_node"])
self.assertEqual(merged["domain"]["name"], "node1.example.com")

def test_rejects_config_that_already_has_network(self):
def test_rejects_config_with_domain(self):
node = self._node(NODE_TOML + '\n[domain]\nname = "x"\nemail = "y"\n')
with self.assertRaises(SystemExit):
build_merged_config(node, self.manifest, FQDN, EMAIL)

def test_rejects_config_with_network(self):
node = self._node(NODE_TOML + '\n[network]\nmanifest_base64 = "ZWU="\n')
with self.assertRaises(SystemExit):
build_merged_config(node, self.manifest)
build_merged_config(node, self.manifest, FQDN, EMAIL)

def test_rejects_invalid_manifest(self):
bad = _write(".json", "{not json")
self._tmp.append(bad)
with self.assertRaises(SystemExit):
build_merged_config(self._node(NODE_TOML), bad)
build_merged_config(self._node(NODE_TOML), bad, FQDN, EMAIL)


if __name__ == "__main__":
Expand Down
Loading