From 21c844dc9702b0590c7ce5fcf98cf1ce17b5a506 Mon Sep 17 00:00:00 2001 From: Samuel Laferriere <9342524+samlaf@users.noreply.github.com> Date: Fri, 3 Jul 2026 00:21:23 +0800 Subject: [PATCH 1/2] feat(deploy_tee): genesis pins eth_genesis_hash from the manifest MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The ceremony now takes --manifest and pins eth.genesis_hash from it — the value `manifest assemble` computed offline at deploy time — instead of deriving it from whatever the cohort happens to serve. Each node's reth is then asserted to serve that hash as block 0, so a stale image or wrong genesis fails at ceremony time instead of leaving summit parked behind a reth that reports SYNCING forever (summit pinning a hash reth doesn't know produces no error on either side). Failure output lists the whole cohort (✓/✗ per node, unreachable nodes included), so one bad node is distinguishable from a manifest that matches nobody, and a down node doesn't hide the rest. -g becomes a dev-only override; the cohort assertion still runs against the overridden value. Also fix --node to accept the repeated-flag form: it was nargs="+" only, so `--node a.json --node b.json` silently replaced a with b and ran the ceremony against a partial cohort — which is how a uniform cohort disagreeing with a stale manifest masqueraded as a single mismatched node during the N=2 bring-up. Both forms now accumulate, duplicates are rejected, and descriptor paths are validated up front. 
--- deploy_tee/genesis.py | 121 ++++++++++++++++++++++++-- deploy_tee/tests/test_genesis.py | 145 +++++++++++++++++++++++++++++++ 2 files changed, 259 insertions(+), 7 deletions(-) create mode 100644 deploy_tee/tests/test_genesis.py diff --git a/deploy_tee/genesis.py b/deploy_tee/genesis.py index 235f2a1..b9cfe60 100644 --- a/deploy_tee/genesis.py +++ b/deploy_tee/genesis.py @@ -6,6 +6,13 @@ joining an already-bootstrapped network never runs it (it joins via the deposit contract + sync, and is configured with `genesis_node = false`). +`eth_genesis_hash` comes from the network manifest (`--manifest`), where +`manifest assemble` pinned it at deploy time — the ceremony needs no +`seismic-reth` binary. Before building anything, every cohort node's +reth is asserted to actually serve that hash as block 0, so a node +booted from a stale image or wrong genesis fails the ceremony loudly +instead of parking summit in SYNCING forever. + Each node is located by a descriptor file (see deploy_tee/descriptor.py), so this never calls Pulumi. """ @@ -17,6 +24,9 @@ import tempfile from pathlib import Path +import requests + +from deploy_tee import manifest as manifest_mod from deploy_tee.descriptor import load_descriptor, require from deploy_tee.utils.summit_client import SummitClient @@ -45,17 +55,21 @@ ] -def _parse_args() -> argparse.Namespace: +def _parse_args(argv: list[str] | None = None) -> argparse.Namespace: parser = argparse.ArgumentParser() parser.add_argument( "--node", + type=Path, nargs="+", + action="append", required=True, metavar="DESCRIPTOR", help=( "Node descriptor JSON file(s), one per cohort node in validator " - "order (e.g. --node node-1.json node-2.json node-3.json). Each " - "descriptor's fqdn/public_ip locates that node. Produce them " + "order — `--node n1.json n2.json` and `--node n1.json --node " + "n2.json` both work (with plain nargs, a repeated flag silently " + "*replaces* the earlier one and drops nodes from the ceremony). 
" + "Each descriptor's fqdn/public_ip locates that node; produce them " "standalone, e.g. `pulumi stack output --json > node-1.json`." ), ) @@ -70,16 +84,41 @@ def _parse_args() -> argparse.Namespace: "`genesis` binary fills in the cohort's validators." ), ) + parser.add_argument( + "--manifest", + type=Path, + required=True, + metavar="FILE", + help=( + "Network manifest JSON (from `manifest assemble`) — the same file " + "every node was configured with. Its eth.genesis_hash is pinned " + "into genesis.toml." + ), + ) parser.add_argument( "-g", "--genesis-hash", type=str, default=None, - help="Eth genesis hash; overrides the template's. Pin to reth's actual hash.", + help=( + "Dev-only override of the manifest's eth.genesis_hash. The cohort " + "assertion still runs against the overridden value." + ), ) - args = parser.parse_args() + args = parser.parse_args(argv) + # append+nargs yields one list per --node occurrence; flatten to the + # cohort list callers expect. + args.node = [path for group in args.node for path in group] + dupes = sorted({str(p) for p in args.node if args.node.count(p) > 1}) + if dupes: + raise SystemExit(f"duplicate --node descriptor(s): {', '.join(dupes)}") + for path in args.node: + if not path.is_file(): + raise SystemExit(f"--node descriptor not found: {path}") if not args.summit_template.is_file(): raise SystemExit(f"--summit-template not found: {args.summit_template}") + if not args.manifest.is_file(): + raise SystemExit(f"--manifest file not found: {args.manifest}") return args @@ -120,9 +159,61 @@ def _get_pubkeys( return validators, node_clients +def _assert_cohort_genesis_hash(descriptors: list[Path], expected: str) -> None: + """Assert every cohort node's reth serves `expected` as block 0. 
+ + summit's genesis `eth_genesis_hash` must equal reth's real genesis hash: + summit uses it as its initial forkchoice head, and a hash reth doesn't + know parks reth in SYNCING forever with no error on either side (reth + can't tell the unknown hash was meant to be its block 0). The manifest + declares the intended hash; reading each node's live block 0 catches a + node booted from a different image/genesis before the ceremony pins the + validator set — and doubles as a reth-readiness probe before + send_genesis. + """ + # Query every node before judging, and report the full cohort on failure — + # a partial listing is ambiguous with "stopped at the first bad node", and + # which nodes match is exactly the diagnostic (one stale node vs. a + # manifest that matches nobody). + observed: dict[Path, str] = {} # block-0 hash, or an error description + for path in descriptors: + descriptor = load_descriptor(path) + fqdn = require(descriptor, "fqdn", path) + url = f"https://{fqdn}/rpc" # nginx proxies /rpc -> reth :8545 + try: + response = requests.post( + url, + json={ + "jsonrpc": "2.0", + "id": 1, + "method": "eth_getBlockByNumber", + "params": ["0x0", False], + }, + timeout=30, + ) + response.raise_for_status() + data = response.json() + if data.get("result") is None: + raise RuntimeError( + f"eth_getBlockByNumber returned {data.get('error') or data}" + ) + observed[path] = data["result"]["hash"] + except Exception as e: + observed[path] = f"unreachable via {url}: {e}" + if any(h.lower() != expected.lower() for h in observed.values()): + listing = "\n".join( + f" {'✓' if h.lower() == expected.lower() else '✗'} {path}: {h}" + for path, h in observed.items() + ) + raise SystemExit( + "Cohort disagrees with the declared eth_genesis_hash (stale image " + "or wrong reth genesis?); refusing to build genesis.toml:\n" + f" declared: {expected}\n{listing}" + ) + + def main(): args = _parse_args() - genesis_arg = ["-g", args.genesis_hash] if args.genesis_hash else [] # 
`genesis` is summit's binary; expect it on PATH (build summit and symlink # its target/debug/genesis onto PATH, the same way summit expects `reth`). @@ -135,6 +226,21 @@ def main(): "`ln -s /target/debug/genesis ~/.cargo/bin/genesis`." ) + try: + manifest = manifest_mod.validate_manifest_schema(args.manifest.read_bytes()) + except manifest_mod.ManifestSchemaError as e: + raise SystemExit(f"--manifest {args.manifest}: invalid manifest: {e}") from None + manifest_hash = manifest["eth"]["genesis_hash"] + + genesis_hash = args.genesis_hash or manifest_hash + if genesis_hash.lower() != manifest_hash.lower(): + print( + f"WARNING: -g {genesis_hash} overrides the manifest's " + f"eth.genesis_hash {manifest_hash}" + ) + print(f"Pinning eth_genesis_hash = {genesis_hash}") + _assert_cohort_genesis_hash(args.node, genesis_hash) + tmpdir = tempfile.mkdtemp() validators, node_clients = _get_pubkeys(args.node) @@ -153,7 +259,8 @@ def main(): str(args.summit_template), "-v", tmp_validators, - *genesis_arg, + "-g", + genesis_hash, ], check=True, ) diff --git a/deploy_tee/tests/test_genesis.py b/deploy_tee/tests/test_genesis.py new file mode 100644 index 0000000..53e805e --- /dev/null +++ b/deploy_tee/tests/test_genesis.py @@ -0,0 +1,145 @@ +"""Tests for deploy_tee.genesis (stdlib unittest; no test deps). 
+ +Run with: + uv run python -m unittest discover -s deploy_tee/tests -v +""" + +import json +import tempfile +import unittest +from pathlib import Path +from unittest import mock + +from deploy_tee import genesis + + +class ParseArgsTests(unittest.TestCase): + """Both `--node a b` and `--node a --node b` must yield the full cohort — + with plain nargs="+", a repeated flag silently replaced the earlier one + and the ceremony ran against a partial cohort.""" + + def setUp(self): + self._tmp = tempfile.TemporaryDirectory() + self.addCleanup(self._tmp.cleanup) + self.n1 = self._file("n1.json") + self.n2 = self._file("n2.json") + self.template = self._file("template.toml") + self.manifest = self._file("manifest.json") + self.common = ["--summit-template", str(self.template), + "--manifest", str(self.manifest)] + + def _file(self, name: str) -> Path: + path = Path(self._tmp.name) / name + path.write_text("{}") + return path + + def test_single_flag_multiple_values(self): + args = genesis._parse_args( + ["--node", str(self.n1), str(self.n2), *self.common] + ) + self.assertEqual(args.node, [self.n1, self.n2]) + + def test_repeated_flag_accumulates(self): + args = genesis._parse_args( + ["--node", str(self.n1), "--node", str(self.n2), *self.common] + ) + self.assertEqual(args.node, [self.n1, self.n2]) + + def test_duplicate_descriptor_rejected(self): + with self.assertRaises(SystemExit) as ctx: + genesis._parse_args( + ["--node", str(self.n1), "--node", str(self.n1), *self.common] + ) + self.assertIn("duplicate", str(ctx.exception)) + + +class AssertCohortGenesisHashTests(unittest.TestCase): + """The manifest declares eth_genesis_hash; every node's live reth must + serve it as block 0, else the ceremony refuses (stale image / wrong + genesis on that node).""" + + HASH = "0x" + "ab" * 32 + OTHER_HASH = "0x" + "cd" * 32 + + def setUp(self): + self._tmp = tempfile.TemporaryDirectory() + self.addCleanup(self._tmp.cleanup) + + def _descriptor(self, name: str, fqdn: str) -> 
Path: + path = Path(self._tmp.name) / f"{name}.json" + path.write_text(json.dumps({"public_ip": "203.0.113.1", "fqdn": fqdn})) + return path + + def _resp(self, body: dict): + r = mock.Mock() + r.raise_for_status.return_value = None + r.json.return_value = body + return r + + def _block_resp(self, hash_: str): + return self._resp({"jsonrpc": "2.0", "id": 1, "result": {"hash": hash_}}) + + def test_matching_cohort_passes(self): + nodes = [ + self._descriptor("node-1", "a.example"), + self._descriptor("node-2", "b.example"), + ] + responses = [self._block_resp(self.HASH), self._block_resp(self.HASH)] + with mock.patch.object(genesis.requests, "post", side_effect=responses) as post: + genesis._assert_cohort_genesis_hash(nodes, self.HASH) + # Queries block 0 on each node's reth through nginx's /rpc proxy. + urls = [call.args[0] for call in post.call_args_list] + self.assertEqual(urls, ["https://a.example/rpc", "https://b.example/rpc"]) + envelope = post.call_args[1]["json"] + self.assertEqual(envelope["method"], "eth_getBlockByNumber") + self.assertEqual(envelope["params"], ["0x0", False]) + + def test_hash_comparison_is_case_insensitive(self): + nodes = [self._descriptor("node-1", "a.example")] + with mock.patch.object( + genesis.requests, "post", return_value=self._block_resp(self.HASH) + ): + genesis._assert_cohort_genesis_hash(nodes, self.HASH.upper()) + + def test_mismatching_node_exits_listing_whole_cohort(self): + nodes = [ + self._descriptor("node-1", "a.example"), + self._descriptor("node-2", "b.example"), + ] + responses = [self._block_resp(self.HASH), self._block_resp(self.OTHER_HASH)] + with mock.patch.object(genesis.requests, "post", side_effect=responses): + with self.assertRaises(SystemExit) as ctx: + genesis._assert_cohort_genesis_hash(nodes, self.HASH) + # Full-cohort listing: the matching node too, so one bad node is + # distinguishable from a manifest that matches nobody. 
+ msg = str(ctx.exception) + self.assertIn(self.HASH, msg) + self.assertIn(f"✓ {nodes[0]}", msg) + self.assertIn(f"✗ {nodes[1]}", msg) + self.assertIn(self.OTHER_HASH, msg) + + def test_unreachable_node_reported_without_hiding_others(self): + # First node down; second still queried and reported. + nodes = [ + self._descriptor("node-1", "a.example"), + self._descriptor("node-2", "b.example"), + ] + responses = [OSError("boom"), self._block_resp(self.HASH)] + with mock.patch.object(genesis.requests, "post", side_effect=responses): + with self.assertRaises(SystemExit) as ctx: + genesis._assert_cohort_genesis_hash(nodes, self.HASH) + msg = str(ctx.exception) + self.assertIn("https://a.example/rpc", msg) + self.assertIn(f"✓ {nodes[1]}", msg) + + def test_rpc_error_response_exits(self): + nodes = [self._descriptor("node-1", "a.example")] + resp = self._resp({"jsonrpc": "2.0", "id": 1, "error": {"message": "boom"}}) + with mock.patch.object(genesis.requests, "post", return_value=resp): + with self.assertRaises(SystemExit) as ctx: + genesis._assert_cohort_genesis_hash(nodes, self.HASH) + self.assertIn("boom", str(ctx.exception)) + + +if __name__ == "__main__": + unittest.main() From 21db915588f7ced1d991a28dcff4480614b24265 Mon Sep 17 00:00:00 2001 From: Samuel Laferriere <9342524+samlaf@users.noreply.github.com> Date: Fri, 3 Jul 2026 00:38:10 +0800 Subject: [PATCH 2/2] format --- deploy_tee/tests/test_genesis.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/deploy_tee/tests/test_genesis.py b/deploy_tee/tests/test_genesis.py index 53e805e..6a86b8a 100644 --- a/deploy_tee/tests/test_genesis.py +++ b/deploy_tee/tests/test_genesis.py @@ -25,8 +25,12 @@ def setUp(self): self.n2 = self._file("n2.json") self.template = self._file("template.toml") self.manifest = self._file("manifest.json") - self.common = ["--summit-template", str(self.template), - "--manifest", str(self.manifest)] + self.common = [ + "--summit-template", + str(self.template), + 
"--manifest", + str(self.manifest), + ] def _file(self, name: str) -> Path: path = Path(self._tmp.name) / name @@ -34,9 +38,7 @@ def _file(self, name: str) -> Path: return path def test_single_flag_multiple_values(self): - args = genesis._parse_args( - ["--node", str(self.n1), str(self.n2), *self.common] - ) + args = genesis._parse_args(["--node", str(self.n1), str(self.n2), *self.common]) self.assertEqual(args.node, [self.n1, self.n2]) def test_repeated_flag_accumulates(self):