From 70659a6b0247b21239f150e889491c9a80f2afa7 Mon Sep 17 00:00:00 2001
From: Patrick Lee Scott
Date: Fri, 29 May 2026 17:29:35 -0500
Subject: [PATCH] feat(local): per-provider DRCs, `hops local doctor`, and global --context
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Split the shared `local-dev` DeploymentRuntimeConfig into per-provider
DRCs (local-dev-kubernetes, local-dev-helm), each with its own
cluster-admin ServiceAccount + ClusterRoleBinding, and point each
provider's runtimeConfigRef at its own DRC. A shared DRC let one
provider's runtime image/SA silently clobber the other's pod.

Add `hops local doctor`: verifies what `hops local start` set up —
crossplane, both providers (installed / healthy / runtimeConfigRef
pinned to its own DRC / DRC present / cluster-admin binding /
ProviderConfig) and the registry — and reports drift with a non-zero
exit + remediation. Catches a provider whose runtimeConfigRef reverted
to `default`, dropping its cluster-admin SA (which breaks observing XRs
through the in-cluster ProviderConfig).

Add a global `--context` flag to `hops local` so every subcommand can
target a context (e.g. `hops local aws --refresh --profile hops
--context colima`), given before or after the subcommand. Plumbs
through HOPS_KUBE_CONTEXT_ENV like config/provider install.

Co-Authored-By: Claude Opus 4.8 (1M context)
---
 bootstrap/drc/helm.yaml                      |  28 ++
 bootstrap/drc/kubernetes.yaml                |  28 ++
 bootstrap/drc/local-dev.yaml                 |  21 --
 bootstrap/providers/provider-helm.yaml       |   2 +-
 bootstrap/providers/provider-kubernetes.yaml |   2 +-
 src/commands/local/doctor.rs                 | 327 +++++++++++++++++++
 src/commands/local/mod.rs                    |  17 +
 src/commands/local/start.rs                  |  12 +-
 8 files changed, 410 insertions(+), 27 deletions(-)
 create mode 100644 bootstrap/drc/helm.yaml
 create mode 100644 bootstrap/drc/kubernetes.yaml
 delete mode 100644 bootstrap/drc/local-dev.yaml
 create mode 100644 src/commands/local/doctor.rs

diff --git a/bootstrap/drc/helm.yaml b/bootstrap/drc/helm.yaml
new file mode 100644
index 0000000..0c5c6b0
--- /dev/null
+++ b/bootstrap/drc/helm.yaml
@@ -0,0 +1,28 @@
+# Per-provider DeploymentRuntimeConfig for provider-helm (local dev).
+#
+# Each provider gets its OWN uniquely-named DRC + cluster-admin ServiceAccount.
+# Providers must NOT share a DRC: a shared runtime config lets one provider's
+# image/SA silently clobber the other's pod (and makes drift impossible to
+# attribute). This mirrors the per-provider DRCs the
+# crossplane-helm-provider-stack composes for remote clusters.
+apiVersion: pkg.crossplane.io/v1beta1
+kind: DeploymentRuntimeConfig
+metadata:
+  name: local-dev-helm
+spec:
+  serviceAccountTemplate:
+    metadata:
+      name: local-dev-helm
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRoleBinding
+metadata:
+  name: local-dev-helm-cluster-admin
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: ClusterRole
+  name: cluster-admin
+subjects:
+  - kind: ServiceAccount
+    name: local-dev-helm
+    namespace: crossplane-system
diff --git a/bootstrap/drc/kubernetes.yaml b/bootstrap/drc/kubernetes.yaml
new file mode 100644
index 0000000..7ce229d
--- /dev/null
+++ b/bootstrap/drc/kubernetes.yaml
@@ -0,0 +1,28 @@
+# Per-provider DeploymentRuntimeConfig for provider-kubernetes (local dev).
+#
+# Each provider gets its OWN uniquely-named DRC + cluster-admin ServiceAccount.
+# Providers must NOT share a DRC: a shared runtime config lets one provider's
+# image/SA silently clobber the other's pod (and makes drift impossible to
+# attribute). This mirrors the per-provider DRCs the
+# crossplane-kubernetes-provider-stack composes for remote clusters.
+apiVersion: pkg.crossplane.io/v1beta1
+kind: DeploymentRuntimeConfig
+metadata:
+  name: local-dev-kubernetes
+spec:
+  serviceAccountTemplate:
+    metadata:
+      name: local-dev-kubernetes
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRoleBinding
+metadata:
+  name: local-dev-kubernetes-cluster-admin
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: ClusterRole
+  name: cluster-admin
+subjects:
+  - kind: ServiceAccount
+    name: local-dev-kubernetes
+    namespace: crossplane-system
diff --git a/bootstrap/drc/local-dev.yaml b/bootstrap/drc/local-dev.yaml
deleted file mode 100644
index 59189a4..0000000
--- a/bootstrap/drc/local-dev.yaml
+++ /dev/null
@@ -1,21 +0,0 @@
-apiVersion: pkg.crossplane.io/v1beta1
-kind: DeploymentRuntimeConfig
-metadata:
-  name: local-dev
-spec:
-  serviceAccountTemplate:
-    metadata:
-      name: local-dev
----
-apiVersion: rbac.authorization.k8s.io/v1
-kind: ClusterRoleBinding
-metadata:
-  name: local-dev-cluster-admin
-roleRef:
-  apiGroup: rbac.authorization.k8s.io
-  kind: ClusterRole
-  name: cluster-admin
-subjects:
-  - kind: ServiceAccount
-    name: local-dev
-    namespace: crossplane-system
diff --git a/bootstrap/providers/provider-helm.yaml b/bootstrap/providers/provider-helm.yaml
index 42dba53..6eb6449 100644
--- a/bootstrap/providers/provider-helm.yaml
+++ b/bootstrap/providers/provider-helm.yaml
@@ -5,4 +5,4 @@ metadata:
 spec:
   package: xpkg.crossplane.io/crossplane-contrib/provider-helm:v1.1.0
   runtimeConfigRef:
-    name: local-dev
+    name: local-dev-helm
diff --git a/bootstrap/providers/provider-kubernetes.yaml b/bootstrap/providers/provider-kubernetes.yaml
index 9e7a2dc..e89eedd 100644
--- a/bootstrap/providers/provider-kubernetes.yaml
+++ b/bootstrap/providers/provider-kubernetes.yaml
@@ -5,4 +5,4 @@ metadata:
 spec:
   package: xpkg.crossplane.io/crossplane-contrib/provider-kubernetes:v1.2.0
   runtimeConfigRef:
-    name: local-dev
+    name: local-dev-kubernetes
diff --git a/src/commands/local/doctor.rs b/src/commands/local/doctor.rs
new file mode 100644
index 0000000..979a445
--- /dev/null
+++ b/src/commands/local/doctor.rs
@@ -0,0 +1,327 @@
+use super::run_cmd_output;
+use std::error::Error;
+
+/// `hops local doctor` — verify what `hops local start` set up on the current
+/// cluster and surface drift.
+///
+/// The motivating failure: a provider's `runtimeConfigRef` can silently revert
+/// to `default` (e.g. when a `dependsOn` re-resolution re-owns the Provider),
+/// dropping the pinned cluster-admin ServiceAccount. The provider then can't
+/// observe XRs through the in-cluster `default` ProviderConfig, breaking the
+/// consumer-Observe pattern with no obvious signal. This command makes that
+/// drift (and missing DRCs / bindings / ProviderConfigs) visible.
+pub fn run() -> Result<(), Box<dyn Error>> {
+    match std::env::var(super::HOPS_KUBE_CONTEXT_ENV) {
+        Ok(ctx) if !ctx.is_empty() => {
+            log::info!("Checking local cluster setup (context: {})...", ctx)
+        }
+        _ => log::info!("Checking local cluster setup (current kube context)..."),
+    }
+
+    let mut d = Doctor::new();
+
+    d.section("Crossplane");
+    let xp = deployment_available("crossplane-system", "crossplane");
+    d.check(
+        "crossplane deployment Available",
+        xp,
+        if xp {
+            String::new()
+        } else {
+            "crossplane is not Available in crossplane-system".into()
+        },
+    );
+
+    // The two providers `hops local start` bootstraps, each with its OWN
+    // per-provider cluster-admin DRC (never shared).
+    check_provider(
+        &mut d,
+        &ProviderExpectation {
+            title: "provider-kubernetes",
+            provider_name: "crossplane-contrib-provider-kubernetes",
+            drc: "local-dev-kubernetes",
+            binding: "local-dev-kubernetes-cluster-admin",
+            sa: "local-dev-kubernetes",
+            pc_resource: "providerconfig.kubernetes.m.crossplane.io",
+            pc_name: "default",
+            pc_namespace: "default",
+        },
+    );
+    check_provider(
+        &mut d,
+        &ProviderExpectation {
+            title: "provider-helm",
+            provider_name: "crossplane-contrib-provider-helm",
+            drc: "local-dev-helm",
+            binding: "local-dev-helm-cluster-admin",
+            sa: "local-dev-helm",
+            pc_resource: "providerconfig.helm.m.crossplane.io",
+            pc_name: "default",
+            pc_namespace: "default",
+        },
+    );
+
+    d.section("Registry");
+    let reg = deployment_available("crossplane-system", "registry");
+    d.check(
+        "local package registry Available",
+        reg,
+        if reg {
+            String::new()
+        } else {
+            "registry deployment not Available in crossplane-system".into()
+        },
+    );
+
+    d.print();
+
+    if d.ok() {
+        Ok(())
+    } else {
+        Err(format!(
+            "{} check(s) failed. If a provider drifted off its DRC, re-run `hops local start` to re-apply the bootstrap.",
+            d.failed_count()
+        )
+        .into())
+    }
+}
+
+/// What a bootstrapped provider should look like once `hops local start` ran.
+struct ProviderExpectation<'a> {
+    title: &'a str,
+    provider_name: &'a str,
+    drc: &'a str,
+    binding: &'a str,
+    sa: &'a str,
+    pc_resource: &'a str,
+    pc_name: &'a str,
+    pc_namespace: &'a str,
+}
+
+fn check_provider(d: &mut Doctor, e: &ProviderExpectation) {
+    d.section(e.title);
+
+    if !exists(&["get", "provider.pkg.crossplane.io", e.provider_name]) {
+        d.check(
+            "Provider installed",
+            false,
+            format!("Provider '{}' not found", e.provider_name),
+        );
+        return;
+    }
+
+    let installed = provider_condition(e.provider_name, "Installed");
+    d.check(
+        "Provider installed",
+        installed == "True",
+        cond_detail("Installed", &installed),
+    );
+
+    let healthy = provider_condition(e.provider_name, "Healthy");
+    d.check(
+        "Provider healthy",
+        healthy == "True",
+        cond_detail("Healthy", &healthy),
+    );
+
+    // Drift detector: the Provider must point at its OWN per-provider DRC.
+    let rc = jsonpath(&[
+        "get",
+        "provider.pkg.crossplane.io",
+        e.provider_name,
+        "-o",
+        "jsonpath={.spec.runtimeConfigRef.name}",
+    ])
+    .unwrap_or_default();
+    let rc_ok = rc == e.drc;
+    d.check(
+        "runtimeConfigRef pinned to its own DRC",
+        rc_ok,
+        if rc_ok {
+            String::new()
+        } else {
+            format!(
+                "runtimeConfigRef is \"{}\" (expected \"{}\") — drifted; provider lacks its cluster-admin ServiceAccount and cannot observe XRs",
+                if rc.is_empty() { "<unset>" } else { &rc },
+                e.drc
+            )
+        },
+    );
+
+    let drc_ok = exists(&["get", "deploymentruntimeconfig", e.drc]);
+    d.check(
+        "DeploymentRuntimeConfig present",
+        drc_ok,
+        if drc_ok {
+            String::new()
+        } else {
+            format!("DeploymentRuntimeConfig \"{}\" missing", e.drc)
+        },
+    );
+
+    // cluster-admin ClusterRoleBinding bound to the pinned SA.
+    let role = jsonpath(&[
+        "get",
+        "clusterrolebinding",
+        e.binding,
+        "-o",
+        "jsonpath={.roleRef.name}",
+    ]);
+    let subjects = jsonpath(&[
+        "get",
+        "clusterrolebinding",
+        e.binding,
+        "-o",
+        "jsonpath={.subjects[?(@.kind==\"ServiceAccount\")].name}",
+    ]);
+    let binding_ok = role.as_deref() == Some("cluster-admin")
+        && subjects
+            .as_deref()
+            .map(|s| s.split_whitespace().any(|n| n == e.sa))
+            .unwrap_or(false);
+    d.check(
+        "cluster-admin binding -> pinned SA",
+        binding_ok,
+        if binding_ok {
+            String::new()
+        } else {
+            format!(
+                "ClusterRoleBinding \"{}\" missing or not binding cluster-admin to ServiceAccount \"{}\"",
+                e.binding, e.sa
+            )
+        },
+    );
+
+    let pc_ok = exists(&[
+        "get",
+        e.pc_resource,
+        e.pc_name,
+        "-n",
+        e.pc_namespace,
+    ]);
+    d.check(
+        "ProviderConfig present",
+        pc_ok,
+        if pc_ok {
+            String::new()
+        } else {
+            format!("{}/{} missing in namespace {}", e.pc_resource, e.pc_name, e.pc_namespace)
+        },
+    );
+}
+
+fn provider_condition(provider: &str, cond: &str) -> String {
+    jsonpath(&[
+        "get",
+        "provider.pkg.crossplane.io",
+        provider,
+        "-o",
+        &format!("jsonpath={{.status.conditions[?(@.type==\"{}\")].status}}", cond),
+    ])
+    .unwrap_or_default()
+}
+
+fn cond_detail(cond: &str, status: &str) -> String {
+    if status == "True" {
+        String::new()
+    } else {
+        format!("{}={}", cond, if status.is_empty() { "<unknown>" } else { status })
+    }
+}
+
+/// True when `kubectl get <args>` finds the resource. Uses `--ignore-not-found`
+/// so a missing resource is `false` rather than an error.
+fn exists(get_args: &[&str]) -> bool {
+    let mut args = get_args.to_vec();
+    args.extend_from_slice(&["--ignore-not-found", "-o", "name"]);
+    run_cmd_output("kubectl", &args)
+        .map(|s| !s.trim().is_empty())
+        .unwrap_or(false)
+}
+
+/// Run a kubectl query, returning the trimmed stdout or `None` if kubectl errors
+/// (e.g. the resource does not exist).
+fn jsonpath(args: &[&str]) -> Option<String> {
+    run_cmd_output("kubectl", args)
+        .ok()
+        .map(|s| s.trim().to_string())
+}
+
+fn deployment_available(ns: &str, name: &str) -> bool {
+    jsonpath(&[
+        "get",
+        "deployment",
+        name,
+        "-n",
+        ns,
+        "-o",
+        "jsonpath={.status.conditions[?(@.type==\"Available\")].status}",
+    ])
+    .map(|s| s == "True")
+    .unwrap_or(false)
+}
+
+enum Entry {
+    Section(String),
+    Check {
+        label: String,
+        ok: bool,
+        detail: String,
+    },
+}
+
+struct Doctor {
+    entries: Vec<Entry>,
+}
+
+impl Doctor {
+    fn new() -> Self {
+        Doctor {
+            entries: Vec::new(),
+        }
+    }
+
+    fn section(&mut self, title: &str) {
+        self.entries.push(Entry::Section(title.to_string()));
+    }
+
+    fn check(&mut self, label: &str, ok: bool, detail: String) {
+        self.entries.push(Entry::Check {
+            label: label.to_string(),
+            ok,
+            detail,
+        });
+    }
+
+    fn failed_count(&self) -> usize {
+        self.entries
+            .iter()
+            .filter(|e| matches!(e, Entry::Check { ok: false, .. }))
+            .count()
+    }
+
+    fn ok(&self) -> bool {
+        self.failed_count() == 0
+    }
+
+    fn print(&self) {
+        for entry in &self.entries {
+            match entry {
+                Entry::Section(title) => println!("\n{}", title),
+                Entry::Check { label, ok, detail } => {
+                    let mark = if *ok { "✓" } else { "✗" };
+                    if detail.is_empty() {
+                        println!(" {} {}", mark, label);
+                    } else {
+                        println!(" {} {} — {}", mark, label, detail);
+                    }
+                }
+            }
+        }
+        if self.ok() {
+            println!("\nAll checks passed.");
+        } else {
+            println!("\n{} issue(s) found.", self.failed_count());
+        }
+    }
+}
diff --git a/src/commands/local/mod.rs b/src/commands/local/mod.rs
index 9d7a665..0fa9967 100644
--- a/src/commands/local/mod.rs
+++ b/src/commands/local/mod.rs
@@ -1,5 +1,6 @@
 mod aws;
 mod destroy;
+mod doctor;
 mod github;
 mod install;
 mod listmonk;
@@ -50,6 +51,12 @@ pub fn kubectl_command(args: &[&str]) -> Command {
 pub struct LocalArgs {
     #[command(subcommand)]
     pub command: LocalCommands,
+
+    /// Kubernetes context to use for all kubectl commands (e.g. "colima").
+    /// Global: applies to every `hops local` subcommand and may be given before
+    /// or after the subcommand.
+    #[arg(long, global = true)]
+    pub context: Option<String>,
 }
 
 #[derive(Subcommand, Debug)]
@@ -60,6 +67,8 @@ pub enum LocalCommands {
     Reset,
     /// Start local k8s cluster with Crossplane and providers
     Start,
+    /// Check what `hops local start` set up and report drift
+    Doctor,
     /// Configure crossplane-contrib provider-family-aws and AWS ProviderConfig
     Aws(aws::AwsArgs),
     /// Configure crossplane-contrib provider-upjet-github and GitHub ProviderConfig
@@ -77,10 +86,18 @@ pub enum LocalCommands {
 }
 
 pub fn run(args: &LocalArgs) -> Result<(), Box<dyn Error>> {
+    // Plumb --context through the same env channel the kubectl helpers read, so
+    // every subcommand's kubectl calls target the chosen context.
+    if let Some(ctx) = &args.context {
+        if !ctx.is_empty() {
+            std::env::set_var(HOPS_KUBE_CONTEXT_ENV, ctx);
+        }
+    }
     match &args.command {
         LocalCommands::Install => install::run(),
         LocalCommands::Reset => reset::run(),
         LocalCommands::Start => start::run(),
+        LocalCommands::Doctor => doctor::run(),
         LocalCommands::Aws(aws_args) => aws::run(aws_args),
         LocalCommands::Github(github_args) => github::run(github_args),
         LocalCommands::Zitadel(zitadel_args) => zitadel::run(zitadel_args),
diff --git a/src/commands/local/start.rs b/src/commands/local/start.rs
index ee7cab4..d0a4d8c 100644
--- a/src/commands/local/start.rs
+++ b/src/commands/local/start.rs
@@ -5,7 +5,10 @@ use std::process::{Command, Stdio};
 use std::thread;
 use std::time::Duration;
 
-const DRC: &str = include_str!("../../../bootstrap/drc/local-dev.yaml");
+// Per-provider DRCs — never shared. Each pins its own cluster-admin SA so the
+// providers can never clobber each other's runtime config. See bootstrap/drc/.
+const DRC_K8S: &str = include_str!("../../../bootstrap/drc/kubernetes.yaml");
+const DRC_HELM: &str = include_str!("../../../bootstrap/drc/helm.yaml");
 const PROVIDER_HELM: &str = include_str!("../../../bootstrap/providers/provider-helm.yaml");
 const PROVIDER_K8S: &str = include_str!("../../../bootstrap/providers/provider-kubernetes.yaml");
 const PC_HELM: &str = include_str!("../../../bootstrap/helm/pc.yaml");
@@ -78,9 +81,10 @@ pub fn run() -> Result<(), Box<dyn Error>> {
     log::info!("Waiting for Crossplane to be ready...");
     wait_for_deployment("crossplane-system", "crossplane")?;
 
-    // 7. Deploy DRC (cluster-admin SA for provider pods)
-    log::info!("Applying DeploymentRuntimeConfig...");
-    kubectl_apply_stdin(DRC)?;
+    // 7. Deploy per-provider DRCs (each pins its own cluster-admin SA)
+    log::info!("Applying DeploymentRuntimeConfigs (per-provider)...");
+    kubectl_apply_stdin(DRC_K8S)?;
+    kubectl_apply_stdin(DRC_HELM)?;
 
     // 8. Install providers
     log::info!("Installing providers...");