From 897d2d6347a85c61635a0dfda855354e0409a171 Mon Sep 17 00:00:00 2001 From: bdchatham Date: Tue, 21 Apr 2026 09:56:08 -0700 Subject: [PATCH] feat: SeiNode import existing PVC + ensure-data-pvc create-path fix MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implements the direction + LLD merged in #106 and #112. New spec field spec.dataVolume.import.pvcName (optional, immutable, DNS1123-validated) lets a SeiNode adopt a pre-existing PVC instead of provisioning one. The controller validates the referenced PVC against seven requirements (exists, not terminating, Bound, RWO, capacity >= mode default, underlying PV capacity consistent, PV not Failed) and never mutates it. Transient failures loop at the executor's TaskPollInterval; unrecoverable failures mark the plan Failed with a specific Reason. Tracked via the new ImportPVCReady Condition on SeiNode status. Reason strings are treated as a public alerting contract — renames are breaking changes. ensure-data-pvc now uses Get-then-Create on the create path. If a PVC with the expected name exists and is NOT controlled by this SeiNode, the task fails terminally (this is the #104 fix). If it IS controlled, treat as Complete (crash-recovery idempotence). Finalizer (deleteNodeDataPVC) skips PVC deletion when import is set — operator owns the storage lifecycle for imported volumes. RBAC adds get/list/watch on persistentvolumes for the PV consistency checks; no write verbs. Tests: 18 unit tests in internal/task/ensure_pvc_test.go covering create-path happy/crash-recovery/non-controlled-conflict and each of the seven import-path validation requirements in both transient and terminal classification. 5 integration tests in internal/controller/node/import_pvc_test.go covering end-to-end import reaches-Running, late-PVC-converges, terminal-marks-Failed, deletion-preserves-imported-PVC, and a regression guard for the non-import deletion path. 
Deferred to a follow-up: metrics and events from LLD §8. The feature is functionally complete without them; separate PR to keep this one focused. Closes #105. Co-Authored-By: Claude Opus 4.7 (1M context) --- api/v1alpha1/seinode_types.go | 53 +++ api/v1alpha1/zz_generated.deepcopy.go | 40 ++ config/crd/sei.io_seinodedeployments.yaml | 31 ++ config/crd/sei.io_seinodes.yaml | 31 ++ config/rbac/role.yaml | 1 + internal/controller/node/controller.go | 53 +++ internal/controller/node/import_pvc_test.go | 222 +++++++++ internal/planner/planner.go | 103 +++++ internal/task/ensure_pvc.go | 218 ++++++++- internal/task/ensure_pvc_test.go | 474 ++++++++++++++++++++ manifests/role.yaml | 1 + manifests/sei.io_seinodedeployments.yaml | 31 ++ manifests/sei.io_seinodes.yaml | 31 ++ 13 files changed, 1284 insertions(+), 5 deletions(-) create mode 100644 internal/controller/node/import_pvc_test.go create mode 100644 internal/task/ensure_pvc_test.go diff --git a/api/v1alpha1/seinode_types.go b/api/v1alpha1/seinode_types.go index 048a89a..c37bb33 100644 --- a/api/v1alpha1/seinode_types.go +++ b/api/v1alpha1/seinode_types.go @@ -47,6 +47,12 @@ type SeiNodeSpec struct { // +optional PodLabels map[string]string `json:"podLabels,omitempty"` + // DataVolume configures the data PersistentVolumeClaim for this node. + // When omitted, the controller creates a PVC using the node's mode-default + // storage class and size (see noderesource.DefaultStorageForMode). + // +optional + DataVolume *DataVolumeSpec `json:"dataVolume,omitempty"` + // --- Mode-specific sub-specs (exactly one must be set) --- // FullNode configures a chain-following full node (absorbs the "rpc" role). @@ -66,6 +72,31 @@ type SeiNodeSpec struct { Validator *ValidatorSpec `json:"validator,omitempty"` } +// DataVolumeSpec configures how the data PVC is sourced. +type DataVolumeSpec struct { + // Import references a pre-existing PersistentVolumeClaim in the same + // namespace as the SeiNode, instead of creating a new one. 
The + // controller validates the referenced PVC but never mutates it. + // + // When Import is set, the controller never deletes the referenced PVC + // on SeiNode deletion — storage lifecycle is the operator's responsibility. + // +optional + Import *DataVolumeImport `json:"import,omitempty"` +} + +// DataVolumeImport names a pre-existing PVC to adopt as this node's data volume. +type DataVolumeImport struct { + // PVCName is the name of a PersistentVolumeClaim in the SeiNode's + // namespace. The PVC must be Bound, ReadWriteOnce, and sized at or above + // the node mode's default storage size. Immutable after creation. + // + // +kubebuilder:validation:MinLength=1 + // +kubebuilder:validation:MaxLength=253 + // +kubebuilder:validation:Pattern=`^[a-z0-9]([-a-z0-9]*[a-z0-9])?$` + // +kubebuilder:validation:XValidation:rule="self == oldSelf",message="pvcName is immutable" + PVCName string `json:"pvcName"` +} + // SnapshotSource returns the SnapshotSource from whichever mode sub-spec is // populated, or nil if no snapshot is configured. Archive nodes always return // nil because they use state sync (configured internally by the planner). @@ -209,6 +240,28 @@ const ( const ( // ConditionNodeUpdateInProgress indicates an image update is being rolled out. ConditionNodeUpdateInProgress = "NodeUpdateInProgress" + + // ConditionImportPVCReady indicates whether an imported data PVC passes all + // validation requirements. Only set on SeiNodes with spec.dataVolume.import. + ConditionImportPVCReady = "ImportPVCReady" +) + +// Reasons for the ImportPVCReady condition. These strings form a public +// alerting contract: Prometheus alerts, audit tools, and operator scripts +// may key on these exact values. Renaming or removing a reason is a breaking +// change and requires a deprecation window. Adding a new reason is additive +// and backward compatible. 
+const ( + ReasonImportValidated = "PVCValidated" + ReasonImportPVCNotFound = "PVCNotFound" + ReasonImportPVCTerminating = "PVCTerminating" + ReasonImportPVCNotBound = "PVCNotBound" // Pending/Released + ReasonImportPVCLost = "PVCLost" // terminal + ReasonImportAccessModeInvalid = "AccessModeInvalid" + ReasonImportCapacityTooSmall = "CapacityTooSmall" + ReasonImportPVMissing = "UnderlyingPVMissing" + ReasonImportPVCapacityMismatch = "UnderlyingPVCapacityMismatch" + ReasonImportPVFailed = "UnderlyingPVFailed" ) // SeiNodeStatus defines the observed state of a SeiNode. diff --git a/api/v1alpha1/zz_generated.deepcopy.go b/api/v1alpha1/zz_generated.deepcopy.go index c403767..8c3fa14 100644 --- a/api/v1alpha1/zz_generated.deepcopy.go +++ b/api/v1alpha1/zz_generated.deepcopy.go @@ -31,6 +31,41 @@ func (in *ArchiveSpec) DeepCopy() *ArchiveSpec { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *DataVolumeImport) DeepCopyInto(out *DataVolumeImport) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new DataVolumeImport. +func (in *DataVolumeImport) DeepCopy() *DataVolumeImport { + if in == nil { + return nil + } + out := new(DataVolumeImport) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *DataVolumeSpec) DeepCopyInto(out *DataVolumeSpec) { + *out = *in + if in.Import != nil { + in, out := &in.Import, &out.Import + *out = new(DataVolumeImport) + **out = **in + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new DataVolumeSpec. 
+func (in *DataVolumeSpec) DeepCopy() *DataVolumeSpec { + if in == nil { + return nil + } + out := new(DataVolumeSpec) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *EC2TagsPeerSource) DeepCopyInto(out *EC2TagsPeerSource) { *out = *in @@ -725,6 +760,11 @@ func (in *SeiNodeSpec) DeepCopyInto(out *SeiNodeSpec) { (*out)[key] = val } } + if in.DataVolume != nil { + in, out := &in.DataVolume, &out.DataVolume + *out = new(DataVolumeSpec) + (*in).DeepCopyInto(*out) + } if in.FullNode != nil { in, out := &in.FullNode, &out.FullNode *out = new(FullNodeSpec) diff --git a/config/crd/sei.io_seinodedeployments.yaml b/config/crd/sei.io_seinodedeployments.yaml index 023e679..5f20a7c 100644 --- a/config/crd/sei.io_seinodedeployments.yaml +++ b/config/crd/sei.io_seinodedeployments.yaml @@ -257,6 +257,37 @@ spec: description: ChainID of the chain this node belongs to. minLength: 1 type: string + dataVolume: + description: |- + DataVolume configures the data PersistentVolumeClaim for this node. + When omitted, the controller creates a PVC using the node's mode-default + storage class and size (see noderesource.DefaultStorageForMode). + properties: + import: + description: |- + Import references a pre-existing PersistentVolumeClaim in the same + namespace as the SeiNode, instead of creating a new one. The + controller validates the referenced PVC but never mutates it. + + When Import is set, the controller never deletes the referenced PVC + on SeiNode deletion — storage lifecycle is the operator's responsibility. + properties: + pvcName: + description: |- + PVCName is the name of a PersistentVolumeClaim in the SeiNode's + namespace. The PVC must be Bound, ReadWriteOnce, and sized at or above + the node mode's default storage size. Immutable after creation. 
+ maxLength: 253 + minLength: 1 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?$ + type: string + x-kubernetes-validations: + - message: pvcName is immutable + rule: self == oldSelf + required: + - pvcName + type: object + type: object entrypoint: description: Entrypoint overrides the image command for the running node process. diff --git a/config/crd/sei.io_seinodes.yaml b/config/crd/sei.io_seinodes.yaml index 2e52a3f..9e5d664 100644 --- a/config/crd/sei.io_seinodes.yaml +++ b/config/crd/sei.io_seinodes.yaml @@ -87,6 +87,37 @@ spec: description: ChainID of the chain this node belongs to. minLength: 1 type: string + dataVolume: + description: |- + DataVolume configures the data PersistentVolumeClaim for this node. + When omitted, the controller creates a PVC using the node's mode-default + storage class and size (see noderesource.DefaultStorageForMode). + properties: + import: + description: |- + Import references a pre-existing PersistentVolumeClaim in the same + namespace as the SeiNode, instead of creating a new one. The + controller validates the referenced PVC but never mutates it. + + When Import is set, the controller never deletes the referenced PVC + on SeiNode deletion — storage lifecycle is the operator's responsibility. + properties: + pvcName: + description: |- + PVCName is the name of a PersistentVolumeClaim in the SeiNode's + namespace. The PVC must be Bound, ReadWriteOnce, and sized at or above + the node mode's default storage size. Immutable after creation. + maxLength: 253 + minLength: 1 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?$ + type: string + x-kubernetes-validations: + - message: pvcName is immutable + rule: self == oldSelf + required: + - pvcName + type: object + type: object entrypoint: description: Entrypoint overrides the image command for the running node process. 
diff --git a/config/rbac/role.yaml b/config/rbac/role.yaml index 99209e7..6b10c4d 100644 --- a/config/rbac/role.yaml +++ b/config/rbac/role.yaml @@ -27,6 +27,7 @@ rules: - apiGroups: - "" resources: + - persistentvolumes - pods verbs: - get diff --git a/internal/controller/node/controller.go b/internal/controller/node/controller.go index 7020ddd..eb1abe6 100644 --- a/internal/controller/node/controller.go +++ b/internal/controller/node/controller.go @@ -53,6 +53,7 @@ type SeiNodeReconciler struct { // +kubebuilder:rbac:groups=batch,resources=jobs,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups="",resources=services,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups="",resources=persistentvolumeclaims,verbs=get;list;watch;create;update;patch;delete +// +kubebuilder:rbac:groups="",resources=persistentvolumes,verbs=get;list;watch // +kubebuilder:rbac:groups="",resources=pods,verbs=get;list;watch // +kubebuilder:rbac:groups="",resources=events,verbs=create;patch @@ -122,6 +123,16 @@ func (r *SeiNodeReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ct statusDirty = true } + // Sync the ImportPVCReady condition from the current ensure-data-pvc + // task state, before flushing status. No-op when import is not configured. + // If the condition list mutates and no other status changes occurred, the + // patch still covers it because the condition is part of status. 
+ condBefore := conditionsSnapshot(node) + planner.ReconcileImportPVCCondition(node) + if !statusDirty && !conditionsEqual(condBefore, conditionsSnapshot(node)) { + statusDirty = true + } + if statusDirty { if err := r.Status().Patch(ctx, node, statusBase); err != nil { if execErr != nil { @@ -216,6 +227,14 @@ func (r *SeiNodeReconciler) handleNodeDeletion(ctx context.Context, node *seiv1a } func (r *SeiNodeReconciler) deleteNodeDataPVC(ctx context.Context, node *seiv1alpha1.SeiNode) error { + // Imported PVCs are managed externally — never delete them. + if node.Spec.DataVolume != nil && node.Spec.DataVolume.Import != nil && + node.Spec.DataVolume.Import.PVCName != "" { + log.FromContext(ctx).Info("skipping data PVC delete for imported volume", + "pvc", node.Spec.DataVolume.Import.PVCName) + return nil + } + pvc := &corev1.PersistentVolumeClaim{} err := r.Get(ctx, types.NamespacedName{Name: noderesource.DataPVCName(node), Namespace: node.Namespace}, pvc) if apierrors.IsNotFound(err) { @@ -226,3 +245,37 @@ func (r *SeiNodeReconciler) deleteNodeDataPVC(ctx context.Context, node *seiv1al } return r.Delete(ctx, pvc) } + +// conditionsSnapshot returns a compact comparable representation of the +// node's current conditions for change detection prior to status flush. 
+type conditionKey struct { + Type string + Status string + Reason string + Message string +} + +func conditionsSnapshot(node *seiv1alpha1.SeiNode) []conditionKey { + out := make([]conditionKey, len(node.Status.Conditions)) + for i, c := range node.Status.Conditions { + out[i] = conditionKey{ + Type: c.Type, + Status: string(c.Status), + Reason: c.Reason, + Message: c.Message, + } + } + return out +} + +func conditionsEqual(a, b []conditionKey) bool { + if len(a) != len(b) { + return false + } + for i := range a { + if a[i] != b[i] { + return false + } + } + return true +} diff --git a/internal/controller/node/import_pvc_test.go b/internal/controller/node/import_pvc_test.go new file mode 100644 index 0000000..0796941 --- /dev/null +++ b/internal/controller/node/import_pvc_test.go @@ -0,0 +1,222 @@ +package node + +import ( + "context" + "testing" + + . "github.com/onsi/gomega" + corev1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/api/meta" + "k8s.io/apimachinery/pkg/api/resource" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + + seiv1alpha1 "github.com/sei-protocol/sei-k8s-controller/api/v1alpha1" +) + +// importTestNode returns a SeiNode wired for importing a pre-existing PVC. +func importTestNode(name, namespace, pvcName string) *seiv1alpha1.SeiNode { //nolint:unparam // test helper designed for reuse + n := newSnapshotNode(name, namespace) + n.Spec.DataVolume = &seiv1alpha1.DataVolumeSpec{ + Import: &seiv1alpha1.DataVolumeImport{PVCName: pvcName}, + } + return n +} + +// boundPVC returns a validly-configured PVC for import tests. 
+func boundPVC(name, namespace, capacity string) *corev1.PersistentVolumeClaim { //nolint:unparam // test helper designed for reuse + return &corev1.PersistentVolumeClaim{ + ObjectMeta: metav1.ObjectMeta{Name: name, Namespace: namespace}, + Spec: corev1.PersistentVolumeClaimSpec{ + AccessModes: []corev1.PersistentVolumeAccessMode{corev1.ReadWriteOnce}, + VolumeName: "pv-" + name, + }, + Status: corev1.PersistentVolumeClaimStatus{ + Phase: corev1.ClaimBound, + Capacity: corev1.ResourceList{ + corev1.ResourceStorage: resource.MustParse(capacity), + }, + }, + } +} + +// boundPV returns a matching PV. +func boundPV(name, capacity string) *corev1.PersistentVolume { + return &corev1.PersistentVolume{ + ObjectMeta: metav1.ObjectMeta{Name: name}, + Spec: corev1.PersistentVolumeSpec{ + Capacity: corev1.ResourceList{corev1.ResourceStorage: resource.MustParse(capacity)}, + }, + Status: corev1.PersistentVolumeStatus{Phase: corev1.VolumeBound}, + } +} + +func TestController_Import_Happy_ReachesRunning(t *testing.T) { + g := NewWithT(t) + + node := importTestNode("imp-0", "default", "external-data-0") + pvc := boundPVC("external-data-0", "default", "2000Gi") + pv := boundPV("pv-external-data-0", "2000Gi") + + mock := &mockSidecarClient{} + r, c := newProgressionReconciler(t, mock, node, pvc, pv) + + fetch := func() *seiv1alpha1.SeiNode { + return fetchNode(t, c, node.Name, node.Namespace) + } + + // Reconcile 1: finalizer + build plan. + reconcileOnce(t, g, r, node.Name, node.Namespace) + n := fetch() + g.Expect(n.Status.Plan).NotTo(BeNil()) + + // Reconcile 2: drives import validation to completion and advances + // through the remaining infrastructure tasks. 
+ reconcileOnce(t, g, r, node.Name, node.Namespace) + n = fetch() + cond := meta.FindStatusCondition(n.Status.Conditions, seiv1alpha1.ConditionImportPVCReady) + g.Expect(cond).NotTo(BeNil(), "ImportPVCReady condition should be set after validation") + g.Expect(cond.Status).To(Equal(metav1.ConditionTrue)) + g.Expect(cond.Reason).To(Equal(seiv1alpha1.ReasonImportValidated)) + + // Drive the sidecar progression through to completion. + driveTask(t, g, r, mock, fetch, "snapshot-restore") + driveTask(t, g, r, mock, fetch, "configure-genesis") + driveTask(t, g, r, mock, fetch, "config-apply") + driveTask(t, g, r, mock, fetch, "configure-state-sync") + + updated := fetch() + g.Expect(updated.Status.Phase).To(Equal(seiv1alpha1.PhaseRunning)) + + // Condition stays True once init completes (plan goes away). + cond = meta.FindStatusCondition(updated.Status.Conditions, seiv1alpha1.ConditionImportPVCReady) + g.Expect(cond).NotTo(BeNil()) + g.Expect(cond.Status).To(Equal(metav1.ConditionTrue)) +} + +func TestController_Import_LatePVC_Converges(t *testing.T) { + g := NewWithT(t) + ctx := context.Background() + + node := importTestNode("imp-late", "default", "late-pvc") + + mock := &mockSidecarClient{} + r, c := newProgressionReconciler(t, mock, node) + + fetch := func() *seiv1alpha1.SeiNode { + return fetchNode(t, c, node.Name, node.Namespace) + } + + // Reconcile 1: finalizer + build plan. Reconcile 2: first validation pass + // sees no PVC → transient, condition goes False/PVCNotFound. 
+ reconcileOnce(t, g, r, node.Name, node.Namespace) + reconcileOnce(t, g, r, node.Name, node.Namespace) + + n := fetch() + g.Expect(n.Status.Phase).To(Equal(seiv1alpha1.PhaseInitializing)) + cond := meta.FindStatusCondition(n.Status.Conditions, seiv1alpha1.ConditionImportPVCReady) + g.Expect(cond).NotTo(BeNil(), "ImportPVCReady condition should be set while stuck") + g.Expect(cond.Status).To(Equal(metav1.ConditionFalse)) + g.Expect(cond.Reason).To(Equal(seiv1alpha1.ReasonImportPVCNotFound)) + + // Operator creates the PVC and PV out-of-band. + g.Expect(c.Create(ctx, boundPVC("late-pvc", "default", "2000Gi"))).To(Succeed()) + g.Expect(c.Create(ctx, boundPV("pv-late-pvc", "2000Gi"))).To(Succeed()) + + // Next reconcile: validation passes, task completes, plan advances. + reconcileOnce(t, g, r, node.Name, node.Namespace) + + n = fetch() + cond = meta.FindStatusCondition(n.Status.Conditions, seiv1alpha1.ConditionImportPVCReady) + g.Expect(cond).NotTo(BeNil()) + g.Expect(cond.Status).To(Equal(metav1.ConditionTrue)) + g.Expect(cond.Reason).To(Equal(seiv1alpha1.ReasonImportValidated)) +} + +func TestController_Import_TerminalFailure_MarksFailed(t *testing.T) { + g := NewWithT(t) + + node := importTestNode("imp-bad", "default", "bad-access") + pvc := boundPVC("bad-access", "default", "2000Gi") + pvc.Spec.AccessModes = []corev1.PersistentVolumeAccessMode{corev1.ReadOnlyMany} + + mock := &mockSidecarClient{} + r, c := newProgressionReconciler(t, mock, node, pvc) + + fetch := func() *seiv1alpha1.SeiNode { + return fetchNode(t, c, node.Name, node.Namespace) + } + + // Reconcile 1: build plan. Reconcile 2: execute ensure-data-pvc, + // which returns Terminal and fails the plan. Reconcile 3: planner + // observes the failed plan and clears it. 
+ reconcileOnce(t, g, r, node.Name, node.Namespace) + reconcileOnce(t, g, r, node.Name, node.Namespace) + + n := fetch() + g.Expect(n.Status.Phase).To(Equal(seiv1alpha1.PhaseFailed)) + g.Expect(n.Status.Plan).NotTo(BeNil()) + g.Expect(n.Status.Plan.Phase).To(Equal(seiv1alpha1.TaskPlanFailed)) + g.Expect(n.Status.Plan.FailedTaskDetail).NotTo(BeNil()) + g.Expect(n.Status.Plan.FailedTaskDetail.Error).To(ContainSubstring(seiv1alpha1.ReasonImportAccessModeInvalid)) + + cond := meta.FindStatusCondition(n.Status.Conditions, seiv1alpha1.ConditionImportPVCReady) + g.Expect(cond).NotTo(BeNil(), "condition should reflect the terminal failure") + g.Expect(cond.Status).To(Equal(metav1.ConditionFalse)) + g.Expect(cond.Reason).To(Equal(seiv1alpha1.ReasonImportAccessModeInvalid)) +} + +func TestController_Import_Deletion_PreservesPVC(t *testing.T) { + g := NewWithT(t) + ctx := context.Background() + + node := importTestNode("imp-del", "default", "preserve-me") + node.Finalizers = []string{nodeFinalizerName} + pvc := boundPVC("preserve-me", "default", "2000Gi") + + mock := &mockSidecarClient{} + r, c := newProgressionReconciler(t, mock, node, pvc) + + g.Expect(c.Delete(ctx, node)).To(Succeed()) + // Re-fetch after delete so DeletionTimestamp is populated. + _ = c.Get(ctx, types.NamespacedName{Name: node.Name, Namespace: node.Namespace}, node) + + _, err := r.Reconcile(ctx, nodeReqFor(node.Name, node.Namespace)) + g.Expect(err).NotTo(HaveOccurred()) + + // PVC must still exist — imported PVCs are never deleted by the controller. 
+ remaining := &corev1.PersistentVolumeClaim{} + err = c.Get(ctx, types.NamespacedName{Name: "preserve-me", Namespace: "default"}, remaining) + g.Expect(err).NotTo(HaveOccurred(), "imported PVC must be preserved across SeiNode deletion") +} + +func TestController_NoImport_Deletion_DeletesPVC(t *testing.T) { + g := NewWithT(t) + ctx := context.Background() + + // Node without dataVolume.import — standard create-path; PVC should be + // cleaned up by the finalizer on deletion. + node := newSnapshotNode("noimp-del", "default") + node.Finalizers = []string{nodeFinalizerName} + + pvc := &corev1.PersistentVolumeClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: "data-noimp-del", + Namespace: "default", + }, + } + + mock := &mockSidecarClient{} + r, c := newProgressionReconciler(t, mock, node, pvc) + + g.Expect(c.Delete(ctx, node)).To(Succeed()) + _ = c.Get(ctx, types.NamespacedName{Name: node.Name, Namespace: node.Namespace}, node) + + _, err := r.Reconcile(ctx, nodeReqFor(node.Name, node.Namespace)) + g.Expect(err).NotTo(HaveOccurred()) + + remaining := &corev1.PersistentVolumeClaim{} + err = c.Get(ctx, types.NamespacedName{Name: "data-noimp-del", Namespace: "default"}, remaining) + g.Expect(apierrors.IsNotFound(err)).To(BeTrue(), "non-imported PVC should be deleted") +} diff --git a/internal/planner/planner.go b/internal/planner/planner.go index 83039bf..6504a26 100644 --- a/internal/planner/planner.go +++ b/internal/planner/planner.go @@ -6,6 +6,7 @@ import ( "fmt" "maps" "slices" + "strings" "time" "github.com/google/uuid" @@ -196,6 +197,108 @@ func setNodeUpdateCondition(node *seiv1alpha1.SeiNode, status metav1.ConditionSt }) } +// ReconcileImportPVCCondition synchronizes the ImportPVCReady condition on +// a SeiNode based on the state of its ensure-data-pvc task. Called after +// plan execution and before the status flush. +// +// Behavior: +// - No import configured → condition is removed. +// - ensure-data-pvc task Complete → True/PVCValidated. 
+// - ensure-data-pvc task Pending with Error set → False + parsed Reason/Message. +// - ensure-data-pvc task Pending without Error → False/PVCValidated-pending (initial state). +// - Task absent (plan already terminal) → existing condition left as-is. +func ReconcileImportPVCCondition(node *seiv1alpha1.SeiNode) { + if node == nil { + return + } + // No import configured — strip the condition if present. + if node.Spec.DataVolume == nil || node.Spec.DataVolume.Import == nil || + node.Spec.DataVolume.Import.PVCName == "" { + meta.RemoveStatusCondition(&node.Status.Conditions, seiv1alpha1.ConditionImportPVCReady) + return + } + + plan := node.Status.Plan + if plan == nil { + // Plan has been consumed (e.g., init complete). Leave existing + // condition as-is — once True on successful init, it stays True. + return + } + + t := findTaskByType(plan, task.TaskTypeEnsureDataPVC) + if t == nil { + // Task absent (non-init plan, e.g., NodeUpdate) — leave as-is. + return + } + + name := node.Spec.DataVolume.Import.PVCName + + switch t.Status { + case seiv1alpha1.TaskComplete: + setImportPVCCondition(node, metav1.ConditionTrue, + seiv1alpha1.ReasonImportValidated, + fmt.Sprintf("PVC %q passes all import requirements", name)) + case seiv1alpha1.TaskFailed: + reason, msg := parseTaskError(t.Error) + if reason == "" { + reason = seiv1alpha1.ReasonImportPVCNotFound + } + if msg == "" { + msg = t.Error + } + setImportPVCCondition(node, metav1.ConditionFalse, reason, msg) + default: // Pending (Running in plan terms) + if t.Error == "" { + // No validation run yet — leave condition in its current state. + // First validation pass will populate Error. + return + } + reason, msg := parseTaskError(t.Error) + if reason == "" { + reason = seiv1alpha1.ReasonImportPVCNotFound + } + if msg == "" { + msg = t.Error + } + setImportPVCCondition(node, metav1.ConditionFalse, reason, msg) + } +} + +// setImportPVCCondition sets or updates the ImportPVCReady condition. 
+func setImportPVCCondition(node *seiv1alpha1.SeiNode, status metav1.ConditionStatus, reason, message string) { + meta.SetStatusCondition(&node.Status.Conditions, metav1.Condition{ + Type: seiv1alpha1.ConditionImportPVCReady, + Status: status, + Reason: reason, + Message: message, + ObservedGeneration: node.Generation, + }) +} + +// findTaskByType returns the first task of the given type in the plan, or nil. +func findTaskByType(plan *seiv1alpha1.TaskPlan, taskType string) *seiv1alpha1.PlannedTask { + for i := range plan.Tasks { + if plan.Tasks[i].Type == taskType { + return &plan.Tasks[i] + } + } + return nil +} + +// parseTaskError splits a ": " task error string, as written +// by recordTransient/failTask, into its two components. Returns ("", err) when +// the format does not match. +func parseTaskError(err string) (reason, message string) { + if err == "" { + return "", "" + } + r, m, ok := strings.Cut(err, ": ") + if !ok { + return "", err + } + return r, m +} + // classifyPlan returns the plan type for metrics. func classifyPlan(plan *seiv1alpha1.TaskPlan) string { for _, t := range plan.Tasks { diff --git a/internal/task/ensure_pvc.go b/internal/task/ensure_pvc.go index 34c0fdb..da34cf4 100644 --- a/internal/task/ensure_pvc.go +++ b/internal/task/ensure_pvc.go @@ -4,8 +4,13 @@ import ( "context" "encoding/json" "fmt" + "slices" + corev1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/api/resource" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" ctrl "sigs.k8s.io/controller-runtime" seiv1alpha1 "github.com/sei-protocol/sei-k8s-controller/api/v1alpha1" @@ -22,6 +27,15 @@ type EnsureDataPVCParams struct { Namespace string `json:"namespace"` } +// importState classifies the outcome of a single import validation pass. 
+type importState int + +const ( + importValid importState = iota + importTransient + importTerminal +) + type ensureDataPVCExecution struct { taskBase params EnsureDataPVCParams @@ -42,29 +56,223 @@ func deserializeEnsureDataPVC(id string, params json.RawMessage, cfg ExecutionCo }, nil } +// importPVCName safely extracts the imported PVC name from the SeiNode spec, +// returning "" when no import is configured. +func importPVCName(node *seiv1alpha1.SeiNode) string { + if node.Spec.DataVolume == nil || node.Spec.DataVolume.Import == nil { + return "" + } + return node.Spec.DataVolume.Import.PVCName +} + +// Execute dispatches to the create or import branch depending on the SeiNode's +// dataVolume configuration. func (e *ensureDataPVCExecution) Execute(ctx context.Context) error { node, err := ResourceAs[*seiv1alpha1.SeiNode](e.cfg) if err != nil { - return err + return Terminal(err) } + if name := importPVCName(node); name != "" { + return e.executeImport(ctx, node, name) + } + return e.executeCreate(ctx, node) +} + +// executeCreate is the create-path: Get-then-Create, failing if an +// unexpected PVC already exists. Fixes #104. +func (e *ensureDataPVCExecution) executeCreate(ctx context.Context, node *seiv1alpha1.SeiNode) error { desired := noderesource.GenerateDataPVC(node, e.cfg.Platform) if err := ctrl.SetControllerReference(node, desired, e.cfg.Scheme); err != nil { - return fmt.Errorf("setting owner reference on data PVC: %w", err) + return Terminal(fmt.Errorf("setting owner reference on data PVC: %w", err)) } - if err := e.cfg.KubeClient.Create(ctx, desired); err != nil { - if apierrors.IsAlreadyExists(err) { + existing := &corev1.PersistentVolumeClaim{} + key := types.NamespacedName{Name: desired.Name, Namespace: desired.Namespace} + switch err := e.cfg.KubeClient.Get(ctx, key, existing); { + case apierrors.IsNotFound(err): + // proceed to Create + case err != nil: + return fmt.Errorf("checking for existing data PVC: %w", err) + default: + // PVC exists. 
Accept if we own it (crash-recovery); else fail. + if metav1.IsControlledBy(existing, node) { e.complete() return nil } - return fmt.Errorf("creating data PVC: %w", err) + return Terminal(fmt.Errorf( + "data PVC %q already exists and is not owned by SeiNode %q; "+ + "set spec.dataVolume.import.pvcName to adopt, or delete the PVC", + existing.Name, node.Name)) } + if err := e.cfg.KubeClient.Create(ctx, desired); err != nil { + if apierrors.IsAlreadyExists(err) { + // Lost the race with another actor between Get and Create; + // requeue so the next reconcile's Get resolves ownership. + return fmt.Errorf("data PVC created concurrently: %w", err) + } + return fmt.Errorf("creating data PVC: %w", err) + } e.complete() return nil } +// executeImport is the import-path: validate an externally-managed PVC. +// Transient failures leave the task Running (retry next reconcile); terminal +// failures return Terminal() so the plan fails. +func (e *ensureDataPVCExecution) executeImport(ctx context.Context, node *seiv1alpha1.SeiNode, name string) error { + reason, msg, state := e.validateImport(ctx, node, name) + recordTransient(node, reason, msg) + + switch state { + case importValid: + e.complete() + return nil + case importTerminal: + return Terminal(fmt.Errorf("%s: %s", reason, msg)) + default: + // transient — remain Running, executor will poll on TaskPollInterval. + return nil + } +} + +// validateImport runs the seven checks in order, returning on the first defect. +// Returns (Reason, Message, state) per the import-state machine in the LLD. +func (e *ensureDataPVCExecution) validateImport( + ctx context.Context, + node *seiv1alpha1.SeiNode, + name string, +) (string, string, importState) { + // 1. PVC exists in the node's namespace. 
+ pvc := &corev1.PersistentVolumeClaim{} + key := types.NamespacedName{Name: name, Namespace: node.Namespace} + if err := e.cfg.KubeClient.Get(ctx, key, pvc); err != nil { + if apierrors.IsNotFound(err) { + return seiv1alpha1.ReasonImportPVCNotFound, + fmt.Sprintf("PVC %q not found in namespace %q", name, node.Namespace), + importTransient + } + // Unexpected API error — treat as transient; executor retries. + return seiv1alpha1.ReasonImportPVCNotFound, + fmt.Sprintf("getting PVC %q: %v", name, err), + importTransient + } + + // 2. deletionTimestamp must be nil. + if pvc.DeletionTimestamp != nil { + return seiv1alpha1.ReasonImportPVCTerminating, + fmt.Sprintf("PVC %q is being deleted (deletionTimestamp=%s)", name, pvc.DeletionTimestamp), + importTransient + } + + // 3. phase must be Bound. + switch pvc.Status.Phase { + case corev1.ClaimBound: + // continue + case corev1.ClaimLost: + return seiv1alpha1.ReasonImportPVCLost, + fmt.Sprintf("PVC %q phase is Lost; underlying PV is gone", name), + importTerminal + default: + return seiv1alpha1.ReasonImportPVCNotBound, + fmt.Sprintf("PVC %q phase is %q, waiting for Bound", name, pvc.Status.Phase), + importTransient + } + + // 4. accessModes must contain ReadWriteOnce. + if !containsAccessMode(pvc.Spec.AccessModes, corev1.ReadWriteOnce) { + return seiv1alpha1.ReasonImportAccessModeInvalid, + fmt.Sprintf("PVC %q accessModes %v does not include ReadWriteOnce", name, pvc.Spec.AccessModes), + importTerminal + } + + // 5. status.capacity.storage must be >= mode-default required size. + _, requiredStr := noderesource.DefaultStorageForMode(noderesource.NodeMode(node), e.cfg.Platform) + required, parseErr := resource.ParseQuantity(requiredStr) + if parseErr != nil { + // Misconfigured platform default — treat as terminal since the + // controller cannot compute the requirement. 
+ return seiv1alpha1.ReasonImportCapacityTooSmall, + fmt.Sprintf("cannot parse required storage %q: %v", requiredStr, parseErr), + importTerminal + } + actual, haveActual := pvc.Status.Capacity[corev1.ResourceStorage] + if !haveActual { + return seiv1alpha1.ReasonImportCapacityTooSmall, + fmt.Sprintf("PVC %q has no status.capacity.storage reported yet", name), + importTransient + } + if actual.Cmp(required) < 0 { + return seiv1alpha1.ReasonImportCapacityTooSmall, + fmt.Sprintf("PVC %q capacity %s is less than required %s", name, actual.String(), required.String()), + importTerminal + } + + // 6 & 7. Underlying PV must exist, match capacity, and not be Failed. + pvName := pvc.Spec.VolumeName + if pvName == "" { + // Should be impossible for a Bound PVC, but guard anyway. + return seiv1alpha1.ReasonImportPVMissing, + fmt.Sprintf("PVC %q is Bound but has no spec.volumeName", name), + importTransient + } + pv := &corev1.PersistentVolume{} + if err := e.cfg.KubeClient.Get(ctx, types.NamespacedName{Name: pvName}, pv); err != nil { + if apierrors.IsNotFound(err) { + return seiv1alpha1.ReasonImportPVMissing, + fmt.Sprintf("underlying PV %q for PVC %q not found", pvName, name), + importTransient + } + return seiv1alpha1.ReasonImportPVMissing, + fmt.Sprintf("getting PV %q: %v", pvName, err), + importTransient + } + if pv.Status.Phase == corev1.VolumeFailed { + return seiv1alpha1.ReasonImportPVFailed, + fmt.Sprintf("underlying PV %q for PVC %q is in phase Failed", pvName, name), + importTerminal + } + pvCap, havePVCap := pv.Spec.Capacity[corev1.ResourceStorage] + if !havePVCap || pvCap.Cmp(actual) != 0 { + return seiv1alpha1.ReasonImportPVCapacityMismatch, + fmt.Sprintf("underlying PV %q capacity %s does not match PVC %q capacity %s", + pvName, pvCap.String(), name, actual.String()), + importTerminal + } + + return seiv1alpha1.ReasonImportValidated, + fmt.Sprintf("PVC %q passes all import requirements", name), + importValid +} + func (e *ensureDataPVCExecution) Status(_ 
context.Context) ExecutionStatus { return e.DefaultStatus() } + +// containsAccessMode reports whether modes contains the given access mode. +func containsAccessMode(modes []corev1.PersistentVolumeAccessMode, want corev1.PersistentVolumeAccessMode) bool { + return slices.Contains(modes, want) +} + +// recordTransient writes the latest validation reason+message into the +// ensure-data-pvc task's Error field on the in-memory plan. The planner +// parses this to set the ImportPVCReady condition. +// +// Format: "<reason>: <message>". An empty reason clears the field. +func recordTransient(node *seiv1alpha1.SeiNode, reason, msg string) { + if node.Status.Plan == nil { + return + } + for i := range node.Status.Plan.Tasks { + if node.Status.Plan.Tasks[i].Type != TaskTypeEnsureDataPVC { + continue + } + if reason == "" { + node.Status.Plan.Tasks[i].Error = "" + } else { + node.Status.Plan.Tasks[i].Error = fmt.Sprintf("%s: %s", reason, msg) + } + return + } +} diff --git a/internal/task/ensure_pvc_test.go b/internal/task/ensure_pvc_test.go new file mode 100644 index 0000000..19413c6 --- /dev/null +++ b/internal/task/ensure_pvc_test.go @@ -0,0 +1,474 @@ +package task + +import ( + "context" + "encoding/json" + "testing" + + . 
"github.com/onsi/gomega" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/types" + clientgoscheme "k8s.io/client-go/kubernetes/scheme" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/client/fake" + + seiv1alpha1 "github.com/sei-protocol/sei-k8s-controller/api/v1alpha1" + "github.com/sei-protocol/sei-k8s-controller/internal/noderesource" + "github.com/sei-protocol/sei-k8s-controller/internal/platform/platformtest" +) + +// --- Fixtures --- + +func ensurePVCScheme(t *testing.T) *runtime.Scheme { + t.Helper() + s := runtime.NewScheme() + if err := clientgoscheme.AddToScheme(s); err != nil { + t.Fatal(err) + } + if err := seiv1alpha1.AddToScheme(s); err != nil { + t.Fatal(err) + } + return s +} + +// ensurePVCNode returns a full-node SeiNode (uses default storage size). +func ensurePVCNode() *seiv1alpha1.SeiNode { + return &seiv1alpha1.SeiNode{ + ObjectMeta: metav1.ObjectMeta{ + Name: "node-1", + Namespace: "default", + UID: "uid-node-1", + }, + Spec: seiv1alpha1.SeiNodeSpec{ + ChainID: "atlantic-2", + Image: "sei:v1.0.0", + FullNode: &seiv1alpha1.FullNodeSpec{}, + }, + Status: seiv1alpha1.SeiNodeStatus{ + Plan: &seiv1alpha1.TaskPlan{ + Phase: seiv1alpha1.TaskPlanActive, + Tasks: []seiv1alpha1.PlannedTask{ + {Type: TaskTypeEnsureDataPVC, ID: "ensure-1", Status: seiv1alpha1.TaskPending}, + }, + }, + }, + } +} + +// importNode returns a SeiNode configured with spec.dataVolume.import.pvcName. +func importNode(pvcName string) *seiv1alpha1.SeiNode { + n := ensurePVCNode() + n.Spec.DataVolume = &seiv1alpha1.DataVolumeSpec{ + Import: &seiv1alpha1.DataVolumeImport{PVCName: pvcName}, + } + return n +} + +// validImportedPVC returns a PVC that satisfies all seven import requirements. 
+func validImportedPVC(name, ns string, capacity string) *corev1.PersistentVolumeClaim { //nolint:unparam // test helper designed for reuse + return &corev1.PersistentVolumeClaim{ + ObjectMeta: metav1.ObjectMeta{Name: name, Namespace: ns}, + Spec: corev1.PersistentVolumeClaimSpec{ + AccessModes: []corev1.PersistentVolumeAccessMode{corev1.ReadWriteOnce}, + VolumeName: "pv-" + name, + }, + Status: corev1.PersistentVolumeClaimStatus{ + Phase: corev1.ClaimBound, + Capacity: corev1.ResourceList{ + corev1.ResourceStorage: resource.MustParse(capacity), + }, + }, + } +} + +// validImportedPV returns a PV that matches the given PVC's capacity. +func validImportedPV(name, capacity string) *corev1.PersistentVolume { + return &corev1.PersistentVolume{ + ObjectMeta: metav1.ObjectMeta{Name: name}, + Spec: corev1.PersistentVolumeSpec{ + Capacity: corev1.ResourceList{ + corev1.ResourceStorage: resource.MustParse(capacity), + }, + }, + Status: corev1.PersistentVolumeStatus{Phase: corev1.VolumeBound}, + } +} + +// newEnsurePVCExec builds a task execution against a fake client populated +// with objs. The node is the resource carried in the ExecutionConfig. +func newEnsurePVCExec(t *testing.T, node *seiv1alpha1.SeiNode, objs ...client.Object) (TaskExecution, client.Client) { + t.Helper() + s := ensurePVCScheme(t) + c := fake.NewClientBuilder().WithScheme(s).WithObjects(objs...).Build() + cfg := ExecutionConfig{ + KubeClient: c, + Scheme: s, + Resource: node, + Platform: platformtest.Config(), + } + params := EnsureDataPVCParams{NodeName: node.Name, Namespace: node.Namespace} + raw, _ := json.Marshal(params) + exec, err := deserializeEnsureDataPVC("ensure-1", raw, cfg) + if err != nil { + t.Fatal(err) + } + return exec, c +} + +// taskErrorFor returns the current Error string on the ensure-data-pvc task +// within node.Status.Plan, or "" if not present. 
+func taskErrorFor(node *seiv1alpha1.SeiNode) string { + if node.Status.Plan == nil { + return "" + } + for _, t := range node.Status.Plan.Tasks { + if t.Type == TaskTypeEnsureDataPVC { + return t.Error + } + } + return "" +} + +// --- Create-path tests --- + +func TestEnsureDataPVC_Create_PVCMissing_CreatesAndCompletes(t *testing.T) { + g := NewWithT(t) + node := ensurePVCNode() + exec, c := newEnsurePVCExec(t, node) + + g.Expect(exec.Execute(context.Background())).To(Succeed()) + g.Expect(exec.Status(context.Background())).To(Equal(ExecutionComplete)) + + pvc := &corev1.PersistentVolumeClaim{} + g.Expect(c.Get(context.Background(), types.NamespacedName{ + Name: noderesource.DataPVCName(node), Namespace: node.Namespace, + }, pvc)).To(Succeed()) + g.Expect(pvc.Spec.AccessModes).To(ConsistOf(corev1.ReadWriteOnce)) +} + +func TestEnsureDataPVC_Create_PVCExistsOwnedByUs_Completes(t *testing.T) { + g := NewWithT(t) + node := ensurePVCNode() + + // Pre-existing PVC owned by this SeiNode (crash-recovery scenario). + existing := noderesource.GenerateDataPVC(node, platformtest.Config()) + trueVal := true + existing.OwnerReferences = []metav1.OwnerReference{{ + APIVersion: seiv1alpha1.GroupVersion.String(), + Kind: "SeiNode", + Name: node.Name, + UID: node.UID, + Controller: &trueVal, + BlockOwnerDeletion: &trueVal, + }} + + exec, _ := newEnsurePVCExec(t, node, existing) + + g.Expect(exec.Execute(context.Background())).To(Succeed()) + g.Expect(exec.Status(context.Background())).To(Equal(ExecutionComplete)) +} + +func TestEnsureDataPVC_Create_PVCExistsNotOwned_TerminalError(t *testing.T) { + g := NewWithT(t) + node := ensurePVCNode() + + // Pre-existing PVC with no owner references — the #104 bug scenario. 
+ foreign := noderesource.GenerateDataPVC(node, platformtest.Config()) + foreign.OwnerReferences = nil + + exec, _ := newEnsurePVCExec(t, node, foreign) + + err := exec.Execute(context.Background()) + g.Expect(err).To(HaveOccurred()) + var termErr *TerminalError + g.Expect(err).To(BeAssignableToTypeOf(termErr)) + g.Expect(err.Error()).To(ContainSubstring("already exists and is not owned")) +} + +func TestEnsureDataPVC_Create_AlreadyExistsRace_Requeues(t *testing.T) { + g := NewWithT(t) + node := ensurePVCNode() + + // Start with no PVC in the cache. We'll use an intercepting client + // that returns AlreadyExists on Create. + s := ensurePVCScheme(t) + base := fake.NewClientBuilder().WithScheme(s).Build() + c := &raceClient{Client: base} + + cfg := ExecutionConfig{ + KubeClient: c, + Scheme: s, + Resource: node, + Platform: platformtest.Config(), + } + raw, _ := json.Marshal(EnsureDataPVCParams{NodeName: node.Name, Namespace: node.Namespace}) + exec, err := deserializeEnsureDataPVC("ensure-1", raw, cfg) + g.Expect(err).NotTo(HaveOccurred()) + + runErr := exec.Execute(context.Background()) + g.Expect(runErr).To(HaveOccurred()) + // Transient (non-terminal) — plain error, executor will retry. + var termErr *TerminalError + g.Expect(runErr).NotTo(BeAssignableToTypeOf(termErr)) + g.Expect(runErr.Error()).To(ContainSubstring("concurrently")) +} + +// raceClient returns AlreadyExists on Create, simulating a race between +// Get-NotFound and Create. +type raceClient struct { + client.Client +} + +func (r *raceClient) Create(_ context.Context, _ client.Object, _ ...client.CreateOption) error { + // Return a Kubernetes AlreadyExists error. 
+ return &statusErr{reason: metav1.StatusReasonAlreadyExists} +} + +type statusErr struct{ reason metav1.StatusReason } + +func (e *statusErr) Error() string { return string(e.reason) } +func (e *statusErr) Status() metav1.Status { + return metav1.Status{Status: metav1.StatusFailure, Reason: e.reason} +} + +// --- Import-path: happy path --- + +func TestEnsureDataPVC_Import_AllRequirementsMet_Completes(t *testing.T) { + g := NewWithT(t) + node := importNode("data-archive-0-0") + + // full-node uses StorageSizeDefault = "2000Gi" per platformtest.Config. + pvc := validImportedPVC("data-archive-0-0", "default", "2000Gi") + pv := validImportedPV("pv-data-archive-0-0", "2000Gi") + + exec, _ := newEnsurePVCExec(t, node, pvc, pv) + + g.Expect(exec.Execute(context.Background())).To(Succeed()) + g.Expect(exec.Status(context.Background())).To(Equal(ExecutionComplete)) +} + +// --- Import-path: per-requirement failures --- + +func TestEnsureDataPVC_Import_PVCNotFound_Transient(t *testing.T) { + g := NewWithT(t) + node := importNode("data-missing") + exec, _ := newEnsurePVCExec(t, node) + + err := exec.Execute(context.Background()) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(exec.Status(context.Background())).To(Equal(ExecutionRunning)) + g.Expect(taskErrorFor(node)).To(HavePrefix(seiv1alpha1.ReasonImportPVCNotFound + ":")) +} + +func TestEnsureDataPVC_Import_PVCNotFound_ThenAppears_Completes(t *testing.T) { + g := NewWithT(t) + node := importNode("data-late") + + // First pass: no PVC yet. 
+ s := ensurePVCScheme(t) + c := fake.NewClientBuilder().WithScheme(s).Build() + cfg := ExecutionConfig{ + KubeClient: c, Scheme: s, Resource: node, Platform: platformtest.Config(), + } + raw, _ := json.Marshal(EnsureDataPVCParams{NodeName: node.Name, Namespace: node.Namespace}) + exec, err := deserializeEnsureDataPVC("ensure-1", raw, cfg) + g.Expect(err).NotTo(HaveOccurred()) + + g.Expect(exec.Execute(context.Background())).To(Succeed()) + g.Expect(exec.Status(context.Background())).To(Equal(ExecutionRunning)) + + // PVC and PV show up; on the next Execute, task completes. + g.Expect(c.Create(context.Background(), validImportedPVC("data-late", "default", "2000Gi"))).To(Succeed()) + g.Expect(c.Create(context.Background(), validImportedPV("pv-data-late", "2000Gi"))).To(Succeed()) + + // Re-deserialize (executor does this every reconcile). + exec, err = deserializeEnsureDataPVC("ensure-1", raw, cfg) + g.Expect(err).NotTo(HaveOccurred()) + + g.Expect(exec.Execute(context.Background())).To(Succeed()) + g.Expect(exec.Status(context.Background())).To(Equal(ExecutionComplete)) +} + +func TestEnsureDataPVC_Import_PVCTerminating_Transient(t *testing.T) { + g := NewWithT(t) + node := importNode("data-term") + now := metav1.Now() + pvc := validImportedPVC("data-term", "default", "2000Gi") + pvc.DeletionTimestamp = &now + pvc.Finalizers = []string{"kubernetes.io/pvc-protection"} + + exec, _ := newEnsurePVCExec(t, node, pvc) + + g.Expect(exec.Execute(context.Background())).To(Succeed()) + g.Expect(exec.Status(context.Background())).To(Equal(ExecutionRunning)) + g.Expect(taskErrorFor(node)).To(HavePrefix(seiv1alpha1.ReasonImportPVCTerminating + ":")) +} + +func TestEnsureDataPVC_Import_PVCPending_Transient(t *testing.T) { + g := NewWithT(t) + node := importNode("data-pending") + pvc := validImportedPVC("data-pending", "default", "2000Gi") + pvc.Status.Phase = corev1.ClaimPending + + exec, _ := newEnsurePVCExec(t, node, pvc) + + 
g.Expect(exec.Execute(context.Background())).To(Succeed()) + g.Expect(exec.Status(context.Background())).To(Equal(ExecutionRunning)) + g.Expect(taskErrorFor(node)).To(HavePrefix(seiv1alpha1.ReasonImportPVCNotBound + ":")) +} + +func TestEnsureDataPVC_Import_PVCReleased_Transient(t *testing.T) { + g := NewWithT(t) + node := importNode("data-released") + pvc := validImportedPVC("data-released", "default", "2000Gi") + pvc.Status.Phase = corev1.PersistentVolumeClaimPhase("Released") + + exec, _ := newEnsurePVCExec(t, node, pvc) + + g.Expect(exec.Execute(context.Background())).To(Succeed()) + g.Expect(exec.Status(context.Background())).To(Equal(ExecutionRunning)) + g.Expect(taskErrorFor(node)).To(HavePrefix(seiv1alpha1.ReasonImportPVCNotBound + ":")) +} + +func TestEnsureDataPVC_Import_PVCLost_Terminal(t *testing.T) { + g := NewWithT(t) + node := importNode("data-lost") + pvc := validImportedPVC("data-lost", "default", "2000Gi") + pvc.Status.Phase = corev1.ClaimLost + + exec, _ := newEnsurePVCExec(t, node, pvc) + + err := exec.Execute(context.Background()) + g.Expect(err).To(HaveOccurred()) + var termErr *TerminalError + g.Expect(err).To(BeAssignableToTypeOf(termErr)) + g.Expect(err.Error()).To(ContainSubstring(seiv1alpha1.ReasonImportPVCLost)) + g.Expect(taskErrorFor(node)).To(HavePrefix(seiv1alpha1.ReasonImportPVCLost + ":")) +} + +func TestEnsureDataPVC_Import_WrongAccessMode_Terminal(t *testing.T) { + g := NewWithT(t) + node := importNode("data-rox") + pvc := validImportedPVC("data-rox", "default", "2000Gi") + pvc.Spec.AccessModes = []corev1.PersistentVolumeAccessMode{corev1.ReadOnlyMany} + + exec, _ := newEnsurePVCExec(t, node, pvc) + + err := exec.Execute(context.Background()) + g.Expect(err).To(HaveOccurred()) + var termErr *TerminalError + g.Expect(err).To(BeAssignableToTypeOf(termErr)) + g.Expect(err.Error()).To(ContainSubstring(seiv1alpha1.ReasonImportAccessModeInvalid)) +} + +func TestEnsureDataPVC_Import_CapacityTooSmall_Terminal(t *testing.T) { + g := 
NewWithT(t) + node := importNode("data-small") + pvc := validImportedPVC("data-small", "default", "100Gi") // needs 2000Gi + pv := validImportedPV("pv-data-small", "100Gi") + + exec, _ := newEnsurePVCExec(t, node, pvc, pv) + + err := exec.Execute(context.Background()) + g.Expect(err).To(HaveOccurred()) + var termErr *TerminalError + g.Expect(err).To(BeAssignableToTypeOf(termErr)) + g.Expect(err.Error()).To(ContainSubstring(seiv1alpha1.ReasonImportCapacityTooSmall)) +} + +func TestEnsureDataPVC_Import_CapacityUnset_Transient(t *testing.T) { + g := NewWithT(t) + node := importNode("data-nocap") + pvc := validImportedPVC("data-nocap", "default", "2000Gi") + pvc.Status.Capacity = nil // not reported yet + + exec, _ := newEnsurePVCExec(t, node, pvc) + + g.Expect(exec.Execute(context.Background())).To(Succeed()) + g.Expect(exec.Status(context.Background())).To(Equal(ExecutionRunning)) + g.Expect(taskErrorFor(node)).To(HavePrefix(seiv1alpha1.ReasonImportCapacityTooSmall + ":")) +} + +func TestEnsureDataPVC_Import_PVCapacityMismatch_Terminal(t *testing.T) { + g := NewWithT(t) + node := importNode("data-mismatch") + pvc := validImportedPVC("data-mismatch", "default", "2000Gi") + pv := validImportedPV("pv-data-mismatch", "3000Gi") + + exec, _ := newEnsurePVCExec(t, node, pvc, pv) + + err := exec.Execute(context.Background()) + g.Expect(err).To(HaveOccurred()) + var termErr *TerminalError + g.Expect(err).To(BeAssignableToTypeOf(termErr)) + g.Expect(err.Error()).To(ContainSubstring(seiv1alpha1.ReasonImportPVCapacityMismatch)) +} + +func TestEnsureDataPVC_Import_PVMissing_Transient(t *testing.T) { + g := NewWithT(t) + node := importNode("data-nopv") + pvc := validImportedPVC("data-nopv", "default", "2000Gi") + + exec, _ := newEnsurePVCExec(t, node, pvc) // no PV + + g.Expect(exec.Execute(context.Background())).To(Succeed()) + g.Expect(exec.Status(context.Background())).To(Equal(ExecutionRunning)) + g.Expect(taskErrorFor(node)).To(HavePrefix(seiv1alpha1.ReasonImportPVMissing + 
":")) +} + +func TestEnsureDataPVC_Import_PVFailed_Terminal(t *testing.T) { + g := NewWithT(t) + node := importNode("data-pvfail") + pvc := validImportedPVC("data-pvfail", "default", "2000Gi") + pv := validImportedPV("pv-data-pvfail", "2000Gi") + pv.Status.Phase = corev1.VolumeFailed + + exec, _ := newEnsurePVCExec(t, node, pvc, pv) + + err := exec.Execute(context.Background()) + g.Expect(err).To(HaveOccurred()) + var termErr *TerminalError + g.Expect(err).To(BeAssignableToTypeOf(termErr)) + g.Expect(err.Error()).To(ContainSubstring(seiv1alpha1.ReasonImportPVFailed)) +} + +// --- Poll cadence regression guard --- + +func TestEnsureDataPVC_Import_TransientValidationRepeats(t *testing.T) { + g := NewWithT(t) + node := importNode("data-stuck") + + s := ensurePVCScheme(t) + base := fake.NewClientBuilder().WithScheme(s).Build() + counter := &countingClient{Client: base} + cfg := ExecutionConfig{ + KubeClient: counter, Scheme: s, Resource: node, Platform: platformtest.Config(), + } + raw, _ := json.Marshal(EnsureDataPVCParams{NodeName: node.Name, Namespace: node.Namespace}) + + // Simulate N reconciles against a persistently-missing PVC. Each should + // issue exactly one Get (no extra polling or stored state). + const reconciles = 3 + for range reconciles { + exec, err := deserializeEnsureDataPVC("ensure-1", raw, cfg) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(exec.Execute(context.Background())).To(Succeed()) + g.Expect(exec.Status(context.Background())).To(Equal(ExecutionRunning)) + } + g.Expect(counter.getCount).To(Equal(reconciles), + "expected one Get per reconcile when stuck in transient-missing") +} + +// countingClient counts Get calls. +type countingClient struct { + client.Client + getCount int +} + +func (c *countingClient) Get(ctx context.Context, key client.ObjectKey, obj client.Object, opts ...client.GetOption) error { + c.getCount++ + return c.Client.Get(ctx, key, obj, opts...) 
+} diff --git a/manifests/role.yaml b/manifests/role.yaml index 99209e7..6b10c4d 100644 --- a/manifests/role.yaml +++ b/manifests/role.yaml @@ -27,6 +27,7 @@ rules: - apiGroups: - "" resources: + - persistentvolumes - pods verbs: - get diff --git a/manifests/sei.io_seinodedeployments.yaml b/manifests/sei.io_seinodedeployments.yaml index 023e679..5f20a7c 100644 --- a/manifests/sei.io_seinodedeployments.yaml +++ b/manifests/sei.io_seinodedeployments.yaml @@ -257,6 +257,37 @@ spec: description: ChainID of the chain this node belongs to. minLength: 1 type: string + dataVolume: + description: |- + DataVolume configures the data PersistentVolumeClaim for this node. + When omitted, the controller creates a PVC using the node's mode-default + storage class and size (see noderesource.DefaultStorageForMode). + properties: + import: + description: |- + Import references a pre-existing PersistentVolumeClaim in the same + namespace as the SeiNode, instead of creating a new one. The + controller validates the referenced PVC but never mutates it. + + When Import is set, the controller never deletes the referenced PVC + on SeiNode deletion — storage lifecycle is the operator's responsibility. + properties: + pvcName: + description: |- + PVCName is the name of a PersistentVolumeClaim in the SeiNode's + namespace. The PVC must be Bound, ReadWriteOnce, and sized at or above + the node mode's default storage size. Immutable after creation. + maxLength: 253 + minLength: 1 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?$ + type: string + x-kubernetes-validations: + - message: pvcName is immutable + rule: self == oldSelf + required: + - pvcName + type: object + type: object entrypoint: description: Entrypoint overrides the image command for the running node process. 
diff --git a/manifests/sei.io_seinodes.yaml b/manifests/sei.io_seinodes.yaml index 2e52a3f..9e5d664 100644 --- a/manifests/sei.io_seinodes.yaml +++ b/manifests/sei.io_seinodes.yaml @@ -87,6 +87,37 @@ spec: description: ChainID of the chain this node belongs to. minLength: 1 type: string + dataVolume: + description: |- + DataVolume configures the data PersistentVolumeClaim for this node. + When omitted, the controller creates a PVC using the node's mode-default + storage class and size (see noderesource.DefaultStorageForMode). + properties: + import: + description: |- + Import references a pre-existing PersistentVolumeClaim in the same + namespace as the SeiNode, instead of creating a new one. The + controller validates the referenced PVC but never mutates it. + + When Import is set, the controller never deletes the referenced PVC + on SeiNode deletion — storage lifecycle is the operator's responsibility. + properties: + pvcName: + description: |- + PVCName is the name of a PersistentVolumeClaim in the SeiNode's + namespace. The PVC must be Bound, ReadWriteOnce, and sized at or above + the node mode's default storage size. Immutable after creation. + maxLength: 253 + minLength: 1 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?$ + type: string + x-kubernetes-validations: + - message: pvcName is immutable + rule: self == oldSelf + required: + - pvcName + type: object + type: object entrypoint: description: Entrypoint overrides the image command for the running node process.