diff --git a/cmd/gpu-operator/main.go b/cmd/gpu-operator/main.go index 9ac5df1072..c7517e6fc8 100644 --- a/cmd/gpu-operator/main.go +++ b/cmd/gpu-operator/main.go @@ -51,6 +51,7 @@ import ( "github.com/NVIDIA/gpu-operator/controllers/clusterinfo" "github.com/NVIDIA/gpu-operator/internal/consts" "github.com/NVIDIA/gpu-operator/internal/info" + "github.com/NVIDIA/gpu-operator/internal/predicates" // +kubebuilder:scaffold:imports ) @@ -184,7 +185,10 @@ func main() { setupLog.Error(err, "unable to create new ClusterUpdateStateManager", "controller", "Upgrade") os.Exit(1) } - clusterUpgradeStateManager = clusterUpgradeStateManager.WithPodDeletionEnabled(gpuPodSpecFilter).WithValidationEnabled("app=nvidia-operator-validator") + clusterUpgradeStateManager = clusterUpgradeStateManager. + WithPodDeletionEnabled(gpuPodSpecFilter). + WithValidationEnabled("app=nvidia-operator-validator"). + WithRestartOnlyPredicate(predicates.DriverPodRestartOnly(upgradeLogger)) if err = (&controllers.UpgradeReconciler{ Client: mgr.GetClient(), diff --git a/controllers/object_controls.go b/controllers/object_controls.go index 299da2a45b..aa6a54930b 100644 --- a/controllers/object_controls.go +++ b/controllers/object_controls.go @@ -1064,19 +1064,19 @@ func TransformDriver(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec, n C // Set the computed digest in driver-manager initContainer driverManagerContainer := findContainerByName(obj.Spec.Template.Spec.InitContainers, "k8s-driver-manager") if driverManagerContainer != nil { - setContainerEnv(driverManagerContainer, "DRIVER_CONFIG_DIGEST", configDigest) + setContainerEnv(driverManagerContainer, driverconfig.DriverConfigDigestEnvName, configDigest) } // Set the computed digest in nvidia-driver container driverContainer := findContainerByName(obj.Spec.Template.Spec.Containers, "nvidia-driver-ctr") if driverContainer != nil { - setContainerEnv(driverContainer, "DRIVER_CONFIG_DIGEST", configDigest) + setContainerEnv(driverContainer, 
driverconfig.DriverConfigDigestEnvName, configDigest) } // Used by dtk-build-driver to determine if fast path should be used (skip rebuild) driverToolkitContainer := findContainerByName(obj.Spec.Template.Spec.Containers, "openshift-driver-toolkit-ctr") if driverToolkitContainer != nil { - setContainerEnv(driverToolkitContainer, "DRIVER_CONFIG_DIGEST", configDigest) + setContainerEnv(driverToolkitContainer, driverconfig.DriverConfigDigestEnvName, configDigest) } // set hostNetwork for driver if specified diff --git a/go.mod b/go.mod index 5c1a582a3a..fd02e5230d 100644 --- a/go.mod +++ b/go.mod @@ -6,7 +6,7 @@ require ( github.com/Masterminds/sprig/v3 v3.3.0 github.com/NVIDIA/go-nvlib v0.11.0 github.com/NVIDIA/k8s-kata-manager v0.2.3 - github.com/NVIDIA/k8s-operator-libs v0.0.0-20260505175649-fa6a3643c441 + github.com/NVIDIA/k8s-operator-libs v0.0.0-20260629200812-d720f2557494 github.com/NVIDIA/nvidia-container-toolkit v1.19.1 github.com/cyphar/filepath-securejoin v0.7.0 github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc diff --git a/go.sum b/go.sum index 40fb51e6e6..147e583551 100644 --- a/go.sum +++ b/go.sum @@ -18,8 +18,8 @@ github.com/NVIDIA/go-nvlib v0.11.0 h1:J6c9deWGJ1x4yY7fKg+aOdm2v5+WmCIeCLsuaO3tRt github.com/NVIDIA/go-nvlib v0.11.0/go.mod h1:uQNH63NoDuSfn/1lixD1D1Hvhko/xdnBHmc4H1mFUlY= github.com/NVIDIA/k8s-kata-manager v0.2.3 h1:d5+gRFqU5el/fKMXhHUaPY7haj+dbHL4nDsO/q05LBo= github.com/NVIDIA/k8s-kata-manager v0.2.3/go.mod h1:xx5OUiMsHyKbyX0JjKHqAftvqS8vx00LFn/5EaMdtB4= -github.com/NVIDIA/k8s-operator-libs v0.0.0-20260505175649-fa6a3643c441 h1:U+1f77CBKtvJEL/wzze5mY2+Y3XQ5ZgRK0R2Ru2phz4= -github.com/NVIDIA/k8s-operator-libs v0.0.0-20260505175649-fa6a3643c441/go.mod h1:L+aiCiTKN63AX9SWz/F8pv9Jw9FIfI+dAEr7VA+KowE= +github.com/NVIDIA/k8s-operator-libs v0.0.0-20260629200812-d720f2557494 h1:j+tWK79l9AouBulQps7rxILLhy2fWYcEhH4zgYjth/o= +github.com/NVIDIA/k8s-operator-libs v0.0.0-20260629200812-d720f2557494/go.mod 
h1:L+aiCiTKN63AX9SWz/F8pv9Jw9FIfI+dAEr7VA+KowE=
 github.com/NVIDIA/nvidia-container-toolkit v1.19.1 h1:1sV4ddFrBccqL9Lbzcdu50w2j5FhyNJpN5hXTfCsjps=
 github.com/NVIDIA/nvidia-container-toolkit v1.19.1/go.mod h1:yGsZ4s2lMjfE4r8/DMUPVpaFhRGkWvo2H++/Dy84nVc=
 github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
diff --git a/internal/config/driver_config_digest.go b/internal/config/driver_config_digest.go
index 565b14ed45..526d76a846 100644
--- a/internal/config/driver_config_digest.go
+++ b/internal/config/driver_config_digest.go
@@ -22,6 +22,38 @@ import (
 	corev1 "k8s.io/api/core/v1"
 )

+// DriverConfigDigestEnvName names the environment variable that the operator sets on
+// the driver pod template; its value is a hash of the install-relevant driver
+// configuration (see DriverInstallState).
+const DriverConfigDigestEnvName = "DRIVER_CONFIG_DIGEST"
+
+// DriverConfigDigestFromPodSpec looks up DRIVER_CONFIG_DIGEST in a driver pod spec.
+//
+// Init containers are searched before regular containers, each group in declaration
+// order. Within a container only the first env entry with that name is considered;
+// a container whose entry is empty is skipped. The result is "" when spec is nil or
+// no container carries a non-empty value.
+func DriverConfigDigestFromPodSpec(spec *corev1.PodSpec) string {
+	if spec == nil {
+		return ""
+	}
+	for _, containers := range [][]corev1.Container{spec.InitContainers, spec.Containers} {
+		for i := range containers {
+			for _, envVar := range containers[i].Env {
+				if envVar.Name != DriverConfigDigestEnvName {
+					continue
+				}
+				if envVar.Value != "" {
+					return envVar.Value
+				}
+				// First match is empty: stop scanning this container, try the next one.
+				break
+			}
+		}
+	}
+	return ""
+}
+
 // DriverInstallState lists all fields that affect driver installation.
 // Changes to these fields trigger a driver reinstall.
//
diff --git a/internal/config/driver_config_digest_test.go b/internal/config/driver_config_digest_test.go
index b9adae2e41..10da9cd0a4 100644
--- a/internal/config/driver_config_digest_test.go
+++ b/internal/config/driver_config_digest_test.go
@@ -309,3 +309,88 @@ func TestExtractVolumes(t *testing.T) {
 		})
 	}
 }
+
+// containerWithConfigDigest returns a container with the given name. A non-empty digest
+// is attached as the DRIVER_CONFIG_DIGEST env var (matching how object_controls.go sets
+// it); an empty digest leaves the container without env entries.
+func containerWithConfigDigest(name, digest string) corev1.Container {
+	if digest == "" {
+		return corev1.Container{Name: name}
+	}
+	return corev1.Container{
+		Name: name,
+		Env:  []corev1.EnvVar{{Name: DriverConfigDigestEnvName, Value: digest}},
+	}
+}
+
+// TestDriverConfigDigestFromPodSpec covers container ordering, empty values and nil input.
+func TestDriverConfigDigestFromPodSpec(t *testing.T) {
+	// containers wraps its arguments in a slice to keep the table entries short.
+	containers := func(ctrs ...corev1.Container) []corev1.Container { return ctrs }
+
+	cases := []struct {
+		name string
+		spec *corev1.PodSpec
+		want string
+	}{
+		{
+			name: "digest on k8s-driver-manager init container",
+			spec: &corev1.PodSpec{
+				InitContainers: containers(containerWithConfigDigest("k8s-driver-manager", "abc123")),
+				Containers:     containers(containerWithConfigDigest("nvidia-driver-ctr", "")),
+			},
+			want: "abc123",
+		},
+		{
+			name: "digest on nvidia-driver-ctr main container",
+			spec: &corev1.PodSpec{
+				Containers: containers(containerWithConfigDigest("nvidia-driver-ctr", "def456")),
+			},
+			want: "def456",
+		},
+		{
+			name: "digest on OCP openshift-driver-toolkit-ctr",
+			spec: &corev1.PodSpec{
+				Containers: containers(containerWithConfigDigest("openshift-driver-toolkit-ctr", "ocp789")),
+			},
+			want: "ocp789",
+		},
+		{
+			name: "init container digest takes precedence over main container",
+			spec: &corev1.PodSpec{
+				InitContainers: containers(containerWithConfigDigest("k8s-driver-manager", "init-digest")),
+				Containers:     containers(containerWithConfigDigest("nvidia-driver-ctr", "main-digest")),
+			},
+			want: "init-digest",
+		},
+		{
+			name: "empty init digest is skipped; main container value used",
+			spec: &corev1.PodSpec{
+				InitContainers: containers(corev1.Container{
+					Name: "k8s-driver-manager",
+					Env:  []corev1.EnvVar{{Name: DriverConfigDigestEnvName, Value: ""}},
+				}),
+				Containers: containers(containerWithConfigDigest("nvidia-driver-ctr", "main-digest")),
+			},
+			want: "main-digest",
+		},
+		{
+			name: "no digest anywhere",
+			spec: &corev1.PodSpec{
+				InitContainers: containers(corev1.Container{Name: "k8s-driver-manager"}),
+				Containers:     containers(corev1.Container{Name: "nvidia-driver-ctr"}),
+			},
+			want: "",
+		},
+		{
+			name: "nil spec",
+			spec: nil,
+			want: "",
+		},
+	}
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			assert.Equal(t, tc.want, DriverConfigDigestFromPodSpec(tc.spec))
+		})
+	}
+}
diff --git a/internal/predicates/restart_only.go b/internal/predicates/restart_only.go
new file mode 100644
index 0000000000..f1cf2b3b4e
--- /dev/null
+++ b/internal/predicates/restart_only.go
@@ -0,0 +1,50 @@
+/**
+# Copyright (c) NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+**/
+
+// Package predicates holds predicates the upgrade controller registers on the
+// k8s-operator-libs upgrade state manager.
+package predicates
+
+import (
+	"github.com/go-logr/logr"
+	corev1 "k8s.io/api/core/v1"
+
+	"github.com/NVIDIA/k8s-operator-libs/pkg/consts"
+	"github.com/NVIDIA/k8s-operator-libs/pkg/upgrade"
+
+	driverconfig "github.com/NVIDIA/gpu-operator/internal/config"
+)
+
+// DriverPodRestartOnly builds the RestartOnlyPredicate registered by the upgrade controller.
+// The predicate answers true only when the running pod spec and the desired DaemonSet
+// template both carry DRIVER_CONFIG_DIGEST and the two values are equal. A missing digest
+// on either side yields false, so the node goes through the full upgrade flow.
+func DriverPodRestartOnly(log logr.Logger) upgrade.RestartOnlyPredicate {
+	return func(running, desired *corev1.PodSpec) (bool, error) {
+		want := driverconfig.DriverConfigDigestFromPodSpec(desired)
+		have := driverconfig.DriverConfigDigestFromPodSpec(running)
+		debugLog := log.V(consts.LogLevelDebug)
+		if want != "" && have != "" {
+			sameDigest := want == have
+			debugLog.Info("evaluated driver config digest for restart-only routing",
+				"desiredDigest", want, "runningDigest", have, "restartOnly", sameDigest)
+			return sameDigest, nil
+		}
+		debugLog.Info("driver config digest missing; taking full upgrade flow",
+			"desiredDigest", want, "runningDigest", have)
+		return false, nil
+	}
+}
diff --git a/internal/predicates/restart_only_test.go b/internal/predicates/restart_only_test.go
new file mode 100644
index 0000000000..33d12243c7
--- /dev/null
+++ b/internal/predicates/restart_only_test.go
@@ -0,0 +1,59 @@
+/**
+# Copyright (c) NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+**/
+
+package predicates
+
+import (
+	"testing"
+
+	"github.com/go-logr/logr"
+	"github.com/stretchr/testify/assert"
+	corev1 "k8s.io/api/core/v1"
+
+	driverconfig "github.com/NVIDIA/gpu-operator/internal/config"
+)
+
+// TestDriverPodRestartOnly verifies the predicate for equal, differing, empty and nil inputs.
+func TestDriverPodRestartOnly(t *testing.T) {
+	// specWithDigest builds a pod spec whose only container carries the digest env var.
+	specWithDigest := func(digest string) *corev1.PodSpec {
+		env := []corev1.EnvVar{{Name: driverconfig.DriverConfigDigestEnvName, Value: digest}}
+		return &corev1.PodSpec{
+			Containers: []corev1.Container{{Name: "nvidia-driver-ctr", Env: env}},
+		}
+	}
+	restartOnly := DriverPodRestartOnly(logr.Discard())
+
+	cases := []struct {
+		name             string
+		running, desired *corev1.PodSpec
+		wantRestart      bool
+	}{
+		{name: "equal digests -> restart-only", running: specWithDigest("same"), desired: specWithDigest("same"), wantRestart: true},
+		{name: "differing digests -> full upgrade", running: specWithDigest("old"), desired: specWithDigest("new"), wantRestart: false},
+		{name: "missing digest on running pod -> full upgrade", running: specWithDigest(""), desired: specWithDigest("new"), wantRestart: false},
+		{name: "missing digest on desired template -> full upgrade", running: specWithDigest("old"), desired: specWithDigest(""), wantRestart: false},
+		{name: "nil running spec -> full upgrade", running: nil, desired: specWithDigest("x"), wantRestart: false},
+		{name: "nil desired spec -> full upgrade", running: specWithDigest("x"), desired: nil, wantRestart: false},
+	}
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			got, err := restartOnly(tc.running, tc.desired)
+			assert.NoError(t, err)
+			assert.Equal(t, tc.wantRestart, got)
+		})
+	}
+}
diff --git
a/vendor/github.com/NVIDIA/k8s-operator-libs/pkg/upgrade/common_manager.go b/vendor/github.com/NVIDIA/k8s-operator-libs/pkg/upgrade/common_manager.go index 52b47b31c0..2a16d979e2 100644 --- a/vendor/github.com/NVIDIA/k8s-operator-libs/pkg/upgrade/common_manager.go +++ b/vendor/github.com/NVIDIA/k8s-operator-libs/pkg/upgrade/common_manager.go @@ -79,6 +79,17 @@ func NewClusterUpgradeState() ClusterUpgradeState { return ClusterUpgradeState{NodeStates: make(map[string][]*NodeUpgradeState)} } +// RestartOnlyPredicate is used for a node whose driver pod is out-of-sync with its +// DaemonSet. running is the live driver pod's spec; desired is the DaemonSet template's +// pod spec. Returning true means the difference does not affect the installed driver, so +// the node is cordoned and the driver pod restarted in place, skipping pod-deletion +// (workload eviction) and drain; the consumer guarantees the running driver does not need +// to change across the restart. Returning false (the default when unset) routes the node +// through the full upgrade flow. Returning an error keeps the node in upgrade-required to +// be retried on a later reconcile. It is never called for orphaned pods, upgrade-requested +// nodes, or nodes waiting for safe driver load. +type RestartOnlyPredicate func(running, desired *corev1.PodSpec) (bool, error) + // CommonUpgradeManagerImpl is an implementation of the CommonUpgradeStateManager interface. // It facilitates common logic implementation for both upgrade modes: in-place and requestor (e.g. maintenance OP). 
type CommonUpgradeManagerImpl struct { @@ -97,6 +108,8 @@ type CommonUpgradeManagerImpl struct { // optional states podDeletionStateEnabled bool validationStateEnabled bool + // optional: when set, route immaterial pod-template changes to a restart-only path + restartOnlyPredicate RestartOnlyPredicate } // NewCommonUpgradeStateManager creates a new instance of CommonUpgradeManagerImpl diff --git a/vendor/github.com/NVIDIA/k8s-operator-libs/pkg/upgrade/upgrade_inplace.go b/vendor/github.com/NVIDIA/k8s-operator-libs/pkg/upgrade/upgrade_inplace.go index 02fccd3659..d11f0dc4b1 100644 --- a/vendor/github.com/NVIDIA/k8s-operator-libs/pkg/upgrade/upgrade_inplace.go +++ b/vendor/github.com/NVIDIA/k8s-operator-libs/pkg/upgrade/upgrade_inplace.go @@ -18,7 +18,9 @@ package upgrade import ( "context" + "fmt" + corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/util/intstr" "github.com/NVIDIA/k8s-operator-libs/api/upgrade/v1alpha1" @@ -69,7 +71,8 @@ func (m *InplaceNodeStateManagerImpl) ProcessUpgradeRequiredNodes( "maximum nodes that can be unavailable", maxUnavailable) for _, nodeState := range currentClusterState.NodeStates[UpgradeStateUpgradeRequired] { - if m.IsUpgradeRequested(nodeState.Node) { + upgradeRequested := m.IsUpgradeRequested(nodeState.Node) + if upgradeRequested { // Make sure to remove the upgrade-requested annotation err := m.NodeUpgradeStateProvider.ChangeNodeUpgradeAnnotation(ctx, nodeState.Node, GetUpgradeRequestedAnnotationKey(), "null") @@ -96,14 +99,26 @@ func (m *InplaceNodeStateManagerImpl) ProcessUpgradeRequiredNodes( } } - err := m.NodeUpgradeStateProvider.ChangeNodeUpgradeState(ctx, nodeState.Node, UpgradeStateCordonRequired) + targetState, terr := m.nextStateForUpgradeRequiredNode(ctx, nodeState, upgradeRequested) + if terr != nil { + // Keep the node in upgrade-required and retry on the next reconcile instead + // of starting a full upgrade. 
+ m.Log.V(consts.LogLevelError).Error(terr, + "could not determine next upgrade state; node kept in upgrade-required for retry", + "node", nodeState.Node.Name) + logEventf(m.EventRecorder, nodeState.Node, corev1.EventTypeWarning, GetEventReason(), + "%v, will retry", terr) + continue + } + + err := m.NodeUpgradeStateProvider.ChangeNodeUpgradeState(ctx, nodeState.Node, targetState) if err == nil { upgradesAvailable-- - m.Log.V(consts.LogLevelInfo).Info("Node waiting for cordon", - "node", nodeState.Node.Name) + m.Log.V(consts.LogLevelInfo).Info("Node moving to next upgrade state", + "node", nodeState.Node.Name, "state", targetState) } else { m.Log.V(consts.LogLevelError).Error( - err, "Failed to change node upgrade state", "state", UpgradeStateCordonRequired) + err, "Failed to change node upgrade state", "state", targetState) return err } } @@ -111,6 +126,57 @@ func (m *InplaceNodeStateManagerImpl) ProcessUpgradeRequiredNodes( return nil } +// nextStateForUpgradeRequiredNode determines the state a node in upgrade-required moves to. +// It returns UpgradeStatePodRestartRequired when a registered restart-only predicate matches +// (after cordoning the node), and UpgradeStateCordonRequired for the full upgrade flow otherwise. +// A non-nil error means the decision could not be made; the caller keeps the node in +// upgrade-required and retries on the next reconcile. +func (m *InplaceNodeStateManagerImpl) nextStateForUpgradeRequiredNode( + ctx context.Context, nodeState *NodeUpgradeState, upgradeRequested bool) (string, error) { + restartOnly, err := m.shouldRestartOnly(ctx, nodeState, upgradeRequested) + if err != nil { + return "", err + } + if !restartOnly { + return UpgradeStateCordonRequired, nil + } + // Restart-only change: cordon the node so it stays unschedulable if the pod restart fails, as in + // the full upgrade flow, then restart the driver pod without evicting workloads. 
+ m.Log.V(consts.LogLevelInfo).Info( + "Restart-only change detected; cordoning node and restarting driver pod in place, "+ + "skipping pod-deletion and drain", "node", nodeState.Node.Name) + if err := m.CordonManager.Cordon(ctx, nodeState.Node); err != nil { + return "", fmt.Errorf("failed to cordon node for restart-only upgrade: %w", err) + } + return UpgradeStatePodRestartRequired, nil +} + +// shouldRestartOnly reports whether the node qualifies for an in-place driver pod restart instead +// of the full upgrade flow. It is false when no predicate is registered, for orphaned pods, for +// nodes that explicitly requested an upgrade, and for nodes waiting for safe driver load (which +// must take the full flow so workloads are evicted before the load is unblocked at +// pod-restart-required). +func (m *InplaceNodeStateManagerImpl) shouldRestartOnly( + ctx context.Context, nodeState *NodeUpgradeState, upgradeRequested bool) (bool, error) { + if m.restartOnlyPredicate == nil || upgradeRequested || nodeState.IsOrphanedPod() || + nodeState.DriverPod == nil { + return false, nil + } + waitingForSafeLoad, err := m.SafeDriverLoadManager.IsWaitingForSafeDriverLoad(ctx, nodeState.Node) + if err != nil { + return false, fmt.Errorf("failed to check safe driver load status: %w", err) + } + if waitingForSafeLoad { + return false, nil + } + restartOnly, err := m.restartOnlyPredicate(&nodeState.DriverPod.Spec, + &nodeState.DriverDaemonSet.Spec.Template.Spec) + if err != nil { + return false, fmt.Errorf("failed to evaluate restart-only predicate: %w", err) + } + return restartOnly, nil +} + // ProcessNodeMaintenanceRequiredNodes is a used to satisfy ProcessNodeStateManager interface func (m *InplaceNodeStateManagerImpl) ProcessNodeMaintenanceRequiredNodes(ctx context.Context, currentClusterState *ClusterUpgradeState) error { diff --git a/vendor/github.com/NVIDIA/k8s-operator-libs/pkg/upgrade/upgrade_state.go 
b/vendor/github.com/NVIDIA/k8s-operator-libs/pkg/upgrade/upgrade_state.go index 4c1d626937..bb94207488 100644 --- a/vendor/github.com/NVIDIA/k8s-operator-libs/pkg/upgrade/upgrade_state.go +++ b/vendor/github.com/NVIDIA/k8s-operator-libs/pkg/upgrade/upgrade_state.go @@ -40,6 +40,9 @@ type ClusterUpgradeStateManager interface { // WithValidationEnabled provides an option to enable the optional 'validation' state // and pass a podSelector to specify which pods are performing the validation WithValidationEnabled(podSelector string) ClusterUpgradeStateManager + // WithRestartOnlyPredicate registers an optional predicate (see RestartOnlyPredicate); + // a nil predicate, the default, keeps the full upgrade flow for every out-of-sync node. + WithRestartOnlyPredicate(predicate RestartOnlyPredicate) ClusterUpgradeStateManager // BuildState builds a point-in-time snapshot of the driver upgrade state in the cluster. BuildState(ctx context.Context, namespace string, driverLabels map[string]string) (*ClusterUpgradeState, error) @@ -349,6 +352,14 @@ func (m *ClusterUpgradeStateManagerImpl) WithValidationEnabled(podSelector strin return m } +// WithRestartOnlyPredicate registers an optional restart-only predicate; a nil predicate +// preserves the default full upgrade flow for every out-of-sync node. 
+func (m *ClusterUpgradeStateManagerImpl) WithRestartOnlyPredicate( + predicate RestartOnlyPredicate) ClusterUpgradeStateManager { + m.restartOnlyPredicate = predicate + return m +} + // buildNodeUpgradeState creates a mapping between a node, // the driver POD running on them and the daemon set, controlling this pod func (m *ClusterUpgradeStateManagerImpl) buildNodeUpgradeState( diff --git a/vendor/modules.txt b/vendor/modules.txt index cb48f77fbe..4c390f4b5b 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -36,7 +36,7 @@ github.com/NVIDIA/go-nvlib/pkg/pciids # github.com/NVIDIA/k8s-kata-manager v0.2.3 ## explicit; go 1.23.0 github.com/NVIDIA/k8s-kata-manager/api/v1alpha1/config -# github.com/NVIDIA/k8s-operator-libs v0.0.0-20260505175649-fa6a3643c441 +# github.com/NVIDIA/k8s-operator-libs v0.0.0-20260629200812-d720f2557494 ## explicit; go 1.26.0 github.com/NVIDIA/k8s-operator-libs/api/upgrade/v1alpha1 github.com/NVIDIA/k8s-operator-libs/pkg/consts