Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 51 additions & 0 deletions controllers/active_config.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
/**
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/

package controllers

import (
"context"
"fmt"

"sigs.k8s.io/controller-runtime/pkg/client"

gpuv1 "github.com/NVIDIA/gpu-operator/api/nvidia/v1"
nvidiav1alpha1 "github.com/NVIDIA/gpu-operator/api/nvidia/v1alpha1"
)

// resolveActiveConfig picks the cluster-wide CR that currently drives the operands.
//
// ClusterPolicy objects are listed first; if any exist, the first list item is returned
// and GPUClusterConfig is never queried. Otherwise the DRA-based GPUClusterConfig objects
// are listed and the first item, if any, is returned. The two CRs are treated as mutually
// exclusive, so at most one returned pointer is non-nil. All-nil results (including a nil
// error) mean neither CR exists. A failed list call is returned wrapped, with both
// pointers nil.
func resolveActiveConfig(ctx context.Context, c client.Client) (*gpuv1.ClusterPolicy, *nvidiav1alpha1.GPUClusterConfig, error) {
	// ClusterPolicy wins whenever at least one is present.
	var policyList gpuv1.ClusterPolicyList
	if err := c.List(ctx, &policyList); err != nil {
		return nil, nil, fmt.Errorf("failed to list ClusterPolicy: %w", err)
	}
	if len(policyList.Items) != 0 {
		return &policyList.Items[0], nil, nil
	}

	// No ClusterPolicy: fall back to the GPUClusterConfig (DRA) path.
	var configList nvidiav1alpha1.GPUClusterConfigList
	err := c.List(ctx, &configList)
	switch {
	case err != nil:
		return nil, nil, fmt.Errorf("failed to list GPUClusterConfig: %w", err)
	case len(configList.Items) != 0:
		return nil, &configList.Items[0], nil
	}

	return nil, nil, nil
}
14 changes: 8 additions & 6 deletions controllers/gpuclusterconfig_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@ import (
"sigs.k8s.io/controller-runtime/pkg/reconcile"
"sigs.k8s.io/controller-runtime/pkg/source"

gpuv1 "github.com/NVIDIA/gpu-operator/api/nvidia/v1"
nvidiav1alpha1 "github.com/NVIDIA/gpu-operator/api/nvidia/v1alpha1"
"github.com/NVIDIA/gpu-operator/controllers/clusterinfo"
"github.com/NVIDIA/gpu-operator/internal/conditions"
Expand Down Expand Up @@ -82,11 +81,11 @@ func (r *GPUClusterConfigReconciler) Reconcile(ctx context.Context, req ctrl.Req

// GPUClusterConfig (DRA path) is mutually exclusive with ClusterPolicy: if one
// exists, yield to it rather than deploying the DRA stack alongside it.
clusterPolicies := &gpuv1.ClusterPolicyList{}
if err := r.List(ctx, clusterPolicies); err != nil {
return ctrl.Result{}, fmt.Errorf("error listing ClusterPolicies: %w", err)
clusterPolicy, _, err := resolveActiveConfig(ctx, r.Client)
if err != nil {
return ctrl.Result{}, err
}
if len(clusterPolicies.Items) > 0 {
if clusterPolicy != nil {
logger.V(consts.LogLevelWarning).Info("ClusterPolicy present, skipping mutually exclusive GPUClusterConfig")
if err := r.updateCrStatus(ctx, instance, nvidiav1alpha1.Disabled); err != nil {
return ctrl.Result{}, err
Expand All @@ -95,7 +94,10 @@ func (r *GPUClusterConfigReconciler) Reconcile(ctx context.Context, req ctrl.Req
if condErr := r.conditionUpdater.SetConditionsError(ctx, instance, conditions.ReconcileFailed, msg); condErr != nil {
logger.Error(condErr, "failed to set condition")
}
return ctrl.Result{}, nil
// Requeue so the ClusterPolicy's deletion is noticed and the instance
// recovers; nothing watches ClusterPolicy here, mirroring the ready-path
// resync below.
return ctrl.Result{RequeueAfter: time.Minute}, nil
}

// Singleton, first-wins (mirroring ClusterPolicy): the first instance to reconcile
Expand Down
21 changes: 16 additions & 5 deletions controllers/gpuclusterconfig_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,14 +61,17 @@ func newGPUClusterConfigReconciler(t *testing.T, objs ...client.Object) (*GPUClu
}

// fakeStateManager returns canned SyncState results so the controller tests don't load
// real manifests. GetWatchSources is promoted from the embedded (nil) interface and is
// never called here — only SetupWithManager calls it, which these tests skip.
// real manifests. It records the last info catalog passed to SyncState so tests can
// assert on its entries. GetWatchSources is promoted from the embedded (nil) interface
// and is never called here; only SetupWithManager calls it, which these tests skip.
type fakeStateManager struct {
state.Manager
results state.Results
results state.Results
lastCatalog state.InfoCatalog
}

func (f *fakeStateManager) SyncState(_ context.Context, _ interface{}, _ state.InfoCatalog) state.Results {
func (f *fakeStateManager) SyncState(_ context.Context, _ interface{}, catalog state.InfoCatalog) state.Results {
f.lastCatalog = catalog
return f.results
}

Expand Down Expand Up @@ -114,14 +117,22 @@ func TestGPUClusterConfigReconcileNotFound(t *testing.T) {

// A ClusterPolicy in the cluster disables the GPUClusterConfig: the two paths are
// mutually exclusive, so the DRA stack is not deployed alongside ClusterPolicy.
// The result requeues so the instance recovers once the ClusterPolicy is removed.
func TestGPUClusterConfigDisabledByClusterPolicy(t *testing.T) {
cfg := &nvidiav1alpha1.GPUClusterConfig{ObjectMeta: metav1.ObjectMeta{Name: "config"}}
cp := &gpuv1.ClusterPolicy{ObjectMeta: metav1.ObjectMeta{Name: "cluster-policy"}}
r, c := newGPUClusterConfigReconciler(t, cfg, cp)

gccReconcile(t, r, cfg.Name)
res, err := r.Reconcile(t.Context(), gccRequest(cfg.Name))
require.NoError(t, err)
require.Positive(t, res.RequeueAfter, "disabled instance must requeue to detect ClusterPolicy removal")

require.Equal(t, nvidiav1alpha1.Disabled, gccState(t, c, cfg.Name))

// Removing the ClusterPolicy lets the next reconcile recover the instance.
require.NoError(t, c.Delete(t.Context(), cp))
gccReconcile(t, r, cfg.Name)
require.Equal(t, nvidiav1alpha1.Ready, gccState(t, c, cfg.Name))
}

// First-reconciled wins (mirroring ClusterPolicy): whichever instance reconciles first
Expand Down
97 changes: 73 additions & 24 deletions controllers/nodelabeling_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,12 +53,15 @@ type NodeLabelingReconciler struct {
}

// nodeLabelingController holds per-reconcile state so that helper methods don't need to
// re-receive that state as arguments.
// re-receive that state as arguments. Exactly one of clusterPolicy or gpuClusterConfig is
// set, selecting the ClusterPolicy stack (the default) or the DRA-based GPUClusterConfig
// stack; the two CRs are mutually exclusive.
type nodeLabelingController struct {
client client.Client
namespace string
clusterPolicy *gpuv1.ClusterPolicy
logger logr.Logger
client client.Client
namespace string
clusterPolicy *gpuv1.ClusterPolicy
gpuClusterConfig *nvidiav1alpha1.GPUClusterConfig
logger logr.Logger
}

// +kubebuilder:rbac:groups="",resources=nodes,verbs=get;list;watch;update;patch
Expand All @@ -67,34 +70,34 @@ type nodeLabelingController struct {
func (r *NodeLabelingReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
r.Log.Info("Reconciling node labels")

clusterPolicyList := &gpuv1.ClusterPolicyList{}
if err := r.List(ctx, clusterPolicyList); err != nil {
return reconcile.Result{}, fmt.Errorf("failed to list ClusterPolicy: %w", err)
// Default to the ClusterPolicy stack; fall back to the GPUClusterConfig (DRA) stack when
// no ClusterPolicy exists. Neither existing means there is nothing to label.
clusterPolicy, gpuClusterConfig, err := resolveActiveConfig(ctx, r.Client)
if err != nil {
return reconcile.Result{}, err
}

// (cdesiniotis) Return early if a ClusterPolicy CR does not exist.
// This means that nodes will not get labeled unless a ClusterPolicy
// CR has been created. This may be relaxed in the future when the
// NVIDIA DRA Driver for GPUs is integrated with the GPU Operator
// and new CRDs are introduced.
if len(clusterPolicyList.Items) == 0 {
r.Log.Info("No ClusterPolicy CR exists, skipping node labeling")
if clusterPolicy == nil && gpuClusterConfig == nil {
r.Log.Info("No ClusterPolicy or GPUClusterConfig CR exists, skipping node labeling")
return reconcile.Result{}, nil
}
clusterPolicy := &clusterPolicyList.Items[0]

nlc := &nodeLabelingController{
client: r.Client,
namespace: r.Namespace,
clusterPolicy: clusterPolicy,
logger: r.Log,
client: r.Client,
namespace: r.Namespace,
clusterPolicy: clusterPolicy,
gpuClusterConfig: gpuClusterConfig,
logger: r.Log,
}

if err := nlc.labelGPUNodes(ctx); err != nil {
return reconcile.Result{}, err
}

if nlc.clusterPolicy.Spec.Driver.UseNvidiaDriverCRDType() {
// Route each GPU node to its NVIDIADriver CR. Skipping this leaves the NVIDIADriver controller owning no nodes, and it
// then removes the driver DaemonSet.
usesNvidiaDriverCRD := nlc.gpuClusterConfig != nil ||
(nlc.clusterPolicy != nil && nlc.clusterPolicy.Spec.Driver.UseNvidiaDriverCRDType())
if usesNvidiaDriverCRD {
if _, err := nvidiadriverutil.AssignOwners(ctx, r.Client); err != nil {
return reconcile.Result{}, fmt.Errorf("failed to assign NVIDIADriver owners to nodes: %w", err)
}
Expand All @@ -103,8 +106,11 @@ func (r *NodeLabelingReconciler) Reconcile(ctx context.Context, req ctrl.Request
}
}

if err := nlc.applyDriverAutoUpgradeAnnotation(ctx); err != nil {
return reconcile.Result{}, err
// The driver auto-upgrade annotation is derived from ClusterPolicy spec.
if nlc.clusterPolicy != nil {
if err := nlc.applyDriverAutoUpgradeAnnotation(ctx); err != nil {
return reconcile.Result{}, err
}
}

return reconcile.Result{}, nil
Expand Down Expand Up @@ -160,6 +166,10 @@ func (nlc *nodeLabelingController) reconcileCommonGPULabel(labels map[string]str
// appropriate. If the node does not have the common GPU label, all state labels are removed.
// Returns true if labels were modified.
func (nlc *nodeLabelingController) updateGPUStateLabels(labels map[string]string, nodeName string) bool {
if nlc.gpuClusterConfig != nil {
return updateGPUClusterConfigStateLabels(labels)
}

if !hasCommonGPULabel(labels) {
return removeAllGPUStateLabels(labels)
}
Expand Down Expand Up @@ -200,6 +210,31 @@ func (nlc *nodeLabelingController) updateGPUStateLabels(labels map[string]string
return modified
}

// updateGPUClusterConfigStateLabels maintains the DRA operand deploy labels for the
// GPUClusterConfig stack, playing the role that the gpuWorkloadConfiguration state-label
// logic plays for ClusterPolicy.
//
// When the node carries the common GPU label, every key in gpuClusterConfigStateLabels
// that is missing from labels is added with its default value; keys already present keep
// whatever value they have, so that the k8s-driver-manager can pause an operand by
// flipping its label to drain it off a node during a driver reload. When the node does
// not carry the common GPU label, every such key that is present is deleted.
//
// The labels map is mutated in place. The return value is true if any entry was added or
// removed.
func updateGPUClusterConfigStateLabels(labels map[string]string) bool {
	changed := false

	if hasCommonGPULabel(labels) {
		// GPU node: fill in missing deploy labels without overwriting existing values.
		for name, defaultValue := range gpuClusterConfigStateLabels {
			if _, present := labels[name]; present {
				continue
			}
			labels[name] = defaultValue
			changed = true
		}
		return changed
	}

	// Not a GPU node (any more): strip whatever deploy labels are still set.
	for name := range gpuClusterConfigStateLabels {
		if _, present := labels[name]; !present {
			continue
		}
		delete(labels, name)
		changed = true
	}
	return changed
}

func (nlc *nodeLabelingController) setDriverAutoUpgradeAnnotation(ctx context.Context, node *corev1.Node, autoUpgradeEnabled bool) error {
annotationValue, annotationExists := node.Annotations[driverAutoUpgradeAnnotationKey]
updateRequired := false
Expand Down Expand Up @@ -383,6 +418,20 @@ func (r *NodeLabelingReconciler) SetupWithManager(ctx context.Context, mgr ctrl.
return fmt.Errorf("error watching ClusterPolicy: %w", err)
}

// Watch GPUClusterConfig so GPU nodes are (re)labeled for the DRA stack as the CR is
// created or removed, mirroring the ClusterPolicy watch above.
gpuClusterConfigMapFn := func(ctx context.Context, gcc *nvidiav1alpha1.GPUClusterConfig) []reconcile.Request {
return mapToSingleton(ctx, gcc)
}
if err := c.Watch(source.Kind(
mgr.GetCache(),
&nvidiav1alpha1.GPUClusterConfig{},
handler.TypedEnqueueRequestsFromMapFunc(gpuClusterConfigMapFn),
predicate.TypedGenerationChangedPredicate[*nvidiav1alpha1.GPUClusterConfig]{},
)); err != nil {
return fmt.Errorf("error watching GPUClusterConfig: %w", err)
}

// Watch NVIDIADriver including delete events so owner labels are cleaned up promptly.
nvidiaDriverMapFn := func(ctx context.Context, nd *nvidiav1alpha1.NVIDIADriver) []reconcile.Request {
return mapToSingleton(ctx, nd)
Expand Down
Loading
Loading