diff --git a/legacy/gcp-pd-csi-driver-operator/pkg/operator/starter.go b/legacy/gcp-pd-csi-driver-operator/pkg/operator/starter.go index 01742570a..30630e433 100644 --- a/legacy/gcp-pd-csi-driver-operator/pkg/operator/starter.go +++ b/legacy/gcp-pd-csi-driver-operator/pkg/operator/starter.go @@ -12,6 +12,7 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/util/wait" "k8s.io/client-go/dynamic" kubeclient "k8s.io/client-go/kubernetes" "k8s.io/client-go/rest" @@ -60,6 +61,10 @@ const ( ocpDefaultLabelFmt = "kubernetes-io-cluster-%s=owned" operatorImageVersionEnvVarName = "OPERATOR_IMAGE_VERSION" + + // gcpDedicatedRegionPrefix is the prefix for GCP Dedicated regions. + // GCP Dedicated regions start with "u-" (e.g. "u-germany-northeast1"). + gcpDedicatedRegionPrefix = "u-" ) func RunOperator(ctx context.Context, controllerConfig *controllercmd.ControllerContext) error { @@ -243,23 +248,23 @@ func RunOperator(ctx context.Context, controllerConfig *controllercmd.Controller dynamicClient, assets.ReadFile, "servicemonitor.yaml", - ).WithStorageClassController( + ) + + storageClassFiles, err := getStorageClassFiles(ctx, configClient) + if err != nil { + return err + } + + csiControllerSet = csiControllerSet.WithStorageClassController( "GCPPDDriverStorageClassController", assets.ReadFile, - []string{ - "storageclass.yaml", - "storageclass_ssd.yaml", - }, + storageClassFiles, kubeClient, kubeInformersForNamespaces.InformersFor(""), operatorInformers, getKMSKeyHook(operatorInformers.Operator().V1().ClusterCSIDrivers().Lister()), ) - if err != nil { - return err - } - klog.Info("Starting the informers") go kubeInformersForNamespaces.Start(ctx.Done()) go dynamicInformers.Start(ctx.Done()) @@ -274,6 +279,49 @@ func RunOperator(ctx context.Context, controllerConfig *controllercmd.Controller return nil } +// getStorageClassFiles returns the list of StorageClass asset files to use, +// based on whether the cluster runs on GCP Dedicated. +// It retries for up to 1 minute to fetch the Infrastructure CR, because during +// early cluster installation the CR may not exist yet. +// On GCP Dedicated, only hyperdisk-balanced is supported. +// On regular GCP, standard-csi and ssd-csi are used. +func getStorageClassFiles(ctx context.Context, configClient configclient.Interface) ([]string, error) { + regularFiles := []string{ + "storageclass.yaml", + "storageclass_ssd.yaml", + } + gcpDedicatedFiles := []string{ + "storageclass_hyperdisk_balanced.yaml", + } + + var region string + var lastErr error + err := wait.PollUntilContextTimeout(ctx, 5*time.Second, 1*time.Minute, true, func(ctx context.Context) (bool, error) { + infra, err := configClient.ConfigV1().Infrastructures().Get(ctx, globalInfrastructureName, metav1.GetOptions{}) + if err != nil { + lastErr = err + klog.V(4).Infof("Failed to get Infrastructure CR, will retry: %v", err) + return false, nil + } + if infra.Status.PlatformStatus == nil || infra.Status.PlatformStatus.GCP == nil { + klog.V(4).Infof("Infrastructure CR has no GCP PlatformStatus, assuming regular GCP") + return true, nil + } + region = infra.Status.PlatformStatus.GCP.Region + return true, nil + }) + if err != nil { + return nil, fmt.Errorf("failed to get Infrastructure CR: %w", lastErr) + } + + if strings.HasPrefix(region, gcpDedicatedRegionPrefix) { + klog.Infof("GCP Dedicated detected (region %q), using hyperdisk-balanced StorageClass", region) + return gcpDedicatedFiles, nil + } + klog.Infof("Regular GCP detected (region %q), using standard StorageClasses", region) + return regularFiles, nil +} + // withCustomLabels adds labels from Infrastructure.Status.PlatformStatus.GCP.ResourceLabels to the // driver command line as --extra-labels==,=,... func withCustomLabels(infraLister configlisters.InfrastructureLister) dc.DeploymentHookFunc { diff --git a/legacy/gcp-pd-csi-driver-operator/pkg/operator/storageclass_test.go b/legacy/gcp-pd-csi-driver-operator/pkg/operator/storageclass_test.go new file mode 100644 index 000000000..e6a193e42 --- /dev/null +++ b/legacy/gcp-pd-csi-driver-operator/pkg/operator/storageclass_test.go @@ -0,0 +1,129 @@ +package operator + +import ( + "context" + "testing" + "time" + + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + v1 "github.com/openshift/api/config/v1" + fakeconfig "github.com/openshift/client-go/config/clientset/versioned/fake" +) + +func TestGetStorageClassFiles(t *testing.T) { + tests := []struct { + name string + infra *v1.Infrastructure + expectedFiles []string + }{ + { + name: "regular GCP region", + infra: &v1.Infrastructure{ + ObjectMeta: metav1.ObjectMeta{Name: "cluster"}, + Status: v1.InfrastructureStatus{ + PlatformStatus: &v1.PlatformStatus{ + GCP: &v1.GCPPlatformStatus{ + Region: "us-central1", + }, + }, + }, + }, + expectedFiles: []string{"storageclass.yaml", "storageclass_ssd.yaml"}, + }, + { + name: "regular GCP region europe", + infra: &v1.Infrastructure{ + ObjectMeta: metav1.ObjectMeta{Name: "cluster"}, + Status: v1.InfrastructureStatus{ + PlatformStatus: &v1.PlatformStatus{ + GCP: &v1.GCPPlatformStatus{ + Region: "europe-west1", + }, + }, + }, + }, + expectedFiles: []string{"storageclass.yaml", "storageclass_ssd.yaml"}, + }, + { + name: "GCP Dedicated region", + infra: &v1.Infrastructure{ + ObjectMeta: metav1.ObjectMeta{Name: "cluster"}, + Status: v1.InfrastructureStatus{ + PlatformStatus: &v1.PlatformStatus{ + GCP: &v1.GCPPlatformStatus{ + Region: "u-germany-northeast1", + }, + }, + }, + }, + expectedFiles: []string{"storageclass_hyperdisk_balanced.yaml"}, + }, + { + name: "empty region defaults to regular GCP", + infra: &v1.Infrastructure{ + ObjectMeta: metav1.ObjectMeta{Name: "cluster"}, + Status: v1.InfrastructureStatus{ + PlatformStatus: &v1.PlatformStatus{ + GCP: &v1.GCPPlatformStatus{ + Region: "", + }, + }, + }, + }, + expectedFiles: []string{"storageclass.yaml", "storageclass_ssd.yaml"}, + }, + { + name: "nil GCP status defaults to regular GCP", + infra: &v1.Infrastructure{ + ObjectMeta: metav1.ObjectMeta{Name: "cluster"}, + Status: v1.InfrastructureStatus{ + PlatformStatus: &v1.PlatformStatus{}, + }, + }, + expectedFiles: []string{"storageclass.yaml", "storageclass_ssd.yaml"}, + }, + { + name: "nil PlatformStatus defaults to regular GCP", + infra: &v1.Infrastructure{ + ObjectMeta: metav1.ObjectMeta{Name: "cluster"}, + Status: v1.InfrastructureStatus{}, + }, + expectedFiles: []string{"storageclass.yaml", "storageclass_ssd.yaml"}, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + configClient := fakeconfig.NewSimpleClientset(test.infra) + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + + files, err := getStorageClassFiles(ctx, configClient) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if len(files) != len(test.expectedFiles) { + t.Fatalf("expected %d files, got %d: %v", len(test.expectedFiles), len(files), files) + } + for i, f := range files { + if f != test.expectedFiles[i] { + t.Errorf("file[%d]: expected %q, got %q", i, test.expectedFiles[i], f) + } + } + }) + } +} + +func TestGetStorageClassFilesNoInfrastructure(t *testing.T) { + configClient := fakeconfig.NewSimpleClientset() + // Use a short timeout so the retry loop finishes quickly in tests. + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + + _, err := getStorageClassFiles(ctx, configClient) + if err == nil { + t.Fatal("expected error when Infrastructure CR does not exist, got nil") + } +}