diff --git a/.github/workflows/kagenti-ci.yml b/.github/workflows/kagenti-ci.yml new file mode 100644 index 0000000..cef7b63 --- /dev/null +++ b/.github/workflows/kagenti-ci.yml @@ -0,0 +1,55 @@ +name: Kagenti CI + +on: + pull_request: + branches: [main, mvp] + push: + branches: [mvp] + +permissions: + contents: read + +jobs: + build: + name: Build + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - uses: actions/setup-go@v5 + with: + go-version-file: go.mod + + - name: Build + run: go build -o openshell-driver-openshift ./cmd/driver/ + + lint: + name: Lint + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - uses: actions/setup-go@v5 + with: + go-version-file: go.mod + + - uses: golangci/golangci-lint-action@v6 + with: + version: latest + args: --timeout=5m + + test: + name: Test + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - uses: actions/setup-go@v5 + with: + go-version-file: go.mod + + - name: Unit tests + run: go test ./internal/driver/ -timeout 30s -v + + - name: gRPC tests + run: go test ./internal/grpctest/ -timeout 30s -v diff --git a/README.md b/README.md index af9cd3e..fc688a3 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,12 @@ -# openshell-driver-openshift +# openshell-driver-openshift (Kagenti Fork) + +> **This is a [Kagenti](https://github.com/kagenti/kagenti) fork of [zanetworker/openshell-driver-openshift](https://github.com/zanetworker/openshell-driver-openshift).** +> +> The `mvp` branch adds namespace flag, tenant labels, scoped RBAC, and dtach session +> persistence for multi-tenant OpenShell deployments. +> See the [epic](https://github.com/kagenti/kagenti/issues/1363) for the full plan. +> +> **Upstream tracking:** `main` is kept in sync with upstream. Fork-specific work happens on `mvp`. An [OpenShell](https://github.com/NVIDIA/OpenShell) compute driver for OpenShift/Kubernetes clusters. Implements the `ComputeDriver` gRPC contract (`compute_driver.proto`) to provision agent sandboxes as `agents.x-k8s.io/v1alpha1/Sandbox` CRDs. diff --git a/cmd/driver/main.go b/cmd/driver/main.go index 126e006..75b3d3a 100644 --- a/cmd/driver/main.go +++ b/cmd/driver/main.go @@ -24,16 +24,24 @@ func main() { cfg := driver.DefaultConfig() flag.StringVar(&cfg.Namespace, "namespace", cfg.Namespace, "Kubernetes namespace where sandboxes are provisioned") + flag.StringVar(&cfg.Tenant, "tenant", cfg.Tenant, + "Tenant name for pod labels (openshell.ai/tenant, kagenti.io/team); defaults to namespace if empty") flag.StringVar(&cfg.SupervisorImage, "supervisor-image", cfg.SupervisorImage, "Container image that contains the supervisor binary") flag.StringVar(&cfg.SupervisorBinaryPath, "supervisor-binary-path", cfg.SupervisorBinaryPath, "Path to the supervisor binary inside the supervisor image") + flag.StringVar(&cfg.DtachBinaryPath, "dtach-binary-path", cfg.DtachBinaryPath, + "Path to the dtach binary inside the supervisor image") flag.StringVar(&cfg.SupervisorMountPath, "supervisor-mount-path", cfg.SupervisorMountPath, "Mount path for the supervisor binary volume in the agent container") flag.StringVar(&cfg.GatewayEndpoint, "gateway-endpoint", cfg.GatewayEndpoint, "Gateway gRPC endpoint for supervisor callback (OPENSHELL_ENDPOINT)") flag.Parse() + if cfg.Tenant == "" { + cfg.Tenant = cfg.Namespace + } + logger := slog.New(slog.NewJSONHandler(os.Stdout, &slog.HandlerOptions{ Level: slog.LevelInfo, })) diff --git a/deploy/gateway-with-driver.yaml b/deploy/gateway-with-driver.yaml index 3bca0d2..cbeeb67 100644 --- a/deploy/gateway-with-driver.yaml +++ b/deploy/gateway-with-driver.yaml @@ -15,10 +15,13 @@ # -n agent-sandbox-system # # 3. Privileged service account for sandbox pods: -# kubectl create serviceaccount openshell-sandbox -n default -# oc adm policy add-scc-to-user privileged -z openshell-sandbox -n default +# kubectl create serviceaccount openshell-sandbox -n ${NAMESPACE} +# oc adm policy add-scc-to-user privileged -z openshell-sandbox -n ${NAMESPACE} # # Usage: +# # Set the target tenant namespace +# export NAMESPACE=team1 +# # # Generate a shared handshake secret (required by gateway) # export HANDSHAKE_SECRET=$(openssl rand -hex 32) # @@ -26,7 +29,7 @@ # envsubst < deploy/gateway-with-driver.yaml | kubectl apply -f - # # # Register the gateway with the CLI -# kubectl port-forward svc/openshell-gateway 8080:8080 & +# kubectl port-forward svc/openshell-gateway 8080:8080 -n ${NAMESPACE} & # openshell gateway add http://localhost:8080 --local # # # Create a provider with your API key @@ -44,26 +47,43 @@ apiVersion: v1 kind: ServiceAccount metadata: name: openshell-gateway - namespace: default + namespace: ${NAMESPACE} +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: openshell-gateway + namespace: ${NAMESPACE} +rules: + - apiGroups: ["agents.x-k8s.io"] + resources: ["sandboxes"] + verbs: ["get", "list", "watch", "create", "update", "delete"] + - apiGroups: [""] + resources: ["pods", "events", "persistentvolumeclaims"] + verbs: ["get", "list", "watch"] + - apiGroups: [""] + resources: ["secrets"] + verbs: ["get", "list", "watch", "create"] --- apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRoleBinding +kind: RoleBinding metadata: name: openshell-gateway + namespace: ${NAMESPACE} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: openshell-gateway subjects: - kind: ServiceAccount name: openshell-gateway - namespace: default -roleRef: - apiGroup: rbac.authorization.k8s.io - kind: ClusterRole - name: cluster-admin + namespace: ${NAMESPACE} --- apiVersion: v1 kind: Service metadata: name: openshell-gateway - namespace: default + namespace: ${NAMESPACE} spec: selector: app: openshell-gateway @@ -76,7 +96,7 @@ apiVersion: apps/v1 kind: Deployment metadata: name: openshell-gateway - namespace: default + namespace: ${NAMESPACE} spec: replicas: 1 selector: @@ -97,11 +117,11 @@ spec: - --socket - /shared/driver.sock - --namespace - - default + - ${NAMESPACE} - --supervisor-image - quay.io/azaalouk/openshell-supervisor:latest - --gateway-endpoint - - "http://openshell-gateway.default.svc.cluster.local:8080" + - "http://openshell-gateway.${NAMESPACE}.svc.cluster.local:8080" volumeMounts: - name: shared-socket mountPath: /shared @@ -119,11 +139,11 @@ spec: - --compute-driver-socket - /shared/driver.sock - --sandbox-namespace - - default + - ${NAMESPACE} - --sandbox-image - quay.io/azaalouk/demo-sandbox-claude:latest - --grpc-endpoint - - "http://openshell-gateway.default.svc.cluster.local:8080" + - "http://openshell-gateway.${NAMESPACE}.svc.cluster.local:8080" - --ssh-handshake-secret - "${HANDSHAKE_SECRET}" - --log-level diff --git a/internal/driver/config.go b/internal/driver/config.go index 3e4fa37..757f819 100644 --- a/internal/driver/config.go +++ b/internal/driver/config.go @@ -2,8 +2,10 @@ package driver type Config struct { Namespace string + Tenant string // openshell.ai/tenant and kagenti.io/team label value; defaults to Namespace if empty SupervisorImage string SupervisorBinaryPath string + DtachBinaryPath string SupervisorMountPath string GatewayEndpoint string } @@ -13,6 +15,7 @@ func DefaultConfig() Config { Namespace: "openshell-system", SupervisorImage: "quay.io/azaalouk/openshell-supervisor:latest", SupervisorBinaryPath: "/usr/local/bin/openshell-sandbox", + DtachBinaryPath: "/usr/local/bin/dtach", SupervisorMountPath: "/opt/openshell/bin", } } diff --git a/internal/driver/config_test.go b/internal/driver/config_test.go index 2cc8d83..5283603 100644 --- a/internal/driver/config_test.go +++ b/internal/driver/config_test.go @@ -13,6 +13,7 @@ func TestDefaultConfig(t *testing.T) { {"Namespace", cfg.Namespace, "openshell-system"}, {"SupervisorImage", cfg.SupervisorImage, "quay.io/azaalouk/openshell-supervisor:latest"}, {"SupervisorBinaryPath", cfg.SupervisorBinaryPath, "/usr/local/bin/openshell-sandbox"}, + {"DtachBinaryPath", cfg.DtachBinaryPath, "/usr/local/bin/dtach"}, {"SupervisorMountPath", cfg.SupervisorMountPath, "/opt/openshell/bin"}, } diff --git a/internal/driver/provisioner.go b/internal/driver/provisioner.go index 8a94d79..3ad1598 100644 --- a/internal/driver/provisioner.go +++ b/internal/driver/provisioner.go @@ -22,9 +22,11 @@ var sandboxGVR = schema.GroupVersionResource{ } const ( - labelSandboxID = "openshell.ai/sandbox-id" - labelManagedBy = "openshell.ai/managed-by" - labelKagenti = "kagenti.io/type" + labelSandboxID = "openshell.ai/sandbox-id" + labelManagedBy = "openshell.ai/managed-by" + labelKagenti = "kagenti.io/type" + labelTenant = "openshell.ai/tenant" + labelKagentiTeam = "kagenti.io/team" ) // K8sProvisioner implements SandboxProvisioner using the Kubernetes API. It @@ -76,6 +78,10 @@ func (p *K8sProvisioner) Create(ctx context.Context, sb *pb.DriverSandbox) error labelManagedBy: "openshell", labelKagenti: "agent", }) + if p.cfg.Tenant != "" { + labels[labelTenant] = p.cfg.Tenant + labels[labelKagentiTeam] = p.cfg.Tenant + } obj := &unstructured.Unstructured{ Object: map[string]interface{}{ @@ -223,11 +229,16 @@ func (p *K8sProvisioner) buildSandboxSpec(sb *pb.DriverSandbox) map[string]inter spec := sb.GetSpec() tmpl := spec.GetTemplate() - // Supervisor init container copies the binary into the shared volume. + // Supervisor init container copies both the supervisor and dtach binaries into the shared volume. initContainer := map[string]interface{}{ - "name": "supervisor-init", - "image": p.cfg.SupervisorImage, - "command": []interface{}{"cp", p.cfg.SupervisorBinaryPath, p.cfg.SupervisorMountPath + "/"}, + "name": "supervisor-init", + "image": p.cfg.SupervisorImage, + "command": []interface{}{ + "sh", "-c", + fmt.Sprintf("cp %s %s/ && cp %s %s/", + p.cfg.SupervisorBinaryPath, p.cfg.SupervisorMountPath, + p.cfg.DtachBinaryPath, p.cfg.SupervisorMountPath), + }, "volumeMounts": []interface{}{ map[string]interface{}{ "name": "supervisor-bin", @@ -282,14 +293,20 @@ func (p *K8sProvisioner) buildSandboxSpec(sb *pb.DriverSandbox) map[string]inter } } + podLabels := mergeMaps(tmpl.GetLabels(), map[string]string{ + labelSandboxID: sb.GetId(), + labelManagedBy: "openshell", + labelKagenti: "agent", + }) + if p.cfg.Tenant != "" { + podLabels[labelTenant] = p.cfg.Tenant + podLabels[labelKagentiTeam] = p.cfg.Tenant + } + return map[string]interface{}{ "podTemplate": map[string]interface{}{ "metadata": map[string]interface{}{ - "labels": mergeMaps(tmpl.GetLabels(), map[string]string{ - labelSandboxID: sb.GetId(), - labelManagedBy: "openshell", - labelKagenti: "agent", - }), + "labels": podLabels, }, "spec": podSpec, }, diff --git a/internal/driver/provisioner_test.go b/internal/driver/provisioner_test.go index 4c89977..fa1f8ba 100644 --- a/internal/driver/provisioner_test.go +++ b/internal/driver/provisioner_test.go @@ -4,6 +4,7 @@ import ( "context" "log/slog" "os" + "strings" "testing" pb "github.com/zanetworker/openshell-driver-openshift/gen/computev1" @@ -185,13 +186,17 @@ func TestBuildSandboxSpec_SupervisorInitContainer(t *testing.T) { t.Errorf("expected image %s, got %v", cfg.SupervisorImage, initC["image"]) } - // Verify command copies supervisor binary. + // Verify command copies both supervisor and dtach binaries via sh -c. cmd := initC["command"].([]interface{}) - if len(cmd) != 3 || cmd[0] != "cp" { - t.Errorf("expected cp command, got %v", cmd) + if len(cmd) != 3 || cmd[0] != "sh" || cmd[1] != "-c" { + t.Errorf("expected sh -c command, got %v", cmd) } - if cmd[1] != cfg.SupervisorBinaryPath { - t.Errorf("expected source %s, got %v", cfg.SupervisorBinaryPath, cmd[1]) + script := cmd[2].(string) + if !strings.Contains(script, cfg.SupervisorBinaryPath) { + t.Errorf("expected script to contain supervisor path %s, got %s", cfg.SupervisorBinaryPath, script) + } + if !strings.Contains(script, cfg.DtachBinaryPath) { + t.Errorf("expected script to contain dtach path %s, got %s", cfg.DtachBinaryPath, script) } // Verify agent container runs supervisor. @@ -292,6 +297,45 @@ func TestBuildSandboxSpec_Labels(t *testing.T) { if labels[labelManagedBy] != "openshell" { t.Errorf("expected managed-by label, got %v", labels[labelManagedBy]) } + // No tenant configured in testConfig() — tenant labels must be absent. + if _, ok := labels[labelTenant]; ok { + t.Errorf("expected no %s label when tenant is empty, got %v", labelTenant, labels[labelTenant]) + } +} + +func TestBuildSandboxSpec_TenantLabels(t *testing.T) { + cfg := testConfig() + cfg.Tenant = "team1" + + logger := testLogger() + scheme := runtime.NewScheme() + dynClient := dynamicfake.NewSimpleDynamicClientWithCustomListKinds( + scheme, + map[schema.GroupVersionResource]string{sandboxGVR: "SandboxList"}, + ) + clientset := kubefake.NewSimpleClientset() + p := NewK8sProvisioner(dynClient, clientset, cfg, logger) + + sb := &pb.DriverSandbox{ + Id: "sb-tenant", + Spec: &pb.DriverSandboxSpec{ + Template: &pb.DriverSandboxTemplate{ + Image: "img:latest", + }, + }, + } + + spec := p.buildSandboxSpec(sb) + podTemplate := spec["podTemplate"].(map[string]interface{}) + meta := podTemplate["metadata"].(map[string]interface{}) + podLabels := meta["labels"].(map[string]interface{}) + + if podLabels[labelTenant] != "team1" { + t.Errorf("expected %s=team1, got %v", labelTenant, podLabels[labelTenant]) + } + if podLabels[labelKagentiTeam] != "team1" { + t.Errorf("expected %s=team1, got %v", labelKagentiTeam, podLabels[labelKagentiTeam]) + } } func TestNewWithDeps(t *testing.T) { diff --git a/internal/grpctest/contract_test.go b/internal/grpctest/contract_test.go index 54a7ff3..50d307f 100644 --- a/internal/grpctest/contract_test.go +++ b/internal/grpctest/contract_test.go @@ -72,9 +72,7 @@ func startTestServer(t *testing.T) (pb.ComputeDriverClient, func()) { pb.RegisterComputeDriverServer(srv, drv) go func() { - if err := srv.Serve(lis); err != nil { - // Serve returns an error after GracefulStop; ignore. - } + _ = srv.Serve(lis) // returns error after GracefulStop }() conn, err := grpc.NewClient(