Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 55 additions & 0 deletions .github/workflows/kagenti-ci.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
# CI for the Kagenti fork: builds the driver binary, lints the code, and runs
# the unit and gRPC test packages as three independent jobs.
name: Kagenti CI

# Runs on pull requests targeting main or mvp, and on pushes to mvp.
on:
  pull_request:
    branches: [main, mvp]
  push:
    branches: [mvp]

# The workflow token is limited to read access on repository contents.
permissions:
  contents: read

jobs:
  # Compiles the driver entry point under ./cmd/driver/.
  build:
    name: Build
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      # The Go toolchain version is read from go.mod.
      - uses: actions/setup-go@v5
        with:
          go-version-file: go.mod

      - name: Build
        run: go build -o openshell-driver-openshift ./cmd/driver/

  # Runs golangci-lint with a five-minute timeout.
  lint:
    name: Lint
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - uses: actions/setup-go@v5
        with:
          go-version-file: go.mod

      # NOTE(review): "latest" is unpinned, so lint results may change between
      # runs without any code change — consider pinning a specific version.
      - uses: golangci/golangci-lint-action@v6
        with:
          version: latest
          args: --timeout=5m

  # Runs only the internal/driver and internal/grpctest packages, each with a
  # 30-second timeout; other packages are not tested by this job.
  test:
    name: Test
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - uses: actions/setup-go@v5
        with:
          go-version-file: go.mod

      - name: Unit tests
        run: go test ./internal/driver/ -timeout 30s -v

      - name: gRPC tests
        run: go test ./internal/grpctest/ -timeout 30s -v
10 changes: 9 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,12 @@
# openshell-driver-openshift
# openshell-driver-openshift (Kagenti Fork)

> **This is a [Kagenti](https://github.com/kagenti/kagenti) fork of [zanetworker/openshell-driver-openshift](https://github.com/zanetworker/openshell-driver-openshift).**
>
> The `mvp` branch adds a namespace flag, tenant labels, scoped RBAC, and dtach session
> persistence for multi-tenant OpenShell deployments.
> See the [epic](https://github.com/kagenti/kagenti/issues/1363) for the full plan.
>
> **Upstream tracking:** `main` is kept in sync with upstream. Fork-specific work happens on `mvp`.

An [OpenShell](https://github.com/NVIDIA/OpenShell) compute driver for OpenShift/Kubernetes clusters. Implements the `ComputeDriver` gRPC contract (`compute_driver.proto`) to provision agent sandboxes as `agents.x-k8s.io/v1alpha1/Sandbox` CRDs.

Expand Down
8 changes: 8 additions & 0 deletions cmd/driver/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,16 +24,24 @@ func main() {
cfg := driver.DefaultConfig()
flag.StringVar(&cfg.Namespace, "namespace", cfg.Namespace,
"Kubernetes namespace where sandboxes are provisioned")
flag.StringVar(&cfg.Tenant, "tenant", cfg.Tenant,
"Tenant name for pod labels (openshell.ai/tenant, kagenti.io/team); defaults to namespace if empty")
flag.StringVar(&cfg.SupervisorImage, "supervisor-image", cfg.SupervisorImage,
"Container image that contains the supervisor binary")
flag.StringVar(&cfg.SupervisorBinaryPath, "supervisor-binary-path", cfg.SupervisorBinaryPath,
"Path to the supervisor binary inside the supervisor image")
flag.StringVar(&cfg.DtachBinaryPath, "dtach-binary-path", cfg.DtachBinaryPath,
"Path to the dtach binary inside the supervisor image")
flag.StringVar(&cfg.SupervisorMountPath, "supervisor-mount-path", cfg.SupervisorMountPath,
"Mount path for the supervisor binary volume in the agent container")
flag.StringVar(&cfg.GatewayEndpoint, "gateway-endpoint", cfg.GatewayEndpoint,
"Gateway gRPC endpoint for supervisor callback (OPENSHELL_ENDPOINT)")
flag.Parse()

if cfg.Tenant == "" {
cfg.Tenant = cfg.Namespace
}

logger := slog.New(slog.NewJSONHandler(os.Stdout, &slog.HandlerOptions{
Level: slog.LevelInfo,
}))
Expand Down
52 changes: 36 additions & 16 deletions deploy/gateway-with-driver.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,18 +15,21 @@
# -n agent-sandbox-system
#
# 3. Privileged service account for sandbox pods:
# kubectl create serviceaccount openshell-sandbox -n default
# oc adm policy add-scc-to-user privileged -z openshell-sandbox -n default
# kubectl create serviceaccount openshell-sandbox -n ${NAMESPACE}
# oc adm policy add-scc-to-user privileged -z openshell-sandbox -n ${NAMESPACE}
#
# Usage:
# # Set the target tenant namespace
# export NAMESPACE=team1
#
# # Generate a shared handshake secret (required by gateway)
# export HANDSHAKE_SECRET=$(openssl rand -hex 32)
#
# # Substitute and apply
# envsubst < deploy/gateway-with-driver.yaml | kubectl apply -f -
#
# # Register the gateway with the CLI
# kubectl port-forward svc/openshell-gateway 8080:8080 &
# kubectl port-forward svc/openshell-gateway 8080:8080 -n ${NAMESPACE} &
# openshell gateway add http://localhost:8080 --local
#
# # Create a provider with your API key
Expand All @@ -44,26 +47,43 @@ apiVersion: v1
kind: ServiceAccount
metadata:
name: openshell-gateway
namespace: default
namespace: ${NAMESPACE}
---
# Namespaced Role for the gateway service account. ${NAMESPACE} is filled in
# by envsubst before the manifest is applied.
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
  name: openshell-gateway
  namespace: ${NAMESPACE}
rules:
  # Sandbox custom resources: read, create, update and delete ("patch" is not granted).
  - apiGroups: ["agents.x-k8s.io"]
    resources: ["sandboxes"]
    verbs: ["get", "list", "watch", "create", "update", "delete"]
  # Read-only access to pods, events and persistent volume claims.
  - apiGroups: [""]
    resources: ["pods", "events", "persistentvolumeclaims"]
    verbs: ["get", "list", "watch"]
  # Secrets may be read and created, but not updated or deleted.
  - apiGroups: [""]
    resources: ["secrets"]
    verbs: ["get", "list", "watch", "create"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
kind: RoleBinding
metadata:
name: openshell-gateway
namespace: ${NAMESPACE}
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: Role
name: openshell-gateway
subjects:
- kind: ServiceAccount
name: openshell-gateway
namespace: default
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: cluster-admin
namespace: ${NAMESPACE}
---
apiVersion: v1
kind: Service
metadata:
name: openshell-gateway
namespace: default
namespace: ${NAMESPACE}
spec:
selector:
app: openshell-gateway
Expand All @@ -76,7 +96,7 @@ apiVersion: apps/v1
kind: Deployment
metadata:
name: openshell-gateway
namespace: default
namespace: ${NAMESPACE}
spec:
replicas: 1
selector:
Expand All @@ -97,11 +117,11 @@ spec:
- --socket
- /shared/driver.sock
- --namespace
- default
- ${NAMESPACE}
- --supervisor-image
- quay.io/azaalouk/openshell-supervisor:latest
- --gateway-endpoint
- "http://openshell-gateway.default.svc.cluster.local:8080"
- "http://openshell-gateway.${NAMESPACE}.svc.cluster.local:8080"
volumeMounts:
- name: shared-socket
mountPath: /shared
Expand All @@ -119,11 +139,11 @@ spec:
- --compute-driver-socket
- /shared/driver.sock
- --sandbox-namespace
- default
- ${NAMESPACE}
- --sandbox-image
- quay.io/azaalouk/demo-sandbox-claude:latest
- --grpc-endpoint
- "http://openshell-gateway.default.svc.cluster.local:8080"
- "http://openshell-gateway.${NAMESPACE}.svc.cluster.local:8080"
- --ssh-handshake-secret
- "${HANDSHAKE_SECRET}"
- --log-level
Expand Down
3 changes: 3 additions & 0 deletions internal/driver/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,10 @@ package driver

// Config holds the settings the driver uses when provisioning sandboxes.
// The driver binary fills it from DefaultConfig and command-line flags.
type Config struct {
	// Namespace is the Kubernetes namespace where sandboxes are provisioned.
	Namespace string
	// Tenant is the value written to the openshell.ai/tenant and
	// kagenti.io/team labels. The provisioner omits both labels when Tenant
	// is empty. The fallback to Namespace is applied by the driver binary
	// after flag parsing, not by this package.
	Tenant string
	// SupervisorImage is the container image that contains the supervisor binary.
	SupervisorImage string
	// SupervisorBinaryPath is the path to the supervisor binary inside SupervisorImage.
	SupervisorBinaryPath string
	// DtachBinaryPath is the path to the dtach binary inside SupervisorImage.
	DtachBinaryPath string
	// SupervisorMountPath is the mount path of the supervisor binary volume
	// in the agent container.
	SupervisorMountPath string
	// GatewayEndpoint is the gateway gRPC endpoint used for the supervisor
	// callback (OPENSHELL_ENDPOINT).
	GatewayEndpoint string
}
Expand All @@ -13,6 +15,7 @@ func DefaultConfig() Config {
Namespace: "openshell-system",
SupervisorImage: "quay.io/azaalouk/openshell-supervisor:latest",
SupervisorBinaryPath: "/usr/local/bin/openshell-sandbox",
DtachBinaryPath: "/usr/local/bin/dtach",
SupervisorMountPath: "/opt/openshell/bin",
}
}
1 change: 1 addition & 0 deletions internal/driver/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ func TestDefaultConfig(t *testing.T) {
{"Namespace", cfg.Namespace, "openshell-system"},
{"SupervisorImage", cfg.SupervisorImage, "quay.io/azaalouk/openshell-supervisor:latest"},
{"SupervisorBinaryPath", cfg.SupervisorBinaryPath, "/usr/local/bin/openshell-sandbox"},
{"DtachBinaryPath", cfg.DtachBinaryPath, "/usr/local/bin/dtach"},
{"SupervisorMountPath", cfg.SupervisorMountPath, "/opt/openshell/bin"},
}

Expand Down
41 changes: 29 additions & 12 deletions internal/driver/provisioner.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,11 @@ var sandboxGVR = schema.GroupVersionResource{
}

const (
labelSandboxID = "openshell.ai/sandbox-id"
labelManagedBy = "openshell.ai/managed-by"
labelKagenti = "kagenti.io/type"
labelSandboxID = "openshell.ai/sandbox-id"
labelManagedBy = "openshell.ai/managed-by"
labelKagenti = "kagenti.io/type"
labelTenant = "openshell.ai/tenant"
labelKagentiTeam = "kagenti.io/team"
)

// K8sProvisioner implements SandboxProvisioner using the Kubernetes API. It
Expand Down Expand Up @@ -76,6 +78,10 @@ func (p *K8sProvisioner) Create(ctx context.Context, sb *pb.DriverSandbox) error
labelManagedBy: "openshell",
labelKagenti: "agent",
})
if p.cfg.Tenant != "" {
labels[labelTenant] = p.cfg.Tenant
labels[labelKagentiTeam] = p.cfg.Tenant
}

obj := &unstructured.Unstructured{
Object: map[string]interface{}{
Expand Down Expand Up @@ -223,11 +229,16 @@ func (p *K8sProvisioner) buildSandboxSpec(sb *pb.DriverSandbox) map[string]inter
spec := sb.GetSpec()
tmpl := spec.GetTemplate()

// Supervisor init container copies the binary into the shared volume.
// Supervisor init container copies both the supervisor and dtach binaries into the shared volume.
initContainer := map[string]interface{}{
"name": "supervisor-init",
"image": p.cfg.SupervisorImage,
"command": []interface{}{"cp", p.cfg.SupervisorBinaryPath, p.cfg.SupervisorMountPath + "/"},
"name": "supervisor-init",
"image": p.cfg.SupervisorImage,
"command": []interface{}{
"sh", "-c",
fmt.Sprintf("cp %s %s/ && cp %s %s/",
p.cfg.SupervisorBinaryPath, p.cfg.SupervisorMountPath,
p.cfg.DtachBinaryPath, p.cfg.SupervisorMountPath),
},
"volumeMounts": []interface{}{
map[string]interface{}{
"name": "supervisor-bin",
Expand Down Expand Up @@ -282,14 +293,20 @@ func (p *K8sProvisioner) buildSandboxSpec(sb *pb.DriverSandbox) map[string]inter
}
}

podLabels := mergeMaps(tmpl.GetLabels(), map[string]string{
labelSandboxID: sb.GetId(),
labelManagedBy: "openshell",
labelKagenti: "agent",
})
if p.cfg.Tenant != "" {
podLabels[labelTenant] = p.cfg.Tenant
podLabels[labelKagentiTeam] = p.cfg.Tenant
}

return map[string]interface{}{
"podTemplate": map[string]interface{}{
"metadata": map[string]interface{}{
"labels": mergeMaps(tmpl.GetLabels(), map[string]string{
labelSandboxID: sb.GetId(),
labelManagedBy: "openshell",
labelKagenti: "agent",
}),
"labels": podLabels,
},
"spec": podSpec,
},
Expand Down
54 changes: 49 additions & 5 deletions internal/driver/provisioner_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"context"
"log/slog"
"os"
"strings"
"testing"

pb "github.com/zanetworker/openshell-driver-openshift/gen/computev1"
Expand Down Expand Up @@ -185,13 +186,17 @@ func TestBuildSandboxSpec_SupervisorInitContainer(t *testing.T) {
t.Errorf("expected image %s, got %v", cfg.SupervisorImage, initC["image"])
}

// Verify command copies supervisor binary.
// Verify command copies both supervisor and dtach binaries via sh -c.
cmd := initC["command"].([]interface{})
if len(cmd) != 3 || cmd[0] != "cp" {
t.Errorf("expected cp command, got %v", cmd)
if len(cmd) != 3 || cmd[0] != "sh" || cmd[1] != "-c" {
t.Errorf("expected sh -c command, got %v", cmd)
}
if cmd[1] != cfg.SupervisorBinaryPath {
t.Errorf("expected source %s, got %v", cfg.SupervisorBinaryPath, cmd[1])
script := cmd[2].(string)
if !strings.Contains(script, cfg.SupervisorBinaryPath) {
t.Errorf("expected script to contain supervisor path %s, got %s", cfg.SupervisorBinaryPath, script)
}
if !strings.Contains(script, cfg.DtachBinaryPath) {
t.Errorf("expected script to contain dtach path %s, got %s", cfg.DtachBinaryPath, script)
}

// Verify agent container runs supervisor.
Expand Down Expand Up @@ -292,6 +297,45 @@ func TestBuildSandboxSpec_Labels(t *testing.T) {
if labels[labelManagedBy] != "openshell" {
t.Errorf("expected managed-by label, got %v", labels[labelManagedBy])
}
// No tenant configured in testConfig() — tenant labels must be absent.
if _, ok := labels[labelTenant]; ok {
t.Errorf("expected no %s label when tenant is empty, got %v", labelTenant, labels[labelTenant])
}
}

// TestBuildSandboxSpec_TenantLabels checks that a configured tenant is written
// to the pod template under both the openshell.ai/tenant and kagenti.io/team
// label keys.
func TestBuildSandboxSpec_TenantLabels(t *testing.T) {
	tenantCfg := testConfig()
	tenantCfg.Tenant = "team1"

	lg := testLogger()
	fakeDyn := dynamicfake.NewSimpleDynamicClientWithCustomListKinds(
		runtime.NewScheme(),
		map[schema.GroupVersionResource]string{sandboxGVR: "SandboxList"},
	)
	prov := NewK8sProvisioner(fakeDyn, kubefake.NewSimpleClientset(), tenantCfg, lg)

	built := prov.buildSandboxSpec(&pb.DriverSandbox{
		Id: "sb-tenant",
		Spec: &pb.DriverSandboxSpec{
			Template: &pb.DriverSandboxTemplate{
				Image: "img:latest",
			},
		},
	})

	// Walk podTemplate -> metadata -> labels; an unexpected shape panics and
	// fails the test.
	tmpl := built["podTemplate"].(map[string]interface{})
	md := tmpl["metadata"].(map[string]interface{})
	got := md["labels"].(map[string]interface{})

	// Both tenant label keys must carry the configured tenant name.
	for _, key := range []string{labelTenant, labelKagentiTeam} {
		if got[key] != "team1" {
			t.Errorf("expected %s=team1, got %v", key, got[key])
		}
	}
}

func TestNewWithDeps(t *testing.T) {
Expand Down
4 changes: 1 addition & 3 deletions internal/grpctest/contract_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -72,9 +72,7 @@ func startTestServer(t *testing.T) (pb.ComputeDriverClient, func()) {
pb.RegisterComputeDriverServer(srv, drv)

go func() {
if err := srv.Serve(lis); err != nil {
// Serve returns an error after GracefulStop; ignore.
}
_ = srv.Serve(lis) // returns error after GracefulStop
}()

conn, err := grpc.NewClient(
Expand Down
Loading