From a228b85732209ee13bd38ba8163a61c9cc2344a5 Mon Sep 17 00:00:00 2001 From: Katharine Hyatt Date: Mon, 11 May 2026 11:15:17 +0200 Subject: [PATCH 1/3] Add tests for AMDGPU --- .buildkite/pipeline.yml | 35 +++++++++++++++++++++++++++++++++-- Project.toml | 4 +++- test/gpu.jl | 3 ++- 3 files changed, 38 insertions(+), 4 deletions(-) diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index d79b62c6..5f4f584f 100644 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -2,7 +2,7 @@ env: SECRET_CODECOV_TOKEN: "NsHKj2ZxqUDfErNc+zlH6erC00pk0XRZeNAaU+hyRg6oHlIuSUVL53Z0/MW6Xeq8mBsYsfdG3rmE+h0hoGXj6swpmtjCnLI0CAHUSVOTKNHQ4R6VmKuNnLkNQX7+GO6PEcnV+sCMDSt/nhci0lUl/9qo+6uT/VA+9E6XiKOsKV8nL+kb/GDNJqrG8u2JJzd9EcrFG9Vf4p7tLgsafhQq+yQeVdeYxPWKPx2x6+K2w2WrGel0RlVfyYFLEGHo4TW4+OPPoMOJBCA+kkE2I8OlqzzMUMkULhwhWujHyOrWBZ74EFY2zbwYD/iiYTlGJW8UWaOn561uJp3J7+nab4nEYA==;U2FsdGVkX1/EACeMbht8x2ar6VrhBrcGZUtM4/B4viOz590nUZNIUkWPkjpmdriAAP3t1KEj2LlRg+z/FK+CSQ==" steps: - - label: "Julia v1" + - label: "Julia v1 -- CUDA" plugins: - JuliaCI/julia#v1: version: "1" @@ -17,7 +17,7 @@ steps: if: build.message !~ /\[skip tests\]/ timeout_in_minutes: 30 - - label: "Julia LTS" + - label: "Julia LTS -- CUDA" plugins: - JuliaCI/julia#v1: version: "1.10" # "lts" isn't valid @@ -31,3 +31,34 @@ steps: cuda: "*" if: build.message !~ /\[skip tests\]/ timeout_in_minutes: 30 + - label: "Julia v1 -- AMDGPU" + plugins: + - JuliaCI/julia#v1: + version: "1" + - JuliaCI/julia-test#v1: ~ + - JuliaCI/julia-coverage#v1: + dirs: + - src + - ext + agents: + queue: "juliagpu" + rocm: "*" + rocmgpu: "*" + if: build.message !~ /\[skip tests\]/ + timeout_in_minutes: 30 + + - label: "Julia LTS -- AMDGPU" + plugins: + - JuliaCI/julia#v1: + version: "1.10" # "lts" isn't valid + - JuliaCI/julia-test#v1: ~ + - JuliaCI/julia-coverage#v1: + dirs: + - src + - ext + agents: + queue: "juliagpu" + rocm: "*" + rocmgpu: "*" + if: build.message !~ /\[skip tests\]/ + timeout_in_minutes: 30 diff --git a/Project.toml b/Project.toml index 187bcbaf..32e03960 100644 --- a/Project.toml +++ b/Project.toml @@ -35,6 +35,7 @@ TensorOperationscuTENSORExt = "cuTENSOR" TensorOperationsJLArraysExt = "JLArrays" [compat] +AMDGPU = "2" Aqua = "0.6, 0.7, 0.8" Adapt = "4" Bumper = "0.6, 0.7" @@ -65,6 +66,7 @@ julia = "1.10" [extras] Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e" +AMDGPU = "21141c5a-9bdb-4563-92ae-f87d6854732e" Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595" Bumper = "8ce10254-0962-460f-a3d8-1f77fea1446e" CUDACore = "bd0ed864-bdfe-4181-a5ed-ce625a5fdea2" @@ -81,4 +83,4 @@ Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" cuTENSOR = "011b41b2-24ef-40a8-b3eb-fa098493e9e1" [targets] -test = ["Test", "Random", "DynamicPolynomials", "ChainRulesTestUtils", "ChainRulesCore", "cuRAND", "CUDACore", "cuTENSOR", "Aqua", "Logging", "Bumper", "Mooncake", "Enzyme", "EnzymeTestUtils", "Adapt", "JLArrays"] +test = ["Test", "Random", "DynamicPolynomials", "ChainRulesTestUtils", "ChainRulesCore", "cuRAND", "CUDACore", "cuTENSOR", "Aqua", "Logging", "Bumper", "Mooncake", "Enzyme", "EnzymeTestUtils", "Adapt", "JLArrays", "AMDGPU"] diff --git a/test/gpu.jl b/test/gpu.jl index 65fb3839..592a1702 100644 --- a/test/gpu.jl +++ b/test/gpu.jl @@ -5,7 +5,7 @@ using Adapt using TupleTools using JLArrays using VectorInterface -using CUDACore +using CUDACore, AMDGPU test_result(a::AbstractArray, b::AbstractArray; kwargs...) = isapprox(collect(a), collect(b); kwargs...) @@ -24,6 +24,7 @@ end ATs = [] !is_buildkite && push!(ATs, JLArray) CUDACore.functional() && push!(ATs, CuArray) +AMDGPU.functional() && push!(ATs, ROCArray) backends = [StridedBLAS(), StridedNative()] From f10e81ace6fd2a0ed96c2a1b1198f3c96b614f46 Mon Sep 17 00:00:00 2001 From: lkdvos Date: Mon, 11 May 2026 13:06:20 -0400 Subject: [PATCH 2/3] Add AMDGPU allocator support --- Project.toml | 3 +- ext/TensorOperationsAMDGPUExt.jl | 49 ++++++++++++++++++++++++++++++++ src/implementation/allocator.jl | 7 +++++ 3 files changed, 58 insertions(+), 1 deletion(-) create mode 100644 ext/TensorOperationsAMDGPUExt.jl diff --git a/Project.toml b/Project.toml index 32e03960..9ff0ece4 100644 --- a/Project.toml +++ b/Project.toml @@ -17,6 +17,7 @@ TupleTools = "9d95972d-f1c8-5527-a6e0-b4b365fa01f6" VectorInterface = "409d34a3-91d5-4945-b6ec-7529ddf182d8" [weakdeps] +AMDGPU = "21141c5a-9bdb-4563-92ae-f87d6854732e" Bumper = "8ce10254-0962-460f-a3d8-1f77fea1446e" ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" CUDACore = "bd0ed864-bdfe-4181-a5ed-ce625a5fdea2" @@ -26,6 +27,7 @@ Mooncake = "da2b9cff-9c12-43a0-ae48-6db2b0edb7d6" cuTENSOR = "011b41b2-24ef-40a8-b3eb-fa098493e9e1" [extensions] +TensorOperationsAMDGPUExt = "AMDGPU" TensorOperationsBumperExt = "Bumper" TensorOperationsChainRulesCoreExt = "ChainRulesCore" TensorOperationsMooncakeExt = "Mooncake" @@ -66,7 +68,6 @@ julia = "1.10" [extras] Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e" -AMDGPU = "21141c5a-9bdb-4563-92ae-f87d6854732e" Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595" Bumper = "8ce10254-0962-460f-a3d8-1f77fea1446e" CUDACore = "bd0ed864-bdfe-4181-a5ed-ce625a5fdea2" diff --git a/ext/TensorOperationsAMDGPUExt.jl b/ext/TensorOperationsAMDGPUExt.jl new file mode 100644 index 00000000..9229ef54 --- /dev/null +++ b/ext/TensorOperationsAMDGPUExt.jl @@ -0,0 +1,49 @@ +module TensorOperationsAMDGPUExt + +using AMDGPU +using TensorOperations +using TensorOperations: TensorOperations as TO + +#------------------------------------------------------------------------------------------- +# Allocator +#------------------------------------------------------------------------------------------- + +TO.tensoradd_type(TC, A::AnyRocArray, pA::Index2Tuple, conjA::Bool) = + ROCArray{TC, TO.numind(pA)} + +function TO.tensoralloc_add( + TC, A::AbstractArray, pA::Index2Tuple, conjA::Bool, + istemp::Val, allocator::TO.AMDAllocator + ) + ttype = ROCArray{TC, TO.numind(pA)} + structure = TO.tensoradd_structure(A, pA, conjA) + return TO.tensoralloc(ttype, structure, istemp, allocator)::ttype +end + +function TO.tensoralloc_contract( + TC, + A::AbstractArray, pA::Index2Tuple, conjA::Bool, + B::AbstractArray, pB::Index2Tuple, conjB::Bool, + pAB::Index2Tuple, + istemp::Val, allocator::TO.AMDAllocator + ) + ttype = ROCArray{TC, TO.numind(pAB)} + structure = TO.tensorcontract_structure(A, pA, conjA, B, pB, conjB, pAB) + return TO.tensoralloc(ttype, structure, istemp, allocator)::ttype +end + +# NOTE: the general implementation in the `DefaultAllocator` case works just fine, without +# selecting an explicit memory model +function TO.tensoralloc( + ::Type{<:ROCArray{T, N}}, structure, + ::Val{istemp}, allocator::TO.AMDAllocator + ) where {T, N} + return ROCArray{T, N}(undef, structure) +end + +function TO.tensorfree!(C::ROCArray, ::TO.AMDAllocator) + AMDGPU.unsafe_free!(C) + return nothing +end + +end diff --git a/src/implementation/allocator.jl b/src/implementation/allocator.jl index 7ac5ea8b..bda32ff4 100644 --- a/src/implementation/allocator.jl +++ b/src/implementation/allocator.jl @@ -30,6 +30,13 @@ parameters `Min`, `Mout`, `Mtemp` can be any of the CUDA.jl memory types, i.e. """ struct CUDAAllocator{Mout, Min, Mtemp} end +""" + AMDAllocator() + +Allocator that uses the AMD memory manager and will thus allocate `ROCArray` instances. +""" +struct AMDAllocator end + """ ManualAllocator() From d7fe30a13e1be22e6c21572fad4e59f16b016cd4 Mon Sep 17 00:00:00 2001 From: Katharine Hyatt Date: Wed, 13 May 2026 12:13:15 +0200 Subject: [PATCH 3/3] Get AMDGPU tensor ops working --- Project.toml | 3 +++ ext/TensorOperationsAMDGPUExt.jl | 4 ++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/Project.toml b/Project.toml index 9ff0ece4..461fad09 100644 --- a/Project.toml +++ b/Project.toml @@ -85,3 +85,6 @@ cuTENSOR = "011b41b2-24ef-40a8-b3eb-fa098493e9e1" [targets] test = ["Test", "Random", "DynamicPolynomials", "ChainRulesTestUtils", "ChainRulesCore", "cuRAND", "CUDACore", "cuTENSOR", "Aqua", "Logging", "Bumper", "Mooncake", "Enzyme", "EnzymeTestUtils", "Adapt", "JLArrays", "AMDGPU"] + +[sources] +Strided = {url = "https://github.com/QuantumKitHub/Strided.jl/", rev = "ksh/gemm"} diff --git a/ext/TensorOperationsAMDGPUExt.jl b/ext/TensorOperationsAMDGPUExt.jl index 9229ef54..d77e3669 100644 --- a/ext/TensorOperationsAMDGPUExt.jl +++ b/ext/TensorOperationsAMDGPUExt.jl @@ -8,7 +8,7 @@ using TensorOperations: TensorOperations as TO # Allocator #------------------------------------------------------------------------------------------- -TO.tensoradd_type(TC, A::AnyRocArray, pA::Index2Tuple, conjA::Bool) = +TO.tensoradd_type(TC, A::AnyROCArray, pA::Index2Tuple, conjA::Bool) = ROCArray{TC, TO.numind(pA)} function TO.tensoralloc_add( @@ -37,7 +37,7 @@ end function TO.tensoralloc( ::Type{<:ROCArray{T, N}}, structure, ::Val{istemp}, allocator::TO.AMDAllocator - ) where {T, N} + ) where {T, N, istemp} return ROCArray{T, N}(undef, structure) end