Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 33 additions & 2 deletions .buildkite/pipeline.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ env:
SECRET_CODECOV_TOKEN: "NsHKj2ZxqUDfErNc+zlH6erC00pk0XRZeNAaU+hyRg6oHlIuSUVL53Z0/MW6Xeq8mBsYsfdG3rmE+h0hoGXj6swpmtjCnLI0CAHUSVOTKNHQ4R6VmKuNnLkNQX7+GO6PEcnV+sCMDSt/nhci0lUl/9qo+6uT/VA+9E6XiKOsKV8nL+kb/GDNJqrG8u2JJzd9EcrFG9Vf4p7tLgsafhQq+yQeVdeYxPWKPx2x6+K2w2WrGel0RlVfyYFLEGHo4TW4+OPPoMOJBCA+kkE2I8OlqzzMUMkULhwhWujHyOrWBZ74EFY2zbwYD/iiYTlGJW8UWaOn561uJp3J7+nab4nEYA==;U2FsdGVkX1/EACeMbht8x2ar6VrhBrcGZUtM4/B4viOz590nUZNIUkWPkjpmdriAAP3t1KEj2LlRg+z/FK+CSQ=="

steps:
- label: "Julia v1"
- label: "Julia v1 -- CUDA"
plugins:
- JuliaCI/julia#v1:
version: "1"
Expand All @@ -17,7 +17,7 @@ steps:
if: build.message !~ /\[skip tests\]/
timeout_in_minutes: 30

- label: "Julia LTS"
- label: "Julia LTS -- CUDA"
plugins:
- JuliaCI/julia#v1:
version: "1.10" # "lts" isn't valid
Expand All @@ -31,3 +31,34 @@ steps:
cuda: "*"
if: build.message !~ /\[skip tests\]/
timeout_in_minutes: 30
- label: "Julia v1 -- AMDGPU"
plugins:
- JuliaCI/julia#v1:
version: "1"
- JuliaCI/julia-test#v1: ~
- JuliaCI/julia-coverage#v1:
dirs:
- src
- ext
agents:
queue: "juliagpu"
rocm: "*"
rocmgpu: "*"
if: build.message !~ /\[skip tests\]/
timeout_in_minutes: 30

- label: "Julia LTS -- AMDGPU"
plugins:
- JuliaCI/julia#v1:
version: "1.10" # "lts" isn't valid
- JuliaCI/julia-test#v1: ~
- JuliaCI/julia-coverage#v1:
dirs:
- src
- ext
agents:
queue: "juliagpu"
rocm: "*"
rocmgpu: "*"
if: build.message !~ /\[skip tests\]/
timeout_in_minutes: 30
8 changes: 7 additions & 1 deletion Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ TupleTools = "9d95972d-f1c8-5527-a6e0-b4b365fa01f6"
VectorInterface = "409d34a3-91d5-4945-b6ec-7529ddf182d8"

[weakdeps]
AMDGPU = "21141c5a-9bdb-4563-92ae-f87d6854732e"
Bumper = "8ce10254-0962-460f-a3d8-1f77fea1446e"
ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
CUDACore = "bd0ed864-bdfe-4181-a5ed-ce625a5fdea2"
Expand All @@ -26,6 +27,7 @@ Mooncake = "da2b9cff-9c12-43a0-ae48-6db2b0edb7d6"
cuTENSOR = "011b41b2-24ef-40a8-b3eb-fa098493e9e1"

[extensions]
TensorOperationsAMDGPUExt = "AMDGPU"
TensorOperationsBumperExt = "Bumper"
TensorOperationsChainRulesCoreExt = "ChainRulesCore"
TensorOperationsMooncakeExt = "Mooncake"
Expand All @@ -35,6 +37,7 @@ TensorOperationscuTENSORExt = "cuTENSOR"
TensorOperationsJLArraysExt = "JLArrays"

[compat]
AMDGPU = "2"
Aqua = "0.6, 0.7, 0.8"
Adapt = "4"
Bumper = "0.6, 0.7"
Expand Down Expand Up @@ -81,4 +84,7 @@ Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
cuTENSOR = "011b41b2-24ef-40a8-b3eb-fa098493e9e1"

[targets]
test = ["Test", "Random", "DynamicPolynomials", "ChainRulesTestUtils", "ChainRulesCore", "cuRAND", "CUDACore", "cuTENSOR", "Aqua", "Logging", "Bumper", "Mooncake", "Enzyme", "EnzymeTestUtils", "Adapt", "JLArrays"]
test = ["Test", "Random", "DynamicPolynomials", "ChainRulesTestUtils", "ChainRulesCore", "cuRAND", "CUDACore", "cuTENSOR", "Aqua", "Logging", "Bumper", "Mooncake", "Enzyme", "EnzymeTestUtils", "Adapt", "JLArrays", "AMDGPU"]

[sources]
Strided = {url = "https://github.com/QuantumKitHub/Strided.jl/", rev = "ksh/gemm"}
49 changes: 49 additions & 0 deletions ext/TensorOperationsAMDGPUExt.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
module TensorOperationsAMDGPUExt

using AMDGPU
using TensorOperations
using TensorOperations: TensorOperations as TO

#-------------------------------------------------------------------------------------------
# Allocator
#-------------------------------------------------------------------------------------------

# Adding tensors backed by ROC arrays produces a `ROCArray` whose rank equals the total
# number of indices in the output permutation `pA`.
function TO.tensoradd_type(TC, A::AnyROCArray, pA::Index2Tuple, conjA::Bool)
    return ROCArray{TC, TO.numind(pA)}
end

# Allocate the result of a tensor addition on the AMD GPU when the `AMDAllocator` is in
# use, regardless of the input array type.
function TO.tensoralloc_add(
        TC, A::AbstractArray, pA::Index2Tuple, conjA::Bool,
        istemp::Val, allocator::TO.AMDAllocator
    )
    arraytype = ROCArray{TC, TO.numind(pA)}
    outputstructure = TO.tensoradd_structure(A, pA, conjA)
    return TO.tensoralloc(arraytype, outputstructure, istemp, allocator)::arraytype
end

# Allocate the result of a pairwise tensor contraction on the AMD GPU when the
# `AMDAllocator` is in use, regardless of the input array types.
function TO.tensoralloc_contract(
        TC,
        A::AbstractArray, pA::Index2Tuple, conjA::Bool,
        B::AbstractArray, pB::Index2Tuple, conjB::Bool,
        pAB::Index2Tuple,
        istemp::Val, allocator::TO.AMDAllocator
    )
    arraytype = ROCArray{TC, TO.numind(pAB)}
    outputstructure = TO.tensorcontract_structure(A, pA, conjA, B, pB, conjB, pAB)
    return TO.tensoralloc(arraytype, outputstructure, istemp, allocator)::arraytype
end

# NOTE: the general implementation in the `DefaultAllocator` case works just fine, without
# selecting an explicit memory model
function TO.tensoralloc(
        ::Type{<:ROCArray{T, N}}, structure,
        ::Val{istemp}, allocator::TO.AMDAllocator
    ) where {T, N, istemp}
    return ROCArray{T, N}(undef, structure)
end

# Eagerly release GPU memory held by `C` instead of waiting for the garbage collector.
function TO.tensorfree!(C::ROCArray, ::TO.AMDAllocator)
    AMDGPU.unsafe_free!(C)
    return nothing
end

end
7 changes: 7 additions & 0 deletions src/implementation/allocator.jl
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,13 @@ parameters `Min`, `Mout`, `Mtemp` can be any of the CUDA.jl memory types, i.e.
"""
struct CUDAAllocator{Mout, Min, Mtemp} end

"""
AMDAllocator()

Allocator that uses the AMD memory manager and will thus allocate `ROCArray` instances.
"""
struct AMDAllocator end

"""
ManualAllocator()

Expand Down
3 changes: 2 additions & 1 deletion test/gpu.jl
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ using Adapt
using TupleTools
using JLArrays
using VectorInterface
using CUDACore
using CUDACore, AMDGPU

# Compare two (possibly device-resident) arrays: materialize both on the host with
# `collect`, then check approximate elementwise equality. Keyword arguments (e.g. `atol`,
# `rtol`) are forwarded to `isapprox`.
function test_result(a::AbstractArray, b::AbstractArray; kwargs...)
    host_a = collect(a)
    host_b = collect(b)
    return isapprox(host_a, host_b; kwargs...)
end
Expand All @@ -24,6 +24,7 @@ end
ATs = []
!is_buildkite && push!(ATs, JLArray)
CUDACore.functional() && push!(ATs, CuArray)
AMDGPU.functional() && push!(ATs, ROCArray)

backends = [StridedBLAS(), StridedNative()]

Expand Down
Loading