diff --git a/ext/TensorKitCUDAExt/cutensormap.jl b/ext/TensorKitCUDAExt/cutensormap.jl index 2fefb3a24..a1d2d1b66 100644 --- a/ext/TensorKitCUDAExt/cutensormap.jl +++ b/ext/TensorKitCUDAExt/cutensormap.jl @@ -6,6 +6,9 @@ const AdjointCuTensorMap{T, S, N₁, N₂} = AdjointTensorMap{T, S, N₁, N₂, function CuTensorMap(t::TensorMap{T, S, N₁, N₂, A}) where {T, S, N₁, N₂, A} return CuTensorMap{T, S, N₁, N₂}(CuArray{T}(t.data), space(t)) end +function TensorMap{T, S, N₁, N₂, DA}(t::TensorMap{T, S, N₁, N₂, HA}) where {T, S, N₁, N₂, DA <: CuArray{T}, HA <: Array{T}} + return CuTensorMap{T, S, N₁, N₂}(CuArray{T}(t.data), space(t)) +end # project_symmetric! doesn't yet work for GPU types, so do this on the host, then copy function TensorKit.project_symmetric_and_check(::Type{T}, ::Type{A}, data::AbstractArray, V::TensorMapSpace; tol = sqrt(eps(real(float(eltype(data)))))) where {T, A <: CuVector{T}} @@ -101,18 +104,6 @@ function TensorKit.scalar(t::CuTensorMap{T, S, 0, 0}) where {T, S} return isempty(inds) ? zero(scalartype(t)) : @allowscalar @inbounds t.data[only(inds)] end -function Base.convert( - TT::Type{CuTensorMap{T, S, N₁, N₂}}, - t::AbstractTensorMap{<:Any, S, N₁, N₂} - ) where {T, S, N₁, N₂} - if typeof(t) === TT - return t - else - tnew = TT(undef, space(t)) - return copy!(tnew, t) - end -end - function LinearAlgebra.isposdef(t::CuTensorMap) domain(t) == codomain(t) || throw(SpaceMismatch("`isposdef` requires domain and codomain to be the same")) @@ -138,10 +129,9 @@ function Base.promote_rule( return CuTensorMap{T, S, N₁, N₂} end -TensorKit.promote_storage_rule(::Type{CuArray{T, N}}, ::Type{<:CuArray{T, N}}) where {T, N} = +TensorKit.promote_storage_rule(::Type{<:CuArray{T, N}}, ::Type{<:CuArray{T, N}}) where {T, N} = CuArray{T, N, CUDA.default_memory} - # CuTensorMap exponentation: function TensorKit.exp!(t::CuTensorMap) domain(t) == codomain(t) || diff --git a/ext/TensorKitCUDAExt/truncation.jl b/ext/TensorKitCUDAExt/truncation.jl index 019ded97b..42d623730 100644 --- a/ext/TensorKitCUDAExt/truncation.jl +++ b/ext/TensorKitCUDAExt/truncation.jl @@ -10,7 +10,7 @@ function MatrixAlgebraKit.findtruncated( fill!(v, dim(c)) end - perm = sortperm(parent(values); strategy.by, strategy.rev) + perm = isempty(parent(values)) ? Int64[] : sortperm(parent(values); strategy.by, strategy.rev) cumulative_dim = cumsum(Base.permute!(parent(dims), perm)) result = similar(values, Bool) @@ -36,7 +36,7 @@ function MatrixAlgebraKit.findtruncated( end end - perm = sortperm(parent(values); by = abs, rev = false) + perm = isempty(parent(values)) ? Int64[] : sortperm(parent(values); by = abs, rev = false) cumulative_err = cumsum(Base.permute!(parent(ϵᵖ), perm)) result = similar(values, Bool) diff --git a/src/tensors/abstracttensor.jl b/src/tensors/abstracttensor.jl index 628845526..56c3da839 100644 --- a/src/tensors/abstracttensor.jl +++ b/src/tensors/abstracttensor.jl @@ -53,9 +53,7 @@ storagetype(t) = storagetype(typeof(t)) function storagetype(::Type{T}) where {T <: AbstractTensorMap} if T isa Union # attempt to be slightly more specific by promoting unions - Ma = storagetype(T.a) - Mb = storagetype(T.b) - return promote_storagetype(Ma, Mb) + return promote_storagetype(T.a, T.b) else # fallback definition by using scalartype return similarstoragetype(scalartype(T)) @@ -103,8 +101,15 @@ similarstoragetype(X::Type, ::Type{T}) where {T <: Number} = # implement on tensors similarstoragetype(::Type{TT}) where {TT <: AbstractTensorMap} = similarstoragetype(storagetype(TT)) -similarstoragetype(::Type{TT}, ::Type{T}) where {TT <: AbstractTensorMap, T <: Number} = - similarstoragetype(storagetype(TT), T) +function similarstoragetype(::Type{TT}, ::Type{T}) where {TT <: AbstractTensorMap, T <: Number} + return similarstoragetype(storagetype(TT), T) +end +function similarstoragetype(::Type{<:AbstractTensorMap{T, S, N₁, N₂}}, ::Type{TA}) where {T <: Number, TA <: DenseVector, S, N₁, N₂} + return similarstoragetype(TA, T) +end +function similarstoragetype(t::AbstractTensorMap{T, S, N₁, N₂}, ::Type{TA}) where {T <: Number, TA <: DenseVector, S, N₁, N₂} + return similarstoragetype(typeof(t), TA) +end # implement on arrays similarstoragetype(::Type{A}) where {A <: DenseVector{<:Number}} = A diff --git a/src/tensors/adjoint.jl b/src/tensors/adjoint.jl index dfc1a4471..ca484e77b 100644 --- a/src/tensors/adjoint.jl +++ b/src/tensors/adjoint.jl @@ -50,8 +50,6 @@ Base.@propagate_inbounds function subblock(t::AdjointTensorMap, (f₁, f₂)::Tu return permutedims(conj(data), (domainind(tp)..., codomainind(tp)...)) end -to_cpu(t::AdjointTensorMap) = adjoint(to_cpu(adjoint(t))) - # Show #------ function Base.showarg(io::IO, t::AdjointTensorMap, toplevel::Bool) diff --git a/src/tensors/braidingtensor.jl b/src/tensors/braidingtensor.jl index d28b2e1df..f12123384 100644 --- a/src/tensors/braidingtensor.jl +++ b/src/tensors/braidingtensor.jl @@ -190,7 +190,6 @@ has_shared_permute(t::BraidingTensor, ::Index2Tuple) = false function add_transform!( tdst::AbstractTensorMap, tsrc::BraidingTensor, (p₁, p₂)::Index2Tuple, - fusiontreetransform, α::Number, β::Number, backend::AbstractBackend... ) return add_transform!( diff --git a/src/tensors/diagonal.jl b/src/tensors/diagonal.jl index b2ac4134b..e73ad2787 100644 --- a/src/tensors/diagonal.jl +++ b/src/tensors/diagonal.jl @@ -280,7 +280,7 @@ end # ---------------- function TO.tensoradd_type(TC, A::DiagonalTensorMap, ::Index2Tuple{1, 1}, ::Bool) M = similarstoragetype(A, TC) - return DiagonalTensorMap{TC, spacetype(A), M} + return DiagonalTensorMap{scalartype(M), spacetype(A), M} end function TO.tensorcontract_type( diff --git a/src/tensors/indexmanipulations.jl b/src/tensors/indexmanipulations.jl index 3108abb17..f7e2f9ecf 100644 --- a/src/tensors/indexmanipulations.jl +++ b/src/tensors/indexmanipulations.jl @@ -17,6 +17,8 @@ for (operation, manipulation) in ( $promote_op(::Type{T}, ::Type{I}) where {T <: Number, I <: Sector} = sectorscalartype(I) <: Integer ? T : sectorscalartype(I) <: Real ? float(T) : complex(T) + $promote_op(::Type{TA}, ::Type{I}) where {TA <: DenseVector, I <: Sector} = + similarstoragetype(TA, $promote_op(eltype(TA), I)) # TODO: currently the manipulations all use sectorscalartype, change to: # $manipulation_scalartype(I) <: Integer ? T : # $manipulation_scalartype(I) <: Real ? float(T) : complex(T) diff --git a/src/tensors/tensor.jl b/src/tensors/tensor.jl index 342c83186..531c58638 100644 --- a/src/tensors/tensor.jl +++ b/src/tensors/tensor.jl @@ -21,7 +21,6 @@ struct TensorMap{T, S <: IndexSpace, N₁, N₂, A <: DenseVector{T}} <: Abstrac end return TensorMap{T, S, N₁, N₂, A}(data, space) end - # constructors from data function TensorMap{T, S, N₁, N₂, A}( data::A, space::TensorMapSpace{S, N₁, N₂} @@ -34,6 +33,9 @@ struct TensorMap{T, S <: IndexSpace, N₁, N₂, A <: DenseVector{T}} <: Abstrac return new{T, S, N₁, N₂, A}(data, space) end end +# constructors from another TensorMap -- no-op +TensorMap{T, S, N₁, N₂, A}(t::TensorMap{T, S, N₁, N₂, A}) where {T, S <: IndexSpace, N₁, N₂, A <: DenseVector{T}} = t +TensorMap{T, S, N₁, N₂, A}(t::TensorMap{T, S, N₁, N₂}) where {T, S <: IndexSpace, N₁, N₂, A <: DenseVector{T}} = TensorMap(A(t.data), space(t)) """ Tensor{T, S, N, A<:DenseVector{T}} = TensorMap{T, S, N, 0, A} @@ -407,11 +409,6 @@ for randf in (:rand, :randn, :randexp, :randisometry) end end -# Moving arbitrary TensorMaps to CPU -#----------------------------- -to_cpu(t::TensorMapWithStorage{T, Vector{T}}) where {T} = t # no op -to_cpu(t::TensorMap) = convert(TensorMapWithStorage{scalartype(t), similarstoragetype(scalartype(t))}, t) - # Efficient copy constructors #----------------------------- Base.copy(t::TensorMap) = typeof(t)(copy(t.data), t.space) diff --git a/src/tensors/tensoroperations.jl b/src/tensors/tensoroperations.jl index 3fc79cf0c..f42f7d94f 100644 --- a/src/tensors/tensoroperations.jl +++ b/src/tensors/tensoroperations.jl @@ -379,7 +379,7 @@ function blas_contract!( bstyle = BraidingStyle(sectortype(C)) bstyle isa SymmetricBraiding || throw(SectorMismatch("only tensors with symmetric braiding rules can be contracted; try `@planar` instead")) - TC = scalartype(C) + TC = storagetype(C) # without this, Anew below has wrong storagetype # check which tensors have to be permuted/copied copyA = !(TO.isblascontractable(A, pA) && scalartype(A) === TC) diff --git a/test/amd/tensors.jl b/test/amd/tensors.jl index 0c6898405..2ac522daa 100644 --- a/test/amd/tensors.jl +++ b/test/amd/tensors.jl @@ -97,7 +97,7 @@ for V in spacelist for T in (Int, Float32, ComplexF64) t = @constinferred AMDGPU.rand(T, W) d = convert(Dict, t) - @test TensorKit.to_cpu(t) == convert(TensorMap, d) + @test adapt(Array, t) == convert(TensorMap, d) end end symmetricbraiding && @timedtestset "Basic linear algebra" begin @@ -189,10 +189,10 @@ for V in spacelist t = AMDGPU.rand(T, W) t2 = @constinferred AMDGPU.rand!(similar(t)) α = rand(T) - @test norm(t, 2) ≈ norm(TensorKit.to_cpu(t), 2) - @test dot(t2, t) ≈ dot(TensorKit.to_cpu(t2), TensorKit.to_cpu(t)) - @test TensorKit.to_cpu(α * t) ≈ α * TensorKit.to_cpu(t) - @test TensorKit.to_cpu(t + t) ≈ 2 * TensorKit.to_cpu(t) + @test norm(t, 2) ≈ norm(adapt(Array, t), 2) + @test dot(t2, t) ≈ dot(adapt(Array, t2), adapt(Array, t)) + @test adapt(Array, α * t) ≈ α * adapt(Array, t) + @test adapt(Array, t + t) ≈ 2 * adapt(Array, t) end end @timedtestset "Real and imaginary parts" begin @@ -202,17 +202,17 @@ for V in spacelist tr = @constinferred real(t) @test scalartype(tr) <: Real - @test real(TensorKit.to_cpu(t)) == TensorKit.to_cpu(tr) + @test real(adapt(Array, t)) == adapt(Array, tr) @test storagetype(tr) == ROCVector{real(T), AMDGPU.Mem.HIPBuffer} ti = @constinferred imag(t) @test scalartype(ti) <: Real - @test imag(TensorKit.to_cpu(t)) == TensorKit.to_cpu(ti) + @test imag(adapt(Array, t)) == adapt(Array, ti) @test storagetype(ti) == ROCVector{real(T), AMDGPU.Mem.HIPBuffer} tc = @inferred complex(t) @test scalartype(tc) <: Complex - @test complex(TensorKit.to_cpu(t)) == TensorKit.to_cpu(tc) + @test complex(adapt(Array, t)) == adapt(Array, tc) @test storagetype(tc) == ROCVector{complex(T), AMDGPU.Mem.HIPBuffer} tc2 = @inferred complex(tr, ti) @@ -275,13 +275,13 @@ for V in spacelist p1 = ntuple(n -> p[n], k) p2 = ntuple(n -> p[k + n], 5 - k) dt2 = AMDGPU.@allowscalar permute(t, (p1, p2)) - ht2 = permute(TensorKit.to_cpu(t), (p1, p2)) - @test ht2 == TensorKit.to_cpu(dt2) + ht2 = permute(adapt(Array, t), (p1, p2)) + @test ht2 == adapt(Array, dt2) end dt3 = AMDGPU.@allowscalar repartition(t, k) - ht3 = repartition(TensorKit.to_cpu(t), k) - @test ht3 == TensorKit.to_cpu(dt3) + ht3 = repartition(adapt(Array, t), k) + @test ht3 == adapt(Array, dt3) end end symmetricbraiding && @timedtestset "Full trace: test self-consistency" begin @@ -339,10 +339,10 @@ for V in spacelist @tensor dHrA12[a, s1, s2, c] := drhoL[a, a'] * conj(dA1[a', t1, b]) * dA2[b, t2, c'] * drhoR[c', c] * dH[s1, s2, t1, t2] - @tensor hHrA12[a, s1, s2, c] := TensorKit.to_cpu(drhoL)[a, a'] * conj(TensorKit.to_cpu(dA1)[a', t1, b]) * - TensorKit.to_cpu(dA2)[b, t2, c'] * TensorKit.to_cpu(drhoR)[c', c] * - TensorKit.to_cpu(dH)[s1, s2, t1, t2] - @test TensorKit.to_cpu(dHrA12) ≈ hHrA12 + @tensor hHrA12[a, s1, s2, c] := adapt(Array, drhoL)[a, a'] * conj(adapt(Array, dA1)[a', t1, b]) * + adapt(Array, dA2)[b, t2, c'] * adapt(Array, drhoR)[c', c] * + adapt(Array, dH)[s1, s2, t1, t2] + @test adapt(Array, dHrA12) ≈ hHrA12 end end=# # doesn't yet work because of AdjointTensor BraidingStyle(I) isa HasBraiding && @timedtestset "Index flipping: test flipping inverse" begin @@ -422,31 +422,31 @@ for V in spacelist t1 = AMDGPU.rand(T, W1, W1) t2 = AMDGPU.rand(T, W2, W2) t = AMDGPU.rand(T, W1, W2) - ht1 = TensorKit.to_cpu(t1) - ht2 = TensorKit.to_cpu(t2) - ht = TensorKit.to_cpu(t) - @test TensorKit.to_cpu(t1 * t) ≈ ht1 * ht - @test TensorKit.to_cpu(t1' * t) ≈ ht1' * ht - @test TensorKit.to_cpu(t2 * t') ≈ ht2 * ht' - @test TensorKit.to_cpu(t2' * t') ≈ ht2' * ht' + ht1 = adapt(Array, t1) + ht2 = adapt(Array, t2) + ht = adapt(Array, t) + @test adapt(Array, t1 * t) ≈ ht1 * ht + @test adapt(Array, t1' * t) ≈ ht1' * ht + @test adapt(Array, t2 * t') ≈ ht2 * ht' + @test adapt(Array, t2' * t') ≈ ht2' * ht' #=AMDGPU.@allowscalar begin - @test TensorKit.to_cpu(inv(t1)) ≈ inv(ht1) - @test TensorKit.to_cpu(pinv(t)) ≈ pinv(ht) + @test adapt(Array, inv(t1)) ≈ inv(ht1) + @test adapt(Array, pinv(t)) ≈ pinv(ht) if T == Float32 || T == ComplexF32 continue end - @test TensorKit.to_cpu(t1 \ t) ≈ ht1 \ ht - @test TensorKit.to_cpu(t1' \ t) ≈ ht1' \ ht - @test TensorKit.to_cpu(t2 \ t') ≈ ht2 \ ht' - @test TensorKit.to_cpu(t2' \ t') ≈ ht2' \ ht' + @test adapt(Array, t1 \ t) ≈ ht1 \ ht + @test adapt(Array, t1' \ t) ≈ ht1' \ ht + @test adapt(Array, t2 \ t') ≈ ht2 \ ht' + @test adapt(Array, t2' \ t') ≈ ht2' \ ht' - @test TensorKit.to_cpu(t2 / t) ≈ ht2 / ht - @test TensorKit.to_cpu(t2' / t) ≈ ht2' / ht - @test TensorKit.to_cpu(t1 / t') ≈ ht1 / ht' - @test TensorKit.to_cpu(t1' / t') ≈ ht1' / ht' + @test adapt(Array, t2 / t) ≈ ht2 / ht + @test adapt(Array, t2' / t) ≈ ht2' / ht + @test adapt(Array, t1 / t') ≈ ht1 / ht' + @test adapt(Array, t1' / t') ≈ ht1' / ht' end=# end end @@ -456,11 +456,11 @@ for V in spacelist #=t = project_hermitian!(AMDGPU.randn(T, W, W)) s = dim(W) @test (@constinferred sqrt(t))^2 ≈ t - @test TensorKit.to_cpu(sqrt(t)) ≈ sqrt(TensorKit.to_cpu(t)) + @test adapt(Array, sqrt(t)) ≈ sqrt(adapt(Array, t)) expt = @constinferred exp(t) - @test TensorKit.to_cpu(expt) ≈ exp(TensorKit.to_cpu(t)) + @test adapt(Array, expt) ≈ exp(adapt(Array, t)) @test exp(@constinferred log(project_hermitian!(expt))) ≈ expt - @test TensorKit.to_cpu(log(project_hermitian!(expt))) ≈ log(TensorKit.to_cpu(expt)) + @test adapt(Array, log(project_hermitian!(expt))) ≈ log(adapt(Array, expt)) @test (@constinferred cos(t))^2 + (@constinferred sin(t))^2 ≈ id(storagetype(t), W) diff --git a/test/cuda/tensors.jl b/test/cuda/tensors.jl index 738440bef..973118cac 100644 --- a/test/cuda/tensors.jl +++ b/test/cuda/tensors.jl @@ -14,6 +14,7 @@ for V in spacelist println("---------------------------------------") println("CUDA Tensors with symmetry: $Istr") println("---------------------------------------") + hasbraiding = BraidingStyle(I) isa HasBraiding symmetricbraiding = BraidingStyle(I) isa SymmetricBraiding @timedtestset "Tensors with symmetry: $Istr" verbose = true begin V1, V2, V3, V4, V5 = V @@ -115,7 +116,7 @@ for V in spacelist for T in (Int, Float32, ComplexF64) t = @constinferred cuRAND.rand(T, W) d = convert(Dict, t) - @test convert(Dict, TensorKit.to_cpu(t)) == d + @test convert(Dict, adapt(Array, t)) == d end end symmetricbraiding && @timedtestset "Basic linear algebra" begin @@ -207,10 +208,10 @@ for V in spacelist t = cuRAND.rand(T, W) t2 = @constinferred cuRAND.rand!(similar(t)) α = rand(T) - @test norm(t, 2) ≈ norm(TensorKit.to_cpu(t), 2) - @test dot(t2, t) ≈ dot(TensorKit.to_cpu(t2), TensorKit.to_cpu(t)) - @test TensorKit.to_cpu(α * t) ≈ α * TensorKit.to_cpu(t) - @test TensorKit.to_cpu(t + t) ≈ 2 * TensorKit.to_cpu(t) + @test norm(t, 2) ≈ norm(adapt(Array, t), 2) + @test dot(t2, t) ≈ dot(adapt(Array, t2), adapt(Array, t)) + @test adapt(Vector{T}, (α * t)) ≈ α * adapt(Vector{T}, t) + @test adapt(Vector{T}, (t + t)) ≈ 2 * adapt(Vector{T}, t) end end @timedtestset "Real and imaginary parts" begin @@ -220,17 +221,17 @@ for V in spacelist tr = @constinferred real(t) @test scalartype(tr) <: Real - @test real(TensorKit.to_cpu(t)) == TensorKit.to_cpu(tr) + @test real(adapt(Array, t)) == adapt(Array, tr) @test storagetype(tr) == CuVector{real(T), CUDA.DeviceMemory} ti = @constinferred imag(t) @test scalartype(ti) <: Real - @test imag(TensorKit.to_cpu(t)) == TensorKit.to_cpu(ti) + @test imag(adapt(Array, t)) == adapt(Array, ti) @test storagetype(ti) == CuVector{real(T), CUDA.DeviceMemory} tc = @inferred complex(t) @test scalartype(tc) <: Complex - @test complex(TensorKit.to_cpu(t)) == TensorKit.to_cpu(tc) + @test complex(adapt(Array, t)) == adapt(Array, tc) @test storagetype(tc) == CuVector{complex(T), CUDA.DeviceMemory} tc2 = @inferred complex(tr, ti) @@ -243,7 +244,7 @@ for V in spacelist W = V1 ⊗ V2 t = @constinferred cuRAND.randn(W ← W) @test typeof(convert(typeof(t), t')) == typeof(t) - @test typeof(TensorKit.to_cpu(t')) == typeof(TensorKit.to_cpu(t)') + @test typeof(adapt(Array, t')) == typeof(adapt(Array, t)') tc = complex(t) @test convert(typeof(tc), t) == tc @test typeof(convert(typeof(tc), t)) == typeof(tc) @@ -263,16 +264,22 @@ for V in spacelist symmetricbraiding && @timedtestset "Permutations: test via inner product invariance" begin W = V1 ⊗ V2 ⊗ V3 ⊗ V4 ⊗ V5 t = cuRAND.rand(ComplexF64, W) + ht = adapt(Vector{ComplexF64}, t) t′ = cuRAND.randn!(similar(t)) + ht′ = adapt(Vector{ComplexF64}, t′) + dot_htt′ = dot(ht′, ht) + dot_tt′ = dot(t′, t) + @test dot_tt′ ≈ dot_htt′ + norm_t = norm(t) for k in 0:5 for p in permutations(1:5) p1 = ntuple(n -> p[n], k) p2 = ntuple(n -> p[k + n], 5 - k) t2 = @constinferred permute(t, (p1, p2)) - t2 = permute(t, (p1, p2)) - @test norm(t2) ≈ norm(t) t2′ = permute(t′, (p1, p2)) - @test dot(t2′, t2) ≈ dot(t′, t) ≈ dot(transpose(t2′), transpose(t2)) + @test norm(t2) ≈ norm_t + @test dot(t2′, t2) ≈ dot_tt′ + @test dot(transpose(t2′), transpose(t2)) ≈ dot_tt′ end t3 = @constinferred repartition(t, $k) t3 = repartition(t, k) @@ -290,32 +297,29 @@ for V in spacelist p1 = ntuple(n -> p[n], k) p2 = ntuple(n -> p[k + n], 5 - k) dt2 = permute(t, (p1, p2)) - ht2 = permute(TensorKit.to_cpu(t), (p1, p2)) - @test ht2 ≈ TensorKit.to_cpu(dt2) + ht2 = permute(adapt(Array, t), (p1, p2)) + @test ht2 ≈ adapt(Array, dt2) end - - dt3 = CUDA.@allowscalar repartition(t, k) - ht3 = repartition(TensorKit.to_cpu(t), k) - @test ht3 ≈ TensorKit.to_cpu(dt3) + dt3 = repartition(t, k) + ht3 = repartition(adapt(Array, t), k) + @test ht3 ≈ adapt(Array, dt3) end end symmetricbraiding && @timedtestset "Full trace: test self-consistency" begin t = cuRAND.rand(ComplexF64, V1 ⊗ V2' ⊗ V2 ⊗ V1') - CUDA.@allowscalar begin - t2 = permute(t, ((1, 2), (4, 3))) - s = @constinferred tr(t2) - @test conj(s) ≈ tr(t2') - if !isdual(V1) - t2 = twist!(t2, 1) - end - if isdual(V2) - t2 = twist!(t2, 2) - end - ss = tr(t2) - @tensor s2 = t[a, b, b, a] - @tensor t3[a, b] := t[a, c, c, b] - @tensor s3 = t3[a, a] + t2 = permute(t, ((1, 2), (4, 3))) + s = @constinferred tr(t2) + @test conj(s) ≈ tr(t2') + if !isdual(V1) + t2 = twist!(t2, 1) + end + if isdual(V2) + t2 = twist!(t2, 2) end + ss = tr(t2) + @tensor s2 = t[a, b, b, a] + @tensor t3[a, b] := t[a, c, c, b] + @tensor s3 = t3[a, a] @test ss ≈ s2 @test ss ≈ s3 end @@ -328,20 +332,16 @@ for V in spacelist end symmetricbraiding && @timedtestset "Trace: test via conversion" begin t = cuRAND.rand(ComplexF64, V1 ⊗ V2' ⊗ V3 ⊗ V2 ⊗ V1' ⊗ V3') - CUDA.@allowscalar begin - @tensor t2[a, b] := t[c, d, b, d, c, a] - @tensor t3[a, b] := ad(t)[c, d, b, d, c, a] - end + @tensor t2[a, b] := t[c, d, b, d, c, a] + @tensor t3[a, b] := ad(t)[c, d, b, d, c, a] @test t3 ≈ ad(t2) end symmetricbraiding && @timedtestset "Trace and contraction" begin t1 = cuRAND.rand(ComplexF64, V1 ⊗ V2 ⊗ V3) t2 = cuRAND.rand(ComplexF64, V2' ⊗ V4 ⊗ V1') - CUDA.@allowscalar begin - t3 = t1 ⊗ t2 - @tensor ta[a, b] := t1[x, y, a] * t2[y, b, x] - @tensor tb[a, b] := t3[x, y, a, y, b, x] - end + t3 = t1 ⊗ t2 + @tensor ta[a, b] := t1[x, y, a] * t2[y, b, x] + @tensor tb[a, b] := t3[x, y, a, y, b, x] @test ta ≈ tb end if BraidingStyle(I) isa Bosonic && hasfusiontensor(I) @@ -354,50 +354,44 @@ for V in spacelist @tensor dHrA12[a, s1, s2, c] := drhoL[a, a'] * conj(dA1[a', t1, b]) * dA2[b, t2, c'] * drhoR[c', c] * dH[s1, s2, t1, t2] - @tensor hHrA12[a, s1, s2, c] := TensorKit.to_cpu(drhoL)[a, a'] * conj(TensorKit.to_cpu(dA1)[a', t1, b]) * - TensorKit.to_cpu(dA2)[b, t2, c'] * TensorKit.to_cpu(drhoR)[c', c] * - TensorKit.to_cpu(dH)[s1, s2, t1, t2] - @test TensorKit.to_cpu(dHrA12) ≈ hHrA12 + @tensor hHrA12[a, s1, s2, c] := adapt(Array, drhoL)[a, a'] * conj(adapt(Array, dA1)[a', t1, b]) * + adapt(Array, dA2)[b, t2, c'] * adapt(Array, drhoR)[c', c] * + adapt(Array, dH)[s1, s2, t1, t2] + @test adapt(Array, dHrA12) ≈ hHrA12 end end - BraidingStyle(I) isa HasBraiding && @timedtestset "Index flipping: test flipping inverse" begin + hasbraiding && @timedtestset "Index flipping: test flipping inverse" begin t = cuRAND.rand(ComplexF64, V1 ⊗ V2 ⊗ V3 ← (V4 ⊗ V5)') for i in 1:5 - CUDA.@allowscalar begin - @test t ≈ flip(flip(t, i), i; inv = true) - @test t ≈ flip(flip(t, i; inv = true), i) - end + @test t ≈ flip(flip(t, i), i; inv = true) + @test t ≈ flip(flip(t, i; inv = true), i) end end - #=@timedtestset "Index flipping: test via explicit flip" begin + symmetricbraiding && @timedtestset "Index flipping: test via explicit flip" begin t = cuRAND.rand(ComplexF64, V1 ⊗ V1' ← V1' ⊗ V1) - F1 = unitary(flip(V1), V1) + F1 = adapt(CuArray{ComplexF64}, unitary(flip(V1), V1)) - CUDA.@allowscalar begin - @tensor tf[a, b; c, d] := F1[a, a'] * t[a', b; c, d] - @test flip(t, 1) ≈ tf - @tensor tf[a, b; c, d] := conj(F1[b, b']) * t[a, b'; c, d] - @test twist!(flip(t, 2), 2) ≈ tf - @tensor tf[a, b; c, d] := F1[c, c'] * t[a, b; c', d] - @test flip(t, 3) ≈ tf - @tensor tf[a, b; c, d] := conj(F1[d, d']) * t[a, b; c, d'] - @test twist!(flip(t, 4), 4) ≈ tf - end + @tensor tf[a, b; c, d] := F1[a, a'] * t[a', b; c, d] + @test flip(t, 1) ≈ tf + @tensor tf[a, b; c, d] := conj(F1[b, b']) * t[a, b'; c, d] + @test twist!(flip(t, 2), 2) ≈ tf + @tensor tf[a, b; c, d] := F1[c, c'] * t[a, b; c', d] + @test flip(t, 3) ≈ tf + @tensor tf[a, b; c, d] := conj(F1[d, d']) * t[a, b; c, d'] + @test twist!(flip(t, 4), 4) ≈ tf end - @timedtestset "Index flipping: test via contraction" begin + symmetricbraiding && @timedtestset "Index flipping: test via contraction" begin t1 = cuRAND.rand(ComplexF64, V1 ⊗ V2 ⊗ V3 ← V4) t2 = cuRAND.rand(ComplexF64, V2' ⊗ V5 ← V4' ⊗ V1) - CUDA.@allowscalar begin - @tensor ta[a, b] := t1[x, y, a, z] * t2[y, b, z, x] - @tensor tb[a, b] := flip(t1, 1)[x, y, a, z] * flip(t2, 4)[y, b, z, x] - @test ta ≈ tb - @tensor tb[a, b] := flip(t1, (2, 4))[x, y, a, z] * flip(t2, (1, 3))[y, b, z, x] - @test ta ≈ tb - @tensor tb[a, b] := flip(t1, (1, 2, 4))[x, y, a, z] * flip(t2, (1, 3, 4))[y, b, z, x] - @tensor tb[a, b] := flip(t1, (1, 3))[x, y, a, z] * flip(t2, (2, 4))[y, b, z, x] - @test flip(ta, (1, 2)) ≈ tb - end - end=# # TODO + @tensor ta[a, b] := t1[x, y, a, z] * t2[y, b, z, x] + @tensor tb[a, b] := flip(t1, 1)[x, y, a, z] * flip(t2, 4)[y, b, z, x] + @test ta ≈ tb + @tensor tb[a, b] := flip(t1, (2, 4))[x, y, a, z] * flip(t2, (1, 3))[y, b, z, x] + @test ta ≈ tb + @tensor tb[a, b] := flip(t1, (1, 2, 4))[x, y, a, z] * flip(t2, (1, 3, 4))[y, b, z, x] + @tensor tb[a, b] := flip(t1, (1, 3))[x, y, a, z] * flip(t2, (2, 4))[y, b, z, x] + @test flip(ta, (1, 2)) ≈ tb + end @timedtestset "Multiplication of isometries: test properties" begin W1 = V1 ⊗ V2 ⊗ V3 W2 = (V4 ⊗ V5)' @@ -435,30 +429,30 @@ for V in spacelist t1 = cuRAND.rand(T, W1, W1) t2 = cuRAND.rand(T, W2, W2) t = cuRAND.rand(T, W1, W2) - ht1 = TensorKit.to_cpu(t1) - ht2 = TensorKit.to_cpu(t2) - ht = TensorKit.to_cpu(t) - @test TensorKit.to_cpu(t1 * t) ≈ ht1 * ht - @test TensorKit.to_cpu(t1' * t) ≈ ht1' * ht - @test TensorKit.to_cpu(t2 * t') ≈ ht2 * ht' - @test TensorKit.to_cpu(t2' * t') ≈ ht2' * ht' + ht1 = adapt(Array, t1) + ht2 = adapt(Array, t2) + ht = adapt(Array, t) + @test adapt(Array, t1 * t) ≈ ht1 * ht + @test adapt(Array, t1' * t) ≈ ht1' * ht + @test adapt(Array, t2 * t') ≈ ht2 * ht' + @test adapt(Array, t2' * t') ≈ ht2' * ht' - @test TensorKit.to_cpu(inv(t1)) ≈ inv(ht1) - @test TensorKit.to_cpu(pinv(t)) ≈ pinv(ht) + @test adapt(Array, inv(t1)) ≈ inv(ht1) + @test adapt(Array, pinv(t)) ≈ pinv(ht) if T == Float32 || T == ComplexF32 continue end - @test TensorKit.to_cpu(t1 \ t) ≈ ht1 \ ht - @test TensorKit.to_cpu(t1' \ t) ≈ ht1' \ ht - @test TensorKit.to_cpu(t2 \ t') ≈ ht2 \ ht' - @test TensorKit.to_cpu(t2' \ t') ≈ ht2' \ ht' + @test adapt(Array, t1 \ t) ≈ ht1 \ ht + @test adapt(Array, t1' \ t) ≈ ht1' \ ht + @test adapt(Array, t2 \ t') ≈ ht2 \ ht' + @test adapt(Array, t2' \ t') ≈ ht2' \ ht' - @test TensorKit.to_cpu(t2 / t) ≈ ht2 / ht - @test TensorKit.to_cpu(t2' / t) ≈ ht2' / ht - @test TensorKit.to_cpu(t1 / t') ≈ ht1 / ht' - @test TensorKit.to_cpu(t1' / t') ≈ ht1' / ht' + @test adapt(Array, t2 / t) ≈ ht2 / ht + @test adapt(Array, t2' / t) ≈ ht2' / ht + @test adapt(Array, t1 / t') ≈ ht1 / ht' + @test adapt(Array, t1' / t') ≈ ht1' / ht' end end symmetricbraiding && @timedtestset "Tensor functions" begin @@ -467,14 +461,14 @@ for V in spacelist t = project_hermitian!(cuRAND.randn(T, W, W)) s = dim(W) #@test (@constinferred sqrt(t))^2 ≈ t - #@test TensorKit.to_cpu(sqrt(t)) ≈ sqrt(TensorKit.to_cpu(t)) + #@test adapt(Array, sqrt(t)) ≈ sqrt(adapt(Array, t)) expt = @constinferred exp(t) - @test TensorKit.to_cpu(expt) ≈ exp(TensorKit.to_cpu(t)) + @test adapt(Array, expt) ≈ exp(adapt(Array, t)) # log doesn't work on CUDA yet (scalar indexing) #@test exp(@constinferred log(project_hermitian!(expt))) ≈ expt - #@test TensorKit.to_cpu(log(project_hermitian!(expt))) ≈ log(TensorKit.to_cpu(expt)) + #@test adapt(Array, log(project_hermitian!(expt))) ≈ log(adapt(Array, expt)) #=@test (@constinferred cos(t))^2 + (@constinferred sin(t))^2 ≈ id(storagetype(t), W) @@ -551,10 +545,8 @@ for V in spacelist t1 = cuRAND.rand(T, V1, V5') t2 = cuRAND.rand(T, V2 ⊗ V3, V4') t = @constinferred (t1 ⊗ t2) - CUDA.@allowscalar begin - @tensor t′[1 2 3; 4 5] := t1[1; 4] * t2[2 3; 5] - end - @test t ≈ t′ # This should really not be broken + @tensor t′[1 2 3; 4 5] := t1[1; 4] * t2[2 3; 5] + @test t ≈ t′ end end end @@ -567,17 +559,17 @@ end V1, V2, V3, V4, V5 = Vlist1 W1, W2, W3, W4, W5 = Vlist2 for T in (Float32, ComplexF64) - t1 = rand(T, V2 ⊗ V3, (V4 ⊗ V5)') - t2 = rand(T, W2, (W3 ⊗ W4)') + t1 = CUDA.rand(T, V2 ⊗ V3, (V4 ⊗ V5)') + t2 = CUDA.rand(T, W2, (W3 ⊗ W4)') t = @constinferred (t1 ⊠ t2) d1 = dim(codomain(t1)) d2 = dim(codomain(t2)) d3 = dim(domain(t1)) d4 = dim(domain(t2)) - At = convert(Array, t) + At = convert(Array, adapt(Vector{T}, t)) @test reshape(At, (d1, d2, d3, d4)) ≈ - reshape(convert(Array, t1), (d1, 1, d3, 1)) .* - reshape(convert(Array, t2), (1, d2, 1, d4)) + reshape(convert(Array, adapt(Vector{T}, t1)), (d1, 1, d3, 1)) .* + reshape(convert(Array, adapt(Vector{T}, t2)), (1, d2, 1, d4)) end end end