diff --git a/Project.toml b/Project.toml
index af43fc925..668ec1cf2 100644
--- a/Project.toml
+++ b/Project.toml
@@ -4,6 +4,7 @@ version = "0.17.0"
 authors = ["Jutho Haegeman, Lukas Devos"]
 
 [deps]
+Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
 Dictionaries = "85a47980-9c8c-11e8-2b9f-f7ca1fa99fb4"
 LRUCache = "8ac3fa9e-de4c-5943-b1dc-09c6b5f20637"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
@@ -19,7 +20,6 @@ TupleTools = "9d95972d-f1c8-5527-a6e0-b4b365fa01f6"
 VectorInterface = "409d34a3-91d5-4945-b6ec-7529ddf182d8"
 
 [weakdeps]
-Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
 AMDGPU = "21141c5a-9bdb-4563-92ae-f87d6854732e"
 CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
 ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
@@ -28,7 +28,6 @@ Mooncake = "da2b9cff-9c12-43a0-ae48-6db2b0edb7d6"
 cuTENSOR = "011b41b2-24ef-40a8-b3eb-fa098493e9e1"
 
 [extensions]
-TensorKitAdaptExt = "Adapt"
 TensorKitAMDGPUExt = "AMDGPU"
 TensorKitCUDAExt = ["CUDA", "cuTENSOR"]
 TensorKitChainRulesCoreExt = "ChainRulesCore"
@@ -55,7 +54,7 @@ Random = "1"
 ScopedValues = "1.3.0"
 Strided = "2"
 TensorKitSectors = "0.3.7"
-TensorOperations = "5.1"
+TensorOperations = "5.5"
 TupleTools = "1.5"
 VectorInterface = "0.4.8, 0.5"
 cuTENSOR = "6"
diff --git a/docs/make.jl b/docs/make.jl
index 34b025580..d72b851a6 100644
--- a/docs/make.jl
+++ b/docs/make.jl
@@ -9,7 +9,7 @@ end
 using Documenter
 using Random
 using TensorKit
-using TensorKit: FusionTreePair, FusionTreeBlock, Index2Tuple
+using TensorKit: FusionTreePair, FusionTreeBlock, Index2Tuple, IndexTuple
 using TensorKit.TensorKitSectors
 using TensorKit.MatrixAlgebraKit
 using DocumenterInterLinks
@@ -26,8 +26,14 @@ pages = [
         "man/intro.md", "man/tutorial.md",
         "man/spaces.md", "man/symmetries.md",
         "man/sectors.md", "man/gradedspaces.md",
-        "man/fusiontrees.md", "man/tensors.md",
-        "man/tensormanipulations.md",
+        "man/fusiontrees.md",
+        "Tensors" => [
+            "man/tensors.md",
+            "man/linearalgebra.md",
+            "man/indexmanipulations.md",
+            "man/factorizations.md",
+            "man/contractions.md",
+        ],
     ],
     "Library" => [
         "lib/sectors.md", "lib/fusiontrees.md",
diff --git a/docs/src/lib/fusiontrees.md b/docs/src/lib/fusiontrees.md
index 8e037af93..57033eca6 100644
--- a/docs/src/lib/fusiontrees.md
+++ b/docs/src/lib/fusiontrees.md
@@ -2,6 +2,7 @@
 
 ```@meta
 CurrentModule = TensorKit
+CollapsedDocStrings = true
 ```
 
 # Type hierarchy
diff --git a/docs/src/lib/sectors.md b/docs/src/lib/sectors.md
index f56980bf0..8a696c675 100644
--- a/docs/src/lib/sectors.md
+++ b/docs/src/lib/sectors.md
@@ -2,6 +2,7 @@
 
 ```@meta
 CurrentModule = TensorKit
+CollapsedDocStrings = true
 ```
 
 ## Type hierarchy
diff --git a/docs/src/lib/spaces.md b/docs/src/lib/spaces.md
index e5705fe3e..a6cce06a1 100644
--- a/docs/src/lib/spaces.md
+++ b/docs/src/lib/spaces.md
@@ -2,6 +2,7 @@
 
 ```@meta
 CurrentModule = TensorKit
+CollapsedDocStrings = true
 ```
 
 ## Type hierarchy
diff --git a/docs/src/lib/tensors.md b/docs/src/lib/tensors.md
index b19537e3f..1ef3da0fb 100644
--- a/docs/src/lib/tensors.md
+++ b/docs/src/lib/tensors.md
@@ -2,6 +2,7 @@
 
 ```@meta
 CurrentModule = TensorKit
+CollapsedDocStrings = true
 ```
 
 ## Type hierarchy
@@ -184,12 +185,6 @@ repartition!
 twist!
 ```
 
-```@docs
-TensorKit.add_permute!
-TensorKit.add_braid!
-TensorKit.add_transpose!
-```
-
 ### Tensor map composition, traces, contractions and tensor products
 
 ```@docs
diff --git a/docs/src/man/contractions.md b/docs/src/man/contractions.md
new file mode 100644
index 000000000..d70b5681d
--- /dev/null
+++ b/docs/src/man/contractions.md
@@ -0,0 +1,106 @@
+# [Tensor contractions and tensor networks](@id ss_tensor_contraction)
+
+One of the most important operation with tensor maps is to compose them, more generally known as contracting them.
+As mentioned in the section on [category theory](@ref s_categories), a typical composition of maps in a ribbon category can graphically be represented as a planar arrangement of the morphisms (i.e. tensor maps, boxes with lines eminating from top and bottom, corresponding to source and target, i.e. domain and codomain), where the lines connecting the source and targets of the different morphisms should be thought of as ribbons, that can braid over or underneath each other, and that can twist.
+Technically, we can embed this diagram in ``ℝ × [0,1]`` and attach all the unconnected line endings corresponding objects in the source at some position ``(x,0)`` for ``x∈ℝ``, and all line endings corresponding to objects in the target at some position ``(x,1)``.
+The resulting morphism is then invariant under what is known as *framed three-dimensional isotopy*, i.e. three-dimensional rearrangements of the morphism that respect the rules of boxes connected by ribbons whose open endings are kept fixed.
+Such a two-dimensional diagram cannot easily be encoded in a single line of code.
+
+However, things simplify when the braiding is symmetric (such that over- and under- crossings become equivalent, i.e. just crossings), and when twists, i.e. self-crossings in this case, are trivial.
+This amounts to `BraidingStyle(I) == Bosonic()` in the language of TensorKit.jl, and is true for any subcategory of ``\mathbf{Vect}``, i.e. ordinary tensors, possibly with some symmetry constraint.
+The case of ``\mathbf{SVect}`` and its subcategories, and more general categories, are discussed below.
+
+In the case of trivial twists, we can deform the diagram such that we first combine every morphism with a number of coevaluations ``η`` so as to represent it as a tensor, i.e. with a trivial domain.
+We can then rearrange the morphism to be all ligned up horizontally, where the original morphism compositions are now being performed by evaluations ``ϵ``.
+This process will generate a number of crossings and twists, where the latter can be omitted because they act trivially.
+Similarly, double crossings can also be omitted.
+As a consequence, the diagram, or the morphism it represents, is completely specified by the tensors it is composed of, and which indices between the different tensors are connect, via the evaluation ``ϵ``, and which indices make up the source and target of the resulting morphism.
+If we also compose the resulting morphisms with coevaluations so that it has a trivial domain, we just have one type of unconnected lines, henceforth called open indices.
+We sketch such a rearrangement in the following picture
+
+```@raw html
+<img src="../img/tensor-bosoniccontraction.svg" alt="tensor unitary" class="color-invertible"/>
+```
+
+Hence, we can now specify such a tensor diagram, henceforth called a tensor contraction or also tensor network, using a one-dimensional syntax that mimicks [abstract index notation](https://en.wikipedia.org/wiki/Abstract_index_notation) and specifies which indices are connected by the evaluation map using Einstein's summation conventation.
+Indeed, for `BraidingStyle(I) == Bosonic()`, such a tensor contraction can take the same format as if all tensors were just multi-dimensional arrays.
+For this, we rely on the interface provided by the package [TensorOperations.jl](https://github.com/QuantumKitHub/TensorOperations.jl).
+
+The above picture would be encoded as
+```julia
+@tensor E[a, b, c, d, e] := A[v, w, d, x] * B[y, z, c, x] * C[v, e, y, b] * D[a, w, z]
+```
+or
+```julia
+@tensor E[:] := A[1, 2, -4, 3] * B[4, 5, -3, 3] * C[1, -5, 4, -2] * D[-1, 2, 5]
+```
+where the latter syntax is known as NCON-style, and labels the unconnected or outgoing indices with negative integers, and the contracted indices with positive integers.
+
+A number of remarks are in order.
+TensorOperations.jl accepts both integers and any valid variable name as dummy label for indices, and everything in between `[ ]` is not resolved in the current context but interpreted as a dummy label.
+Here, we label the indices of a `TensorMap`, like `A::TensorMap{T, S, N₁, N₂}`, in a linear fashion, where the first position corresponds to the first space in `codomain(A)`, and so forth, up to position `N₁`.
+Index `N₁ + 1` then corresponds to the first space in `domain(A)`.
+However, because we have applied the coevaluation ``η``, it actually corresponds to the corresponding dual space, in accordance with the interface of [`space(A, i)`](@ref) that we introduced [above](@ref ss_tensor_properties), and as indiated by the dotted box around ``A`` in the above picture.
+The same holds for the other tensor maps.
+Note that our convention also requires that we braid indices that we brought from the domain to the codomain, and so this is only unambiguous for a symmetric braiding, where there is a unique way to permute the indices.
+
+With the current syntax, we create a new object `E` because we use the definition operator `:=`.
+Furthermore, with the current syntax, it will be a `Tensor`, i.e. it will have a trivial domain, and correspond to the dotted box in the picture above, rather than the actual morphism `E`.
+We can also directly define `E` with the correct codomain and domain by rather using
+```julia
+@tensor E[a b c;d e] := A[v, w, d, x] * B[y, z, c, x] * C[v, e, y, b] * D[a, w, z]
+```
+or
+```julia
+@tensor E[(a, b, c);(d, e)] := A[v, w, d, x] * B[y, z, c, x] * C[v, e, y, b] * D[a, w, z]
+```
+where the latter syntax can also be used when the codomain is empty.
+When using the assignment operator `=`, the `TensorMap` `E` is assumed to exist and the contents will be written to the currently allocated memory.
+Note that for existing tensors, both on the left hand side and right hand side, trying to specify the indices in the domain and the codomain seperately using the above syntax, has no effect, as the bipartition of indices are already fixed by the existing object.
+Hence, if `E` has been created by the previous line of code, all of the following lines are now equivalent
+```julia
+@tensor E[(a, b, c);(d, e)] = A[v, w, d, x] * B[y, z, c, x] * C[v, e, y, b] * D[a, w, z]
+@tensor E[a, b, c, d, e] = A[v w d; x] * B[(y, z, c); (x, )] * C[v e y; b] * D[a, w, z]
+@tensor E[a b; c d e] = A[v; w d x] * B[y, z, c, x] * C[v, e, y, b] * D[a w; z]
+```
+and none of those will or can change the partition of the indices of `E` into its codomain and its domain.
+
+Two final remarks are in order.
+Firstly, the order of the tensors appearing on the right hand side is irrelevant, as we can reorder them by using the allowed moves of the Penrose graphical calculus, which yields some crossings and a twist.
+As the latter is trivial, it can be omitted, and we just use the same rules to evaluate the newly ordered tensor network.
+For the particular case of matrix-matrix multiplication, which also captures more general settings by appropriotely combining spaces into a single line, we indeed find
+
+```@raw html
+<img src="../img/tensor-contractionreorder.svg" alt="tensor contraction reorder" class="color-invertible"/>
+```
+
+or thus, the following two lines of code yield the same result
+```julia
+@tensor C[i, j] := B[i, k] * A[k, j]
+@tensor C[i, j] := A[k, j] * B[i, k]
+```
+Reordering of tensors can be used internally by the `@tensor` macro to evaluate the contraction in a more efficient manner.
+In particular, the NCON-style of specifying the contraction gives the user control over the order, and there are other macros, such as `@tensoropt`, that try to automate this process.
+There is also an `@ncon` macro and `ncon` function, an we recommend reading the [manual of TensorOperations.jl](https://quantumkithub.github.io/TensorOperations.jl/stable/) to learn more about the possibilities and how they work.
+
+A final remark involves the use of adjoints of tensors.
+The current framework is such that the user should not be too worried about the actual bipartition into codomain and domain of a given `TensorMap` instance.
+Indeed, for tensor contractions the `@tensor` macro figures out the correct manipulations automatically.
+However, when wanting to use the `adjoint` of an instance `t::TensorMap{T, S, N₁, N₂}`, the resulting `adjoint(t)` is an `AbstractTensorMap{T, S, N₂, N₁}` and one needs to know the values of `N₁` and `N₂` to know exactly where the `i`th index of `t` will end up in `adjoint(t)`, and hence the index order of `t'`.
+Within the `@tensor` macro, one can instead use `conj()` on the whole index expression so as to be able to use the original index ordering of `t`.
+For example, for `TensorMap{T, S, 1, 1}` instances, this yields exactly the equivalence one expects, namely one between the following two expressions:
+
+```julia
+@tensor C[i, j] := B'[i, k] * A[k, j]
+@tensor C[i, j] := conj(B[k, i]) * A[k, j]
+```
+
+For e.g. an instance `A::TensorMap{T, S, 3, 2}`, the following two syntaxes have the same effect within an `@tensor` expression: `conj(A[a, b, c, d, e])` and `A'[d, e, a, b, c]`.
+
+## Fermionic tensor contractions
+
+TODO
+
+## Anyonic tensor contractions
+
+TODO
diff --git a/docs/src/man/factorizations.md b/docs/src/man/factorizations.md
new file mode 100644
index 000000000..2fbd8b382
--- /dev/null
+++ b/docs/src/man/factorizations.md
@@ -0,0 +1,46 @@
+# [Tensor factorizations](@id ss_tensor_factorization)
+
+```@setup tensors
+using TensorKit
+using LinearAlgebra
+```
+
+As tensors are linear maps, they suport various kinds of factorizations.
+These functions all interpret the provided `AbstractTensorMap` instances as a map from `domain` to `codomain`, which can be thought of as reshaping the tensor into a matrix according to the current bipartition of the indices.
+
+TensorKit's factorizations are provided by [MatrixAlgebraKit.jl](https://github.com/QuantumKitHub/MatrixAlgebraKit.jl), which is used to supply both the interface, as well as the implementation of the various operations on the blocks of data.
+For specific details on the provided functionality, we refer to its [documentation page](https://quantumkithub.github.io/MatrixAlgebraKit.jl/stable/user_interface/decompositions/).
+
+Finally, note that each of the factorizations takes the current partition of `domain` and `codomain` as the *axis* along which to matricize and perform the factorization.
+In order to obtain factorizations according to a different bipartition of the indices, we can use any of the previously mentioned [index manipulations](@ref s_indexmanipulations) before the factorization.
+
+Some examples to conclude this section
+```@repl tensors
+V1 = SU₂Space(0 => 2, 1/2 => 1)
+V2 = SU₂Space(0 => 1, 1/2 => 1, 1 => 1)
+
+t = randn(V1 ⊗ V1, V2);
+U, S, Vh = svd_compact(t);
+t ≈ U * S * Vh
+D, V = eigh_full(t' * t);
+D ≈ S * S
+U' * U ≈ id(domain(U))
+S
+
+Q, R = left_orth(t; alg = :svd);
+Q' * Q ≈ id(domain(Q))
+t ≈ Q * R
+
+U2, S2, Vh2, ε = svd_trunc(t; trunc = truncspace(V1));
+Vh2 * Vh2' ≈ id(codomain(Vh2))
+S2
+ε ≈ norm(block(S, Irrep[SU₂](1))) * sqrt(dim(Irrep[SU₂](1)))
+
+L, Q = right_orth(permute(t, ((1,), (2, 3))));
+codomain(L), domain(L), domain(Q)
+Q * Q'
+P = Q' * Q;
+P ≈ P * P
+t′ = permute(t, ((1,), (2, 3)));
+t′ ≈ t′ * P
+```
diff --git a/docs/src/man/indexmanipulations.md b/docs/src/man/indexmanipulations.md
new file mode 100644
index 000000000..b644eff0c
--- /dev/null
+++ b/docs/src/man/indexmanipulations.md
@@ -0,0 +1,108 @@
+# [Index manipulations](@id s_indexmanipulations)
+
+```@meta
+CollapsedDocStrings = true
+```
+
+```@setup indexmanip
+using TensorKit
+using LinearAlgebra
+```
+
+A `TensorMap{T, S, N₁, N₂}` is a linear map from a domain (a `ProductSpace{S, N₂}`) to a codomain (a `ProductSpace{S, N₁}`).
+In practice, the bipartition of the `N₁ + N₂` indices between domain and codomain rarely remains fixed: algorithms typically need to reshuffle indices between the two sides, reorder them, or change the arrow direction on individual indices before passing a tensor to a factorization or contraction.
+
+Index manipulations cover all such operations.
+They act on the structure of the tensor data in a way that is fully determined by the categorical data of the `sectortype`, such that TensorKit automatically manipulates the tensor entries accordingly.
+The operations fall into three groups, which mirror the structure of the source file:
+
+*   **Reweighting**: [`flip`](@ref) and [`twist`](@ref) apply local isomorphisms to individual indices without changing the index structure.
+*   **Space insertion/removal**: [`insertleftunit`](@ref), [`insertrightunit`](@ref) and [`removeunit`](@ref) add or remove trivial (scalar) index factors.
+*   **Index rearrangements**: [`permute`](@ref), [`braid`](@ref), [`transpose`](@ref) and [`repartition`](@ref) reorder indices and/or move them between domain and codomain.
+
+Throughout this page, new index positions are specified using `Index2Tuple{N₁, N₂}`, i.e. a pair `(p₁, p₂)` of index tuples.
+The indices listed in `p₁` form the new codomain and those in `p₂` form the new domain.
+The following helpers retrieve the current index structure of a tensor:
+
+```@docs; canonical=false
+numout
+numin
+numind
+codomainind
+domainind
+allind
+```
+
+## Reweighting
+
+Reweighting operations modify the entries of a tensor by applying local isomorphisms to individual indices, without changing the number of indices or their partition between domain and codomain.
+In particular, [`twist`](@ref) applies the topological spin (monoidal twist) to selected indices; this operation preserves the space of the indices and is completely trivial for `BraidingStyle(I) == Bosonic()`.
+In contrast, [`flip`](@ref) changes the arrow direction on selected indices by applying a (non-canonical!) isomorphism between the index space and its dual.
+
+```@docs; canonical=false
+twist(::AbstractTensorMap, ::Int)
+twist!
+flip(t::AbstractTensorMap, I)
+```
+
+## Inserting and removing unit spaces
+
+The next set of functions add or remove a trivial tensor product factor at a specified index position, without affecting any other indices.
+We distinguish between [`insertleftunit`](@ref), which inserts a unit index before index `i` (the unit index becoming index `i`),
+and [`insertrightunit`](@ref), which inserts after index `i` (the unit index becoming index `i + 1`);
+[`removeunit`](@ref) undoes either insertion.
+
+For tensors `t` with `UnitStyle(sectortype(t)) = SimpleUnit()`, the only relevant difference between `insertleftunit(t, i + 1)` and `insertrightunit(t, i)` is that `insertleftunit(t, numout(t) + 1)` inserts the unit index as first index in the domain, whereas `insertrightunit(t, numout(t))` will insert the unit index as last index in the codomain. 
+
+Passing `Val(i)` instead of an integer `i` for the position may improve type stability.
+
+```@docs; canonical=false
+insertleftunit(::AbstractTensorMap, ::Val{i}) where {i}
+insertrightunit(::AbstractTensorMap, ::Val{i}) where {i}
+removeunit(::AbstractTensorMap, ::Val{i}) where {i}
+```
+
+## Index rearrangements
+
+These operations reorder indices and/or move them between domain and codomain by applying the transposing or braiding isomorphisms of the underlying category.
+They form a hierarchy from most general to most restricted:
+
+- [`braid`](@ref) is the most general: it accepts any permutation and requires a `levels` argument — a tuple of heights, one per index — that determines whether each index crosses over or under the others it has to pass.
+- [`permute`](@ref) is a simpler interface for sector types with a symmetric braiding (`BraidingStyle(I) isa SymmetricBraiding`), where over- and under-crossings are equivalent and `levels` is therefore not needed.
+- [`transpose`](@ref) is restricted to *cyclic* permutations (indices do not cross).
+- [`repartition`](@ref) only moves the codomain/domain boundary without reordering the indices at all.
+
+For plain tensors (`sectortype(t) == Trivial`), `permute` and `braid` act like `permutedims` on the underlying array:
+
+```@repl indexmanip
+V = ℂ^2;
+t = randn(V ⊗ V ← V ⊗ V);
+ta = convert(Array, t);
+t′ = permute(t, ((4, 2, 3), (1,)));
+convert(Array, t′) ≈ permutedims(ta, (4, 2, 3, 1))
+```
+
+```@docs; canonical=false
+braid(::AbstractTensorMap, ::Index2Tuple, ::IndexTuple)
+braid!
+permute(::AbstractTensorMap, ::Index2Tuple)
+permute!(::AbstractTensorMap, ::AbstractTensorMap, ::Index2Tuple)
+transpose(::AbstractTensorMap, ::Index2Tuple)
+transpose!
+repartition(::AbstractTensorMap, ::Int, ::Int)
+repartition!
+```
+
+## Fusing and splitting indices
+
+There is no dedicated functionality for fusing or splitting indices.
+In the general case there is no canonical embedding of `V1 ⊗ V2` into the fused space `V = fuse(V1 ⊗ V2)`: any two such embeddings differ by a basis transform, i.e. there is a gauge freedom.
+TensorKit resolves this by requiring the user to construct an explicit isomorphism — the *fuser* — and contract it with the tensor.
+One particular isomorphism can be constructed using the [`unitary](@ref) function.
+It preserves norms and inner products, and has an inverse given by its adjoint. 
+For a plain tensor (`sectortype(t) == Trivial`), applying this particular `unitary` is equivalent to `reshape` on the underlying array.
+
+Fusing index `i` and `j = i+1` of a tensor `t` is then accomplished as
+
+
+The resulting `unitary` is a dense `TensorMap`, and this fusion and splitting approach is not optimized for maximal performance. However, because many tensor operations including tensor factorizations (SVD, QR, etc.) can be applied without needing any fusion, we do not expect fusion and splitting to be an essential part of performance critical parts of typical tensor algorithms. 
diff --git a/docs/src/man/linearalgebra.md b/docs/src/man/linearalgebra.md
new file mode 100644
index 000000000..98c9f489a
--- /dev/null
+++ b/docs/src/man/linearalgebra.md
@@ -0,0 +1,85 @@
+# [Basic linear algebra](@id ss_tensor_linalg)
+
+```@setup tensors
+using TensorKit
+using LinearAlgebra
+```
+
+`AbstractTensorMap` instances `t` represent linear maps, i.e. homomorphisms in a `𝕜`-linear category, just like matrices.
+To a large extent, they follow the interface of `Matrix` in Julia's `LinearAlgebra` standard library.
+Many methods from `LinearAlgebra` are (re)exported by TensorKit.jl, and can then us be used without `using LinearAlgebra` explicitly.
+In all of the following methods, the implementation acts directly on the underlying matrix blocks (typically using the same method) and never needs to perform any basis transforms.
+
+In particular, `AbstractTensorMap` instances can be composed, provided the domain of the first object coincides with the codomain of the second.
+Composing tensor maps uses the regular multiplication symbol as in `t = t1 * t2`, which is also used for matrix multiplication.
+TensorKit.jl also supports (and exports) the mutating method `mul!(t, t1, t2)`.
+We can then also try to invert a tensor map using `inv(t)`, though this can only exist if the domain and codomain are isomorphic, which can e.g. be checked as `fuse(codomain(t)) == fuse(domain(t))`.
+If the inverse is composed with another tensor `t2`, we can use the syntax `t1 \ t2` or `t2 / t1`.
+However, this syntax also accepts instances `t1` whose domain and codomain are not isomorphic, and then amounts to `pinv(t1)`, the Moore-Penrose pseudoinverse.
+This, however, is only really justified as minimizing the least squares problem if `InnerProductStyle(t) <: EuclideanProduct`.
+
+`AbstractTensorMap` instances behave themselves as vectors (i.e. they are `𝕜`-linear) and so they can be multiplied by scalars and, if they live in the same space, i.e. have the same domain and codomain, they can be added to each other.
+There is also a `zero(t)`, the additive identity, which produces a zero tensor with the same domain and codomain as `t`.
+In addition, `TensorMap` supports basic Julia methods such as `fill!` and `copy!`, as well as `copy(t)` to create a copy with independent data.
+Aside from basic `+` and `*` operations, TensorKit.jl reexports a number of efficient in-place methods from `LinearAlgebra`, such as `axpy!` (for `y ← α * x + y`), `axpby!` (for `y ← α * x + β * y`), `lmul!` and `rmul!` (for `y ← α * y` and `y ← y * α`, which is typically the same) and `mul!`, which can also be used for out-of-place scalar multiplication `y ← α * x`.
+
+For `S = spacetype(t)` where `InnerProductStyle(S) <: EuclideanProduct`, we can compute `norm(t)`, and for two such instances, the inner product `dot(t1, t2)`, provided `t1` and `t2` have the same domain and codomain.
+Furthermore, there is `normalize(t)` and `normalize!(t)` to return a scaled version of `t` with unit norm.
+These operations should also exist for `InnerProductStyle(S) <: HasInnerProduct`, but require an interface for defining a custom inner product in these spaces.
+Currently, there is no concrete subtype of `HasInnerProduct` that is not an `EuclideanProduct`.
+In particular, `CartesianSpace`, `ComplexSpace` and `GradedSpace` all have `InnerProductStyle(S) <: EuclideanProduct`.
+
+With tensors that have `InnerProductStyle(t) <: EuclideanProduct` there is associated an adjoint operation, given by `adjoint(t)` or simply `t'`, such that `domain(t') == codomain(t)` and `codomain(t') == domain(t)`.
+Note that for an instance `t::TensorMap{S, N₁, N₂}`, `t'` is simply stored in a wrapper called `AdjointTensorMap{S, N₂, N₁}`, which is another subtype of `AbstractTensorMap`.
+This should be mostly invisible to the user, as all methods should work for this type as well.
+It can be hard to reason about the index order of `t'`, i.e. index `i` of `t` appears in `t'` at index position `j = TensorKit.adjointtensorindex(t, i)`, where the latter method is typically not necessary and hence unexported.
+There is also a plural `TensorKit.adjointtensorindices` to convert multiple indices at once.
+Note that, because the adjoint interchanges domain and codomain, we have `space(t', j) == space(t, i)'`.
+
+`AbstractTensorMap` instances can furthermore be tested for exact (`t1 == t2`) or approximate (`t1 ≈ t2`) equality, though the latter requires that `norm` can be computed.
+
+When tensor map instances are endomorphisms, i.e. they have the same domain and codomain, there is a multiplicative identity which can be obtained as `one(t)` or `one!(t)`, where the latter overwrites the contents of `t`.
+The multiplicative identity on a space `V` can also be obtained using `id(A, V)` as discussed [above](@ref ss_tensor_construction), such that for a general homomorphism `t′`, we have `t′ == id(codomain(t′)) * t′ == t′ * id(domain(t′))`.
+Returning to the case of endomorphisms `t`, we can compute the trace via `tr(t)` and exponentiate them using `exp(t)`, or if the contents of `t` can be destroyed in the process, `exp!(t)`.
+Furthermore, there are a number of tensor factorizations for both endomorphisms and general homomorphisms that we discuss on the [Tensor factorizations](@ref ss_tensor_factorization) page.
+
+Finally, there are a number of operations that also belong in this paragraph because of their analogy to common matrix operations.
+The tensor product of two `TensorMap` instances `t1` and `t2` is obtained as `t1 ⊗ t2` and results in a new `TensorMap` with `codomain(t1 ⊗ t2) = codomain(t1) ⊗ codomain(t2)` and `domain(t1 ⊗ t2) = domain(t1) ⊗ domain(t2)`.
+If we have two `TensorMap{T, S, N, 1}` instances `t1` and `t2` with the same codomain, we can combine them in a way that is analogous to `hcat`, i.e. we stack them such that the new tensor `catdomain(t1, t2)` has also the same codomain, but has a domain which is `domain(t1) ⊕ domain(t2)`.
+Similarly, if `t1` and `t2` are of type `TensorMap{T, S, 1, N}` and have the same domain, the operation `catcodomain(t1, t2)` results in a new tensor with the same domain and a codomain given by `codomain(t1) ⊕ codomain(t2)`, which is the analogy of `vcat`.
+Note that direct sum only makes sense between `ElementarySpace` objects, i.e. there is no way to give a tensor product meaning to a direct sum of tensor product spaces.
+
+Time for some more examples:
+```@repl tensors
+using TensorKit # hide
+V1 = ℂ^2
+t = randn(V1 ← V1 ⊗ V1 ⊗ V1)
+t == t + zero(t) == t * id(domain(t)) == id(codomain(t)) * t
+t2 = randn(ComplexF64, codomain(t), domain(t));
+dot(t2, t)
+tr(t2' * t)
+dot(t2, t) ≈ dot(t', t2')
+dot(t2, t2)
+norm(t2)^2
+t3 = copy!(similar(t, ComplexF64), t);
+t3 == t
+rmul!(t3, 0.8);
+t3 ≈ 0.8 * t
+axpby!(0.5, t2, 1.3im, t3);
+t3 ≈ 0.5 * t2 + 0.8 * 1.3im * t
+t4 = randn(fuse(codomain(t)), codomain(t));
+t5 = TensorMap{Float64}(undef, fuse(codomain(t)), domain(t));
+mul!(t5, t4, t) == t4 * t
+inv(t4) * t4 ≈ id(codomain(t))
+t4 * inv(t4) ≈ id(fuse(codomain(t)))
+t4 \ (t4 * t) ≈ t
+t6 = randn(ComplexF64, V1, codomain(t));
+numout(t4) == numout(t6) == 1
+t7 = catcodomain(t4, t6);
+foreach(println, (codomain(t4), codomain(t6), codomain(t7)))
+norm(t7) ≈ sqrt(norm(t4)^2 + norm(t6)^2)
+t8 = t4 ⊗ t6;
+foreach(println, (codomain(t4), codomain(t6), codomain(t8)))
+foreach(println, (domain(t4), domain(t6), domain(t8)))
+norm(t8) ≈ norm(t4)*norm(t6)
+```
diff --git a/docs/src/man/tensormanipulations.md b/docs/src/man/tensormanipulations.md
deleted file mode 100644
index 15285fd78..000000000
--- a/docs/src/man/tensormanipulations.md
+++ /dev/null
@@ -1,337 +0,0 @@
-# [Manipulating tensors](@id s_tensormanipulations)
-
-## [Vector space and linear algebra operations](@id ss_tensor_linalg)
-
-`AbstractTensorMap` instances `t` represent linear maps, i.e. homomorphisms in a `𝕜`-linear category, just like matrices.
-To a large extent, they follow the interface of `Matrix` in Julia's `LinearAlgebra` standard library.
-Many methods from `LinearAlgebra` are (re)exported by TensorKit.jl, and can then us be used without `using LinearAlgebra` explicitly.
-In all of the following methods, the implementation acts directly on the underlying matrix blocks (typically using the same method) and never needs to perform any basis transforms.
-
-In particular, `AbstractTensorMap` instances can be composed, provided the domain of the first object coincides with the codomain of the second.
-Composing tensor maps uses the regular multiplication symbol as in `t = t1 * t2`, which is also used for matrix multiplication.
-TensorKit.jl also supports (and exports) the mutating method `mul!(t, t1, t2)`.
-We can then also try to invert a tensor map using `inv(t)`, though this can only exist if the domain and codomain are isomorphic, which can e.g. be checked as `fuse(codomain(t)) == fuse(domain(t))`.
-If the inverse is composed with another tensor `t2`, we can use the syntax `t1 \ t2` or `t2 / t1`.
-However, this syntax also accepts instances `t1` whose domain and codomain are not isomorphic, and then amounts to `pinv(t1)`, the Moore-Penrose pseudoinverse.
-This, however, is only really justified as minimizing the least squares problem if `InnerProductStyle(t) <: EuclideanProduct`.
-
-`AbstractTensorMap` instances behave themselves as vectors (i.e. they are `𝕜`-linear) and so they can be multiplied by scalars and, if they live in the same space, i.e. have the same domain and codomain, they can be added to each other.
-There is also a `zero(t)`, the additive identity, which produces a zero tensor with the same domain and codomain as `t`.
-In addition, `TensorMap` supports basic Julia methods such as `fill!` and `copy!`, as well as `copy(t)` to create a copy with independent data.
-Aside from basic `+` and `*` operations, TensorKit.jl reexports a number of efficient in-place methods from `LinearAlgebra`, such as `axpy!` (for `y ← α * x + y`), `axpby!` (for `y ← α * x + β * y`), `lmul!` and `rmul!` (for `y ← α * y` and `y ← y * α`, which is typically the same) and `mul!`, which can also be used for out-of-place scalar multiplication `y ← α * x`.
-
-For `S = spacetype(t)` where `InnerProductStyle(S) <: EuclideanProduct`, we can compute `norm(t)`, and for two such instances, the inner product `dot(t1, t2)`, provided `t1` and `t2` have the same domain and codomain.
-Furthermore, there is `normalize(t)` and `normalize!(t)` to return a scaled version of `t` with unit norm.
-These operations should also exist for `InnerProductStyle(S) <: HasInnerProduct`, but require an interface for defining a custom inner product in these spaces.
-Currently, there is no concrete subtype of `HasInnerProduct` that is not an `EuclideanProduct`.
-In particular, `CartesianSpace`, `ComplexSpace` and `GradedSpace` all have `InnerProductStyle(S) <: EuclideanProduct`.
-
-With tensors that have `InnerProductStyle(t) <: EuclideanProduct` there is associated an adjoint operation, given by `adjoint(t)` or simply `t'`, such that `domain(t') == codomain(t)` and `codomain(t') == domain(t)`.
-Note that for an instance `t::TensorMap{S, N₁, N₂}`, `t'` is simply stored in a wrapper called `AdjointTensorMap{S, N₂, N₁}`, which is another subtype of `AbstractTensorMap`.
-This should be mostly invisible to the user, as all methods should work for this type as well.
-It can be hard to reason about the index order of `t'`, i.e. index `i` of `t` appears in `t'` at index position `j = TensorKit.adjointtensorindex(t, i)`, where the latter method is typically not necessary and hence unexported.
-There is also a plural `TensorKit.adjointtensorindices` to convert multiple indices at once.
-Note that, because the adjoint interchanges domain and codomain, we have `space(t', j) == space(t, i)'`.
-
-`AbstractTensorMap` instances can furthermore be tested for exact (`t1 == t2`) or approximate (`t1 ≈ t2`) equality, though the latter requires that `norm` can be computed.
-
-When tensor map instances are endomorphisms, i.e. they have the same domain and codomain, there is a multiplicative identity which can be obtained as `one(t)` or `one!(t)`, where the latter overwrites the contents of `t`.
-The multiplicative identity on a space `V` can also be obtained using `id(A, V)` as discussed [above](@ref ss_tensor_construction), such that for a general homomorphism `t′`, we have `t′ == id(codomain(t′)) * t′ == t′ * id(domain(t′))`.
-Returning to the case of endomorphisms `t`, we can compute the trace via `tr(t)` and exponentiate them using `exp(t)`, or if the contents of `t` can be destroyed in the process, `exp!(t)`.
-Furthermore, there are a number of tensor factorizations for both endomorphisms and general homomorphism that we discuss below.
-
-Finally, there are a number of operations that also belong in this paragraph because of their analogy to common matrix operations.
-The tensor product of two `TensorMap` instances `t1` and `t2` is obtained as `t1 ⊗ t2` and results in a new `TensorMap` with `codomain(t1 ⊗ t2) = codomain(t1) ⊗ codomain(t2)` and `domain(t1 ⊗ t2) = domain(t1) ⊗ domain(t2)`.
-If we have two `TensorMap{T, S, N, 1}` instances `t1` and `t2` with the same codomain, we can combine them in a way that is analogous to `hcat`, i.e. we stack them such that the new tensor `catdomain(t1, t2)` has also the same codomain, but has a domain which is `domain(t1) ⊕ domain(t2)`.
-Similarly, if `t1` and `t2` are of type `TensorMap{T, S, 1, N}` and have the same domain, the operation `catcodomain(t1, t2)` results in a new tensor with the same domain and a codomain given by `codomain(t1) ⊕ codomain(t2)`, which is the analogy of `vcat`.
-Note that direct sum only makes sense between `ElementarySpace` objects, i.e. there is no way to give a tensor product meaning to a direct sum of tensor product spaces.
-
-Time for some more examples:
-```@repl tensors
-using TensorKit # hide
-V1 = ℂ^2
-t = randn(V1 ← V1 ⊗ V1 ⊗ V1)
-t == t + zero(t) == t * id(domain(t)) == id(codomain(t)) * t
-t2 = randn(ComplexF64, codomain(t), domain(t));
-dot(t2, t)
-tr(t2' * t)
-dot(t2, t) ≈ dot(t', t2')
-dot(t2, t2)
-norm(t2)^2
-t3 = copy!(similar(t, ComplexF64), t);
-t3 == t
-rmul!(t3, 0.8);
-t3 ≈ 0.8 * t
-axpby!(0.5, t2, 1.3im, t3);
-t3 ≈ 0.5 * t2 + 0.8 * 1.3im * t
-t4 = randn(fuse(codomain(t)), codomain(t));
-t5 = TensorMap{Float64}(undef, fuse(codomain(t)), domain(t));
-mul!(t5, t4, t) == t4 * t
-inv(t4) * t4 ≈ id(codomain(t))
-t4 * inv(t4) ≈ id(fuse(codomain(t)))
-t4 \ (t4 * t) ≈ t
-t6 = randn(ComplexF64, V1, codomain(t));
-numout(t4) == numout(t6) == 1
-t7 = catcodomain(t4, t6);
-foreach(println, (codomain(t4), codomain(t6), codomain(t7)))
-norm(t7) ≈ sqrt(norm(t4)^2 + norm(t6)^2)
-t8 = t4 ⊗ t6;
-foreach(println, (codomain(t4), codomain(t6), codomain(t8)))
-foreach(println, (domain(t4), domain(t6), domain(t8)))
-norm(t8) ≈ norm(t4)*norm(t6)
-```
-
-## [Index manipulations](@id ss_indexmanipulation)
-
-In many cases, the bipartition of tensor indices (i.e. `ElementarySpace` instances) between the codomain and domain is not fixed throughout the different operations that need to be performed on that tensor map, i.e. we want to use the duality to move spaces from domain to codomain and vice versa.
-Furthermore, we want to use the braiding to reshuffle the order of the indices.
-
-For this, we use an interface that is closely related to that for manipulating splitting- fusion tree pairs, namely [`braid`](@ref) and [`permute`](@ref), with the interface
-
-```julia
-braid(t::AbstractTensorMap{T,S,N₁,N₂}, (p1, p2)::Index2Tuple{N₁′,N₂′}, levels::IndexTuple{N₁+N₂,Int})
-```
-
-and
-
-```julia
-permute(t::AbstractTensorMap{T,S,N₁,N₂}, (p1, p2)::Index2Tuple{N₁′,N₂′}; copy = false)
-```
-
-both of which return an instance of `AbstractTensorMap{T, S, N₁′, N₂′}`.
-
-In these methods, `p1` and `p2` specify which of the original tensor indices ranging from `1` to `N₁ + N₂` make up the new codomain (with `N₁′` spaces) and new domain (with `N₂′` spaces).
-Hence, `(p1..., p2...)` should be a valid permutation of `1:(N₁ + N₂)`.
-Note that, throughout TensorKit.jl, permutations are always specified using tuples of `Int`s, for reasons of type stability.
-For `braid`, we also need to specify `levels` or depths for each of the indices of the original tensor, which determine whether indices will braid over or underneath each other (use the braiding or its inverse).
-We refer to the section on [manipulating fusion trees](@ref ss_fusiontrees) for more details.
-
-When `BraidingStyle(sectortype(t)) isa SymmetricBraiding`, we can use the simpler interface of `permute`, which does not require the argument `levels`.
-`permute` accepts a keyword argument `copy`.
-When `copy == true`, the result will be a tensor with newly allocated data that can independently be modified from that of the input tensor `t`.
-When `copy` takes the default value `false`, `permute` can try to return the result in a way that it shares its data with the input tensor `t`, though this is only possible in specific cases (e.g. when `sectortype(S) == Trivial` and `(p1..., p2...) = (1:(N₁+N₂)...)`).
-
-Both `braid` and `permute` come in a version where the result is stored in an already existing tensor, i.e. [`braid!(tdst, tsrc, (p1, p2), levels)`](@ref) and [`permute!(tdst, tsrc, (p1, p2))`](@ref).
-
-Another operation that belongs under index manipulations is taking the `transpose` of a tensor, i.e. `LinearAlgebra.transpose(t)` and `LinearAlgebra.transpose!(tdst, tsrc)`, both of which are reexported by TensorKit.jl.
-Note that `transpose(t)` is not simply equal to reshuffling domain and codomain with `braid(t, (1:(N₁+N₂)...), reverse(domainind(tsrc)), reverse(codomainind(tsrc))))`.
-Indeed, the graphical representation (where we draw the codomain and domain as a single object), makes clear that this introduces an additional (inverse) twist, which is then compensated in the `transpose` implementation.
-
-```@raw html
-<img src="../img/tensor-transpose.svg" alt="transpose" class="color-invertible"/>
-```
-
-In categorical language, the reason for this extra twist is that we use the left coevaluation ``η``, but the right evaluation ``\tilde{ϵ}``, when repartitioning the indices between domain and codomain.
-
-There are a number of other index related manipulations.
-We can apply a twist (or inverse twist) to one of the tensor map indices via [`twist(t, i; inv = false)`](@ref) or [`twist!(t, i; inv = false)`](@ref).
-Note that the latter method does not store the result in a new destination tensor, but just modifies the tensor `t` in place.
-Twisting several indices simultaneously can be obtained by using the defining property
-
-```math
-θ_{V⊗W} = τ_{W,V} ∘ (θ_W ⊗ θ_V) ∘ τ_{V,W} = (θ_V ⊗ θ_W) ∘ τ_{W,V} ∘ τ_{V,W},
-```
-
-but is currently not implemented explicitly.
-
-For all sector types `I` with `BraidingStyle(I) == Bosonic()`, all twists are `1` and thus have no effect.
-Let us start with some examples, in which we illustrate that, albeit `permute` might act highly non-trivial on the fusion trees and on the corresponding data, after conversion to a regular `Array` (when possible), it just acts like `permutedims`
-
-```@repl tensors
-domain(t) → codomain(t)
-ta = convert(Array, t);
-t′ = permute(t, (1, 2, 3, 4));
-domain(t′) → codomain(t′)
-convert(Array, t′) ≈ ta
-t′′ = permute(t, ((4, 2, 3), (1,)));
-domain(t′′) → codomain(t′′)
-convert(Array, t′′) ≈ permutedims(ta, (4, 2, 3, 1))
-transpose(t)
-convert(Array, transpose(t)) ≈ permutedims(ta, (4, 3, 2, 1))
-dot(t2, t) ≈ dot(transpose(t2), transpose(t))
-transpose(transpose(t)) ≈ t
-twist(t, 3) ≈ t
-```
-
-Note that `transpose` acts like one would expect on a `TensorMap{T, S, 1, 1}`.
-On a `TensorMap{T, S, N₁, N₂}`, because `transpose` replaces the codomain with the dual of the domain, which has its tensor product operation reversed, this in the end amounts in a complete reversal of all tensor indices when representing it as a plain multi-dimensional `Array`.
-Also, note that we have not defined the conjugation of `TensorMap` instances.
-One definition that one could think of is `conj(t) = adjoint(transpose(t))`.
-However note that `codomain(adjoint(tranpose(t))) == domain(transpose(t)) == dual(codomain(t))` and similarly `domain(adjoint(tranpose(t))) == dual(domain(t))`, where `dual` of a `ProductSpace` is composed of the dual of the `ElementarySpace` instances, in reverse order of tensor product.
-This might be very confusing, and as such we leave tensor conjugation undefined.
-However, note that we have a conjugation syntax within the context of [tensor contractions](@ref ss_tensor_contraction).
-
-To show the effect of `twist`, we now consider a type of sector `I` for which `BraidingStyle(I) != Bosonic()`.
-In particular, we use `FibonacciAnyon`.
-We cannot convert the resulting `TensorMap` to an `Array`, so we have to rely on indirect tests to verify our results.
-
-```@repl tensors
-V1 = GradedSpace{FibonacciAnyon}(:I => 3, :τ => 2)
-V2 = GradedSpace{FibonacciAnyon}(:I => 2, :τ => 1)
-m = randn(Float32, V1, V2)
-transpose(m)
-twist(braid(m, ((2,), (1,)), (1, 2)), 1)
-t1 = randn(V1 * V2', V2 * V1);
-t2 = randn(ComplexF64, V1 * V2', V2 * V1);
-dot(t1, t2) ≈ dot(transpose(t1), transpose(t2))
-transpose(transpose(t1)) ≈ t1
-```
-
-A final operation that one might expect in this section is to fuse or join indices, and its inverse, to split a given index into two or more indices.
-For a plain tensor (i.e. with `sectortype(t) == Trivial`) amount to the equivalent of `reshape` on the multidimensional data.
-However, this represents only one possibility, as there is no canonically unique way to embed the tensor product of two spaces `V1 ⊗ V2` in a new space `V = fuse(V1 ⊗ V2)`.
-Such a mapping can always be accompagnied by a basis transform.
-However, one particular choice is created by the function `isomorphism`, or for `EuclideanProduct` spaces, `unitary`.
-Hence, we can join or fuse two indices of a tensor by first constructing `u = unitary(fuse(space(t, i) ⊗ space(t, j)), space(t, i) ⊗ space(t, j))` and then contracting this map with indices `i` and `j` of `t`, as explained in the section on [contracting tensors](@ref ss_tensor_contraction).
-Note, however, that a typical algorithm is not expected to often need to fuse and split indices, as e.g. tensor factorizations can easily be applied without needing to `reshape` or fuse indices first, as explained in the next section.
-
-## [Tensor factorizations](@id ss_tensor_factorization)
-
-As tensors are linear maps, they suport various kinds of factorizations.
-These functions all interpret the provided `AbstractTensorMap` instances as a map from `domain` to `codomain`, which can be thought of as reshaping the tensor into a matrix according to the current bipartition of the indices.
-
-TensorKit's factorizations are provided by [MatrixAlgebraKit.jl](https://github.com/QuantumKitHub/MatrixAlgebraKit.jl), which is used to supply both the interface, as well as the implementation of the various operations on the blocks of data.
-For specific details on the provided functionality, we refer to its [documentation page](https://quantumkithub.github.io/MatrixAlgebraKit.jl/stable/user_interface/decompositions/).
-
-Finally, note that each of the factorizations takes the current partition of `domain` and `codomain` as the *axis* along which to matricize and perform the factorization.
-In order to obtain factorizations according to a different bipartition of the indices, we can use any of the previously mentioned [index manipulations](@ref ss_indexmanipulation) before the factorization.
-
-Some examples to conclude this section
-```@repl tensors
-V1 = SU₂Space(0 => 2, 1/2 => 1)
-V2 = SU₂Space(0 => 1, 1/2 => 1, 1 => 1)
-
-t = randn(V1 ⊗ V1, V2);
-U, S, Vh = svd_compact(t);
-t ≈ U * S * Vh
-D, V = eigh_full(t' * t);
-D ≈ S * S
-U' * U ≈ id(domain(U))
-S
-
-Q, R = left_orth(t; alg = :svd);
-Q' * Q ≈ id(domain(Q))
-t ≈ Q * R
-
-U2, S2, Vh2, ε = svd_trunc(t; trunc = truncspace(V1));
-Vh2 * Vh2' ≈ id(codomain(Vh2))
-S2
-ε ≈ norm(block(S, Irrep[SU₂](1))) * sqrt(dim(Irrep[SU₂](1)))
-
-L, Q = right_orth(permute(t, ((1,), (2, 3))));
-codomain(L), domain(L), domain(Q)
-Q * Q'
-P = Q' * Q;
-P ≈ P * P
-t′ = permute(t, ((1,), (2, 3)));
-t′ ≈ t′ * P
-```
-
-## [Bosonic tensor contractions and tensor networks](@id ss_tensor_contraction)
-
-One of the most important operation with tensor maps is to compose them, more generally known as contracting them.
-As mentioned in the section on [category theory](@ref s_categories), a typical composition of maps in a ribbon category can graphically be represented as a planar arrangement of the morphisms (i.e. tensor maps, boxes with lines eminating from top and bottom, corresponding to source and target, i.e. domain and codomain), where the lines connecting the source and targets of the different morphisms should be thought of as ribbons, that can braid over or underneath each other, and that can twist.
-Technically, we can embed this diagram in ``ℝ × [0,1]`` and attach all the unconnected line endings corresponding objects in the source at some position ``(x,0)`` for ``x∈ℝ``, and all line endings corresponding to objects in the target at some position ``(x,1)``.
-The resulting morphism is then invariant under what is known as *framed three-dimensional isotopy*, i.e. three-dimensional rearrangements of the morphism that respect the rules of boxes connected by ribbons whose open endings are kept fixed.
-Such a two-dimensional diagram cannot easily be encoded in a single line of code.
-
-However, things simplify when the braiding is symmetric (such that over- and under- crossings become equivalent, i.e. just crossings), and when twists, i.e. self-crossings in this case, are trivial.
-This amounts to `BraidingStyle(I) == Bosonic()` in the language of TensorKit.jl, and is true for any subcategory of ``\mathbf{Vect}``, i.e. ordinary tensors, possibly with some symmetry constraint.
-The case of ``\mathbf{SVect}`` and its subcategories, and more general categories, are discussed below.
-
-In the case of trivial twists, we can deform the diagram such that we first combine every morphism with a number of coevaluations ``η`` so as to represent it as a tensor, i.e. with a trivial domain.
-We can then rearrange the morphism to be all ligned up horizontally, where the original morphism compositions are now being performed by evaluations ``ϵ``.
-This process will generate a number of crossings and twists, where the latter can be omitted because they act trivially.
-Similarly, double crossings can also be omitted.
-As a consequence, the diagram, or the morphism it represents, is completely specified by the tensors it is composed of, and which indices between the different tensors are connect, via the evaluation ``ϵ``, and which indices make up the source and target of the resulting morphism.
-If we also compose the resulting morphisms with coevaluations so that it has a trivial domain, we just have one type of unconnected lines, henceforth called open indices.
-We sketch such a rearrangement in the following picture
-
-```@raw html
-<img src="../img/tensor-bosoniccontraction.svg" alt="tensor unitary" class="color-invertible"/>
-```
-
-Hence, we can now specify such a tensor diagram, henceforth called a tensor contraction or also tensor network, using a one-dimensional syntax that mimicks [abstract index notation](https://en.wikipedia.org/wiki/Abstract_index_notation) and specifies which indices are connected by the evaluation map using Einstein's summation conventation.
-Indeed, for `BraidingStyle(I) == Bosonic()`, such a tensor contraction can take the same format as if all tensors were just multi-dimensional arrays.
-For this, we rely on the interface provided by the package [TensorOperations.jl](https://github.com/QuantumKitHub/TensorOperations.jl).
-
-The above picture would be encoded as
-```julia
-@tensor E[a, b, c, d, e] := A[v, w, d, x] * B[y, z, c, x] * C[v, e, y, b] * D[a, w, z]
-```
-or
-```julia
-@tensor E[:] := A[1, 2, -4, 3] * B[4, 5, -3, 3] * C[1, -5, 4, -2] * D[-1, 2, 5]
-```
-where the latter syntax is known as NCON-style, and labels the unconnected or outgoing indices with negative integers, and the contracted indices with positive integers.
-
-A number of remarks are in order.
-TensorOperations.jl accepts both integers and any valid variable name as dummy label for indices, and everything in between `[ ]` is not resolved in the current context but interpreted as a dummy label.
-Here, we label the indices of a `TensorMap`, like `A::TensorMap{T, S, N₁, N₂}`, in a linear fashion, where the first position corresponds to the first space in `codomain(A)`, and so forth, up to position `N₁`.
-Index `N₁ + 1` then corresponds to the first space in `domain(A)`.
-However, because we have applied the coevaluation ``η``, it actually corresponds to the corresponding dual space, in accordance with the interface of [`space(A, i)`](@ref) that we introduced [above](@ref ss_tensor_properties), and as indiated by the dotted box around ``A`` in the above picture.
-The same holds for the other tensor maps.
-Note that our convention also requires that we braid indices that we brought from the domain to the codomain, and so this is only unambiguous for a symmetric braiding, where there is a unique way to permute the indices.
-
-With the current syntax, we create a new object `E` because we use the definition operator `:=`.
-Furthermore, with the current syntax, it will be a `Tensor`, i.e. it will have a trivial domain, and correspond to the dotted box in the picture above, rather than the actual morphism `E`.
-We can also directly define `E` with the correct codomain and domain by rather using
-```julia
-@tensor E[a b c;d e] := A[v, w, d, x] * B[y, z, c, x] * C[v, e, y, b] * D[a, w, z]
-```
-or
-```julia
-@tensor E[(a, b, c);(d, e)] := A[v, w, d, x] * B[y, z, c, x] * C[v, e, y, b] * D[a, w, z]
-```
-where the latter syntax can also be used when the codomain is empty.
-When using the assignment operator `=`, the `TensorMap` `E` is assumed to exist and the contents will be written to the currently allocated memory.
-Note that for existing tensors, both on the left hand side and right hand side, trying to specify the indices in the domain and the codomain seperately using the above syntax, has no effect, as the bipartition of indices are already fixed by the existing object.
-Hence, if `E` has been created by the previous line of code, all of the following lines are now equivalent
-```julia
-@tensor E[(a, b, c);(d, e)] = A[v, w, d, x] * B[y, z, c, x] * C[v, e, y, b] * D[a, w, z]
-@tensor E[a, b, c, d, e] = A[v w d; x] * B[(y, z, c); (x, )] * C[v e y; b] * D[a, w, z]
-@tensor E[a b; c d e] = A[v; w d x] * B[y, z, c, x] * C[v, e, y, b] * D[a w; z]
-```
-and none of those will or can change the partition of the indices of `E` into its codomain and its domain.
-
-Two final remarks are in order.
-Firstly, the order of the tensors appearing on the right hand side is irrelevant, as we can reorder them by using the allowed moves of the Penrose graphical calculus, which yields some crossings and a twist.
-As the latter is trivial, it can be omitted, and we just use the same rules to evaluate the newly ordered tensor network.
-For the particular case of matrix-matrix multiplication, which also captures more general settings by appropriotely combining spaces into a single line, we indeed find
-
-```@raw html
-<img src="../img/tensor-contractionreorder.svg" alt="tensor contraction reorder" class="color-invertible"/>
-```
-
-or thus, the following two lines of code yield the same result
-```julia
-@tensor C[i, j] := B[i, k] * A[k, j]
-@tensor C[i, j] := A[k, j] * B[i, k]
-```
-Reordering of tensors can be used internally by the `@tensor` macro to evaluate the contraction in a more efficient manner.
-In particular, the NCON-style of specifying the contraction gives the user control over the order, and there are other macros, such as `@tensoropt`, that try to automate this process.
-There is also an `@ncon` macro and `ncon` function, an we recommend reading the [manual of TensorOperations.jl](https://quantumkithub.github.io/TensorOperations.jl/stable/) to learn more about the possibilities and how they work.
-
-A final remark involves the use of adjoints of tensors.
-The current framework is such that the user should not be too worried about the actual bipartition into codomain and domain of a given `TensorMap` instance.
-Indeed, for tensor contractions the `@tensor` macro figures out the correct manipulations automatically.
-However, when wanting to use the `adjoint` of an instance `t::TensorMap{T, S, N₁, N₂}`, the resulting `adjoint(t)` is an `AbstractTensorMap{T, S, N₂, N₁}` and one needs to know the values of `N₁` and `N₂` to know exactly where the `i`th index of `t` will end up in `adjoint(t)`, and hence the index order of `t'`.
-Within the `@tensor` macro, one can instead use `conj()` on the whole index expression so as to be able to use the original index ordering of `t`.
-For example, for `TensorMap{T, S, 1, 1}` instances, this yields exactly the equivalence one expects, namely one between the following two expressions:
-
-```julia
-@tensor C[i, j] := B'[i, k] * A[k, j]
-@tensor C[i, j] := conj(B[k, i]) * A[k, j]
-```
-
-For e.g. an instance `A::TensorMap{T, S, 3, 2}`, the following two syntaxes have the same effect within an `@tensor` expression: `conj(A[a, b, c, d, e])` and `A'[d, e, a, b, c]`.
-
-Some examples:
-
-## Fermionic tensor contractions
-
-TODO
-
-## Anyonic tensor contractions
-
-TODO
diff --git a/docs/src/man/tensors.md b/docs/src/man/tensors.md
index 2921e5f1b..155dbbe4d 100644
--- a/docs/src/man/tensors.md
+++ b/docs/src/man/tensors.md
@@ -5,7 +5,7 @@ using TensorKit
 using LinearAlgebra
 ```
 
-This last page explains how to create and manipulate tensors in TensorKit.jl.
+This page explains how to construct and access tensors in TensorKit.jl.
 As this is probably the most important part of the manual, we will also focus more strongly on the usage and interface, and less so on the underlying implementation.
 The only aspect of the implementation that we will address is the storage of the tensor data, as this is important to know how to create and initialize a tensor, but will in fact also shed light on how some of the methods work.
 
diff --git a/ext/TensorKitAdaptExt.jl b/ext/TensorKitAdaptExt.jl
deleted file mode 100644
index 4d2693b7b..000000000
--- a/ext/TensorKitAdaptExt.jl
+++ /dev/null
@@ -1,26 +0,0 @@
-module TensorKitAdaptExt
-
-using TensorKit
-using TensorKit: AdjointTensorMap
-using Adapt
-
-function Adapt.adapt_structure(to, x::TensorMap)
-    data′ = adapt(to, x.data)
-    return TensorMap{eltype(data′)}(data′, space(x))
-end
-function Adapt.adapt_structure(to, x::AdjointTensorMap)
-    return adjoint(adapt(to, parent(x)))
-end
-function Adapt.adapt_structure(to, x::DiagonalTensorMap)
-    data′ = adapt(to, x.data)
-    return DiagonalTensorMap(data′, x.domain)
-end
-function Adapt.adapt_structure(::Type{T}, x::BraidingTensor{T′, S, A}) where {T <: Number, T′, S, A}
-    A′ = TensorKit.similarstoragetype(A, T)
-    return BraidingTensor{T, S, A′}(space(x), x.adjoint)
-end
-function Adapt.adapt_structure(::Type{TA}, x::BraidingTensor{T, S, A}) where {T′, TA <: DenseArray{T′}, T, S, A}
-    return BraidingTensor{T′, S, TA}(space(x), x.adjoint)
-end
-
-end
diff --git a/ext/TensorKitCUDAExt/TensorKitCUDAExt.jl b/ext/TensorKitCUDAExt/TensorKitCUDAExt.jl
index 1a5c28f7c..aa7320f32 100644
--- a/ext/TensorKitCUDAExt/TensorKitCUDAExt.jl
+++ b/ext/TensorKitCUDAExt/TensorKitCUDAExt.jl
@@ -7,6 +7,8 @@ import CUDA.cuRAND: rand as curand, rand! as curand!, randn as curandn, randn! a
 using Strided: StridedViews
 using CUDA.CUDACore.KernelAbstractions: @kernel, @index, get_backend
 
+using Adapt: Adapt
+
 using TensorKit
 using TensorKit.Factorizations
 using TensorKit.Strided
diff --git a/ext/TensorKitCUDAExt/cutensormap.jl b/ext/TensorKitCUDAExt/cutensormap.jl
index 02ca2d5d6..016749fce 100644
--- a/ext/TensorKitCUDAExt/cutensormap.jl
+++ b/ext/TensorKitCUDAExt/cutensormap.jl
@@ -155,7 +155,3 @@ for f in (:sqrt, :log, :asin, :acos, :acosh, :atanh, :acoth)
         return tf
     end
 end
-
-function TensorKit._add_transform_multi!(tdst::CuTensorMap, tsrc, p, (U, structs_dst, structs_src)::Tuple{<:Array, TD, TS}, buffers, alpha, beta, backend...) where {TD, TS}
-    return TensorKit._add_transform_multi!(tdst, tsrc, p, (CUDA.CUDACore.Adapt.adapt(CuArray, U), structs_dst, structs_src), buffers, alpha, beta, backend...)
-end
diff --git a/ext/TensorKitCUDAExt/truncation.jl b/ext/TensorKitCUDAExt/truncation.jl
index 2633b3483..3b6f86dff 100644
--- a/ext/TensorKitCUDAExt/truncation.jl
+++ b/ext/TensorKitCUDAExt/truncation.jl
@@ -51,21 +51,21 @@ end
 function MatrixAlgebraKit.findtruncated_svd(values::CuSectorVector, strategy::S) where {S <: MatrixAlgebraKit.TruncationStrategy}
     # returning a CuSectorVector wrecks things in truncate_{co}domain
     # because of scalar indexing
-    return CUDA.CUDACore.Adapt.adapt(Vector, MatrixAlgebraKit.findtruncated(values, strategy))
+    return Adapt.adapt(Vector, MatrixAlgebraKit.findtruncated(values, strategy))
 end
 
 for strat in (:(MatrixAlgebraKit.TruncationByOrder), :(MatrixAlgebraKit.TruncationByError), :(MatrixAlgebraKit.TruncationIntersection), :(TensorKit.Factorizations.TruncationSpace))
     @eval function MatrixAlgebraKit.findtruncated_svd(values::CuSectorVector, strategy::$strat)
         # returning a CuSectorVector wrecks things in truncate_{co}domain
         # because of scalar indexing
-        return CUDA.CUDACore.Adapt.adapt(Vector, MatrixAlgebraKit.findtruncated(values, strategy))
+        return Adapt.adapt(Vector, MatrixAlgebraKit.findtruncated(values, strategy))
     end
 end
 
 function MatrixAlgebraKit.findtruncated_svd(values::CuSectorVector, strategy::MatrixAlgebraKit.TruncationByValue)
     atol = TensorKit.Factorizations.rtol_to_atol(values, strategy.p, strategy.atol, strategy.rtol)
     strategy′ = trunctol(; atol, strategy.by, strategy.keep_below)
-    return SectorDict(c => CUDA.CUDACore.Adapt.adapt(Vector, MatrixAlgebraKit.findtruncated_svd(d, strategy′)) for (c, d) in pairs(values))
+    return SectorDict(c => Adapt.adapt(Vector, MatrixAlgebraKit.findtruncated_svd(d, strategy′)) for (c, d) in pairs(values))
 end
 
 # Needed until MatrixAlgebraKit patch hits...
diff --git a/ext/TensorKitMooncakeExt/indexmanipulations.jl b/ext/TensorKitMooncakeExt/indexmanipulations.jl
index c3bc3e26c..1fccdd9e6 100644
--- a/ext/TensorKitMooncakeExt/indexmanipulations.jl
+++ b/ext/TensorKitMooncakeExt/indexmanipulations.jl
@@ -1,11 +1,11 @@
 for transform in (:permute, :transpose)
-    add_transform! = Symbol(:add_, transform, :!)
-    add_transform_pullback = Symbol(add_transform!, :_pullback)
+    transform! = Symbol(transform, :!)
+    transform_pullback = Symbol(transform!, :_pullback)
     @eval @is_primitive(
         DefaultCtx,
         ReverseMode,
         Tuple{
-            typeof(TK.$add_transform!),
+            typeof(TK.$transform!),
             AbstractTensorMap,
             AbstractTensorMap, Index2Tuple,
             Number, Number, Vararg{Any},
@@ -13,7 +13,7 @@ for transform in (:permute, :transpose)
     )
 
     @eval function Mooncake.rrule!!(
-            ::CoDual{typeof(TK.$add_transform!)},
+            ::CoDual{typeof(TK.$transform!)},
             C_ΔC::CoDual{<:AbstractTensorMap},
             A_ΔA::CoDual{<:AbstractTensorMap}, p_Δp::CoDual{<:Index2Tuple},
             α_Δα::CoDual{<:Number}, β_Δβ::CoDual{<:Number},
@@ -30,17 +30,17 @@ for transform in (:permute, :transpose)
 
         # if we need to compute Δa, it is faster to allocate an intermediate permuted A
         # and store that instead of repeating the permutation in the pullback each time.
-        # effectively, we replace `add_permute` by `add ∘ permute`.
+        # effectively, we replace `permute!/transpose!` by `add ∘ permute/transpose`.
         Ap = if _needs_tangent(α)
             Ap = $transform(A, p)
             add!(C, Ap, α, β)
             Ap
         else
-            TK.$add_transform!(C, A, p, α, β, ba...)
+            TK.$transform!(C, A, p, α, β, ba...)
             nothing
         end
 
-        function $add_transform_pullback(::NoRData)
+        function $transform_pullback(::NoRData)
             copy!(C, C_cache)
 
             # ΔA
@@ -50,10 +50,10 @@ for transform in (:permute, :transpose)
             TC = VectorInterface.promote_scale(ΔC, α)
             if scalartype(ΔA) <: Real && !(TC <: Real)
                 ΔAc = TO.tensoralloc_add(TC, ΔC, pΔA, false, Val(false))
-                TK.$add_transform!(ΔAc, ΔC, pΔA, conj(α), Zero(), ba...)
+                TK.$transform!(ΔAc, ΔC, pΔA, conj(α), Zero(), ba...)
                 add!(ΔA, real(ΔAc))
             else
-                TK.$add_transform!(ΔA, ΔC, pΔA, conj(α), One(), ba...)
+                TK.$transform!(ΔA, ΔC, pΔA, conj(α), One(), ba...)
             end
             ΔAr = NoRData()
 
@@ -64,7 +64,7 @@ for transform in (:permute, :transpose)
             return NoRData(), ΔCr, ΔAr, NoRData(), Δαr, Δβr, map(Returns(NoRData()), ba)...
         end
 
-        return C_ΔC, $add_transform_pullback
+        return C_ΔC, $transform_pullback
     end
 end
 
@@ -72,7 +72,7 @@ end
     DefaultCtx,
     ReverseMode,
     Tuple{
-        typeof(TK.add_braid!),
+        typeof(TK.braid!),
         AbstractTensorMap,
         AbstractTensorMap, Index2Tuple, IndexTuple,
         Number, Number, Vararg{Any},
@@ -80,7 +80,7 @@ end
 )
 
 function Mooncake.rrule!!(
-        ::CoDual{typeof(TK.add_braid!)},
+        ::CoDual{typeof(TK.braid!)},
         C_ΔC::CoDual{<:AbstractTensorMap},
         A_ΔA::CoDual{<:AbstractTensorMap}, p_Δp::CoDual{<:Index2Tuple}, levels_Δlevels::CoDual{<:IndexTuple},
         α_Δα::CoDual{<:Number}, β_Δβ::CoDual{<:Number},
@@ -98,17 +98,17 @@ function Mooncake.rrule!!(
 
     # if we need to compute Δa, it is faster to allocate an intermediate braided A
     # and store that instead of repeating the permutation in the pullback each time.
-    # effectively, we replace `add_permute` by `add ∘ permute`.
+    # effectively, we replace `braid!` by `add ∘ braid`.
     Ap = if _needs_tangent(α)
         Ap = braid(A, p, levels)
         add!(C, Ap, α, β)
         Ap
     else
-        TK.add_braid!(C, A, p, levels, α, β, ba...)
+        TK.braid!(C, A, p, levels, α, β, ba...)
         nothing
     end
 
-    function add_braid!_pullback(::NoRData)
+    function braid!_pullback(::NoRData)
         copy!(C, C_cache)
 
         # ΔA
@@ -118,10 +118,10 @@ function Mooncake.rrule!!(
         TC = VectorInterface.promote_scale(ΔC, α)
         if scalartype(ΔA) <: Real && !(TC <: Real)
             ΔAc = TO.tensoralloc_add(TC, ΔC, pΔA, false, Val(false))
-            TK.add_braid!(ΔAc, ΔC, pΔA, ilevels, conj(α), Zero(), ba...)
+            TK.braid!(ΔAc, ΔC, pΔA, ilevels, conj(α), Zero(), ba...)
             add!(ΔA, real(ΔAc))
         else
-            TK.add_braid!(ΔA, ΔC, pΔA, ilevels, conj(α), One(), ba...)
+            TK.braid!(ΔA, ΔC, pΔA, ilevels, conj(α), One(), ba...)
         end
         ΔAr = NoRData()
 
@@ -132,7 +132,7 @@ function Mooncake.rrule!!(
         return NoRData(), ΔCr, ΔAr, NoRData(), NoRData(), Δαr, Δβr, map(Returns(NoRData()), ba)...
     end
 
-    return C_ΔC, add_braid!_pullback
+    return C_ΔC, braid!_pullback
 end
 
 # both are needed for correctly capturing every dispatch
diff --git a/ext/TensorKitMooncakeExt/planaroperations.jl b/ext/TensorKitMooncakeExt/planaroperations.jl
index 3c75fe2da..abbef5004 100644
--- a/ext/TensorKitMooncakeExt/planaroperations.jl
+++ b/ext/TensorKitMooncakeExt/planaroperations.jl
@@ -60,7 +60,7 @@
 #     if length(q[1]) == 0
 #         ip = invperm(linearize(p))
 #         pΔA = _repartition(ip, A)
-#         TK.add_transpose!(ΔA, ΔC, pΔA, conj(α), One(), backend, allocator)
+#         TK.transpose!(ΔA, ΔC, pΔA, conj(α), One(), backend, allocator)
 #         return NoRData()
 #     end
 #     # if length(q[1]) == 1
diff --git a/src/TensorKit.jl b/src/TensorKit.jl
index d8361ac79..6a2828588 100644
--- a/src/TensorKit.jl
+++ b/src/TensorKit.jl
@@ -91,8 +91,7 @@ export left_orth, right_orth, left_null, right_null,
     isisometric, isunitary, project_isometric, project_isometric!,
     isposdef, isposdef!, sylvester, rank, cond
 
-export braid, braid!, permute, permute!, transpose, transpose!, twist, twist!, repartition,
-    repartition!
+export braid, braid!, permute, permute!, transpose, transpose!, twist, twist!, repartition, repartition!
 export catdomain, catcodomain, absorb, absorb!
 
 # tensor operations
@@ -150,6 +149,8 @@ import Base.Meta
 
 using Random: Random, rand!, randn!
 
+using Adapt: Adapt
+
 # Auxiliary files
 #-----------------
 include("auxiliary/auxiliary.jl")
diff --git a/src/planar/planaroperations.jl b/src/planar/planaroperations.jl
index cde772982..758bb708a 100644
--- a/src/planar/planaroperations.jl
+++ b/src/planar/planaroperations.jl
@@ -32,7 +32,7 @@ function planaradd!(
         α::Number, β::Number,
         backend, allocator
     )
-    return add_transpose!(C, A, p, α, β, backend)
+    return transpose!(C, A, p, α, β, backend)
 end
 
 # insert default backend
@@ -173,7 +173,7 @@ function planarcontract!(
         A′ = TO.tensoralloc_add(
             scalartype(A), A, (oindA, cindA), false, Val(true), allocator
         )
-        add_transpose!(A′, A, (oindA, cindA), One(), Zero(), backend)
+        transpose!(A′, A, (oindA, cindA), One(), Zero(), backend)
     end
 
     if cindB == codB && oindB == domB
@@ -182,7 +182,7 @@ function planarcontract!(
         B′ = TensorOperations.tensoralloc_add(
             scalartype(B), B, (cindB, oindB), false, Val(true), allocator
         )
-        add_transpose!(B′, B, (cindB, oindB), One(), Zero(), backend)
+        transpose!(B′, B, (cindB, oindB), One(), Zero(), backend)
     end
     mul!(C, A′, B′, α, β)
     (oindA == codA && cindA == domA) || TO.tensorfree!(A′, allocator)
diff --git a/src/tensors/adjoint.jl b/src/tensors/adjoint.jl
index ca484e77b..820e87375 100644
--- a/src/tensors/adjoint.jl
+++ b/src/tensors/adjoint.jl
@@ -50,6 +50,8 @@ Base.@propagate_inbounds function subblock(t::AdjointTensorMap, (f₁, f₂)::Tu
     return permutedims(conj(data), (domainind(tp)..., codomainind(tp)...))
 end
 
+Adapt.adapt_structure(to, x::AdjointTensorMap) = adjoint(Adapt.adapt(to, parent(x)))
+
 # Show
 #------
 function Base.showarg(io::IO, t::AdjointTensorMap, toplevel::Bool)
diff --git a/src/tensors/braidingtensor.jl b/src/tensors/braidingtensor.jl
index d28b2e1df..a1b7dbd02 100644
--- a/src/tensors/braidingtensor.jl
+++ b/src/tensors/braidingtensor.jl
@@ -53,6 +53,14 @@ function BraidingTensor{T}(V::HomSpace, adjoint::Bool = false) where {T}
     return BraidingTensor{T}(V[2], V[1], adjoint)
 end
 
+function Adapt.adapt_structure(::Type{T}, x::BraidingTensor{T′, S, A}) where {T <: Number, T′, S, A}
+    A′ = TensorKit.similarstoragetype(A, T)
+    return BraidingTensor{T, S, A′}(space(x), x.adjoint)
+end
+function Adapt.adapt_structure(::Type{TA}, x::BraidingTensor{T, S, A}) where {T′, TA <: DenseArray{T′}, T, S, A}
+    return BraidingTensor{T′, S, TA}(space(x), x.adjoint)
+end
+
 function Base.adjoint(b::BraidingTensor{T, S, A}) where {T, S, A}
     return BraidingTensor{T, S, A}(b.V1, b.V2, !b.adjoint)
 end
@@ -221,11 +229,11 @@ function planarcontract!(
 
     I = sectortype(C)
     BraidingStyle(I) isa Bosonic &&
-        return add_permute!(C, B, (reverse(cindB), oindB), α, β, backend)
+        return permute!(C, B, (reverse(cindB), oindB), α, β, backend, allocator)
 
     # Non-bosonic case: factor into a cyclic transpose (no crossings) + a single Artin braid
     # that swaps the two contracted legs, producing the R-symbol that A encodes. Naively
-    # using a single `add_braid!` is wrong: it would resolve cyclic moves as crossings and
+    # using a single `braid!` is wrong: it would resolve cyclic moves as crossings and
     # pick up spurious R-symbol factors.
     B_in_layout = (cindB == codB && oindB == domB)
     if B_in_layout
@@ -234,7 +242,7 @@ function planarcontract!(
         B′ = TO.tensoralloc_add(
             scalartype(B), B, (cindB, oindB), false, Val(true), allocator
         )
-        add_transpose!(B′, B, (cindB, oindB), One(), Zero(), backend)
+        transpose!(B′, B, (cindB, oindB), One(), Zero(), backend, allocator)
     end
 
     levelsA = A.adjoint ? (1, 2, 2, 1) : (2, 1, 1, 2)
@@ -244,9 +252,9 @@ function planarcontract!(
         ntuple(Returns(3), N - 2)...,
     )
 
-    add_braid!(
+    braid!(
         C, B′, ((2, 1), ntuple(i -> i + 2, N - 2)),
-        levels, α, β, backend,
+        levels, α, β, backend, allocator
     )
 
     B_in_layout || TO.tensorfree!(B′, allocator)
@@ -274,11 +282,11 @@ function planarcontract!(
 
     I = sectortype(C)
     BraidingStyle(I) isa Bosonic &&
-        return add_permute!(C, A, (oindA, reverse(cindA)), α, β, backend)
+        return permute!(C, A, (oindA, reverse(cindA)), α, β, backend, allocator)
 
     # Non-bosonic case: cyclic transpose A → (oindA, cindA) (no crossings), then a single
     # Artin braid swaps A′'s last two indices, producing the R-symbol that B encodes. Naively
-    # using a single `add_braid!` is wrong: it would resolve cyclic moves as crossings and
+    # using a single `braid!` is wrong: it would resolve cyclic moves as crossings and
     # pick up spurious R-symbol factors.
 
     A_in_layout = (oindA == codA && cindA == domA)
@@ -288,7 +296,7 @@ function planarcontract!(
         A′ = TO.tensoralloc_add(
             scalartype(A), A, (oindA, cindA), false, Val(true), allocator
         )
-        add_transpose!(A′, A, (oindA, cindA), One(), Zero(), backend)
+        transpose!(A′, A, (oindA, cindA), One(), Zero(), backend, allocator)
     end
 
     levelsB = B.adjoint ? (1, 2, 2, 1) : (2, 1, 1, 2)
@@ -299,9 +307,9 @@ function planarcontract!(
         levelsB[cindB[1]], levelsB[cindB[2]],
     )
 
-    add_braid!(
+    braid!(
         C, A′, (ntuple(identity, M), (N, N - 1)),
-        levels, α, β, backend,
+        levels, α, β, backend, allocator
     )
 
     A_in_layout || TO.tensorfree!(A′, allocator)
diff --git a/src/tensors/diagonal.jl b/src/tensors/diagonal.jl
index b2ac4134b..5fc6682e3 100644
--- a/src/tensors/diagonal.jl
+++ b/src/tensors/diagonal.jl
@@ -133,6 +133,11 @@ function Base.convert(::Type{DiagonalTensorMap}, d::Dict{Symbol, Any})
     return convert(DiagonalTensorMap, convert(TensorMap, d))
 end
 
+function Adapt.adapt_structure(to, x::DiagonalTensorMap)
+    data′ = Adapt.adapt(to, x.data)
+    return DiagonalTensorMap(data′, x.domain)
+end
+
 # Complex, real and imaginary parts
 #-----------------------------------
 for f in (:real, :imag, :complex)
diff --git a/src/tensors/indexmanipulations.jl b/src/tensors/indexmanipulations.jl
index 3108abb17..655f0ca7f 100644
--- a/src/tensors/indexmanipulations.jl
+++ b/src/tensors/indexmanipulations.jl
@@ -1,28 +1,10 @@
-# Index manipulations
-#---------------------
-
-# find the scalartype after applying operations: take into account fusion and/or braiding
-# might need to become Float or Complex to capture complex recoupling coefficients but don't alter precision
-for (operation, manipulation) in (
-        :flip => :sector, :twist => :braiding,
-        :transpose => :fusion, :permute => :sector, :braid => :sector,
-    )
-    promote_op = Symbol(:promote_, operation)
-    manipulation_scalartype = Symbol(manipulation, :scalartype)
-
-    @eval begin
-        $promote_op(t::AbstractTensorMap) = $promote_op(typeof(t))
-        $promote_op(::Type{T}) where {T <: AbstractTensorMap} =
-            $promote_op(scalartype(T), sectortype(T))
-        $promote_op(::Type{T}, ::Type{I}) where {T <: Number, I <: Sector} =
-            sectorscalartype(I) <: Integer ? T :
-            sectorscalartype(I) <: Real ? float(T) : complex(T)
-        # TODO: currently the manipulations all use sectorscalartype, change to:
-        # $manipulation_scalartype(I) <: Integer ? T :
-        # $manipulation_scalartype(I) <: Real ? float(T) : complex(T)
-    end
-end
+# =============
+#  Reweighting
+# =============
 
+# ------
+# flip
+# ------
 """
     flip(t::AbstractTensorMap, I) -> t′::AbstractTensorMap
 
@@ -46,33 +28,221 @@ function flip(t::AbstractTensorMap, I; inv::Bool = false)
     return t′
 end
 
+# ---------
+# twist(!)
+# ---------
+function has_shared_twist(t, inds)
+    I = sectortype(t)
+    if BraidingStyle(I) == NoBraiding()
+        for i in inds
+            cs = sectors(space(t, i))
+            all(isunit, cs) || throw(SectorMismatch(lazy"Cannot twist sectors $cs"))
+        end
+        return true
+    elseif BraidingStyle(I) == Bosonic()
+        return true
+    else
+        for i in inds
+            cs = sectors(space(t, i))
+            all(isone ∘ twist, cs) || return false
+        end
+        return true
+    end
+end
+
+"""
+    twist!(t::AbstractTensorMap, i::Int; inv::Bool = false) -> t
+    twist!(t::AbstractTensorMap, inds; inv::Bool = false) -> t
+
+Apply a twist to the `i`th index of `t`, or all indices in `inds`, storing the result in `t`.
+If `inv=true`, use the inverse twist.
+
+See [`twist`](@ref) for creating a new tensor.
+"""
+function twist!(t::AbstractTensorMap, inds; inv::Bool = false)
+    if !all(in(allind(t)), inds)
+        msg = "Can't twist indices $inds of a tensor with only $(numind(t)) indices."
+        throw(ArgumentError(msg))
+    end
+    (scalartype(t) <: Real && !(sectorscalartype(sectortype(t)) <: Real)) &&
+        throw(ArgumentError("Can't in-place twist a real tensor with complex sector type"))
+    has_shared_twist(t, inds) && return t
+
+    N₁ = numout(t)
+    for (f₁, f₂) in fusiontrees(t)
+        θ = prod(i -> i <= N₁ ? twist(f₁.uncoupled[i]) : twist(f₂.uncoupled[i - N₁]), inds)
+        inv && (θ = θ')
+        scale!(t[f₁, f₂], θ)
+    end
+    return t
+end
+
+"""
+    twist(tsrc::AbstractTensorMap, i::Int; inv::Bool = false, copy::Bool = false) -> tdst
+    twist(tsrc::AbstractTensorMap, inds; inv::Bool = false, copy::Bool = false) -> tdst
+
+Apply a twist to the `i`th index of `tsrc` and return the result as a new tensor.
+If `inv = true`, use the inverse twist.
+If `copy = false`, `tdst` might share data with `tsrc` whenever possible. Otherwise, a copy is always made.
+
+See [`twist!`](@ref) for storing the result in place.
+"""
+function twist(t::AbstractTensorMap, inds; inv::Bool = false, copy::Bool = false)
+    if has_shared_twist(t, inds)
+        return copy ? Base.copy(t) : t
+    end
+    tdst = similar(t, promote_twist(t))
+    copy!(tdst, t)
+    return twist!(tdst, inds; inv)
+end
+
+# =========================
+#  Space insertion/removal
+# =========================
+
+# Methods which change the number of indices, implement using `Val(i)` for type inference
+"""
+    insertleftunit(
+            tsrc::AbstractTensorMap, i = numind(t) + 1;
+            conj = false, dual = false, copy = false
+        ) -> tdst
+
+Insert a trivial vector space, isomorphic to the underlying field, at position `i`,
+which can be specified as an `Int` or as `Val(i)` for improved type stability.
+More specifically, adds a left monoidal unit or its dual.
+Insert a trivial vector space, isomorphic to the underlying field, before position `i`,
+which should satisfy `1 ≤ i ≤ numind(t) + 1`
+and can be specified as an `Int` or as `Val(i)` for improved type stability,
+More specifically, add a left monoidal unit (or its dual) of the space associated with index `i`.
+The new index appears at position `i` in the new tensor,
+namely in its codomain for `1 ≤ i ≤ numout(t)` and in its domain otherwise.
+If `copy=false`, `tdst` might share data with `tsrc` whenever possible. Otherwise, a copy is always made.
+
+See also [`insertrightunit`](@ref insertrightunit(::AbstractTensorMap, ::Val{i}) where {i}),
+[`removeunit`](@ref removeunit(::AbstractTensorMap, ::Val{i}) where {i}).
+"""
+function insertleftunit(
+        t::AbstractTensorMap, ::Val{i} = Val(numind(t) + 1);
+        copy::Bool = false, conj::Bool = false, dual::Bool = false
+    ) where {i}
+    W = insertleftunit(space(t), Val(i); conj, dual)
+    if t isa TensorMap
+        return TensorMap{scalartype(t)}(copy ? Base.copy(t.data) : t.data, W)
+    else
+        tdst = similar(t, W)
+        for (c, b) in blocks(t)
+            copy!(block(tdst, c), b)
+        end
+        return tdst
+    end
+end
+
+"""
+    insertrightunit(
+            tsrc::AbstractTensorMap, i = numind(t);
+            conj = false, dual = false, copy = false
+        ) -> tdst
+
+Insert a trivial vector space, isomorphic to the underlying field, after position `i`,
+which should satisfy `0 ≤ i ≤ numind(t)`
+and can be specified as an `Int` or as `Val(i)` for improved type stability,
+More specifically, add a right monoidal unit (or its dual) of the space associated with index `i`.
+The new index appears at position `i+1` in the new tensor,
+namely in its codomain for `0 ≤ i ≤ numout(t)` and in its domain otherwise.
+
+If `copy=false`, `tdst` might share data with `tsrc` whenever possible. Otherwise, a copy is always made.
+
+See also [`insertleftunit`](@ref insertleftunit(::AbstractTensorMap, ::Val{i}) where {i}),
+[`removeunit`](@ref removeunit(::AbstractTensorMap, ::Val{i}) where {i}).
+"""
+function insertrightunit(
+        t::AbstractTensorMap, ::Val{i} = Val(numind(t));
+        copy::Bool = false, conj::Bool = false, dual::Bool = false
+    ) where {i}
+    W = insertrightunit(space(t), Val(i); conj, dual)
+    if t isa TensorMap
+        return TensorMap{scalartype(t)}(copy ? Base.copy(t.data) : t.data, W)
+    else
+        tdst = similar(t, W)
+        for (c, b) in blocks(t)
+            copy!(block(tdst, c), b)
+        end
+        return tdst
+    end
+end
+
+"""
+    removeunit(tsrc::AbstractTensorMap, i; copy = false) -> tdst
+
+This removes a trivial tensor product factor at position `1 ≤ i ≤ N`, where `i`
+can be specified as an `Int` or as `Val(i)` for improved type stability.
+For this to work, that factor has to be isomorphic to the field of scalars.
+
+If `copy=false`, `tdst` might share data with `tsrc` whenever possible. Otherwise, a copy is always made.
+
+This operation undoes the work of [`insertleftunit`](@ref insertleftunit(::AbstractTensorMap, ::Val{i}) where {i})
+and [`insertrightunit`](@ref insertrightunit(::AbstractTensorMap, ::Val{i}) where {i}).
+"""
+function removeunit(t::AbstractTensorMap, ::Val{i}; copy::Bool = false) where {i}
+    W = removeunit(space(t), Val(i))
+    if t isa TensorMap
+        return TensorMap{scalartype(t)}(copy ? Base.copy(t.data) : t.data, W)
+    else
+        tdst = similar(t, W)
+        for (c, b) in blocks(t)
+            copy!(block(tdst, c), b)
+        end
+        return tdst
+    end
+end
+
+# TODO: fusion/splitting of indices
+
+# ============================
+# Index rearrangements
+# ============================
+
+# --------------
+#   permute(!)
+# --------------
 """
-    permute!(tdst::AbstractTensorMap, tsrc::AbstractTensorMap, (p₁, p₂)::Index2Tuple)
-        -> tdst
+    permute!(tdst, tsrc, (p₁, p₂)::Index2Tuple, α = 1, β = 0, [backend], [allocator]) -> tdst
 
-Write into `tdst` the result of permuting the indices of `tsrc`.
+Compute `tdst = β * tdst + α * permute(tsrc, (p₁, p₂))`, writing the result into `tdst`.
 The codomain and domain of `tdst` correspond to the indices in `p₁` and `p₂` of `tsrc` respectively.
-                
-See [`permute`](@ref) for creating a new tensor and [`add_permute!`](@ref) for a more general version.
+Optionally specify a `backend` and `allocator` for the underlying array operation.
+
+See also [`permute`](@ref) for creating a new tensor.
 """
 @propagate_inbounds function Base.permute!(
-        tdst::AbstractTensorMap, tsrc::AbstractTensorMap, p::Index2Tuple
+        tdst::AbstractTensorMap, tsrc::AbstractTensorMap, p::Index2Tuple,
+        α::Number = One(), β::Number = Zero(),
+        backend::AbstractBackend = TO.DefaultBackend(), allocator = TO.DefaultAllocator()
     )
-    return add_permute!(tdst, tsrc, p, One(), Zero())
+    @boundscheck spacecheck_transform(permute, tdst, tsrc, p)
+    levels = ntuple(identity, numind(tsrc))
+    return @inbounds braid!(tdst, tsrc, p, levels, α, β, backend, allocator)
 end
 
 """
-    permute(tsrc::AbstractTensorMap, (p₁, p₂)::Index2Tuple; copy::Bool = false) -> tdst::TensorMap
+    permute(
+        tsrc, (p₁, p₂)::Index2Tuple; copy = false,
+        backend = DefaultBackend(), allocator = DefaultAllocator()
+    ) -> tdst::TensorMap    
 
 Return tensor `tdst` obtained by permuting the indices of `tsrc`.
 The codomain and domain of `tdst` correspond to the indices in `p₁` and `p₂` of `tsrc` respectively.
 
 If `copy = false`, `tdst` might share data with `tsrc` whenever possible.
 Otherwise, a copy is always made.
+Optionally specify a `backend` and `allocator` for the underlying array operation.
 
-To permute into an existing destination, see [permute!](@ref) and [`add_permute!`](@ref)
+See also [`permute!`](@ref) for writing into an existing destination.
 """
-function permute(t::AbstractTensorMap, p::Index2Tuple; copy::Bool = false)
+function permute(
+        t::AbstractTensorMap, p::Index2Tuple;
+        copy::Bool = false, backend::AbstractBackend = TO.DefaultBackend(), allocator = TO.DefaultAllocator()
+    )
     # share data if possible
     if !copy
         if p == (codomainind(t), domainind(t))
@@ -84,14 +254,15 @@ function permute(t::AbstractTensorMap, p::Index2Tuple; copy::Bool = false)
 
     # general case
     tdst = similar(t, promote_permute(t), permute(space(t), p))
-    return @inbounds permute!(tdst, t, p)
+    levels = ntuple(identity, numind(t))
+    return @inbounds braid!(tdst, t, p, levels, One(), Zero(), backend, allocator)
 end
-function permute(t::AdjointTensorMap, (p₁, p₂)::Index2Tuple; copy::Bool = false)
+function permute(t::AdjointTensorMap, (p₁, p₂)::Index2Tuple; kwargs...)
     p₁′ = adjointtensorindices(t, p₂)
     p₂′ = adjointtensorindices(t, p₁)
-    return adjoint(permute(adjoint(t), (p₁′, p₂′); copy))
+    return adjoint(permute(adjoint(t), (p₁′, p₂′); kwargs...))
 end
-permute(t::AbstractTensorMap, p::IndexTuple; copy::Bool = false) = permute(t, (p, ()); copy)
+permute(t::AbstractTensorMap, p::IndexTuple; kwargs...) = permute(t, (p, ()); kwargs...)
 
 function has_shared_permute(t::AbstractTensorMap, (p₁, p₂)::Index2Tuple)
     return (p₁ === codomainind(t) && p₂ === domainind(t))
@@ -100,8 +271,8 @@ function has_shared_permute(t::TensorMap, (p₁, p₂)::Index2Tuple)
     if p₁ === codomainind(t) && p₂ === domainind(t)
         return true
     elseif sectortype(t) === Trivial
-        stridet = i -> stride(t[], i)
-        sizet = i -> size(t[], i)
+        stridet = Base.Fix1(stride, t[])
+        sizet = Base.Fix1(size, t[])
         canfuse1, d1, s1 = TO._canfuse(sizet.(p₁), stridet.(p₁))
         canfuse2, d2, s2 = TO._canfuse(sizet.(p₂), stridet.(p₂))
         return canfuse1 && canfuse2 && s1 == 1 && (d2 == 1 || s2 == d1)
@@ -115,29 +286,37 @@ function has_shared_permute(t::AdjointTensorMap, (p₁, p₂)::Index2Tuple)
     return has_shared_permute(t', (p₁′, p₂′))
 end
 
-# Braid
+# -------------
+#   braid(!)
+# -------------
 """
-    braid!(tdst::AbstractTensorMap, tsrc::AbstractTensorMap,
-           (p₁, p₂)::Index2Tuple, levels::Tuple)
-        -> tdst
+    braid!(tdst, tsrc, (p₁, p₂)::Index2Tuple, levels::IndexTuple, α = 1, β = 0, [backend], [allocator]) -> tdst
 
-Write into `tdst` the result of braiding the indices of `tsrc`.
+Compute `tdst = β * tdst + α * braid(tsrc, (p₁, p₂), levels)`, writing the result into `tdst`.
 The codomain and domain of `tdst` correspond to the indices in `p₁` and `p₂` of `tsrc` respectively.
 Here, `levels` is a tuple of length `numind(tsrc)` that assigns a level or height to the indices of `tsrc`,
 which determines whether they will braid over or under any other index with which they have to change places.
+Optionally specify a `backend` and `allocator` for the underlying array operation.
 
-See [`braid`](@ref) for creating a new tensor and [`add_braid!`](@ref) for a more general version.
+See also [`braid`](@ref) for creating a new tensor.
 """
 @propagate_inbounds function braid!(
-        tdst::AbstractTensorMap, tsrc::AbstractTensorMap, p::Index2Tuple, levels::IndexTuple
+        tdst::AbstractTensorMap, tsrc::AbstractTensorMap, p::Index2Tuple, levels::IndexTuple,
+        α::Number = One(), β::Number = Zero(),
+        backend::AbstractBackend = TO.DefaultBackend(), allocator = TO.DefaultAllocator()
     )
-    return add_braid!(tdst, tsrc, p, levels, One(), Zero())
+    @boundscheck spacecheck_transform(braid, tdst, tsrc, p, levels)
+    levels1 = TupleTools.getindices(levels, codomainind(tsrc))
+    levels2 = TupleTools.getindices(levels, domainind(tsrc))
+    transformer = treebraider(tdst, tsrc, p, (levels1, levels2))
+    return @inbounds add_transform!(tdst, tsrc, p, transformer, α, β, backend, allocator)
 end
 
 """
-    braid(tsrc::AbstractTensorMap, (p₁, p₂)::Index2Tuple, levels::IndexTuple;
-          copy::Bool = false)
-        -> tdst::TensorMap
+    braid(
+        tsrc, (p₁, p₂)::Index2Tuple, levels::IndexTuple; copy = false,
+        backend = DefaultBackend(), allocator = DefaultAllocator()
+    ) -> tdst::TensorMap
 
 Return tensor `tdst` obtained by braiding the indices of `tsrc`.
 The codomain and domain of `tdst` correspond to the indices in `p₁` and `p₂` of `tsrc` respectively.
@@ -145,277 +324,178 @@ Here, `levels` is a tuple of length `numind(tsrc)` that assigns a level or heigh
 which determines whether they will braid over or under any other index with which they have to change places.
 
 If `copy=false`, `tdst` might share data with `tsrc` whenever possible. Otherwise, a copy is always made.
+Optionally specify a `backend` and `allocator` for the underlying array operation.
 
-To braid into an existing destination, see [braid!](@ref) and [`add_braid!`](@ref)
+See also [`braid!`](@ref) for writing into an existing destination.
 """
 function braid(
-        t::AbstractTensorMap, p::Index2Tuple, levels::IndexTuple; copy::Bool = false
+        t::AbstractTensorMap, p::Index2Tuple, levels::IndexTuple;
+        copy::Bool = false, backend::AbstractBackend = TO.DefaultBackend(), allocator = TO.DefaultAllocator()
     )
-    length(levels) == numind(t) || throw(ArgumentError("invalid levels"))
+    length(levels) == numind(t) || throw(ArgumentError(lazy"length of levels should be $(numind(t)), got $(length(levels))"))
 
-    BraidingStyle(sectortype(t)) isa SymmetricBraiding && return permute(t, p; copy)
     (!copy && p == (codomainind(t), domainind(t))) && return t
 
     # general case
     tdst = similar(t, promote_braid(t), permute(space(t), p))
-    return @inbounds braid!(tdst, t, p, levels)
+    return @inbounds braid!(tdst, t, p, levels, One(), Zero(), backend, allocator)
+end
+function braid(
+        t::AdjointTensorMap, (p₁, p₂)::Index2Tuple, levels::IndexTuple;
+        kwargs...
+    )
+    p₁′ = adjointtensorindices(t, p₂)
+    p₂′ = adjointtensorindices(t, p₁)
+    perm = adjointtensorindices(adjoint(t), ntuple(identity, numind(t)))
+    levels′ = TupleTools.getindices(levels, perm)
+    return adjoint(braid(adjoint(t), (p₁′, p₂′), levels′; kwargs...))
 end
-# TODO: braid for `AdjointTensorMap`; think about how to map the `levels` argument.
 
-# Transpose
+# ----------------
+#   transpose(!)
+# ----------------
 _transpose_indices(t::AbstractTensorMap) = (reverse(domainind(t)), reverse(codomainind(t)))
 
 """
-    transpose!(tdst::AbstractTensorMap, tsrc::AbstractTensorMap,
-               (p₁, p₂)::Index2Tuple)
-        -> tdst
+    transpose!(tdst, tsrc, (p₁, p₂)::Index2Tuple, α = 1, β = 0, [backend], [allocator]) -> tdst
 
-Write into `tdst` the result of transposing the indices of `tsrc`.
+Compute `tdst = β * tdst + α * transpose(tsrc, (p₁, p₂))`, writing the result into `tdst`.
 The codomain and domain of `tdst` correspond to the indices in `p₁` and `p₂` of `tsrc` respectively.
 The new index positions should be attainable without any indices crossing each other, i.e.,
-the permutation `(p₁..., reverse(p₂)...)` should constitute a cyclic permutation of `(codomainind(tsrc)..., reverse(domainind(tsrc))...)`.
+the permutation `(p₁..., reverse(p₂)...)` should constitute a cyclic permutation of
+`(codomainind(tsrc)..., reverse(domainind(tsrc))...)`.
+Optionally specify a `backend` and `allocator` for the underlying array operation.
 
-See [`transpose`](@ref) for creating a new tensor and [`add_transpose!`](@ref) for a more general version.
+See also [`transpose`](@ref) for creating a new tensor.
 """
+function LinearAlgebra.transpose!(tdst::AbstractTensorMap, tsrc::AbstractTensorMap)
+    return transpose!(tdst, tsrc, _transpose_indices(tsrc))
+end
 @propagate_inbounds function LinearAlgebra.transpose!(
-        tdst::AbstractTensorMap, tsrc::AbstractTensorMap, (p₁, p₂)::Index2Tuple = _transpose_indices(tsrc)
+        tdst::AbstractTensorMap, tsrc::AbstractTensorMap, p::Index2Tuple,
+        α::Number = One(), β::Number = Zero(),
+        backend::AbstractBackend = TO.DefaultBackend(), allocator = TO.DefaultAllocator()
     )
-    return add_transpose!(tdst, tsrc, (p₁, p₂), One(), Zero())
+    @boundscheck spacecheck_transform(transpose, tdst, tsrc, p)
+    transformer = treetransposer(tdst, tsrc, p)
+    return @inbounds add_transform!(tdst, tsrc, p, transformer, α, β, backend, allocator)
 end
 
 """
-    transpose(tsrc::AbstractTensorMap, (p₁, p₂)::Index2Tuple;
-              copy::Bool=false)
-        -> tdst::TensorMap
+    transpose(
+        tsrc, (p₁, p₂)::Index2Tuple; copy = false,
+        backend = DefaultBackend(), allocator = DefaultAllocator()
+    ) -> tdst::TensorMap
 
 Return tensor `tdst` obtained by transposing the indices of `tsrc`.
 The codomain and domain of `tdst` correspond to the indices in `p₁` and `p₂` of `tsrc` respectively.
 The new index positions should be attainable without any indices crossing each other, i.e.,
-the permutation `(p₁..., reverse(p₂)...)` should constitute a cyclic permutation of `(codomainind(tsrc)..., reverse(domainind(tsrc))...)`.
+the permutation `(p₁..., reverse(p₂)...)` should constitute a cyclic permutation of
+`(codomainind(tsrc)..., reverse(domainind(tsrc))...)`.
 
 If `copy=false`, `tdst` might share data with `tsrc` whenever possible. Otherwise, a copy is always made.
+Optionally specify a `backend` and `allocator` for the underlying array operation.
 
-To permute into an existing destination, see [permute!](@ref) and [`add_permute!`](@ref)
+See also [`transpose!`](@ref) for writing into an existing destination.
 """
 function LinearAlgebra.transpose(
         t::AbstractTensorMap, p::Index2Tuple = _transpose_indices(t);
-        copy::Bool = false
+        copy::Bool = false, backend = TO.DefaultBackend(), allocator = TO.DefaultAllocator()
     )
-    sectortype(t) === Trivial && return permute(t, p; copy)
+    sectortype(t) === Trivial && return permute(t, p; copy, backend, allocator)
     (!copy && p == (codomainind(t), domainind(t))) && return t
 
     # general case
     tdst = similar(t, promote_transpose(t), permute(space(t), p))
-    return @inbounds transpose!(tdst, t, p)
+    return @inbounds transpose!(tdst, t, p, One(), Zero(), backend, allocator)
 end
 
 function LinearAlgebra.transpose(
         t::AdjointTensorMap, (p₁, p₂)::Index2Tuple = _transpose_indices(t);
-        copy::Bool = false
+        copy::Bool = false, backend = TO.DefaultBackend(), allocator = TO.DefaultAllocator()
     )
     p₁′ = map(n -> adjointtensorindex(t, n), p₂)
     p₂′ = map(n -> adjointtensorindex(t, n), p₁)
-    return adjoint(transpose(adjoint(t), (p₁′, p₂′); copy = copy))
+    return adjoint(transpose(adjoint(t), (p₁′, p₂′); copy, backend, allocator))
 end
 
+# -------------------
+#   repartition(!)
+# -------------------
 """
-    repartition!(tdst::AbstractTensorMap, tsrc::AbstractTensorMap) -> tdst
+    repartition!(tdst, tsrc, α = 1, β = 0, [backend], [allocator]) -> tdst
 
-Write into `tdst` the result of repartitioning the indices of `tsrc`. This is just a special
-case of a transposition that only changes the number of in- and outgoing indices.
+Compute `tdst = β * tdst + α * repartition(tsrc)`, writing the result into `tdst`.
+This is a special case of `transpose!` that only changes the partition of indices between
+codomain and domain, without changing their cyclic order.
+Optionally specify a `backend` and `allocator` for the underlying array operation.
 
-See [`repartition`](@ref) for creating a new tensor.
+See also [`repartition`](@ref) for creating a new tensor.
 """
-@propagate_inbounds function repartition!(tdst::AbstractTensorMap, tsrc::AbstractTensorMap)
+@propagate_inbounds function repartition!(
+        tdst::AbstractTensorMap, tsrc::AbstractTensorMap,
+        α::Number = One(), β::Number = Zero(),
+        backend::AbstractBackend = TO.DefaultBackend(), allocator = TO.DefaultAllocator()
+    )
     check_spacetype(tdst, tsrc)
     numind(tsrc) == numind(tdst) ||
         throw(ArgumentError("tsrc and tdst should have an equal amount of indices"))
-    all_inds = (codomainind(tsrc)..., reverse(domainind(tsrc))...)
-    p₁ = ntuple(i -> all_inds[i], numout(tdst))
-    p₂ = reverse(ntuple(i -> all_inds[i + numout(tdst)], numin(tdst)))
-    return transpose!(tdst, tsrc, (p₁, p₂))
+    p₁, p₂ = let all_inds = (codomainind(tsrc)..., reverse(domainind(tsrc))...)
+        ntuple(i -> all_inds[i], numout(tdst)), reverse(ntuple(i -> all_inds[i + numout(tdst)], numin(tdst)))
+    end
+    return transpose!(tdst, tsrc, (p₁, p₂), α, β, backend, allocator)
 end
 
 """
     repartition(
-        tsrc::AbstractTensorMap{T, S}, N₁::Int, N₂::Int; copy::Bool=false
-    ) where {T, S} -> tdst::AbstractTensorMap{T, S, N₁, N₂}
+            tsrc, N₁::Int, N₂::Int = numind(tsrc) - N₁; copy = false,
+            backend = DefaultBackend(), allocator = DefaultAllocator()
+        ) -> tdst
 
-Return tensor `tdst` obtained by repartitioning the indices of `t`.
-The codomain and domain of `tdst` correspond to the first `N₁` and last `N₂` spaces of `t`, respectively.
+Return tensor `tdst` obtained by repartitioning the indices of `tsrc`.
+The codomain and domain of `tdst` correspond to the first `N₁` and last `N₂` spaces of `tsrc`,
+respectively.
 
 If `copy=false`, `tdst` might share data with `tsrc` whenever possible. Otherwise, a copy is always made.
+Optionally specify a `backend` and `allocator` for the underlying array operation.
 
-To repartition into an existing destination, see [repartition!](@ref).
+See also [`repartition!`](@ref) for writing into an existing destination.
 """
 @constprop :aggressive function repartition(
-        t::AbstractTensorMap, N₁::Int, N₂::Int = numind(t) - N₁; copy::Bool = false
+        t::AbstractTensorMap, N₁::Int, N₂::Int = numind(t) - N₁;
+        copy::Bool = false, backend = TO.DefaultBackend(), allocator = TO.DefaultAllocator()
     )
     N₁ + N₂ == numind(t) ||
         throw(ArgumentError("Invalid repartition: $(numind(t)) to ($N₁, $N₂)"))
-    all_inds = (codomainind(t)..., reverse(domainind(t))...)
-    p₁ = ntuple(i -> all_inds[i], N₁)
-    p₂ = reverse(ntuple(i -> all_inds[i + N₁], N₂))
-    return transpose(t, (p₁, p₂); copy)
-end
-
-# Twist
-function has_shared_twist(t, inds)
-    I = sectortype(t)
-    if BraidingStyle(I) == NoBraiding()
-        for i in inds
-            cs = sectors(space(t, i))
-            all(isunit, cs) || throw(SectorMismatch(lazy"Cannot twist sectors $cs"))
-        end
-        return true
-    elseif BraidingStyle(I) == Bosonic()
-        return true
-    else
-        for i in inds
-            cs = sectors(space(t, i))
-            all(isone ∘ twist, cs) || return false
-        end
-        return true
-    end
-end
-
-"""
-    twist!(t::AbstractTensorMap, i::Int; inv::Bool=false) -> t
-    twist!(t::AbstractTensorMap, inds; inv::Bool=false) -> t
-
-Apply a twist to the `i`th index of `t`, or all indices in `inds`, storing the result in `t`.
-If `inv=true`, use the inverse twist.
-
-See [`twist`](@ref) for creating a new tensor.
-"""
-function twist!(t::AbstractTensorMap, inds; inv::Bool = false)
-    if !all(in(allind(t)), inds)
-        msg = "Can't twist indices $inds of a tensor with only $(numind(t)) indices."
-        throw(ArgumentError(msg))
-    end
-    (scalartype(t) <: Real && !(sectorscalartype(sectortype(t)) <: Real)) &&
-        throw(ArgumentError("Can't in-place twist a real tensor with complex sector type"))
-    has_shared_twist(t, inds) && return t
-
-    (scalartype(t) <: Real && !(sectorscalartype(sectortype(t)) <: Real)) &&
-        throw(ArgumentError("No in-place `twist!` for a real tensor with complex sector type"))
-
-    N₁ = numout(t)
-    for (f₁, f₂) in fusiontrees(t)
-        θ = prod(i -> i <= N₁ ? twist(f₁.uncoupled[i]) : twist(f₂.uncoupled[i - N₁]), inds)
-        inv && (θ = θ')
-        scale!(t[f₁, f₂], θ)
-    end
-    return t
-end
-
-"""
-    twist(tsrc::AbstractTensorMap, i::Int; inv::Bool = false, copy::Bool = false) -> tdst
-    twist(tsrc::AbstractTensorMap, inds; inv::Bool = false, copy::Bool = false) -> tdst
-
-Apply a twist to the `i`th index of `tsrc` and return the result as a new tensor.
-If `inv = true`, use the inverse twist.
-If `copy = false`, `tdst` might share data with `tsrc` whenever possible. Otherwise, a copy is always made.
-
-See [`twist!`](@ref) for storing the result in place.
-"""
-function twist(t::AbstractTensorMap, inds; inv::Bool = false, copy::Bool = false)
-    !copy && has_shared_twist(t, inds) && return t
-    tdst = similar(t, promote_twist(t))
-    copy!(tdst, t)
-    return twist!(tdst, inds; inv)
-end
-
-# Methods which change the number of indices, implement using `Val(i)` for type inference
-"""
-    insertleftunit(tsrc::AbstractTensorMap, i=numind(t) + 1;
-                   conj=false, dual=false, copy=false) -> tdst
-
-Insert a trivial vector space, isomorphic to the underlying field, at position `i`,
-which can be specified as an `Int` or as `Val(i)` for improved type stability.
-More specifically, adds a left monoidal unit or its dual.
-
-If `copy=false`, `tdst` might share data with `tsrc` whenever possible. Otherwise, a copy is always made.
-
-See also [`insertrightunit`](@ref insertrightunit(::AbstractTensorMap, ::Val{i}) where {i}),
-[`removeunit`](@ref removeunit(::AbstractTensorMap, ::Val{i}) where {i}).
-"""
-function insertleftunit(
-        t::AbstractTensorMap, ::Val{i} = Val(numind(t) + 1);
-        copy::Bool = false, conj::Bool = false, dual::Bool = false
-    ) where {i}
-    W = insertleftunit(space(t), Val(i); conj, dual)
-    if t isa TensorMap
-        return TensorMap{scalartype(t)}(copy ? Base.copy(t.data) : t.data, W)
-    else
-        tdst = similar(t, W)
-        for (c, b) in blocks(t)
-            copy!(block(tdst, c), b)
-        end
-        return tdst
+    p₁, p₂ = let all_inds = (codomainind(t)..., reverse(domainind(t))...)
+        ntuple(i -> all_inds[i], N₁), reverse(ntuple(i -> all_inds[i + N₁], N₂))
     end
+    return transpose(t, (p₁, p₂); copy, backend, allocator)
 end
 
-"""
-    insertrightunit(tsrc::AbstractTensorMap, i=numind(t);
-                    conj=false, dual=false, copy=false) -> tdst
-
-Insert a trivial vector space, isomorphic to the underlying field, after position `i`,
-which can be specified as an `Int` or as `Val(i)` for improved type stability.
-More specifically, adds a right monoidal unit or its dual.
-
-If `copy=false`, `tdst` might share data with `tsrc` whenever possible. Otherwise, a copy is always made.
-
-See also [`insertleftunit`](@ref insertleftunit(::AbstractTensorMap, ::Val{i}) where {i}),
-[`removeunit`](@ref removeunit(::AbstractTensorMap, ::Val{i}) where {i}).
-"""
-function insertrightunit(
-        t::AbstractTensorMap, ::Val{i} = Val(numind(t));
-        copy::Bool = false, conj::Bool = false, dual::Bool = false
-    ) where {i}
-    W = insertrightunit(space(t), Val(i); conj, dual)
-    if t isa TensorMap
-        return TensorMap{scalartype(t)}(copy ? Base.copy(t.data) : t.data, W)
-    else
-        tdst = similar(t, W)
-        for (c, b) in blocks(t)
-            copy!(block(tdst, c), b)
-        end
-        return tdst
-    end
-end
-
-"""
-    removeunit(tsrc::AbstractTensorMap, i; copy=false) -> tdst
-
-This removes a trivial tensor product factor at position `1 ≤ i ≤ N`, where `i`
-can be specified as an `Int` or as `Val(i)` for improved type stability.
-For this to work, that factor has to be isomorphic to the field of scalars.
+#-------------------------------------
+# Internal implementations
+#-------------------------------------
 
-If `copy=false`, `tdst` might share data with `tsrc` whenever possible. Otherwise, a copy is always made.
+# find the scalartype after applying operations: take into account fusion and/or braiding
+# might need to become Float or Complex to capture complex recoupling coefficients but don't alter precision
+for (operation, manipulation) in (
+        :flip => :sector, :twist => :braiding,
+        :transpose => :fusion, :permute => :sector, :braid => :sector,
+    )
+    promote_op = Symbol(:promote_, operation)
+    manipulation_scalartype = Symbol(manipulation, :scalartype)
 
-This operation undoes the work of [`insertleftunit`](@ref insertleftunit(::AbstractTensorMap, ::Val{i}) where {i}) 
-and [`insertrightunit`](@ref insertrightunit(::AbstractTensorMap, ::Val{i}) where {i}).
-"""
-function removeunit(t::AbstractTensorMap, ::Val{i}; copy::Bool = false) where {i}
-    W = removeunit(space(t), Val(i))
-    if t isa TensorMap
-        return TensorMap{scalartype(t)}(copy ? Base.copy(t.data) : t.data, W)
-    else
-        tdst = similar(t, W)
-        for (c, b) in blocks(t)
-            copy!(block(tdst, c), b)
-        end
-        return tdst
+    @eval begin
+        $promote_op(t::AbstractTensorMap) = $promote_op(typeof(t))
+        $promote_op(::Type{T}) where {T <: AbstractTensorMap} =
+            $promote_op(scalartype(T), sectortype(T))
+        $promote_op(::Type{T}, ::Type{I}) where {T <: Number, I <: Sector} =
+            $manipulation_scalartype(I) <: Integer ? T :
+            $manipulation_scalartype(I) <: Real ? float(T) : complex(T)
     end
 end
 
-# Fusing and splitting
-# TODO: add functionality for easy fusing and splitting of tensor indices
-
-#-------------------------------------
-# Full implementations based on `add`
-#-------------------------------------
 spacecheck_transform(f, tdst::AbstractTensorMap, tsrc::AbstractTensorMap, args...) =
     spacecheck_transform(f, space(tdst), space(tsrc), args...)
 @noinline function spacecheck_transform(f, Vdst::TensorMapSpace, Vsrc::TensorMapSpace, p::Index2Tuple)
@@ -447,67 +527,26 @@ end
     return nothing
 end
 
-
-"""
-    add_permute!(tdst::AbstractTensorMap, tsrc::AbstractTensorMap, (p₁, p₂)::Index2Tuple,
-                 α::Number, β::Number, backend::AbstractBackend...)
-
-Return the updated `tdst`, which is the result of adding `α * tsrc` to `tdst` after permuting 
-the indices of `tsrc` according to `(p₁, p₂)`.
-
-See also [`permute`](@ref), [`permute!`](@ref), [`add_braid!`](@ref), [`add_transpose!`](@ref).
-"""
-@propagate_inbounds function add_permute!(
-        tdst::AbstractTensorMap, tsrc::AbstractTensorMap, p::Index2Tuple,
-        α::Number, β::Number, backend::AbstractBackend...
-    )
-    @boundscheck spacecheck_transform(permute, tdst, tsrc, p)
-    transformer = treepermuter(tdst, tsrc, p)
-    return @inbounds add_transform!(tdst, tsrc, p, transformer, α, β, backend...)
-end
-
-"""
-    add_braid!(tdst::AbstractTensorMap, tsrc::AbstractTensorMap, (p₁, p₂)::Index2Tuple,
-               levels::IndexTuple, α::Number, β::Number, backend::AbstractBackend...)
-
-Return the updated `tdst`, which is the result of adding `α * tsrc` to `tdst` after braiding
-the indices of `tsrc` according to `(p₁, p₂)` and `levels`.
-
-See also [`braid`](@ref), [`braid!`](@ref), [`add_permute!`](@ref), [`add_transpose!`](@ref).
-"""
-@propagate_inbounds function add_braid!(
-        tdst::AbstractTensorMap, tsrc::AbstractTensorMap, p::Index2Tuple, levels::IndexTuple,
-        α::Number, β::Number, backend::AbstractBackend...
-    )
-    @boundscheck spacecheck_transform(braid, tdst, tsrc, p, levels)
-    levels1 = TupleTools.getindices(levels, codomainind(tsrc))
-    levels2 = TupleTools.getindices(levels, domainind(tsrc))
-    # TODO: arg order for tensormaps is different than for fusiontrees
-    transformer = treebraider(tdst, tsrc, p, (levels1, levels2))
-    return @inbounds add_transform!(tdst, tsrc, p, transformer, α, β, backend...)
-end
-
-"""
-    add_transpose!(tdst::AbstractTensorMap, tsrc::AbstractTensorMap, (p₁, p₂)::Index2Tuple,
-                   α::Number, β::Number, backend::AbstractBackend...)
-
-Return the updated `tdst`, which is the result of adding `α * tsrc` to `tdst` after transposing
-the indices of `tsrc` according to `(p₁, p₂)`.
-
-See also [`transpose`](@ref), [`transpose!`](@ref), [`add_permute!`](@ref), [`add_braid!`](@ref).
-"""
-@propagate_inbounds function add_transpose!(
-        tdst::AbstractTensorMap, tsrc::AbstractTensorMap, p::Index2Tuple,
-        α::Number, β::Number, backend::AbstractBackend...
-    )
-    @boundscheck spacecheck_transform(transpose, tdst, tsrc, p)
-    transformer = treetransposer(tdst, tsrc, p)
-    return @inbounds add_transform!(tdst, tsrc, p, transformer, α, β, backend...)
-end
-
+# Deprecated add_*! wrappers
+# --------------------------
+Base.@deprecate(
+    add_permute!(tdst::AbstractTensorMap, tsrc::AbstractTensorMap, p::Index2Tuple, α::Number, β::Number, backend::AbstractBackend...),
+    permute!(tdst, tsrc, p, α, β, backend...)
+)
+Base.@deprecate(
+    add_braid!(tdst::AbstractTensorMap, tsrc::AbstractTensorMap, p::Index2Tuple, levels::IndexTuple, α::Number, β::Number, backend::AbstractBackend...),
+    braid!(tdst, tsrc, p, levels, α, β, backend...)
+)
+Base.@deprecate(
+    add_transpose!(tdst::AbstractTensorMap, tsrc::AbstractTensorMap, p::Index2Tuple, α::Number, β::Number, backend::AbstractBackend...),
+    transpose!(tdst, tsrc, p, α, β, backend...)
+)
+
+# Kernel implementation
+# ---------------------
 @propagate_inbounds function add_transform!(
         tdst::AbstractTensorMap, tsrc::AbstractTensorMap, p::Index2Tuple, transformer,
-        α::Number, β::Number, backend::AbstractBackend...
+        α::Number, β::Number, backend, allocator
     )
     @boundscheck spacecheck_transform(permute, tdst, tsrc, p)
 
@@ -516,13 +555,14 @@ end
     else
         I = sectortype(tdst)
         if I === Trivial
-            add_trivial_kernel!(tdst, tsrc, p, transformer, α, β, backend...)
+            TO.tensoradd!(tdst[], tsrc[], p, false, α, β, backend, allocator)
         else
-            style = FusionStyle(I)
-            if use_threaded_transform(tdst, transformer)
-                add_kernel_threaded!(style, tdst, tsrc, p, transformer, α, β, backend...)
+            ntasks = use_threaded_transform(tdst, transformer) ? get_num_transformer_threads() : 1
+            scheduler = ntasks == 1 ? SerialScheduler() : DynamicScheduler(; ntasks, split = :roundrobin)
+            if tdst isa TensorMap && tsrc isa TensorMap # unpack data fields to avoid specializing
+                add_transform_kernel!(tdst.data, tsrc.data, p, transformer, α, β, backend, allocator, scheduler)
             else
-                add_kernel_nonthreaded!(style, tdst, tsrc, p, transformer, α, β, backend...)
+                add_transform_kernel!(tdst, tsrc, p, transformer, α, β, backend, allocator, scheduler)
             end
         end
     end
@@ -537,267 +577,141 @@ function use_threaded_transform(t::AbstractTensorMap, transformer)
     return get_num_transformer_threads() > 1 && dim(space(t)) > Strided.MINTHREADLENGTH
 end
 
-# Trivial implementations
-# -----------------------
-function add_trivial_kernel!(tdst, tsrc, p, transformer, α, β, backend...)
-    TO.tensoradd!(tdst[], tsrc[], p, false, α, β, backend...)
-    return nothing
-end
-
-# Non-threaded implementations
-# ----------------------------
-function add_kernel_nonthreaded!(
-        ::UniqueFusion, tdst, tsrc, p, transformer, α, β, backend...
+function add_transform_kernel!(
+        tdst, tsrc, p, transformer, α, β, backend, allocator, scheduler
     )
-    for (f₁, f₂) in fusiontrees(tsrc)
-        _add_transform_single!(tdst, tsrc, p, (f₁, f₂), transformer, α, β, backend...)
-    end
-    return nothing
-end
-function add_kernel_nonthreaded!(
-        ::UniqueFusion, tdst, tsrc, p, transformer::AbelianTreeTransformer, α, β, backend...
-    )
-    for subtransformer in transformer.data
-        _add_transform_single!(tdst, tsrc, p, subtransformer, α, β, backend...)
-    end
-    return nothing
-end
-function add_kernel_nonthreaded!(::FusionStyle, tdst, tsrc, p, transformer, α, β, backend...)
-    # preallocate buffers
-    buffers = allocate_buffers(tdst, tsrc, transformer)
-
-    for src in fusionblocks(tsrc)
-        if length(src) == 1
-            _add_transform_single!(tdst, tsrc, p, src, transformer, α, β, backend...)
-        else
-            _add_transform_multi!(tdst, tsrc, p, src, transformer, buffers, α, β, backend...)
+    I = sectortype(tdst)
+    if FusionStyle(I) === UniqueFusion()
+        tforeach(fusiontrees(tsrc); scheduler) do (f₁, f₂)
+            (f₁′, f₂′), coeff = transformer((f₁, f₂))
+            @inbounds TO.tensoradd!(
+                tdst[f₁′, f₂′], tsrc[f₁, f₂], p, false, α * coeff, β, backend, allocator
+            )
         end
+        return nothing
     end
-    return nothing
-end
-# specialization in the case of TensorMap
-function add_kernel_nonthreaded!(
-        ::FusionStyle, tdst, tsrc, p, transformer::GenericTreeTransformer, α, β, backend...
-    )
-    # preallocate buffers
-    buffers = allocate_buffers(tdst, tsrc, transformer)
+    cp = TO.allocator_checkpoint!(allocator)
+
+    # buffers have to be created without race condition: err on the side of caution with a lock
+    buffer_lock = Threads.ReentrantLock()
+
+    OhMyThreads.@tasks for src in fusionblocks(tsrc)
+        # setup
+        OhMyThreads.@set scheduler = scheduler
+        dst, U = transformer(src)
+
+        if length(src) == 1 # Degenerate block with a single tree: no matmul needed.
+            (f₁, f₂) = only(fusiontrees(src))
+            (f₁′, f₂′) = only(fusiontrees(dst))
+            @inbounds TO.tensoradd!(
+                tdst[f₁′, f₂′], tsrc[f₁, f₂], p, false, α * only(U), β, backend, allocator
+            )
+        else # Multi-tree block: pack → recoupling matmul → unpack.
+            rows, cols = size(U)
+            sz_src = size(tsrc[first(fusiontrees(src))...])
+            blocksize = prod(sz_src)
+            buffer = @lock buffer_lock TO.tensoralloc(storagetype(tdst), blocksize * (rows + cols), Val(true), allocator)
+            ptriv = (ntuple(identity, length(sz_src)), ())
+            buffer_dst = StridedView(buffer, (blocksize, rows), (1, blocksize), 0)
+            buffer_src = StridedView(buffer, (blocksize, cols), (1, blocksize), blocksize * rows)
+
+            # 1. Extract: copy each source block into column i of buffer_src as a flat vector,
+            #    using a trivial permutation so the layout is canonical before the matmul.
+            @inbounds for (i, (f₁, f₂)) in enumerate(fusiontrees(src))
+                TO.tensoradd!(
+                    sreshape(buffer_src[:, i], sz_src), tsrc[f₁, f₂],
+                    ptriv, false, One(), Zero(), backend, allocator
+                )
+            end
 
-    for subtransformer in transformer.data
-        # Special case without intermediate buffers whenever there is only a single block
-        if length(subtransformer[1]) == 1
-            _add_transform_single!(tdst, tsrc, p, subtransformer, α, β, backend...)
-        else
-            _add_transform_multi!(tdst, tsrc, p, subtransformer, buffers, α, β, backend...)
-        end
-    end
-    return nothing
-end
-# ambiguity resolution
-function add_kernel_nonthreaded!(
-        ::UniqueFusion, tdst, tsrc, p, transformer::GenericTreeTransformer, α, β, backend...
-    )
-    throw(ArgumentError("Cannot combine `GenericTreeTransformer` with `UniqueFusion`"))
-end
-# Threaded implementations
-# ------------------------
-function add_kernel_threaded!(
-        ::UniqueFusion, tdst, tsrc, p, transformer, α, β, backend...;
-        ntasks::Int = get_num_transformer_threads()
-    )
-    trees = fusiontrees(tsrc)
-    nblocks = length(trees)
-    counter = Threads.Atomic{Int}(1)
-    Threads.@sync for _ in 1:min(ntasks, nblocks)
-        Threads.@spawn begin
-            while true
-                local_counter = Threads.atomic_add!(counter, 1)
-                local_counter > nblocks && break
-                @inbounds (f₁, f₂) = trees[local_counter]
-                _add_transform_single!(tdst, tsrc, p, (f₁, f₂), transformer, α, β, backend...)
+            # 2. Recoupling: buffer_dst = buffer_src * U^T  (each output tree is a linear
+            #    combination of input trees weighted by the recoupling coefficients).
+            U′ = Adapt.adapt(storagetype(tdst), StridedView(U))
+            mul!(buffer_dst, buffer_src, transpose(U′))
+
+            # 3. Insert: scatter column i of buffer_dst into the destination, applying the
+            #    actual index permutation p in the same tensoradd! call.
+            @inbounds for (i, (f₃, f₄)) in enumerate(fusiontrees(dst))
+                TO.tensoradd!(
+                    tdst[f₃, f₄], sreshape(buffer_dst[:, i], sz_src),
+                    p, false, α, β, backend, allocator
+                )
             end
+            @lock buffer_lock TO.tensorfree!(buffer, allocator)
         end
     end
+    TO.allocator_reset!(allocator, cp)
     return nothing
 end
-function add_kernel_threaded!(
-        ::UniqueFusion, tdst, tsrc, p, transformer::AbelianTreeTransformer, α, β, backend...;
-        ntasks::Int = get_num_transformer_threads()
+
+# TensorMap specializations: operate directly on the flat data vector to avoid
+# repeated specialization -- this only depends on `numind` and `eltype`.
+function add_transform_kernel!(
+        data_dst::DenseVector, data_src::DenseVector, p, transformer::AbelianTreeTransformer,
+        α, β, backend, allocator, scheduler
     )
-    nblocks = length(transformer.data)
-    counter = Threads.Atomic{Int}(1)
-    Threads.@sync for _ in 1:min(ntasks, nblocks)
-        Threads.@spawn begin
-            while true
-                local_counter = Threads.atomic_add!(counter, 1)
-                local_counter > nblocks && break
-                @inbounds subtransformer = transformer.data[local_counter]
-                _add_transform_single!(tdst, tsrc, p, subtransformer, α, β, backend...)
-            end
-        end
+    tforeach(transformer.data; scheduler) do (coeff, struct_dst, struct_src)
+        TO.tensoradd!(
+            StridedView(data_dst, struct_dst...), StridedView(data_src, struct_src...),
+            p, false, α * coeff, β, backend, allocator
+        )
     end
     return nothing
 end
-
-function add_kernel_threaded!(
-        ::FusionStyle, tdst, tsrc, p, transformer, α, β, backend...;
-        ntasks::Int = get_num_transformer_threads()
+function add_transform_kernel!(
+        data_dst::DenseVector, data_src::DenseVector, p, transformer::GenericTreeTransformer,
+        α, β, backend, allocator, scheduler
     )
-    allblocks = fusionblocks(tsrc)
-    nblocks = length(allblocks)
-
-    counter = Threads.Atomic{Int}(1)
-    Threads.@sync for _ in 1:min(ntasks, nblocks)
-        Threads.@spawn begin
-            # preallocate buffers for each task
-            buffers = allocate_buffers(tdst, tsrc, transformer)
-
-            while true
-                local_counter = Threads.atomic_add!(counter, 1)
-                local_counter > nblocks && break
-                @inbounds src = allblocks[local_counter]
-                if length(src) == 1
-                    _add_transform_single!(tdst, tsrc, p, src, transformer, α, β, backend...)
-                else
-                    _add_transform_multi!(tdst, tsrc, p, src, transformer, buffers, α, β, backend...)
-                end
+    cp = TO.allocator_checkpoint!(allocator)
+
+    # buffers have to be created without race condition: err on the side of caution with a lock
+    buffer_lock = Threads.ReentrantLock()
+
+    OhMyThreads.@tasks for subtransformer in transformer.data
+        # setup
+        OhMyThreads.@set scheduler = scheduler
+        U, (sz_dst, structs_dst), (sz_src, structs_src) = subtransformer
+
+        if length(U) == 1 # Degenerate block with a single tree: no matmul needed.
+            coeff = only(U)
+            TO.tensoradd!(
+                StridedView(data_dst, sz_dst, only(structs_dst)...),
+                StridedView(data_src, sz_src, only(structs_src)...),
+                p, false, α * coeff, β, backend, allocator
+            )
+        else # Multi-tree block: pack → recoupling matmul → unpack.
+            rows, cols = size(U)
+            blocksize = prod(sz_src)
+            buffer = @lock buffer_lock TO.tensoralloc(typeof(data_dst), blocksize * (rows + cols), Val(true), allocator)
+            ptriv = (ntuple(identity, length(sz_src)), ())
+            buffer_dst = StridedView(buffer, (blocksize, rows), (1, blocksize), 0)
+            buffer_src = StridedView(buffer, (blocksize, cols), (1, blocksize), blocksize * rows)
+
+            # 1. Extract: copy each source block into column i of buffer_src as a flat vector,
+            #    using a trivial permutation so the layout is canonical before the matmul.
+            @inbounds for (i, struct_src_i) in enumerate(structs_src)
+                TO.tensoradd!(
+                    sreshape(buffer_src[:, i], sz_src), StridedView(data_src, sz_src, struct_src_i...),
+                    ptriv, false, One(), Zero(), backend, allocator
+                )
             end
-        end
-    end
 
-    return nothing
-end
-# specialization in the case of TensorMap
-function add_kernel_threaded!(
-        ::FusionStyle, tdst, tsrc, p, transformer::GenericTreeTransformer, α, β, backend...;
-        ntasks::Int = get_num_transformer_threads()
-    )
-    nblocks = length(transformer.data)
-
-    counter = Threads.Atomic{Int}(1)
-    Threads.@sync for _ in 1:min(ntasks, nblocks)
-        Threads.@spawn begin
-            # preallocate buffers for each task
-            buffers = allocate_buffers(tdst, tsrc, transformer)
-
-            while true
-                local_counter = Threads.atomic_add!(counter, 1)
-                local_counter > nblocks && break
-                @inbounds subtransformer = transformer.data[local_counter]
-                if length(subtransformer[1]) == 1
-                    _add_transform_single!(tdst, tsrc, p, subtransformer, α, β, backend...)
-                else
-                    _add_transform_multi!(tdst, tsrc, p, subtransformer, buffers, α, β, backend...)
-                end
+            # 2. Recoupling: buffer_dst = buffer_src * U^T  (each output tree is a linear
+            #    combination of input trees weighted by the recoupling coefficients).
+            U′ = Adapt.adapt(typeof(data_dst), StridedView(U))
+            mul!(buffer_dst, buffer_src, transpose(U′))
+
+            # 3. Insert: scatter column i of buffer_dst into the destination, applying the
+            #    actual index permutation p in the same tensoradd! call.
+            @inbounds for (i, struct_dst_i) in enumerate(structs_dst)
+                TO.tensoradd!(
+                    StridedView(data_dst, sz_dst, struct_dst_i...), sreshape(buffer_dst[:, i], sz_src),
+                    p, false, α, β, backend, allocator
+                )
             end
+            @lock buffer_lock TO.tensorfree!(buffer, allocator)
         end
     end
-
-    return nothing
-end
-# ambiguity resolution
-function add_kernel_threaded!(
-        ::UniqueFusion, tdst, tsrc, p, transformer::GenericTreeTransformer, α, β, backend...;
-        ntasks::Int = get_num_transformer_threads()
-    )
-    throw(ArgumentError("Cannot combine `GenericTreeTransformer` with `UniqueFusion`"))
-end
-
-
-# Auxiliary methods
-# -----------------
-function _add_transform_single!(tdst, tsrc, p, (f₁, f₂)::FusionTreePair, transformer, α, β, backend...)
-    (f₁′, f₂′), coeff = transformer((f₁, f₂))
-    @inbounds TO.tensoradd!(tdst[f₁′, f₂′], tsrc[f₁, f₂], p, false, α * coeff, β, backend...)
-    return nothing
-end
-function _add_transform_single!(tdst, tsrc, p, src::FusionTreeBlock, transformer, α, β, backend...)
-    dst, U = transformer(src)
-    f₁, f₂ = only(fusiontrees(src))
-    f₁′, f₂′ = only(fusiontrees(dst))
-    coeff = only(U)
-    @inbounds TO.tensoradd!(tdst[f₁′, f₂′], tsrc[f₁, f₂], p, false, α * coeff, β, backend...)
-    return nothing
-end
-function _add_transform_single!(
-        tdst, tsrc, p, (coeff, struct_dst, struct_src)::AbelianTransformerData,
-        α, β, backend...
-    )
-    subblock_dst = StridedView(tdst.data, struct_dst...)
-    subblock_src = StridedView(tsrc.data, struct_src...)
-    TO.tensoradd!(subblock_dst, subblock_src, p, false, α * coeff, β, backend...)
-    return nothing
-end
-function _add_transform_single!(
-        tdst, tsrc, p, (basistransform, structs_dst, structs_src)::GenericTransformerData,
-        α, β, backend...
-    )
-    struct_dst = (structs_dst[1], only(structs_dst[2])...)
-    struct_src = (structs_src[1], only(structs_src[2])...)
-    coeff = only(basistransform)
-    _add_transform_single!(tdst, tsrc, p, (coeff, struct_dst, struct_src), α, β, backend...)
-    return nothing
-end
-
-function _add_transform_multi!(tdst, tsrc, p, src::FusionTreeBlock, transformer, (buffer1, buffer2), α, β, backend...)
-    dst, U = transformer(src)
-    rows, cols = size(U)
-    sz_src = size(tsrc[first(fusiontrees(src))...])
-    blocksize = prod(sz_src)
-    matsize = (
-        prod(TupleTools.getindices(sz_src, codomainind(tsrc))),
-        prod(TupleTools.getindices(sz_src, domainind(tsrc))),
-    )
-
-    # Filling up a buffer with contiguous data
-    buffer_src = StridedView(buffer2, (blocksize, cols), (1, blocksize), 0)
-    for (i, (f₁, f₂)) in enumerate(fusiontrees(src))
-        subblock_src = sreshape(tsrc[f₁, f₂], matsize)
-        bufblock_src = sreshape(buffer_src[:, i], matsize)
-        copy!(bufblock_src, subblock_src)
-    end
-
-    # Resummation into a second buffer using BLAS
-    buffer_dst = StridedView(buffer1, (blocksize, rows), (1, blocksize), 0)
-    mul!(buffer_dst, buffer_src, transpose(StridedView(U)), α, Zero())
-
-    # Filling up the output
-    for (i, (f₃, f₄)) in enumerate(fusiontrees(dst))
-        subblock_dst = tdst[f₃, f₄]
-        bufblock_dst = sreshape(buffer_dst[:, i], sz_src)
-        TO.tensoradd!(subblock_dst, bufblock_dst, p, false, One(), β, backend...)
-    end
-
-    return nothing
-end
-function _add_transform_multi!(
-        tdst, tsrc, p, (U, (sz_dst, structs_dst), (sz_src, structs_src)),
-        (buffer1, buffer2), α, β, backend...
-    )
-    rows, cols = size(U)
-    blocksize = prod(sz_src)
-    matsize = (
-        prod(TupleTools.getindices(sz_src, codomainind(tsrc))),
-        prod(TupleTools.getindices(sz_src, domainind(tsrc))),
-    )
-
-    # Filling up a buffer with contiguous data
-    buffer_src = StridedView(buffer2, (blocksize, cols), (1, blocksize), 0)
-    for (i, struct_src) in enumerate(structs_src)
-        subblock_src = sreshape(StridedView(tsrc.data, sz_src, struct_src...), matsize)
-        bufblock_src = sreshape(buffer_src[:, i], matsize)
-        copy!(bufblock_src, subblock_src)
-    end
-
-    # Resummation into a second buffer using BLAS
-    buffer_dst = StridedView(buffer1, (blocksize, rows), (1, blocksize), 0)
-    mul!(buffer_dst, buffer_src, transpose(StridedView(U)), α, Zero())
-
-    # Filling up the output
-    for (i, struct_dst) in enumerate(structs_dst)
-        subblock_dst = StridedView(tdst.data, sz_dst, struct_dst...)
-        bufblock_dst = sreshape(buffer_dst[:, i], sz_src)
-        TO.tensoradd!(subblock_dst, bufblock_dst, p, false, One(), β, backend...)
-    end
-
+    TO.allocator_reset!(allocator, cp)
     return nothing
 end
diff --git a/src/tensors/tensor.jl b/src/tensors/tensor.jl
index bd6609163..64ae90a69 100644
--- a/src/tensors/tensor.jl
+++ b/src/tensors/tensor.jl
@@ -554,3 +554,8 @@ function Base.promote_rule(
     A = promote_storagetype(VectorInterface.promote_add(scalartype(TT₁), scalartype(TT₂)), TT₁, TT₂)
     return tensormaptype(S, N₁, N₂, A)
 end
+
+function Adapt.adapt_structure(to, x::TensorMap)
+    data = Adapt.adapt(to, x.data)
+    return TensorMap{eltype(data)}(data, space(x))
+end
diff --git a/src/tensors/tensoroperations.jl b/src/tensors/tensoroperations.jl
index 3fc79cf0c..375b63768 100644
--- a/src/tensors/tensoroperations.jl
+++ b/src/tensors/tensoroperations.jl
@@ -43,9 +43,9 @@ function TO.tensoradd!(
     if conjA
         A′ = adjoint(A)
         pA′ = adjointtensorindices(A, _canonicalize(pA, C))
-        add_permute!(C, A′, pA′, α, β, backend)
+        permute!(C, A′, pA′, α, β, backend)
     else
-        add_permute!(C, A, _canonicalize(pA, C), α, β, backend)
+        permute!(C, A, _canonicalize(pA, C), α, β, backend)
     end
     return C
 end
diff --git a/src/tensors/treetransformers.jl b/src/tensors/treetransformers.jl
index 30ec1de0f..f68378cc2 100644
--- a/src/tensors/treetransformers.jl
+++ b/src/tensors/treetransformers.jl
@@ -128,25 +128,6 @@ function repack_transformer_structure(structures::Dictionary, trees)
     return sz, strides_offsets
 end
 
-function buffersize(transformer::GenericTreeTransformer)
-    return maximum(transformer.data; init = 0) do (basistransform, structures_dst, _)
-        return prod(structures_dst[1]) * size(basistransform, 1)
-    end
-end
-
-function allocate_buffers(
-        tdst::TensorMap, tsrc::TensorMap, transformer::GenericTreeTransformer
-    )
-    sz = buffersize(transformer)
-    return similar(tdst.data, sz), similar(tsrc.data, sz)
-end
-function allocate_buffers(
-        tdst::AbstractTensorMap, tsrc::AbstractTensorMap, transformer
-    )
-    # be pessimistic and assume the worst for now
-    sz = dim(space(tsrc))
-    return similar(storagetype(tdst), sz), similar(storagetype(tsrc), sz)
-end
 
 function treetransformertype(Vdst, Vsrc)
     I = sectortype(Vdst)
@@ -185,22 +166,17 @@ end
     return TreeTransformer(fusiontreebraider, p, Vdst, Vsrc)
 end
 
-for (transform, treetransformer) in
-    ((:permute, :treepermuter), (:transpose, :treetransposer))
-    @eval begin
-        function $treetransformer(::AbstractTensorMap, ::AbstractTensorMap, p::Index2Tuple)
-            return fusiontreetransform(f) = $transform(f, p)
-        end
-        function $treetransformer(tdst::TensorMap, tsrc::TensorMap, p::Index2Tuple)
-            return $treetransformer(space(tdst), space(tsrc), p)
-        end
-        @cached function $treetransformer(
-                Vdst::TensorMapSpace, Vsrc::TensorMapSpace, p::Index2Tuple
-            )::treetransformertype(Vdst, Vsrc)
-            fusiontreetransform(f) = $transform(f, p)
-            return TreeTransformer(fusiontreetransform, p, Vdst, Vsrc)
-        end
-    end
+function treetransposer(::AbstractTensorMap, ::AbstractTensorMap, p::Index2Tuple)
+    return fusiontreetransform(f) = transpose(f, p)
+end
+function treetransposer(tdst::TensorMap, tsrc::TensorMap, p::Index2Tuple)
+    return treetransposer(space(tdst), space(tsrc), p)
+end
+@cached function treetransposer(
+        Vdst::TensorMapSpace, Vsrc::TensorMapSpace, p::Index2Tuple
+    )::treetransformertype(Vdst, Vsrc)
+    fusiontreetransform(f) = transpose(f, p)
+    return TreeTransformer(fusiontreetransform, p, Vdst, Vsrc)
 end
 
 # default cachestyle is GlobalLRUCache
@@ -227,3 +203,9 @@ end
 function _transformer_weight((mat, structs_dst, structs_src)::GenericTransformerData)
     return length(mat) * prod(structs_dst[1])
 end
+
+function buffersize(transformer::GenericTreeTransformer)
+    return maximum(transformer.data; init = 0) do (basistransform, structures_dst, _)
+        return prod(structures_dst[1]) * size(basistransform, 1)
+    end
+end
diff --git a/test/factorizations/svd.jl b/test/factorizations/svd.jl
index 0678db827..e2eb19076 100644
--- a/test/factorizations/svd.jl
+++ b/test/factorizations/svd.jl
@@ -49,9 +49,9 @@ for V in spacelist
             end
             for T in eltypes, t in (randn(T, W, W), randn(T, W, W)')
                 project_hermitian!(t)
-                vals = @constinferred LinearAlgebra.eigvals(t)
-                λmax = maximum(s -> maximum(abs, s), values(vals))
-                λmin = minimum(s -> minimum(abs, s), values(vals))
+                vals = @constinferred eigh_vals(t)
+                λmax = maximum(abs, vals)
+                λmin = minimum(abs, vals)
                 @test cond(t) ≈ λmax / λmin
             end
         end
diff --git a/test/mooncake/indexmanipulations.jl b/test/mooncake/indexmanipulations.jl
index 4dd6413cf..390721d71 100644
--- a/test/mooncake/indexmanipulations.jl
+++ b/test/mooncake/indexmanipulations.jl
@@ -18,7 +18,7 @@ eltypes = (Float64, ComplexF64)
     hasbraiding = BraidingStyle(sectortype(eltype(V))) isa HasBraiding
     symmetricbraiding = BraidingStyle(sectortype(eltype(V))) isa SymmetricBraiding
 
-    symmetricbraiding && @timedtestset "add_permute!" begin
+    symmetricbraiding && @timedtestset "permute!" begin
         A = randn(T, V[1] ⊗ V[2] ← (V[3] ⊗ V[4] ⊗ V[5])')
         α = randn(T)
         β = randn(T)
@@ -27,12 +27,12 @@ eltypes = (Float64, ComplexF64)
         for _ in 1:5
             p = randindextuple(numind(A))
             C = randn!(permute(A, p))
-            Mooncake.TestUtils.test_rule(rng, TensorKit.add_permute!, C, A, p, α, β; atol, rtol, mode)
+            Mooncake.TestUtils.test_rule(rng, TensorKit.permute!, C, A, p, α, β; atol, rtol, mode)
             A = C
         end
     end
 
-    @timedtestset "add_transpose!" begin
+    @timedtestset "transpose!" begin
         A = randn(T, V[1] ⊗ V[2] ← (V[3] ⊗ V[4] ⊗ V[5])')
         α = randn(T)
         β = randn(T)
@@ -41,18 +41,18 @@ eltypes = (Float64, ComplexF64)
         for _ in 1:2
             p = randcircshift(numout(A), numin(A))
             C = randn!(transpose(A, p))
-            Mooncake.TestUtils.test_rule(rng, TensorKit.add_transpose!, C, A, p, One(), Zero(); atol, rtol, mode)
-            Mooncake.TestUtils.test_rule(rng, TensorKit.add_transpose!, C, A, p, α, β; atol, rtol, mode)
+            Mooncake.TestUtils.test_rule(rng, TensorKit.transpose!, C, A, p, One(), Zero(); atol, rtol, mode)
+            Mooncake.TestUtils.test_rule(rng, TensorKit.transpose!, C, A, p, α, β; atol, rtol, mode)
             if !(T <: Real)
-                Mooncake.TestUtils.test_rule(rng, TensorKit.add_transpose!, C, real(A), p, α, β; atol, rtol, mode)
-                Mooncake.TestUtils.test_rule(rng, TensorKit.add_transpose!, C, A, p, real(α), β; atol, rtol, mode)
-                Mooncake.TestUtils.test_rule(rng, TensorKit.add_transpose!, C, real(A), p, real(α), β; atol, rtol, mode)
+                Mooncake.TestUtils.test_rule(rng, TensorKit.transpose!, C, real(A), p, α, β; atol, rtol, mode)
+                Mooncake.TestUtils.test_rule(rng, TensorKit.transpose!, C, A, p, real(α), β; atol, rtol, mode)
+                Mooncake.TestUtils.test_rule(rng, TensorKit.transpose!, C, real(A), p, real(α), β; atol, rtol, mode)
             end
             A = C
         end
     end
 
-    hasbraiding && @timedtestset "add_braid!" begin
+    hasbraiding && @timedtestset "braid!" begin
         A = randn(T, V[1] ⊗ V[2] ← (V[3] ⊗ V[4] ⊗ V[5])')
         α = randn(T)
         β = randn(T)
@@ -62,11 +62,11 @@ eltypes = (Float64, ComplexF64)
             p = randcircshift(numout(A), numin(A))
             levels = Tuple(randperm(numind(A)))
             C = randn!(transpose(A, p))
-            Mooncake.TestUtils.test_rule(rng, TensorKit.add_braid!, C, A, p, levels, α, β; atol, rtol, mode)
+            Mooncake.TestUtils.test_rule(rng, TensorKit.braid!, C, A, p, levels, α, β; atol, rtol, mode)
             if !(T <: Real)
-                Mooncake.TestUtils.test_rule(rng, TensorKit.add_braid!, C, real(A), p, levels, α, β; atol, rtol, mode)
-                Mooncake.TestUtils.test_rule(rng, TensorKit.add_braid!, C, A, p, levels, real(α), β; atol, rtol, mode)
-                Mooncake.TestUtils.test_rule(rng, TensorKit.add_braid!, C, A, p, levels, real(α), real(β); atol, rtol, mode)
+                Mooncake.TestUtils.test_rule(rng, TensorKit.braid!, C, real(A), p, levels, α, β; atol, rtol, mode)
+                Mooncake.TestUtils.test_rule(rng, TensorKit.braid!, C, A, p, levels, real(α), β; atol, rtol, mode)
+                Mooncake.TestUtils.test_rule(rng, TensorKit.braid!, C, A, p, levels, real(α), real(β); atol, rtol, mode)
             end
             A = C
         end
diff --git a/test/tensors/indexmanipulations.jl b/test/tensors/indexmanipulations.jl
index c38b182a2..836418b3f 100644
--- a/test/tensors/indexmanipulations.jl
+++ b/test/tensors/indexmanipulations.jl
@@ -129,6 +129,14 @@ for V in spacelist
             @tensor tb[a, b] := flip(t1, (1, 3))[x, y, a, z] * flip(t2, (2, 4))[y, b, z, x]
             @test flip(ta, (1, 2)) ≈ tb
         end
+        hasbraiding && !symmetricbraiding && @timedtestset "Braid AdjointTensorMap: adjoint identity" begin
+            t = rand(ComplexF64, V1 ⊗ V2 ← V3)
+            p = ((2,), (1, 3))
+            levels = (1, 3, 2)
+            t1 = copy(braid(t', p, levels))
+            t2 = braid(copy(t'), p, levels)
+            @test t1 ≈ t2
+        end
     end
     TensorKit.empty_globalcaches!()
 end