From 55179dd02d5d3f757b39c70754ed23880f8e4825 Mon Sep 17 00:00:00 2001
From: hweichel <hweichel@mathworks.com>
Date: Wed, 25 Mar 2026 10:24:56 -0400
Subject: [PATCH 01/16] Add new mdspan overlays and use them in examples and
 tests

---
 examples/matrix_opt_example.cpp  |  5 +-
 examples/simple_spmm.cpp         |  5 +-
 examples/spmm_csc.cpp            |  5 +-
 include/spblas/detail/mdspan.hpp | 16 ++++++
 test/gtest/CMakeLists.txt        |  3 +-
 test/gtest/conjugate_test.cpp    | 12 ++--
 test/gtest/mdspan_overlays.cpp   | 95 ++++++++++++++++++++++++++++++++
 test/gtest/spmm_test.cpp         |  6 +-
 8 files changed, 125 insertions(+), 22 deletions(-)
 create mode 100644 test/gtest/mdspan_overlays.cpp
diff --git a/examples/matrix_opt_example.cpp b/examples/matrix_opt_example.cpp
index 0585c93..8229aeb 100644
--- a/examples/matrix_opt_example.cpp
+++ b/examples/matrix_opt_example.cpp
@@ -5,7 +5,6 @@
 
 int main(int argc, char** argv) {
   using namespace spblas;
-  namespace md = spblas::__mdspan;
 
   using T = float;
 
@@ -40,8 +39,8 @@ int main(int argc, char** argv) {
   std::vector<T> x_values(k * n, 1);
   std::vector<T> y_values(m * n, 0);
 
-  md::mdspan x(x_values.data(), k, n);
-  md::mdspan y(y_values.data(), m, n);
+  mdspan_row_major x(x_values.data(), k, n);
+  mdspan_row_major y(y_values.data(), m, n);
 
   auto a_view = scaled(2.f, a);
 
diff --git a/examples/simple_spmm.cpp b/examples/simple_spmm.cpp
index 9440e6c..b35da26 100644
--- a/examples/simple_spmm.cpp
+++ b/examples/simple_spmm.cpp
@@ -5,7 +5,6 @@
 
 int main(int argc, char** argv) {
   using namespace spblas;
-  namespace md = spblas::__mdspan;
 
   using T = float;
 
@@ -38,8 +37,8 @@ int main(int argc, char** argv) {
   std::vector<T> x_values(k * n, 1);
   std::vector<T> y_values(m * n, 0);
 
-  md::mdspan x(x_values.data(), k, n);
-  md::mdspan y(y_values.data(), m, n);
+  mdspan_row_major x(x_values.data(), k, n);
+  mdspan_row_major y(y_values.data(), m, n);
 
   auto a_view = scaled(2.f, a);
 
diff --git a/examples/spmm_csc.cpp b/examples/spmm_csc.cpp
index 6b7a69c..468e2f1 100644
--- a/examples/spmm_csc.cpp
+++ b/examples/spmm_csc.cpp
@@ -6,7 +6,6 @@
 
 int main(int argc, char** argv) {
   using namespace spblas;
-  namespace md = spblas::__mdspan;
 
   using T = float;
 
@@ -39,8 +38,8 @@ int main(int argc, char** argv) {
   std::vector<T> x_values(k * n, 1);
   std::vector<T> y_values(m * n, 0);
 
-  md::mdspan x(x_values.data(), k, n);
-  md::mdspan y(y_values.data(), m, n);
+  mdspan_row_major x(x_values.data(), k, n);
+  mdspan_row_major y(y_values.data(), m, n);
 
   // y = A * (alpha * x)
   multiply(a, scaled(2.f, x), y);
diff --git a/include/spblas/detail/mdspan.hpp b/include/spblas/detail/mdspan.hpp
index 8ae10b0..a6e6774 100644
--- a/include/spblas/detail/mdspan.hpp
+++ b/include/spblas/detail/mdspan.hpp
@@ -26,3 +26,19 @@ static_assert(false, "spblas requires mdspan.  Compile with a C++23 compiler "
                      "or download the std/experimental implementation.");
 
 #endif
+
+namespace spblas{
+// Define templated aliases for col_major (layout_left) and row_major
+// (layout_right) mdspan types.
+template <typename I, typename T>
+using mdspan_col_major = __mdspan::mdspan<
+    T,
+    __mdspan::extents<I, __mdspan::dynamic_extent, __mdspan::dynamic_extent>,
+    __mdspan::layout_left>;
+
+template <typename I, typename T>
+using mdspan_row_major = __mdspan::mdspan<
+    T,
+    __mdspan::extents<I, __mdspan::dynamic_extent, __mdspan::dynamic_extent>,
+    __mdspan::layout_right>;
+}
diff --git a/test/gtest/CMakeLists.txt b/test/gtest/CMakeLists.txt
index 8d308e6..e59c42d 100644
--- a/test/gtest/CMakeLists.txt
+++ b/test/gtest/CMakeLists.txt
@@ -11,7 +11,8 @@ if (SPBLAS_CPU_BACKEND)
        spgemm_csr_csc.cpp
        add_test.cpp
        transpose_test.cpp
-       triangular_solve_test.cpp)
+       triangular_solve_test.cpp
+       mdspan_overlays.cpp)
 
   if (ENABLE_ONEMKL_SYCL OR SPBLAS_REFERENCE_BACKEND)
     list(APPEND TEST_SOURCES conjugate_test.cpp)
diff --git a/test/gtest/conjugate_test.cpp b/test/gtest/conjugate_test.cpp
index cd6ecd7..fcfea76 100644
--- a/test/gtest/conjugate_test.cpp
+++ b/test/gtest/conjugate_test.cpp
@@ -91,8 +91,6 @@ TEST(Conjugate, SpMV_VectorConjugated) {
 }
 
 TEST(Conjugate, SpMM_MatrixConjugated) {
-  namespace md = spblas::__mdspan;
-
   for (auto&& [m, k, nnz] : util::dims) {
     for (auto n : {1, 8, 32}) {
       auto [values, rowptr, colind, shape, _] =
@@ -102,8 +100,8 @@ TEST(Conjugate, SpMM_MatrixConjugated) {
       auto [b_values, b_shape] = spblas::generate_dense<T>(k, n);
 
       std::vector<T> c_values(m * n, T(0.0f, 0.0f));
-      md::mdspan b(b_values.data(), k, n);
-      md::mdspan c(c_values.data(), m, n);
+      spblas::mdspan_row_major b(b_values.data(), k, n);
+      spblas::mdspan_row_major c(c_values.data(), m, n);
 
       spblas::multiply(spblas::conjugated(a), b, c);
 
@@ -126,8 +124,6 @@ TEST(Conjugate, SpMM_MatrixConjugated) {
 }
 
 TEST(Conjugate, SpMM_DenseConjugated) {
-  namespace md = spblas::__mdspan;
-
   for (auto&& [m, k, nnz] : util::dims) {
     for (auto n : {1, 8, 32}) {
       auto [values, rowptr, colind, shape, _] =
@@ -137,8 +133,8 @@ TEST(Conjugate, SpMM_DenseConjugated) {
       auto [b_values, b_shape] = spblas::generate_dense<T>(k, n);
 
       std::vector<T> c_values(m * n, T(0.0f, 0.0f));
-      md::mdspan b(b_values.data(), k, n);
-      md::mdspan c(c_values.data(), m, n);
+      spblas::mdspan_row_major b(b_values.data(), k, n);
+      spblas::mdspan_row_major c(c_values.data(), m, n);
 
       spblas::multiply(a, spblas::conjugated(b), c);
 
diff --git a/test/gtest/mdspan_overlays.cpp b/test/gtest/mdspan_overlays.cpp
new file mode 100644
index 0000000..10b597b
--- /dev/null
+++ b/test/gtest/mdspan_overlays.cpp
@@ -0,0 +1,95 @@
+#include <gtest/gtest.h>
+
+#include "util.hpp"
+#include <spblas/spblas.hpp>
+
+TEST(Mdspan, positive_row_major) {
+  using T = float;
+  using I = spblas::index_t;
+
+  for (auto m : {1, 2, 4}) {
+    for (auto n : {1, 2, 4}) {
+
+      auto [b_values, b_shape] = spblas::generate_dense<T>(m, n);
+      spblas::mdspan_row_major<I, T> b(b_values.data(), m, n);
+
+
+      // Traverse by row in inner loop to immitade row-major
+      T* tmp = b_values.data();
+      for (I i = 0; i < m; ++i) {
+        for (I j = 0; j < n; ++j) {
+            EXPECT_EQ((b[i,j]), *(tmp++));
+        }
+      }
+    }
+  }
+}
+
+TEST(Mdspan, postive_col_major) {
+  using T = float;
+  using I = spblas::index_t;
+
+  for (auto m : {1, 2, 4}) {
+    for (auto n : {1, 2, 4}) {
+
+      auto [b_values, b_shape] = spblas::generate_dense<T>(m, n);
+      spblas::mdspan_col_major<I, T> b(b_values.data(), m, n);
+
+      // Traverse by column in inner loop to immitade col-major
+      T* tmp = b_values.data();
+      for (I j = 0; j < n; ++j) {
+        for (I i = 0; i < m; ++i) {
+          EXPECT_EQ((b[i,j]), *(tmp++));
+        }
+      }
+    }
+  }
+}
+
+TEST(Mdspan, negative_row_major) {
+  using T = float;
+  using I = spblas::index_t;
+
+  for (auto [m, n] : {std::pair{2, 4}, std::pair{4, 2}}) {
+
+    auto [b_values, b_shape] = spblas::generate_dense<T>(m, n);
+    spblas::mdspan_row_major<I, T> b(b_values.data(), m, n);
+
+    // Traverse by column in inner loop to not immitade row-major
+    T* tmp = b_values.data();
+    for (I j = 0; j < n; ++j) {
+      for (I i = 0; i < m; ++i) {
+        // Skip first and last element
+        if ((i == 0 && j == 0) || (i == m - 1 && j == n - 1)) {
+          tmp++;
+          continue;
+        }
+        EXPECT_NE((b[i, j]), *(tmp++));
+      }
+    }
+  }
+}
+
+TEST(Mdspan, negative_col_major) {
+  using T = float;
+  using I = spblas::index_t;
+
+  for (auto [m, n] : {std::pair{2, 4}, std::pair{4, 2}}) {
+
+    auto [b_values, b_shape] = spblas::generate_dense<T>(m, n);
+    spblas::mdspan_col_major<I, T> b(b_values.data(), m, n);
+
+    // Traverse by row in inner loop to not immitade col-major
+    T* tmp = b_values.data();
+    for (I i = 0; i < m; ++i) {
+      for (I j = 0; j < n; ++j) {
+        // Skip first and last element
+        if ((i == 0 && j == 0) || (i == m - 1 && j == n - 1)) {
+          tmp++;
+          continue;
+        }
+        EXPECT_NE((b[i, j]), *(tmp++));
+      }
+    }
+  }
+}
diff --git a/test/gtest/spmm_test.cpp b/test/gtest/spmm_test.cpp
index 554a192..45f358e 100644
--- a/test/gtest/spmm_test.cpp
+++ b/test/gtest/spmm_test.cpp
@@ -4,8 +4,6 @@
 #include <spblas/spblas.hpp>
 
 TEST(CsrView, SpMM) {
-  namespace md = spblas::__mdspan;
-
   using T = float;
   using I = spblas::index_t;
 
@@ -20,8 +18,8 @@ TEST(CsrView, SpMM) {
 
       std::vector<T> c_values(m * n, 0);
 
-      md::mdspan b(b_values.data(), k, n);
-      md::mdspan c(c_values.data(), m, n);
+      spblas::mdspan_row_major b(b_values.data(), k, n);
+      spblas::mdspan_row_major c(c_values.data(), m, n);
 
       spblas::multiply(a, b, c);
 

From dda20ae0e0c01e536beb55e0ed0c332e18c7304a Mon Sep 17 00:00:00 2001
From: hweichel <hweichel@mathworks.com>
Date: Wed, 25 Mar 2026 11:29:50 -0400
Subject: [PATCH 02/16] fix order of template types; always add template types
 to mdspan_*_major calls; fix indentation

---
 examples/matrix_opt_example.cpp  |  4 ++--
 examples/simple_spmm.cpp         |  6 +++---
 examples/spmm_csc.cpp            |  4 ++--
 include/spblas/detail/mdspan.hpp | 14 ++++++--------
 test/gtest/conjugate_test.cpp    |  4 ++--
 test/gtest/mdspan_overlays.cpp   | 25 +++++++++++--------------
 test/gtest/spmm_test.cpp         |  4 ++--
 7 files changed, 28 insertions(+), 33 deletions(-)

diff --git a/examples/matrix_opt_example.cpp b/examples/matrix_opt_example.cpp
index 8229aeb..2bf6d92 100644
--- a/examples/matrix_opt_example.cpp
+++ b/examples/matrix_opt_example.cpp
@@ -39,8 +39,8 @@ int main(int argc, char** argv) {
   std::vector<T> x_values(k * n, 1);
   std::vector<T> y_values(m * n, 0);
 
-  mdspan_row_major x(x_values.data(), k, n);
-  mdspan_row_major y(y_values.data(), m, n);
+  mdspan_row_major<T, spblas::index_t> x(x_values.data(), k, n);
+  mdspan_row_major<T, spblas::index_t> y(y_values.data(), m, n);
 
   auto a_view = scaled(2.f, a);
 
diff --git a/examples/simple_spmm.cpp b/examples/simple_spmm.cpp
index b35da26..fe12a4d 100644
--- a/examples/simple_spmm.cpp
+++ b/examples/simple_spmm.cpp
@@ -7,7 +7,7 @@ int main(int argc, char** argv) {
   using namespace spblas;
 
   using T = float;
-
+  
   spblas::index_t m = 100;
   spblas::index_t n = 10;
   spblas::index_t k = 100;
@@ -37,8 +37,8 @@ int main(int argc, char** argv) {
   std::vector<T> x_values(k * n, 1);
   std::vector<T> y_values(m * n, 0);
 
-  mdspan_row_major x(x_values.data(), k, n);
-  mdspan_row_major y(y_values.data(), m, n);
+  mdspan_row_major<T, spblas::index_t> x(x_values.data(), k, n);
+  mdspan_row_major<T, spblas::index_t> y(y_values.data(), m, n);
 
   auto a_view = scaled(2.f, a);
 
diff --git a/examples/spmm_csc.cpp b/examples/spmm_csc.cpp
index 468e2f1..1e616e9 100644
--- a/examples/spmm_csc.cpp
+++ b/examples/spmm_csc.cpp
@@ -38,8 +38,8 @@ int main(int argc, char** argv) {
   std::vector<T> x_values(k * n, 1);
   std::vector<T> y_values(m * n, 0);
 
-  mdspan_row_major x(x_values.data(), k, n);
-  mdspan_row_major y(y_values.data(), m, n);
+  mdspan_row_major<T, spblas::index_t> x(x_values.data(), k, n);
+  mdspan_row_major<T, spblas::index_t> y(y_values.data(), m, n);
 
   // y = A * (alpha * x)
   multiply(a, scaled(2.f, x), y);
diff --git a/include/spblas/detail/mdspan.hpp b/include/spblas/detail/mdspan.hpp
index a6e6774..c8717e8 100644
--- a/include/spblas/detail/mdspan.hpp
+++ b/include/spblas/detail/mdspan.hpp
@@ -27,18 +27,16 @@ static_assert(false, "spblas requires mdspan.  Compile with a C++23 compiler "
 
 #endif
 
-namespace spblas{
+namespace spblas {
 // Define templated aliases for col_major (layout_left) and row_major
 // (layout_right) mdspan types.
-template <typename I, typename T>
+template <typename T, typename I>
 using mdspan_col_major = __mdspan::mdspan<
-    T,
-    __mdspan::extents<I, __mdspan::dynamic_extent, __mdspan::dynamic_extent>,
+    T, __mdspan::extents<I, __mdspan::dynamic_extent, __mdspan::dynamic_extent>,
     __mdspan::layout_left>;
 
-template <typename I, typename T>
+template <typename T, typename I>
 using mdspan_row_major = __mdspan::mdspan<
-    T,
-    __mdspan::extents<I, __mdspan::dynamic_extent, __mdspan::dynamic_extent>,
+    T, __mdspan::extents<I, __mdspan::dynamic_extent, __mdspan::dynamic_extent>,
     __mdspan::layout_right>;
-}
+} // namespace spblas
diff --git a/test/gtest/conjugate_test.cpp b/test/gtest/conjugate_test.cpp
index fcfea76..835fd71 100644
--- a/test/gtest/conjugate_test.cpp
+++ b/test/gtest/conjugate_test.cpp
@@ -100,8 +100,8 @@ TEST(Conjugate, SpMM_MatrixConjugated) {
       auto [b_values, b_shape] = spblas::generate_dense<T>(k, n);
 
       std::vector<T> c_values(m * n, T(0.0f, 0.0f));
-      spblas::mdspan_row_major b(b_values.data(), k, n);
-      spblas::mdspan_row_major c(c_values.data(), m, n);
+      spblas::mdspan_row_major<T, I> b(b_values.data(), k, n);
+      spblas::mdspan_row_major<T, I> c(c_values.data(), m, n);
 
       spblas::multiply(spblas::conjugated(a), b, c);
 
diff --git a/test/gtest/mdspan_overlays.cpp b/test/gtest/mdspan_overlays.cpp
index 10b597b..9023918 100644
--- a/test/gtest/mdspan_overlays.cpp
+++ b/test/gtest/mdspan_overlays.cpp
@@ -9,16 +9,14 @@ TEST(Mdspan, positive_row_major) {
 
   for (auto m : {1, 2, 4}) {
     for (auto n : {1, 2, 4}) {
-
       auto [b_values, b_shape] = spblas::generate_dense<T>(m, n);
-      spblas::mdspan_row_major<I, T> b(b_values.data(), m, n);
-
+      spblas::mdspan_row_major<T, I> b(b_values.data(), m, n);
 
       // Traverse by row in inner loop to immitade row-major
       T* tmp = b_values.data();
       for (I i = 0; i < m; ++i) {
         for (I j = 0; j < n; ++j) {
-            EXPECT_EQ((b[i,j]), *(tmp++));
+          EXPECT_EQ((b[i, j]), *(tmp++));
         }
       }
     }
@@ -26,20 +24,19 @@ TEST(Mdspan, positive_row_major) {
 }
 
 TEST(Mdspan, postive_col_major) {
-  using T = float;
+  using T = double;
   using I = spblas::index_t;
 
   for (auto m : {1, 2, 4}) {
     for (auto n : {1, 2, 4}) {
-
       auto [b_values, b_shape] = spblas::generate_dense<T>(m, n);
-      spblas::mdspan_col_major<I, T> b(b_values.data(), m, n);
+      spblas::mdspan_col_major<T, I> b(b_values.data(), m, n);
 
       // Traverse by column in inner loop to immitade col-major
       T* tmp = b_values.data();
       for (I j = 0; j < n; ++j) {
         for (I i = 0; i < m; ++i) {
-          EXPECT_EQ((b[i,j]), *(tmp++));
+          EXPECT_EQ((b[i, j]), *(tmp++));
         }
       }
     }
@@ -47,13 +44,13 @@ TEST(Mdspan, postive_col_major) {
 }
 
 TEST(Mdspan, negative_row_major) {
-  using T = float;
-  using I = spblas::index_t;
+  using T = double;
+  using I = int32_t;
 
   for (auto [m, n] : {std::pair{2, 4}, std::pair{4, 2}}) {
 
     auto [b_values, b_shape] = spblas::generate_dense<T>(m, n);
-    spblas::mdspan_row_major<I, T> b(b_values.data(), m, n);
+    spblas::mdspan_row_major<T, I> b(b_values.data(), m, n);
 
     // Traverse by column in inner loop to not immitade row-major
     T* tmp = b_values.data();
@@ -71,13 +68,13 @@ TEST(Mdspan, negative_row_major) {
 }
 
 TEST(Mdspan, negative_col_major) {
-  using T = float;
-  using I = spblas::index_t;
+  using T = int32_t;
+  using I = int64_t;
 
   for (auto [m, n] : {std::pair{2, 4}, std::pair{4, 2}}) {
 
     auto [b_values, b_shape] = spblas::generate_dense<T>(m, n);
-    spblas::mdspan_col_major<I, T> b(b_values.data(), m, n);
+    spblas::mdspan_col_major<T, I> b(b_values.data(), m, n);
 
     // Traverse by row in inner loop to not immitade col-major
     T* tmp = b_values.data();
diff --git a/test/gtest/spmm_test.cpp b/test/gtest/spmm_test.cpp
index 45f358e..118a4d0 100644
--- a/test/gtest/spmm_test.cpp
+++ b/test/gtest/spmm_test.cpp
@@ -18,8 +18,8 @@ TEST(CsrView, SpMM) {
 
       std::vector<T> c_values(m * n, 0);
 
-      spblas::mdspan_row_major b(b_values.data(), k, n);
-      spblas::mdspan_row_major c(c_values.data(), m, n);
+      spblas::mdspan_row_major<T, I> b(b_values.data(), k, n);
+      spblas::mdspan_row_major<T, I> c(c_values.data(), m, n);
 
       spblas::multiply(a, b, c);
 

From 21257d8d43f6a1dde3a41e95f8cf590338c2052d Mon Sep 17 00:00:00 2001
From: hweichel <hweichel@mathworks.com>
Date: Wed, 25 Mar 2026 11:32:33 -0400
Subject: [PATCH 03/16] more indentation fixes

---
 examples/simple_spmm.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/simple_spmm.cpp b/examples/simple_spmm.cpp
index fe12a4d..436ae29 100644
--- a/examples/simple_spmm.cpp
+++ b/examples/simple_spmm.cpp
@@ -7,7 +7,7 @@ int main(int argc, char** argv) {
   using namespace spblas;
 
   using T = float;
-  
+
   spblas::index_t m = 100;
   spblas::index_t n = 10;
   spblas::index_t k = 100;

From 872417b1f97c992c07069ed3abc45fa49fe6ec2c Mon Sep 17 00:00:00 2001
From: hweichel <hweichel@mathworks.com>
Date: Wed, 25 Mar 2026 12:09:36 -0400
Subject: [PATCH 04/16] make mdspan access work for all mdspan flavours

---
 test/gtest/conjugate_test.cpp  | 10 ++++------
 test/gtest/mdspan_overlays.cpp | 20 ++++++++++++++++----
 2 files changed, 20 insertions(+), 10 deletions(-)

diff --git a/test/gtest/conjugate_test.cpp b/test/gtest/conjugate_test.cpp
index 835fd71..bde55f5 100644
--- a/test/gtest/conjugate_test.cpp
+++ b/test/gtest/conjugate_test.cpp
@@ -133,8 +133,8 @@ TEST(Conjugate, SpMM_DenseConjugated) {
       auto [b_values, b_shape] = spblas::generate_dense<T>(k, n);
 
       std::vector<T> c_values(m * n, T(0.0f, 0.0f));
-      spblas::mdspan_row_major b(b_values.data(), k, n);
-      spblas::mdspan_row_major c(c_values.data(), m, n);
+      spblas::mdspan_row_major<T, I> b(b_values.data(), k, n);
+      spblas::mdspan_row_major<T, I> c(c_values.data(), m, n);
 
       spblas::multiply(a, spblas::conjugated(b), c);
 
@@ -234,8 +234,6 @@ TEST(Conjugate, SpMV_MatrixConjugated) {
 }
 
 TEST(Conjugate, SpMM_MatrixConjugated) {
-  namespace md = spblas::__mdspan;
-
   for (auto&& [m, k, nnz] : util::dims) {
     for (auto n : {1, 8, 32}) {
       auto [values, colptr, rowind, shape, _] =
@@ -245,8 +243,8 @@ TEST(Conjugate, SpMM_MatrixConjugated) {
       auto [b_values, b_shape] = spblas::generate_dense<T>(k, n);
 
       std::vector<T> c_values(m * n, T(0.0f, 0.0f));
-      md::mdspan b(b_values.data(), k, n);
-      md::mdspan c(c_values.data(), m, n);
+      spblas::mdspan_row_major<T, I> b(b_values.data(), k, n);
+      spblas::mdspan_row_major<T, I> c(c_values.data(), m, n);
 
       spblas::multiply(spblas::conjugated(a), b, c);
 
diff --git a/test/gtest/mdspan_overlays.cpp b/test/gtest/mdspan_overlays.cpp
index 9023918..7581c4e 100644
--- a/test/gtest/mdspan_overlays.cpp
+++ b/test/gtest/mdspan_overlays.cpp
@@ -3,6 +3,18 @@
 #include "util.hpp"
 #include <spblas/spblas.hpp>
 
+// Accessing the data inside mdspan differs between different mdspan
+// implementations. The portable way is quite heavy and the following helper
+// makes the tests themselves easier to read.
+template <class T>
+decltype(auto) md_at(T& m, typename T::index_type i, typename T::index_type j) {
+#if defined(__cpp_multidimensional_subscript)
+  return m[i, j];
+#else
+  return m(i, j);
+#endif
+}
+
 TEST(Mdspan, positive_row_major) {
   using T = float;
   using I = spblas::index_t;
@@ -16,7 +28,7 @@ TEST(Mdspan, positive_row_major) {
       T* tmp = b_values.data();
       for (I i = 0; i < m; ++i) {
         for (I j = 0; j < n; ++j) {
-          EXPECT_EQ((b[i, j]), *(tmp++));
+          EXPECT_EQ(md_at(b, i, j), *(tmp++));
         }
       }
     }
@@ -36,7 +48,7 @@ TEST(Mdspan, postive_col_major) {
       T* tmp = b_values.data();
       for (I j = 0; j < n; ++j) {
         for (I i = 0; i < m; ++i) {
-          EXPECT_EQ((b[i, j]), *(tmp++));
+          EXPECT_EQ(md_at(b, i, j), *(tmp++));
         }
       }
     }
@@ -61,7 +73,7 @@ TEST(Mdspan, negative_row_major) {
           tmp++;
           continue;
         }
-        EXPECT_NE((b[i, j]), *(tmp++));
+        EXPECT_NE(md_at(b, i, j), *(tmp++));
       }
     }
   }
@@ -85,7 +97,7 @@ TEST(Mdspan, negative_col_major) {
           tmp++;
           continue;
         }
-        EXPECT_NE((b[i, j]), *(tmp++));
+        EXPECT_NE(md_at(b, i, j), *(tmp++));
       }
     }
   }

From 3127a15da8748eb7c5b8d791d4252af51232cef4 Mon Sep 17 00:00:00 2001
From: hweichel <hweichel@mathworks.com>
Date: Wed, 25 Mar 2026 15:12:43 -0400
Subject: [PATCH 05/16] Add first draft or MATLAB MEX files and add matlab
 runner to GIT

---
 .github/workflows/ci.yml                    |  22 +++
 examples/MATLAB_MEX/build_mex_APIs.m        |  29 ++++
 examples/MATLAB_MEX/simple_spgemm_mex.cpp   | 105 +++++++++++++
 examples/MATLAB_MEX/simple_spmm_mex.cpp     | 133 +++++++++++++++++
 examples/MATLAB_MEX/simple_spmm_mex.m       |  18 +++
 examples/MATLAB_MEX/simple_spmv_mex.cpp     | 134 +++++++++++++++++
 examples/MATLAB_MEX/simple_spmv_mex.m       |  18 +++
 examples/MATLAB_MEX/simple_sptrsv_mex.cpp   | 152 +++++++++++++++++++
 examples/MATLAB_MEX/testSparseBLASMexAPIs.m | 155 ++++++++++++++++++++
 9 files changed, 766 insertions(+)
 create mode 100644 examples/MATLAB_MEX/build_mex_APIs.m
 create mode 100755 examples/MATLAB_MEX/simple_spgemm_mex.cpp
 create mode 100644 examples/MATLAB_MEX/simple_spmm_mex.cpp
 create mode 100644 examples/MATLAB_MEX/simple_spmm_mex.m
 create mode 100644 examples/MATLAB_MEX/simple_spmv_mex.cpp
 create mode 100644 examples/MATLAB_MEX/simple_spmv_mex.m
 create mode 100755 examples/MATLAB_MEX/simple_sptrsv_mex.cpp
 create mode 100644 examples/MATLAB_MEX/testSparseBLASMexAPIs.m

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 3d17a73..8682b3c 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -8,6 +8,28 @@ on:
   pull_request:
 
 jobs:
+  mex_checks:
+    runs-on: 'ubuntu-latest'
+    steps:
+    - uses: actions/checkout@v4
+    - name: CMake
+      run: |
+        module load cmake
+        cmake -B build
+    - name: Build
+      run: |
+        make -C build -j `nproc`
+    - name: Set up MATLAB
+      uses: matlab-actions/setup-matlab@v2.5.0
+      with:
+        release: R2025b
+    - name: Run script
+      uses: matlab-actions/run-command@v2.2.1
+      with:
+        command: |
+          cd("examples/MATLAB_MEX");
+          build_mex_APIs('../../include')
+  
   checks:
     runs-on: 'ubuntu-latest'
     steps:
diff --git a/examples/MATLAB_MEX/build_mex_APIs.m b/examples/MATLAB_MEX/build_mex_APIs.m
new file mode 100644
index 0000000..57256cb
--- /dev/null
+++ b/examples/MATLAB_MEX/build_mex_APIs.m
@@ -0,0 +1,29 @@
+function build_mex_APIs(incl_path, verbose, debug)
+%BUILD_MEX_APIS - Function to build all available mex APIs
+%
+% First input must be the path to the SparseBLAS INCLUDE folder.
+% Second and third are optional logical inputs to activate VERBOSE or and
+% DEBUG mode.
+%
+%  The calling syntaxes are:
+% 		build_mex_APIs("PATH_TO_SparseBLAS_INCLUDE")
+% 		build_mex_APIs("PATH_TO_SparseBLAS_INCLUDE", true)
+% 		build_mex_APIs("PATH_TO_SparseBLAS_INCLUDE", false, true)
+
+% Set default options
+opts = {['-I' incl_path], "-O", "-R2018a", "CXXFLAGS=$CFLAGS -std=c++20"};
+
+% Parse optional VERBOSE option
+if nargin > 1 && verbose
+    opts = [opts, "-v"];
+end
+
+% Parse optional DEBUG option
+if nargin > 2 && debug
+    opts = [opts, "-g"];
+end
+
+% Compile all APIs
+mex("simple_spmv_mex.cpp", opts{:});
+mex("simple_spmm_mex.cpp", opts{:});
+end
\ No newline at end of file
diff --git a/examples/MATLAB_MEX/simple_spgemm_mex.cpp b/examples/MATLAB_MEX/simple_spgemm_mex.cpp
new file mode 100755
index 0000000..3ed82cd
--- /dev/null
+++ b/examples/MATLAB_MEX/simple_spgemm_mex.cpp
@@ -0,0 +1,105 @@
+// Includes from SparseBLAS
+#include <spblas/spblas.hpp>
+
+// Includes for MEX
+#include <matrix.h>
+#include <mex.h>
+
+// General includes
+#include <complex> // Support complex inputs
+#include <iostream>
+// C = A * B
+// prhs[0] = A, prhs[1] = B
+// plhs[0] = C
+void mexFunction(int nlhs, mxArray *plhs[],
+                 int nrhs, const mxArray *prhs[]) {
+  
+  // General input checking
+  if (nrhs!=2) {
+    mexErrMsgIdAndTxt("SparseBLAS_Mex:WrongNumberOfInputs",
+                      "Function needs 2 inputs.");
+  }
+  if (nlhs>1) {
+    mexErrMsgIdAndTxt("SparseBLAS_Mex:WrongNumberOfOutputs",
+                      "Function returns only 1 output.");
+  }
+  if (!mxIsDouble(prhs[0]) || !mxIsDouble(prhs[1])) {
+    mexErrMsgIdAndTxt("SparseBLAS_Mex:UnsupportedClass",
+                      "Only double inputs supported.");
+  }
+  if(!mxIsSparse(prhs[0]) || !mxIsSparse(prhs[1])) {
+    mexErrMsgIdAndTxt("SparseBLAS_Mex:InputsNotSparse",
+                      "All inputs must be sparse.");
+  }
+  
+
+  /*
+  // Reference SparseBLAS can handle inputs with mixed complexity,
+  // however, the vendor implementations need all inputs of the same
+  // complexity, hence, this example also insists on having matching 
+  // complexity.
+  */
+  // For now, allow only real to avoid switch-yards.  
+  if(mxIsComplex(prhs[0]) || mxIsComplex(prhs[1])) {
+    mexErrMsgIdAndTxt("SparseBLAS_Mex:NonReal",
+                      "Only real valued inputs supported.");
+  }
+
+  // Gather dimensions
+  mwIndex m = mxGetM(prhs[0]);
+  mwIndex k = mxGetN(prhs[0]);
+  mwIndex n = mxGetN(prhs[1]);
+  mwIndex nnzA = mxGetNzmax(prhs[0]);
+  mwIndex nnzB = mxGetNzmax(prhs[1]);
+
+  // Check dimensions of second input
+  if (mxGetM(prhs[1]) != k) {
+     mexErrMsgIdAndTxt("SparseBLAS_Mex:InnerDimWrong",
+                       "Second input must be an array with k rows, "
+                       "i.e., number of columns of first input.");
+  }
+
+  // Fill csc_view for A with:
+  // - double* values
+  // - mwIndex* colptr
+  // - mwIndex* rowind
+  // - {mwIndex m, mwIndex k} (shape)
+  // - mwIndex nnzA
+  spblas::csc_view<double> A(static_cast<double*>(mxGetData(prhs[0])),
+                     mxGetJc(prhs[0]), mxGetIr(prhs[0]), {m, k}, nnzA);
+
+  // Fill csc_view for B with:
+  // - double* values
+  // - mwIndex* colptr
+  // - mwIndex* rowind
+  // - {mwIndex k, mwIndex n} (shape)
+  // - mwIndex nnzB
+  spblas::csc_view<double> B(static_cast<double*>(mxGetData(prhs[1])),
+                     mxGetJc(prhs[1]), mxGetIr(prhs[1]), {k, n}, nnzB);
+
+  // Placeholder csc_view for C with:
+  // - {mwIndex m, mwIndex n} (shape)
+  spblas::csc_view<double> C(nullptr, nullptr, nullptr, {m, n}, 0);
+
+  // C = A * B - Compute stage
+  auto info = multiply_compute(A, B, C);
+
+  // Allocate mxArray for output C and pre-computed nnzC
+  mwIndex nnzC = info.result_nnz();
+  plhs[0] = mxCreateSparse(m, n, nnzC, mxCOMPLEX);
+  
+  // Update csc_view for C with values, colptr, and rowind of plhs[0],
+  // using spans to ensure correct length of the arrays
+  std::span<double> C_values(static_cast<double*>(mxGetData(plhs[0])), nnzC);
+  std::span<mwIndex> C_colptr(mxGetJc(plhs[0]), n+1);  
+  std::span<mwIndex> C_rowind(mxGetIr(plhs[0]), nnzC);  
+  C.update(C_values, C_colptr, C_rowind);
+    
+  // C = A * B - Fill stage
+  multiply_fill(info, A, B, C);
+}
+
+// Compile from within MATLAB via:
+// mex simple_spgemm_mex.cpp -R2018a -I{PATH_TO_SparseBLAS_INCLUDE} 'CXXFLAGS=$CFLAGS -fobjc-arc -stdlib=libc++ -std=c++20'
+//
+// Add '-g' to build in Debug mode if needed (activates asserts)
diff --git a/examples/MATLAB_MEX/simple_spmm_mex.cpp b/examples/MATLAB_MEX/simple_spmm_mex.cpp
new file mode 100644
index 0000000..7369759
--- /dev/null
+++ b/examples/MATLAB_MEX/simple_spmm_mex.cpp
@@ -0,0 +1,133 @@
+// Includes from SparseBLAS
+#include <spblas/spblas.hpp>
+
+// Includes for MEX
+#include <matrix.h>
+#include <mex.h>
+
+// General includes
+#include <complex> // Support complex inputs
+
+template <typename T>
+void spmmDriver(mxArray* mxY, const mxArray* mxA, const mxArray* mxX,
+                const mxArray* mxAlpha){
+  // Gather dimensions
+  mwIndex m = mxGetM(mxA);
+  mwIndex k = mxGetN(mxA);
+  mwIndex n = mxGetN(mxX);
+
+  // Fill csc_view with:
+  // - T* values
+  // - mwIndex* colptr
+  // - mwIndex* rowind
+  // - {mwIndex m, mwIndex k} (shape)
+  // - mwIndex nnz
+  spblas::csc_view<const T, mwIndex> A(static_cast<const T*>(mxGetData(mxA)),
+                                       mxGetJc(mxA), mxGetIr(mxA), {m, k},
+                                       mxGetJc(mxA)[k]);
+
+  // Wrap X in an mdspan of size k-by-n
+  spblas::mdspan_col_major<T, mwIndex> X(static_cast<T*>(mxGetData(mxX)), k, n);
+
+  // Wrap output Y in an mdspan of size m-by-n
+  spblas::mdspan_col_major<T, mwIndex> Y(static_cast<T*>(mxGetData(mxY)), m, n);
+
+  // Store and apply scaling factor alpha, if provided and not empty
+  T alpha = T(1);
+  if (mxAlpha != nullptr && !mxIsEmpty(mxAlpha)) {
+    // We don't use mxGetScalar as it doesn't work for complex 
+    alpha = *(static_cast<T*>(mxGetData(mxAlpha)));
+  }
+  auto alpha_A = spblas::scaled(alpha, A);
+
+  // Y = (alpha * A) * X
+  spblas::multiply(alpha_A, X, Y);
+}
+
+// Y = (alpha *) A * X
+// prhs[0] = A, prhs[1] = X (optional: prhs[2] = alpha)
+// plhs[0] = Y
+void mexFunction(int nlhs, mxArray *plhs[],
+                 int nrhs, const mxArray *prhs[]) {
+
+  // General input checking
+  if (nrhs < 2 || nrhs > 3) {
+    mexErrMsgIdAndTxt("SparseBLAS_Mex:WrongNumberOfInputs",
+                      "Function needs 2 or 3 inputs.");
+  }
+  if (nlhs > 1) {
+    mexErrMsgIdAndTxt("SparseBLAS_Mex:WrongNumberOfOutputs",
+                      "Function returns only 1 output.");
+  }
+  if (mxGetClassID(prhs[0]) != mxGetClassID(prhs[1])|| 
+      ((nrhs == 3) && mxGetClassID(prhs[1]) != mxGetClassID(prhs[2]))) {
+      mexErrMsgIdAndTxt("SparseBLAS_Mex:ClassMismatch",
+                      "All inputs must have matching type.");
+  }
+  if (!mxIsDouble(prhs[0]) && !mxIsSingle(prhs[0])) {
+    mexErrMsgIdAndTxt("SparseBLAS_Mex:NonFloat",
+                      "All inputs must be single or double.");
+  }
+  if(!mxIsSparse(prhs[0])) {
+    mexErrMsgIdAndTxt("SparseBLAS_Mex:FirstInputNotSparse",
+                      "First input must be sparse.");
+  }
+  if(mxIsSparse(prhs[1])) {
+    mexErrMsgIdAndTxt("SparseBLAS_Mex:SecondInputNotDense",
+                      "Second input must be dense.");
+  }
+
+  // Reference SparseBLAS can handle inputs with mixed complexity,
+  // however, the vendor implementations need all inputs of the same
+  // complexity, hence, this example also insists on having matching 
+  // complexity.
+  if(mxIsComplex(prhs[0]) != mxIsComplex(prhs[1]) || 
+      ((nrhs == 3) && mxIsComplex(prhs[1]) != mxIsComplex(prhs[2]))) {
+      mexErrMsgIdAndTxt("SparseBLAS_Mex:ComplexityMismatch",
+                      "All inputs must have matching complexity.");
+  }
+
+  // Gather dimensions
+  mwIndex m = mxGetM(prhs[0]);
+  mwIndex k = mxGetN(prhs[0]);
+  mwIndex n = mxGetN(prhs[1]);
+
+  // Check dimensions of second input
+  if (mxGetM(prhs[1]) != k) {
+     mexErrMsgIdAndTxt("SparseBLAS_Mex:InnerDimWrong",
+                       "Second input must be an array with k rows, "
+                       "i.e., number of columns of first input.");
+  }
+
+  // Calculate in complex (we check for matching complexity above)
+  bool isCmplx = mxIsComplex(prhs[0]);
+
+  // Type dispatch for double or single, each as real or complex flavor
+  if (mxIsDouble(prhs[0])) {
+    if (isCmplx) {
+      plhs[0] = mxCreateNumericMatrix(m, n, mxDOUBLE_CLASS, mxCOMPLEX);
+      spmmDriver<std::complex<double>>(plhs[0], prhs[0], prhs[1],
+                                       nrhs == 3 ? prhs[2] : nullptr);
+    } else {
+      plhs[0] = mxCreateNumericMatrix(m, n, mxDOUBLE_CLASS, mxREAL);
+      spmmDriver<double>(plhs[0], prhs[0], prhs[1],
+                         nrhs == 3 ? prhs[2] : nullptr);
+    }
+  } else {
+    mxAssert(mxIsSingle(prhs[0]), "Invalid data type");
+    if (isCmplx) {
+      plhs[0] = mxCreateNumericMatrix(m, n, mxSINGLE_CLASS, mxCOMPLEX);
+      spmmDriver<std::complex<float>>(plhs[0], prhs[0], prhs[1],
+                                      nrhs == 3 ? prhs[2] : nullptr);
+    } else {
+      plhs[0] = mxCreateNumericMatrix(m, n, mxSINGLE_CLASS, mxREAL);
+      spmmDriver<float>(plhs[0], prhs[0], prhs[1],
+                        nrhs == 3 ? prhs[2] : nullptr);
+    }
+  }
+}
+
+// Compile from within MATLAB via:
+// mex simple_spmm_mex.cpp -R2018a -I{PATH_TO_SparseBLAS_INCLUDE} 'CXXFLAGS=$CFLAGS -std=c++20'
+//
+// Add '-g' to build in Debug mode if needed (activates asserts)
diff --git a/examples/MATLAB_MEX/simple_spmm_mex.m b/examples/MATLAB_MEX/simple_spmm_mex.m
new file mode 100644
index 0000000..dc09a4c
--- /dev/null
+++ b/examples/MATLAB_MEX/simple_spmm_mex.m
@@ -0,0 +1,18 @@
+% simple_spmm_mex - Sparse matrix times dense matrix multiplication
+%  simple_smpm_mex.c - example in MATLAB External Interfaces
+% 
+%  Multiplies a (potentially scaled by a scalar alpha) sparse MxK matrix
+%  with a dense KxN matrix and outputs a dense MxN matrix:
+%       Y = A * X  or  Y = alpha * A * X
+% 
+%  The calling syntaxes are:
+% 		Y = simple_smpv_mex(A, X)
+% 		Y = simple_smpv_mex(A, X, alpha)
+% 
+%  The following restrictions apply:
+%    * A must be sparse
+%    * X must be dense
+%    * Number of columns in A and rows in X must match
+%    * All inputs must have the same data type and complexity
+%
+%  This is a MEX-file for MATLAB.
\ No newline at end of file
diff --git a/examples/MATLAB_MEX/simple_spmv_mex.cpp b/examples/MATLAB_MEX/simple_spmv_mex.cpp
new file mode 100644
index 0000000..840af67
--- /dev/null
+++ b/examples/MATLAB_MEX/simple_spmv_mex.cpp
@@ -0,0 +1,134 @@
+// Includes from SparseBLAS
+#include <spblas/spblas.hpp>
+
+// Includes for MEX
+#include <matrix.h>
+#include <mex.h>
+
+// General includes
+#include <complex> // Support complex inputs
+
+template <typename T>
+void spmvDriver(mxArray* mxY, const mxArray* mxA, const mxArray* mxX,
+                const mxArray* mxAlpha){
+
+  // Gather dimensions
+  mwIndex m = mxGetM(mxA);
+  mwIndex n = mxGetN(mxA);
+
+  // Fill csc_view with:
+  // - T* values
+  // - mwIndex* colptr
+  // - mwIndex* rowind
+  // - {mwIndex m, mwIndex n} (shape)
+  // - mwIndex nnz
+  spblas::csc_view<const T, mwIndex> A(static_cast<const T*>(mxGetData(mxA)),
+                                       mxGetJc(mxA), mxGetIr(mxA), {m, n},
+                                       mxGetJc(mxA)[n]);
+  // Wrap x in a span of length n
+  std::span<const T> x(static_cast<const T*>(mxGetData(mxX)), n);
+
+  // Wrap output y in a span of length m
+  std::span<T> y(static_cast<T*>(mxGetData(mxY)), m);
+
+  // Store and apply scaling factor alpha, if provided and not empty
+  T alpha = T(1);
+  if (mxAlpha != nullptr && !mxIsEmpty(mxAlpha)) {
+    // We don't use mxGetScalar as it doesn't work for complex 
+    alpha = *(static_cast<T*>(mxGetData(mxAlpha)));
+  }
+  auto alpha_A = spblas::scaled(alpha, A);
+
+  // y = (alpha * A) * x
+  spblas::multiply(alpha_A, x, y);
+}
+
+// y = (alpha *) A * x
+// prhs[0] = A, prhs[1] = x (optional: prhs[2] = alpha)
+// plhs[0] = y
+void mexFunction(int nlhs, mxArray *plhs[],
+                 int nrhs, const mxArray *prhs[]) {
+
+  // General input checking
+  if (nrhs < 2 || nrhs > 3) {
+    mexErrMsgIdAndTxt("SparseBLAS_Mex:WrongNumberOfInputs",
+                      "Function needs 2 or 3 inputs.");
+  }
+  if (nlhs > 1) {
+    mexErrMsgIdAndTxt("SparseBLAS_Mex:WrongNumberOfOutputs",
+                      "Function returns only 1 output.");
+  }
+  if (mxGetClassID(prhs[0]) != mxGetClassID(prhs[1])|| 
+      ((nrhs == 3) && mxGetClassID(prhs[1]) != mxGetClassID(prhs[2]))) {
+    mexErrMsgIdAndTxt("SparseBLAS_Mex:ClassMismatch",
+                      "All inputs must have matching type.");
+  }
+  if (!mxIsDouble(prhs[0]) && !mxIsSingle(prhs[0])) {
+    mexErrMsgIdAndTxt("SparseBLAS_Mex:NonFloat",
+                      "All inputs must be single or double.");
+  }
+  if (!mxIsDouble(prhs[0]) && !mxIsSingle(prhs[0])) {
+    mexErrMsgIdAndTxt("SparseBLAS_Mex:NonFloat",
+                      "All inputs must be single or double.");
+  }
+  if(!mxIsSparse(prhs[0])) {
+    mexErrMsgIdAndTxt("SparseBLAS_Mex:FirstInputNotSparse",
+                      "First input must be sparse.");
+  }
+  if(mxIsSparse(prhs[1])) {
+    mexErrMsgIdAndTxt("SparseBLAS_Mex:SecondInputNotDense",
+                      "Second input must be dense.");
+  }
+
+  // Reference SparseBLAS can handle inputs with mixed complexity,
+  // however, the vendor implementations need all inputs of the same
+  // complexity, hence, this example also insists on having matching 
+  // complexity.
+  if(mxIsComplex(prhs[0]) != mxIsComplex(prhs[1]) || 
+      ((nrhs == 3) && mxIsComplex(prhs[1]) != mxIsComplex(prhs[2]))) {
+    mexErrMsgIdAndTxt("SparseBLAS_Mex:ComplexityMismatch",
+                      "All inputs must have matching complexity.");
+  }
+
+  // Gather dimensions
+  mwIndex m = mxGetM(prhs[0]);
+  mwIndex n = mxGetN(prhs[0]);
+  // Check dimensions of second input
+  if ((mxGetM(prhs[1]) != n) && (mxGetN(prhs[1]) != 1)) {
+     mexErrMsgIdAndTxt("SparseBLAS_Mex:InnerDimWrong",
+                       "Second input must be column vector of length n, "
+                       "i.e., number of columns of first input.");
+  }
+
+  // Calculate in complex (we check for matching complexity above)
+  bool isCmplx = mxIsComplex(prhs[0]);
+
+  // Type dispatch for double or single, each as real or complex flavor
+  if (mxIsDouble(prhs[0])) {
+    if (isCmplx) {
+      plhs[0] = mxCreateNumericMatrix(m, 1, mxDOUBLE_CLASS, mxCOMPLEX);
+      spmvDriver<std::complex<double>>(plhs[0], prhs[0], prhs[1],
+                                       nrhs == 3 ? prhs[2] : nullptr);
+    } else {
+      plhs[0] = mxCreateNumericMatrix(m, 1, mxDOUBLE_CLASS, mxREAL);
+      spmvDriver<double>(plhs[0], prhs[0], prhs[1],
+                         nrhs == 3 ? prhs[2] : nullptr);
+    }
+  } else {
+    mxAssert(mxIsSingle(prhs[0]), "Invalid data type");
+    if (isCmplx) {
+      plhs[0] = mxCreateNumericMatrix(m, 1, mxSINGLE_CLASS, mxCOMPLEX);
+      spmvDriver<std::complex<float>>(plhs[0], prhs[0], prhs[1],
+                                      nrhs == 3 ? prhs[2] : nullptr);
+    } else {
+      plhs[0] = mxCreateNumericMatrix(m, 1, mxSINGLE_CLASS, mxREAL);
+      spmvDriver<float>(plhs[0], prhs[0], prhs[1],
+                        nrhs == 3 ? prhs[2] : nullptr);
+    }
+  }
+}
+
+// Compile from within MATLAB via:
+// mex simple_spmv_mex.cpp -R2018a -I{PATH_TO_SparseBLAS_INCLUDE} 'CXXFLAGS=$CFLAGS -std=c++20'
+//
+// Add '-g' to build in Debug mode if needed (activates asserts)
diff --git a/examples/MATLAB_MEX/simple_spmv_mex.m b/examples/MATLAB_MEX/simple_spmv_mex.m
new file mode 100644
index 0000000..31a4f3c
--- /dev/null
+++ b/examples/MATLAB_MEX/simple_spmv_mex.m
@@ -0,0 +1,18 @@
+% simple_spmv_mex - Sparse matrix times dense vector multiplication
+%  simple_smpv_mex.c - example in MATLAB External Interfaces
+% 
+%  Multiplies a (potentially scaled by a scalar alpha) sparse MxN matrix
+%  with a dense Nx1 column vector and outputs a dense Mx1 column vector:
+%       y = A * x  or  y = alpha * A * x
+% 
+%  The calling syntaxes are:
+% 		y = simple_smpv_mex(A, x)
+% 		y = simple_smpv_mex(A, x, alpha)
+% 
+%  The following restrictions apply:
+%    * A must be sparse
+%    * x must be dense column vector
+%    * Number of columns in A and rows in x must match
+%    * All inputs must have the same data type and complexity
+%
+%  This is a MEX-file for MATLAB.
\ No newline at end of file
diff --git a/examples/MATLAB_MEX/simple_sptrsv_mex.cpp b/examples/MATLAB_MEX/simple_sptrsv_mex.cpp
new file mode 100755
index 0000000..d51e9b1
--- /dev/null
+++ b/examples/MATLAB_MEX/simple_sptrsv_mex.cpp
@@ -0,0 +1,152 @@
+// Includes from SparseBLAS
+#include <spblas/spblas.hpp>
+
+// Includes for MEX
+#include <matrix.h>
+#include <mex.h>
+
+// General includes
+#include <complex> // Support complex inputs
+#include <string>  // Parse uplo input
+
+template <typename T>
+void simple_sptrsv_mex(mwIndex m, mwIndex n, mwIndex nnz, mxArray * x_out,
+                     int nrhs, const mxArray *prhs[]){
+  using namespace spblas;
+
+  // Fill csc_view with:
+  // - T* values
+  // - mwIndex* colptr
+  // - mwIndex* rowind
+  // - {mwIndex m, mwIndex n} (shape)
+  // - mwIndex nnz
+  csr_view<T> A(static_cast<T*>(mxGetData(prhs[0])), mxGetJc(prhs[0]),
+                mxGetIr(prhs[0]), {m, n}, nnz);
+
+  csr_view<T> B;
+
+    
+  // Wrap b in a span of length n
+  std::span<T> b(static_cast<T*>(mxGetData(prhs[1])), n);
+
+  // Wrap output y in a span of length m
+  std::span<T> x(static_cast<T*>(mxGetData(x_out)), m);
+
+  bool useUpper = true;
+  if (nrhs>2) {
+    char uplo_in[5];
+    int info = mxGetString(prhs[2], uplo_in, 6); 
+    if (!strcmp(uplo_in, "lower")) {
+      useUpper = false;
+    } else {
+      if (strcmp(uplo_in, "upper")) {
+        mexErrMsgIdAndTxt("SparseBLAS_Mex:WrongUplo",
+                          "3rd input must be ""upper"" or ""lower"".");
+      }
+    } 
+  }
+
+  // Store and apply scaling factor alpha, if provided
+  T alpha = 1.0;
+  if (nrhs==4) {
+    alpha = static_cast<T>(mxGetScalar(prhs[2]));
+  }
+  auto alpha_b= scaled(alpha, b);
+
+  // solve for x:  uplo(A) * x = alpha * b
+  // triangular_solve(A, spblas::lower_triangle_t{},
+  //                  spblas::explicit_diagonal_t{}, alpha_b, x);
+  if (useUpper) {
+    triangular_solve(A, spblas::upper_triangle_t{},
+                     spblas::explicit_diagonal_t{}, alpha_b, x);
+  } else {
+
+    triangular_solve(A, spblas::lower_triangle_t{},
+                     spblas::explicit_diagonal_t{}, alpha_b, x);
+  }
+}
+
+// y = (alpha *) A * x
+// prhs[0] = A, prhs[1] = x (optional: prhs[2] = alpha)
+// plhs[0] = y
+void mexFunction(int nlhs, mxArray *plhs[],
+                 int nrhs, const mxArray *prhs[]) {
+
+  // General input checking
+  if (nrhs<2 && nrhs>3) {
+    mexErrMsgIdAndTxt("SparseBLAS_Mex:WrongNumberOfInputs",
+                      "Function needs 2 or 3 inputs.");
+  }
+  if (nlhs>1) {
+    mexErrMsgIdAndTxt("SparseBLAS_Mex:WrongNumberOfOutputs",
+                      "Function returns only 1 output.");
+  }
+  if (mxGetClassID(prhs[0]) != mxGetClassID(prhs[1])) {
+    mexErrMsgIdAndTxt("SparseBLAS_Mex:ClassMissmatch",
+                      "First and second input must have matching type.");
+  }
+  if(!mxIsSparse(prhs[0])) {
+    mexErrMsgIdAndTxt("SparseBLAS_Mex:FirstInputNotSparse",
+                      "First input must be sparse.");
+  }
+  if(mxIsSparse(prhs[1])) {
+    mexErrMsgIdAndTxt("SparseBLAS_Mex:SecondInputNotDense",
+                      "Second input must be dense.");
+  }
+
+  // Reference SparseBLAS can handle inputs with mixed complexity,
+  // however, the vendor implementations need all inputs of the same
+  // complexity, hence, this example also insists on having matching 
+  // complexity.
+  if(mxIsComplex(prhs[0]) != mxIsComplex(prhs[1])) {
+    mexErrMsgIdAndTxt("SparseBLAS_Mex:ComplexityMissmatch",
+                      "First and second input must have "
+                      "matching complexity.");
+  }
+
+  // Gather dimensions
+  mwIndex m = mxGetM(prhs[0]);
+  mwIndex n = mxGetN(prhs[0]);
+  mwIndex nnz = mxGetNzmax(prhs[0]);
+
+  // Check dimensions of second input
+  if ((mxGetM(prhs[1]) != n) && (mxGetN(prhs[1]) != 1)) {
+     mexErrMsgIdAndTxt("SparseBLAS_Mex:InnerDimWrong",
+                       "Second input must be column vector of length n, "
+                       "i.e., number of columns of first input.");
+  }
+
+  // Set output size
+  mwIndex dims[2] = {m, 1};
+
+  // Calculate in complex (assume matching complexity for both inputs)
+  bool isCmplx = mxIsComplex(prhs[0]);
+
+  // Type dispatch for double, single, or logical
+  if (mxIsDouble(prhs[0])) {
+    if (isCmplx) {
+      plhs[0] = mxCreateNumericArray(2, dims, mxDOUBLE_CLASS, mxCOMPLEX);
+      simple_sptrsv_mex<std::complex<double>>(m, n, nnz, plhs[0], nrhs, prhs);
+    } else {
+      plhs[0] = mxCreateNumericArray(2, dims, mxDOUBLE_CLASS, mxREAL);
+      simple_sptrsv_mex<double>(m, n, nnz, plhs[0], nrhs, prhs);
+    }
+  } else if (mxIsSingle(prhs[0])) {
+    if (isCmplx) {
+      plhs[0] = mxCreateNumericArray(2, dims, mxSINGLE_CLASS, mxCOMPLEX);
+      simple_sptrsv_mex<std::complex<float>>(m, n, nnz, plhs[0], nrhs, prhs);
+    } else {
+      plhs[0] = mxCreateNumericArray(2, dims, mxSINGLE_CLASS, mxREAL);
+      simple_sptrsv_mex<float>(m, n, nnz, plhs[0], nrhs, prhs);
+    }
+  } else {
+    mxAssert(mxIsLogical(prhs[0]), "Invalid data type");
+    plhs[0] = mxCreateNumericArray(2, dims, mxLOGICAL_CLASS, mxREAL);
+    simple_sptrsv_mex<bool>(m, n, nnz, plhs[0], nrhs, prhs);
+  }
+}
+
+// Compile from within MATLAB via:
+// mex simple_spmv_mex.cpp -R2018a -I{PATH_TO_SparseBLAS_INCLUDE} 'CXXFLAGS=$CFLAGS -fobjc-arc -stdlib=libc++ -std=c++20'
+//
+// Add '-g' to build in Debug mode if needed (activates asserts)
\ No newline at end of file
diff --git a/examples/MATLAB_MEX/testSparseBLASMexAPIs.m b/examples/MATLAB_MEX/testSparseBLASMexAPIs.m
new file mode 100644
index 0000000..8164cf6
--- /dev/null
+++ b/examples/MATLAB_MEX/testSparseBLASMexAPIs.m
@@ -0,0 +1,155 @@
+classdef testSparseBLASMexAPIs < matlab.unittest.TestCase
+% testSparseBLASMexAPIs Tests for SparseBLAS MEX APIs
+    
+   methods(TestMethodSetup)
+        function initializeRNG(~)
+            rng(0,'twister');
+        end
+   end
+
+   properties(TestParameter)
+        % Loop over different sizes
+        sizesToTest = struct(...
+            'empty1',    0, ...
+            'empty2',    1, ...
+            'tiny1',     2, ...
+            'tiny2',     5, ...
+            'small1' , 1e1, ...
+            'small2',  5e1, ...
+            'medium1', 1e2, ...
+            'medium2', 5e2, ...
+            'large1',  1e3, ...
+            'large2',  5e3);
+        % Loop over data types and complexities
+        complexity = {'real', 'complex'};
+        datatypes  = {'double', 'single'};
+        % Loop over various shapes of sparse and dense inputs
+        shape      = {'square', 'tall', 'wide'};
+        numberOfRHS = struct(...
+            'singleColumn',      1, ...
+            'doubleColumn',      2, ...
+            'manyColumns',      10, ...
+            'veryManyColumns', 100);
+        % Loop over different scalar scalings
+        alpha      = struct(...
+            'none',          [],   ...
+            'neutral',       1.0,  ...
+            'upScale',       2.3,  ...
+            'upScaleNeg',   -4.3,  ...
+            'downScale',     0.23, ...
+            'downScaleNeg', -0.43);
+    end
+
+    methods (Test)
+        % Test each API
+        function simpleSPMV(testCase, sizesToTest, datatypes, complexity, shape, alpha)
+            %% Create data
+            nRhs = 1;
+            [A, x] = createData(sizesToTest, nRhs, datatypes, complexity, shape);
+
+            %% Calculate reference solution, adapted to MATLAB's special
+            %  case treatment
+            if isempty(alpha)
+                y_exp = A*x;
+            else
+                alpha = cast(alpha, datatypes);
+                if strcmp(complexity, 'complex')
+                    alpha = complex(alpha, alpha);
+                end
+                y_exp = alpha*A*x;
+            end
+
+            % If either input the '*' is scalar, MATLAB calls '.*' which
+            % returns sparse results for dense and sparse mixed inputs,
+            % hence, making the result dense as SparseBLAS doesn't special
+            % case these situations.
+            if isscalar(A) || isscalar(x)
+                y_exp = full(y_exp);
+            end
+
+            % MATLAB strips all-zero imaginary parts during '*'. SparseBLAS
+            % does not, hence, make results complex again if complexity is
+            % set to 'complex'.
+            if strcmp(complexity, 'complex') && isreal(y_exp)
+                y_exp = complex(y_exp);
+            end
+
+            %% Calculate solution via SparseBLAS MEX APIs
+            if isempty(alpha)
+                y_act = simple_spmv_mex(A, x);
+            else
+                y_act = simple_spmv_mex(A, x, alpha);
+            end
+
+            %% Verify results
+            testCase.verifyEqual(y_act, y_exp);
+        end
+
+        function simpleSPMM(testCase, sizesToTest, numberOfRHS, datatypes, complexity, shape, alpha)
+            %% Create data
+            [A, X] = createData(sizesToTest, numberOfRHS, datatypes, complexity, shape);
+
+            %% Calculate reference solution, adapted to MATLAB's special
+            %  case treatment
+            if isempty(alpha)
+                y_exp = A*X;
+            else
+                alpha = cast(alpha, datatypes);
+                if strcmp(complexity, 'complex')
+                    alpha = complex(alpha, alpha);
+                end
+                y_exp = alpha*A*X;
+            end
+
+            % If either input the '*' is scalar, MATLAB calls '.*' which
+            % returns sparse results for dense and sparse mixed inputs,
+            % hence, making the result dense as SparseBLAS doesn't special
+            % case these situations.
+            if isscalar(A) || isscalar(X)
+                y_exp = full(y_exp);
+            end
+
+            % MATLAB strips all-zero imaginary parts during '*'. SparseBLAS
+            % does not, hence, make results complex again if complexity is
+            % set to 'complex'.
+            if strcmp(complexity, 'complex') && isreal(y_exp)
+                y_exp = complex(y_exp);
+            end
+
+            %% Calculate solution via SparseBLAS MEX APIs
+            if isempty(alpha)
+                y_act = simple_spmm_mex(A, X);
+            else
+                y_act = simple_spmm_mex(A, X, alpha);
+            end
+
+            %% Verify results
+            testCase.verifyEqual(y_act, y_exp);
+        end
+    end
+
+end
+
+function [A, X] = createData(n, nRhs, datatypes, complexity, shape)
+% We use this routine to create sparse A and dense X
+lesser_n = floor(n/2);
+switch shape
+    case 'wide'
+        sz = [lesser_n, n];
+    case 'tall'
+        sz = [n, lesser_n];
+    case 'square'
+        sz = [n, n];
+end
+
+switch complexity
+    case 'real'
+        A = sprand(sz(1), sz(2), 0.01, datatypes);
+        X = rand(sz(2), nRhs, datatypes);
+    case 'complex'
+        A = complex(sprand(sz(1), sz(2), 0.01, datatypes), ...
+            sprand(sz(1), sz(2), 0.01, datatypes));
+        X = complex(rand(sz(2), nRhs, datatypes), ...
+            rand(sz(2), nRhs, datatypes));
+end
+end

From adf40f9e02641748ee0f449ccb8c6a60ea83ae12 Mon Sep 17 00:00:00 2001
From: hweichel <hweichel@mathworks.com>
Date: Wed, 25 Mar 2026 15:19:59 -0400
Subject: [PATCH 06/16] Fix MATLAB CI script

---
 .github/workflows/ci.yml | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 8682b3c..e531049 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -14,11 +14,14 @@ jobs:
     - uses: actions/checkout@v4
     - name: CMake
       run: |
-        module load cmake
-        cmake -B build
+        cmake -B build -D CMAKE_INSTALL_PREFIX=install
     - name: Build
       run: |
         make -C build -j `nproc`
+    - name: Install
+      run: |
+        cmake --install build/.
+     
     - name: Set up MATLAB
       uses: matlab-actions/setup-matlab@v2.5.0
       with:
@@ -28,7 +31,7 @@ jobs:
       with:
         command: |
           cd("examples/MATLAB_MEX");
-          build_mex_APIs('../../include')
+          build_mex_APIs("install/include")
   
   checks:
     runs-on: 'ubuntu-latest'

From b2dbdcb02578f2fa30bb080187e17645b2508e0c Mon Sep 17 00:00:00 2001
From: hweichel <hweichel@mathworks.com>
Date: Wed, 25 Mar 2026 15:28:34 -0400
Subject: [PATCH 07/16] Another fix for MATLAB CI script

---
 .github/workflows/ci.yml             | 2 +-
 examples/MATLAB_MEX/build_mex_APIs.m | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index e531049..007bdf5 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -31,7 +31,7 @@ jobs:
       with:
         command: |
           cd("examples/MATLAB_MEX");
-          build_mex_APIs("install/include")
+          build_mex_APIs("../../install/include")
   
   checks:
     runs-on: 'ubuntu-latest'
diff --git a/examples/MATLAB_MEX/build_mex_APIs.m b/examples/MATLAB_MEX/build_mex_APIs.m
index 57256cb..331a39a 100644
--- a/examples/MATLAB_MEX/build_mex_APIs.m
+++ b/examples/MATLAB_MEX/build_mex_APIs.m
@@ -26,4 +26,4 @@ function build_mex_APIs(incl_path, verbose, debug)
 % Compile all APIs
 mex("simple_spmv_mex.cpp", opts{:});
 mex("simple_spmm_mex.cpp", opts{:});
-end
\ No newline at end of file
+end

From 9bf3176399b1308b5d6202c60ee065b0f6a35e97 Mon Sep 17 00:00:00 2001
From: hweichel <hweichel@mathworks.com>
Date: Wed, 25 Mar 2026 15:40:53 -0400
Subject: [PATCH 08/16] Fix build_mex_APIs script

---
 examples/MATLAB_MEX/build_mex_APIs.m | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/MATLAB_MEX/build_mex_APIs.m b/examples/MATLAB_MEX/build_mex_APIs.m
index 331a39a..efdaee8 100644
--- a/examples/MATLAB_MEX/build_mex_APIs.m
+++ b/examples/MATLAB_MEX/build_mex_APIs.m
@@ -11,7 +11,7 @@ function build_mex_APIs(incl_path, verbose, debug)
 % 		build_mex_APIs("PATH_TO_SparseBLAS_INCLUDE", false, true)
 
 % Set default options
-opts = {['-I' incl_path], "-O", "-R2018a", "CXXFLAGS=$CFLAGS -std=c++20"};
+opts = {['-I' char(incl_path)], "-O", "-R2018a", "CXXFLAGS=$CFLAGS -std=c++20"};
 
 % Parse optional VERBOSE option
 if nargin > 1 && verbose

From 34624e1c317268134656b2b7dd1e9dfcfe637557 Mon Sep 17 00:00:00 2001
From: hweichel <hweichel@mathworks.com>
Date: Wed, 25 Mar 2026 15:52:27 -0400
Subject: [PATCH 09/16] Fix build_mex_APIs script

---
 examples/MATLAB_MEX/build_mex_APIs.m | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/MATLAB_MEX/build_mex_APIs.m b/examples/MATLAB_MEX/build_mex_APIs.m
index efdaee8..0d611b7 100644
--- a/examples/MATLAB_MEX/build_mex_APIs.m
+++ b/examples/MATLAB_MEX/build_mex_APIs.m
@@ -11,7 +11,7 @@ function build_mex_APIs(incl_path, verbose, debug)
 % 		build_mex_APIs("PATH_TO_SparseBLAS_INCLUDE", false, true)
 
 % Set default options
-opts = {['-I' char(incl_path)], "-O", "-R2018a", "CXXFLAGS=$CFLAGS -std=c++20"};
+opts = {['-I' char(incl_path)], "-O", "-R2018a", "CXXFLAGS=$CFLAGS -std=c++23"};
 
 % Parse optional VERBOSE option
 if nargin > 1 && verbose

From 4f4a19e40e3682b2ceab0c19bd4b91901bba5941 Mon Sep 17 00:00:00 2001
From: hweichel <hweichel@mathworks.com>
Date: Wed, 25 Mar 2026 15:59:37 -0400
Subject: [PATCH 10/16] Add MATLAB test to CI run

---
 examples/MATLAB_MEX/testSparseBLASMexAPIs.m | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/MATLAB_MEX/testSparseBLASMexAPIs.m b/examples/MATLAB_MEX/testSparseBLASMexAPIs.m
index 8164cf6..5963e5d 100644
--- a/examples/MATLAB_MEX/testSparseBLASMexAPIs.m
+++ b/examples/MATLAB_MEX/testSparseBLASMexAPIs.m
@@ -82,7 +82,7 @@ function simpleSPMV(testCase, sizesToTest, datatypes, complexity, shape, alpha)
             end
 
             %% Verify results
-            testCase.verifyEqual(y_act, y_exp);
+            testCase.verifyEqual(y_act, -y_exp);
         end
 
         function simpleSPMM(testCase, sizesToTest, numberOfRHS, datatypes, complexity, shape, alpha)

From dd3c710c268561a204037602e49bd549a157c778 Mon Sep 17 00:00:00 2001
From: hweichel <hweichel@mathworks.com>
Date: Wed, 25 Mar 2026 16:04:18 -0400
Subject: [PATCH 11/16] Add MATLAB test to CI run

---
 .github/workflows/ci.yml | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 007bdf5..adde7d4 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -26,12 +26,18 @@ jobs:
       uses: matlab-actions/setup-matlab@v2.5.0
       with:
         release: R2025b
-    - name: Run script
+    - name: Build MEX APIs
       uses: matlab-actions/run-command@v2.2.1
       with:
         command: |
           cd("examples/MATLAB_MEX");
           build_mex_APIs("../../install/include")
+    - name: Run MEX API Tests
+      uses: matlab-actions/run-command@v2.2.1
+      with:
+        command: |
+          cd("examples/MATLAB_MEX");
+          runtests("testSparseBLASMexAPIs");
   
   checks:
     runs-on: 'ubuntu-latest'

From c77290dc90c5ed1228dcf8e233ad9f3656502bf7 Mon Sep 17 00:00:00 2001
From: hweichel <hweichel@mathworks.com>
Date: Wed, 25 Mar 2026 16:10:19 -0400
Subject: [PATCH 12/16] Add MATLAB test to CI run

---
 .github/workflows/ci.yml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index adde7d4..f491d98 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -37,7 +37,8 @@ jobs:
       with:
         command: |
           cd("examples/MATLAB_MEX");
-          runtests("testSparseBLASMexAPIs");
+          results = runtests("testSparseBLASMexAPIs");
+          assertSuccess(results);
   
   checks:
     runs-on: 'ubuntu-latest'

From 0cf2459e16a22d810c9b39fe5c5bbc90334d4082 Mon Sep 17 00:00:00 2001
From: hweichel <hweichel@mathworks.com>
Date: Wed, 25 Mar 2026 16:19:55 -0400
Subject: [PATCH 13/16] Fix tests to pass now

---
 examples/MATLAB_MEX/testSparseBLASMexAPIs.m | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/MATLAB_MEX/testSparseBLASMexAPIs.m b/examples/MATLAB_MEX/testSparseBLASMexAPIs.m
index 5963e5d..8164cf6 100644
--- a/examples/MATLAB_MEX/testSparseBLASMexAPIs.m
+++ b/examples/MATLAB_MEX/testSparseBLASMexAPIs.m
@@ -82,7 +82,7 @@ function simpleSPMV(testCase, sizesToTest, datatypes, complexity, shape, alpha)
             end
 
             %% Verify results
-            testCase.verifyEqual(y_act, -y_exp);
+            testCase.verifyEqual(y_act, y_exp);
         end
 
         function simpleSPMM(testCase, sizesToTest, numberOfRHS, datatypes, complexity, shape, alpha)

From 2ee7da394f0efbb4c70f4a9fd11e645cbedfe18b Mon Sep 17 00:00:00 2001
From: hweichel <hweichel@mathworks.com>
Date: Wed, 25 Mar 2026 16:51:15 -0400
Subject: [PATCH 14/16] fix indentation and remove accidentally added files
 that aren't ready yet

---
 .github/workflows/ci.yml                  |   4 +-
 .pre-commit-config.yaml                   |   1 +
 examples/MATLAB_MEX/build_mex_APIs.m      |   2 +-
 examples/MATLAB_MEX/simple_spgemm_mex.cpp | 105 ---------------
 examples/MATLAB_MEX/simple_spmm_mex.cpp   |  28 ++--
 examples/MATLAB_MEX/simple_spmm_mex.m     |   9 +-
 examples/MATLAB_MEX/simple_spmv_mex.cpp   |  26 ++--
 examples/MATLAB_MEX/simple_spmv_mex.m     |   8 +-
 examples/MATLAB_MEX/simple_sptrsv_mex.cpp | 152 ----------------------
 9 files changed, 40 insertions(+), 295 deletions(-)
 delete mode 100755 examples/MATLAB_MEX/simple_spgemm_mex.cpp
 delete mode 100755 examples/MATLAB_MEX/simple_sptrsv_mex.cpp

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index f491d98..3e3790b 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -21,7 +21,7 @@ jobs:
     - name: Install
       run: |
         cmake --install build/.
-     
+
     - name: Set up MATLAB
       uses: matlab-actions/setup-matlab@v2.5.0
       with:
@@ -39,7 +39,7 @@ jobs:
           cd("examples/MATLAB_MEX");
           results = runtests("testSparseBLASMexAPIs");
           assertSuccess(results);
-  
+
   checks:
     runs-on: 'ubuntu-latest'
     steps:
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 2bd722f..539049f 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -4,6 +4,7 @@ repos:
   rev: v16.0.6
   hooks:
     - id: clang-format
+      types_or: [c, c++, cuda, inc]
 
 - repo: https://github.com/pre-commit/pre-commit-hooks
   rev: v4.4.0
diff --git a/examples/MATLAB_MEX/build_mex_APIs.m b/examples/MATLAB_MEX/build_mex_APIs.m
index 0d611b7..a1f2d38 100644
--- a/examples/MATLAB_MEX/build_mex_APIs.m
+++ b/examples/MATLAB_MEX/build_mex_APIs.m
@@ -9,7 +9,7 @@ function build_mex_APIs(incl_path, verbose, debug)
 % 		build_mex_APIs("PATH_TO_SparseBLAS_INCLUDE")
 % 		build_mex_APIs("PATH_TO_SparseBLAS_INCLUDE", true)
 % 		build_mex_APIs("PATH_TO_SparseBLAS_INCLUDE", false, true)
-
+    
 % Set default options
 opts = {['-I' char(incl_path)], "-O", "-R2018a", "CXXFLAGS=$CFLAGS -std=c++23"};
 
diff --git a/examples/MATLAB_MEX/simple_spgemm_mex.cpp b/examples/MATLAB_MEX/simple_spgemm_mex.cpp
deleted file mode 100755
index 3ed82cd..0000000
--- a/examples/MATLAB_MEX/simple_spgemm_mex.cpp
+++ /dev/null
@@ -1,105 +0,0 @@
-// Includes from SparseBLAS
-#include <spblas/spblas.hpp>
-
-// Includes for MEX
-#include <matrix.h>
-#include <mex.h>
-
-// General includes
-#include <complex> // Support complex inputs
-#include <iostream>
-// C = A * B
-// prhs[0] = A, prhs[1] = B
-// plhs[0] = C
-void mexFunction(int nlhs, mxArray *plhs[],
-                 int nrhs, const mxArray *prhs[]) {
-  
-  // General input checking
-  if (nrhs!=2) {
-    mexErrMsgIdAndTxt("SparseBLAS_Mex:WrongNumberOfInputs",
-                      "Function needs 2 inputs.");
-  }
-  if (nlhs>1) {
-    mexErrMsgIdAndTxt("SparseBLAS_Mex:WrongNumberOfOutputs",
-                      "Function returns only 1 output.");
-  }
-  if (!mxIsDouble(prhs[0]) || !mxIsDouble(prhs[1])) {
-    mexErrMsgIdAndTxt("SparseBLAS_Mex:UnsupportedClass",
-                      "Only double inputs supported.");
-  }
-  if(!mxIsSparse(prhs[0]) || !mxIsSparse(prhs[1])) {
-    mexErrMsgIdAndTxt("SparseBLAS_Mex:InputsNotSparse",
-                      "All inputs must be sparse.");
-  }
-  
-
-  /*
-  // Reference SparseBLAS can handle inputs with mixed complexity,
-  // however, the vendor implementations need all inputs of the same
-  // complexity, hence, this example also insists on having matching 
-  // complexity.
-  */
-  // For now, allow only real to avoid switch-yards.  
-  if(mxIsComplex(prhs[0]) || mxIsComplex(prhs[1])) {
-    mexErrMsgIdAndTxt("SparseBLAS_Mex:NonReal",
-                      "Only real valued inputs supported.");
-  }
-
-  // Gather dimensions
-  mwIndex m = mxGetM(prhs[0]);
-  mwIndex k = mxGetN(prhs[0]);
-  mwIndex n = mxGetN(prhs[1]);
-  mwIndex nnzA = mxGetNzmax(prhs[0]);
-  mwIndex nnzB = mxGetNzmax(prhs[1]);
-
-  // Check dimensions of second input
-  if (mxGetM(prhs[1]) != k) {
-     mexErrMsgIdAndTxt("SparseBLAS_Mex:InnerDimWrong",
-                       "Second input must be an array with k rows, "
-                       "i.e., number of columns of first input.");
-  }
-
-  // Fill csc_view for A with:
-  // - double* values
-  // - mwIndex* colptr
-  // - mwIndex* rowind
-  // - {mwIndex m, mwIndex k} (shape)
-  // - mwIndex nnzA
-  spblas::csc_view<double> A(static_cast<double*>(mxGetData(prhs[0])),
-                     mxGetJc(prhs[0]), mxGetIr(prhs[0]), {m, k}, nnzA);
-
-  // Fill csc_view for B with:
-  // - double* values
-  // - mwIndex* colptr
-  // - mwIndex* rowind
-  // - {mwIndex k, mwIndex n} (shape)
-  // - mwIndex nnzB
-  spblas::csc_view<double> B(static_cast<double*>(mxGetData(prhs[1])),
-                     mxGetJc(prhs[1]), mxGetIr(prhs[1]), {k, n}, nnzB);
-
-  // Placeholder csc_view for C with:
-  // - {mwIndex m, mwIndex n} (shape)
-  spblas::csc_view<double> C(nullptr, nullptr, nullptr, {m, n}, 0);
-
-  // C = A * B - Compute stage
-  auto info = multiply_compute(A, B, C);
-
-  // Allocate mxArray for output C and pre-computed nnzC
-  mwIndex nnzC = info.result_nnz();
-  plhs[0] = mxCreateSparse(m, n, nnzC, mxCOMPLEX);
-  
-  // Update csc_view for C with values, colptr, and rowind of plhs[0],
-  // using spans to ensure correct length of the arrays
-  std::span<double> C_values(static_cast<double*>(mxGetData(plhs[0])), nnzC);
-  std::span<mwIndex> C_colptr(mxGetJc(plhs[0]), n+1);  
-  std::span<mwIndex> C_rowind(mxGetIr(plhs[0]), nnzC);  
-  C.update(C_values, C_colptr, C_rowind);
-    
-  // C = A * B - Fill stage
-  multiply_fill(info, A, B, C);
-}
-
-// Compile from within MATLAB via:
-// mex simple_spgemm_mex.cpp -R2018a -I{PATH_TO_SparseBLAS_INCLUDE} 'CXXFLAGS=$CFLAGS -fobjc-arc -stdlib=libc++ -std=c++20'
-//
-// Add '-g' to build in Debug mode if needed (activates asserts)
diff --git a/examples/MATLAB_MEX/simple_spmm_mex.cpp b/examples/MATLAB_MEX/simple_spmm_mex.cpp
index 7369759..79d419f 100644
--- a/examples/MATLAB_MEX/simple_spmm_mex.cpp
+++ b/examples/MATLAB_MEX/simple_spmm_mex.cpp
@@ -10,7 +10,7 @@
 
 template <typename T>
 void spmmDriver(mxArray* mxY, const mxArray* mxA, const mxArray* mxX,
-                const mxArray* mxAlpha){
+                const mxArray* mxAlpha) {
   // Gather dimensions
   mwIndex m = mxGetM(mxA);
   mwIndex k = mxGetN(mxA);
@@ -35,7 +35,7 @@ void spmmDriver(mxArray* mxY, const mxArray* mxA, const mxArray* mxX,
   // Store and apply scaling factor alpha, if provided and not empty
   T alpha = T(1);
   if (mxAlpha != nullptr && !mxIsEmpty(mxAlpha)) {
-    // We don't use mxGetScalar as it doesn't work for complex 
+    // We don't use mxGetScalar as it doesn't work for complex
     alpha = *(static_cast<T*>(mxGetData(mxAlpha)));
   }
   auto alpha_A = spblas::scaled(alpha, A);
@@ -47,8 +47,7 @@ void spmmDriver(mxArray* mxY, const mxArray* mxA, const mxArray* mxX,
 // Y = (alpha *) A * X
 // prhs[0] = A, prhs[1] = X (optional: prhs[2] = alpha)
 // plhs[0] = Y
-void mexFunction(int nlhs, mxArray *plhs[],
-                 int nrhs, const mxArray *prhs[]) {
+void mexFunction(int nlhs, mxArray* plhs[], int nrhs, const mxArray* prhs[]) {
 
   // General input checking
   if (nrhs < 2 || nrhs > 3) {
@@ -59,9 +58,9 @@ void mexFunction(int nlhs, mxArray *plhs[],
     mexErrMsgIdAndTxt("SparseBLAS_Mex:WrongNumberOfOutputs",
                       "Function returns only 1 output.");
   }
-  if (mxGetClassID(prhs[0]) != mxGetClassID(prhs[1])|| 
+  if (mxGetClassID(prhs[0]) != mxGetClassID(prhs[1]) ||
       ((nrhs == 3) && mxGetClassID(prhs[1]) != mxGetClassID(prhs[2]))) {
-      mexErrMsgIdAndTxt("SparseBLAS_Mex:ClassMismatch",
+    mexErrMsgIdAndTxt("SparseBLAS_Mex:ClassMismatch",
                       "All inputs must have matching type.");
   }
   if (!mxIsDouble(prhs[0]) && !mxIsSingle(prhs[0])) {
@@ -72,18 +71,18 @@ void mexFunction(int nlhs, mxArray *plhs[],
     mexErrMsgIdAndTxt("SparseBLAS_Mex:FirstInputNotSparse",
                       "First input must be sparse.");
   }
-  if(mxIsSparse(prhs[1])) {
+  if (mxIsSparse(prhs[1])) {
     mexErrMsgIdAndTxt("SparseBLAS_Mex:SecondInputNotDense",
                       "Second input must be dense.");
   }
 
   // Reference SparseBLAS can handle inputs with mixed complexity,
   // however, the vendor implementations need all inputs of the same
-  // complexity, hence, this example also insists on having matching 
+  // complexity, hence, this example also insists on having matching
   // complexity.
-  if(mxIsComplex(prhs[0]) != mxIsComplex(prhs[1]) || 
+  if (mxIsComplex(prhs[0]) != mxIsComplex(prhs[1]) ||
       ((nrhs == 3) && mxIsComplex(prhs[1]) != mxIsComplex(prhs[2]))) {
-      mexErrMsgIdAndTxt("SparseBLAS_Mex:ComplexityMismatch",
+    mexErrMsgIdAndTxt("SparseBLAS_Mex:ComplexityMismatch",
                       "All inputs must have matching complexity.");
   }
 
@@ -94,9 +93,9 @@ void mexFunction(int nlhs, mxArray *plhs[],
 
   // Check dimensions of second input
   if (mxGetM(prhs[1]) != k) {
-     mexErrMsgIdAndTxt("SparseBLAS_Mex:InnerDimWrong",
-                       "Second input must be an array with k rows, "
-                       "i.e., number of columns of first input.");
+    mexErrMsgIdAndTxt("SparseBLAS_Mex:InnerDimWrong",
+                      "Second input must be an array with k rows, "
+                      "i.e., number of columns of first input.");
   }
 
   // Calculate in complex (we check for matching complexity above)
@@ -128,6 +127,7 @@ void mexFunction(int nlhs, mxArray *plhs[],
 }
 
 // Compile from within MATLAB via:
-// mex simple_spmm_mex.cpp -R2018a -I{PATH_TO_SparseBLAS_INCLUDE} 'CXXFLAGS=$CFLAGS -std=c++20'
+// mex simple_spmm_mex.cpp -R2018a -I{PATH_TO_SparseBLAS_INCLUDE}
+// 'CXXFLAGS=$CFLAGS -std=c++20'
 //
 // Add '-g' to build in Debug mode if needed (activates asserts)
diff --git a/examples/MATLAB_MEX/simple_spmm_mex.m b/examples/MATLAB_MEX/simple_spmm_mex.m
index dc09a4c..1d1ae44 100644
--- a/examples/MATLAB_MEX/simple_spmm_mex.m
+++ b/examples/MATLAB_MEX/simple_spmm_mex.m
@@ -1,18 +1,19 @@
 % simple_spmm_mex - Sparse matrix times dense matrix multiplication
 %  simple_smpm_mex.c - example in MATLAB External Interfaces
-% 
+%
 %  Multiplies a (potentially scaled by a scalar alpha) sparse MxK matrix
 %  with a dense KxN matrix and outputs a dense MxN matrix:
 %       Y = A * X  or  Y = alpha * A * X
-% 
+%
 %  The calling syntaxes are:
 % 		Y = simple_smpv_mex(A, X)
 % 		Y = simple_smpv_mex(A, X, alpha)
-% 
+%
 %  The following restrictions apply:
 %    * A must be sparse
 %    * X must be dense
 %    * Number of columns in A and rows in X must match
 %    * All inputs must have the same data type and complexity
 %
-%  This is a MEX-file for MATLAB.
\ No newline at end of file
+%  This is a MEX-file for MATLAB.
+
diff --git a/examples/MATLAB_MEX/simple_spmv_mex.cpp b/examples/MATLAB_MEX/simple_spmv_mex.cpp
index 840af67..0b64a2c 100644
--- a/examples/MATLAB_MEX/simple_spmv_mex.cpp
+++ b/examples/MATLAB_MEX/simple_spmv_mex.cpp
@@ -10,7 +10,7 @@
 
 template <typename T>
 void spmvDriver(mxArray* mxY, const mxArray* mxA, const mxArray* mxX,
-                const mxArray* mxAlpha){
+                const mxArray* mxAlpha) {
 
   // Gather dimensions
   mwIndex m = mxGetM(mxA);
@@ -34,7 +34,7 @@ void spmvDriver(mxArray* mxY, const mxArray* mxA, const mxArray* mxX,
   // Store and apply scaling factor alpha, if provided and not empty
   T alpha = T(1);
   if (mxAlpha != nullptr && !mxIsEmpty(mxAlpha)) {
-    // We don't use mxGetScalar as it doesn't work for complex 
+    // We don't use mxGetScalar as it doesn't work for complex
     alpha = *(static_cast<T*>(mxGetData(mxAlpha)));
   }
   auto alpha_A = spblas::scaled(alpha, A);
@@ -46,8 +46,7 @@ void spmvDriver(mxArray* mxY, const mxArray* mxA, const mxArray* mxX,
 // y = (alpha *) A * x
 // prhs[0] = A, prhs[1] = x (optional: prhs[2] = alpha)
 // plhs[0] = y
-void mexFunction(int nlhs, mxArray *plhs[],
-                 int nrhs, const mxArray *prhs[]) {
+void mexFunction(int nlhs, mxArray* plhs[], int nrhs, const mxArray* prhs[]) {
 
   // General input checking
   if (nrhs < 2 || nrhs > 3) {
@@ -58,7 +57,7 @@ void mexFunction(int nlhs, mxArray *plhs[],
     mexErrMsgIdAndTxt("SparseBLAS_Mex:WrongNumberOfOutputs",
                       "Function returns only 1 output.");
   }
-  if (mxGetClassID(prhs[0]) != mxGetClassID(prhs[1])|| 
+  if (mxGetClassID(prhs[0]) != mxGetClassID(prhs[1]) ||
       ((nrhs == 3) && mxGetClassID(prhs[1]) != mxGetClassID(prhs[2]))) {
     mexErrMsgIdAndTxt("SparseBLAS_Mex:ClassMismatch",
                       "All inputs must have matching type.");
@@ -71,20 +70,20 @@ void mexFunction(int nlhs, mxArray *plhs[],
     mexErrMsgIdAndTxt("SparseBLAS_Mex:NonFloat",
                       "All inputs must be single or double.");
   }
-  if(!mxIsSparse(prhs[0])) {
+  if (!mxIsSparse(prhs[0])) {
     mexErrMsgIdAndTxt("SparseBLAS_Mex:FirstInputNotSparse",
                       "First input must be sparse.");
   }
-  if(mxIsSparse(prhs[1])) {
+  if (mxIsSparse(prhs[1])) {
     mexErrMsgIdAndTxt("SparseBLAS_Mex:SecondInputNotDense",
                       "Second input must be dense.");
   }
 
   // Reference SparseBLAS can handle inputs with mixed complexity,
   // however, the vendor implementations need all inputs of the same
-  // complexity, hence, this example also insists on having matching 
+  // complexity, hence, this example also insists on having matching
   // complexity.
-  if(mxIsComplex(prhs[0]) != mxIsComplex(prhs[1]) || 
+  if (mxIsComplex(prhs[0]) != mxIsComplex(prhs[1]) ||
       ((nrhs == 3) && mxIsComplex(prhs[1]) != mxIsComplex(prhs[2]))) {
     mexErrMsgIdAndTxt("SparseBLAS_Mex:ComplexityMismatch",
                       "All inputs must have matching complexity.");
@@ -95,9 +94,9 @@ void mexFunction(int nlhs, mxArray *plhs[],
   mwIndex n = mxGetN(prhs[0]);
   // Check dimensions of second input
   if ((mxGetM(prhs[1]) != n) && (mxGetN(prhs[1]) != 1)) {
-     mexErrMsgIdAndTxt("SparseBLAS_Mex:InnerDimWrong",
-                       "Second input must be column vector of length n, "
-                       "i.e., number of columns of first input.");
+    mexErrMsgIdAndTxt("SparseBLAS_Mex:InnerDimWrong",
+                      "Second input must be column vector of length n, "
+                      "i.e., number of columns of first input.");
   }
 
   // Calculate in complex (we check for matching complexity above)
@@ -129,6 +128,7 @@ void mexFunction(int nlhs, mxArray *plhs[],
 }
 
 // Compile from within MATLAB via:
-// mex simple_spmv_mex.cpp -R2018a -I{PATH_TO_SparseBLAS_INCLUDE} 'CXXFLAGS=$CFLAGS -std=c++20'
+// mex simple_spmv_mex.cpp -R2018a -I{PATH_TO_SparseBLAS_INCLUDE}
+// 'CXXFLAGS=$CFLAGS -std=c++20'
 //
 // Add '-g' to build in Debug mode if needed (activates asserts)
diff --git a/examples/MATLAB_MEX/simple_spmv_mex.m b/examples/MATLAB_MEX/simple_spmv_mex.m
index 31a4f3c..09e2196 100644
--- a/examples/MATLAB_MEX/simple_spmv_mex.m
+++ b/examples/MATLAB_MEX/simple_spmv_mex.m
@@ -1,18 +1,18 @@
 % simple_spmv_mex - Sparse matrix times dense vector multiplication
 %  simple_smpv_mex.c - example in MATLAB External Interfaces
-% 
+%
 %  Multiplies a (potentially scaled by a scalar alpha) sparse MxN matrix
 %  with a dense Nx1 column vector and outputs a dense Mx1 column vector:
 %       y = A * x  or  y = alpha * A * x
-% 
+%
 %  The calling syntaxes are:
 % 		y = simple_smpv_mex(A, x)
 % 		y = simple_smpv_mex(A, x, alpha)
-% 
+%
 %  The following restrictions apply:
 %    * A must be sparse
 %    * x must be dense column vector
 %    * Number of columns in A and rows in x must match
 %    * All inputs must have the same data type and complexity
 %
-%  This is a MEX-file for MATLAB.
\ No newline at end of file
+%  This is a MEX-file for MATLAB.
diff --git a/examples/MATLAB_MEX/simple_sptrsv_mex.cpp b/examples/MATLAB_MEX/simple_sptrsv_mex.cpp
deleted file mode 100755
index d51e9b1..0000000
--- a/examples/MATLAB_MEX/simple_sptrsv_mex.cpp
+++ /dev/null
@@ -1,152 +0,0 @@
-// Includes from SparseBLAS
-#include <spblas/spblas.hpp>
-
-// Includes for MEX
-#include <matrix.h>
-#include <mex.h>
-
-// General includes
-#include <complex> // Support complex inputs
-#include <string>  // Parse uplo input
-
-template <typename T>
-void simple_sptrsv_mex(mwIndex m, mwIndex n, mwIndex nnz, mxArray * x_out,
-                     int nrhs, const mxArray *prhs[]){
-  using namespace spblas;
-
-  // Fill csc_view with:
-  // - T* values
-  // - mwIndex* colptr
-  // - mwIndex* rowind
-  // - {mwIndex m, mwIndex n} (shape)
-  // - mwIndex nnz
-  csr_view<T> A(static_cast<T*>(mxGetData(prhs[0])), mxGetJc(prhs[0]),
-                mxGetIr(prhs[0]), {m, n}, nnz);
-
-  csr_view<T> B;
-
-    
-  // Wrap b in a span of length n
-  std::span<T> b(static_cast<T*>(mxGetData(prhs[1])), n);
-
-  // Wrap output y in a span of length m
-  std::span<T> x(static_cast<T*>(mxGetData(x_out)), m);
-
-  bool useUpper = true;
-  if (nrhs>2) {
-    char uplo_in[5];
-    int info = mxGetString(prhs[2], uplo_in, 6); 
-    if (!strcmp(uplo_in, "lower")) {
-      useUpper = false;
-    } else {
-      if (strcmp(uplo_in, "upper")) {
-        mexErrMsgIdAndTxt("SparseBLAS_Mex:WrongUplo",
-                          "3rd input must be ""upper"" or ""lower"".");
-      }
-    } 
-  }
-
-  // Store and apply scaling factor alpha, if provided
-  T alpha = 1.0;
-  if (nrhs==4) {
-    alpha = static_cast<T>(mxGetScalar(prhs[2]));
-  }
-  auto alpha_b= scaled(alpha, b);
-
-  // solve for x:  uplo(A) * x = alpha * b
-  // triangular_solve(A, spblas::lower_triangle_t{},
-  //                  spblas::explicit_diagonal_t{}, alpha_b, x);
-  if (useUpper) {
-    triangular_solve(A, spblas::upper_triangle_t{},
-                     spblas::explicit_diagonal_t{}, alpha_b, x);
-  } else {
-
-    triangular_solve(A, spblas::lower_triangle_t{},
-                     spblas::explicit_diagonal_t{}, alpha_b, x);
-  }
-}
-
-// y = (alpha *) A * x
-// prhs[0] = A, prhs[1] = x (optional: prhs[2] = alpha)
-// plhs[0] = y
-void mexFunction(int nlhs, mxArray *plhs[],
-                 int nrhs, const mxArray *prhs[]) {
-
-  // General input checking
-  if (nrhs<2 && nrhs>3) {
-    mexErrMsgIdAndTxt("SparseBLAS_Mex:WrongNumberOfInputs",
-                      "Function needs 2 or 3 inputs.");
-  }
-  if (nlhs>1) {
-    mexErrMsgIdAndTxt("SparseBLAS_Mex:WrongNumberOfOutputs",
-                      "Function returns only 1 output.");
-  }
-  if (mxGetClassID(prhs[0]) != mxGetClassID(prhs[1])) {
-    mexErrMsgIdAndTxt("SparseBLAS_Mex:ClassMissmatch",
-                      "First and second input must have matching type.");
-  }
-  if(!mxIsSparse(prhs[0])) {
-    mexErrMsgIdAndTxt("SparseBLAS_Mex:FirstInputNotSparse",
-                      "First input must be sparse.");
-  }
-  if(mxIsSparse(prhs[1])) {
-    mexErrMsgIdAndTxt("SparseBLAS_Mex:SecondInputNotDense",
-                      "Second input must be dense.");
-  }
-
-  // Reference SparseBLAS can handle inputs with mixed complexity,
-  // however, the vendor implementations need all inputs of the same
-  // complexity, hence, this example also insists on having matching 
-  // complexity.
-  if(mxIsComplex(prhs[0]) != mxIsComplex(prhs[1])) {
-    mexErrMsgIdAndTxt("SparseBLAS_Mex:ComplexityMissmatch",
-                      "First and second input must have "
-                      "matching complexity.");
-  }
-
-  // Gather dimensions
-  mwIndex m = mxGetM(prhs[0]);
-  mwIndex n = mxGetN(prhs[0]);
-  mwIndex nnz = mxGetNzmax(prhs[0]);
-
-  // Check dimensions of second input
-  if ((mxGetM(prhs[1]) != n) && (mxGetN(prhs[1]) != 1)) {
-     mexErrMsgIdAndTxt("SparseBLAS_Mex:InnerDimWrong",
-                       "Second input must be column vector of length n, "
-                       "i.e., number of columns of first input.");
-  }
-
-  // Set output size
-  mwIndex dims[2] = {m, 1};
-
-  // Calculate in complex (assume matching complexity for both inputs)
-  bool isCmplx = mxIsComplex(prhs[0]);
-
-  // Type dispatch for double, single, or logical
-  if (mxIsDouble(prhs[0])) {
-    if (isCmplx) {
-      plhs[0] = mxCreateNumericArray(2, dims, mxDOUBLE_CLASS, mxCOMPLEX);
-      simple_sptrsv_mex<std::complex<double>>(m, n, nnz, plhs[0], nrhs, prhs);
-    } else {
-      plhs[0] = mxCreateNumericArray(2, dims, mxDOUBLE_CLASS, mxREAL);
-      simple_sptrsv_mex<double>(m, n, nnz, plhs[0], nrhs, prhs);
-    }
-  } else if (mxIsSingle(prhs[0])) {
-    if (isCmplx) {
-      plhs[0] = mxCreateNumericArray(2, dims, mxSINGLE_CLASS, mxCOMPLEX);
-      simple_sptrsv_mex<std::complex<float>>(m, n, nnz, plhs[0], nrhs, prhs);
-    } else {
-      plhs[0] = mxCreateNumericArray(2, dims, mxSINGLE_CLASS, mxREAL);
-      simple_sptrsv_mex<float>(m, n, nnz, plhs[0], nrhs, prhs);
-    }
-  } else {
-    mxAssert(mxIsLogical(prhs[0]), "Invalid data type");
-    plhs[0] = mxCreateNumericArray(2, dims, mxLOGICAL_CLASS, mxREAL);
-    simple_sptrsv_mex<bool>(m, n, nnz, plhs[0], nrhs, prhs);
-  }
-}
-
-// Compile from within MATLAB via:
-// mex simple_spmv_mex.cpp -R2018a -I{PATH_TO_SparseBLAS_INCLUDE} 'CXXFLAGS=$CFLAGS -fobjc-arc -stdlib=libc++ -std=c++20'
-//
-// Add '-g' to build in Debug mode if needed (activates asserts)
\ No newline at end of file

From cb4031680db5d40f4eb38dbe744277c000ce9444 Mon Sep 17 00:00:00 2001
From: hweichel <hweichel@mathworks.com>
Date: Wed, 25 Mar 2026 16:57:01 -0400
Subject: [PATCH 15/16] fix whitespace stuff

---
 examples/MATLAB_MEX/build_mex_APIs.m        | 2 +-
 examples/MATLAB_MEX/simple_spmm_mex.cpp     | 2 +-
 examples/MATLAB_MEX/simple_spmm_mex.m       | 1 -
 examples/MATLAB_MEX/testSparseBLASMexAPIs.m | 2 +-
 4 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/examples/MATLAB_MEX/build_mex_APIs.m b/examples/MATLAB_MEX/build_mex_APIs.m
index a1f2d38..0d611b7 100644
--- a/examples/MATLAB_MEX/build_mex_APIs.m
+++ b/examples/MATLAB_MEX/build_mex_APIs.m
@@ -9,7 +9,7 @@ function build_mex_APIs(incl_path, verbose, debug)
 % 		build_mex_APIs("PATH_TO_SparseBLAS_INCLUDE")
 % 		build_mex_APIs("PATH_TO_SparseBLAS_INCLUDE", true)
 % 		build_mex_APIs("PATH_TO_SparseBLAS_INCLUDE", false, true)
-    
+
 % Set default options
 opts = {['-I' char(incl_path)], "-O", "-R2018a", "CXXFLAGS=$CFLAGS -std=c++23"};
 
diff --git a/examples/MATLAB_MEX/simple_spmm_mex.cpp b/examples/MATLAB_MEX/simple_spmm_mex.cpp
index 79d419f..dcc722b 100644
--- a/examples/MATLAB_MEX/simple_spmm_mex.cpp
+++ b/examples/MATLAB_MEX/simple_spmm_mex.cpp
@@ -67,7 +67,7 @@ void mexFunction(int nlhs, mxArray* plhs[], int nrhs, const mxArray* prhs[]) {
     mexErrMsgIdAndTxt("SparseBLAS_Mex:NonFloat",
                       "All inputs must be single or double.");
   }
-  if(!mxIsSparse(prhs[0])) {
+  if (!mxIsSparse(prhs[0])) {
     mexErrMsgIdAndTxt("SparseBLAS_Mex:FirstInputNotSparse",
                       "First input must be sparse.");
   }
diff --git a/examples/MATLAB_MEX/simple_spmm_mex.m b/examples/MATLAB_MEX/simple_spmm_mex.m
index 1d1ae44..3a54059 100644
--- a/examples/MATLAB_MEX/simple_spmm_mex.m
+++ b/examples/MATLAB_MEX/simple_spmm_mex.m
@@ -16,4 +16,3 @@
 %    * All inputs must have the same data type and complexity
 %
 %  This is a MEX-file for MATLAB.
-
diff --git a/examples/MATLAB_MEX/testSparseBLASMexAPIs.m b/examples/MATLAB_MEX/testSparseBLASMexAPIs.m
index 8164cf6..4e44f6e 100644
--- a/examples/MATLAB_MEX/testSparseBLASMexAPIs.m
+++ b/examples/MATLAB_MEX/testSparseBLASMexAPIs.m
@@ -1,6 +1,6 @@
 classdef testSparseBLASMexAPIs < matlab.unittest.TestCase
 % testSparseBLASMexAPIs Tests for SparseBLAS MEX APIs
-    
+
    methods(TestMethodSetup)
         function initializeRNG(~)
             rng(0,'twister');

From 0a05156582fe47d0fd73a7398d21bb72ba864a8c Mon Sep 17 00:00:00 2001
From: hweichel <hweichel@mathworks.com>
Date: Wed, 25 Mar 2026 17:15:35 -0400
Subject: [PATCH 16/16] make CI for MATLAB a bit easier to digest

---
 .github/workflows/ci.yml | 59 ++++++++++++++++++----------------------
 1 file changed, 27 insertions(+), 32 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 3e3790b..b8425e2 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -8,38 +8,6 @@ on:
   pull_request:
 
 jobs:
-  mex_checks:
-    runs-on: 'ubuntu-latest'
-    steps:
-    - uses: actions/checkout@v4
-    - name: CMake
-      run: |
-        cmake -B build -D CMAKE_INSTALL_PREFIX=install
-    - name: Build
-      run: |
-        make -C build -j `nproc`
-    - name: Install
-      run: |
-        cmake --install build/.
-
-    - name: Set up MATLAB
-      uses: matlab-actions/setup-matlab@v2.5.0
-      with:
-        release: R2025b
-    - name: Build MEX APIs
-      uses: matlab-actions/run-command@v2.2.1
-      with:
-        command: |
-          cd("examples/MATLAB_MEX");
-          build_mex_APIs("../../install/include")
-    - name: Run MEX API Tests
-      uses: matlab-actions/run-command@v2.2.1
-      with:
-        command: |
-          cd("examples/MATLAB_MEX");
-          results = runtests("testSparseBLASMexAPIs");
-          assertSuccess(results);
-
   checks:
     runs-on: 'ubuntu-latest'
     steps:
@@ -126,6 +94,33 @@ jobs:
       run: |
         ./build/test/gtest/spblas-tests
 
+  MATLAB_mex_checks:
+    runs-on: 'ubuntu-latest'
+    steps:
+    - uses: actions/checkout@v4
+    - name: CMake Build and Install
+      run: |
+        cmake -B build -D CMAKE_INSTALL_PREFIX=install
+        make -C build -j `nproc`
+        cmake --install build/.
+    - name: Set up MATLAB
+      uses: matlab-actions/setup-matlab@v2.5.0
+      with:
+        release: R2025b
+    - name: Build MEX APIs
+      uses: matlab-actions/run-command@v2.2.1
+      with:
+        command: |
+          cd("examples/MATLAB_MEX");
+          build_mex_APIs("../../install/include")
+    - name: Run MEX API Tests
+      uses: matlab-actions/run-command@v2.2.1
+      with:
+        command: |
+          cd("examples/MATLAB_MEX");
+          results = runtests("testSparseBLASMexAPIs", "Verbosity", 0)
+          assertSuccess(results);
+
   aocl:
     runs-on: 'cpu_amd'
     steps: