Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
52997da
Add HNSW layered hierarchy
julianmi May 26, 2026
a11d9ee
Improve deserialization logging
julianmi May 27, 2026
a95b0e0
Use ace prefix in benchmarking consistently
julianmi May 27, 2026
47e2d85
Validate metadata before allocating
julianmi May 27, 2026
21ce339
Store layered base topology by original node ID
julianmi Jun 1, 2026
4514bc8
Unify the ACE logging format
julianmi Jun 1, 2026
0e9accd
Merge branch 'main' into hnsw-layered-index
julianmi Jun 3, 2026
7a761cf
Address review feedback
julianmi Jun 9, 2026
ea1a96c
Replace JSON header with binary header
julianmi Jun 9, 2026
153a82d
Merge branch 'main' into hnsw-layered-index
julianmi Jun 9, 2026
265206b
Merge branch 'main' into hnsw-layered-index
julianmi Jun 12, 2026
0e2d458
Merge branch 'main' into hnsw-layered-index
julianmi Jun 22, 2026
4513f6f
Merge branch 'main' into hnsw-layered-index
julianmi Jun 24, 2026
6d9b2c6
Address coderabbit comments
julianmi Jun 24, 2026
2bf44ea
Fix half type numpy conversions
julianmi Jun 24, 2026
002c373
Make numpy helpers private
julianmi Jun 24, 2026
1bed620
C deserialization should accept f2 numpy half type
julianmi Jun 24, 2026
99b1320
Merge branch 'main' of https://github.com/NVIDIA/cuvs into hnsw-layer…
julianmi Jun 24, 2026
a61b7cb
Merge branch 'main' into hnsw-layered-index
julianmi Jun 25, 2026
bb66010
Merge branch 'main' into hnsw-layered-index
julianmi Jun 29, 2026
d6e3ffa
Merge branch 'main' into hnsw-layered-index
julianmi Jun 30, 2026
8f067ed
Merge branch 'main' into hnsw-layered-index
julianmi Jul 1, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 6 additions & 3 deletions c/src/neighbors/brute_force.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -250,10 +250,13 @@ extern "C" cuvsError_t cuvsBruteForceDeserialize(cuvsResources_t res,
index->dtype.bits = dtype.itemsize * 8;
if (dtype.kind == 'f' && dtype.itemsize == 4) {
index->dtype.code = kDLFloat;
index->addr = reinterpret_cast<uintptr_t>(_deserialize<float>(res, filename));
} else if (dtype.kind == 'e' && dtype.itemsize == 2) {
index->addr =
reinterpret_cast<uintptr_t>(_deserialize<float>(res, filename));
} else if ((dtype.kind == 'f' || dtype.kind == 'e') &&
dtype.itemsize == 2) {
index->dtype.code = kDLFloat;
index->addr = reinterpret_cast<uintptr_t>(_deserialize<half>(res, filename));
index->addr =
reinterpret_cast<uintptr_t>(_deserialize<half>(res, filename));
} else {
RAFT_FAIL("Unsupported index dtype: %d and bits: %d", index->dtype.code, index->dtype.bits);
}
Expand Down
9 changes: 6 additions & 3 deletions c/src/neighbors/cagra.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -884,10 +884,13 @@ extern "C" cuvsError_t cuvsCagraDeserialize(cuvsResources_t res,

index->dtype.bits = dtype.itemsize * 8;
if (dtype.kind == 'f' && dtype.itemsize == 4) {
index->addr = reinterpret_cast<uintptr_t>(_deserialize<float>(res, filename));
index->addr =
reinterpret_cast<uintptr_t>(_deserialize<float>(res, filename));
index->dtype.code = kDLFloat;
} else if (dtype.kind == 'e' && dtype.itemsize == 2) {
index->addr = reinterpret_cast<uintptr_t>(_deserialize<half>(res, filename));
} else if ((dtype.kind == 'f' || dtype.kind == 'e') &&
dtype.itemsize == 2) {
index->addr =
reinterpret_cast<uintptr_t>(_deserialize<half>(res, filename));
index->dtype.code = kDLFloat;
} else if (dtype.kind == 'i' && dtype.itemsize == 1) {
index->addr = reinterpret_cast<uintptr_t>(_deserialize<int8_t>(res, filename));
Expand Down
9 changes: 6 additions & 3 deletions c/src/neighbors/ivf_flat.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -310,10 +310,13 @@ extern "C" cuvsError_t cuvsIvfFlatDeserialize(cuvsResources_t res,

index->dtype.bits = dtype.itemsize * 8;
if (dtype.kind == 'f' && dtype.itemsize == 4) {
index->addr = reinterpret_cast<uintptr_t>(_deserialize<float, int64_t>(res, filename));
index->addr = reinterpret_cast<uintptr_t>(
_deserialize<float, int64_t>(res, filename));
index->dtype.code = kDLFloat;
} else if (dtype.kind == 'e' && dtype.itemsize == 2) {
index->addr = reinterpret_cast<uintptr_t>(_deserialize<half, int64_t>(res, filename));
} else if ((dtype.kind == 'f' || dtype.kind == 'e') &&
dtype.itemsize == 2) {
index->addr = reinterpret_cast<uintptr_t>(
_deserialize<half, int64_t>(res, filename));
index->dtype.code = kDLFloat;
index->dtype.bits = 16;
} else if (dtype.kind == 'i' && dtype.itemsize == 1) {
Expand Down
18 changes: 12 additions & 6 deletions c/src/neighbors/mg_cagra.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -411,10 +411,13 @@ extern "C" cuvsError_t cuvsMultiGpuCagraDeserialize(cuvsResources_t res,
index->dtype.bits = dtype.itemsize * 8;
if (dtype.kind == 'f' && dtype.itemsize == 4) {
index->dtype.code = kDLFloat;
index->addr = reinterpret_cast<uintptr_t>(_mg_deserialize<float>(res, filename));
} else if (dtype.kind == 'e' && dtype.itemsize == 2) {
index->addr =
reinterpret_cast<uintptr_t>(_mg_deserialize<float>(res, filename));
} else if ((dtype.kind == 'f' || dtype.kind == 'e') &&
dtype.itemsize == 2) {
index->dtype.code = kDLFloat;
index->addr = reinterpret_cast<uintptr_t>(_mg_deserialize<half>(res, filename));
index->addr =
reinterpret_cast<uintptr_t>(_mg_deserialize<half>(res, filename));
} else if (dtype.kind == 'i' && dtype.itemsize == 1) {
index->dtype.code = kDLInt;
index->addr = reinterpret_cast<uintptr_t>(_mg_deserialize<int8_t>(res, filename));
Expand Down Expand Up @@ -445,10 +448,13 @@ extern "C" cuvsError_t cuvsMultiGpuCagraDistribute(cuvsResources_t res,
index->dtype.bits = dtype.itemsize * 8;
if (dtype.kind == 'f' && dtype.itemsize == 4) {
index->dtype.code = kDLFloat;
index->addr = reinterpret_cast<uintptr_t>(_mg_distribute<float>(res, filename));
} else if (dtype.kind == 'e' && dtype.itemsize == 2) {
index->addr =
reinterpret_cast<uintptr_t>(_mg_distribute<float>(res, filename));
} else if ((dtype.kind == 'f' || dtype.kind == 'e') &&
dtype.itemsize == 2) {
index->dtype.code = kDLFloat;
index->addr = reinterpret_cast<uintptr_t>(_mg_distribute<half>(res, filename));
index->addr =
reinterpret_cast<uintptr_t>(_mg_distribute<half>(res, filename));
} else if (dtype.kind == 'i' && dtype.itemsize == 1) {
index->dtype.code = kDLInt;
index->addr = reinterpret_cast<uintptr_t>(_mg_distribute<int8_t>(res, filename));
Expand Down
18 changes: 12 additions & 6 deletions c/src/neighbors/mg_ivf_flat.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -408,10 +408,13 @@ extern "C" cuvsError_t cuvsMultiGpuIvfFlatDeserialize(cuvsResources_t res,
index->dtype.bits = dtype.itemsize * 8;
if (dtype.kind == 'f' && dtype.itemsize == 4) {
index->dtype.code = kDLFloat;
index->addr = reinterpret_cast<uintptr_t>(_mg_deserialize<float>(res, filename));
} else if (dtype.kind == 'e' && dtype.itemsize == 2) {
index->addr =
reinterpret_cast<uintptr_t>(_mg_deserialize<float>(res, filename));
} else if ((dtype.kind == 'f' || dtype.kind == 'e') &&
dtype.itemsize == 2) {
index->dtype.code = kDLFloat;
index->addr = reinterpret_cast<uintptr_t>(_mg_deserialize<half>(res, filename));
index->addr =
reinterpret_cast<uintptr_t>(_mg_deserialize<half>(res, filename));
} else if (dtype.kind == 'i' && dtype.itemsize == 1) {
index->dtype.code = kDLInt;
index->addr = reinterpret_cast<uintptr_t>(_mg_deserialize<int8_t>(res, filename));
Expand Down Expand Up @@ -442,10 +445,13 @@ extern "C" cuvsError_t cuvsMultiGpuIvfFlatDistribute(cuvsResources_t res,
index->dtype.bits = dtype.itemsize * 8;
if (dtype.kind == 'f' && dtype.itemsize == 4) {
index->dtype.code = kDLFloat;
index->addr = reinterpret_cast<uintptr_t>(_mg_distribute<float>(res, filename));
} else if (dtype.kind == 'e' && dtype.itemsize == 2) {
index->addr =
reinterpret_cast<uintptr_t>(_mg_distribute<float>(res, filename));
} else if ((dtype.kind == 'f' || dtype.kind == 'e') &&
dtype.itemsize == 2) {
index->dtype.code = kDLFloat;
index->addr = reinterpret_cast<uintptr_t>(_mg_distribute<half>(res, filename));
index->addr =
reinterpret_cast<uintptr_t>(_mg_distribute<half>(res, filename));
} else if (dtype.kind == 'i' && dtype.itemsize == 1) {
index->dtype.code = kDLInt;
index->addr = reinterpret_cast<uintptr_t>(_mg_distribute<int8_t>(res, filename));
Expand Down
9 changes: 6 additions & 3 deletions c/src/neighbors/mg_ivf_pq.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -400,10 +400,13 @@ extern "C" cuvsError_t cuvsMultiGpuIvfPqDeserialize(cuvsResources_t res,
index->dtype.bits = dtype.itemsize * 8;
if (dtype.kind == 'f' && dtype.itemsize == 4) {
index->dtype.code = kDLFloat;
index->addr = reinterpret_cast<uintptr_t>(_mg_deserialize<float>(res, filename));
} else if (dtype.kind == 'e' && dtype.itemsize == 2) {
index->addr =
reinterpret_cast<uintptr_t>(_mg_deserialize<float>(res, filename));
} else if ((dtype.kind == 'f' || dtype.kind == 'e') &&
dtype.itemsize == 2) {
index->dtype.code = kDLFloat;
index->addr = reinterpret_cast<uintptr_t>(_mg_deserialize<half>(res, filename));
index->addr =
reinterpret_cast<uintptr_t>(_mg_deserialize<half>(res, filename));
} else if (dtype.kind == 'i' && dtype.itemsize == 1) {
index->dtype.code = kDLInt;
index->addr = reinterpret_cast<uintptr_t>(_mg_deserialize<int8_t>(res, filename));
Expand Down
24 changes: 23 additions & 1 deletion cpp/bench/ann/src/cuvs/cuvs_cagra_hnswlib.cu
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
*/

#include "../common/ann_types.hpp"
#include "../common/conf.hpp"
#include "cuvs_ann_bench_param_parser.h"
#include "cuvs_cagra_hnswlib_wrapper.h"

Expand All @@ -27,6 +28,8 @@ auto parse_build_param(const nlohmann::json& conf) ->
hnsw_params.hierarchy = cuvs::neighbors::hnsw::HnswHierarchy::CPU;
} else if (conf.at("hierarchy") == "gpu") {
hnsw_params.hierarchy = cuvs::neighbors::hnsw::HnswHierarchy::GPU;
} else if (conf.at("hierarchy") == "gpu_layered_on_disk") {
hnsw_params.hierarchy = cuvs::neighbors::hnsw::HnswHierarchy::GPU_LAYERED_ON_DISK;
} else {
THROW("Invalid value for hierarchy: %s", conf.at("hierarchy").get<std::string>().c_str());
}
Expand All @@ -36,12 +39,31 @@ auto parse_build_param(const nlohmann::json& conf) ->
if (conf.contains("ef_construction")) {
hnsw_params.ef_construction = conf.at("ef_construction");
}
if (conf.contains("dataset_path")) {
hnsw_params.dataset_path = conf.at("dataset_path");
} else if (hnsw_params.hierarchy == cuvs::neighbors::hnsw::HnswHierarchy::GPU_LAYERED_ON_DISK) {
hnsw_params.dataset_path = configuration::singleton().get_dataset_conf().base_file;
}
if (conf.contains("num_threads")) { hnsw_params.num_threads = conf.at("num_threads"); }

// Reuse the CAGRA wrapper params parser
::parse_build_param<T, IdxT>(conf, cagra_params);

if (conf.contains("M")) { hnsw_params.M = conf.at("M"); }

// ACE / GPU_LAYERED_ON_DISK builds can be fine-tuned from the benchmark config. The library
// auto-selects the build algorithm from `M` and `ef_construction`; here we only forward the
// explicit ACE overrides (if any) onto the new hnsw index params.
auto ace_conf = collect_conf_with_prefix(conf, "ace_");
if (!ace_conf.empty()) {
auto ace_params = cuvs::neighbors::hnsw::graph_build_params::ace_params();
if (ace_conf.contains("npartitions")) { ace_params.npartitions = ace_conf.at("npartitions"); }
if (ace_conf.contains("build_dir")) { ace_params.build_dir = ace_conf.at("build_dir"); }
if (ace_conf.contains("ef_construction")) {
ace_params.ef_construction = ace_conf.at("ef_construction");
}
if (ace_conf.contains("use_disk")) { ace_params.use_disk = ace_conf.at("use_disk"); }
hnsw_params.graph_build_params = ace_params;
}
return param;
}

Expand Down
42 changes: 36 additions & 6 deletions cpp/bench/ann/src/cuvs/cuvs_cagra_hnswlib_wrapper.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,33 @@
#include <raft/core/logger.hpp>

#include <chrono>
#include <filesystem>
#include <memory>

namespace cuvs::bench {

/**
 * @brief Move `src` to `dst`, replacing whatever file is already at `dst`.
 *
 * Nothing happens when both paths compare equal or resolve to the same file. The parent directory
 * of `dst` is created when it is missing (this step uses the throwing `create_directories`
 * overload). A plain rename is attempted first; when that fails, the file is copied to `dst` and
 * the source is removed afterwards. A failed copy is reported through `RAFT_EXPECTS`.
 *
 * @param src path of the file to move
 * @param dst path the file should end up at
 */
inline void move_file_overwrite(const std::filesystem::path& src, const std::filesystem::path& dst)
{
  namespace fs = std::filesystem;

  std::error_code ec;
  if (src == dst || (fs::exists(dst, ec) && fs::equivalent(src, dst, ec))) { return; }
  if (!dst.parent_path().empty()) { fs::create_directories(dst.parent_path()); }

  // Rename first: it replaces an existing destination file by itself, so the previous destination
  // is not deleted up front and is still there if the move cannot be carried out at all.
  fs::rename(src, dst, ec);
  if (!ec) { return; }

  // Rename fails across filesystems. Fall back to copy followed by removal of the source.
  // copy_file can only overwrite a regular file, so a destination of any other kind (e.g. an empty
  // directory) is cleared out of the way first, on a best-effort basis.
  if (fs::exists(dst, ec) && !fs::is_regular_file(dst, ec)) { fs::remove(dst, ec); }
  ec.clear();
  fs::copy_file(src, dst, fs::copy_options::overwrite_existing, ec);
  const auto src_str = src.string();
  const auto dst_str = dst.string();
  RAFT_EXPECTS(!ec, "Failed to move '%s' to '%s'.", src_str.c_str(), dst_str.c_str());

  // Best effort: a source that cannot be removed does not invalidate the copied destination.
  fs::remove(src, ec);
}

template <typename T, typename IdxT>
class cuvs_cagra_hnswlib : public algo<T>, public algo_gpu {
public:
Expand Down Expand Up @@ -101,18 +124,25 @@ void cuvs_cagra_hnswlib<T, IdxT>::set_search_param(const search_param_base& para
template <typename T, typename IdxT>
void cuvs_cagra_hnswlib<T, IdxT>::save(const std::string& file) const
{
if (build_param_.hnsw_index_params.hierarchy ==
cuvs::neighbors::hnsw::HnswHierarchy::GPU_LAYERED_ON_DISK) {
const auto src_artifact = std::filesystem::path(hnsw_index_->file_path());
RAFT_EXPECTS(!src_artifact.empty(), "Layered HNSW artifact path is not available.");
RAFT_EXPECTS(std::filesystem::exists(src_artifact),
"Layered HNSW artifact '%s' does not exist.",
src_artifact.c_str());

move_file_overwrite(src_artifact, std::filesystem::path(file));
return;
}

if (cagra_ace_build_) {
std::string index_filename = hnsw_index_->file_path();
RAFT_EXPECTS(!index_filename.empty(), "HNSW index file path is not available.");
RAFT_EXPECTS(std::filesystem::exists(index_filename),
"Index file '%s' does not exist.",
index_filename.c_str());
if (std::filesystem::exists(file)) { std::filesystem::remove(file); }
// might fail when using 2 different filesystems
std::error_code ec;
std::filesystem::rename(index_filename, file, ec);
RAFT_EXPECTS(
!ec, "Failed to rename index file '%s' to '%s'.", index_filename.c_str(), file.c_str());
move_file_overwrite(std::filesystem::path(index_filename), std::filesystem::path(file));
} else {
cuvs::neighbors::hnsw::serialize(handle_, file, *(hnsw_index_.get()));
}
Expand Down
18 changes: 15 additions & 3 deletions cpp/include/cuvs/neighbors/hnsw.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#include <cstdint>
#include <cuvs/core/export.hpp>
#include <memory>
#include <string>
#include <type_traits>
#include <variant>

Expand All @@ -41,9 +42,10 @@ namespace graph_build_params = cuvs::neighbors::graph_build_params;
* NOTE: When the value is `NONE`, the HNSW index is built as a base-layer-only index.
*/
enum class HnswHierarchy {
NONE, // base-layer-only index
CPU, // full index with CPU-built hierarchy
GPU // full index with GPU-built hierarchy
NONE, // base-layer-only index
CPU, // full index with CPU-built hierarchy
GPU, // full index with GPU-built hierarchy
GPU_LAYERED_ON_DISK // GPU-built hierarchy stored as layered on-disk topology
};

struct index_params : cuvs::neighbors::index_params {
Expand All @@ -64,6 +66,16 @@ struct index_params : cuvs::neighbors::index_params {
*/
size_t M = 32;

/** Local dataset path used by layered HNSW deserialization.
*
* When `hierarchy == HnswHierarchy::GPU_LAYERED_ON_DISK`, the index artifact stores graph
* topology only. `deserialize` loads vectors from this local dataset path to reconstruct an
* in-memory HNSW index.
* Currently supported local dataset formats are `.npy` and ANN benchmark `*.bin` files with a
* `[uint32 rows, uint32 cols]` header.
*/
std::string dataset_path;

/** Parameters to fine tune GPU graph building. By default we select the parameters based on
* dataset shape and HNSW build parameters. You can override these parameters to fine tune the
* graph building process as described in the CAGRA build docs.
Expand Down
13 changes: 4 additions & 9 deletions cpp/include/cuvs/util/file_io.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
#include <raft/core/numpy_serializer.hpp>
#include <raft/core/serialize.hpp>

#include <cuvs/util/numpy_dtype.hpp>

#include <algorithm>
#include <cstring>
#include <istream>
Expand Down Expand Up @@ -189,15 +191,8 @@ std::pair<file_descriptor, size_t> create_numpy_file(const std::string& path,
// Open file
file_descriptor fd(path, O_CREAT | O_RDWR | O_TRUNC, 0644);

// Build header
const auto dtype = raft::numpy_serializer::get_numpy_dtype<T>();
const bool fortran_order = false;
const raft::numpy_serializer::header_t header = {dtype, fortran_order, shape};

std::stringstream ss;
raft::numpy_serializer::write_header(ss, header);
std::string header_str = ss.str();
size_t header_size = header_str.size();
const std::string header_str = detail::make_numpy_header_string<T>(shape);
size_t header_size = header_str.size();

// Calculate data size from shape
size_t data_bytes = sizeof(T);
Expand Down
Loading
Loading