diff --git a/benchmark/run_benchmarks_ci.sh b/benchmark/run_benchmarks_ci.sh
index 49fdc38ee5..1bcce8009f 100755
--- a/benchmark/run_benchmarks_ci.sh
+++ b/benchmark/run_benchmarks_ci.sh
@@ -22,7 +22,7 @@ pushd "${PROJECT_DIR}" > /dev/null
 
 # Run benchmarks
 message "Running benchmarks"
-cargo bench --workspace --features libdd-crashtracker/benchmarking,libdd-sampling/v04_span,libdd-sampling/bench-internals -- --warm-up-time 1 --measurement-time 5 --sample-size=200
+cargo bench --workspace --features libdd-crashtracker/benchmarking,libdd-sampling/v04_span,libdd-sampling/bench-internals,libdd-trace-utils/bench-internals -- --warm-up-time 1 --measurement-time 5 --sample-size=200
 message "Finished running benchmarks"
 
 # Copy the benchmark results to the output directory
diff --git a/libdd-trace-utils/Cargo.toml b/libdd-trace-utils/Cargo.toml
index 492c0c59c5..3ed385e49f 100644
--- a/libdd-trace-utils/Cargo.toml
+++ b/libdd-trace-utils/Cargo.toml
@@ -17,6 +17,12 @@ name = "main"
 harness = false
 path = "benches/main.rs"
 
+[[bench]]
+name = "vec_map_bench"
+harness = false
+path = "benches/vec_map_bench.rs"
+required-features = ["bench-internals"]
+
 [dependencies]
 anyhow = "1.0"
 base64 = "0.22"
@@ -88,6 +94,10 @@ test-utils = [
     "urlencoding",
 ]
 change-buffer = []
+# Opt-in switch for crate-internal microbenchmarks (e.g. `vec_map_bench`). Mirrors the
+# `bench-internals` feature in `libdd-sampling`; the bench targets it via `required-features` so
+# they are not built by default. Not intended for downstream consumers.
+bench-internals = []
 compression = ["zstd", "flate2"]
 # FIPS mode uses the FIPS-compliant cryptographic provider (Unix only)
 fips = ["libdd-common/fips", "libdd-capabilities-impl/fips"]
diff --git a/libdd-trace-utils/benches/vec_map_bench.rs b/libdd-trace-utils/benches/vec_map_bench.rs
new file mode 100644
index 0000000000..6beccd6afe
--- /dev/null
+++ b/libdd-trace-utils/benches/vec_map_bench.rs
@@ -0,0 +1,293 @@
+// Copyright 2026-Present Datadog, Inc. https://www.datadoghq.com/
+// SPDX-License-Identifier: Apache-2.0
+
+//! Microbenchmarks for [`VecMap`], the linear-scan ordered map backing some of span's associative
+//! maps.
+//!
+//! Keys are [`BytesString`] to match real span usage (`meta`/`metrics` are keyed by `BytesString`).
+//! Map sizes span the typical range up to a large end (128). We expect the advantage of `VecMap` to
+//! degrade with size and with the duplicate rate.
+
+use criterion::{criterion_group, criterion_main, BatchSize, BenchmarkId, Criterion};
+use libdd_tinybytes::BytesString;
+use libdd_trace_utils::span::vec_map::VecMap;
+use std::hint::black_box;
+
+/// Representative map sizes: the small end is the common case (a span carries a handful of tags),
+/// the larger end covers heavily-tagged spans. Deliberately bounded — `VecMap` is never expected
+/// to hold thousands of entries.
+const SIZES: &[usize] = &[8, 16, 64, 128];
+
+/// A small set of prefixes resembling real span meta namespaces. Includes an empty prefix so that
+/// not every key shares a common head — keys generated from different prefixes diverge on the very
+/// first byte, which is the realistic mix the linear scan actually sees.
+const PREFIXES: &[&str] = &["", "http.", "db.", "aws.", "_dd."];
+
+/// Duplicate periods exercised by the dedup benches: a key is re-inserted (shadowed) every
+/// `period`-th insert, so the duplicate rate is `1/period`. We cover 50% (2, unrealistic/worst
+/// case), 25% (4) and 10% (10) to measure how dedup cost scales with duplicate rates.
+const DUP_PERIODS: &[usize] = &[2, 4, 10];
+
+/// Build a deterministic set of `BytesString` keys shaped like real span tag names.
+fn keys(n: usize) -> Vec<BytesString> {
+    // Dotted names resembling real span meta keys (`http.method`, `db.statement`, ...). Generated
+    // deterministically. Prefixes are picked by modulo over the `PREFIXES` set.
+    // The index is put first to simulate the fact that after the prefix, the identifiers are likely
+    // to be distinct. Doing the converse would add a longer common prefix.
+    (0..n)
+        .map(|i| {
+            let prefix = PREFIXES[i % PREFIXES.len()];
+            BytesString::from_string(format!("{prefix}{i:03}-nth-key"))
+        })
+        .collect()
+}
+
+/// Build deterministic string values, sized like typical meta values.
+fn values(n: usize) -> Vec<BytesString> {
+    (0..n)
+        .map(|i| BytesString::from_string(format!("value-{i:03}")))
+        .collect()
+}
+
+/// A pre-populated `meta`-shaped map (`BytesString -> BytesString`) with `n` unique keys.
+fn populated_meta(n: usize) -> VecMap<BytesString, BytesString> {
+    keys(n).into_iter().zip(values(n)).collect()
+}
+
+/// A `metrics`-shaped map (`BytesString -> f64`) with `n` unique keys.
+fn populated_metrics(n: usize) -> VecMap<BytesString, f64> {
+    keys(n)
+        .into_iter()
+        .enumerate()
+        .map(|(i, k)| (k, i as f64))
+        .collect()
+}
+
+/// Insert: builds a fresh map of `n` entries from scratch (the construction path on the client's
+/// hot path). `insert` mutates, so we rebuild the input each iteration with `iter_batched`.
+fn bench_insert(c: &mut Criterion) {
+    let mut group = c.benchmark_group("vec_map/insert");
+
+    for &n in SIZES {
+        group.throughput(criterion::Throughput::Elements(n as u64));
+        group.bench_with_input(BenchmarkId::from_parameter(n), &n, |b, &n| {
+            let ks = keys(n);
+            let vs = values(n);
+            b.iter_batched(
+                || (ks.clone(), vs.clone()),
+                |(ks, vs)| {
+                    let mut map = VecMap::with_capacity(n);
+                    for (k, v) in ks.into_iter().zip(vs) {
+                        map.insert(black_box(k), black_box(v));
+                    }
+                    map
+                },
+                BatchSize::SmallInput,
+            )
+        });
+    }
+    group.finish();
+}
+
+/// Get (hit): looks up every present key once, reporting the average successful-lookup cost.
+/// `get` returns the last match (scanning from the back), so this averages over scan distances.
+fn bench_get_hit(c: &mut Criterion) {
+    let mut group = c.benchmark_group("vec_map/get_hit");
+
+    for &n in SIZES {
+        group.throughput(criterion::Throughput::Elements(n as u64));
+        let map = populated_meta(n);
+        let lookups = keys(n);
+        group.bench_with_input(BenchmarkId::from_parameter(n), &n, |b, _| {
+            b.iter(|| {
+                for k in &lookups {
+                    black_box(map.get(black_box(k.as_str())));
+                }
+            })
+        });
+    }
+    group.finish();
+}
+
+/// Get (miss): worst case for a linear-scan map — a full scan that finds nothing.
+fn bench_get_miss(c: &mut Criterion) {
+    let mut group = c.benchmark_group("vec_map/get_miss");
+
+    for &n in SIZES {
+        let map = populated_meta(n);
+        group.bench_with_input(BenchmarkId::from_parameter(n), &n, |b, _| {
+            b.iter(|| {
+                black_box(map.get(black_box("this.key.is.absent")));
+            })
+        });
+    }
+    group.finish();
+}
+
+/// Get_mut (hit): mutable lookup of every key.
+fn bench_get_mut(c: &mut Criterion) {
+    let mut group = c.benchmark_group("vec_map/get_mut");
+
+    for &n in SIZES {
+        group.throughput(criterion::Throughput::Elements(n as u64));
+        let lookups = keys(n);
+        group.bench_with_input(BenchmarkId::from_parameter(n), &n, |b, _| {
+            b.iter_batched_ref(
+                || populated_metrics(n),
+                |map| {
+                    for k in &lookups {
+                        if let Some(v) = map.get_mut(black_box(k.as_str())) {
+                            *v += 1.0;
+                        }
+                    }
+                },
+                BatchSize::SmallInput,
+            )
+        });
+    }
+    group.finish();
+}
+
+/// Contains_key: full scan (`any`), for every present key plus one absent key (`n + 1` lookups).
+fn bench_contains_key(c: &mut Criterion) {
+    let mut group = c.benchmark_group("vec_map/contains_key");
+
+    for &n in SIZES {
+        group.throughput(criterion::Throughput::Elements(n as u64 + 1));
+        let map = populated_meta(n);
+        let lookups = keys(n);
+        group.bench_with_input(BenchmarkId::from_parameter(n), &n, |b, _| {
+            b.iter(|| {
+                for k in &lookups {
+                    black_box(map.contains_key(black_box(k.as_str())));
+                }
+                black_box(map.contains_key(black_box("this.key.is.absent")));
+            })
+        });
+    }
+    group.finish();
+}
+
+/// Iter: full traversal, as performed on the encode path.
+fn bench_iter(c: &mut Criterion) {
+    let mut group = c.benchmark_group("vec_map/iter");
+
+    for &n in SIZES {
+        group.throughput(criterion::Throughput::Elements(n as u64));
+        let map = populated_meta(n);
+        group.bench_with_input(BenchmarkId::from_parameter(n), &n, |b, _| {
+            b.iter(|| {
+                for (k, v) in map.iter() {
+                    black_box((k, v));
+                }
+            })
+        });
+    }
+    group.finish();
+}
+
+/// A `meta`-shaped map where roughly one in `period` of the inserts is a duplicate key (a tag being
+/// overwritten). This is the realistic "has duplicates" shape that `dedup` has to compact; a
+/// smaller `period` means more duplicates.
+fn meta_with_duplicates(n: usize, period: usize) -> VecMap<BytesString, BytesString> {
+    let mut map = VecMap::with_capacity(n + n / period);
+
+    for (i, (k, v)) in keys(n).into_iter().zip(values(n)).enumerate() {
+        // Re-insert every `period`-th key first to create a duplicate (the earlier value gets
+        // shadowed).
+        if i % period == 0 {
+            map.insert(k.clone(), BytesString::from_static("stale"));
+        }
+        map.insert(k, v);
+    }
+
+    map
+}
+
+/// dedup(): runs once per span on decode. `dedup` mutates and sets a flag, so we rebuild the
+/// (un-deduped) input each iteration. Benched both with and without duplicates.
+fn bench_dedup(c: &mut Criterion) {
+    let mut group = c.benchmark_group("vec_map/dedup");
+
+    for &n in SIZES {
+        group.bench_with_input(BenchmarkId::new("no_duplicates", n), &n, |b, &n| {
+            b.iter_batched_ref(
+                || populated_meta(n),
+                |map| {
+                    map.dedup();
+                    black_box(&*map);
+                },
+                BatchSize::SmallInput,
+            )
+        });
+        for &period in DUP_PERIODS {
+            group.bench_with_input(
+                BenchmarkId::new(format!("dup_1_in_{period}"), n),
+                &n,
+                |b, &n| {
+                    b.iter_batched_ref(
+                        || meta_with_duplicates(n, period),
+                        |map| {
+                            map.dedup();
+                            black_box(&*map);
+                        },
+                        BatchSize::SmallInput,
+                    )
+                },
+            );
+        }
+    }
+    group.finish();
+}
+
+/// as_deduped_map(): the immutable variant used on the encode path. When the map is already deduped
+/// it borrows for free; when not, it dedups on the fly with a side allocation. Both cases are
+/// benched, and iterated through.
+fn bench_as_deduped_map(c: &mut Criterion) {
+    let mut group = c.benchmark_group("vec_map/as_deduped_map");
+
+    for &n in SIZES {
+        // Already-deduped: cheap borrow path (the common case on encode).
+        let mut deduped = populated_meta(n);
+        deduped.dedup();
+        group.bench_with_input(BenchmarkId::new("already_deduped", n), &n, |b, _| {
+            b.iter(|| {
+                let map = black_box(deduped.as_deduped_map());
+                for (k, v) in map.iter() {
+                    black_box((k, v));
+                }
+            })
+        });
+
+        // Not deduped, with duplicates: allocating fallback path.
+        for &period in DUP_PERIODS {
+            let dirty = meta_with_duplicates(n, period);
+            group.bench_with_input(
+                BenchmarkId::new(format!("needs_dedup_1_in_{period}"), n),
+                &n,
+                |b, _| {
+                    b.iter(|| {
+                        let map = black_box(dirty.as_deduped_map());
+                        for (k, v) in map.iter() {
+                            black_box((k, v));
+                        }
+                    })
+                },
+            );
+        }
+    }
+    group.finish();
+}
+
+criterion_group!(
+    benches,
+    bench_insert,
+    bench_get_hit,
+    bench_get_miss,
+    bench_get_mut,
+    bench_contains_key,
+    bench_iter,
+    bench_dedup,
+    bench_as_deduped_map,
+);
+criterion_main!(benches);