From bd85696f8c83653ec4ba7ee6af9ffac22a54c976 Mon Sep 17 00:00:00 2001 From: Yann Hamdaoui Date: Tue, 16 Jun 2026 10:57:13 +0200 Subject: [PATCH 1/6] test(trace-normalization): add microbenchmarks for tag/metric/truncate normalization Extend the existing criterion bench to cover the per-char UTF-8 state machines that run on every ingested span but were previously unmeasured: `normalize_tag` (ASCII / mixed-unicode / over-length), `normalize_metric_name`, `truncate_utf8` (UTF-8 boundary walk-back), and `normalize_span_start_duration` (quantifying the SystemTime read on the year-2000 path). Adds a `bench-internals` feature, mirroring `libdd-sampling`, to expose the otherwise-private `normalize_metric_name`/`truncate_utf8` without changing the shipped public API. Co-Authored-By: Claude Opus 4.8 (1M context) --- libdd-trace-normalization/Cargo.toml | 5 + .../benches/normalization_utils.rs | 160 +++++++++++++++++- .../src/normalize_utils.rs | 18 ++ 3 files changed, 181 insertions(+), 2 deletions(-) diff --git a/libdd-trace-normalization/Cargo.toml b/libdd-trace-normalization/Cargo.toml index 1579a5e299..e510084532 100644 --- a/libdd-trace-normalization/Cargo.toml +++ b/libdd-trace-normalization/Cargo.toml @@ -19,6 +19,10 @@ arbitrary = { version = "1.3", features = ["derive"], optional = true } [features] fuzzing = ["arbitrary"] +# Exposes thin public `*_bench_wrapper` shims (e.g. for `normalize_metric_name`, `truncate_utf8`) +# so benchmarks can reach otherwise-internal functions. The benchmarked functions themselves are +# left untouched. Not intended for downstream consumers — enable only when running benches. 
+bench-internals = [] [dev-dependencies] rand = "0.8.5" @@ -29,3 +33,4 @@ criterion = "0.5" name = "normalization_utils" harness = false path = "benches/normalization_utils.rs" +required-features = ["bench-internals"] diff --git a/libdd-trace-normalization/benches/normalization_utils.rs b/libdd-trace-normalization/benches/normalization_utils.rs index 6c69ff495f..35afe30f93 100644 --- a/libdd-trace-normalization/benches/normalization_utils.rs +++ b/libdd-trace-normalization/benches/normalization_utils.rs @@ -6,7 +6,10 @@ use criterion::Throughput::Elements; use criterion::{ criterion_group, criterion_main, BatchSize, BenchmarkGroup, BenchmarkId, Criterion, }; -use libdd_trace_normalization::normalize_utils::{normalize_name, normalize_service}; +use libdd_trace_normalization::normalize_utils::{ + normalize_metric_name_bench_wrapper, normalize_name, normalize_service, + normalize_span_start_duration, normalize_tag, truncate_utf8_bench_wrapper, +}; use libdd_trace_normalization::normalizer::normalize_trace; use libdd_trace_protobuf::pb; use std::hint::black_box; @@ -142,10 +145,163 @@ fn normalize_span_bench(c: &mut Criterion) { ); } +/// `normalize_tag` runs on every ingested tag key/value. It is the heaviest normalization +/// function: a nested loop combining an ASCII fast-path with per-codepoint UTF-8 scanning and a +/// char-class state machine. We exercise realistic tag values plus the unicode and over-length +/// paths that defeat the ASCII fast-path. +fn normalize_tag_bench(c: &mut Criterion) { + let group = c.benchmark_group("normalization/normalize_tag"); + let cases = &[ + // Empty input: measures the early-return baseline. + "", + // Already-clean realistic tag values: ASCII fast-path only. + "ascii:http.method:get", + "ascii:env:production", + "ascii:resource:get_/api/v1/users/{id}", + // Mixed: needs the illegal-char state machine but stays ASCII. + "mixed:Some Service Name!!", + // Unicode service name: exercises the codepoint-scanning slow path. 
+ "unicode:café-Über-Sérvice", + "unicode:Data🐨dog🐶 繋がっ⛰てて", + // Over-length (> MAX_TAG_LEN = 200): forces the loop to run to the codepoint cap. + "over-length-ascii:over_length_ascii_value_that_keeps_going_and_going_and_going_and_going_and_going_and_going_and_going_and_going_and_going_and_going_and_going_and_going_and_going_and_going_and_going_and_going_and_going_and_going", + ]; + normalize_fnmut_string(group, cases, 1000, "normalize_tag", normalize_tag); +} + +/// `normalize_metric_name` runs on every span name. Similar complexity to `normalize_tag` with a +/// one-byte lookahead (`last_written_char`) to collapse separators. +fn normalize_metric_name_bench(c: &mut Criterion) { + let group = c.benchmark_group("normalization/normalize_metric_name"); + let cases = &[ + // Empty input: measures the early-return baseline. + "", + // Already-clean span names. + "http.request", + "django.controller", + // Names needing separator collapsing / illegal-char replacement. + "GET /some/raclette", + "rails.action_controller.process", + // Over-length (> MAX_NAME_LEN = 100). + "Too-Long-.Too-Long-.Too-Long-.Too-Long-.Too-Long-.Too-Long-.Too-Long-.Too-Long-.Too-Long-.Too-Long-.Too-Long-.", + ]; + normalize_fnmut_string( + group, + cases, + 1000, + "normalize_metric_name", + normalize_metric_name_bench_wrapper, + ); +} + +/// `truncate_utf8` is called before every name/service/type normalization to enforce a byte +/// limit while preserving UTF-8 boundaries. We bench the over-length cases (where it actually does +/// work) at the real limits used in the code, including a multi-byte boundary that must be walked +/// back. +fn truncate_utf8_bench(c: &mut Criterion) { + let group = c.benchmark_group("normalization/truncate_utf8"); + // MAX_SERVICE_LEN / MAX_NAME_LEN / MAX_TYPE_LEN are all 100 in the source. + const LIMIT: usize = 100; + let ascii_over = "a".repeat(256); + // Multi-byte chars (3 bytes each) so the limit falls mid-codepoint and must be walked back. 
+ let unicode_over = "繋".repeat(128); + let cases: &[(&str, &str)] = &[ + ("over-length-ascii", ascii_over.as_str()), + ("over-length-unicode", unicode_over.as_str()), + ]; + + normalize_fnmut_string_with( + group, + cases, + 1000, + "truncate_utf8", + move |s: &mut String| truncate_utf8_bench_wrapper(s, LIMIT), + ); +} + +/// `normalize_span_start_duration` runs on every span and, in the case where the start +/// timestamp predates the year-2000 cutoff, performs a `SystemTime` read. We bench in a tight loop +/// to confirm that read isn't a meaningful per-span tax. The "clean" case skips the clock; the +/// "needs-clock" case forces the `SystemTime::elapsed()` path. +fn normalize_span_start_duration_bench(c: &mut Criterion) { + let mut group = c.benchmark_group("normalization/normalize_span_start_duration"); + group.throughput(Elements(1000)); + group.warm_up_time(Duration::from_secs(1)); + group.measurement_time(Duration::from_secs(2)); + group.sample_size(200); + group.sampling_mode(criterion::SamplingMode::Flat); + + // (start, duration): valid recent timestamp (no clock read) vs. a too-old start that forces + // the SystemTime read. + let cases: &[(&str, i64, i64)] = &[ + ("clean", 1_448_466_874_000_000_000, 10_000_000), + ("needs-clock", 0, 10_000_000), + ]; + + for (label, start, duration) in cases { + group.bench_with_input( + BenchmarkId::new("normalize_span_start_duration", label), + &(*start, *duration), + |b, &(start, duration)| { + b.iter(|| { + let mut s = black_box(start); + let mut d = black_box(duration); + normalize_span_start_duration(black_box(&mut s), black_box(&mut d)); + black_box((s, d)); + }); + }, + ); + } + group.finish(); +} + +/// Like [`normalize_fnmut_string`] but takes labelled cases (label, input) so over-length inputs +/// don't need to be displayed verbatim in benchmark ids. 
+#[inline] +fn normalize_fnmut_string_with( + mut group: BenchmarkGroup, + cases: &[(&str, &str)], + elements: usize, + function_name: &str, + mut function: F, +) where + F: FnMut(&mut String), +{ + group.throughput(Elements(elements as u64)); + group.warm_up_time(Duration::from_secs(1)); + group.measurement_time(Duration::from_secs(2)); + group.sample_size(200); + group.sampling_mode(criterion::SamplingMode::Flat); + + for (label, case) in cases { + group.bench_with_input(BenchmarkId::new(function_name, label), *case, |b, case| { + b.iter_batched_ref( + || { + let mut strings = Vec::with_capacity(elements); + (0..elements).for_each(|_| strings.push(case.to_owned())); + strings + }, + |strings| { + #[allow(clippy::unit_arg)] + strings.iter_mut().for_each(|string| { + black_box(function(black_box(string))); + }); + }, + BatchSize::LargeInput, + ) + }); + } + group.finish(); +} + criterion_group!( benches, normalize_service_bench, normalize_name_bench, - normalize_span_bench + normalize_span_bench, + normalize_tag_bench, + normalize_metric_name_bench, + truncate_utf8_bench, + normalize_span_start_duration_bench ); criterion_main!(benches); diff --git a/libdd-trace-normalization/src/normalize_utils.rs b/libdd-trace-normalization/src/normalize_utils.rs index b70093c817..0c188cdded 100644 --- a/libdd-trace-normalization/src/normalize_utils.rs +++ b/libdd-trace-normalization/src/normalize_utils.rs @@ -272,6 +272,15 @@ fn normalize_metric_name(name: &mut String) { bytes.truncate(write_cursor); } +/// Wrapper exposing [`normalize_metric_name`] for benchmarks only (see the `bench-internals` +/// feature). Not part of the public API; the benchmarked function itself is left untouched. +#[cfg(feature = "bench-internals")] +#[doc(hidden)] +#[inline(always)] +pub fn normalize_metric_name_bench_wrapper(name: &mut String) { + normalize_metric_name(name) +} + // truncate_utf8 truncates the given string to make sure it uses less than limit bytes. 
// If the last character is a utf8 character that would be split, it removes it // entirely to make sure the resulting string is not broken. @@ -280,6 +289,15 @@ pub(crate) fn truncate_utf8(s: &mut String, limit: usize) { s.truncate(boundary); } +/// Wrapper exposing [`truncate_utf8`] for benchmarks only (see the `bench-internals` feature). +/// Not part of the public API; the benchmarked function itself is left untouched. +#[cfg(feature = "bench-internals")] +#[doc(hidden)] +#[inline(always)] +pub fn truncate_utf8_bench_wrapper(s: &mut String, limit: usize) { + truncate_utf8(s, limit) +} + #[cfg(test)] mod tests { From ed81de7e054960ede6a7721231fb6ba0128ff058 Mon Sep 17 00:00:00 2001 From: Yann Hamdaoui Date: Tue, 16 Jun 2026 15:25:02 +0200 Subject: [PATCH 2/6] doc: reword Cargo.toml comment --- libdd-trace-normalization/Cargo.toml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/libdd-trace-normalization/Cargo.toml b/libdd-trace-normalization/Cargo.toml index e510084532..23d2c372aa 100644 --- a/libdd-trace-normalization/Cargo.toml +++ b/libdd-trace-normalization/Cargo.toml @@ -19,9 +19,9 @@ arbitrary = { version = "1.3", features = ["derive"], optional = true } [features] fuzzing = ["arbitrary"] -# Exposes thin public `*_bench_wrapper` shims (e.g. for `normalize_metric_name`, `truncate_utf8`) -# so benchmarks can reach otherwise-internal functions. The benchmarked functions themselves are -# left untouched. Not intended for downstream consumers — enable only when running benches. +# Exposes thin public `*_bench_wrapper` shims so benchmarks can reach +# otherwise-internal functions. The benchmarked functions themselves are left +# untouched. Enable only when running benches. 
bench-internals = [] [dev-dependencies] From 5a09b1fcf82905eb25681d25f55d9fdb16e752be Mon Sep 17 00:00:00 2001 From: Yann Hamdaoui Date: Tue, 16 Jun 2026 15:44:47 +0200 Subject: [PATCH 3/6] test(trace-normalization): batch normalize_span_start_duration bench Convert the bench from a single-call `b.iter` to the batched `iter_batched_ref` + 1000-element inner loop used by the other benches in this file. The previous form set `throughput(Elements(1000))` and `SamplingMode::Flat` but measured one call per iteration, so the throughput number was meaningless and the ns-scale "clean" path was swamped by timer overhead. The batch is rebuilt in untimed setup because the function mutates its inputs in place: on the year-2000 path the first call rewrites `start` to a recent timestamp, which would make a second call on the same value skip the clock branch. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../benches/normalization_utils.rs | 24 +++++++++++++------ 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/libdd-trace-normalization/benches/normalization_utils.rs b/libdd-trace-normalization/benches/normalization_utils.rs index 35afe30f93..d89a25a487 100644 --- a/libdd-trace-normalization/benches/normalization_utils.rs +++ b/libdd-trace-normalization/benches/normalization_utils.rs @@ -225,7 +225,13 @@ fn truncate_utf8_bench(c: &mut Criterion) { /// "needs-clock" case forces the `SystemTime::elapsed()` path. fn normalize_span_start_duration_bench(c: &mut Criterion) { let mut group = c.benchmark_group("normalization/normalize_span_start_duration"); - group.throughput(Elements(1000)); + // Each measured iteration normalizes a batch of `ELEMENTS` spans so the per-span cost (a few + // integer ops, or a `SystemTime` read on the year-2000 path) isn't swamped by timer overhead. 
+ // The batch is rebuilt fresh in (untimed) setup because the function mutates its inputs in + // place: on the "needs-clock" path the first call rewrites `start` to a recent timestamp, which + // would make a second call on the same value skip the clock branch. + const ELEMENTS: usize = 1000; + group.throughput(Elements(ELEMENTS as u64)); group.warm_up_time(Duration::from_secs(1)); group.measurement_time(Duration::from_secs(2)); group.sample_size(200); @@ -243,12 +249,16 @@ fn normalize_span_start_duration_bench(c: &mut Criterion) { BenchmarkId::new("normalize_span_start_duration", label), &(*start, *duration), |b, &(start, duration)| { - b.iter(|| { - let mut s = black_box(start); - let mut d = black_box(duration); - normalize_span_start_duration(black_box(&mut s), black_box(&mut d)); - black_box((s, d)); - }); + b.iter_batched_ref( + || vec![(start, duration); ELEMENTS], + |pairs| { + pairs.iter_mut().for_each(|(s, d)| { + normalize_span_start_duration(black_box(s), black_box(d)); + }); + black_box(pairs); + }, + BatchSize::LargeInput, + ) }, ); } From 7834e6f9020a2234f683009242a0f00198ae5cd9 Mon Sep 17 00:00:00 2001 From: Yann Hamdaoui Date: Tue, 16 Jun 2026 15:50:44 +0200 Subject: [PATCH 4/6] test: add black_box to avoid undue inlining --- libdd-trace-normalization/benches/normalization_utils.rs | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/libdd-trace-normalization/benches/normalization_utils.rs b/libdd-trace-normalization/benches/normalization_utils.rs index d89a25a487..33cae1175a 100644 --- a/libdd-trace-normalization/benches/normalization_utils.rs +++ b/libdd-trace-normalization/benches/normalization_utils.rs @@ -252,10 +252,9 @@ fn normalize_span_start_duration_bench(c: &mut Criterion) { b.iter_batched_ref( || vec![(start, duration); ELEMENTS], |pairs| { - pairs.iter_mut().for_each(|(s, d)| { - normalize_span_start_duration(black_box(s), black_box(d)); - }); - black_box(pairs); + for (s, d) in pairs { + 
black_box(normalize_span_start_duration(black_box(s), black_box(d))); + } }, BatchSize::LargeInput, ) From ee82929d01bef38f59f36cb2f37c43122075fab2 Mon Sep 17 00:00:00 2001 From: Yann Hamdaoui Date: Tue, 16 Jun 2026 17:40:24 +0200 Subject: [PATCH 5/6] ci(trace-normalization): enable bench-internals feature in CI benchmark script The normalization_utils bench target uses required-features = ["bench-internals"], so Cargo silently skips it unless that feature is explicitly activated. Add libdd-trace-normalization/bench-internals to the --features list in run_benchmarks_ci.sh so the new (and existing) normalization benchmarks appear in CI results. --- benchmark/run_benchmarks_ci.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmark/run_benchmarks_ci.sh b/benchmark/run_benchmarks_ci.sh index 49fdc38ee5..751fef487e 100755 --- a/benchmark/run_benchmarks_ci.sh +++ b/benchmark/run_benchmarks_ci.sh @@ -22,7 +22,7 @@ pushd "${PROJECT_DIR}" > /dev/null # Run benchmarks message "Running benchmarks" -cargo bench --workspace --features libdd-crashtracker/benchmarking,libdd-sampling/v04_span,libdd-sampling/bench-internals -- --warm-up-time 1 --measurement-time 5 --sample-size=200 +cargo bench --workspace --features libdd-crashtracker/benchmarking,libdd-sampling/v04_span,libdd-sampling/bench-internals,libdd-trace-normalization/bench-internals -- --warm-up-time 1 --measurement-time 5 --sample-size=200 message "Finished running benchmarks" # Copy the benchmark results to the output directory From 302189ae0766790d31b1762febd682a7f74eee4e Mon Sep 17 00:00:00 2001 From: Yann Hamdaoui Date: Tue, 16 Jun 2026 17:43:16 +0200 Subject: [PATCH 6/6] fix: clippy warning (remove useless black box) --- libdd-trace-normalization/benches/normalization_utils.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libdd-trace-normalization/benches/normalization_utils.rs b/libdd-trace-normalization/benches/normalization_utils.rs index 33cae1175a..1fbeb2e9d5 100644 
--- a/libdd-trace-normalization/benches/normalization_utils.rs +++ b/libdd-trace-normalization/benches/normalization_utils.rs @@ -253,7 +253,7 @@ fn normalize_span_start_duration_bench(c: &mut Criterion) { || vec![(start, duration); ELEMENTS], |pairs| { for (s, d) in pairs { - black_box(normalize_span_start_duration(black_box(s), black_box(d))); + normalize_span_start_duration(black_box(s), black_box(d)); } }, BatchSize::LargeInput,