diff --git a/benchmark/run_benchmarks_ci.sh b/benchmark/run_benchmarks_ci.sh
index 49fdc38ee5..751fef487e 100755
--- a/benchmark/run_benchmarks_ci.sh
+++ b/benchmark/run_benchmarks_ci.sh
@@ -22,7 +22,7 @@ pushd "${PROJECT_DIR}" > /dev/null
 
 # Run benchmarks
 message "Running benchmarks"
-cargo bench --workspace --features libdd-crashtracker/benchmarking,libdd-sampling/v04_span,libdd-sampling/bench-internals -- --warm-up-time 1 --measurement-time 5 --sample-size=200
+cargo bench --workspace --features libdd-crashtracker/benchmarking,libdd-sampling/v04_span,libdd-sampling/bench-internals,libdd-trace-normalization/bench-internals -- --warm-up-time 1 --measurement-time 5 --sample-size=200
 message "Finished running benchmarks"
 
 # Copy the benchmark results to the output directory
diff --git a/libdd-trace-normalization/Cargo.toml b/libdd-trace-normalization/Cargo.toml
index 1579a5e299..23d2c372aa 100644
--- a/libdd-trace-normalization/Cargo.toml
+++ b/libdd-trace-normalization/Cargo.toml
@@ -19,6 +19,10 @@ arbitrary = { version = "1.3", features = ["derive"], optional = true }
 
 [features]
 fuzzing = ["arbitrary"]
+# Exposes thin public `*_bench_wrapper` shims so benchmarks can reach
+# otherwise-internal functions. The benchmarked functions themselves are left
+# untouched. Enable only when running benches.
+bench-internals = []
 
 [dev-dependencies]
 rand = "0.8.5"
@@ -29,3 +33,4 @@ criterion = "0.5"
 name = "normalization_utils"
 harness = false
 path = "benches/normalization_utils.rs"
+required-features = ["bench-internals"]
diff --git a/libdd-trace-normalization/benches/normalization_utils.rs b/libdd-trace-normalization/benches/normalization_utils.rs
index 6c69ff495f..1fbeb2e9d5 100644
--- a/libdd-trace-normalization/benches/normalization_utils.rs
+++ b/libdd-trace-normalization/benches/normalization_utils.rs
@@ -6,7 +6,10 @@ use criterion::Throughput::Elements;
 use criterion::{
     criterion_group, criterion_main, BatchSize, BenchmarkGroup, BenchmarkId, Criterion,
 };
-use libdd_trace_normalization::normalize_utils::{normalize_name, normalize_service};
+use libdd_trace_normalization::normalize_utils::{
+    normalize_metric_name_bench_wrapper, normalize_name, normalize_service,
+    normalize_span_start_duration, normalize_tag, truncate_utf8_bench_wrapper,
+};
 use libdd_trace_normalization::normalizer::normalize_trace;
 use libdd_trace_protobuf::pb;
 use std::hint::black_box;
@@ -142,10 +145,172 @@ fn normalize_span_bench(c: &mut Criterion) {
     );
 }
 
+/// `normalize_tag` runs on every ingested tag key/value. It is the heaviest normalization
+/// function: a nested loop combining an ASCII fast-path with per-codepoint UTF-8 scanning and a
+/// char-class state machine. We exercise realistic tag values plus the unicode and over-length
+/// paths that defeat the ASCII fast-path.
+fn normalize_tag_bench(c: &mut Criterion) {
+    let group = c.benchmark_group("normalization/normalize_tag");
+    let cases = &[
+        // Empty input: measures the early-return baseline.
+        "",
+        // Already-clean realistic tag values: ASCII fast-path only.
+        "ascii:http.method:get",
+        "ascii:env:production",
+        "ascii:resource:get_/api/v1/users/{id}",
+        // Mixed: needs the illegal-char state machine but stays ASCII.
+        "mixed:Some Service Name!!",
+        // Unicode service name: exercises the codepoint-scanning slow path.
+        "unicode:café-Über-Sérvice",
+        "unicode:Data🐨dog🐶 繋がっ⛰てて",
+        // Over-length (> MAX_TAG_LEN = 200): forces the loop to run to the codepoint cap.
+        "over-length-ascii:over_length_ascii_value_that_keeps_going_and_going_and_going_and_going_and_going_and_going_and_going_and_going_and_going_and_going_and_going_and_going_and_going_and_going_and_going_and_going_and_going_and_going",
+    ];
+    normalize_fnmut_string(group, cases, 1000, "normalize_tag", normalize_tag);
+}
+
+/// `normalize_metric_name` runs on every span name. Similar complexity to `normalize_tag` with a
+/// one-byte look-behind (`last_written_char`) to collapse separators.
+fn normalize_metric_name_bench(c: &mut Criterion) {
+    let group = c.benchmark_group("normalization/normalize_metric_name");
+    let cases = &[
+        // Empty input: measures the early-return baseline.
+        "",
+        // Already-clean span names.
+        "http.request",
+        "django.controller",
+        // Names needing separator collapsing / illegal-char replacement.
+        "GET /some/raclette",
+        "rails.action_controller.process",
+        // Over-length (> MAX_NAME_LEN = 100).
+        "Too-Long-.Too-Long-.Too-Long-.Too-Long-.Too-Long-.Too-Long-.Too-Long-.Too-Long-.Too-Long-.Too-Long-.Too-Long-.",
+    ];
+    normalize_fnmut_string(
+        group,
+        cases,
+        1000,
+        "normalize_metric_name",
+        normalize_metric_name_bench_wrapper,
+    );
+}
+
+/// `truncate_utf8` is called before every name/service/type normalization to enforce a byte
+/// limit while preserving UTF-8 boundaries. We bench the over-length cases (where it actually does
+/// work) at the real limits used in the code, including a multi-byte boundary that must be walked
+/// back.
+fn truncate_utf8_bench(c: &mut Criterion) {
+    let group = c.benchmark_group("normalization/truncate_utf8");
+    // MAX_SERVICE_LEN / MAX_NAME_LEN / MAX_TYPE_LEN are all 100 in the source.
+    const LIMIT: usize = 100;
+    let ascii_over = "a".repeat(256);
+    // Multi-byte chars (3 bytes each) so the limit falls mid-codepoint and must be walked back.
+    let unicode_over = "繋".repeat(128);
+    let cases: &[(&str, &str)] = &[
+        ("over-length-ascii", ascii_over.as_str()),
+        ("over-length-unicode", unicode_over.as_str()),
+    ];
+
+    normalize_fnmut_string_with(
+        group,
+        cases,
+        1000,
+        "truncate_utf8",
+        move |s: &mut String| truncate_utf8_bench_wrapper(s, LIMIT),
+    );
+}
+
+/// `normalize_span_start_duration` runs on every span and, in the common case where the start
+/// timestamp predates the year-2000 cutoff, performs a `SystemTime` read. We bench in a tight loop
+/// to confirm that read isn't a meaningful per-span tax. The "clean" case skips the clock; the
+/// "needs-clock" case forces the `SystemTime::elapsed()` path.
+fn normalize_span_start_duration_bench(c: &mut Criterion) {
+    let mut group = c.benchmark_group("normalization/normalize_span_start_duration");
+    // Each measured iteration normalizes a batch of `ELEMENTS` spans so the per-span cost (a few
+    // integer ops, or a `SystemTime` read on the year-2000 path) isn't swamped by timer overhead.
+    // The batch is rebuilt fresh in (untimed) setup because the function mutates its inputs in
+    // place: on the "needs-clock" path the first call rewrites `start` to a recent timestamp, which
+    // would make a second call on the same value skip the clock branch.
+    const ELEMENTS: usize = 1000;
+    group.throughput(Elements(ELEMENTS as u64));
+    group.warm_up_time(Duration::from_secs(1));
+    group.measurement_time(Duration::from_secs(2));
+    group.sample_size(200);
+    group.sampling_mode(criterion::SamplingMode::Flat);
+
+    // (start, duration): valid recent timestamp (no clock read) vs. a too-old start that forces
+    // the SystemTime read.
+    let cases: &[(&str, i64, i64)] = &[
+        ("clean", 1_448_466_874_000_000_000, 10_000_000),
+        ("needs-clock", 0, 10_000_000),
+    ];
+
+    for (label, start, duration) in cases {
+        group.bench_with_input(
+            BenchmarkId::new("normalize_span_start_duration", label),
+            &(*start, *duration),
+            |b, &(start, duration)| {
+                b.iter_batched_ref(
+                    || vec![(start, duration); ELEMENTS],
+                    |pairs| {
+                        for (s, d) in pairs {
+                            normalize_span_start_duration(black_box(s), black_box(d));
+                        }
+                    },
+                    BatchSize::LargeInput,
+                )
+            },
+        );
+    }
+    group.finish();
+}
+
+/// Like [`normalize_fnmut_string`] but takes labelled cases (label, input) so over-length inputs
+/// don't need to be displayed verbatim in benchmark ids.
+#[inline]
+fn normalize_fnmut_string_with<F>(
+    mut group: BenchmarkGroup<'_, criterion::measurement::WallTime>,
+    cases: &[(&str, &str)],
+    elements: usize,
+    function_name: &str,
+    mut function: F,
+) where
+    F: FnMut(&mut String),
+{
+    group.throughput(Elements(elements as u64));
+    group.warm_up_time(Duration::from_secs(1));
+    group.measurement_time(Duration::from_secs(2));
+    group.sample_size(200);
+    group.sampling_mode(criterion::SamplingMode::Flat);
+
+    for (label, case) in cases {
+        group.bench_with_input(BenchmarkId::new(function_name, label), *case, |b, case| {
+            b.iter_batched_ref(
+                || {
+                    let mut strings = Vec::with_capacity(elements);
+                    (0..elements).for_each(|_| strings.push(case.to_owned()));
+                    strings
+                },
+                |strings| {
+                    #[allow(clippy::unit_arg)]
+                    strings.iter_mut().for_each(|string| {
+                        black_box(function(black_box(string)));
+                    });
+                },
+                BatchSize::LargeInput,
+            )
+        });
+    }
+    group.finish();
+}
+
 criterion_group!(
     benches,
     normalize_service_bench,
     normalize_name_bench,
-    normalize_span_bench
+    normalize_span_bench,
+    normalize_tag_bench,
+    normalize_metric_name_bench,
+    truncate_utf8_bench,
+    normalize_span_start_duration_bench
 );
 criterion_main!(benches);
diff --git a/libdd-trace-normalization/src/normalize_utils.rs b/libdd-trace-normalization/src/normalize_utils.rs
index b70093c817..0c188cdded 100644
--- a/libdd-trace-normalization/src/normalize_utils.rs
+++ b/libdd-trace-normalization/src/normalize_utils.rs
@@ -272,6 +272,15 @@ fn normalize_metric_name(name: &mut String) {
     bytes.truncate(write_cursor);
 }
 
+/// Wrapper exposing [`normalize_metric_name`] for benchmarks only (see the `bench-internals`
+/// feature). Not part of the public API; the benchmarked function itself is left untouched.
+#[cfg(feature = "bench-internals")]
+#[doc(hidden)]
+#[inline(always)]
+pub fn normalize_metric_name_bench_wrapper(name: &mut String) {
+    normalize_metric_name(name)
+}
+
 // truncate_utf8 truncates the given string to make sure it uses less than limit bytes.
 // If the last character is a utf8 character that would be split, it removes it
 // entirely to make sure the resulting string is not broken.
@@ -280,6 +289,15 @@ pub(crate) fn truncate_utf8(s: &mut String, limit: usize) {
     s.truncate(boundary);
 }
 
+/// Wrapper exposing [`truncate_utf8`] for benchmarks only (see the `bench-internals` feature).
+/// Not part of the public API; the benchmarked function itself is left untouched.
+#[cfg(feature = "bench-internals")]
+#[doc(hidden)]
+#[inline(always)]
+pub fn truncate_utf8_bench_wrapper(s: &mut String, limit: usize) {
+    truncate_utf8(s, limit)
+}
+
 #[cfg(test)]
 mod tests {