Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion benchmark/run_benchmarks_ci.sh
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ pushd "${PROJECT_DIR}" > /dev/null

# Run benchmarks
message "Running benchmarks"
cargo bench --workspace --features libdd-crashtracker/benchmarking,libdd-sampling/v04_span,libdd-sampling/bench-internals -- --warm-up-time 1 --measurement-time 5 --sample-size=200
cargo bench --workspace --features libdd-crashtracker/benchmarking,libdd-sampling/v04_span,libdd-sampling/bench-internals,libdd-trace-normalization/bench-internals -- --warm-up-time 1 --measurement-time 5 --sample-size=200
message "Finished running benchmarks"

# Copy the benchmark results to the output directory
Expand Down
5 changes: 5 additions & 0 deletions libdd-trace-normalization/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,10 @@ arbitrary = { version = "1.3", features = ["derive"], optional = true }

[features]
fuzzing = ["arbitrary"]
# Exposes thin public `*_bench_wrapper` shims so benchmarks can reach
# otherwise-internal functions. The benchmarked functions themselves are left
# untouched. Enable only when running benches.
bench-internals = []

[dev-dependencies]
rand = "0.8.5"
Expand All @@ -29,3 +33,4 @@ criterion = "0.5"
name = "normalization_utils"
harness = false
path = "benches/normalization_utils.rs"
required-features = ["bench-internals"]
Comment thread
yannham marked this conversation as resolved.
169 changes: 167 additions & 2 deletions libdd-trace-normalization/benches/normalization_utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,10 @@ use criterion::Throughput::Elements;
use criterion::{
criterion_group, criterion_main, BatchSize, BenchmarkGroup, BenchmarkId, Criterion,
};
use libdd_trace_normalization::normalize_utils::{normalize_name, normalize_service};
use libdd_trace_normalization::normalize_utils::{
normalize_metric_name_bench_wrapper, normalize_name, normalize_service,
normalize_span_start_duration, normalize_tag, truncate_utf8_bench_wrapper,
};
use libdd_trace_normalization::normalizer::normalize_trace;
use libdd_trace_protobuf::pb;
use std::hint::black_box;
Expand Down Expand Up @@ -142,10 +145,172 @@ fn normalize_span_bench(c: &mut Criterion) {
);
}

/// Benchmarks `normalize_tag`, which is applied to every ingested tag key/value and is described
/// as the heaviest normalization function (an ASCII fast-path combined with per-codepoint UTF-8
/// scanning and a char-class state machine).
///
/// The inputs range from the empty string, through realistic already-clean values, to unicode
/// and over-length values that take the tag normalizer off its ASCII fast-path.
fn normalize_tag_bench(c: &mut Criterion) {
    let inputs = &[
        // Early-return baseline: nothing to normalize.
        "",
        // Realistic values that are already clean, so only the ASCII fast-path runs.
        "ascii:http.method:get",
        "ascii:env:production",
        "ascii:resource:get_/api/v1/users/{id}",
        // Pure ASCII, but with illegal characters that go through the state machine.
        "mixed:Some Service Name!!",
        // Non-ASCII content: takes the codepoint-scanning slow path.
        "unicode:café-Über-Sérvice",
        "unicode:Data🐨dog🐶 繋がっ⛰てて",
        // Longer than MAX_TAG_LEN (200): the loop has to run up to the codepoint cap.
        "over-length-ascii:over_length_ascii_value_that_keeps_going_and_going_and_going_and_going_and_going_and_going_and_going_and_going_and_going_and_going_and_going_and_going_and_going_and_going_and_going_and_going_and_going_and_going",
    ];
    normalize_fnmut_string(
        c.benchmark_group("normalization/normalize_tag"),
        inputs,
        1000,
        "normalize_tag",
        normalize_tag,
    );
}

/// Benchmarks `normalize_metric_name` (through its `bench-internals` wrapper), which is applied
/// to every span name. Its cost profile is comparable to `normalize_tag`, with the addition of
/// tracking the last written char (`last_written_char`) in order to collapse separators.
fn normalize_metric_name_bench(c: &mut Criterion) {
    let span_names = &[
        // Early-return baseline: nothing to normalize.
        "",
        // Span names that are already clean.
        "http.request",
        "django.controller",
        // Span names exercising separator collapsing / illegal-char replacement.
        "GET /some/raclette",
        "rails.action_controller.process",
        // Longer than MAX_NAME_LEN (100).
        "Too-Long-.Too-Long-.Too-Long-.Too-Long-.Too-Long-.Too-Long-.Too-Long-.Too-Long-.Too-Long-.Too-Long-.Too-Long-.",
    ];
    let bench_group = c.benchmark_group("normalization/normalize_metric_name");
    normalize_fnmut_string(
        bench_group,
        span_names,
        1000,
        "normalize_metric_name",
        normalize_metric_name_bench_wrapper,
    );
}

/// Benchmarks `truncate_utf8` (through its `bench-internals` wrapper), the helper that enforces a
/// byte limit ahead of name/service/type normalization while keeping the string valid UTF-8.
///
/// Only over-length inputs are measured, since those are the ones where truncation does real
/// work. The limit matches the one used by the library, and the unicode input is built so the
/// limit lands in the middle of a codepoint.
fn truncate_utf8_bench(c: &mut Criterion) {
    // MAX_SERVICE_LEN / MAX_NAME_LEN / MAX_TYPE_LEN are all 100 in the source.
    const LIMIT: usize = 100;

    let long_ascii = "a".repeat(256);
    // Each char takes 3 bytes, so byte 100 is not a char boundary and the cut must back up.
    let long_unicode = "繋".repeat(128);
    let labelled_inputs: [(&str, &str); 2] = [
        ("over-length-ascii", long_ascii.as_str()),
        ("over-length-unicode", long_unicode.as_str()),
    ];

    normalize_fnmut_string_with(
        c.benchmark_group("normalization/truncate_utf8"),
        &labelled_inputs,
        1000,
        "truncate_utf8",
        |input: &mut String| truncate_utf8_bench_wrapper(input, LIMIT),
    );
}

/// Benchmarks `normalize_span_start_duration`, which runs on every span and reads `SystemTime`
/// when the start timestamp predates the year-2000 cutoff. Running it in a tight loop is meant
/// to confirm that this clock read is not a meaningful per-span cost.
///
/// Two scenarios are measured: "clean" (a valid recent start, no clock read) and "needs-clock"
/// (a too-old start that forces the `SystemTime::elapsed()` path).
fn normalize_span_start_duration_bench(c: &mut Criterion) {
    // One measured iteration processes `ELEMENTS` spans, so that the tiny per-span cost is not
    // drowned out by timer overhead.
    const ELEMENTS: usize = 1000;

    // (label, start, duration)
    let scenarios: [(&str, i64, i64); 2] = [
        ("clean", 1_448_466_874_000_000_000, 10_000_000),
        ("needs-clock", 0, 10_000_000),
    ];

    let mut bench_group = c.benchmark_group("normalization/normalize_span_start_duration");
    bench_group
        .throughput(Elements(ELEMENTS as u64))
        .warm_up_time(Duration::from_secs(1))
        .measurement_time(Duration::from_secs(2))
        .sample_size(200)
        .sampling_mode(criterion::SamplingMode::Flat);

    for &(label, start, duration) in &scenarios {
        bench_group.bench_with_input(
            BenchmarkId::new("normalize_span_start_duration", label),
            &(start, duration),
            |bencher, &(start, duration)| {
                bencher.iter_batched_ref(
                    // The batch is rebuilt in the untimed setup because the function mutates its
                    // arguments in place: after one call on the "needs-clock" input, `start` is a
                    // recent timestamp and a second call would no longer hit the clock branch.
                    || vec![(start, duration); ELEMENTS],
                    |batch| {
                        batch.iter_mut().for_each(|(span_start, span_duration)| {
                            normalize_span_start_duration(
                                black_box(span_start),
                                black_box(span_duration),
                            );
                        });
                    },
                    BatchSize::LargeInput,
                )
            },
        );
    }
    bench_group.finish();
}

/// Variant of [`normalize_fnmut_string`] whose cases are `(label, input)` pairs, so that the
/// benchmark id shows the short label instead of a (possibly very long) input string.
///
/// For each case, every measured iteration applies `function` to `elements` freshly allocated
/// copies of the input; the copies are created in the untimed setup closure.
#[inline]
fn normalize_fnmut_string_with<F>(
    mut group: BenchmarkGroup<WallTime>,
    cases: &[(&str, &str)],
    elements: usize,
    function_name: &str,
    mut function: F,
) where
    F: FnMut(&mut String),
{
    group
        .throughput(Elements(elements as u64))
        .warm_up_time(Duration::from_secs(1))
        .measurement_time(Duration::from_secs(2))
        .sample_size(200)
        .sampling_mode(criterion::SamplingMode::Flat);

    for &(label, input) in cases {
        let bench_id = BenchmarkId::new(function_name, label);
        group.bench_with_input(bench_id, input, |bencher, template| {
            bencher.iter_batched_ref(
                // Untimed: one owned copy of the input per element of the batch.
                || {
                    (0..elements)
                        .map(|_| template.to_owned())
                        .collect::<Vec<String>>()
                },
                // Timed: run the function under test over the whole batch.
                |batch| {
                    // `function` returns `()`; it is still routed through `black_box`.
                    #[allow(clippy::unit_arg)]
                    for owned in batch.iter_mut() {
                        black_box(function(black_box(owned)));
                    }
                },
                BatchSize::LargeInput,
            )
        });
    }
    group.finish();
}

// Registers the benchmark functions listed below under the `benches` group, then generates the
// benchmark binary's entry point from that group.
criterion_group!(
benches,
normalize_service_bench,
normalize_name_bench,
normalize_span_bench
normalize_span_bench,
normalize_tag_bench,
normalize_metric_name_bench,
truncate_utf8_bench,
normalize_span_start_duration_bench
);
criterion_main!(benches);
18 changes: 18 additions & 0 deletions libdd-trace-normalization/src/normalize_utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -272,6 +272,15 @@ fn normalize_metric_name(name: &mut String) {
bytes.truncate(write_cursor);
}

/// Benchmark-only entry point for the private [`normalize_metric_name`].
///
/// Compiled only when the `bench-internals` feature is enabled and hidden from the generated
/// docs: it is not part of the public API. It forwards `name` unchanged, so the benchmarked
/// function itself stays untouched.
#[cfg(feature = "bench-internals")]
#[doc(hidden)]
#[inline(always)]
pub fn normalize_metric_name_bench_wrapper(name: &mut String) {
    // Pure delegation; the forced inlining is presumably there to keep the shim out of the
    // measurement.
    normalize_metric_name(name);
}

// truncate_utf8 truncates the given string to make sure it uses less than limit bytes.
// If the last character is a utf8 character that would be split, it removes it
// entirely to make sure the resulting string is not broken.
Expand All @@ -280,6 +289,15 @@ pub(crate) fn truncate_utf8(s: &mut String, limit: usize) {
s.truncate(boundary);
}

/// Benchmark-only entry point for the crate-private [`truncate_utf8`].
///
/// Compiled only when the `bench-internals` feature is enabled and hidden from the generated
/// docs: it is not part of the public API. It forwards `s` and `limit` unchanged, so the
/// benchmarked function itself stays untouched.
#[cfg(feature = "bench-internals")]
#[doc(hidden)]
#[inline(always)]
pub fn truncate_utf8_bench_wrapper(s: &mut String, limit: usize) {
    // Pure delegation; the forced inlining is presumably there to keep the shim out of the
    // measurement.
    truncate_utf8(s, limit);
}

#[cfg(test)]
mod tests {

Expand Down
Loading