From 65144cc12ea47a75b49327919d5229115b8b2ffc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gustavo=20Andr=C3=A9=20dos=20Santos=20Lopes?= Date: Tue, 16 Jun 2026 16:23:15 +0100 Subject: [PATCH 1/4] More robust way to inline thread local var resolution --- Cargo.lock | 2 - libdd-otel-thread-ctx-ffi/README.md | 56 +---------- libdd-otel-thread-ctx-ffi/build-optimized.sh | 69 -------------- libdd-otel-thread-ctx-ffi/build.rs | 97 +------------------- libdd-otel-thread-ctx/Cargo.toml | 4 - libdd-otel-thread-ctx/README.md | 8 +- libdd-otel-thread-ctx/build.rs | 49 ---------- libdd-otel-thread-ctx/src/lib.rs | 87 ++++++++++++++---- libdd-otel-thread-ctx/src/tls_shim.c | 20 ---- 9 files changed, 81 insertions(+), 311 deletions(-) delete mode 100755 libdd-otel-thread-ctx-ffi/build-optimized.sh delete mode 100644 libdd-otel-thread-ctx/src/tls_shim.c diff --git a/Cargo.lock b/Cargo.lock index 9322bd0bf1..e4cccf76f7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3184,8 +3184,6 @@ name = "libdd-otel-thread-ctx" version = "1.0.0" dependencies = [ "anyhow", - "build_common", - "cc", "elf", ] diff --git a/libdd-otel-thread-ctx-ffi/README.md b/libdd-otel-thread-ctx-ffi/README.md index ae268d5770..936e1e15fe 100644 --- a/libdd-otel-thread-ctx-ffi/README.md +++ b/libdd-otel-thread-ctx-ffi/README.md @@ -6,56 +6,8 @@ that external readers (e.g. the eBPF profiler) can discover. Currently Linux-only (x86-64 and aarch64). -## Optimized build (cross-language inlining) +## TLS -The OTel thread-level context sharing specification requires the use of the -TLSDESC dialect for the thread-local variable that holds the current context. -Because (stable) `rustc` doesn't currently provide a way to control the TLS -dialect, we need to use a small C shim that defines the variable and expose a -one-line getter. This unfortunately adds one level of indirection (a function -call) when attaching or detaching a context. 
- -With the right toolchain, it's possible to use Link-Time Optimization (LTO) to -inline the C wrapper at link time. The requirements are: - -- `clang` is available to compile the C shim to LLVM IR (version requirements - aren't clear -- tested with clang18 and clang20, but ideally the version - should be the same or close to the LLVM version shipped with `rustc`) -- Either the Rust toolchain ships `lld` or there's a system-wide `lld` install - (Rust has been shipping `rust-lld` for a long time now, something like since - 1.53+, however some musl-based distro like Alpine might have the Rust - toolchain without `rust-lld`) -- `lld` version is at least 18.1 (TLSDESC support) - -**If those requirements are met, setting the environment variables -`CARGO_TARGET__RUSTFLAGS=-Clinker-plugin-lto -Clinker=clang` and -`LIBDD_OTEL_THREAD_CTX_INLINE=1` when calling to `cargo` will trigger the -optimized build where the C shim is inlined.** Here, `` is the target -triple in screaming snake case. - -External environment variables are needed because cross-language LTO requires -two `rustc` codegen flags (`-Clinker-plugin-lto` and `-Clinker=clang`) that -cannot be set from a Cargo build script: they must come from `RUSTFLAGS` or -`.cargo/config.toml`, which can't be entirely automated from Rust only. We -advise to set those flags via the target-scoped -`CARGO_TARGET__RUSTFLAGS` env var so they don't leak to build scripts -or proc-macros if cross-compiling. - -### Build script - -The `build-optimized.sh` wrapper script is provided as a convenience and as an -example. - -#### Usage - -```bash -./build-optimized.sh -``` - -The script auto-detects the host triple. To cross-compile: - -```bash -./build-optimized.sh --target aarch64-unknown-linux-gnu -``` - -Extra arguments are forwarded to `cargo build`. +The thread-local variable `otel_thread_ctx_v1` and its TLSDESC accessor are +implemented in pure Rust using `global_asm!` and `asm!` in the +`libdd-otel-thread-ctx` crate. 
diff --git a/libdd-otel-thread-ctx-ffi/build-optimized.sh b/libdd-otel-thread-ctx-ffi/build-optimized.sh deleted file mode 100755 index b52fd657a1..0000000000 --- a/libdd-otel-thread-ctx-ffi/build-optimized.sh +++ /dev/null @@ -1,69 +0,0 @@ -#!/usr/bin/env bash -# Copyright 2026-Present Datadog, Inc. https://www.datadoghq.com/ -# SPDX-License-Identifier: Apache-2.0 -# -# Build libdd-otel-thread-ctx-ffi with cross-language LTO so the C TLS shim is -# inlined into the Rust FFI functions, eliminating a function-call indirection -# on every TLS access. -# -# Requirements: clang, lld (rust-lld from the toolchain is used automatically). -# The requirements are checked by the build.rs script. -# -# Usage: -# # auto-detect host triple -# ./build-optimized.sh -# # explicit target -# ./build-optimized.sh --target aarch64-unknown-linux-gnu -# -# Any extra arguments are forwarded to `cargo build`. -set -euo pipefail - -# Parse --target from args, or auto-detect the host triple. -TARGET="" -EXTRA_ARGS=() -while [[ $# -gt 0 ]]; do - case "$1" in - --target) - TARGET="$2"; shift 2 ;; - --target=*) - TARGET="${1#--target=}"; shift ;; - *) - EXTRA_ARGS+=("$1"); shift ;; - esac -done - -if [[ -z "$TARGET" ]]; then - TARGET=$(rustc -vV | sed -n 's/host: //p') -fi - -# CARGO_TARGET__RUSTFLAGS scopes the flags to the target only, keeping -# build scripts and proc-macros unaffected. -TARGET_ENV=$(echo "$TARGET" | tr 'a-z-' 'A-Z_') -FLAGS_VAR="CARGO_TARGET_${TARGET_ENV}_RUSTFLAGS" -EXISTING_FLAGS="${!FLAGS_VAR:-}" -export "$FLAGS_VAR=${EXISTING_FLAGS:+$EXISTING_FLAGS }-Clinker-plugin-lto -Clinker=clang" -export LIBDD_OTEL_THREAD_CTX_INLINE=1 - -cargo build --release \ - --target "$TARGET" \ - -p libdd-otel-thread-ctx-ffi \ - "${EXTRA_ARGS[@]}" - -# Sanity-check that the C shim was actually inlined, if `nm` is available. -if ! 
command -v nm &>/dev/null; then - echo >&2 "WARNING: skipping sanity check that the C TLS shim was inlined (\`nm\` not found)" -else - SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" - REPO_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)" - SO="$REPO_ROOT/target/$TARGET/release/liblibdd_otel_thread_ctx_ffi.so" - - if [[ -f "$SO" ]]; then - if ! NM_OUTPUT=$(nm "$SO" 2>&1); then - echo >&2 "WARNING: command \`nm\` failed on $SO. Skipping sanity check that the C TLS shim was inlined." - elif echo "$NM_OUTPUT" | grep -q 'libdd_get_otel_thread_ctx'; then - echo >&2 "ERROR: build succeeded but the C TLS shim (libdd_get_otel_thread_ctx_v1) was NOT inlined." - echo >&2 "Cross-language LTO may not be working. Check that clang and lld versions are recent enough and compatible with the Rust toolchain's LLVM." - exit 1 - fi - fi -fi diff --git a/libdd-otel-thread-ctx-ffi/build.rs b/libdd-otel-thread-ctx-ffi/build.rs index 61fa63764b..cbac605286 100644 --- a/libdd-otel-thread-ctx-ffi/build.rs +++ b/libdd-otel-thread-ctx-ffi/build.rs @@ -3,70 +3,7 @@ extern crate build_common; use build_common::{find_rust_lld_dir, generate_and_configure_header}; -use std::{env, fmt::Display, path::PathBuf, process::Command}; - -#[derive(Debug, Eq, PartialEq, Ord, PartialOrd)] -struct LldVersion { - major: u32, - minor: u32, -} - -impl Display for LldVersion { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{}.{}", self.major, self.minor) - } -} - -/// Parse the major and minor version from `ld.lld --version` output. 
-/// -/// Typical formats: -/// "LLD 18.1.3 (compatible with GNU linkers)" -/// "LLD 19.1.0" -fn system_lld_version() -> Option { - let output = Command::new("ld.lld").arg("--version").output().ok()?; - if !output.status.success() { - return None; - } - String::from_utf8_lossy(&output.stdout) - .split_whitespace() - .find_map(|tok| { - let mut splitted = tok.split('.'); - let major = splitted.next()?.parse::().ok()?; - let minor = splitted.next()?.parse::().ok()?; - - Some(LldVersion { major, minor }) - }) -} - -/// TLSDESC is supported in LLD from version 18.1. -const MIN_LLD_VERSION_FOR_TLSDESC: LldVersion = LldVersion { - major: 18, - minor: 1, -}; - -/// Validate that a suitable LLD is available for cross-language LTO. -/// -/// Returns the rust-lld `gcc-ld/` directory if found; `None` means the system -/// `ld.lld` will be used instead. Panics with a clear message when the -/// requirements are not met. -fn resolve_lld_for_inline(target_arch: &str) -> Option { - if let Some(dir) = find_rust_lld_dir() { - return Some(dir); - } - - match system_lld_version() { - Some(v) if target_arch != "x86_64" || v >= MIN_LLD_VERSION_FOR_TLSDESC => None, - Some(v) => panic!( - "LIBDD_OTEL_THREAD_CTX_INLINE requires LLD >= {MIN_LLD_VERSION_FOR_TLSDESC} on \ - x86-64 (for -mllvm -enable-tlsdesc), but system ld.lld is version {v}. \ - Install a newer LLD or use a Rust toolchain that bundles rust-lld." - ), - None => panic!( - "LIBDD_OTEL_THREAD_CTX_INLINE requires LLD for cross-language LTO, but neither \ - rust-lld nor a system ld.lld was found." 
- ), - } -} +use std::env; fn main() { generate_and_configure_header("otel-thread-ctx.h"); @@ -76,11 +13,7 @@ fn main() { return; } - println!("cargo:rerun-if-env-changed=LIBDD_OTEL_THREAD_CTX_INLINE"); - - let inline_mode = env::var("LIBDD_OTEL_THREAD_CTX_INLINE").is_ok_and(|v| v == "1"); let manifest_dir = env::var("CARGO_MANIFEST_DIR").unwrap(); - let target_arch = env::var("CARGO_CFG_TARGET_ARCH").unwrap(); // Export the TLSDESC thread-local variable to the dynamic symbol table so external readers // (e.g. the eBPF profiler) can discover it. Rust's cdylib linker applies a version script with @@ -93,32 +26,10 @@ fn main() { // Merging multiple version scripts is not supported by GNU ld, so we need lld. We prefer the // toolchain's bundled rust-lld (LLD 19+ since Rust 1.84) over the system lld (if it even // exists). If rust-lld is not found we fall back to whatever `lld` the system provides. - - // If `LIBDD_OTEL_THREAD_CTX_INLINE` is set to `1`, we try to inline the C shim. See the README - // for more details. - if inline_mode { - let rust_lld_dir = resolve_lld_for_inline(&target_arch); - - // Emit link args for ALL link types (not just cdylib) so that test binaries also link - // correctly when RUSTFLAGS sets clang as the linker (in practice we should only build/care - // about the shared object file in inline mode). - if let Some(dir) = rust_lld_dir { - println!("cargo:rustc-link-arg=-B{}", dir.display()); - } - println!("cargo:rustc-link-arg=-fuse-ld=lld"); - - // On x86-64, tell the LLVM backend to use TLSDESC during LTO codegen. - // On aarch64 TLSDESC is the default and the only model. - if target_arch == "x86_64" { - println!("cargo:rustc-link-arg=-Wl,-mllvm,-enable-tlsdesc"); - } - } else { - // Default mode: only the cdylib needs lld (for the version script). 
- if let Some(gcc_ld_dir) = find_rust_lld_dir() { - println!("cargo:rustc-cdylib-link-arg=-B{}", gcc_ld_dir.display()); - } - println!("cargo:rustc-cdylib-link-arg=-fuse-ld=lld"); + if let Some(gcc_ld_dir) = find_rust_lld_dir() { + println!("cargo:rustc-cdylib-link-arg=-B{}", gcc_ld_dir.display()); } + println!("cargo:rustc-cdylib-link-arg=-fuse-ld=lld"); println!( "cargo:rustc-cdylib-link-arg=-Wl,--version-script={manifest_dir}/tls-dynamic-list.txt" diff --git a/libdd-otel-thread-ctx/Cargo.toml b/libdd-otel-thread-ctx/Cargo.toml index 0580970a96..9044c3eb4f 100644 --- a/libdd-otel-thread-ctx/Cargo.toml +++ b/libdd-otel-thread-ctx/Cargo.toml @@ -22,7 +22,3 @@ elf = { version = "0.7", optional = true } [features] sanity-check = ["dep:elf", "dep:anyhow"] - -[build-dependencies] -build_common = { path = "../build-common" } -cc = "1.1.31" diff --git a/libdd-otel-thread-ctx/README.md b/libdd-otel-thread-ctx/README.md index c55675f7d2..731edb9676 100644 --- a/libdd-otel-thread-ctx/README.md +++ b/libdd-otel-thread-ctx/README.md @@ -15,10 +15,10 @@ Linux only for now. ## TLS -The C shim (`src/tls_shim.c`) is required because `rustc` does not yet support -the TLSDESC TLS dialect required by the spec to export `otel_thread_ctx_v1`. -Since the reader and the writer must agree on the TLS dialect/model, we rely on -the C compiler to emit the right access pattern. +The TLS symbol `otel_thread_ctx_v1` and its TLSDESC accessor are defined +directly in Rust using `global_asm!` and `asm!` (both stable since Rust +1.59). This avoids a C build dependency while guaranteeing the TLSDESC dialect +on both x86-64 and aarch64 as required by the spec. ## Usage diff --git a/libdd-otel-thread-ctx/build.rs b/libdd-otel-thread-ctx/build.rs index adfda34153..a9082c9aaa 100644 --- a/libdd-otel-thread-ctx/build.rs +++ b/libdd-otel-thread-ctx/build.rs @@ -1,17 +1,7 @@ // Copyright 2026-Present Datadog, Inc.
https://www.datadoghq.com/ // SPDX-License-Identifier: Apache-2.0 -extern crate build_common; use std::env; -use std::process::Command; - -fn clang_is_available() -> bool { - Command::new("clang") - .arg("--version") - .output() - .map(|o| o.status.success()) - .unwrap_or(false) -} fn main() { let target_os = env::var("CARGO_CFG_TARGET_OS").unwrap(); @@ -27,43 +17,4 @@ fn main() { target_arch ) } - - println!("cargo:rerun-if-env-changed=LIBDD_OTEL_THREAD_CTX_INLINE"); - println!("cargo:rerun-if-changed=src/tls_shim.c"); - - // The otel-thread-ctx FFI crate has a special flag to inline the C shim inside the final - // library. This setup has additional requirements for the build of this crate, which are - // enforced below when the flag is set. - let inline_mode = env::var_os("LIBDD_OTEL_THREAD_CTX_INLINE").is_some_and(|v| v == "1"); - - let mut build = cc::Build::new(); - - if inline_mode { - assert!( - clang_is_available(), - "LIBDD_OTEL_THREAD_CTX_INLINE is set but `clang` was not found. \ - Cross-language LTO requires clang as the C compiler." - ); - build.compiler("clang"); - build.flag("-flto=thin"); - - // Any binary linking this crate in inline mode (including test - // binaries) needs lld, because -Clinker-plugin-lto passes LTO plugin - // options that only lld understands. - if let Some(dir) = build_common::find_rust_lld_dir() { - println!("cargo:rustc-link-arg=-B{}", dir.display()); - } - println!("cargo:rustc-link-arg=-fuse-ld=lld"); - - // Note: in the inline setup, TLS dialect selection is handled by the linker and is taken - // care of by the build script of otel-thread-ctx-ffi - } else if target_arch == "x86_64" { - // - On aarch64, TLSDESC is already the only dynamic TLS model so no flag is needed. - // - On x86-64, we use `-mtls-dialect=gnu2` (supported since GCC 4.4 and Clang 19+) to force - // the use of TLSDESC as mandated by the spec. If it's not supported, this build will - // fail. 
- build.flag("-mtls-dialect=gnu2"); - } - - build.file("src/tls_shim.c").compile("tls_shim"); } diff --git a/libdd-otel-thread-ctx/src/lib.rs b/libdd-otel-thread-ctx/src/lib.rs index 10142a4c12..54417dddcf 100644 --- a/libdd-otel-thread-ctx/src/lib.rs +++ b/libdd-otel-thread-ctx/src/lib.rs @@ -5,8 +5,8 @@ //! //! This crate implements the publisher side of the Thread Context OTEP (PR #4947). //! -//! Since `rustc` doesn't currently support the TLSDESC dialect, we use a C shim to set and get -//! the thread-local storage used for the context. +//! Since `rustc` doesn't currently support the TLSDESC dialect, we define the thread-local +//! storage symbol and its accessor using inline assembly (`global_asm!` / `asm!`). //! //! ## Usage //! @@ -70,17 +70,75 @@ pub mod sanity_check; #[cfg(target_os = "linux")] pub mod linux { use std::{ - ffi::c_void, mem, ptr::{self, NonNull}, sync::atomic::{compiler_fence, AtomicPtr, AtomicU8, Ordering}, }; + // Define the thread-local pointer that external readers (e.g. the eBPF profiler) discover via + // the dynamic symbol table. It must be an exported ELF `STT_TLS` object accessed via the + // TLSDESC dialect, as mandated by the OTel thread-level context sharing spec. + // + // Stable `rustc` cannot select the TLS dialect for a `#[thread_local]` static, so we declare + // the symbol directly in assembly (an 8-byte, zero-initialised slot in `.tbss`) and resolve + // its per-thread address through TLSDESC in [`tls_slot`]. + #[cfg(all( + target_os = "linux", + any(target_arch = "x86_64", target_arch = "aarch64") + ))] + core::arch::global_asm!( + ".section .tbss,\"awT\",@nobits", + ".globl otel_thread_ctx_v1", + ".type otel_thread_ctx_v1, @tls_object", + ".size otel_thread_ctx_v1, 8", + "otel_thread_ctx_v1:", + ".zero 8", + ".previous", + ); + + /// Return the address of the current thread's `otel_thread_ctx_v1` TLS slot, resolved through + /// the TLSDESC dialect. 
+ #[cfg(target_arch = "x86_64")] + #[inline(always)] + unsafe fn tls_slot() -> *mut *mut ThreadContextRecord { + let ptr: usize; + core::arch::asm!( + "leaq otel_thread_ctx_v1@tlsdesc(%rip), %rax", + "call *otel_thread_ctx_v1@TLSCALL(%rax)", + "addq %fs:0, %rax", + out("rax") ptr, + options(att_syntax), + ); + ptr as *mut *mut ThreadContextRecord + } + + /// Return the address of the current thread's `otel_thread_ctx_v1` TLS slot, resolved through + /// the TLSDESC dialect. + #[cfg(target_arch = "aarch64")] + #[inline(always)] + unsafe fn tls_slot() -> *mut *mut ThreadContextRecord { + let ptr: usize; + core::arch::asm!( + "adrp x0, :tlsdesc:otel_thread_ctx_v1", + "ldr x1, [x0, :tlsdesc_lo12:otel_thread_ctx_v1]", + "add x0, x0, :tlsdesc_lo12:otel_thread_ctx_v1", + ".tlsdesccall otel_thread_ctx_v1", + "blr x1", + "mrs x2, tpidr_el0", + "add x0, x0, x2", + out("x0") ptr, + out("x1") _, + out("x2") _, + out("x30") _, + ); + ptr as *mut *mut ThreadContextRecord + } + /// Run `f` with an atomic view of the current thread's TLS slot. /// - /// The address calculation requires a call to a C shim in order to use the TLSDESC dialect - /// from Rust. The returned address is stable (per thread), so callers should try to do as - /// much work as possible inside a single call to reduce the number of C-shim round-trips. + /// The address calculation goes through the TLSDESC dialect via [`tls_slot`]. The returned + /// address is stable (per thread), so callers should try to do as much work as possible + /// inside a single call. /// /// The slot is read by an async signal handler. Atomic operations should in general use /// [Ordering::Relaxed], but modifications to the record might need additional compiler-only @@ -89,11 +147,6 @@ pub mod linux { where F: FnOnce(&AtomicPtr) -> R, { - extern "C" { - /// Return the address of the current thread's `otel_thread_ctx_v1` local. 
- fn libdd_get_otel_thread_ctx_v1() -> *mut *mut c_void; - } - const { assert!( mem::align_of::>() @@ -102,11 +155,9 @@ pub mod linux { } // Safety: the const assertion above ensures the alignment is correct. The TLS slot is - // valid for the lifetime of the current thread. The `extern "C"` declaration is scoped - // to this function, guaranteeing that all accesses go through the `AtomicPtr` wrapper. - let slot = unsafe { - AtomicPtr::from_ptr(libdd_get_otel_thread_ctx_v1().cast::<*mut ThreadContextRecord>()) - }; + // valid for the lifetime of the current thread, and all accesses go through the + // `AtomicPtr` wrapper. + let slot = unsafe { AtomicPtr::from_ptr(tls_slot()) }; f(slot) } @@ -462,7 +513,7 @@ pub mod linux { } #[cfg(test)] - // The tests are set to be ignored by Miri, since accessing the TLS through C isn't supported. + // The tests are set to be ignored by Miri, since the inline-asm TLSDESC access isn't supported. mod tests { use super::{ThreadContext, ThreadContextRecord}; use std::sync::atomic::Ordering; @@ -672,7 +723,7 @@ pub mod linux { let _ = ThreadContext::detach(); } - // Make sure the C shim is indeed providing a thread-local address. + // Make sure the TLSDESC accessor is indeed providing a thread-local address. #[test] #[cfg_attr(miri, ignore)] fn tls_slots_are_per_thread() { diff --git a/libdd-otel-thread-ctx/src/tls_shim.c b/libdd-otel-thread-ctx/src/tls_shim.c deleted file mode 100644 index 0322967725..0000000000 --- a/libdd-otel-thread-ctx/src/tls_shim.c +++ /dev/null @@ -1,20 +0,0 @@ -// Copyright 2026-Present Datadog, Inc. https://www.datadoghq.com/ -// SPDX-License-Identifier: Apache-2.0 - -// Declares the thread-local pointer that external readers (e.g. the eBPF -// profiler) discover via the dynsym table. The Rust layer accesses this -// pointer in lib.rs. -// -// The variable is declared in C in order to use the TLSDESC dialect for -// thread-local storage, which is required by the OTel thread-level context -// sharing spec. 
Unfortunately, it's not possible to have Rust use this dialect -// as of today. -#include - -__attribute__((visibility("default"))) -__thread void *otel_thread_ctx_v1 = NULL; - -// Return the resolved address of the thread-local variable. -void **libdd_get_otel_thread_ctx_v1(void) { - return &otel_thread_ctx_v1; -} From e368a561585602665b1ef9536421f802c5597d29 Mon Sep 17 00:00:00 2001 From: Gustavo Lopes Date: Wed, 17 Jun 2026 11:47:32 +0100 Subject: [PATCH 2/4] ensure otel_thread_ctx_v1 is aligned Co-authored-by: Yann Hamdaoui --- libdd-otel-thread-ctx/src/lib.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/libdd-otel-thread-ctx/src/lib.rs b/libdd-otel-thread-ctx/src/lib.rs index 54417dddcf..35f6d8e398 100644 --- a/libdd-otel-thread-ctx/src/lib.rs +++ b/libdd-otel-thread-ctx/src/lib.rs @@ -91,6 +91,7 @@ pub mod linux { ".globl otel_thread_ctx_v1", ".type otel_thread_ctx_v1, @tls_object", ".size otel_thread_ctx_v1, 8", + ".balign 8", "otel_thread_ctx_v1:", ".zero 8", ".previous", From 1aa73e87a09834b852626a02584a7a713eabe56b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gustavo=20Andr=C3=A9=20dos=20Santos=20Lopes?= Date: Wed, 17 Jun 2026 16:47:48 +0100 Subject: [PATCH 3/4] add comment and test for relaxation --- Cargo.lock | 1 + libdd-otel-thread-ctx-ffi/Cargo.toml | 1 + libdd-otel-thread-ctx-ffi/build.rs | 3 + .../tests/elf_properties.rs | 593 +++++++++++++++++- libdd-otel-thread-ctx/src/lib.rs | 5 + 5 files changed, 593 insertions(+), 10 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index e4cccf76f7..e6e9f5339c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3192,6 +3192,7 @@ name = "libdd-otel-thread-ctx-ffi" version = "1.0.0" dependencies = [ "build_common", + "elf", "libdd-common-ffi", "libdd-otel-thread-ctx", ] diff --git a/libdd-otel-thread-ctx-ffi/Cargo.toml b/libdd-otel-thread-ctx-ffi/Cargo.toml index 1c0f288628..6d7a43f98d 100644 --- a/libdd-otel-thread-ctx-ffi/Cargo.toml +++ b/libdd-otel-thread-ctx-ffi/Cargo.toml @@ -24,6 +24,7 @@ cbindgen = 
["build_common/cbindgen", "libdd-common-ffi/cbindgen"] sanity-check = ["dep:libdd-common-ffi", "libdd-otel-thread-ctx/sanity-check"] [dev-dependencies] +elf = "0.7" libdd-otel-thread-ctx = { path = "../libdd-otel-thread-ctx", features = ["sanity-check"] } [build-dependencies] diff --git a/libdd-otel-thread-ctx-ffi/build.rs b/libdd-otel-thread-ctx-ffi/build.rs index cbac605286..930272f09a 100644 --- a/libdd-otel-thread-ctx-ffi/build.rs +++ b/libdd-otel-thread-ctx-ffi/build.rs @@ -8,6 +8,9 @@ use std::env; fn main() { generate_and_configure_header("otel-thread-ctx.h"); + let cross_compiling = env::var("HOST").unwrap() != env::var("TARGET").unwrap(); + println!("cargo:rustc-env=LIBDD_OTEL_THREAD_CTX_FFI_CROSS_COMPILING={cross_compiling}"); + let target_os = env::var("CARGO_CFG_TARGET_OS").unwrap(); if target_os != "linux" { return; diff --git a/libdd-otel-thread-ctx-ffi/tests/elf_properties.rs b/libdd-otel-thread-ctx-ffi/tests/elf_properties.rs index 3281ca1d2d..633ac6514f 100644 --- a/libdd-otel-thread-ctx-ffi/tests/elf_properties.rs +++ b/libdd-otel-thread-ctx-ffi/tests/elf_properties.rs @@ -1,31 +1,604 @@ // Copyright 2026-Present Datadog, Inc. https://www.datadoghq.com/ // SPDX-License-Identifier: Apache-2.0 -//! Verify ELF properties of the built cdylib on Linux. +//! Verify ELF properties of the built artifacts on Linux. //! -//! Delegates to [`libdd_otel_thread_ctx::autocheck::check_tls_slot_in`] which -//! checks that: +//! These tests check that: //! - `otel_thread_ctx_v1` is exported in the dynamic symbol table as a TLS GLOBAL symbol. -//! - `otel_thread_ctx_v1` follows the TLSDESC access model (if there's a relocation, it's a TLSDESC -//! one). +//! - `otel_thread_ctx_v1` follows the TLSDESC access model: if there is a relocation for it, it is +//! a TLSDESC relocation. +//! - A native executable that statically links libdd-otel-thread-ctx-ffi without exporting +//! `otel_thread_ctx_v1` has libdd's TLSDESC access relaxed to local-exec TLS, leaving no +//! 
relocation for `otel_thread_ctx_v1`. //! -//! The cdylib path is derived at runtime from the test executable location. -//! Both the test binary and the cdylib live in `target/<[triple/]profile>/deps/`. +//! Library artifact paths are derived at runtime from the test executable location. +//! The test binary and crate artifacts live in `target/<[triple/]profile>/deps/`. #![cfg(target_os = "linux")] -use std::path::PathBuf; +use std::{ + io::ErrorKind, + path::{Path, PathBuf}, + process::{Command, Stdio}, +}; -fn cdylib_path() -> PathBuf { +use elf::{abi, endian::AnyEndian, symbol::SymbolTable, ElfBytes}; + +const SYMBOL: &str = "otel_thread_ctx_v1"; + +fn deps_dir() -> PathBuf { + // test binary: target/<[triple/]profile>/deps/ let exe = std::env::current_exe().expect("failed to read current executable path"); exe.parent() .expect("unexpected test executable path structure") - .join("liblibdd_otel_thread_ctx_ffi.so") + .to_owned() +} + +fn artifact_path(name: &str) -> PathBuf { + deps_dir().join(name) +} + +fn cdylib_path() -> PathBuf { + artifact_path("liblibdd_otel_thread_ctx_ffi.so") +} + +fn staticlib_path() -> PathBuf { + artifact_path("liblibdd_otel_thread_ctx_ffi.a") +} + +fn check_readable(path: &Path) { + assert!( + std::fs::File::open(path).is_ok(), + "{} could not be opened for reading", + path.display() + ); +} + +fn tool_available(tool: &str) -> bool { + match Command::new(tool) + .arg("--version") + .stdout(Stdio::null()) + .stderr(Stdio::null()) + .status() + { + Ok(_) => true, + Err(e) if e.kind() == ErrorKind::NotFound => { + eprintln!("skipping test: required tool `{tool}` is not available"); + false + } + Err(e) => panic!("failed to check whether `{tool}` is available: {e}"), + } +} + +fn required_tools_available(tools: &[&str]) -> bool { + tools.iter().all(|tool| tool_available(tool)) +} + +fn native_target() -> bool { + let cross_compiling = option_env!("LIBDD_OTEL_THREAD_CTX_FFI_CROSS_COMPILING") == Some("true"); + if cross_compiling { + 
eprintln!("skipping test: cross-compiling"); + } + !cross_compiling +} + +fn command_output(command: &mut Command) -> String { + let out = command + .output() + .unwrap_or_else(|e| panic!("failed to run {command:?}: {e}")); + assert!( + out.status.success(), + "{command:?} failed with status {}\nstdout:\n{}\nstderr:\n{}", + out.status, + String::from_utf8_lossy(&out.stdout), + String::from_utf8_lossy(&out.stderr) + ); + String::from_utf8_lossy(&out.stdout).into_owned() +} + +fn objdump(args: &[&str], path: &Path) -> String { + let mut command = Command::new("objdump"); + command.args(args).arg(path); + command_output(&mut command) +} + +fn assert_command_success(command: &mut Command) { + let out = command + .output() + .unwrap_or_else(|e| panic!("failed to run {command:?}: {e}")); + assert!( + out.status.success(), + "{command:?} failed with status {}\nstdout:\n{}\nstderr:\n{}", + out.status, + String::from_utf8_lossy(&out.stdout), + String::from_utf8_lossy(&out.stderr) + ); +} + +fn build_dir(name: &str) -> PathBuf { + let dir = deps_dir().join(format!("{name}-{}", std::process::id())); + let _ = std::fs::remove_dir_all(&dir); + std::fs::create_dir_all(&dir) + .unwrap_or_else(|e| panic!("failed to create {}: {e}", dir.display())); + dir +} + +fn parse_elf<'data>(data: &'data [u8], label: &str) -> ElfBytes<'data, AnyEndian> { + ElfBytes::::minimal_parse(data) + .unwrap_or_else(|e| panic!("failed to parse ELF data from {label}: {e}")) +} + +fn symbol_indexes_in_table( + elf: &ElfBytes<'_, AnyEndian>, + symtab_index: usize, + symbol: &str, + label: &str, +) -> Vec { + let Some(section_headers) = elf.section_headers() else { + panic!("{label} has no ELF section headers"); + }; + let symtab_header = section_headers + .get(symtab_index) + .unwrap_or_else(|e| panic!("failed to read symbol table header {symtab_index}: {e}")); + + // Relocation sections link to the symbol table they use; archive members usually use + // `.symtab`, while linked dynamic artifacts may use 
`.dynsym`. + if !matches!(symtab_header.sh_type, abi::SHT_SYMTAB | abi::SHT_DYNSYM) { + return Vec::new(); + } + + let strtab_header = section_headers + .get(symtab_header.sh_link as usize) + .unwrap_or_else(|e| panic!("failed to read linked string table header: {e}")); + let strtab = elf + .section_data_as_strtab(&strtab_header) + .unwrap_or_else(|e| panic!("failed to read linked string table in {label}: {e}")); + let (symtab_data, _) = elf + .section_data(&symtab_header) + .unwrap_or_else(|e| panic!("failed to read symbol table data in {label}: {e}")); + let symtab = SymbolTable::new(elf.ehdr.endianness, elf.ehdr.class, symtab_data); + + symtab + .iter() + .enumerate() + .filter_map(|(index, sym)| { + strtab + .get(sym.st_name as usize) + .ok() + .filter(|name| *name == symbol) + .map(|_| index as u32) + }) + .collect() +} + +fn relocation_types_for_symbol_in_elf(data: &[u8], symbol: &str, label: &str) -> Vec { + let elf = parse_elf(data, label); + let Some(section_headers) = elf.section_headers() else { + panic!("{label} has no ELF section headers"); + }; + let mut relocation_types = Vec::new(); + + for section_header in section_headers + .iter() + .filter(|shdr| matches!(shdr.sh_type, abi::SHT_REL | abi::SHT_RELA)) + { + let symbol_indexes = + symbol_indexes_in_table(&elf, section_header.sh_link as usize, symbol, label); + if symbol_indexes.is_empty() { + continue; + } + + match section_header.sh_type { + abi::SHT_REL => { + let rels = elf + .section_data_as_rels(§ion_header) + .unwrap_or_else(|e| panic!("failed to read REL relocations in {label}: {e}")); + relocation_types.extend( + rels.filter(|rel| symbol_indexes.contains(&rel.r_sym)) + .map(|rel| rel.r_type), + ); + } + abi::SHT_RELA => { + let relas = elf + .section_data_as_relas(§ion_header) + .unwrap_or_else(|e| panic!("failed to read RELA relocations in {label}: {e}")); + relocation_types.extend( + relas + .filter(|rela| symbol_indexes.contains(&rela.r_sym)) + .map(|rela| rela.r_type), + ); + } + _ => 
unreachable!(), + } + } + + relocation_types +} + +fn relocation_types_for_symbol_in_file(path: &Path, symbol: &str) -> Vec { + let data = + std::fs::read(path).unwrap_or_else(|e| panic!("failed to read {}: {e}", path.display())); + relocation_types_for_symbol_in_elf(&data, symbol, &path.display().to_string()) +} + +fn parse_ascii_usize(bytes: &[u8], what: &str) -> usize { + std::str::from_utf8(bytes) + .unwrap_or_else(|e| panic!("invalid UTF-8 in {what}: {e}")) + .trim() + .parse() + .unwrap_or_else(|e| panic!("failed to parse {what}: {e}")) +} + +fn trim_archive_name(bytes: &[u8]) -> String { + String::from_utf8_lossy(bytes) + .trim() + .trim_end_matches('/') + .to_owned() +} + +fn gnu_archive_name(name_table: &[u8], offset: usize) -> String { + assert!( + offset < name_table.len(), + "GNU archive name offset {offset} is outside the name table" + ); + let rest = &name_table[offset..]; + let end = rest.iter().position(|b| *b == b'\n').unwrap_or(rest.len()); + trim_archive_name(&rest[..end]) +} + +fn archive_member_name_and_data<'a>( + name_field: &[u8], + member: &'a [u8], + gnu_name_table: Option<&'a [u8]>, +) -> (String, &'a [u8]) { + let name = std::str::from_utf8(name_field) + .unwrap_or_else(|e| panic!("invalid UTF-8 in archive member name: {e}")) + .trim(); + + if matches!(name, "/" | "//") { + return (name.to_owned(), member); + } + + if let Some(name_len) = name.strip_prefix("#1/") { + let name_len = name_len + .parse::() + .unwrap_or_else(|e| panic!("failed to parse BSD archive name length: {e}")); + assert!( + name_len <= member.len(), + "BSD archive member name length {name_len} exceeds member data length {}", + member.len() + ); + return (trim_archive_name(&member[..name_len]), &member[name_len..]); + } + + if let Some(offset) = name.strip_prefix('/') { + if !offset.is_empty() && offset.bytes().all(|b| b.is_ascii_digit()) { + let offset = offset + .parse::() + .unwrap_or_else(|e| panic!("failed to parse GNU archive name offset: {e}")); + let 
gnu_name_table = + gnu_name_table.expect("GNU archive name offset used before the name table"); + return (gnu_archive_name(gnu_name_table, offset), member); + } + } + + (trim_archive_name(name_field), member) +} + +fn archive_relocation_types_for_symbol(path: &Path, symbol: &str) -> Vec<(String, Vec)> { + const ARMAG: &[u8] = b"!\n"; + const HEADER_LEN: usize = 60; + + let archive = + std::fs::read(path).unwrap_or_else(|e| panic!("failed to read {}: {e}", path.display())); + assert!( + archive.starts_with(ARMAG), + "{} is not an ar archive", + path.display() + ); + + let mut offset = ARMAG.len(); + let mut gnu_name_table = None; + let mut relocations = Vec::new(); + + while offset < archive.len() { + assert!( + offset + HEADER_LEN <= archive.len(), + "truncated ar header in {} at offset {offset}", + path.display() + ); + let header = &archive[offset..offset + HEADER_LEN]; + assert_eq!( + &header[58..60], + b"`\n", + "invalid ar header trailer in {} at offset {offset}", + path.display() + ); + offset += HEADER_LEN; + + let member_size = parse_ascii_usize(&header[48..58], "archive member size"); + let member_end = offset + .checked_add(member_size) + .expect("archive member end offset overflowed"); + assert!( + member_end <= archive.len(), + "truncated ar member in {} at offset {offset}", + path.display() + ); + + let member = &archive[offset..member_end]; + let (member_name, member_data) = + archive_member_name_and_data(&header[0..16], member, gnu_name_table); + + if member_name == "//" { + gnu_name_table = Some(member); + } else if member_data.starts_with(&abi::ELFMAGIC) { + let label = format!("{}({member_name})", path.display()); + let relocation_types = relocation_types_for_symbol_in_elf(member_data, symbol, &label); + if !relocation_types.is_empty() { + relocations.push((member_name.clone(), relocation_types)); + } + } + + offset = member_end + member_size % 2; + assert!( + offset <= archive.len(), + "truncated ar padding in {} after member {member_name}", + 
path.display() + ); + } + + relocations +} + +#[cfg(target_arch = "x86_64")] +fn is_tlsdesc_object_relocation(relocation_type: u32) -> bool { + // These are object-file TLSDESC relocations. `R_X86_64_TLSDESC` is the dynamic-linker + // relocation emitted after linking, so it is intentionally excluded here. + matches!( + relocation_type, + abi::R_X86_64_GOTPC32_TLSDESC | abi::R_X86_64_TLSDESC_CALL + ) +} + +#[cfg(target_arch = "aarch64")] +fn is_tlsdesc_object_relocation(relocation_type: u32) -> bool { + // These are object-file TLSDESC relocations. `R_AARCH64_TLSDESC` is the dynamic-linker + // relocation emitted after linking, so it is intentionally excluded here. + matches!( + relocation_type, + abi::R_AARCH64_TLSDESC_LD_PREL19 + | abi::R_AARCH64_TLSDESC_ADR_PREL21 + | abi::R_AARCH64_TLSDESC_ADR_PAGE21 + | abi::R_AARCH64_TLSDESC_LD64_LO12 + | abi::R_AARCH64_TLSDESC_ADD_LO12 + | abi::R_AARCH64_TLSDESC_OFF_G1 + | abi::R_AARCH64_TLSDESC_OFF_G0_NC + | abi::R_AARCH64_TLSDESC_LDR + | abi::R_AARCH64_TLSDESC_ADD + | abi::R_AARCH64_TLSDESC_CALL + ) +} + +fn format_relocations(relocations: &[(String, Vec)]) -> String { + if relocations.is_empty() { + return "".to_owned(); + } + + relocations + .iter() + .map(|(name, types)| format!("{name}: {types:?}")) + .collect::>() + .join("\n") +} + +fn is_disassembly_header_for(line: &str, name: &str) -> bool { + let Some((_, symbol)) = line.split_once('<') else { + return false; + }; + let Some(symbol) = symbol.strip_suffix(">:") else { + return false; + }; + symbol == name + || symbol + .strip_prefix(name) + .is_some_and(|suffix| suffix.starts_with("::")) +} + +fn disassembled_functions(output: &str, name: &str) -> Vec { + let mut functions = Vec::new(); + let mut current_function = Vec::new(); + + for line in output.lines() { + if is_disassembly_header_for(line, name) { + if !current_function.is_empty() { + functions.push(current_function.join("\n")); + current_function.clear(); + } + current_function.push(line); + continue; + } + 
+ if !current_function.is_empty() { + if line.is_empty() { + functions.push(current_function.join("\n")); + current_function.clear(); + continue; + } + current_function.push(line); + } + } + + if !current_function.is_empty() { + functions.push(current_function.join("\n")); + } + + assert!( + !functions.is_empty(), + "could not find disassembly for {name} in:\n{output}" + ); + functions +} + +#[cfg(target_arch = "aarch64")] +fn disassembly_window_around_line( + function: &str, + needle: &str, + before: usize, + after: usize, +) -> String { + let lines = function.lines().collect::>(); + let line_index = lines + .iter() + .position(|line| line.contains(needle)) + .unwrap_or_else(|| panic!("could not find {needle:?} in:\n{function}")); + let start = line_index.saturating_sub(before); + let end = usize::min(line_index + after + 1, lines.len()); + lines[start..end].join("\n") } #[test] #[cfg_attr(miri, ignore)] fn otel_thread_ctx_v1_tls_properties() { let path = cdylib_path(); + check_readable(&path); libdd_otel_thread_ctx::sanity_check::check_tls_slot_in(&path).unwrap(); } + +#[test] +#[cfg_attr(miri, ignore)] +#[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] +fn statically_linked_executable_relaxes_libdd_tls_slot_to_local_exec() { + if !native_target() { + return; + } + + if !required_tools_available(&["cc", "objdump"]) { + return; + } + + let staticlib = staticlib_path(); + check_readable(&staticlib); + + let dir = build_dir("otel-thread-ctx-local-exec"); + let source = dir.join("consumer.c"); + let object = dir.join("consumer.o"); + let executable = dir.join("consumer"); + std::fs::write( + &source, + r#" +#include + +void ddog_otel_thread_ctx_update( + const uint8_t (*trace_id)[16], + const uint8_t (*span_id)[8], + const uint8_t (*local_root_span_id)[8]); +void *ddog_otel_thread_ctx_detach(void); +void ddog_otel_thread_ctx_free(void *ctx); + +int main(void) { + uint8_t trace_id[16] = {1}; + uint8_t span_id[8] = {2}; + uint8_t local_root_span_id[8] = {3}; 
+ + ddog_otel_thread_ctx_update(&trace_id, &span_id, &local_root_span_id); + void *ctx = ddog_otel_thread_ctx_detach(); + ddog_otel_thread_ctx_free(ctx); + + return ctx == 0 ? 1 : 0; +} +"#, + ) + .unwrap_or_else(|e| panic!("failed to write {}: {e}", source.display())); + + let mut compile_object = Command::new("cc"); + compile_object.args(["-O2", "-ffunction-sections", "-fdata-sections"]); + compile_object.arg("-c").arg(&source).arg("-o").arg(&object); + assert_command_success(&mut compile_object); + + let staticlib_relocations = archive_relocation_types_for_symbol(&staticlib, SYMBOL); + assert!( + staticlib_relocations + .iter() + .any(|(_, types)| types.iter().any(|t| is_tlsdesc_object_relocation(*t))), + "expected an object-file TLSDESC relocation for {SYMBOL} in {}\nfound:\n{}", + staticlib.display(), + format_relocations(&staticlib_relocations) + ); + + let object_relocations = relocation_types_for_symbol_in_file(&object, SYMBOL); + assert!( + object_relocations.is_empty(), + "expected generated C object to have no relocations for {SYMBOL}; found {object_relocations:?}" + ); + + let mut link_executable = Command::new("cc"); + link_executable + .arg(&object) + .arg(&staticlib) + .args([ + "-Wl,--gc-sections", + "-lpthread", + "-ldl", + "-lm", + "-lrt", + "-lutil", + ]) + .arg("-o") + .arg(&executable); + assert_command_success(&mut link_executable); + + // Run the generated executable so the test validates the relaxed TLS access at runtime too. 
+ let mut run_executable = Command::new(&executable); + assert_command_success(&mut run_executable); + + let executable_relocations = relocation_types_for_symbol_in_file(&executable, SYMBOL); + assert!( + executable_relocations.is_empty(), + "expected no remaining relocations for {SYMBOL} in {}; found {executable_relocations:?}", + executable.display() + ); + + let disassembly = objdump(&["-drwC"], &executable); + let tls_slot_functions = + disassembled_functions(&disassembly, "libdd_otel_thread_ctx::linux::with_tls_slot"); + + #[cfg(target_arch = "x86_64")] + { + assert!( + tls_slot_functions + .iter() + .any(|function| function.contains("%fs:0x0")), + "expected tls_slot() in libdd-otel-thread-ctx to be relaxed to local-exec x86-64 \ + TLS access through %fs:0x0\n{}", + tls_slot_functions.join("\n\n") + ); + assert!( + tls_slot_functions + .iter() + .all(|function| !function.contains("tlsdesc")), + "expected linker-relaxed local-exec TLS code without TLSDESC operands:\n{}", + tls_slot_functions.join("\n\n") + ); + } + + #[cfg(target_arch = "aarch64")] + { + let function = tls_slot_functions + .iter() + .find(|function| function.contains("tpidr_el0")) + .unwrap_or_else(|| { + panic!( + "expected tls_slot() in libdd-otel-thread-ctx to use tpidr_el0 after \ + relaxation\n{}", + tls_slot_functions.join("\n\n") + ) + }); + let window = disassembly_window_around_line(function, "tpidr_el0", 4, 3); + assert!( + !window.contains("tlsdesc") && !window.contains("\tblr"), + "expected linker-relaxed local-exec TLS code around tpidr_el0 without a TLSDESC call:\n\ + {window}" + ); + } +} diff --git a/libdd-otel-thread-ctx/src/lib.rs b/libdd-otel-thread-ctx/src/lib.rs index 35f6d8e398..b0fb2c8396 100644 --- a/libdd-otel-thread-ctx/src/lib.rs +++ b/libdd-otel-thread-ctx/src/lib.rs @@ -82,6 +82,11 @@ pub mod linux { // Stable `rustc` cannot select the TLS dialect for a `#[thread_local]` static, so we declare // the symbol directly in assembly (an 8-byte, zero-initialised slot in 
`.tbss`) and resolve // its per-thread address through TLSDESC in [`tls_slot`]. + // + // WARNING: keep the assembly below in the canonical compiler-emitted TLSDESC form. Linkers + // rely on these exact relocation-bearing instruction patterns for TLS relaxation, especially + // when this crate is linked statically. Harmless-looking rewrites can hide part of the sequence + // from the linker and produce a partially relaxed access that computes an invalid TLS address. #[cfg(all( target_os = "linux", any(target_arch = "x86_64", target_arch = "aarch64") From 109699fbd774a926cf9d5efaf2f5440a8261295a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gustavo=20Andr=C3=A9=20dos=20Santos=20Lopes?= Date: Thu, 18 Jun 2026 15:28:43 +0100 Subject: [PATCH 4/4] Compare inline assembly to what's generated by the toolchain --- Cargo.lock | 1 + libdd-otel-thread-ctx-ffi/Cargo.toml | 1 + .../tests/elf_properties.rs | 464 +++++++++++++----- libdd-otel-thread-ctx-ffi/tests/tls_shim.c | 8 + libdd-otel-thread-ctx/src/lib.rs | 9 +- 5 files changed, 347 insertions(+), 136 deletions(-) create mode 100644 libdd-otel-thread-ctx-ffi/tests/tls_shim.c diff --git a/Cargo.lock b/Cargo.lock index e6e9f5339c..c138d3378e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3195,6 +3195,7 @@ dependencies = [ "elf", "libdd-common-ffi", "libdd-otel-thread-ctx", + "object 0.36.5", ] [[package]] diff --git a/libdd-otel-thread-ctx-ffi/Cargo.toml b/libdd-otel-thread-ctx-ffi/Cargo.toml index 6d7a43f98d..00132acd89 100644 --- a/libdd-otel-thread-ctx-ffi/Cargo.toml +++ b/libdd-otel-thread-ctx-ffi/Cargo.toml @@ -26,6 +26,7 @@ sanity-check = ["dep:libdd-common-ffi", "libdd-otel-thread-ctx/sanity-check"] [dev-dependencies] elf = "0.7" libdd-otel-thread-ctx = { path = "../libdd-otel-thread-ctx", features = ["sanity-check"] } +object = { version = "0.36", default-features = false, features = ["archive", "read_core"] } [build-dependencies] build_common = { path = "../build-common" } diff --git 
a/libdd-otel-thread-ctx-ffi/tests/elf_properties.rs b/libdd-otel-thread-ctx-ffi/tests/elf_properties.rs index 633ac6514f..1eaa9a5008 100644 --- a/libdd-otel-thread-ctx-ffi/tests/elf_properties.rs +++ b/libdd-otel-thread-ctx-ffi/tests/elf_properties.rs @@ -17,14 +17,45 @@ #![cfg(target_os = "linux")] use std::{ + fmt, io::ErrorKind, path::{Path, PathBuf}, process::{Command, Stdio}, }; use elf::{abi, endian::AnyEndian, symbol::SymbolTable, ElfBytes}; +use object::read::archive::ArchiveFile; const SYMBOL: &str = "otel_thread_ctx_v1"; +const SKIP_TLS_SHIM_ASM_TEST_ENV: &str = "LIBDD_OTEL_THREAD_CTX_SKIP_TLS_SHIM_ASM_TEST"; + +#[derive(Clone, Copy, PartialEq, Eq)] +struct RelocationType(u32); + +impl fmt::Debug for RelocationType { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + self.0.fmt(f) + } +} + +#[derive(Debug, PartialEq, Eq)] +struct TlsDescRelocation { + offset: usize, + relocation_type: RelocationType, + addend: i64, +} + +#[derive(Debug, PartialEq, Eq)] +struct TlsDescSequence { + bytes: Vec, + relocations: Vec, +} + +#[derive(Debug)] +struct ArchiveMemberRelocations { + member_name: String, + relocation_types: Vec, +} fn deps_dir() -> PathBuf { // test binary: target/<[triple/]profile>/deps/ @@ -82,6 +113,14 @@ fn native_target() -> bool { !cross_compiling } +fn skip_tls_shim_asm_test() -> bool { + let skip = std::env::var_os(SKIP_TLS_SHIM_ASM_TEST_ENV).is_some(); + if skip { + eprintln!("skipping test: {SKIP_TLS_SHIM_ASM_TEST_ENV} is set"); + } + skip +} + fn command_output(command: &mut Command) -> String { let out = command .output() @@ -171,7 +210,11 @@ fn symbol_indexes_in_table( .collect() } -fn relocation_types_for_symbol_in_elf(data: &[u8], symbol: &str, label: &str) -> Vec { +fn relocation_types_for_symbol_in_elf( + data: &[u8], + symbol: &str, + label: &str, +) -> Vec { let elf = parse_elf(data, label); let Some(section_headers) = elf.section_headers() else { panic!("{label} has no ELF section headers"); @@ -195,7 +238,7 @@ fn 
relocation_types_for_symbol_in_elf(data: &[u8], symbol: &str, label: &str) -> .unwrap_or_else(|e| panic!("failed to read REL relocations in {label}: {e}")); relocation_types.extend( rels.filter(|rel| symbol_indexes.contains(&rel.r_sym)) - .map(|rel| rel.r_type), + .map(|rel| RelocationType(rel.r_type)), ); } abi::SHT_RELA => { @@ -205,7 +248,7 @@ fn relocation_types_for_symbol_in_elf(data: &[u8], symbol: &str, label: &str) -> relocation_types.extend( relas .filter(|rela| symbol_indexes.contains(&rela.r_sym)) - .map(|rela| rela.r_type), + .map(|rela| RelocationType(rela.r_type)), ); } _ => unreachable!(), @@ -215,158 +258,74 @@ fn relocation_types_for_symbol_in_elf(data: &[u8], symbol: &str, label: &str) -> relocation_types } -fn relocation_types_for_symbol_in_file(path: &Path, symbol: &str) -> Vec { +fn relocation_types_for_symbol_in_file(path: &Path, symbol: &str) -> Vec { let data = std::fs::read(path).unwrap_or_else(|e| panic!("failed to read {}: {e}", path.display())); relocation_types_for_symbol_in_elf(&data, symbol, &path.display().to_string()) } -fn parse_ascii_usize(bytes: &[u8], what: &str) -> usize { - std::str::from_utf8(bytes) - .unwrap_or_else(|e| panic!("invalid UTF-8 in {what}: {e}")) - .trim() - .parse() - .unwrap_or_else(|e| panic!("failed to parse {what}: {e}")) -} - -fn trim_archive_name(bytes: &[u8]) -> String { - String::from_utf8_lossy(bytes) - .trim() - .trim_end_matches('/') - .to_owned() -} - -fn gnu_archive_name(name_table: &[u8], offset: usize) -> String { - assert!( - offset < name_table.len(), - "GNU archive name offset {offset} is outside the name table" - ); - let rest = &name_table[offset..]; - let end = rest.iter().position(|b| *b == b'\n').unwrap_or(rest.len()); - trim_archive_name(&rest[..end]) -} - -fn archive_member_name_and_data<'a>( - name_field: &[u8], - member: &'a [u8], - gnu_name_table: Option<&'a [u8]>, -) -> (String, &'a [u8]) { - let name = std::str::from_utf8(name_field) - .unwrap_or_else(|e| panic!("invalid UTF-8 in 
archive member name: {e}")) - .trim(); - - if matches!(name, "/" | "//") { - return (name.to_owned(), member); - } - - if let Some(name_len) = name.strip_prefix("#1/") { - let name_len = name_len - .parse::() - .unwrap_or_else(|e| panic!("failed to parse BSD archive name length: {e}")); - assert!( - name_len <= member.len(), - "BSD archive member name length {name_len} exceeds member data length {}", - member.len() - ); - return (trim_archive_name(&member[..name_len]), &member[name_len..]); - } +fn archive_relocation_types_for_symbol(path: &Path, symbol: &str) -> Vec { + let mut relocations = Vec::new(); - if let Some(offset) = name.strip_prefix('/') { - if !offset.is_empty() && offset.bytes().all(|b| b.is_ascii_digit()) { - let offset = offset - .parse::() - .unwrap_or_else(|e| panic!("failed to parse GNU archive name offset: {e}")); - let gnu_name_table = - gnu_name_table.expect("GNU archive name offset used before the name table"); - return (gnu_archive_name(gnu_name_table, offset), member); + for_each_archive_elf_member(path, |member_name, member_data| { + let label = format!("{}({member_name})", path.display()); + let relocation_types = relocation_types_for_symbol_in_elf(member_data, symbol, &label); + if !relocation_types.is_empty() { + relocations.push(ArchiveMemberRelocations { + member_name: member_name.to_owned(), + relocation_types, + }); } - } + }); - (trim_archive_name(name_field), member) + relocations } -fn archive_relocation_types_for_symbol(path: &Path, symbol: &str) -> Vec<(String, Vec)> { - const ARMAG: &[u8] = b"!\n"; - const HEADER_LEN: usize = 60; - - let archive = +fn for_each_archive_elf_member(path: &Path, mut f: impl FnMut(&str, &[u8])) { + let archive_data = std::fs::read(path).unwrap_or_else(|e| panic!("failed to read {}: {e}", path.display())); - assert!( - archive.starts_with(ARMAG), - "{} is not an ar archive", - path.display() - ); - - let mut offset = ARMAG.len(); - let mut gnu_name_table = None; - let mut relocations = Vec::new(); 
- - while offset < archive.len() { - assert!( - offset + HEADER_LEN <= archive.len(), - "truncated ar header in {} at offset {offset}", - path.display() - ); - let header = &archive[offset..offset + HEADER_LEN]; - assert_eq!( - &header[58..60], - b"`\n", - "invalid ar header trailer in {} at offset {offset}", - path.display() - ); - offset += HEADER_LEN; - - let member_size = parse_ascii_usize(&header[48..58], "archive member size"); - let member_end = offset - .checked_add(member_size) - .expect("archive member end offset overflowed"); - assert!( - member_end <= archive.len(), - "truncated ar member in {} at offset {offset}", - path.display() - ); - - let member = &archive[offset..member_end]; - let (member_name, member_data) = - archive_member_name_and_data(&header[0..16], member, gnu_name_table); - - if member_name == "//" { - gnu_name_table = Some(member); - } else if member_data.starts_with(&abi::ELFMAGIC) { - let label = format!("{}({member_name})", path.display()); - let relocation_types = relocation_types_for_symbol_in_elf(member_data, symbol, &label); - if !relocation_types.is_empty() { - relocations.push((member_name.clone(), relocation_types)); - } + let archive = ArchiveFile::parse(&*archive_data) + .unwrap_or_else(|e| panic!("failed to parse archive {}: {e}", path.display())); + + for member in archive.members() { + let member = + member.unwrap_or_else(|e| panic!("failed to read member in {}: {e}", path.display())); + let member_data = member.data(&*archive_data).unwrap_or_else(|e| { + panic!( + "failed to read member data for {} in {}: {e}", + String::from_utf8_lossy(member.name()), + path.display() + ) + }); + + if member_data.starts_with(&abi::ELFMAGIC) { + let member_name = std::str::from_utf8(member.name()).unwrap_or_else(|e| { + panic!( + "archive member name in {} is not valid UTF-8: {e}", + path.display() + ) + }); + f(member_name, member_data); } - - offset = member_end + member_size % 2; - assert!( - offset <= archive.len(), - "truncated ar 
padding in {} after member {member_name}", - path.display() - ); } - - relocations } #[cfg(target_arch = "x86_64")] -fn is_tlsdesc_object_relocation(relocation_type: u32) -> bool { +fn is_tlsdesc_object_relocation(relocation_type: RelocationType) -> bool { // These are object-file TLSDESC relocations. `R_X86_64_TLSDESC` is the dynamic-linker // relocation emitted after linking, so it is intentionally excluded here. matches!( - relocation_type, + relocation_type.0, abi::R_X86_64_GOTPC32_TLSDESC | abi::R_X86_64_TLSDESC_CALL ) } #[cfg(target_arch = "aarch64")] -fn is_tlsdesc_object_relocation(relocation_type: u32) -> bool { +fn is_tlsdesc_object_relocation(relocation_type: RelocationType) -> bool { // These are object-file TLSDESC relocations. `R_AARCH64_TLSDESC` is the dynamic-linker // relocation emitted after linking, so it is intentionally excluded here. matches!( - relocation_type, + relocation_type.0, abi::R_AARCH64_TLSDESC_LD_PREL19 | abi::R_AARCH64_TLSDESC_ADR_PREL21 | abi::R_AARCH64_TLSDESC_ADR_PAGE21 @@ -380,14 +339,207 @@ fn is_tlsdesc_object_relocation(relocation_type: u32) -> bool { ) } -fn format_relocations(relocations: &[(String, Vec)]) -> String { +#[derive(Debug)] +struct RawRelocation { + offset: u64, + relocation_type: RelocationType, + addend: i64, +} + +#[cfg(target_arch = "x86_64")] +const TLSDESC_RELOCATIONS_PER_ACCESS: usize = 2; + +#[cfg(target_arch = "aarch64")] +const TLSDESC_RELOCATIONS_PER_ACCESS: usize = 4; + +#[cfg(target_arch = "x86_64")] +fn tlsdesc_sequence_bounds(relocations: &[RawRelocation], section_len: usize) -> (usize, usize) { + let first_offset = usize::try_from(relocations[0].offset) + .expect("first relocation offset does not fit in usize"); + let call_offset = usize::try_from(relocations[1].offset) + .expect("call relocation offset does not fit in usize"); + let start = first_offset + .checked_sub(3) + .expect("x86-64 TLSDESC relocation offset is before the LEA instruction displacement"); + let end = call_offset + 11; + 
assert!( + end <= section_len, + "x86-64 TLSDESC sequence extends beyond section data" + ); + (start, end) +} + +#[cfg(target_arch = "aarch64")] +fn tlsdesc_sequence_bounds(relocations: &[RawRelocation], section_len: usize) -> (usize, usize) { + let first_offset = usize::try_from(relocations[0].offset) + .expect("first relocation offset does not fit in usize"); + let start = first_offset + .checked_sub(4) + .expect("AArch64 TLSDESC relocation offset is before the TPIDR_EL0 read"); + let last_offset = usize::try_from(relocations[relocations.len() - 1].offset) + .expect("last relocation offset does not fit in usize"); + let end = last_offset + 8; + assert!( + end <= section_len, + "AArch64 TLSDESC sequence extends beyond section data" + ); + (start, end) +} + +fn tlsdesc_sequence_from_relocations( + section_data: &[u8], + relocations: &[RawRelocation], +) -> TlsDescSequence { + let (start, end) = tlsdesc_sequence_bounds(relocations, section_data.len()); + TlsDescSequence { + bytes: section_data[start..end].to_vec(), + relocations: relocations + .iter() + .map(|relocation| TlsDescRelocation { + offset: usize::try_from(relocation.offset) + .expect("relocation offset does not fit in usize") + - start, + relocation_type: relocation.relocation_type, + addend: relocation.addend, + }) + .collect(), + } +} + +fn tlsdesc_sequences_for_symbol_in_elf( + data: &[u8], + symbol: &str, + label: &str, +) -> Vec { + let elf = parse_elf(data, label); + let Some(section_headers) = elf.section_headers() else { + panic!("{label} has no ELF section headers"); + }; + let mut sequences = Vec::new(); + + for section_header in section_headers + .iter() + .filter(|shdr| matches!(shdr.sh_type, abi::SHT_REL | abi::SHT_RELA)) + { + let symbol_indexes = + symbol_indexes_in_table(&elf, section_header.sh_link as usize, symbol, label); + if symbol_indexes.is_empty() { + continue; + } + + let target_header = section_headers + .get(section_header.sh_info as usize) + .unwrap_or_else(|e| panic!("failed 
to read relocation target section header: {e}")); + let (target_data, _) = elf + .section_data(&target_header) + .unwrap_or_else(|e| panic!("failed to read relocation target section in {label}: {e}")); + let mut relocations = Vec::new(); + + match section_header.sh_type { + abi::SHT_REL => { + let rels = elf + .section_data_as_rels(§ion_header) + .unwrap_or_else(|e| panic!("failed to read REL relocations in {label}: {e}")); + relocations.extend( + rels.filter(|rel| { + symbol_indexes.contains(&rel.r_sym) + && is_tlsdesc_object_relocation(RelocationType(rel.r_type)) + }) + .map(|rel| RawRelocation { + offset: rel.r_offset, + relocation_type: RelocationType(rel.r_type), + addend: 0, + }), + ); + } + abi::SHT_RELA => { + let relas = elf + .section_data_as_relas(§ion_header) + .unwrap_or_else(|e| panic!("failed to read RELA relocations in {label}: {e}")); + relocations.extend( + relas + .filter(|rela| { + symbol_indexes.contains(&rela.r_sym) + && is_tlsdesc_object_relocation(RelocationType(rela.r_type)) + }) + .map(|rela| RawRelocation { + offset: rela.r_offset, + relocation_type: RelocationType(rela.r_type), + addend: rela.r_addend, + }), + ); + } + _ => unreachable!(), + } + + relocations.sort_by_key(|relocation| relocation.offset); + assert!( + relocations.len() % TLSDESC_RELOCATIONS_PER_ACCESS == 0, + "expected TLSDESC relocations for {symbol} in {label} to come in groups of \ + {TLSDESC_RELOCATIONS_PER_ACCESS}; found {relocations:?}" + ); + + sequences.extend( + relocations + .chunks_exact(TLSDESC_RELOCATIONS_PER_ACCESS) + .map(|chunk| tlsdesc_sequence_from_relocations(target_data, chunk)), + ); + } + + sequences +} + +fn tlsdesc_sequences_for_symbol_in_file(path: &Path, symbol: &str) -> Vec { + let data = + std::fs::read(path).unwrap_or_else(|e| panic!("failed to read {}: {e}", path.display())); + tlsdesc_sequences_for_symbol_in_elf(&data, symbol, &path.display().to_string()) +} + +fn archive_tlsdesc_sequences_for_symbol( + path: &Path, + symbol: &str, +) -> 
Vec<(String, TlsDescSequence)> { + let mut sequences = Vec::new(); + + for_each_archive_elf_member(path, |member_name, member_data| { + let label = format!("{}({member_name})", path.display()); + sequences.extend( + tlsdesc_sequences_for_symbol_in_elf(member_data, symbol, &label) + .into_iter() + .map(|sequence| (member_name.to_owned(), sequence)), + ); + }); + + sequences +} + +fn compile_tls_shim_object(dir: &Path) -> PathBuf { + let source = Path::new(env!("CARGO_MANIFEST_DIR")).join("tests/tls_shim.c"); + let object = dir.join("tls_shim.o"); + let mut compile_object = Command::new("cc"); + compile_object.args(["-O2", "-fPIC", "-fomit-frame-pointer", "-c"]); + + #[cfg(target_arch = "x86_64")] + compile_object.arg("-mtls-dialect=gnu2"); + + compile_object.arg(&source).arg("-o").arg(&object); + assert_command_success(&mut compile_object); + object +} + +fn format_relocations(relocations: &[ArchiveMemberRelocations]) -> String { if relocations.is_empty() { return "".to_owned(); } relocations .iter() - .map(|(name, types)| format!("{name}: {types:?}")) + .map(|relocations| { + format!( + "{}: {:?}", + relocations.member_name, relocations.relocation_types + ) + }) .collect::>() .join("\n") } @@ -457,6 +609,53 @@ fn disassembly_window_around_line( lines[start..end].join("\n") } +#[test] +#[cfg_attr(miri, ignore)] +#[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] +fn tlsdesc_inline_assembly_matches_c_compiler_sequence() { + if !native_target() || skip_tls_shim_asm_test() { + return; + } + + if !required_tools_available(&["cc"]) { + return; + } + + let staticlib = staticlib_path(); + check_readable(&staticlib); + + let dir = build_dir("otel-thread-ctx-tls-shim"); + let c_object = compile_tls_shim_object(&dir); + let c_sequences = tlsdesc_sequences_for_symbol_in_file(&c_object, SYMBOL); + assert_eq!( + c_sequences.len(), + 1, + "expected one compiler-generated TLSDESC access in {}; found {c_sequences:?}. 
\ + Set {SKIP_TLS_SHIM_ASM_TEST_ENV}=1 to skip this guard with a different local compiler.", + c_object.display() + ); + let expected = &c_sequences[0]; + + let rust_sequences = archive_tlsdesc_sequences_for_symbol(&staticlib, SYMBOL); + assert!( + !rust_sequences.is_empty(), + "expected at least one Rust inline-asm TLSDESC access for {SYMBOL} in {}", + staticlib.display() + ); + + for (member_name, sequence) in rust_sequences { + assert_eq!( + &sequence, + expected, + "Rust inline assembly TLSDESC sequence in {}({member_name}) does not match \ + compiler output from {}. Set {SKIP_TLS_SHIM_ASM_TEST_ENV}=1 to skip this guard with \ + a different local compiler.", + staticlib.display(), + c_object.display() + ); + } +} + #[test] #[cfg_attr(miri, ignore)] fn otel_thread_ctx_v1_tls_properties() { @@ -518,9 +717,10 @@ int main(void) { let staticlib_relocations = archive_relocation_types_for_symbol(&staticlib, SYMBOL); assert!( - staticlib_relocations + staticlib_relocations.iter().any(|relocations| relocations + .relocation_types .iter() - .any(|(_, types)| types.iter().any(|t| is_tlsdesc_object_relocation(*t))), + .any(|t| is_tlsdesc_object_relocation(*t))), "expected an object-file TLSDESC relocation for {SYMBOL} in {}\nfound:\n{}", staticlib.display(), format_relocations(&staticlib_relocations) diff --git a/libdd-otel-thread-ctx-ffi/tests/tls_shim.c b/libdd-otel-thread-ctx-ffi/tests/tls_shim.c new file mode 100644 index 0000000000..cf31f150a5 --- /dev/null +++ b/libdd-otel-thread-ctx-ffi/tests/tls_shim.c @@ -0,0 +1,8 @@ +// Copyright 2026-Present Datadog, Inc. 
https://www.datadoghq.com/ +// SPDX-License-Identifier: Apache-2.0 + +extern __thread void *otel_thread_ctx_v1 __attribute__((tls_model("global-dynamic"))); + +__attribute__((noinline)) void **tls_slot_from_c(void) { + return &otel_thread_ctx_v1; +} diff --git a/libdd-otel-thread-ctx/src/lib.rs b/libdd-otel-thread-ctx/src/lib.rs index b0fb2c8396..756d8c7a48 100644 --- a/libdd-otel-thread-ctx/src/lib.rs +++ b/libdd-otel-thread-ctx/src/lib.rs @@ -125,13 +125,14 @@ pub mod linux { unsafe fn tls_slot() -> *mut *mut ThreadContextRecord { let ptr: usize; core::arch::asm!( + "mrs x1, tpidr_el0", "adrp x0, :tlsdesc:otel_thread_ctx_v1", - "ldr x1, [x0, :tlsdesc_lo12:otel_thread_ctx_v1]", + "ldr x2, [x0, :tlsdesc_lo12:otel_thread_ctx_v1]", "add x0, x0, :tlsdesc_lo12:otel_thread_ctx_v1", ".tlsdesccall otel_thread_ctx_v1", - "blr x1", - "mrs x2, tpidr_el0", - "add x0, x0, x2", + // x1 is guaranteed not to be clobbered by the call + "blr x2", + "add x0, x1, x0", out("x0") ptr, out("x1") _, out("x2") _,