From 81fe4572d2f79ec8948ba8b02508bff166f62bb4 Mon Sep 17 00:00:00 2001 From: swananan Date: Sun, 31 May 2026 21:38:45 +0800 Subject: [PATCH] fix: cache only verified kernel capabilities --- ghostscope-loader/src/kernel_caps.rs | 314 +++++++++++++++++++++------ ghostscope/src/main.rs | 2 +- 2 files changed, 247 insertions(+), 69 deletions(-) diff --git a/ghostscope-loader/src/kernel_caps.rs b/ghostscope-loader/src/kernel_caps.rs index d322aee..0bfa191 100644 --- a/ghostscope-loader/src/kernel_caps.rs +++ b/ghostscope-loader/src/kernel_caps.rs @@ -6,8 +6,42 @@ use aya::{ use std::{fmt, sync::OnceLock}; use tracing::{error, info, warn}; -/// Global kernel capabilities cache -static KERNEL_CAPS: OnceLock> = OnceLock::new(); +/// Global cache for complete, hardware-backed kernel capability probes. +static KERNEL_CAPS: KernelCapabilityCache = KernelCapabilityCache::new(); + +#[derive(Debug)] +struct KernelCapabilityCache { + full: OnceLock, +} + +impl KernelCapabilityCache { + const fn new() -> Self { + Self { + full: OnceLock::new(), + } + } + + fn get_or_detect(&self, detect: F) -> Result + where + F: FnOnce() -> Result, + { + if let Some(capabilities) = self.full.get() { + return Ok(*capabilities); + } + + let detection = detect()?; + if detection.cacheable { + let _ = self.full.set(detection.capabilities); + if let Some(capabilities) = self.full.get() { + return Ok(*capabilities); + } + } else { + warn!("Kernel capability probe used fallback values; not caching this result"); + } + + Ok(detection.capabilities) + } +} #[derive(Debug, Clone)] pub struct KernelCapabilityError { @@ -31,7 +65,7 @@ impl fmt::Display for KernelCapabilityError { impl std::error::Error for KernelCapabilityError {} /// Kernel eBPF capabilities detection -#[derive(Debug, Clone, Copy)] +#[derive(Debug, Clone, Copy, PartialEq, Eq)] pub struct KernelCapabilities { /// Whether the kernel supports BPF_MAP_TYPE_RINGBUF (requires >= 5.8) pub supports_ringbuf: bool, @@ -44,51 +78,21 @@ pub struct KernelCapabilities { impl KernelCapabilities { /// Detect kernel capabilities for process startup, including startup-oriented logs and /// user-facing error context. - pub fn detect_for_startup( - force_perf_event_array: bool, - ) -> Result<&'static Self, KernelCapabilityError> { - let capabilities = if force_perf_event_array { - warn!("⚠️ TESTING MODE: force_perf_event_array=true - will use PerfEventArray"); - Self::get_perf_only().map_err(|err| { - KernelCapabilityError::new(format!( - "{err}\nGhostScope requires Linux kernel >= 4.3 with PerfEventArray enabled." - )) - })? - } else { - Self::get().map_err(|err| { - KernelCapabilityError::new(format!( - "{err}\nHint: ensure CONFIG_BPF, CONFIG_BPF_SYSCALL and CONFIG_UPROBE_EVENTS are enabled in your kernel." - )) - })? - }; - - info!( - "Kernel eBPF startup summary: ringbuf_supported={} perf_event_array_supported={} helper_ns_current_pid_tgid={}", - capabilities.supports_ringbuf, - capabilities.supports_perf_event_array, - capabilities.supports_ns_current_pid_tgid_helper - ); - - Ok(capabilities) + pub fn detect_for_startup(force_perf_event_array: bool) -> Result { + detect_for_startup_with_detectors(force_perf_event_array, Self::get, Self::get_perf_only) } - /// Get global kernel capabilities (detected once on first call) - /// Returns an error if neither RingBuf nor PerfEventArray is supported - pub fn get() -> Result<&'static Self, KernelCapabilityError> { - match KERNEL_CAPS.get_or_init(detect_full_capabilities) { - Ok(capabilities) => Ok(capabilities), - Err(err) => Err(err.clone()), - } + /// Get global kernel capabilities (detected once on first cacheable call) + /// Returns an error if neither RingBuf nor PerfEventArray support can be verified. + pub fn get() -> Result { + KERNEL_CAPS.get_or_detect(detect_full_capabilities) } - /// Get kernel capabilities with PerfEventArray-only detection (for testing mode) - /// Skips RingBuf detection and only validates PerfEventArray support - /// Returns an error if PerfEventArray is not supported - pub fn get_perf_only() -> Result<&'static Self, KernelCapabilityError> { - match KERNEL_CAPS.get_or_init(detect_perf_only_capabilities) { - Ok(capabilities) => Ok(capabilities), - Err(err) => Err(err.clone()), - } + /// Detect kernel capabilities with PerfEventArray-only startup semantics. + /// This intentionally bypasses the global cache because force-perf mode is a + /// runtime policy override, not the kernel's complete hardware capability set. + pub fn get_perf_only() -> Result { + detect_perf_only_capabilities() } /// Check if RingBuf is supported (convenience method) @@ -113,21 +117,91 @@ impl KernelCapabilities { } } -fn detect_full_capabilities() -> Result { +fn detect_for_startup_with_detectors( + force_perf_event_array: bool, + detect_full: F, + detect_perf_only: P, +) -> Result +where + F: FnOnce() -> Result, + P: FnOnce() -> Result, +{ + let capabilities = if force_perf_event_array { + warn!("⚠️ TESTING MODE: force_perf_event_array=true - will use PerfEventArray"); + detect_perf_only().map_err(|err| { + KernelCapabilityError::new(format!( + "{err}\nGhostScope requires Linux kernel >= 4.3 with PerfEventArray enabled." + )) + })? + } else { + detect_full().map_err(|err| { + KernelCapabilityError::new(format!( + "{err}\nHint: ensure CONFIG_BPF, CONFIG_BPF_SYSCALL and CONFIG_UPROBE_EVENTS are enabled in your kernel." + )) + })? + }; + + info!( + "Kernel eBPF startup summary: ringbuf_supported={} perf_event_array_supported={} helper_ns_current_pid_tgid={}", + capabilities.supports_ringbuf, + capabilities.supports_perf_event_array, + capabilities.supports_ns_current_pid_tgid_helper + ); + + Ok(capabilities) +} + +#[derive(Debug, Clone, Copy)] +struct KernelCapabilityDetection { + capabilities: KernelCapabilities, + cacheable: bool, +} + +#[derive(Debug, Clone, Copy)] +struct CapabilityProbe { + supported: bool, + cacheable: bool, +} + +impl CapabilityProbe { + fn cacheable(supported: bool) -> Self { + Self { + supported, + cacheable: true, + } + } + + fn uncacheable_unsupported() -> Self { + Self { + supported: false, + cacheable: false, + } + } +} + +fn detect_full_capabilities() -> Result { let supports_ringbuf = detect_ringbuf_support(); - let supports_perf_event_array = if !supports_ringbuf { + let supports_perf_event_array = if !supports_ringbuf.supported { detect_perf_event_array_support() } else { - true + CapabilityProbe::cacheable(true) }; - if supports_ringbuf { + if supports_ringbuf.supported { info!("✓ Kernel supports RingBuf (>= 5.8)"); - } else if supports_perf_event_array { + } else if supports_perf_event_array.supported { warn!("⚠️ Kernel does not support RingBuf (< 5.8)"); warn!("⚠️ Will use PerfEventArray as fallback"); info!("✓ Kernel supports PerfEventArray (>= 4.3)"); } else { + if !supports_ringbuf.cacheable || !supports_perf_event_array.cacheable { + error!("❌ Unable to verify kernel eBPF event output support"); + return Err(KernelCapabilityError::new( + "Unable to verify RingBuf or PerfEventArray support because one or more \ + eBPF capability probes failed. Check privileges and kernel BPF settings.", + )); + } + error!("❌ Kernel supports neither RingBuf nor PerfEventArray"); error!("❌ GhostScope requires kernel >= 4.3 for eBPF event output"); error!("❌ Current kernel appears to be older or eBPF is disabled"); @@ -138,16 +212,21 @@ fn detect_full_capabilities() -> Result Result= 4.3 for eBPF event output"); return Err(KernelCapabilityError::new( @@ -167,7 +254,7 @@ fn detect_perf_only_capabilities() -> Result= 4.3)"); let supports_ns_current_pid_tgid_helper = detect_ns_current_pid_tgid_helper_support(); - if supports_ns_current_pid_tgid_helper { + if supports_ns_current_pid_tgid_helper.supported { info!("✓ Kernel supports helper bpf_get_ns_current_pid_tgid (id=120)"); } else { warn!("⚠️ Kernel does not support helper bpf_get_ns_current_pid_tgid (id=120)"); @@ -175,13 +262,13 @@ fn detect_perf_only_capabilities() -> Result bool { +fn detect_ringbuf_support() -> CapabilityProbe { detect_map_support( MapType::RingBuf, "RingBuf", @@ -190,7 +277,7 @@ fn detect_ringbuf_support() -> bool { } /// Detect PerfEventArray support by attempting to create a minimal map -fn detect_perf_event_array_support() -> bool { +fn detect_perf_event_array_support() -> CapabilityProbe { detect_map_support( MapType::PerfEventArray, "PerfEventArray", @@ -198,26 +285,30 @@ fn detect_perf_event_array_support() -> bool { ) } -fn detect_map_support(map_type: MapType, label: &str, unsupported_context: &str) -> bool { +fn detect_map_support( + map_type: MapType, + label: &str, + unsupported_context: &str, +) -> CapabilityProbe { info!("Probing kernel {label} support via aya::sys::is_map_supported..."); match is_map_supported(map_type) { Ok(true) => { info!("{label} map support probe succeeded - {label} is supported"); - true + CapabilityProbe::cacheable(true) } Ok(false) => { info!("{label} map support probe reported unsupported ({unsupported_context})"); - false + CapabilityProbe::cacheable(false) } Err(err) => { warn!("{label} map support probe failed unexpectedly: {err}"); - false + CapabilityProbe::uncacheable_unsupported() } } } -fn detect_ns_current_pid_tgid_helper_support() -> bool { +fn detect_ns_current_pid_tgid_helper_support() -> CapabilityProbe { info!( "Probing kernel bpf_get_ns_current_pid_tgid helper support via aya::sys::is_helper_supported..." ); @@ -230,15 +321,102 @@ fn detect_ns_current_pid_tgid_helper_support() -> bool { info!( "bpf_get_ns_current_pid_tgid helper support probe succeeded - helper is supported" ); - true + CapabilityProbe::cacheable(true) } Ok(false) => { info!("bpf_get_ns_current_pid_tgid helper support probe reported unsupported"); - false + CapabilityProbe::cacheable(false) } Err(err) => { warn!("bpf_get_ns_current_pid_tgid helper support probe failed unexpectedly: {err}"); - false + CapabilityProbe::uncacheable_unsupported() + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn caps( + supports_ringbuf: bool, + supports_perf_event_array: bool, + supports_ns_current_pid_tgid_helper: bool, + ) -> KernelCapabilities { + KernelCapabilities { + supports_ringbuf, + supports_perf_event_array, + supports_ns_current_pid_tgid_helper, } } + + fn detection(capabilities: KernelCapabilities, cacheable: bool) -> KernelCapabilityDetection { + KernelCapabilityDetection { + capabilities, + cacheable, + } + } + + #[test] + fn forced_perf_startup_does_not_populate_full_capabilities_cache() { + let cache = KernelCapabilityCache::new(); + let perf_only_caps = caps(false, true, true); + let full_caps = caps(true, true, true); + + let forced = detect_for_startup_with_detectors( + true, + || -> Result { + panic!("full detector should not run for forced perf startup") + }, + || Ok(perf_only_caps), + ) + .expect("forced perf startup detection"); + + assert_eq!(forced, perf_only_caps); + + let normal = detect_for_startup_with_detectors( + false, + || cache.get_or_detect(|| Ok(detection(full_caps, true))), + || -> Result { + panic!("perf-only detector should not run for normal startup") + }, + ) + .expect("normal startup detection"); + + assert_eq!(normal, full_caps); + assert_eq!( + cache + .get_or_detect(|| { + panic!("full detector should not rerun after cacheable detection") + }) + .expect("cached full capabilities"), + full_caps + ); + } + + #[test] + fn uncacheable_full_probe_result_is_not_cached() { + let cache = KernelCapabilityCache::new(); + let uncacheable_caps = caps(false, true, false); + let cacheable_caps = caps(true, true, true); + + let first = cache + .get_or_detect(|| Ok(detection(uncacheable_caps, false))) + .expect("uncacheable startup result"); + assert_eq!(first, uncacheable_caps); + + let second = cache + .get_or_detect(|| Ok(detection(cacheable_caps, true))) + .expect("cacheable startup result"); + assert_eq!(second, cacheable_caps); + + assert_eq!( + cache + .get_or_detect(|| { + panic!("full detector should not rerun after cacheable detection") + }) + .expect("cached full capabilities"), + cacheable_caps + ); + } } diff --git a/ghostscope/src/main.rs b/ghostscope/src/main.rs index e8a637f..a9731b7 100644 --- a/ghostscope/src/main.rs +++ b/ghostscope/src/main.rs @@ -52,7 +52,7 @@ async fn main() -> Result<()> { // Dry-run does not attach uprobes, but it still validates the same eBPF // privileges and kernel capabilities as a real run. crate::util::ensure_privileges(); - let kernel_caps = *ghostscope_loader::KernelCapabilities::detect_for_startup( + let kernel_caps = ghostscope_loader::KernelCapabilities::detect_for_startup( user_config.ebpf_config.force_perf_event_array, )?; let resolved_config = config::ResolvedConfig::resolve(user_config, &kernel_caps)?;