diff --git a/CMakeLists.txt b/CMakeLists.txt
index 4e8cb55..9c0194f 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -20,15 +20,15 @@ option(ENABLE_WARNINGS "Enable warnings" OFF)
 
 # Check for TBB
 if(NOT MSVC AND NOT DISABLE_PAR)
-    find_package(TBB QUIET)
-    if(TBB_FOUND)
-        message(STATUS "TBB found. Enabling parallel execution.")
-    else()
-        message(STATUS "TBB not found. Disabling parallel execution.")
-        set(DISABLE_PAR ON)
-    endif()
+  find_package(TBB QUIET)
+  if(TBB_FOUND)
+    message(STATUS "TBB found. Enabling parallel execution.")
+  else()
+    message(STATUS "TBB not found. Disabling parallel execution.")
+    set(DISABLE_PAR ON)
+  endif()
 elseif(DISABLE_PAR)
-    message(STATUS "DISABLE_PAR set. Disabling parallel execution.")
+  message(STATUS "DISABLE_PAR set. Disabling parallel execution.")
 endif()
 
 # Create the ctrack library
@@ -40,41 +40,41 @@ target_include_directories(ctrack INTERFACE
 
 # Configure ctrack based on TBB availability
 if(DISABLE_PAR)
-    target_compile_definitions(ctrack INTERFACE CTRACK_DISABLE_EXECUTION_POLICY)
+  target_compile_definitions(ctrack INTERFACE CTRACK_DISABLE_EXECUTION_POLICY)
 elseif(NOT MSVC AND TBB_FOUND)
-    target_link_libraries(ctrack INTERFACE TBB::tbb)
+  target_link_libraries(ctrack INTERFACE TBB::tbb)
 endif()
 
 set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
 if(ENABLE_WARNINGS)
-    if (NOT MSVC)
-        include(cmake/add_warning.cmake)
-        include(cmake/warnings.cmake)
-    endif()
+  if (NOT MSVC)
+    include(cmake/add_warning.cmake)
+    include(cmake/warnings.cmake)
+  endif()
 endif()
 
 # Add the examples subdirectory if not disabled
 if(NOT DISABLE_EXAMPLES)
-    add_subdirectory(examples)
+  add_subdirectory(examples)
 else()
-    message(STATUS "Building examples disabled.")
+  message(STATUS "Building examples disabled.")
 endif()
 
 # Add the benchmark subdirectory if enabled
 if(BUILD_BENCHMARK)
-    add_subdirectory(benchmark)
-    message(STATUS "Building benchmark enabled.")
+  add_subdirectory(benchmark)
+  message(STATUS "Building benchmark enabled.")
 else()
-    message(STATUS "Building benchmark disabled.")
+  message(STATUS "Building benchmark disabled.")
 endif()
 
 # Add the test subdirectory if enabled
 if(BUILD_TESTS)
-    add_subdirectory(test)
-    enable_testing()
-    message(STATUS "Building tests enabled.")
+  enable_testing() # must come first so tests added in test/ get registered
+  add_subdirectory(test)
+  message(STATUS "Building tests enabled.")
 else()
-    message(STATUS "Building tests disabled.")
+  message(STATUS "Building tests disabled.")
 endif()
 
 # Installation
@@ -109,4 +109,4 @@ install(FILES
     "${CMAKE_CURRENT_BINARY_DIR}/ctrackConfig.cmake"
     "${CMAKE_CURRENT_BINARY_DIR}/ctrackConfigVersion.cmake"
     DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/ctrack
-)
\ No newline at end of file
+)
diff --git a/benchmark/CMakeLists.txt b/benchmark/CMakeLists.txt
index 1e014d6..9d04305 100644
--- a/benchmark/CMakeLists.txt
+++ b/benchmark/CMakeLists.txt
@@ -1,24 +1,42 @@
-add_executable(ctrack_benchmark ctrack_benchmark.cpp)
-target_link_libraries(ctrack_benchmark PRIVATE ctrack)
-
-# Enable threading support
-set(THREADS_PREFER_PTHREAD_FLAG ON)
-find_package(Threads REQUIRED)
-target_link_libraries(ctrack_benchmark PRIVATE Threads::Threads)
-
-# Add filesystem library if needed (for older compilers)
-if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
+# Threading support is shared by every benchmark variant; resolve it once.
+set(THREADS_PREFER_PTHREAD_FLAG ON)
+find_package(Threads REQUIRED)
+
+# add_ctrack_benchmark(<target_name> <clock_define>)
+#
+# Builds ctrack_benchmark.cpp into the executable <target_name>, links it
+# against ctrack and Threads::Threads, and emits it to
+# ${CMAKE_BINARY_DIR}/benchmark.
+#   target_name  - name of the executable target to create.
+#   clock_define - compile definition added PRIVATE to the target; pass ""
+#                  to add none.
+# Example: add_ctrack_benchmark(ctrack_benchmark_rdtsc CTRACK_CLOCK_RDTSC)
+function(add_ctrack_benchmark target_name clock_define)
+  add_executable(${target_name} ctrack_benchmark.cpp)
+  target_link_libraries(${target_name} PRIVATE ctrack Threads::Threads)
+  # -O3 is a GCC/Clang-style flag; MSVC does not recognize it, so skip it there.
+  if(NOT MSVC)
+    target_compile_options(${target_name} PRIVATE -O3)
+  endif()
+  if(NOT "${clock_define}" STREQUAL "")
+    target_compile_definitions(${target_name} PRIVATE ${clock_define})
+  endif()
+  # Add the filesystem library if needed (for older compilers).
+  if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
     if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 9.0)
-        target_link_libraries(ctrack_benchmark PRIVATE stdc++fs)
+      target_link_libraries(${target_name} PRIVATE stdc++fs)
     endif()
-elseif(CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
+  elseif(CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
     if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 9.0)
-        target_link_libraries(ctrack_benchmark PRIVATE c++fs)
+      target_link_libraries(${target_name} PRIVATE c++fs)
     endif()
-endif()
-
-# Set output directory
-set_target_properties(ctrack_benchmark
-    PROPERTIES
+  endif()
+  set_target_properties(${target_name} PROPERTIES
     RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/benchmark"
-)
\ No newline at end of file
+  )
+endfunction()
+
+add_ctrack_benchmark(ctrack_benchmark "")
+add_ctrack_benchmark(ctrack_benchmark_rdtsc CTRACK_CLOCK_RDTSC)
+add_ctrack_benchmark(ctrack_benchmark_rdtscp CTRACK_CLOCK_RDTSCP)
+add_ctrack_benchmark(ctrack_benchmark_rdtscp_lfence CTRACK_CLOCK_RDTSCP_LFENCE)
diff --git a/benchmark/bench_results.svg b/benchmark/bench_results.svg
new file mode 100644
index 0000000..36068fa
--- /dev/null
+++ b/benchmark/bench_results.svg
@@ -0,0 +1,114 @@
+<svg width="680" height="490" viewBox="0 0 680 490" role="img" xmlns="http://www.w3.org/2000/svg" font-family="system-ui, sans-serif">
+
+<!-- ===== Metadata ===== -->
+<title>Timer variant benchmark: accuracy error vs overhead</title>
+<desc>Scatter plot of 4 timer variants. X: accuracy error %, Y: overhead %. Lower is better on both axes.</desc>
+
+<!-- ===== Styling ===== -->
+<style>
+  text { fill: #3d3d3a; font-size: 12px; }
+  .label { font-weight: 600; font-size: 13px; }
+  @media (prefers-color-scheme: dark) { text { fill: #c2c0b6; } }
+</style>
+
+<!-- ===== Grid lines (vertical: X-axis reference) ===== -->
+<line x1="181" y1="60" x2="181" y2="400" stroke="#888" stroke-width="0.5" opacity="0.15"/>
+<line x1="253" y1="60" x2="253" y2="400" stroke="#888" stroke-width="0.5" opacity="0.15"/>
+<line x1="324" y1="60" x2="324" y2="400" stroke="#888" stroke-width="0.5" opacity="0.15"/>
+<line x1="396" y1="60" x2="396" y2="400" stroke="#888" stroke-width="0.5" opacity="0.15"/>
+<line x1="467" y1="60" x2="467" y2="400" stroke="#888" stroke-width="0.5" opacity="0.15"/>
+<line x1="539" y1="60" x2="539" y2="400" stroke="#888" stroke-width="0.5" opacity="0.15"/>
+
+<!-- ===== Grid lines (horizontal: Y-axis reference) ===== -->
+<line x1="110" y1="315" x2="610" y2="315" stroke="#888" stroke-width="0.5" opacity="0.15"/>
+<line x1="110" y1="230" x2="610" y2="230" stroke="#888" stroke-width="0.5" opacity="0.15"/>
+<line x1="110" y1="145" x2="610" y2="145" stroke="#888" stroke-width="0.5" opacity="0.15"/>
+
+<!-- ===== Axes ===== -->
+<line x1="110" y1="400" x2="610" y2="400" stroke="#888" stroke-width="1.5" opacity="0.5"/>
+<line x1="110" y1="60"  x2="110" y2="400" stroke="#888" stroke-width="1.5" opacity="0.5"/>
+
+<!-- ===== Axis labels ===== -->
+<!-- Y axis (rotated) -->
+<text text-anchor="middle" font-size="12" font-weight="600"
+      transform="translate(66, 230) rotate(-90)">
+  overhead % (lower → better)
+</text>
+
+<!-- X axis -->
+<text x="360" y="442" text-anchor="middle" font-size="12">
+  accuracy error % (lower → better)
+</text>
+
+<!-- ===== X-axis ticks and labels ===== -->
+<line x1="110" y1="400" x2="110" y2="406" stroke="#888"/>
+<line x1="181" y1="400" x2="181" y2="406" stroke="#888"/>
+<line x1="253" y1="400" x2="253" y2="406" stroke="#888"/>
+<line x1="324" y1="400" x2="324" y2="406" stroke="#888"/>
+<line x1="396" y1="400" x2="396" y2="406" stroke="#888"/>
+<line x1="467" y1="400" x2="467" y2="406" stroke="#888"/>
+<line x1="539" y1="400" x2="539" y2="406" stroke="#888"/>
+<line x1="610" y1="400" x2="610" y2="406" stroke="#888"/>
+
+<text x="110" y="419" text-anchor="middle">0%</text>
+<text x="181" y="419" text-anchor="middle">2%</text>
+<text x="253" y="419" text-anchor="middle">4%</text>
+<text x="324" y="419" text-anchor="middle">6%</text>
+<text x="396" y="419" text-anchor="middle">8%</text>
+<text x="467" y="419" text-anchor="middle">10%</text>
+<text x="539" y="419" text-anchor="middle">12%</text>
+<text x="610" y="419" text-anchor="middle">14%</text>
+
+<!-- ===== Y-axis ticks and labels ===== -->
+<line x1="110" y1="400" x2="104" y2="400" stroke="#888"/>
+<line x1="110" y1="315" x2="104" y2="315" stroke="#888"/>
+<line x1="110" y1="230" x2="104" y2="230" stroke="#888"/>
+<line x1="110" y1="145" x2="104" y2="145" stroke="#888"/>
+<line x1="110" y1="60"  x2="104" y2="60"  stroke="#888"/>
+
+<text x="98" y="404" text-anchor="end">0%</text>
+<text x="98" y="319" text-anchor="end">5%</text>
+<text x="98" y="234" text-anchor="end">10%</text>
+<text x="98" y="149" text-anchor="end">15%</text>
+<text x="98" y="64"  text-anchor="end">20%</text>
+
+<!-- ===== Data points (scatter plot) ===== -->
+
+<!-- chrono -->
+<circle cx="569" cy="98" r="9" fill="#D85A30" opacity="0.9"/>
+<text x="555" y="84" text-anchor="end" class="label">chrono</text>
+<text x="555" y="98" text-anchor="end" opacity="0.7">
+  12.84% err · 17.79% ovhd
+</text>
+
+<!-- RDTSC -->
+<circle cx="319" cy="255" r="9" fill="#BA7517" opacity="0.9"/>
+<text x="333" y="249" class="label">RDTSC</text>
+<text x="333" y="263" opacity="0.7">
+  5.85% err · 8.55% ovhd
+</text>
+
+<!-- RDTSCP -->
+<circle cx="166" cy="139" r="9" fill="#378ADD" opacity="0.9"/>
+<text x="180" y="130" class="label">RDTSCP</text>
+<text x="180" y="144" opacity="0.7">
+  1.57% err · 15.36% ovhd
+</text>
+
+<!-- RDTSCP + LFENCE -->
+<circle cx="121" cy="65" r="9" fill="#3B6D11" opacity="0.9"/>
+<text x="135" y="57" class="label">RDTSCP + LFENCE</text>
+<text x="135" y="71" opacity="0.7">
+  0.31% err · 19.73% ovhd
+</text>
+
+<!-- ===== Legend (bottom) ===== -->
+<circle cx="112" cy="466" r="5" fill="#D85A30"/>
+<text x="122" y="470">chrono</text>
+<circle cx="200" cy="466" r="5" fill="#BA7517"/>
+<text x="210" y="470">RDTSC</text>
+<circle cx="286" cy="466" r="5" fill="#378ADD"/>
+<text x="296" y="470">RDTSCP</text>
+<circle cx="390" cy="466" r="5" fill="#3B6D11"/>
+<text x="400" y="470">RDTSCP + LFENCE</text>
+</svg>
diff --git a/benchmark/ctrack_benchmark.cpp b/benchmark/ctrack_benchmark.cpp
index e6ff4be..75c60f2 100644
--- a/benchmark/ctrack_benchmark.cpp
+++ b/benchmark/ctrack_benchmark.cpp
@@ -1,811 +1,870 @@
-#include <ctrack.hpp>
-#include <chrono>
-#include <thread>
-#include <vector>
-#include <atomic>
-#include <iostream>
-#include <fstream>
-#include <sstream>
-#include <iomanip>
-#include <cmath>
-#include <numeric>
-#include <algorithm>
-#include <filesystem>
-#include <cstring>
-#include <cstdio>
-
-#ifdef _WIN32
-#include <windows.h>
-#include <psapi.h>
-#else
-#include <sys/resource.h>
-#include <unistd.h>
-#endif
-
-// Configuration
-struct BenchmarkConfig
-{
-    size_t total_events = 50'000'000; // Default 50 million events
-    size_t thread_count = std::thread::hardware_concurrency();
-    bool record_baseline = false;
-    bool compare_baseline = false;
-    std::string baseline_file = "ctrack_baseline.json";
-    bool verbose = false;
-};
-
-// Baseline data structure
-struct BaselineData
-{
-    double accuracy_error_percent;
-    double accuracy_error_ms_per_event;
-    double overhead_percent;
-    double overhead_ms;
-    double overhead_ns_per_event;
-    double memory_bytes_per_event;
-    double calculation_time_ms;
-    double peak_calc_memory_mb;
-    size_t total_events;
-    size_t thread_count;
-    std::string timestamp;
-    std::string platform;
-};
-
-// Global config
-BenchmarkConfig g_config;
-
-// Get current memory usage in bytes
-size_t get_memory_usage()
-{
-#ifdef _WIN32
-    PROCESS_MEMORY_COUNTERS_EX pmc;
-    GetProcessMemoryInfo(GetCurrentProcess(), (PROCESS_MEMORY_COUNTERS *)&pmc, sizeof(pmc));
-    return pmc.WorkingSetSize;
-#else
-    struct rusage usage;
-    getrusage(RUSAGE_SELF, &usage);
-    return usage.ru_maxrss * 1024; // Convert KB to bytes on Linux
-#endif
-}
-
-// Precise busy wait function - waits for specified nanoseconds
-void busy_wait_ns(int64_t nanoseconds)
-{
-    auto start = std::chrono::high_resolution_clock::now();
-    auto target_duration = std::chrono::nanoseconds(nanoseconds);
-
-    while (true)
-    {
-        auto now = std::chrono::high_resolution_clock::now();
-        auto elapsed = now - start;
-        if (elapsed >= target_duration)
-        {
-            break;
-        }
-    }
-}
-
-// Benchmark functions with predictable timing
-void leaf_function(int depth)
-{
-    CTRACK_NAME("leaf_function");
-    // Busy wait for 1 microsecond (1000 ns)
-    busy_wait_ns(1000);
-}
-
-void level_3_function(int depth)
-{
-    CTRACK_NAME("level_3_function");
-    // Busy wait for 500 ns
-    busy_wait_ns(500);
-
-    // Call leaf function twice
-    leaf_function(depth + 1);
-    leaf_function(depth + 1);
-}
-
-void level_2_function(int depth, int iterations)
-{
-    CTRACK_NAME("level_2_function");
-    // Busy wait for 300 ns
-    busy_wait_ns(300);
-
-    for (int i = 0; i < iterations; ++i)
-    {
-        level_3_function(depth + 1);
-    }
-}
-
-void level_1_function(int iterations)
-{
-    CTRACK_NAME("level_1_function");
-    // Busy wait for 200 ns
-    busy_wait_ns(200);
-
-    level_2_function(1, iterations);
-}
-
-// Version without CTRACK for overhead measurement
-void leaf_function_no_track(int depth)
-{
-    busy_wait_ns(1000);
-}
-
-void level_3_function_no_track(int depth)
-{
-    busy_wait_ns(500);
-    leaf_function_no_track(depth + 1);
-    leaf_function_no_track(depth + 1);
-}
-
-void level_2_function_no_track(int depth, int iterations)
-{
-    busy_wait_ns(300);
-    for (int i = 0; i < iterations; ++i)
-    {
-        level_3_function_no_track(depth + 1);
-    }
-}
-
-void level_1_function_no_track(int iterations)
-{
-    busy_wait_ns(200);
-    level_2_function_no_track(1, iterations);
-}
-
-// Worker thread function
-void benchmark_worker(size_t events_per_thread, std::atomic<bool> &start_flag)
-{
-    // Wait for start signal
-    while (!start_flag.load())
-    {
-        std::this_thread::yield();
-    }
-
-    // Calculate iterations to reach target event count
-    // Each level_1 call generates: 1 + 1 + iterations * (1 + 2) events
-    // For iterations=10: 1 + 1 + 10 * 3 = 32 events per call
-    const int iterations = 10;
-    const int events_per_call = 2 + iterations * 3;
-    size_t calls_needed = events_per_thread / events_per_call;
-
-    for (size_t i = 0; i < calls_needed; ++i)
-    {
-        level_1_function(iterations);
-    }
-}
-
-// Worker thread function without tracking
-void benchmark_worker_no_track(size_t events_per_thread, std::atomic<bool> &start_flag)
-{
-    while (!start_flag.load())
-    {
-        std::this_thread::yield();
-    }
-
-    const int iterations = 10;
-    const int events_per_call = 2 + iterations * 3;
-    size_t calls_needed = events_per_thread / events_per_call;
-
-    for (size_t i = 0; i < calls_needed; ++i)
-    {
-        level_1_function_no_track(iterations);
-    }
-}
-
-// Parse timing from CTRACK results string for a specific function
-double parse_function_timing(const std::string &results, const std::string &function_name)
-{
-    // Look for the Details section first
-    size_t details_pos = results.find("Details");
-    if (details_pos == std::string::npos)
-    {
-        return -1.0; // Details section not found
-    }
-
-    // Look for the function name after the Details section
-    size_t func_pos = results.find(function_name, details_pos);
-    if (func_pos == std::string::npos)
-    {
-        return -1.0; // Function not found in Details section
-    }
-
-    // Find the line containing this function in the Details section
-    size_t line_start = results.rfind('\n', func_pos);
-    if (line_start == std::string::npos)
-        line_start = details_pos;
-    else
-        line_start++; // Skip the newline
-
-    size_t line_end = results.find('\n', func_pos);
-    if (line_end == std::string::npos)
-        line_end = results.length();
-
-    std::string line = results.substr(line_start, line_end - line_start);
-
-    // Look for the "time acc" column value (4th column after filename, function, line)
-    // Split by | and find the 4th field
-    std::vector<std::string> fields;
-    std::istringstream iss(line);
-    std::string field;
-
-    while (std::getline(iss, field, '|'))
-    {
-        // Trim whitespace
-        field.erase(0, field.find_first_not_of(" \t"));
-        field.erase(field.find_last_not_of(" \t") + 1);
-        if (!field.empty())
-        {
-            fields.push_back(field);
-        }
-    }
-
-    // The time acc should be in the 4th field (0-indexed: filename=0, function=1, line=2, time_acc=3)
-    if (fields.size() > 3)
-    {
-        std::string time_acc = fields[3];
-
-        // Parse value and unit from time_acc (e.g., "2.09 ms")
-        std::istringstream time_iss(time_acc);
-        double value;
-        std::string unit;
-
-        if (time_iss >> value >> unit)
-        {
-            // Convert to nanoseconds based on unit
-            if (unit == "s")
-                return value * 1e9;
-            else if (unit == "ms")
-                return value * 1e6;
-            else if (unit == "mcs")
-                return value * 1e3;
-            else if (unit == "ns")
-                return value;
-        }
-    }
-
-    return -1.0; // Could not parse
-}
-
-// Measure accuracy by comparing known timings with CTRACK measurements
-std::pair<double, double> measure_accuracy()
-{
-    std::cout << "\n=== Measuring Accuracy ===" << std::endl;
-
-    // Clear any previous tracking data by getting and discarding results
-    ctrack::result_as_string();
-
-    // Run a controlled test with known timings
-    const int test_iterations = 100;
-    for (int i = 0; i < test_iterations; ++i)
-    {
-        level_1_function(10);
-    }
-
-    // Get results
-    auto results = ctrack::result_as_string();
-
-    // Expected timings per iteration (in nanoseconds):
-    // leaf_function: 1000ns (called 20 times per iteration) = 20,000ns total per iteration
-    // level_3_function: 500ns + 2*1000ns = 2500ns (called 10 times per iteration) = 25,000ns total per iteration
-    // level_2_function: 300ns + 10*2500ns = 25,300ns (called 1 time per iteration) = 25,300ns total per iteration
-    // level_1_function: 200ns + 25,300ns = 25,500ns (called 1 time per iteration) = 25,500ns total per iteration
-
-    struct ExpectedTiming
-    {
-        std::string name;
-        double expected_total_ns;
-        int call_count;
-    };
-
-    std::vector<ExpectedTiming> expected_timings = {
-        {"leaf_function", 1000.0 * 20 * test_iterations, 20 * test_iterations},
-        {"level_3_function", 2500.0 * 10 * test_iterations, 10 * test_iterations},
-        {"level_2_function", 25300.0 * 1 * test_iterations, 1 * test_iterations},
-        {"level_1_function", 25500.0 * 1 * test_iterations, 1 * test_iterations}};
-
-    double total_expected_time = 0.0;
-    double total_actual_time = 0.0;
-    double max_absolute_error = 0.0;
-
-    if (g_config.verbose)
-    {
-        std::cout << "Function accuracy analysis:" << std::endl;
-    }
-
-    for (const auto &timing : expected_timings)
-    {
-        double actual_ns = parse_function_timing(results, timing.name);
-        if (actual_ns > 0)
-        {
-            double expected_ns = timing.expected_total_ns;
-            double absolute_error = std::abs(actual_ns - expected_ns);
-            double percent_error = (absolute_error / expected_ns) * 100.0;
-
-            total_expected_time += expected_ns;
-            total_actual_time += actual_ns;
-            max_absolute_error = (std::max)(max_absolute_error, absolute_error);
-
-            if (g_config.verbose)
-            {
-                std::cout << "  " << timing.name << ": expected " << expected_ns / 1e6 << " ms, got "
-                          << actual_ns / 1e6 << " ms (error: " << percent_error << "%)" << std::endl;
-            }
-        }
-        else if (g_config.verbose)
-        {
-            std::cout << "  " << timing.name << ": could not parse timing" << std::endl;
-        }
-    }
-
-    double overall_error_percent = 0.0;
-    double overall_error_ms = 0.0;
-
-    if (total_expected_time > 0)
-    {
-        double total_absolute_error = std::abs(total_actual_time - total_expected_time);
-        overall_error_percent = (total_absolute_error / total_expected_time) * 100.0;
-
-        // Calculate total number of events across all functions
-        double total_events = 0;
-        for (const auto &timing : expected_timings)
-        {
-            total_events += timing.call_count;
-        }
-
-        // Convert to milliseconds per event
-        overall_error_ms = (total_absolute_error / 1e6) / total_events; // Convert to milliseconds per event
-    }
-
-    if (g_config.verbose)
-    {
-        std::cout << "Overall accuracy error: " << overall_error_percent << "% (" << overall_error_ms << " ms per event)" << std::endl;
-    }
-
-    return {overall_error_percent, overall_error_ms};
-}
-
-// Measure overhead by comparing with and without CTRACK
-std::tuple<double, double, double> measure_overhead()
-{
-    std::cout << "\n=== Measuring Overhead ===" << std::endl;
-
-    const size_t overhead_events = 1'000'000; // 1M events for overhead test
-    size_t events_per_thread = overhead_events / g_config.thread_count;
-
-    // Measure without CTRACK
-    auto start_no_track = std::chrono::high_resolution_clock::now();
-    {
-        std::vector<std::thread> threads;
-        std::atomic<bool> start_flag{false};
-
-        for (size_t i = 0; i < g_config.thread_count; ++i)
-        {
-            threads.emplace_back(benchmark_worker_no_track, events_per_thread, std::ref(start_flag));
-        }
-
-        start_flag = true;
-
-        for (auto &t : threads)
-        {
-            t.join();
-        }
-    }
-    auto end_no_track = std::chrono::high_resolution_clock::now();
-    auto duration_no_track = std::chrono::duration_cast<std::chrono::microseconds>(end_no_track - start_no_track).count();
-
-    // Clear tracking data by getting and discarding results
-    ctrack::result_as_string();
-
-    // Measure with CTRACK
-    auto start_track = std::chrono::high_resolution_clock::now();
-    {
-        std::vector<std::thread> threads;
-        std::atomic<bool> start_flag{false};
-
-        for (size_t i = 0; i < g_config.thread_count; ++i)
-        {
-            threads.emplace_back(benchmark_worker, events_per_thread, std::ref(start_flag));
-        }
-
-        start_flag = true;
-
-        for (auto &t : threads)
-        {
-            t.join();
-        }
-    }
-    auto end_track = std::chrono::high_resolution_clock::now();
-    auto duration_track = std::chrono::duration_cast<std::chrono::microseconds>(end_track - start_track).count();
-
-    double overhead_percent = ((double)(duration_track - duration_no_track) / duration_no_track) * 100.0;
-    double overhead_ms = (duration_track - duration_no_track) / 1000.0;                               // Convert microseconds to milliseconds
-    double overhead_ns_per_event = ((duration_track - duration_no_track) * 1000.0) / overhead_events; // nanoseconds per event
-
-    if (g_config.verbose)
-    {
-        std::cout << "Without CTRACK: " << duration_no_track << " µs" << std::endl;
-        std::cout << "With CTRACK: " << duration_track << " µs" << std::endl;
-        std::cout << "Overhead: " << overhead_percent << "% (" << overhead_ms << " ms total, "
-                  << overhead_ns_per_event << " ns per event)" << std::endl;
-    }
-
-    return {overhead_percent, overhead_ms, overhead_ns_per_event};
-}
-
-// Measure memory usage and calculation time
-std::tuple<double, double, double> measure_memory_and_calculation_time()
-{
-    std::cout << "\n=== Measuring Memory Usage and Calculation Time ===" << std::endl;
-
-    // Clear any previous tracking data by getting and discarding results
-    ctrack::result_as_string();
-
-    // Measure initial memory
-    size_t initial_memory = get_memory_usage();
-
-    // Generate events
-    size_t events_per_thread = g_config.total_events / g_config.thread_count;
-
-    if (g_config.verbose)
-    {
-        std::cout << "Generating " << g_config.total_events << " events across "
-                  << g_config.thread_count << " threads..." << std::endl;
-    }
-
-    auto gen_start = std::chrono::high_resolution_clock::now();
-    {
-        std::vector<std::thread> threads;
-        std::atomic<bool> start_flag{false};
-
-        for (size_t i = 0; i < g_config.thread_count; ++i)
-        {
-            threads.emplace_back(benchmark_worker, events_per_thread, std::ref(start_flag));
-        }
-
-        start_flag = true;
-
-        for (auto &t : threads)
-        {
-            t.join();
-        }
-    }
-    auto gen_end = std::chrono::high_resolution_clock::now();
-
-    // Measure memory after event generation
-    size_t post_event_memory = get_memory_usage();
-    size_t memory_used = post_event_memory - initial_memory;
-    double bytes_per_event = (double)memory_used / g_config.total_events;
-
-    if (g_config.verbose)
-    {
-        auto gen_duration = std::chrono::duration_cast<std::chrono::milliseconds>(gen_end - gen_start).count();
-        std::cout << "Event generation took: " << gen_duration << " ms" << std::endl;
-        std::cout << "Memory used: " << memory_used / (1024.0 * 1024.0) << " MB" << std::endl;
-        std::cout << "Memory per event: " << bytes_per_event << " bytes" << std::endl;
-    }
-
-    // Measure calculation time and peak memory usage
-    std::atomic<bool> monitoring{true};
-    std::atomic<size_t> peak_memory{post_event_memory};
-
-    // Start memory monitoring thread
-    std::thread monitor_thread([&monitoring, &peak_memory, initial_memory]()
-                               {
-        while (monitoring.load()) {
-            size_t current_memory = get_memory_usage();
-            size_t current_peak = peak_memory.load();
-            while (current_memory > current_peak && 
-                   !peak_memory.compare_exchange_weak(current_peak, current_memory)) {}
-            std::this_thread::sleep_for(std::chrono::milliseconds(10)); // Poll every 10ms
-        } });
-
-    auto calc_start = std::chrono::high_resolution_clock::now();
-    auto results = ctrack::result_as_string();
-    auto calc_end = std::chrono::high_resolution_clock::now();
-
-    // Stop monitoring
-    monitoring = false;
-    monitor_thread.join();
-
-    auto calc_duration = std::chrono::duration_cast<std::chrono::microseconds>(calc_end - calc_start).count() / 1000.0;
-    double peak_calc_memory_mb = (peak_memory.load() - initial_memory) / (1024.0 * 1024.0);
-
-    if (g_config.verbose)
-    {
-        std::cout << "Result calculation took: " << calc_duration << " ms" << std::endl;
-        std::cout << "Peak memory during calculation: " << peak_calc_memory_mb << " MB" << std::endl;
-    }
-
-    return {bytes_per_event, calc_duration, peak_calc_memory_mb};
-}
-
-// Save baseline to file
-void save_baseline(const BaselineData &data)
-{
-    std::ofstream file(g_config.baseline_file);
-    if (!file)
-    {
-        std::cerr << "Error: Could not open baseline file for writing: " << g_config.baseline_file << std::endl;
-        return;
-    }
-
-    // Simple JSON format
-    file << "{\n";
-    file << "  \"accuracy_error_percent\": " << data.accuracy_error_percent << ",\n";
-    file << "  \"accuracy_error_ms_per_event\": " << data.accuracy_error_ms_per_event << ",\n";
-    file << "  \"overhead_percent\": " << data.overhead_percent << ",\n";
-    file << "  \"overhead_ms\": " << data.overhead_ms << ",\n";
-    file << "  \"overhead_ns_per_event\": " << data.overhead_ns_per_event << ",\n";
-    file << "  \"memory_bytes_per_event\": " << data.memory_bytes_per_event << ",\n";
-    file << "  \"calculation_time_ms\": " << data.calculation_time_ms << ",\n";
-    file << "  \"peak_calc_memory_mb\": " << data.peak_calc_memory_mb << ",\n";
-    file << "  \"total_events\": " << data.total_events << ",\n";
-    file << "  \"thread_count\": " << data.thread_count << ",\n";
-    file << "  \"timestamp\": \"" << data.timestamp << "\",\n";
-    file << "  \"platform\": \"" << data.platform << "\"\n";
-    file << "}\n";
-
-    std::cout << "\nBaseline saved to: " << g_config.baseline_file << std::endl;
-}
-
-// Load baseline from file
-bool load_baseline(BaselineData &data)
-{
-    std::ifstream file(g_config.baseline_file);
-    if (!file)
-    {
-        return false;
-    }
-
-    // Simple JSON parsing (production code would use a proper JSON library)
-    std::string line;
-    while (std::getline(file, line))
-    {
-        if (line.find("\"accuracy_error_percent\":") != std::string::npos)
-        {
-            size_t pos = line.find(": ") + 2;
-            size_t end = line.find(",", pos);
-            data.accuracy_error_percent = std::stod(line.substr(pos, end - pos));
-        }
-        else if (line.find("\"accuracy_error_ms_per_event\":") != std::string::npos)
-        {
-            size_t pos = line.find(": ") + 2;
-            size_t end = line.find(",", pos);
-            data.accuracy_error_ms_per_event = std::stod(line.substr(pos, end - pos));
-        }
-        else if (line.find("\"overhead_percent\":") != std::string::npos)
-        {
-            size_t pos = line.find(": ") + 2;
-            size_t end = line.find(",", pos);
-            data.overhead_percent = std::stod(line.substr(pos, end - pos));
-        }
-        else if (line.find("\"overhead_ms\":") != std::string::npos)
-        {
-            size_t pos = line.find(": ") + 2;
-            size_t end = line.find(",", pos);
-            data.overhead_ms = std::stod(line.substr(pos, end - pos));
-        }
-        else if (line.find("\"overhead_ns_per_event\":") != std::string::npos)
-        {
-            size_t pos = line.find(": ") + 2;
-            size_t end = line.find(",", pos);
-            data.overhead_ns_per_event = std::stod(line.substr(pos, end - pos));
-        }
-        else if (line.find("\"memory_bytes_per_event\":") != std::string::npos)
-        {
-            size_t pos = line.find(": ") + 2;
-            size_t end = line.find(",", pos);
-            data.memory_bytes_per_event = std::stod(line.substr(pos, end - pos));
-        }
-        else if (line.find("\"calculation_time_ms\":") != std::string::npos)
-        {
-            size_t pos = line.find(": ") + 2;
-            size_t end = line.find(",", pos);
-            data.calculation_time_ms = std::stod(line.substr(pos, end - pos));
-        }
-        else if (line.find("\"peak_calc_memory_mb\":") != std::string::npos)
-        {
-            size_t pos = line.find(": ") + 2;
-            size_t end = line.find(",", pos);
-            data.peak_calc_memory_mb = std::stod(line.substr(pos, end - pos));
-        }
-        else if (line.find("\"total_events\":") != std::string::npos)
-        {
-            size_t pos = line.find(": ") + 2;
-            size_t end = line.find(",", pos);
-            data.total_events = std::stoull(line.substr(pos, end - pos));
-        }
-        else if (line.find("\"thread_count\":") != std::string::npos)
-        {
-            size_t pos = line.find(": ") + 2;
-            size_t end = line.find(",", pos);
-            data.thread_count = std::stoull(line.substr(pos, end - pos));
-        }
-    }
-
-    return true;
-}
-
-// Compare current results with baseline
-void compare_with_baseline(const BaselineData &current)
-{
-    BaselineData baseline;
-    if (!load_baseline(baseline))
-    {
-        std::cerr << "Error: Could not load baseline file: " << g_config.baseline_file << std::endl;
-        return;
-    }
-
-    std::cout << "\n=== Baseline Comparison ===" << std::endl;
-    std::cout << std::fixed << std::setprecision(2);
-    auto print_comparison = [](const std::string &metric, double baseline_val, double current_val, bool lower_is_better = true)
-    {
-        double diff = current_val - baseline_val;
-        double percent_change = (diff / baseline_val) * 100.0;
-
-        std::string direction = (diff > 0) ? "increased" : "decreased";
-        std::string indicator = (lower_is_better ? (diff > 0 ? "worse" : "better") : (diff > 0 ? "better" : "worse"));
-
-        std::cout << metric << ":\n";
-        std::cout << "  Baseline: " << baseline_val << "\n";
-        std::cout << "  Current:  " << current_val << "\n";
-        std::cout << "  Change:   " << indicator << " - " << std::abs(percent_change) << "% " << direction << "\n\n";
-    };
-
-    print_comparison("Accuracy Error %", baseline.accuracy_error_percent, current.accuracy_error_percent);
-    print_comparison("Accuracy Error (ms/event)", baseline.accuracy_error_ms_per_event, current.accuracy_error_ms_per_event);
-    print_comparison("Overhead %", std::abs(baseline.overhead_percent), std::abs(current.overhead_percent));
-    print_comparison("Overhead Time (ms)", std::abs(baseline.overhead_ms), std::abs(current.overhead_ms));
-    print_comparison("Overhead per Event (ns)", baseline.overhead_ns_per_event, current.overhead_ns_per_event);
-    print_comparison("Memory/Event (bytes)", baseline.memory_bytes_per_event, current.memory_bytes_per_event);
-    print_comparison("Calculation Time (ms)", baseline.calculation_time_ms, current.calculation_time_ms);
-    print_comparison("Peak Calc Memory (MB)", baseline.peak_calc_memory_mb, current.peak_calc_memory_mb);
-}
-
-// Get platform string
-std::string get_platform()
-{
-#ifdef _WIN32
-    return "Windows";
-#elif __APPLE__
-    return "macOS";
-#elif __linux__
-    return "Linux";
-#else
-    return "Unknown";
-#endif
-}
-
-// Get current timestamp
-std::string get_timestamp()
-{
-    auto now = std::chrono::system_clock::now();
-    auto time_t = std::chrono::system_clock::to_time_t(now);
-    std::stringstream ss;
-#ifdef _WIN32
-    struct tm time_info;
-    localtime_s(&time_info, &time_t);
-    ss << std::put_time(&time_info, "%Y-%m-%d %H:%M:%S");
-#else
-    ss << std::put_time(std::localtime(&time_t), "%Y-%m-%d %H:%M:%S");
-#endif
-    return ss.str();
-}
-
-// Print usage
-void print_usage(const char *program_name)
-{
-    std::cout << "Usage: " << program_name << " [options]\n";
-    std::cout << "Options:\n";
-    std::cout << "  --events <count>      Number of events to generate (default: 50000000)\n";
-    std::cout << "  --threads <count>     Number of threads to use (default: hardware concurrency)\n";
-    std::cout << "  --baseline <file>     Baseline file path (default: ctrack_baseline.json)\n";
-    std::cout << "  --record-baseline     Record current results as baseline\n";
-    std::cout << "  --compare-baseline    Compare results with baseline\n";
-    std::cout << "  --verbose             Enable verbose output\n";
-    std::cout << "  --help                Show this help message\n";
-}
-
-// Parse command line arguments
-bool parse_args(int argc, char *argv[])
-{
-    for (int i = 1; i < argc; ++i)
-    {
-        std::string arg = argv[i];
-
-        if (arg == "--help")
-        {
-            print_usage(argv[0]);
-            return false;
-        }
-        else if (arg == "--events" && i + 1 < argc)
-        {
-            g_config.total_events = std::stoull(argv[++i]);
-        }
-        else if (arg == "--threads" && i + 1 < argc)
-        {
-            g_config.thread_count = std::stoull(argv[++i]);
-        }
-        else if (arg == "--baseline" && i + 1 < argc)
-        {
-            g_config.baseline_file = argv[++i];
-        }
-        else if (arg == "--record-baseline")
-        {
-            g_config.record_baseline = true;
-        }
-        else if (arg == "--compare-baseline")
-        {
-            g_config.compare_baseline = true;
-        }
-        else if (arg == "--verbose")
-        {
-            g_config.verbose = true;
-        }
-        else
-        {
-            std::cerr << "Unknown option: " << arg << std::endl;
-            print_usage(argv[0]);
-            return false;
-        }
-    }
-
-    return true;
-}
-
-int main(int argc, char *argv[])
-{
-    if (!parse_args(argc, argv))
-    {
-        return 1;
-    }
-
-    std::cout << "CTRACK Comprehensive Benchmark\n";
-    std::cout << "==============================\n";
-    std::cout << "Total events: " << g_config.total_events << "\n";
-    std::cout << "Thread count: " << g_config.thread_count << "\n";
-    std::cout << "Events per thread: " << g_config.total_events / g_config.thread_count << "\n";
-
-    // Run benchmarks
-    auto [accuracy_error_percent, accuracy_error_ms_per_event] = measure_accuracy();
-    auto [overhead_percent, overhead_ms, overhead_ns_per_event] = measure_overhead();
-    auto [bytes_per_event, calc_time, peak_calc_memory] = measure_memory_and_calculation_time();
-
-    // Prepare results
-    BaselineData current_data;
-    current_data.accuracy_error_percent = accuracy_error_percent;
-    current_data.accuracy_error_ms_per_event = accuracy_error_ms_per_event;
-    current_data.overhead_percent = overhead_percent;
-    current_data.overhead_ms = overhead_ms;
-    current_data.overhead_ns_per_event = overhead_ns_per_event;
-    current_data.memory_bytes_per_event = bytes_per_event;
-    current_data.calculation_time_ms = calc_time;
-    current_data.peak_calc_memory_mb = peak_calc_memory;
-    current_data.total_events = g_config.total_events;
-    current_data.thread_count = g_config.thread_count;
-    current_data.timestamp = get_timestamp();
-    current_data.platform = get_platform();
-
-    // Print summary
-    std::cout << "\n=== Benchmark Results ===" << std::endl;
-    std::cout << std::fixed << std::setprecision(2);
-    std::cout << "Accuracy error: " << accuracy_error_percent << "% (" << accuracy_error_ms_per_event << " ms per event)" << std::endl;
-    std::cout << "Overhead: " << overhead_percent << "% (" << overhead_ms << " ms total, "
-              << overhead_ns_per_event << " ns per event)" << std::endl;
-    std::cout << "Memory per event: " << bytes_per_event << " bytes" << std::endl;
-    std::cout << "Calculation time: " << calc_time << " ms" << std::endl;
-    std::cout << "Peak calculation memory: " << peak_calc_memory << " MB" << std::endl;
-
-    // Handle baseline operations
-    if (g_config.record_baseline)
-    {
-        save_baseline(current_data);
-    }
-
-    if (g_config.compare_baseline)
-    {
-        compare_with_baseline(current_data);
-    }
-
-    return 0;
-}
\ No newline at end of file
+#include <ctrack.hpp>
+
+#include <algorithm>
+#include <atomic>
+#include <chrono>
+#include <cmath>
+#include <cstdint>
+#include <cstdio>
+#include <cstring>
+#include <ctime>
+#include <exception>
+#include <fstream>
+#include <functional>
+#include <iomanip>
+#include <iostream>
+#include <sstream>
+#include <string>
+#include <thread>
+#include <tuple>
+#include <utility>
+#include <vector>
+
+#ifdef _WIN32
+#include <windows.h>
+#include <psapi.h>
+#else
+#include <sys/resource.h>
+#include <time.h>
+#include <unistd.h>
+#endif
+
+// BENCHMARK_NOINLINE: function attribute that forbids inlining.
+//
+// It is applied to the busy-wait helper and to the untracked (_no_track)
+// workload functions. Under -O3 those would otherwise be inlined into the
+// worker loop, where the optimiser could merge or drop busy-wait iterations
+// and make the measured overhead look too small, or even negative.
+#ifdef _MSC_VER
+#  define BENCHMARK_NOINLINE __declspec(noinline)
+#else
+#  define BENCHMARK_NOINLINE __attribute__((noinline))
+#endif
+
+// ---------------------------------------------------------------------------
+// raw_clock_ns: monotonic clock reading in nanoseconds, used as the outer
+// timer for the overhead measurement.
+//
+//   POSIX:   clock_gettime(CLOCK_MONOTONIC_RAW)
+//   Windows: QueryPerformanceCounter scaled by QueryPerformanceFrequency
+//
+// The intent is for this timer to be independent of std::chrono and of
+// whichever clock ctrack uses internally, so that the tracker is not timed
+// with its own clock.
+// NOTE(review): whether these sources really bypass the vDSO/TSC path depends
+// on the kernel and C library in use -- confirm on the target platform.
+//
+// Returns nanoseconds since an unspecified epoch; only the difference between
+// two readings is meaningful.
+// ---------------------------------------------------------------------------
+inline int64_t raw_clock_ns()
+{
+#ifdef _WIN32
+  // The performance-counter frequency does not change while the system is
+  // running, so query it once instead of on every reading.
+  static const int64_t ticks_per_second = []
+  {
+    LARGE_INTEGER freq;
+    QueryPerformanceFrequency(&freq);
+    return static_cast<int64_t>(freq.QuadPart);
+  }();
+
+  LARGE_INTEGER cnt;
+  QueryPerformanceCounter(&cnt);
+  const int64_t ticks = static_cast<int64_t>(cnt.QuadPart);
+
+  // Scale whole seconds and the sub-second remainder separately. Computing
+  // ticks * 1e9 in one step overflows int64_t once ticks exceeds ~9.2e9,
+  // i.e. after roughly 15 minutes of uptime with a 10 MHz counter.
+  const int64_t whole_seconds = ticks / ticks_per_second;
+  const int64_t remainder     = ticks % ticks_per_second;
+  return whole_seconds * 1'000'000'000LL + remainder * 1'000'000'000LL / ticks_per_second;
+#else
+  struct timespec ts;
+  clock_gettime(CLOCK_MONOTONIC_RAW, &ts);
+  return static_cast<int64_t>(ts.tv_sec) * 1'000'000'000LL + ts.tv_nsec;
+#endif
+}
+
+
+// Configuration
+//
+// Run-time settings for the benchmark. The defaults below apply unless
+// parse_args() overrides them from the command line.
+struct BenchmarkConfig
+{
+  size_t total_events = 50'000'000; // Default 50 million events
+  // hardware_concurrency() is allowed to return 0 when the value cannot be
+  // determined; fall back to one thread so the per-thread event split never
+  // divides by zero.
+  size_t thread_count = (std::max)(1u, std::thread::hardware_concurrency());
+  bool record_baseline = false;   // write the results to baseline_file
+  bool compare_baseline = false;  // compare the results against baseline_file
+  std::string baseline_file = "ctrack_baseline.json";
+  bool verbose = false;           // print per-step details
+};
+
+// Baseline data structure
+//
+// One set of benchmark results, as written by save_baseline() and read back
+// by load_baseline(). Every numeric member is zero-initialised so that a key
+// missing from a baseline file yields 0 rather than an indeterminate value.
+struct BaselineData
+{
+  double accuracy_error_percent = 0.0;
+  // Microseconds per event. The baseline file stores this value under the
+  // key "accuracy_error_ms_per_event".
+  double accuracy_error_us_per_event = 0.0;
+  double overhead_percent = 0.0;
+  double overhead_ms = 0.0;
+  double overhead_ns_per_event = 0.0;
+  double memory_bytes_per_event = 0.0;
+  double calculation_time_ms = 0.0;
+  double peak_calc_memory_mb = 0.0;
+  size_t total_events = 0;
+  size_t thread_count = 0;
+  std::string timestamp;
+  std::string platform;
+};
+
+// Global config
+// Single process-wide configuration instance: parse_args() writes it, and the
+// measurement and baseline functions read it.
+BenchmarkConfig g_config;
+
+// Report the process's resident memory in bytes.
+//
+//   Windows: the current working-set size from GetProcessMemoryInfo.
+//   POSIX:   ru_maxrss from getrusage(RUSAGE_SELF). This is the *peak*
+//            resident set size (a high-water mark), reported in kilobytes on
+//            Linux and in bytes on macOS.
+//
+// Returns 0 if the underlying query fails.
+size_t get_memory_usage()
+{
+#ifdef _WIN32
+  PROCESS_MEMORY_COUNTERS_EX pmc;
+  if (!GetProcessMemoryInfo(GetCurrentProcess(), (PROCESS_MEMORY_COUNTERS *)&pmc, sizeof(pmc)))
+  {
+    return 0;
+  }
+  return pmc.WorkingSetSize;
+#else
+  struct rusage usage;
+  if (getrusage(RUSAGE_SELF, &usage) != 0)
+  {
+    return 0;
+  }
+#ifdef __APPLE__
+  return static_cast<size_t>(usage.ru_maxrss); // already in bytes on macOS
+#else
+  return static_cast<size_t>(usage.ru_maxrss) * 1024; // Convert KB to bytes on Linux
+#endif
+#endif
+}
+
+// Spin on std::chrono::high_resolution_clock until at least `nanoseconds`
+// have elapsed since entry. The thread never sleeps or yields; it polls the
+// clock in a tight loop. Marked BENCHMARK_NOINLINE so each call site keeps a
+// real call.
+BENCHMARK_NOINLINE void busy_wait_ns(int64_t nanoseconds)
+{
+  using spin_clock = std::chrono::high_resolution_clock;
+
+  const auto entered_at = spin_clock::now();
+  const auto required = std::chrono::nanoseconds(nanoseconds);
+
+  // Keep polling while the elapsed time is still short of the request.
+  while (spin_clock::now() - entered_at < required)
+  {
+  }
+}
+
+// Benchmark functions with predictable timing
+//
+// leaf_function: innermost tracked workload. Declares a ctrack event named
+// "leaf_function" via CTRACK_NAME (macro from ctrack.hpp; presumably it times
+// the enclosing scope -- confirm) and then spins for 1 microsecond.
+// `depth` is not read by the body.
+void leaf_function(int depth)
+{
+  CTRACK_NAME("leaf_function");
+  // Busy wait for 1 microsecond (1000 ns)
+  busy_wait_ns(1000);
+}
+
+// level_3_function: tracked workload one level above the leaf. Spins for
+// 500 ns of its own, then calls leaf_function twice, so one call has a
+// nominal duration of 500 + 2 * 1000 = 2500 ns.
+// `depth` is only forwarded (as depth + 1) to the leaf calls.
+void level_3_function(int depth)
+{
+  CTRACK_NAME("level_3_function");
+  // Busy wait for 500 ns
+  busy_wait_ns(500);
+
+  // Call leaf function twice
+  leaf_function(depth + 1);
+  leaf_function(depth + 1);
+}
+
+// level_2_function: tracked workload that spins for 300 ns and then calls
+// level_3_function `iterations` times. With iterations = 10 the nominal
+// duration is 300 + 10 * 2500 = 25,300 ns.
+// `depth` is only forwarded (as depth + 1) to level_3_function.
+void level_2_function(int depth, int iterations)
+{
+  CTRACK_NAME("level_2_function");
+  // Busy wait for 300 ns
+  busy_wait_ns(300);
+
+  for (int i = 0; i < iterations; ++i)
+  {
+    level_3_function(depth + 1);
+  }
+}
+
+// level_1_function: root of the tracked call tree. Spins for 200 ns and then
+// runs level_2_function once at depth 1, passing `iterations` through. With
+// iterations = 10 the nominal duration is 200 + 25,300 = 25,500 ns.
+void level_1_function(int iterations)
+{
+  CTRACK_NAME("level_1_function");
+  // Busy wait for 200 ns
+  busy_wait_ns(200);
+
+  level_2_function(1, iterations);
+}
+
+// Version without CTRACK for overhead measurement
+//
+// leaf_function_no_track: same 1 microsecond busy wait as leaf_function but
+// with no CTRACK_NAME. Marked BENCHMARK_NOINLINE so the call is not folded
+// into its caller. `depth` is not read by the body.
+BENCHMARK_NOINLINE void leaf_function_no_track(int depth)
+{
+  busy_wait_ns(1000);
+}
+
+// level_3_function_no_track: untracked twin of level_3_function. Spins for
+// 500 ns, then calls leaf_function_no_track twice with depth + 1.
+BENCHMARK_NOINLINE void level_3_function_no_track(int depth)
+{
+  busy_wait_ns(500);
+  leaf_function_no_track(depth + 1);
+  leaf_function_no_track(depth + 1);
+}
+
+// level_2_function_no_track: untracked twin of level_2_function. Spins for
+// 300 ns, then calls level_3_function_no_track `iterations` times with
+// depth + 1.
+BENCHMARK_NOINLINE void level_2_function_no_track(int depth, int iterations)
+{
+  busy_wait_ns(300);
+  for (int i = 0; i < iterations; ++i)
+  {
+    level_3_function_no_track(depth + 1);
+  }
+}
+
+// level_1_function_no_track: untracked twin of level_1_function. Spins for
+// 200 ns, then runs level_2_function_no_track once at depth 1, passing
+// `iterations` through.
+BENCHMARK_NOINLINE void level_1_function_no_track(int iterations)
+{
+  busy_wait_ns(200);
+  level_2_function_no_track(1, iterations);
+}
+
+// Thread body for the tracked workload.
+//
+// Blocks (yielding) until `start_flag` becomes true, then calls
+// level_1_function often enough to produce about `events_per_thread` events.
+// The call count is rounded down to a whole number of level_1 calls.
+void benchmark_worker(size_t events_per_thread, std::atomic<bool> &start_flag)
+{
+  // Hold here until the launcher releases every worker at once.
+  while (start_flag.load() == false)
+    std::this_thread::yield();
+
+  // A single level_1 call produces:
+  //   1 (level_1) + 1 (level_2) + iterations * (1 level_3 + 2 leaf) events,
+  // which for iterations = 10 is 1 + 1 + 10 * 3 = 32.
+  constexpr int kIterations = 10;
+  constexpr int kEventsPerCall = 2 + kIterations * 3;
+  const size_t total_calls = events_per_thread / kEventsPerCall;
+
+  for (size_t remaining = total_calls; remaining > 0; --remaining)
+    level_1_function(kIterations);
+}
+
+// Thread body for the untracked workload.
+//
+// Same shape as benchmark_worker: waits (yielding) for `start_flag`, then
+// makes the same number of calls, but to level_1_function_no_track, so no
+// ctrack events are recorded.
+void benchmark_worker_no_track(size_t events_per_thread, std::atomic<bool> &start_flag)
+{
+  // Hold here until the launcher releases every worker at once.
+  while (start_flag.load() == false)
+    std::this_thread::yield();
+
+  // Use the tracked variant's per-call event count (2 + 10 * 3 = 32) so both
+  // variants perform the same number of level_1 calls.
+  constexpr int kIterations = 10;
+  constexpr int kEventsPerCall = 2 + kIterations * 3;
+  const size_t total_calls = events_per_thread / kEventsPerCall;
+
+  for (size_t remaining = total_calls; remaining > 0; --remaining)
+    level_1_function_no_track(kIterations);
+}
+
+// Parse timing from CTRACK results string for a specific function
+//
+// Finds the first line after the "Details" heading that mentions
+// `function_name`, splits that line on '|', and reads the 4th non-empty
+// column ("time acc", after filename, function and line) as
+// "<number> <unit>".
+//
+// Parameters:
+//   results        full text report, as returned by ctrack::result_as_string()
+//   function_name  name to look for in the Details table
+//
+// Returns the accumulated time in nanoseconds, or -1.0 when the Details
+// section, the function row, the column or a known unit cannot be found.
+double parse_function_timing(const std::string &results, const std::string &function_name)
+{
+  const double not_found = -1.0;
+
+  // Look for the Details section first
+  const size_t details_pos = results.find("Details");
+  if (details_pos == std::string::npos)
+  {
+    return not_found; // Details section not found
+  }
+
+  // Look for the function name after the Details section
+  const size_t func_pos = results.find(function_name, details_pos);
+  if (func_pos == std::string::npos)
+  {
+    return not_found; // Function not found in Details section
+  }
+
+  // Isolate the line that contains the match.
+  size_t line_start = results.rfind('\n', func_pos);
+  if (line_start == std::string::npos)
+    line_start = details_pos;
+  else
+    line_start++; // Skip the newline
+
+  size_t line_end = results.find('\n', func_pos);
+  if (line_end == std::string::npos)
+    line_end = results.length();
+
+  const std::string line = results.substr(line_start, line_end - line_start);
+
+  // Split on '|' and keep the trimmed, non-empty columns.
+  std::vector<std::string> fields;
+  std::istringstream iss(line);
+  std::string field;
+
+  while (std::getline(iss, field, '|'))
+  {
+    field.erase(0, field.find_first_not_of(" \t"));
+    field.erase(field.find_last_not_of(" \t") + 1);
+    if (!field.empty())
+    {
+      fields.push_back(field);
+    }
+  }
+
+  // The time acc should be in the 4th field (0-indexed: filename=0, function=1, line=2, time_acc=3)
+  if (fields.size() <= 3)
+  {
+    return not_found;
+  }
+
+  // Parse value and unit from time_acc (e.g., "2.09 ms")
+  std::istringstream time_iss(fields[3]);
+  double value = 0.0;
+  std::string unit;
+  if (!(time_iss >> value >> unit))
+  {
+    return not_found;
+  }
+
+  // Unit suffix -> nanoseconds. Microseconds are accepted in several
+  // spellings so that a report using a spelling other than "us" does not
+  // silently fail to parse.
+  // NOTE(review): confirm which microsecond suffix ctrack actually prints.
+  struct UnitScale
+  {
+    const char *suffix;
+    double to_ns;
+  };
+  static const UnitScale unit_scales[] = {
+    {"s", 1e9}, {"ms", 1e6}, {"us", 1e3}, {"mcs", 1e3}, {"µs", 1e3}, {"ns", 1.0}};
+
+  for (const UnitScale &scale : unit_scales)
+  {
+    if (unit == scale.suffix)
+    {
+      return value * scale.to_ns;
+    }
+  }
+
+  return not_found; // Could not parse
+}
+
+// Measure accuracy by comparing known timings with CTRACK measurements
+//
+// Runs the tracked call tree 100 times on the calling thread, reads the
+// accumulated time ctrack reports for each of the four functions, and
+// compares the sum with the nominal busy-wait durations.
+//
+// Returns {overall_error_percent, overall_error_us_per_event}:
+//   - overall_error_percent: |sum(actual) - sum(expected)| / sum(expected) * 100
+//   - overall_error_us_per_event: the same absolute error in microseconds,
+//     divided by the number of events of the functions that could be parsed.
+// Both are 0 when no function timing could be parsed.
+std::pair<double, double> measure_accuracy()
+{
+  std::cout << "\n=== Measuring Accuracy ===" << std::endl;
+
+  // Clear any previous tracking data by getting and discarding results
+  ctrack::result_as_string();
+
+  // Run a controlled test with known timings
+  const int test_iterations = 100;
+  for (int i = 0; i < test_iterations; ++i)
+  {
+    level_1_function(10);
+  }
+
+  // Get results
+  auto results = ctrack::result_as_string();
+
+  // Expected timings per iteration (in nanoseconds):
+  // leaf_function: 1000ns (called 20 times per iteration) = 20,000ns total per iteration
+  // level_3_function: 500ns + 2*1000ns = 2500ns (called 10 times per iteration) = 25,000ns total per iteration
+  // level_2_function: 300ns + 10*2500ns = 25,300ns (called 1 time per iteration) = 25,300ns total per iteration
+  // level_1_function: 200ns + 25,300ns = 25,500ns (called 1 time per iteration) = 25,500ns total per iteration
+
+  struct ExpectedTiming
+  {
+    std::string name;
+    double expected_total_ns;
+    int call_count;
+  };
+
+  std::vector<ExpectedTiming> expected_timings = {
+    {"leaf_function", 1000.0 * 20 * test_iterations, 20 * test_iterations},
+    {"level_3_function", 2500.0 * 10 * test_iterations, 10 * test_iterations},
+    {"level_2_function", 25300.0 * 1 * test_iterations, 1 * test_iterations},
+    {"level_1_function", 25500.0 * 1 * test_iterations, 1 * test_iterations}};
+
+  double total_expected_time = 0.0;
+  double total_actual_time = 0.0;
+  // Events belonging to the functions whose timing was parsed. Only these
+  // contribute to the time totals, so only these may divide the error.
+  double parsed_events = 0.0;
+
+  if (g_config.verbose)
+  {
+    std::cout << "Function accuracy analysis:" << std::endl;
+  }
+
+  for (const auto &timing : expected_timings)
+  {
+    double actual_ns = parse_function_timing(results, timing.name);
+    if (actual_ns > 0)
+    {
+      double expected_ns = timing.expected_total_ns;
+
+      total_expected_time += expected_ns;
+      total_actual_time += actual_ns;
+      parsed_events += timing.call_count;
+
+      if (g_config.verbose)
+      {
+        double percent_error = (std::abs(actual_ns - expected_ns) / expected_ns) * 100.0;
+        std::cout << "  " << timing.name << ": expected " << expected_ns / 1e6 << " ms, got "
+          << actual_ns / 1e6 << " ms (error: " << percent_error << "%)" << std::endl;
+      }
+    }
+    else if (g_config.verbose)
+    {
+      std::cout << "  " << timing.name << ": could not parse timing" << std::endl;
+    }
+  }
+
+  double overall_error_percent = 0.0;
+  double overall_error_us = 0.0;
+
+  if (total_expected_time > 0)
+  {
+    double total_absolute_error = std::abs(total_actual_time - total_expected_time);
+    overall_error_percent = (total_absolute_error / total_expected_time) * 100.0;
+
+    // Nanoseconds -> microseconds, then spread over the parsed events.
+    overall_error_us = (total_absolute_error / 1e3) / parsed_events;
+  }
+
+  if (g_config.verbose)
+  {
+    std::cout << "Overall accuracy error: " << overall_error_percent << "% (" << overall_error_us << " us per event)" << std::endl;
+  }
+
+  return {overall_error_percent, overall_error_us};
+}
+
+// ---------------------------------------------------------------------------
+// measure_overhead: estimate the run-time cost ctrack adds to the workload.
+//
+// Runs the same call tree with CTRACK_NAME instrumentation (benchmark_worker)
+// and without it (benchmark_worker_no_track) on g_config.thread_count threads
+// (at least one). Each run is timed with raw_clock_ns(), so the outer timer
+// is separate from std::chrono. After one warm-up run of each variant, five
+// trials are taken with the variant order alternating between trials, and
+// the medians of the two variants are compared.
+//
+// ctrack::result_as_string() is called only outside the timed window, to
+// clear accumulated state.
+//
+// Returns {overhead_percent, overhead_ms, overhead_ns_per_event}. A negative
+// difference is treated as measurement noise and clamped to 0.
+// ---------------------------------------------------------------------------
+std::tuple<double, double, double> measure_overhead()
+{
+  std::cout << "\n=== Measuring Overhead ===" << std::endl;
+
+  const size_t overhead_events = 1'000'000;
+  // A thread count of 0 (e.g. "--threads 0") would divide by zero here and
+  // run no work at all, so use at least one thread. (std::max) is
+  // parenthesised so the max() macro from <windows.h> cannot expand.
+  const size_t thread_count = (std::max)(size_t{1}, g_config.thread_count);
+  const size_t events_per_thread = overhead_events / thread_count;
+
+  // Helper: spawn threads, wait for join, return nothing (timing done outside)
+  auto run_variant = [&](bool with_track)
+    {
+      std::vector<std::thread> threads;
+      std::atomic<bool> start_flag{false};
+      for (size_t i = 0; i < thread_count; ++i)
+      {
+        if (with_track)
+          threads.emplace_back(benchmark_worker, events_per_thread, std::ref(start_flag));
+        else
+          threads.emplace_back(benchmark_worker_no_track, events_per_thread, std::ref(start_flag));
+      }
+      start_flag = true;
+      for (auto &t : threads) t.join();
+      // NOTE: result_as_string() is intentionally NOT called here.
+      //  It must stay outside the timed window.
+    };
+
+  // Time one run of a variant; returns the elapsed time in microseconds.
+  // ctrack state is cleared before the first clock reading, and the results
+  // of a tracked run are discarded after the second one.
+  auto measure = [&](bool with_track) -> double
+    {
+      // Pre-clear: outside timed window
+      ctrack::result_as_string();
+
+      const int64_t t0 = raw_clock_ns();
+      run_variant(with_track);
+      const int64_t t1 = raw_clock_ns();
+
+      // Post-clear: outside timed window
+      if (with_track) ctrack::result_as_string();
+
+      return static_cast<double>(t1 - t0) / 1'000.0; // ns -> us
+    };
+
+  // Warmup
+  run_variant(false);
+  ctrack::result_as_string(); // clear accumulated state
+  run_variant(true);
+  ctrack::result_as_string(); // clear accumulated state
+
+  // Multi-trial with alternating order
+  const int NUM_TRIALS = 5;
+  std::vector<double> no_track_times, track_times;
+
+  for (int trial = 0; trial < NUM_TRIALS; ++trial)
+  {
+    const bool no_track_first = (trial % 2 == 0);
+
+    if (no_track_first)
+    {
+      no_track_times.push_back(measure(false));
+      track_times.push_back(measure(true));
+    }
+    else
+    {
+      track_times.push_back(measure(true));
+      no_track_times.push_back(measure(false));
+    }
+  }
+
+  // Median to reject scheduler outliers
+  auto median = [](std::vector<double> v) -> double
+    {
+      std::sort(v.begin(), v.end());
+      return v[v.size() / 2];
+    };
+
+  const double dur_no_track = median(no_track_times);
+  const double dur_track    = median(track_times);
+  const double raw_diff     = dur_track - dur_no_track; // us
+  const double clamped_diff = (std::max)(0.0, raw_diff);
+
+  // A zero baseline duration would make the percentage undefined.
+  const double overhead_percent      = dur_no_track > 0.0 ? (clamped_diff / dur_no_track) * 100.0 : 0.0;
+  const double overhead_ms           = clamped_diff / 1'000.0;
+  const double overhead_ns_per_event = (clamped_diff * 1'000.0) / overhead_events;
+
+  if (g_config.verbose)
+  {
+    std::cout << "Without ctrack (median): " << dur_no_track << " µs\n";
+    std::cout << "With ctrack    (median): " << dur_track    << " µs\n";
+    if (raw_diff < 0)
+      std::cout << "Raw diff: " << raw_diff << " µs (negative — clamped to 0, measurement noise)\n";
+    std::cout << "Overhead: " << overhead_percent << "% ("
+      << overhead_ms << " ms, " << overhead_ns_per_event << " ns/event)\n";
+  }
+
+  return {overhead_percent, overhead_ms, overhead_ns_per_event};
+}
+
+// Measure how much memory recording g_config.total_events events costs, and
+// how long ctrack::result_as_string() takes on that data.
+//
+// Steps:
+//   1. Discard earlier ctrack results and take a starting memory reading.
+//   2. Generate the events with benchmark_worker on g_config.thread_count
+//      threads (at least one) and take a second reading.
+//   3. Time ctrack::result_as_string() while a helper thread samples
+//      get_memory_usage() every 10 ms to track the peak.
+//
+// Returns {bytes_per_event, calculation_time_ms, peak_calc_memory_mb}.
+// Memory figures are relative to the starting reading and are clamped at 0,
+// so a reading below the starting one cannot wrap around.
+std::tuple<double, double, double> measure_memory_and_calculation_time()
+{
+  std::cout << "\n=== Measuring Memory Usage and Calculation Time ===" << std::endl;
+  ctrack::result_as_string();
+  const size_t initial_memory = get_memory_usage();
+  // Use at least one thread so the per-thread split never divides by zero.
+  const size_t thread_count = (std::max)(size_t{1}, g_config.thread_count);
+  const size_t events_per_thread = g_config.total_events / thread_count;
+
+  // Growth relative to the starting reading, saturating at 0.
+  auto growth_since_start = [initial_memory](size_t reading) -> size_t
+    {
+      return reading > initial_memory ? reading - initial_memory : size_t{0};
+    };
+
+  if (g_config.verbose)
+  {
+    std::cout << "Generating " << g_config.total_events << " events across "
+      << g_config.thread_count << " threads..." << std::endl;
+  }
+
+  auto gen_start = std::chrono::high_resolution_clock::now();
+  {
+    std::vector<std::thread> threads;
+    std::atomic<bool> start_flag{false};
+
+    for (size_t i = 0; i < thread_count; ++i)
+    {
+      threads.emplace_back(benchmark_worker, events_per_thread, std::ref(start_flag));
+    }
+
+    start_flag = true;
+
+    for (auto &t : threads)
+    {
+      t.join();
+    }
+  }
+  auto gen_end = std::chrono::high_resolution_clock::now();
+
+  // Measure memory after event generation
+  const size_t post_event_memory = get_memory_usage();
+  const size_t memory_used = growth_since_start(post_event_memory);
+  const double bytes_per_event =
+    g_config.total_events > 0 ? static_cast<double>(memory_used) / g_config.total_events : 0.0;
+
+  if (g_config.verbose)
+  {
+    auto gen_duration = std::chrono::duration_cast<std::chrono::milliseconds>(gen_end - gen_start).count();
+    std::cout << "Event generation took: " << gen_duration << " ms" << std::endl;
+    std::cout << "Memory used: " << memory_used / (1024.0 * 1024.0) << " MB" << std::endl;
+    std::cout << "Memory per event: " << bytes_per_event << " bytes" << std::endl;
+  }
+
+  // Measure calculation time and peak memory usage
+  std::atomic<bool> monitoring{true};
+  std::atomic<size_t> peak_memory{post_event_memory};
+
+  // Start memory monitoring thread: raise peak_memory whenever a sample
+  // exceeds it, then sleep for 10 ms.
+  std::thread monitor_thread([&monitoring, &peak_memory]()
+    {
+      while (monitoring.load())
+      {
+        const size_t current_memory = get_memory_usage();
+        size_t current_peak = peak_memory.load();
+        // compare_exchange_weak refreshes current_peak on failure, so the
+        // loop ends once the stored peak is at least current_memory.
+        while (current_memory > current_peak &&
+               !peak_memory.compare_exchange_weak(current_peak, current_memory))
+        {
+        }
+        std::this_thread::sleep_for(std::chrono::milliseconds(10)); // Poll every 10ms
+      }
+    });
+
+  auto calc_start = std::chrono::high_resolution_clock::now();
+  auto results = ctrack::result_as_string();
+  auto calc_end = std::chrono::high_resolution_clock::now();
+
+  // Stop monitoring
+  monitoring = false;
+  monitor_thread.join();
+
+  auto calc_duration = std::chrono::duration_cast<std::chrono::microseconds>(calc_end - calc_start).count() / 1000.0;
+  double peak_calc_memory_mb = growth_since_start(peak_memory.load()) / (1024.0 * 1024.0);
+
+  if (g_config.verbose)
+  {
+    std::cout << "Result calculation took: " << calc_duration << " ms" << std::endl;
+    std::cout << "Peak memory during calculation: " << peak_calc_memory_mb << " MB" << std::endl;
+  }
+
+  return {bytes_per_event, calc_duration, peak_calc_memory_mb};
+}
+
+// Write `data` to g_config.baseline_file as a flat JSON object with one key
+// per line, which is the layout load_baseline() reads back.
+//
+// The accuracy_error_us_per_event member is stored under the key
+// "accuracy_error_ms_per_event". On failure to open the file an error is
+// printed to stderr and nothing is written.
+void save_baseline(const BaselineData &data)
+{
+  std::ofstream out(g_config.baseline_file);
+  if (out.fail())
+  {
+    std::cerr << "Error: Could not open baseline file for writing: " << g_config.baseline_file << std::endl;
+    return;
+  }
+
+  // Emits:   "<key>": <value>,
+  auto put_number = [&out](const char *key, const auto &value)
+    {
+      out << "  \"" << key << "\": " << value << ",\n";
+    };
+
+  out << "{\n";
+  put_number("accuracy_error_percent", data.accuracy_error_percent);
+  put_number("accuracy_error_ms_per_event", data.accuracy_error_us_per_event);
+  put_number("overhead_percent", data.overhead_percent);
+  put_number("overhead_ms", data.overhead_ms);
+  put_number("overhead_ns_per_event", data.overhead_ns_per_event);
+  put_number("memory_bytes_per_event", data.memory_bytes_per_event);
+  put_number("calculation_time_ms", data.calculation_time_ms);
+  put_number("peak_calc_memory_mb", data.peak_calc_memory_mb);
+  put_number("total_events", data.total_events);
+  put_number("thread_count", data.thread_count);
+  // The two string members are quoted; the last entry has no trailing comma.
+  out << "  \"timestamp\": \"" << data.timestamp << "\",\n";
+  out << "  \"platform\": \"" << data.platform << "\"\n";
+  out << "}\n";
+
+  std::cout << "\nBaseline saved to: " << g_config.baseline_file << std::endl;
+}
+
+// Load baseline from file
+//
+// Reads g_config.baseline_file line by line and, for each line containing
+// one of the known numeric keys, stores the text between ": " and the next
+// comma (or end of line) into the matching member of `data`. Members whose
+// key does not appear are left untouched; "timestamp" and "platform" are not
+// read back.
+//
+// Returns false when the file cannot be opened or when a value cannot be
+// converted (a message is printed to stderr in that case); true otherwise.
+bool load_baseline(BaselineData &data)
+{
+  std::ifstream file(g_config.baseline_file);
+  if (!file)
+  {
+    return false;
+  }
+
+  // Key (including quotes and colon, so one key cannot match inside another)
+  // paired with the member it fills.
+  struct DoubleField
+  {
+    const char *key;
+    double BaselineData::*member;
+  };
+  struct CountField
+  {
+    const char *key;
+    size_t BaselineData::*member;
+  };
+  static const DoubleField double_fields[] = {
+    {"\"accuracy_error_percent\":", &BaselineData::accuracy_error_percent},
+    {"\"accuracy_error_ms_per_event\":", &BaselineData::accuracy_error_us_per_event},
+    {"\"overhead_percent\":", &BaselineData::overhead_percent},
+    {"\"overhead_ms\":", &BaselineData::overhead_ms},
+    {"\"overhead_ns_per_event\":", &BaselineData::overhead_ns_per_event},
+    {"\"memory_bytes_per_event\":", &BaselineData::memory_bytes_per_event},
+    {"\"calculation_time_ms\":", &BaselineData::calculation_time_ms},
+    {"\"peak_calc_memory_mb\":", &BaselineData::peak_calc_memory_mb}};
+  static const CountField count_fields[] = {
+    {"\"total_events\":", &BaselineData::total_events},
+    {"\"thread_count\":", &BaselineData::thread_count}};
+
+  // Text after the first ": " up to the next comma, or to the end of the
+  // line. Empty when the separator is missing, which makes the numeric
+  // conversion below fail cleanly instead of reading from a wrapped index.
+  auto value_text = [](const std::string &text) -> std::string
+    {
+      const size_t separator = text.find(": ");
+      if (separator == std::string::npos)
+      {
+        return std::string();
+      }
+      const size_t begin = separator + 2;
+      const size_t end = text.find(',', begin);
+      return text.substr(begin, end == std::string::npos ? std::string::npos : end - begin);
+    };
+
+  // Simple JSON parsing (production code would use a proper JSON library)
+  std::string line;
+  try
+  {
+    while (std::getline(file, line))
+    {
+      bool matched = false;
+      for (const DoubleField &entry : double_fields)
+      {
+        if (line.find(entry.key) != std::string::npos)
+        {
+          data.*(entry.member) = std::stod(value_text(line));
+          matched = true;
+          break;
+        }
+      }
+      if (matched)
+      {
+        continue;
+      }
+      for (const CountField &entry : count_fields)
+      {
+        if (line.find(entry.key) != std::string::npos)
+        {
+          data.*(entry.member) = std::stoull(value_text(line));
+          break;
+        }
+      }
+    }
+  }
+  catch (const std::exception &error)
+  {
+    // std::stod / std::stoull throw on non-numeric or out-of-range text.
+    // Report the file as unusable rather than letting the exception
+    // terminate the benchmark.
+    std::cerr << "Error: Malformed value in baseline file " << g_config.baseline_file
+      << " (" << error.what() << "): " << line << std::endl;
+    return false;
+  }
+
+  return true;
+}
+
+// Compare current results with baseline
+//
+// Loads the baseline from g_config.baseline_file and prints, for each
+// metric, the baseline value, the current value and the relative change.
+// Every metric here is treated as "lower is better". Prints an error to
+// stderr and returns if the baseline cannot be loaded.
+//
+// Note: leaves std::cout in fixed notation with 2 decimals.
+void compare_with_baseline(const BaselineData &current)
+{
+  BaselineData baseline;
+  if (!load_baseline(baseline))
+  {
+    std::cerr << "Error: Could not load baseline file: " << g_config.baseline_file << std::endl;
+    return;
+  }
+
+  std::cout << "\n=== Baseline Comparison ===" << std::endl;
+  std::cout << std::fixed << std::setprecision(2);
+  auto print_comparison = [](const std::string &metric, double baseline_val, double current_val, bool lower_is_better = true)
+    {
+      const double diff = current_val - baseline_val;
+
+      std::cout << metric << ":\n";
+      std::cout << "  Baseline: " << baseline_val << "\n";
+      std::cout << "  Current:  " << current_val << "\n";
+
+      // Equal values are neither better nor worse.
+      if (diff == 0.0)
+      {
+        std::cout << "  Change:   unchanged\n\n";
+        return;
+      }
+
+      const std::string direction = (diff > 0) ? "increased" : "decreased";
+      const std::string indicator = (lower_is_better ? (diff > 0 ? "worse" : "better") : (diff > 0 ? "better" : "worse"));
+
+      // A baseline of 0 (e.g. an overhead that was clamped to 0) has no
+      // meaningful relative change; dividing would print inf.
+      if (baseline_val == 0.0)
+      {
+        std::cout << "  Change:   " << indicator << " - " << direction << " (baseline is 0, no percentage)\n\n";
+        return;
+      }
+
+      const double percent_change = (diff / baseline_val) * 100.0;
+      std::cout << "  Change:   " << indicator << " - " << std::abs(percent_change) << "% " << direction << "\n\n";
+    };
+
+  print_comparison("Accuracy Error %", baseline.accuracy_error_percent, current.accuracy_error_percent);
+  // The member holds microseconds per event, so label it as such.
+  print_comparison("Accuracy Error (us/event)", baseline.accuracy_error_us_per_event, current.accuracy_error_us_per_event);
+  print_comparison("Overhead %", std::abs(baseline.overhead_percent), std::abs(current.overhead_percent));
+  print_comparison("Overhead Time (ms)", std::abs(baseline.overhead_ms), std::abs(current.overhead_ms));
+  print_comparison("Overhead per Event (ns)", baseline.overhead_ns_per_event, current.overhead_ns_per_event);
+  print_comparison("Memory/Event (bytes)", baseline.memory_bytes_per_event, current.memory_bytes_per_event);
+  print_comparison("Calculation Time (ms)", baseline.calculation_time_ms, current.calculation_time_ms);
+  print_comparison("Peak Calc Memory (MB)", baseline.peak_calc_memory_mb, current.peak_calc_memory_mb);
+}
+
+// Get platform string (decided at compile time from the predefined OS macros)
+std::string get_platform()
+{
+#if defined(_WIN32)
+  return "Windows";
+#elif defined(__APPLE__) // defined() tests presence instead of evaluating the macro's value
+  return "macOS";
+#elif defined(__linux__)
+  return "Linux";
+#else
+  return "Unknown";
+#endif
+}
+
+// Get current timestamp as local time, formatted "YYYY-MM-DD HH:MM:SS"
+std::string get_timestamp()
+{
+  const std::time_t now_c = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now());
+  std::tm time_info{};
+#ifdef _WIN32
+  localtime_s(&time_info, &now_c);
+#else
+  // localtime_r fills our own buffer; std::localtime returns a shared static one and is not thread-safe.
+  localtime_r(&now_c, &time_info);
+#endif
+  std::stringstream ss;
+  ss << std::put_time(&time_info, "%Y-%m-%d %H:%M:%S");
+  return ss.str();
+}
+
+// Print usage: writes the option summary for `program_name` to stdout
+void print_usage(const char *program_name)
+{
+  std::cout << "Usage: " << program_name << " [options]\n"
+            << "Options:\n"
+            << "  --events <count>      Number of events to generate (default: 50000000)\n"
+            << "  --threads <count>     Number of threads to use (default: hardware concurrency)\n"
+            << "  --baseline <file>     Baseline file path (default: ctrack_baseline.json)\n"
+            << "  --record-baseline     Record current results as baseline\n"
+            << "  --compare-baseline    Compare results with baseline\n"
+            << "  --verbose             Enable verbose output\n"
+            << "  --help                Show this help message\n";
+}
+
+// Parse command line arguments into g_config; returns false when the caller should exit
+bool parse_args(int argc, char *argv[])
+{
+  for (int i = 1; i < argc; ++i)
+  {
+    const std::string arg = argv[i];
+    const bool wants_value = (arg == "--events" || arg == "--threads" || arg == "--baseline");
+    if (arg == "--help")
+    {
+      print_usage(argv[0]);
+      return false;
+    }
+    else if (wants_value && i + 1 >= argc)
+    {
+      std::cerr << "Missing value for option: " << arg << std::endl;
+      return false;
+    }
+    else if (arg == "--baseline")
+    {
+      g_config.baseline_file = argv[++i];
+    }
+    else if (wants_value)
+    {
+      // std::stoull throws on non-numeric/out-of-range text; keep 0 so the check below rejects it.
+      unsigned long long count = 0;
+      try { count = std::stoull(argv[++i]); } catch (const std::exception &) {}
+      if (count == 0)
+      {
+        std::cerr << "Invalid value for option " << arg << ": " << argv[i] << std::endl;
+        return false;
+      }
+      if (arg == "--events") g_config.total_events = count;
+      else g_config.thread_count = count;
+    }
+    else if (arg == "--record-baseline") { g_config.record_baseline = true; }
+    else if (arg == "--compare-baseline") { g_config.compare_baseline = true; }
+    else if (arg == "--verbose") { g_config.verbose = true; }
+    else
+    {
+      std::cerr << "Unknown option: " << arg << std::endl;
+      print_usage(argv[0]);
+      return false;
+    }
+  }
+  return true;
+}
+
+int main(int argc, char *argv[])
+{
+  if (!parse_args(argc, argv))
+    return 1;
+
+  // thread_count is a divisor below, so reject zero. NOTE(review): the default presumably comes from hardware concurrency, which may report 0 - confirm.
+  if (g_config.thread_count == 0)
+  {
+    std::cerr << "Error: thread count must be greater than zero" << std::endl;
+    return 1;
+  }
+
+  std::cout << "CTRACK Comprehensive Benchmark\n";
+  std::cout << "==============================\n";
+  std::cout << "Total events: " << g_config.total_events << "\n";
+  std::cout << "Thread count: " << g_config.thread_count << "\n";
+  std::cout << "Events per thread: " << g_config.total_events / g_config.thread_count << "\n";
+
+  // Run benchmarks
+  auto [accuracy_error_percent, accuracy_error_us_per_event] = measure_accuracy();
+  auto [overhead_percent, overhead_ms, overhead_ns_per_event] = measure_overhead();
+  auto [bytes_per_event, calc_time, peak_calc_memory] = measure_memory_and_calculation_time();
+
+  // Prepare results
+  BaselineData current_data;
+  current_data.accuracy_error_percent = accuracy_error_percent;
+  current_data.accuracy_error_us_per_event = accuracy_error_us_per_event;
+  current_data.overhead_percent = overhead_percent;
+  current_data.overhead_ms = overhead_ms;
+  current_data.overhead_ns_per_event = overhead_ns_per_event;
+  current_data.memory_bytes_per_event = bytes_per_event;
+  current_data.calculation_time_ms = calc_time;
+  current_data.peak_calc_memory_mb = peak_calc_memory;
+  current_data.total_events = g_config.total_events;
+  current_data.thread_count = g_config.thread_count;
+  current_data.timestamp = get_timestamp();
+  current_data.platform = get_platform();
+
+  // Print summary
+  std::cout << "\n=== Benchmark Results ===" << std::endl;
+  std::cout << std::fixed << std::setprecision(2);
+  std::cout << "Accuracy error: " << accuracy_error_percent << "% (" << accuracy_error_us_per_event << " us per event)" << std::endl;
+  std::cout << "Overhead: " << overhead_percent << "% (" << overhead_ms << " ms total, " << overhead_ns_per_event << " ns per event)" << std::endl;
+  std::cout << "Memory per event: " << bytes_per_event << " bytes" << std::endl;
+  std::cout << "Calculation time: " << calc_time << " ms" << std::endl;
+  std::cout << "Peak calculation memory: " << peak_calc_memory << " MB" << std::endl;
+
+  // Handle baseline operations
+  if (g_config.record_baseline)
+    save_baseline(current_data);
+
+  if (g_config.compare_baseline)
+    compare_with_baseline(current_data);
+
+  return 0;
+}
diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt
index 2e82e61..ea3cc76 100644
--- a/examples/CMakeLists.txt
+++ b/examples/CMakeLists.txt
@@ -1,4 +1,5 @@
 
+# To time with a TSC backend instead of std::chrono (the default), uncomment: add_compile_definitions(CTRACK_CLOCK_RDTSC)
 # Create executables for each example
 add_executable(basic_singlethreaded basic_singlethreaded.cpp)
 add_executable(multithreaded_prime_counter multithreaded_prime_counter.cpp)
diff --git a/examples/basic_singlethreaded.cpp b/examples/basic_singlethreaded.cpp
index 27fcd81..802d314 100644
--- a/examples/basic_singlethreaded.cpp
+++ b/examples/basic_singlethreaded.cpp
@@ -43,4 +43,4 @@ int main() {
      ctrack::result_print();
     //std::cout << ctrack::result_as_string() << std::endl;
     return 0;
-}
\ No newline at end of file
+}
diff --git a/include/ctrack.hpp b/include/ctrack.hpp
index 52d309c..527504c 100644
--- a/include/ctrack.hpp
+++ b/include/ctrack.hpp
@@ -27,6 +27,7 @@
 #include <sstream>
 #include <atomic>
 #include <cmath>
+#include <fstream>
 
 #define CTRACK_VERSION_MAJOR 1
 #define CTRACK_VERSION_MINOR 1
@@ -38,8 +39,8 @@
 
 // Create a string version
 #define CTRACK_VERSION_STRING      \
-	TOSTRING(CTRACK_VERSION_MAJOR) \
-	"_" TOSTRING(CTRACK_VERSION_MINOR) "_" TOSTRING(CTRACK_VERSION_PATCH)
+TOSTRING(CTRACK_VERSION_MAJOR) \
+"_" TOSTRING(CTRACK_VERSION_MINOR) "_" TOSTRING(CTRACK_VERSION_PATCH)
 
 // Use the version string as the namespace name
 #define CTRACK_VERSION_NAMESPACE v##CTRACK_VERSION_MAJOR##_##CTRACK_VERSION_MINOR##_##CTRACK_VERSION_PATCH
@@ -47,1211 +48,1483 @@
 namespace ctrack
 {
 
-	inline namespace CTRACK_VERSION_NAMESPACE
-	{
+
+// Cross-platform always-inline shim (applied to the clock NOW() functions so the compiler cannot decline to inline them)
+#if defined(_MSC_VER)
+#define CTRACK_ALWAYS_INLINE __forceinline
+#elif defined(__GNUC__) || defined(__clang__)
+#define CTRACK_ALWAYS_INLINE inline __attribute__((always_inline))
+#else
+#define CTRACK_ALWAYS_INLINE inline
+#endif
+
+// TSC clock backends (x86_64 only); otherwise, only Clock_Chrono compiles.
+#if defined(__x86_64__) || defined(_M_X64)
+} // namespace ctrack -- the platform headers below must be included at global scope
+#if defined(_MSC_VER)
+#include <intrin.h>
+#ifndef WIN32_LEAN_AND_MEAN
+#define WIN32_LEAN_AND_MEAN
+#endif
+#include <windows.h>
+#else
+#include <x86intrin.h>
+#include <cpuid.h>
+#endif
+namespace ctrack {
+
+// TSC -> ns conversion state
+// Set once by calibrate_tsc() (original note: from the EventHandler constructor), then read only.
+inline double cycles_per_ns = 3.0; // placeholder until calibrated: 3 GHz == 3 cycles per nanosecond
+inline std::chrono::system_clock::time_point tsc_anchor_system{};
+inline uint64_t tsc_anchor_cycles = 0;
+
+// CPUID wrapper: executes CPUID for (leaf, subleaf) and stores the four result
+// registers in eax, ebx, ecx and edx.
+inline void ctrack_cpuid(uint32_t leaf, uint32_t subleaf,
+                         uint32_t& eax, uint32_t& ebx, uint32_t& ecx, uint32_t& edx)
+{
+#if defined(_MSC_VER)
+  int cpu_info[4];
+  __cpuidex(cpu_info, static_cast<int>(leaf), static_cast<int>(subleaf));
+  eax = static_cast<uint32_t>(cpu_info[0]);
+  ebx = static_cast<uint32_t>(cpu_info[1]);
+  ecx = static_cast<uint32_t>(cpu_info[2]);
+  edx = static_cast<uint32_t>(cpu_info[3]);
+#else
+  // <cpuid.h> macro: writes the registers straight into the four arguments.
+  __cpuid_count(leaf, subleaf, eax, ebx, ecx, edx);
+#endif
+}
+
+// C1: CPUID 0x15, exact TSC frequency.  Intel Skylake+ (2015+)
+// Returns the frequency in GHz, or 0.0 when the leaf is unsupported or any field is zero.
+inline double tsc_ghz_from_cpuid_15h() {
+  uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0;
+  ctrack_cpuid(0, 0, eax, ebx, ecx, edx); // leaf 0: EAX is compared against the leaf we need
+  if (eax < 0x15) return 0.0;
+  ctrack_cpuid(0x15, 0, eax, ebx, ecx, edx);
+  // EAX = denominator, EBX = numerator, ECX = core crystal Hz
+  const bool incomplete = (eax == 0) || (ebx == 0) || (ecx == 0);
+  return incomplete ? 0.0 : (static_cast<double>(ecx) * ebx / eax) / 1e9;
+}
+
+// C2: CPUID 0x16,  base frequency in MHz. Intel Haswell+ (2013+)
+// Returns that frequency in GHz, or 0.0 when the leaf is unsupported or reports zero.
+inline double tsc_ghz_from_cpuid_16h() {
+  uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0;
+  ctrack_cpuid(0, 0, eax, ebx, ecx, edx);
+  if (eax < 0x16) return 0.0;
+  ctrack_cpuid(0x16, 0, eax, ebx, ecx, edx);
+  // Only the low 16 bits of EAX are used as the MHz value.
+  const uint32_t base_mhz = eax & 0xFFFF;
+  return (base_mhz != 0) ? static_cast<double>(base_mhz) / 1000.0 : 0.0;
+}
+
+// C3 (Linux): intel_pstate base_frequency  Intel CPU only
+// Parses the file content as kHz and returns GHz; 0.0 if unreadable, unparsable or not positive.
+inline double tsc_ghz_from_sysfs_base() {
+#if defined(__linux__)
+  double khz = 0.0;
+  std::ifstream sysfs_file("/sys/devices/system/cpu/cpu0/cpufreq/base_frequency");
+  const bool usable = sysfs_file && (sysfs_file >> khz) && !(khz <= 0.0);
+  return usable ? khz / 1e6 : 0.0;
+#else
+  return 0.0;
+#endif
+}
+
+// C4 (Windows): reads the "~MHz" value of processor 0 from the registry (original note: set at boot from CPUID)
+inline double tsc_ghz_from_windows_registry() {
+#if defined(_WIN32)
+  HKEY key;
+  if (RegOpenKeyExA(HKEY_LOCAL_MACHINE,
+                    "HARDWARE\\DESCRIPTION\\System\\CentralProcessor\\0", 0, KEY_READ, &key) != ERROR_SUCCESS)
+    return 0.0; // key could not be opened
+  DWORD mhz = 0, size = sizeof(DWORD);
+  LONG status = RegQueryValueExA(key, "~MHz", nullptr, nullptr, reinterpret_cast<LPBYTE>(&mhz), &size);
+  RegCloseKey(key); // closed before the result is inspected, so the handle is released on every path
+  if (status != ERROR_SUCCESS || mhz == 0) return 0.0;
+  return static_cast<double>(mhz) / 1000.0; // MHz -> GHz
+#else
+  return 0.0; // not built for Windows
+#endif
+}
+
+// Calibration fallback: lightweight runtime calibration (~3ms)
+//
+// Last-resort fallback for AMD bare-metal and virtualized environments
+// where no static frequency source is available.  Takes three samples, each
+// spanning a 1 ms sleep, and returns the median in cycles/ns (== GHz).  This is
+// the only path that pays a startup cost; calibrate_tsc() tries it after C1-C4.
+inline double tsc_ghz_from_calibration() {
+  constexpr int N = 3;
+  double samples[N];
+
+  for (int i = 0; i < N; ++i) {
+    auto wall_t0 = std::chrono::steady_clock::now(); // wall-clock reads bracket the TSC reads
+    uint64_t tsc_t0 = __rdtsc();
+    std::this_thread::sleep_for(std::chrono::milliseconds(1));
+    uint64_t tsc_t1 = __rdtsc();
+    auto wall_t1 = std::chrono::steady_clock::now();
+
+    double ns = std::chrono::duration<double, std::nano>(wall_t1 - wall_t0).count();
+    if (ns <= 0.0) { samples[i] = 0.0; continue; } // unusable sample; 0.0 sorts to the front
+    samples[i] = static_cast<double>(tsc_t1 - tsc_t0) / ns;  // cycles/ns = GHz
+  }
+
+  std::sort(samples, samples + N);
+  return samples[N / 2];  // median rejects the worst scheduler hiccup
+}
+
+// Master calibration: query the frequency sources in priority order, abort if all fail
+inline void calibrate_tsc() {
+  double (*const sources[])() = {tsc_ghz_from_cpuid_15h, tsc_ghz_from_cpuid_16h, tsc_ghz_from_sysfs_base,
+                                 tsc_ghz_from_windows_registry, tsc_ghz_from_calibration};
+  double ghz = 0.0;
+  for (auto source : sources) {
+    ghz = source();
+    if (!(ghz <= 0.0)) break; // first usable source wins; later ones are not queried
+  }
+  if (ghz <= 0.0) {
+    std::cerr <<
+      "[ctrack] FATAL: TSC clock backend selected at compile time but no usable frequency source found.\n"
+      "[ctrack]         Rebuild without CTRACK_CLOCK_RDTSC / RDTSCP / RDTSCP_LFENCE to use the chrono fallback.\n";
+    std::abort();
+  }
+  cycles_per_ns = ghz;
+  tsc_anchor_cycles = __rdtsc();
+  tsc_anchor_system = std::chrono::system_clock::now();
+}
+
+inline uint_fast64_t cycles_to_ns(uint64_t cycles) { // TSC cycle count -> nanoseconds
+  return static_cast<uint_fast64_t>(cycles / cycles_per_ns); // divides by the cycles-per-ns rate, fraction truncated
+}
+
+inline std::string cycles_to_timestring(uint64_t tp) { // raw TSC reading -> local "YYYY-MM-DD HH:MM:SS"
+  const int64_t delta_cycles = static_cast<int64_t>(tp) - static_cast<int64_t>(tsc_anchor_cycles);
+  const auto delta_ns = std::chrono::nanoseconds(static_cast<int64_t>(delta_cycles / cycles_per_ns));
+  const auto system_tp = tsc_anchor_system + std::chrono::duration_cast<std::chrono::system_clock::duration>(delta_ns); // explicit cast: system_clock ticks may be coarser than ns
+  const auto tt = std::chrono::system_clock::to_time_t(system_tp);
+  std::tm tm{};
+#if defined(_WIN32)
+  localtime_s(&tm, &tt);
+#else
+  localtime_r(&tt, &tm);
+#endif
+  std::ostringstream oss;
+  oss << std::put_time(&tm, "%Y-%m-%d %H:%M:%S");
+  return oss.str();
+}
+
+#if defined(CTRACK_CLOCK_RDTSC) // exactly one backend becomes ActiveClock, chosen by macro
+struct Clock_RDTSC { // timestamps are raw __rdtsc() readings
+  using time_point = uint64_t;
+  CTRACK_ALWAYS_INLINE static time_point NOW() { return __rdtsc(); }
+  static inline uint_fast64_t duration_ns(time_point s, time_point e) { return cycles_to_ns(e - s); }
+  static inline std::string to_string(const time_point &tp) { return cycles_to_timestring(tp); }
+};
+using ActiveClock = Clock_RDTSC;
+#elif defined(CTRACK_CLOCK_RDTSCP)
+struct Clock_RDTSCP { // timestamps are __rdtscp() readings; the aux output is discarded
+  using time_point = uint64_t;
+  CTRACK_ALWAYS_INLINE static time_point NOW() { unsigned int aux; return __rdtscp(&aux); }
+  static inline uint_fast64_t duration_ns(time_point s, time_point e) { return cycles_to_ns(e - s); }
+  static inline std::string to_string(const time_point &tp) { return cycles_to_timestring(tp); }
+};
+using ActiveClock = Clock_RDTSCP;
+#elif defined(CTRACK_CLOCK_RDTSCP_LFENCE)
+struct Clock_RDTSCP_LFENCE { // as Clock_RDTSCP, with an _mm_lfence() issued before the read
+  using time_point = uint64_t;
+  CTRACK_ALWAYS_INLINE static time_point NOW() { _mm_lfence(); unsigned int aux; return __rdtscp(&aux); }
+  static inline uint_fast64_t duration_ns(time_point s, time_point e) { return cycles_to_ns(e - s); }
+  static inline std::string to_string(const time_point &tp) { return cycles_to_timestring(tp); }
+};
+using ActiveClock = Clock_RDTSCP_LFENCE;
+#endif
+
+#else  // not x86_64
+
+// Hard-fail at compile time if a TSC backend is requested on a non-x86 build.
+#if defined(CTRACK_CLOCK_RDTSC) || defined(CTRACK_CLOCK_RDTSCP) || defined(CTRACK_CLOCK_RDTSCP_LFENCE)
+#error "CTRACK_CLOCK_RDTSC* requires x86_64. Remove the macro to use Clock_Chrono."
+#endif
+
+#endif  // x86_64
+
+// ── Chrono fallback (default if no TSC backend selected) ─────────────────
+#if !defined(CTRACK_CLOCK_RDTSC) && !defined(CTRACK_CLOCK_RDTSCP) && !defined(CTRACK_CLOCK_RDTSCP_LFENCE)
+struct Clock_Chrono {
+  using time_point = std::chrono::high_resolution_clock::time_point;
+  CTRACK_ALWAYS_INLINE static time_point NOW() { return std::chrono::high_resolution_clock::now(); }
+  // Elapsed time from s to e, in nanoseconds.
+  static inline uint_fast64_t duration_ns(time_point s, time_point e) {
+    const auto elapsed = std::chrono::duration_cast<std::chrono::nanoseconds>(e - s);
+    return elapsed.count();
+  }
+  // Formats tp as local "YYYY-MM-DD HH:MM:SS" by mapping its distance from "now" onto system_clock.
+  static inline std::string to_string(const time_point &tp) {
+    const auto offset = std::chrono::duration_cast<std::chrono::system_clock::duration>(
+      tp - std::chrono::high_resolution_clock::now());
+    const auto system_tp = std::chrono::system_clock::now() + offset;
+    const auto tt = std::chrono::system_clock::to_time_t(system_tp);
+    std::tm tm{};
+#if defined(_WIN32)
+    localtime_s(&tm, &tt);
+#else
+    localtime_r(&tt, &tm);
+#endif
+    std::ostringstream oss;
+    oss << std::put_time(&tm, "%Y-%m-%d %H:%M:%S");
+    return oss.str();
+  }
+};
+using ActiveClock = Clock_Chrono;
+#endif // chrono
+
+
+inline namespace CTRACK_VERSION_NAMESPACE
+{
 #ifndef CTRACK_DISABLE_EXECUTION_POLICY
-		constexpr auto execution_policy = std::execution::par_unseq;
+constexpr auto execution_policy = std::execution::par_unseq;
 #define OPT_EXEC_POLICY execution_policy,
 #else
 #define OPT_EXEC_POLICY
 #endif
 
-		template <typename T, typename Field>
-		auto sum_field(const std::vector<T> &vec, Field T::*field)
-		{
-			using FieldType = std::decay_t<decltype(std::declval<T>().*field)>;
-			return std::transform_reduce(
-				OPT_EXEC_POLICY
-					vec.begin(),
-				vec.end(),
-				FieldType{},
-				std::plus<>(),
-				[field](const auto &item)
-				{ return item.*field; });
-		}
-
-		template <typename T, typename Field>
-		auto sum_squared_field(const std::vector<T> &values, Field T::*field)
-		{
-			using FieldType = std::decay_t<decltype(std::declval<T>().*field)>;
-			return std::transform_reduce(
-				OPT_EXEC_POLICY
-					values.begin(),
-				values.end(),
-				FieldType{},
-				std::plus<>(),
-				[field](const T &v)
-				{
-					return (v.*field) * (v.*field);
-				});
-		}
-
-		template <typename T, typename Field>
-		double calculate_std_dev_field(std::vector<T> &values, Field T::*field, const double mean)
-		{
-			double res = std::transform_reduce(
-				OPT_EXEC_POLICY
-					values.begin(),
-				values.end(),
-				0.0,
-				std::plus<>(),
-				[mean, field](const T &v)
-				{
-					return std::pow(static_cast<double>(v.*field) - mean, 2);
-				});
-
-			return sqrt(res / values.size());
-		}
-
-		template <typename T, typename Field>
-		auto get_distinct_field_values(const std::vector<const T *> &vec, Field T::*field)
-		{
-			std::set<std::remove_reference_t<decltype(std::declval<T>().*field)>> distinct_values;
-
-			std::transform(vec.begin(), vec.end(),
-						   std::inserter(distinct_values, distinct_values.end()),
-						   [field](const T *item)
-						   { return item->*field; });
-			return distinct_values;
-		}
-
-		template <typename T, typename Field>
-		auto get_distinct_field_values(const std::vector<T> &vec, Field T::*field)
-		{
-			std::set<std::remove_reference_t<decltype(std::declval<T>().*field)>> distinct_values;
-
-			std::transform(vec.begin(), vec.end(),
-						   std::inserter(distinct_values, distinct_values.end()),
-						   [field](const T &item)
-						   { return item.*field; });
-			return distinct_values;
-		}
-
-		template <typename T, typename Field>
-		size_t count_distinct_field_values(const std::vector<T> &vec, Field T::*field)
-		{
-			return get_distinct_field_values(vec, field).size();
-		}
-
-		template <typename StructType, typename MemberType>
-		void order_pointer_vector_by_field(std::vector<StructType *> &vec, MemberType StructType::*member, bool asc = true)
-		{
-			std::sort(OPT_EXEC_POLICY vec.begin(), vec.end(),
-					  [member, asc](const StructType *a, const StructType *b)
-					  {
-						  if (asc)
-							  return (a->*member) < (b->*member);
-						  else
-							  return (a->*member) > (b->*member);
-					  });
-		}
-
-		template <typename T>
-		size_t countAllEvents(const std::deque<std::vector<T>> &events)
-		{
-			return std::transform_reduce(
-				OPT_EXEC_POLICY
-					events.begin(),
-				events.end(),
-				size_t(0),
-				std::plus<>(),
-				[](const auto &vec)
-				{
-					return vec.size();
-				});
-		}
-
-		struct ColorScheme
-		{
-			std::string border_color;
-			std::string header_color;
-			std::string top_header_color;
-			std::string row_color;
-
-			ColorScheme(const std::string &border,
-						const std::string &header,
-						const std::string &top_header,
-						const std::string &row)
-				: border_color(border),
-				  header_color(header),
-				  top_header_color(top_header),
-				  row_color(row) {}
-		};
-
-		static inline const ColorScheme default_colors{
-			"\033[38;5;24m",	// Darker Blue (Border)
-			"\033[1;38;5;135m", // Purple (Header)
-			"\033[1;38;5;92m",	// Darker Purple (Top Header)
-			"\033[38;5;39m"		// Light Blue (Row)
-		};
-
-		// Alternate color scheme (still nice to read on terminals)
-		static inline const ColorScheme alternate_colors{
-			"\033[38;5;28m",	// Dark Green (Border)
-			"\033[1;38;5;208m", // Orange (Header)
-			"\033[1;38;5;130m", // Dark Orange (Top Header)
-			"\033[38;5;71m"		// Light Green (Row)
-		};
-
-		class BeautifulTable
-		{
-		private:
-			std::vector<std::pair<std::string, int>> top_header;
-			std::vector<std::string> header;
-			std::vector<std::vector<std::string>> rows;
-			std::vector<size_t> columnWidths;
-			bool useColor;
-			ColorScheme colors;
-			static inline const std::string RESET_COLOR = "\033[0m";
-
-			void updateColumnWidths(const std::vector<std::string> &row)
-			{
-				for (size_t i = 0; i < row.size(); ++i)
-				{
-					if (i >= columnWidths.size())
-					{
-						columnWidths.push_back(row[i].length());
-					}
-					else
-					{
-						columnWidths[i] = std::max<size_t>(columnWidths[i], row[i].length());
-					}
-				}
-			}
-
-			template <typename StreamType>
-			void printHorizontalLine(StreamType &stream) const
-			{
-				if (useColor)
-					stream << colors.border_color;
-				stream << "+";
-				for (size_t width : columnWidths)
-				{
-					stream << std::string(width + 2, '-') << "+";
-				}
-				if (useColor)
-					stream << RESET_COLOR;
-				stream << "\n";
-			}
-
-			template <typename StreamType>
-			void printRow(StreamType &stream, const std::vector<std::string> &row, const std::string &color, bool center = false) const
-			{
-				if (useColor)
-					stream << colors.border_color;
-				stream << "|";
-				if (useColor)
-					stream << RESET_COLOR << color;
-				for (size_t i = 0; i < row.size(); ++i)
-				{
-					if (center)
-					{
-						size_t padding = columnWidths[i] - row[i].length();
-						size_t leftPadding = padding / 2;
-						size_t rightPadding = padding - leftPadding;
-						stream << std::string(leftPadding + 1, ' ') << row[i] << std::string(rightPadding + 1, ' ');
-					}
-					else
-					{
-						stream << " " << std::setw(static_cast<int32_t>(columnWidths[i])) << std::right << row[i] << " ";
-					}
-					if (useColor)
-						stream << RESET_COLOR << colors.border_color;
-					stream << "|";
-					if (useColor)
-						stream << RESET_COLOR << color;
-				}
-				if (useColor)
-					stream << RESET_COLOR;
-				stream << "\n";
-			}
-
-			template <typename StreamType>
-			void printRow(StreamType &stream, const std::vector<std::pair<std::string, int>> &row, const std::string &color) const
-			{
-				if (useColor)
-					stream << colors.border_color;
-				stream << "|";
-				if (useColor)
-					stream << RESET_COLOR << color;
-				int y = 0;
-				for (size_t i = 0; i < row.size(); ++i)
-				{
-					size_t sum = row[i].second - 1;
-					for (int x = y; x < y + row[i].second; x++)
-					{
-						sum += columnWidths[x] + 2;
-					}
-					y += row[i].second;
-
-					size_t textWidth = row[i].first.length();
-					size_t totalPadding = sum - textWidth;
-					size_t leftPadding = totalPadding / 2;
-					size_t rightPadding = totalPadding - leftPadding;
-
-					// Print left padding
-					stream << std::string(leftPadding, ' ');
-
-					// Print text
-					stream << row[i].first;
-
-					// Print right padding
-					stream << std::string(rightPadding, ' ');
-					if (useColor)
-						stream << RESET_COLOR << colors.border_color;
-					stream << "|";
-					if (useColor)
-						stream << RESET_COLOR << color;
-				}
-				if (useColor)
-					stream << RESET_COLOR;
-				stream << "\n";
-			}
-
-		public:
-			BeautifulTable(const std::vector<std::string> &headerColumns, bool enableColor = false, const ColorScheme &colors = default_colors, const std::vector<std::pair<std::string, int>> &top_header = {})
-				: top_header(top_header), header(headerColumns), useColor(enableColor), colors(colors)
-			{
-				updateColumnWidths(header);
-			}
-
-			void addRow(const std::vector<std::string> &row)
-			{
-				if (row.size() != header.size())
-				{
-					throw std::invalid_argument("Row size must match header size");
-				}
-				rows.push_back(row);
-				updateColumnWidths(row);
-			}
-
-			template <typename StreamType>
-			void print(StreamType &stream) const
-			{
-				if (top_header.size() > 0)
-				{
-					printHorizontalLine(stream);
-					printRow(stream, top_header, colors.top_header_color);
-				}
-				printHorizontalLine(stream);
-				printRow(stream, header, colors.header_color, true);
-				printHorizontalLine(stream);
-				for (const auto &row : rows)
-				{
-					printRow(stream, row, colors.row_color);
-					printHorizontalLine(stream);
-				}
-			}
-
-			template <typename T>
-			static inline std::string table_string(const T &value)
-			{
-				std::ostringstream oss;
-				oss << value;
-				return oss.str();
-			}
-
-			static inline std::string table_time(uint_fast64_t nanoseconds)
-			{
-				return table_time(static_cast<double>(nanoseconds));
-			}
-
-			static inline std::string table_time(double nanoseconds)
-			{
-				const char *units[] = {"ns", "mcs", "ms", "s"};
-				int unit = 0;
-				double value = static_cast<double>(nanoseconds);
-				while (value >= 1000 && unit < 3)
-				{
-					value /= 1000;
-					unit++;
-				}
-				std::ostringstream oss;
-				oss << std::fixed << std::setprecision(2) << value << " " << units[unit];
-				return oss.str();
-			}
-
-			static inline std::string table_percentage(uint_fast64_t value, uint_fast64_t total)
-			{
-				if (total == 0)
-				{
-					return "nan%";
-				}
-
-				// Calculate the percentage
-				double percentage = (static_cast<double>(value) / total) * 100.0;
-
-				// Format the percentage as a string with 2 decimal places
-				std::ostringstream ss;
-				ss << std::fixed << std::setprecision(2) << percentage << "%";
-
-				return ss.str();
-			}
-
-			static inline std::string table_timepoint(const std::chrono::high_resolution_clock::time_point &tp)
-			{
-				auto system_tp = std::chrono::system_clock::now() +
-								 std::chrono::duration_cast<std::chrono::system_clock::duration>(
-									 tp - std::chrono::high_resolution_clock::now());
-
-				auto tt = std::chrono::system_clock::to_time_t(system_tp);
-				std::tm tm{};
+// Sum the member selected by `field` over every element of `vec`.
+// The accumulator starts value-initialised and has the member's decayed type.
+template <typename T, typename Field>
+auto sum_field(const std::vector<T> &vec, Field T::*field)
+{
+  using Accumulator = std::decay_t<decltype(std::declval<T>().*field)>;
+  const auto project = [field](const auto &element) { return element.*field; };
+  return std::transform_reduce(
+    OPT_EXEC_POLICY
+    vec.begin(), vec.end(),
+    Accumulator{},
+    std::plus<>(),
+    project);
+}
 
-#if defined(_WIN32)
-				localtime_s(&tm, &tt);
-#else
-				localtime_r(&tt, &tm);
+// Sum of the squared values of member `field` over all elements of `values`.
+// The result has the member's decayed type; the accumulator starts value-initialised.
+template <typename T, typename Field>
+auto sum_squared_field(const std::vector<T> &values, Field T::*field)
+{
+  using Accumulator = std::decay_t<decltype(std::declval<T>().*field)>;
+  const auto squared = [field](const T &element) {
+    return (element.*field) * (element.*field);
+  };
+  return std::transform_reduce(
+    OPT_EXEC_POLICY
+    values.begin(), values.end(),
+    Accumulator{},
+    std::plus<>(),
+    squared);
+}
+
+// Population standard deviation of member `field` around the supplied `mean`.
+// Returns 0.0 for an empty vector instead of dividing by zero (which would yield NaN).
+template <typename T, typename Field>
+double calculate_std_dev_field(std::vector<T> &values, Field T::*field, const double mean)
+{
+  if (values.empty()) return 0.0;
+  const double squared_deviation_sum = std::transform_reduce(
+    OPT_EXEC_POLICY
+    values.begin(), values.end(),
+    0.0,
+    std::plus<>(),
+    [mean, field](const T &v) {
+      const double deviation = static_cast<double>(v.*field) - mean;
+      return deviation * deviation;
+    });
+  return std::sqrt(squared_deviation_sum / static_cast<double>(values.size()));
+}
+
+// Collect the distinct values of member `field` over the pointed-to elements in an ordered std::set.
+template <typename T, typename Field>
+auto get_distinct_field_values(const std::vector<const T *> &vec, Field T::*field)
+{
+  using Value = std::remove_reference_t<decltype(std::declval<T>().*field)>;
+  std::set<Value> distinct_values;
+  for (const T *item : vec)
+  {
+    // std::set drops duplicates, so a plain insert per element is sufficient.
+    distinct_values.insert(item->*field);
+  }
+  return distinct_values;
+}
+
+// Collect the distinct values of member `field` over all elements of `vec` in an ordered std::set.
+template <typename T, typename Field>
+auto get_distinct_field_values(const std::vector<T> &vec, Field T::*field)
+{
+  using Value = std::remove_reference_t<decltype(std::declval<T>().*field)>;
+  std::set<Value> distinct_values;
+  for (const T &item : vec)
+  {
+    // std::set drops duplicates, so a plain insert per element is sufficient.
+    distinct_values.insert(item.*field);
+  }
+  return distinct_values;
+}
+
+template <typename T, typename Field>
+size_t count_distinct_field_values(const std::vector<T> &vec, Field T::*field)
+{
+  return get_distinct_field_values(vec, field).size(); // number of distinct values of `field` in `vec`
+}
+
+// Sort a vector of pointers in place by the pointed-to member; ascending unless `asc` is false.
+template <typename StructType, typename MemberType>
+void order_pointer_vector_by_field(std::vector<StructType *> &vec, MemberType StructType::*member, bool asc = true)
+{
+  const auto comes_first = [member, asc](const StructType *lhs, const StructType *rhs)
+  {
+    // Strict comparison in the requested direction.
+    return asc ? (lhs->*member) < (rhs->*member)
+               : (lhs->*member) > (rhs->*member);
+  };
+  std::sort(OPT_EXEC_POLICY vec.begin(), vec.end(), comes_first);
+}
+
+// Count the events held in all vectors of `events`.
+template <typename T>
+size_t countAllEvents(const std::deque<std::vector<T>> &events)
+{
+  // Each vector contributes its element count; the counts are added up as size_t.
+  const auto chunk_size = [](const auto &chunk) { return chunk.size(); };
+  return std::transform_reduce(
+    OPT_EXEC_POLICY
+    events.begin(),
+    events.end(),
+    size_t(0),
+    std::plus<>(),
+    chunk_size);
+}
+
+// Set of colour escape sequences applied to the different parts of a printed table.
+struct ColorScheme
+{
+  std::string border_color;     // table frame: '+', '-' and '|'
+  std::string header_color;     // column header row
+  std::string top_header_color; // spanning header row above the column headers
+  std::string row_color;        // data rows
+
+  // Stores a copy of each escape sequence.
+  ColorScheme(const std::string &border, const std::string &header,
+              const std::string &top_header, const std::string &row)
+    : border_color(border)
+    , header_color(header)
+    , top_header_color(top_header)
+    , row_color(row)
+  {}
+};
+
+static inline const ColorScheme default_colors{ // scheme used when the caller passes none
+  "\033[38;5;24m",    // Darker Blue (Border)
+  "\033[1;38;5;135m", // Purple (Header)
+  "\033[1;38;5;92m",  // Darker Purple (Top Header)
+  "\033[38;5;39m"     // Light Blue (Row)
+};
+
+// Alternate color scheme (still nice to read on terminals)
+static inline const ColorScheme alternate_colors{
+  "\033[38;5;28m",    // Dark Green (Border)
+  "\033[1;38;5;208m", // Orange (Header)
+  "\033[1;38;5;130m", // Dark Orange (Top Header)
+  "\033[38;5;71m"     // Light Green (Row)
+};
+
+class BeautifulTable
+{
+private:
+  std::vector<std::pair<std::string, int>> top_header;
+  std::vector<std::string> header;
+  std::vector<std::vector<std::string>> rows;
+  std::vector<size_t> columnWidths;
+  bool useColor;
+  ColorScheme colors;
+  static inline const std::string RESET_COLOR = "\033[0m";
+
+  // Grow the per-column widths so every cell of `row` fits.
+  void updateColumnWidths(const std::vector<std::string> &row)
+  {
+    // Columns seen for the first time start at width 0 and are widened below.
+    if (row.size() > columnWidths.size())
+    {
+      columnWidths.resize(row.size(), 0);
+    }
+    for (size_t column = 0; column < row.size(); ++column)
+    {
+      const size_t cellWidth = row[column].length();
+      columnWidths[column] = std::max<size_t>(columnWidths[column], cellWidth);
+    }
+  }
+
+  // Print a separator line such as "+-----+----+", sized to the current column widths.
+  template <typename StreamType>
+  void printHorizontalLine(StreamType &stream) const
+  {
+    std::string line = "+";
+    for (const size_t width : columnWidths)
+    {
+      line += std::string(width + 2, '-') + "+";
+    }
+    if (useColor)
+      stream << colors.border_color << line << RESET_COLOR << "\n";
+    else
+      stream << line << "\n";
+  }
+
+  // Print one table row; cells are right-aligned unless `center` is set. `color` is applied to the cell text.
+  template <typename StreamType>
+  void printRow(StreamType &stream, const std::vector<std::string> &row, const std::string &color, bool center = false) const
+  {
+    if (useColor)
+      stream << colors.border_color;
+    stream << "|";
+    if (useColor)
+      stream << RESET_COLOR << color;
+    for (size_t column = 0; column < row.size(); ++column)
+    {
+      const std::string &cell = row[column];
+      if (!center)
+        stream << " " << std::setw(static_cast<int32_t>(columnWidths[column])) << std::right << cell << " ";
+      else
+      {
+        // Split the spare width around the cell; an odd remainder goes to the right-hand side.
+        const size_t spare = columnWidths[column] - cell.length();
+        const size_t left = spare / 2;
+        stream << std::string(left + 1, ' ') << cell << std::string(spare - left + 1, ' ');
+      }
+      if (useColor)
+        stream << RESET_COLOR << colors.border_color;
+      stream << "|";
+      if (useColor)
+        stream << RESET_COLOR << color;
+    }
+    if (useColor)
+      stream << RESET_COLOR;
+    stream << "\n";
+  }
+
+  // Print the spanning top-header row. Each entry is (text, number of columns it spans);
+  // the text is centred across the combined width of those columns.
+  template <typename StreamType>
+  void printRow(StreamType &stream, const std::vector<std::pair<std::string, int>> &row, const std::string &color) const
+  {
+    if (useColor)
+      stream << colors.border_color;
+    stream << "|";
+    if (useColor)
+      stream << RESET_COLOR << color;
+    size_t firstColumn = 0;
+    for (const auto &[text, span] : row)
+    {
+      // Clamp the span to the columns that exist so columnWidths is never indexed out of range.
+      const size_t wanted = span > 0 ? static_cast<size_t>(span) : 0;
+      const size_t lastColumn = std::min<size_t>(columnWidths.size(), firstColumn + wanted);
+
+      // Each spanned column contributes its width plus two padding spaces, and every
+      // inner '|' that the span swallows adds one more character.
+      size_t spanWidth = (lastColumn > firstColumn) ? lastColumn - firstColumn - 1 : 0;
+      for (size_t column = firstColumn; column < lastColumn; ++column)
+      {
+        spanWidth += columnWidths[column] + 2;
+      }
+      firstColumn = lastColumn;
+
+      // Text wider than its span gets no padding; the unsigned subtraction must not wrap around.
+      const size_t totalPadding = spanWidth > text.length() ? spanWidth - text.length() : 0;
+      const size_t leftPadding = totalPadding / 2;
+      const size_t rightPadding = totalPadding - leftPadding;
+      stream << std::string(leftPadding, ' ') << text << std::string(rightPadding, ' ');
+      if (useColor)
+        stream << RESET_COLOR << colors.border_color;
+      stream << "|";
+      if (useColor)
+        stream << RESET_COLOR << color;
+    }
+    if (useColor)
+      stream << RESET_COLOR;
+    stream << "\n";
+  }
+
+public:
+  BeautifulTable(const std::vector<std::string> &headerColumns, bool enableColor = false, const ColorScheme &colors = default_colors, const std::vector<std::pair<std::string, int>> &top_header = {})
+    : top_header(top_header), header(headerColumns), useColor(enableColor), colors(colors)
+  { // Stores the header, the color settings and the optional spanning top header, then seeds the column widths from the header cells.
+    updateColumnWidths(header);
+  }
+
+  // Append a data row; the row must supply exactly one cell per header column.
+  void addRow(const std::vector<std::string> &row)
+  {
+    const bool matchesHeader = (row.size() == header.size());
+    if (!matchesHeader)
+      throw std::invalid_argument("Row size must match header size");
+    rows.push_back(row);
+    updateColumnWidths(row); // keep every column wide enough for the new cells
+  }
+
+  template <typename StreamType>
+  void print(StreamType &stream) const
+  { // Render the table: optional spanning header, centered column header, then each data row followed by a border line.
+    if (!top_header.empty())
+    {
+      printHorizontalLine(stream);
+      printRow(stream, top_header, colors.top_header_color);
+    }
+    printHorizontalLine(stream);
+    printRow(stream, header, colors.header_color, /*center=*/true);
+    printHorizontalLine(stream);
+    for (size_t r = 0; r < rows.size(); ++r)
+    {
+      printRow(stream, rows[r], colors.row_color);
+      printHorizontalLine(stream);
+    }
+  }
+
+  template <typename T>
+  static inline std::string table_string(const T &value)
+  { // Format `value` through its stream insertion operator and return the resulting text.
+    std::ostringstream formatted;
+    formatted << value;
+    return formatted.str();
+  }
+
+  static inline std::string table_time(uint_fast64_t nanoseconds)
+  { // Integer convenience overload: converts the count to double and forwards to table_time(double).
+    return table_time(static_cast<double>(nanoseconds));
+  }
+
+  // Format a duration given in nanoseconds with two decimals and the largest fitting unit (ns, us, ms or s).
+  static inline std::string table_time(double nanoseconds)
+  {
+    static constexpr const char *kUnits[] = {"ns", "us", "ms", "s"};
+    constexpr size_t kLastUnit = sizeof(kUnits) / sizeof(kUnits[0]) - 1;
+    size_t unitIndex = 0;
+    double scaled = nanoseconds;
+    // Step up one unit per factor of 1000; seconds is the ceiling.
+    for (; unitIndex < kLastUnit && scaled >= 1000; ++unitIndex)
+      scaled /= 1000;
+    std::ostringstream text;
+    text << std::fixed << std::setprecision(2) << scaled << " " << kUnits[unitIndex];
+    return text.str();
+  }
+
+  // Express `value` as a percentage of `total` with two decimals; yields "nan%" when `total` is zero.
+  static inline std::string table_percentage(uint_fast64_t value, uint_fast64_t total)
+  {
+    std::string result = "nan%";
+    if (total != 0)
+    {
+      // Divide in floating point so the fraction is not truncated.
+      const double numerator = static_cast<double>(value);
+      const double ratio = numerator / total;
+      const double percentage = ratio * 100.0;
+      std::ostringstream text;
+      text << std::fixed << std::setprecision(2) << percentage << "%";
+      result = text.str();
+    }
+    return result;
+  }
+
+  static inline std::string table_timepoint(const ActiveClock::time_point &tp)
+  { // Formats a clock time point for display by delegating to ActiveClock::to_string.
+    return ActiveClock::to_string(tp);
+  }
+
+  // Reduce a path to its final component and cap the result at `maxLength` characters.
+  // Over-long names are cut and suffixed with "..."; when `maxLength` leaves no room for the
+  // ellipsis (fewer than 3 characters) the name is simply truncated.
+  static inline std::string stable_shortenPath(const std::string &fullPath, size_t maxLength = 35)
+  {
+    constexpr size_t ellipsisLength = 3;
+    const std::string filename = std::filesystem::path(fullPath).filename().string();
+    if (filename.length() <= maxLength)
+      return filename;
+    // Guard the subtraction below: for maxLength < 3 it would wrap around as an unsigned
+    // value and the "shortened" result would be the whole name plus "...".
+    if (maxLength < ellipsisLength)
+      return filename.substr(0, maxLength);
+    return filename.substr(0, maxLength - ellipsisLength) + "...";
+  }
+
+  using bt = BeautifulTable; // short alias for the class name
+};
+
+
+
+
+
+
+struct Event
+{ // One recorded tracking scope: where it was created (file, line, function), on which thread, and when it started and ended.
+  ActiveClock::time_point start_time;
+  ActiveClock::time_point end_time;
+  int line;
+  int thread_id;
+  std::string_view filename; // non-owning view: the referenced characters must outlive this Event
+  std::string_view function; // non-owning view, same lifetime requirement as filename
+  unsigned int event_id; // combined with thread_id by get_unique_event_id to form a lookup key
+
+  Event(const ActiveClock::time_point &start_time, const ActiveClock::time_point &end_time, const std::string_view filename, const int line, const std::string_view function, const int thread_id, const unsigned int event_id)
+  : start_time(start_time), end_time(end_time), line(line), thread_id(thread_id), filename(filename), function(function), event_id(event_id)
+  {}
+};
+
+struct Simple_Event
+{ // Compact per-event record used by the statistics code: a time span, its duration and the combined thread/event id.
+  uint_fast64_t duration = 0; // create_simple_events fills this from ActiveClock::duration_ns(start_time, end_time)
+  ActiveClock::time_point start_time{};
+  int_fast64_t unique_id = 0; // value produced by get_unique_event_id for the originating Event
+  ActiveClock::time_point end_time{};
+  Simple_Event(const ActiveClock::time_point &start_time, const ActiveClock::time_point &end_time, const uint_fast64_t duration, const int_fast64_t unique_id) : duration(duration), start_time(start_time), unique_id(unique_id), end_time(end_time) {}
+  Simple_Event() {} // leaves every member at its in-class default
+};
+
+inline bool cmp_simple_event_by_duration_asc(const Simple_Event &a, const Simple_Event &b)
+{ // Ordering predicate for std::sort: true when `a` has the strictly shorter duration (ascending by duration).
+  return a.duration < b.duration;
+}
+inline bool cmp_simple_event_by_start_time_asc(const Simple_Event &a, const Simple_Event &b)
+{ // Ordering predicate for std::sort: true when `a` starts strictly before `b` (ascending by start time).
+  return a.start_time < b.start_time;
+}
+
+// Pack a thread id and a per-thread event id into a single 64-bit key:
+// the thread id is shifted into the upper 32 bits and the event id is added below it.
+inline uint_fast64_t get_unique_event_id(unsigned int thread_id, unsigned int event_id)
+{
+  const uint_fast64_t thread_part = static_cast<uint_fast64_t>(thread_id) << 32;
+  return thread_part + static_cast<uint_fast64_t>(event_id);
+}
+
+// Build one Simple_Event for every Event in `events`, in the same order. Each result carries the
+// event's time span, its duration from ActiveClock::duration_ns and its combined thread/event id.
+inline std::vector<Simple_Event> create_simple_events(const std::vector<Event> &events)
+{
+  const auto to_simple = [](const Event &source)
+  {
+    const auto duration = ActiveClock::duration_ns(source.start_time, source.end_time);
+    const auto unique_id = get_unique_event_id(source.thread_id, source.event_id);
+    return Simple_Event(source.start_time, source.end_time, duration, unique_id);
+  };
+
+  // Pre-size the output so std::transform can assign each slot in place.
+  std::vector<Simple_Event> converted(events.size());
+  std::transform(OPT_EXEC_POLICY events.cbegin(), events.cend(), converted.begin(), to_simple);
+  return converted;
+}
+
+// Pointer flavour of create_simple_events: builds one Simple_Event per pointed-to Event, in the same order.
+inline std::vector<Simple_Event> create_simple_events(const std::vector<const Event *> &events)
+{
+  const auto to_simple = [](const Event *source)
+  {
+    const auto duration = ActiveClock::duration_ns(source->start_time, source->end_time);
+    const auto unique_id = get_unique_event_id(source->thread_id, source->event_id);
+    return Simple_Event(source->start_time, source->end_time, duration, unique_id);
+  };
+
+  // Pre-size the output so std::transform can assign each slot in place.
+  std::vector<Simple_Event> converted(events.size());
+  std::transform(OPT_EXEC_POLICY events.cbegin(), events.cend(), converted.begin(), to_simple);
+  return converted;
+}
+
+// Merge overlapping or touching time spans into single spans and recompute each merged duration.
+// Precondition: `events` must already be sorted by start_time in ascending order.
+inline std::vector<Simple_Event> sorted_create_grouped_simple_events(const std::vector<Simple_Event> &events)
+{
+  std::vector<Simple_Event> merged{};
+  if (events.empty())
+    return merged;
+
+  // The first span opens the first group; every later span either extends the
+  // group currently at the back or starts a new one.
+  merged.push_back(events.front());
+  for (auto next = events.begin() + 1; next != events.end(); ++next)
+  {
+    Simple_Event &open_group = merged.back();
+    if (!(open_group.end_time >= next->start_time))
+    {
+      merged.push_back(*next);
+      continue;
+    }
+    open_group.end_time = std::max<ActiveClock::time_point>(open_group.end_time, next->end_time);
+  }
+
+  // Durations are derived from the final group boundaries rather than
+  // summed from the inputs, so overlapping time is counted once.
+  for (Simple_Event &group : merged)
+    group.duration = ActiveClock::duration_ns(group.start_time, group.end_time);
+
+  return merged;
+}
+
+// Collect the direct children of `parent_events_simple` as Simple_Events. `events_map` maps a
+// unique id to its Event and `child_graph` maps a parent's unique id to its children's ids.
+// Children recorded at the same file, function and line as their parent are skipped.
+// Throws std::out_of_range (from unordered_map::at) if an id has no entry in `events_map`.
+inline std::vector<Simple_Event> load_child_events_simple(const std::vector<Simple_Event> &parent_events_simple,
+                                                          const std::unordered_map<int_fast64_t,
+                                                          const Event *> &events_map,
+                                                          const std::unordered_map<int_fast64_t, std::vector<int_fast64_t>> &child_graph)
+{
+  std::vector<const Event *> child_events{};
+  for (const auto &simple_parent_event : parent_events_simple)
+  {
+    const auto children = child_graph.find(simple_parent_event.unique_id);
+    if (children == child_graph.end())
+      continue; // this parent has no entry in the child graph
+    const Event *parent_event = events_map.at(simple_parent_event.unique_id);
+    for (const auto child_id : children->second)
+    {
+      const Event *child_event = events_map.at(child_id);
+      const bool same_site = child_event->filename == parent_event->filename &&
+                             child_event->function == parent_event->function &&
+                             child_event->line == parent_event->line;
+      if (!same_site)
+        child_events.push_back(child_event);
+    }
+  }
+  return create_simple_events(child_events);
+}
+
+class EventGroup
+{ // Timing statistics for all events gathered in all_events; ctrack_result keeps one group per (file, function, line).
+public:
+  void calculateStats(unsigned int non_center_percent, const std::unordered_map<int_fast64_t, const Event *> &events_map, const std::unordered_map<int_fast64_t, std::vector<int_fast64_t>> &child_graph)
+  { // Fills the statistics members from all_events. non_center_percent: share (in percent) of events put into EACH of the fastest and slowest buckets.
+    if (all_events.size() == 0)
+      return;
+    // events_map / child_graph are only used to look up the children of this group's events (for the "exclusive" times).
+    // Sort compact copies by duration so the fastest / center / slowest buckets are contiguous index ranges.
+    auto all_events_simple = create_simple_events(all_events);
+    std::sort(OPT_EXEC_POLICY all_events_simple.begin(), all_events_simple.end(), cmp_simple_event_by_duration_asc);
+    all_cnt = static_cast<unsigned int>(all_events_simple.size());
+    const double factor = (1.0 / static_cast<double>(all_cnt));
+
+    auto all_child_events_simple = load_child_events_simple(all_events_simple, events_map, child_graph);
+
+    all_time_acc = sum_field(all_events_simple, &Simple_Event::duration);
+
+    const double all_mean = all_time_acc * factor;
+    if (std::fpclassify(all_mean) == FP_ZERO)
+      return; // a zero mean would make all_cv divide by zero; the remaining statistics keep their defaults
+
+    all_st = calculate_std_dev_field(all_events_simple, &Simple_Event::duration, all_mean); // std::sqrt(all_variance);
+    all_cv = all_st / all_mean;
+
+    all_thread_cnt = static_cast<unsigned int>(get_distinct_field_values(all_events, &Event::thread_id).size());
+    const uint_fast64_t wanted_non_center = static_cast<uint_fast64_t>(all_cnt) * non_center_percent / 100; // 64-bit product cannot overflow
+    const unsigned int amount_non_center = static_cast<unsigned int>(std::min<uint_fast64_t>(wanted_non_center, (all_cnt - 1) / 2)); // always leaves >= 1 event for the center bucket
+    fastest_range = non_center_percent;
+    slowest_range = non_center_percent < 100 ? 100 - non_center_percent : 0; // no unsigned wrap-around for percentages above 100
+
+    std::vector<Simple_Event> fastest_events_simple, slowest_events_simple, center_events_simple;
+    fastest_events_simple.reserve(amount_non_center);
+    slowest_events_simple.reserve(amount_non_center);
+    if (all_cnt > 2)
+      center_events_simple.reserve(all_cnt - 2 * amount_non_center);
+
+    for (unsigned int i = 0; i < all_events_simple.size(); i++)
+    {
+      if (i < amount_non_center)
+      {
+        fastest_events_simple.push_back(all_events_simple[i]);
+      }
+      else if (i >= all_cnt - amount_non_center)
+      {
+        slowest_events_simple.push_back(all_events_simple[i]);
+      }
+      else
+      {
+        center_events_simple.push_back(all_events_simple[i]);
+      }
+    }
+    if (amount_non_center > 0)
+    {
+      // fastest
+      fastest_min = fastest_events_simple[0].duration;
+      fastest_mean = sum_field(fastest_events_simple, &Simple_Event::duration) / static_cast<double>(amount_non_center);
+
+      // slowest
+      slowest_max = slowest_events_simple[slowest_events_simple.size() - 1].duration;
+      slowest_mean = sum_field(slowest_events_simple, &Simple_Event::duration) / static_cast<double>(amount_non_center);
+    }
+
+    // center (non-empty thanks to the clamp above; still sorted by duration at this point)
+    center_min = center_events_simple[0].duration;
+    center_max = center_events_simple[center_events_simple.size() - 1].duration;
+    center_mean = sum_field(center_events_simple, &Simple_Event::duration) / static_cast<double>(center_events_simple.size());
+    if (center_events_simple.size() % 2 == 1)
+      center_med = center_events_simple[center_events_simple.size() / 2].duration;
+    else
+      center_med = (center_events_simple[center_events_simple.size() / 2].duration + center_events_simple[center_events_simple.size() / 2 - 1].duration) / 2;
+
+    auto center_child_events_simple = load_child_events_simple(center_events_simple, events_map, child_graph);
+
+    std::sort(OPT_EXEC_POLICY center_events_simple.begin(), center_events_simple.end(), cmp_simple_event_by_start_time_asc);
+    center_grouped = sorted_create_grouped_simple_events(center_events_simple);
+    center_time_active = sum_field(center_grouped, &Simple_Event::duration);
+
+    std::sort(OPT_EXEC_POLICY center_child_events_simple.begin(), center_child_events_simple.end(), cmp_simple_event_by_start_time_asc);
+    auto center_child_events_grouped = sorted_create_grouped_simple_events(center_child_events_simple);
+    center_time_active_exclusive = center_time_active - sum_field(center_child_events_grouped, &Simple_Event::duration);
+
+    std::sort(OPT_EXEC_POLICY all_events_simple.begin(), all_events_simple.end(), cmp_simple_event_by_start_time_asc);
+    all_grouped = sorted_create_grouped_simple_events(all_events_simple);
+    all_time_active = sum_field(all_grouped, &Simple_Event::duration);
+
+    std::sort(OPT_EXEC_POLICY all_child_events_simple.begin(), all_child_events_simple.end(), cmp_simple_event_by_start_time_asc);
+    auto all_child_events_grouped = sorted_create_grouped_simple_events(all_child_events_simple);
+    all_time_active_exclusive = all_time_active - sum_field(all_child_events_grouped, &Simple_Event::duration);
+  }
+
+  // all_group
+
+  double all_cv = 0.0; // coefficient of variation: all_st / mean duration
+  double all_st = 0.0; // result of calculate_std_dev_field over all durations
+
+  unsigned int all_cnt = 0;                     // number of events in the group
+  uint_fast64_t all_time_acc = 0;               // plain sum of all event durations
+  uint_fast64_t all_time_active = 0;            // summed duration of the merged (overlap-free) event spans
+  uint_fast64_t all_time_active_exclusive = 0;  // all_time_active minus the merged duration of the events' children
+  unsigned int all_thread_cnt = 0;              // number of distinct thread ids among the events
+  std::vector<Simple_Event> all_grouped = {};   // merged spans of all events, sorted by start time
+  std::vector<const Event *> all_events = {};   // input: the events of this group, filled before calculateStats runs
+
+  // fastest_group
+  unsigned int fastest_range = 0; // upper percent bound of the fastest bucket as configured
+  uint_fast64_t fastest_min = 0;
+  double fastest_mean = 0.0;
+
+  // slowest group
+  unsigned int slowest_range = 0; // lower percent bound of the slowest bucket as configured
+  uint_fast64_t slowest_max = 0;
+  double slowest_mean = 0.0;
+
+  // center group
+
+  uint_fast64_t center_min = 0;
+  uint_fast64_t center_max = 0;
+  uint_fast64_t center_med = 0; // median duration; mean of the two middle values for an even count
+  double center_mean = 0;
+  uint_fast64_t center_time_active = 0;           // summed duration of the merged center spans
+  uint_fast64_t center_time_active_exclusive = 0; // center_time_active minus the merged duration of the center events' children
+  std::vector<Simple_Event> center_grouped = {};  // merged spans of the center events, sorted by start time
+
+  std::string filename = {};
+  std::string function_name = {};
+  int line = 0;
+
+private:
+};
+
+typedef std::vector<Event> t_events; // a list of recorded events; store keeps a deque of these and each thread writes through its own event_ptr
+typedef std::map<unsigned int, std::vector<unsigned int>> sub_events; // parent event id -> ids of the events EventHandler recorded as its children
+
+struct store
+{ // Process-wide tracking state shared by all threads (every member is an inline static).
+  inline static std::atomic<bool> write_events_locked = false; // EventHandler busy-waits while this is true
+  inline static std::mutex event_mutex; // held by fetch_event_t_id while it registers a thread
+  inline static ActiveClock::time_point track_start_time = ActiveClock::NOW();
+  inline static std::atomic<unsigned int> store_clear_cnt = 0; // EventHandler re-registers its event when this changed during its lifetime
+
+  inline static std::atomic<int> thread_cnt = -1; // pre-incremented per registration, so the first assigned id is 0
+  inline static std::deque<t_events> a_events{}; // fetch_event_t_id appends one entry per registration
+  inline static std::deque<sub_events> a_sub_events{}; // fetch_event_t_id appends one entry per registration
+
+  inline static std::deque<unsigned int> a_current_event_id{}, a_current_event_cnt{}, a_string_id{};
+
+  inline static std::deque<int> a_thread_ids{}; // storage for the ids the thread-local thread_id pointers refer to
+};
+inline thread_local t_events *event_ptr = nullptr; // set by fetch_event_t_id to this thread's entry of store::a_events
+inline thread_local sub_events *sub_events_ptr = nullptr; // set by fetch_event_t_id to this thread's entry of store::a_sub_events
+
+inline thread_local unsigned int *current_event_id = nullptr; // id of the event currently open on this thread (see EventHandler::register_event)
+inline thread_local unsigned int *current_event_cnt = nullptr; // per-thread counter; pre-incremented to produce each new event id
+inline thread_local unsigned int *string_id = nullptr; // set by fetch_event_t_id to this thread's entry of store::a_string_id
+
+inline thread_local int *thread_id = nullptr; // nullptr until fetch_event_t_id registers the thread; then points into store::a_thread_ids
+
+typedef std::map<int, EventGroup> line_result; // line number -> statistics group
+typedef std::map<std::string_view, line_result> function_result; // function name -> groups per line
+typedef std::map<std::string_view, function_result> filename_result; // file name -> groups per function and line
+
+struct ctrack_result_settings
+{ // Options that control how ctrack_result builds and filters its statistics.
+  unsigned int non_center_percent = 1; // percent of events assigned to each of the fastest and slowest buckets
+  double min_percent_active_exclusive = 0.0;			   // between 0-100; groups below this share of the total time are removed
+  double percent_exclude_fastest_active_exclusive = 0.0; // between 0-100; share of the groups removed from the end of the sorted list
+};
+
+struct summary_row
+{ // One line of the summary table; filled by ctrack_result::build_result_tables from an EventGroup.
+  std::string filename;
+  std::string function_name;
+  int line{};
+  int calls{}; // number of recorded events (EventGroup::all_cnt)
+  double percent_ae_bracket{}; // ae[center]% by configuration
+  double percent_ae_all{};	 // ae[0-100]%
+  std::chrono::nanoseconds time_ae_all{}; // from EventGroup::all_time_active_exclusive
+  std::chrono::nanoseconds time_a_all{}; // from EventGroup::all_time_active
+};
+
+struct summary_table
+{ // Container for all summary rows, in the order build_result_tables appended them.
+  std::vector<summary_row> rows;
+};
+
+struct detail_stats 
+{ // Full statistics for one EventGroup; filled by ctrack_result::build_result_tables.
+  // Info fields
+  std::string filename;
+  std::string function_name;
+  int line{};
+  std::chrono::nanoseconds time_acc{}; // Simple sum of all execution times (can exceed wall clock in MT)
+  std::chrono::nanoseconds sd{};		 // Standard deviation
+  double cv{}; // Coefficient of variation (sd/mean)
+  int calls{}; // Total number of calls
+  int threads{}; // Number of different threads that called this function
+
+  // Summary-like fields (for unified access)
+  double percent_ae_bracket{};			// ae[center]% as percentage of total time
+  double percent_ae_all{};				// ae[0-100]% as percentage of total time
+  std::chrono::nanoseconds time_ae_all{}; // Active exclusive time (wall clock minus child functions)
+  std::chrono::nanoseconds time_a_all{};	// Active time (actual wall clock time, handles MT overlap)
+
+  // Fastest/Center/Slowest stats
+  std::chrono::nanoseconds fastest_min{};
+  std::chrono::nanoseconds fastest_mean{};
+  std::chrono::nanoseconds center_min{};
+  std::chrono::nanoseconds center_mean{};
+  std::chrono::nanoseconds center_med{}; // Median of the center range
+  std::chrono::nanoseconds center_time_a{};  // Active time for center range
+  std::chrono::nanoseconds center_time_ae{}; // Active exclusive time for center range
+  std::chrono::nanoseconds center_max{};
+  std::chrono::nanoseconds slowest_mean{};
+  std::chrono::nanoseconds slowest_max{};
+
+  // Percentile ranges for reference
+  unsigned int fastest_range{}; // upper bound of the fastest bucket; printed as "fastest[0-N]%"
+  unsigned int slowest_range{}; // lower bound of the slowest bucket; printed as "slowest[N-100]%"
+};
+
+struct detail_table
+{ // Container for all detail rows, in the order build_result_tables appended them.
+  std::vector<detail_stats> rows;
+};
+
+struct ctrack_result_tables
+{ // Structured result of one evaluation: meta data, both tables and the settings that produced them.
+  // Meta information
+  ActiveClock::time_point start_time;
+  ActiveClock::time_point end_time;
+  std::chrono::nanoseconds time_total{}; // duration between start_time and end_time
+  std::chrono::nanoseconds time_ctracked{}; // filled from ctrack_result::sum_time_active_exclusive
+
+  // Table data
+  summary_table summary;
+  detail_table details;
+
+  // Settings used
+  ctrack_result_settings settings;
+};
+
+class ctrack_result
+{ // Groups recorded events per (file, function, line), computes their statistics and renders them as text tables.
+public:
+  ctrack_result(const ctrack_result_settings &settings, const ActiveClock::time_point &track_start_time, const ActiveClock::time_point &track_end_time) : settings(settings), track_start_time(track_start_time), track_end_time(track_end_time)
+  {
+    time_total = ActiveClock::duration_ns(track_start_time, track_end_time);
+    center_intervall_str = "[" + std::to_string(settings.non_center_percent) + "-" + std::to_string(100 - settings.non_center_percent) + "]";
+  }
+  // Writes the run overview and the per-call-site summary table to `stream`; requires calculate_stats() to have filled `tables`.
+  template <typename StreamType>
+  void get_summary_table(StreamType &stream, bool use_color = false)
+  {
+    BeautifulTable info({
+      "Start",
+      "End",
+      "time total",
+      "time ctracked",
+      "time ctracked %",
+    }, use_color, alternate_colors);
+
+    info.addRow({BeautifulTable::table_timepoint(tables.start_time),
+      BeautifulTable::table_timepoint(tables.end_time),
+      BeautifulTable::table_time(static_cast<uint_fast64_t>(tables.time_total.count())),
+      BeautifulTable::table_time(static_cast<uint_fast64_t>(tables.time_ctracked.count())),
+      BeautifulTable::table_percentage(static_cast<uint_fast64_t>(tables.time_ctracked.count()), static_cast<uint_fast64_t>(tables.time_total.count()))});
+
+    info.print(stream);
+    BeautifulTable table({
+      "filename",
+      "function", 
+      "line", 
+      "calls", 
+      "ae" + center_intervall_str + "%", 
+      "ae[0-100]%",
+      "time ae[0-100]", 
+      "time a[0-100]"}, use_color, alternate_colors);
+
+    for (const auto &row : tables.summary.rows)
+    {
+      table.addRow({
+        BeautifulTable::stable_shortenPath(row.filename),
+        row.function_name,
+        BeautifulTable::table_string(row.line),
+        BeautifulTable::table_string(row.calls),
+        BeautifulTable::table_percentage(static_cast<uint_fast64_t>(row.percent_ae_bracket * tables.time_total.count() / 100.0), tables.time_total.count()),
+        BeautifulTable::table_percentage(static_cast<uint_fast64_t>(row.percent_ae_all * tables.time_total.count() / 100.0), tables.time_total.count()),
+        BeautifulTable::table_time(static_cast<uint_fast64_t>(row.time_ae_all.count())),
+        BeautifulTable::table_time(static_cast<uint_fast64_t>(row.time_a_all.count()))
+      });
+    }
+
+    table.print(stream);
+  }
+  // Writes one info table plus one fastest/center/slowest table per detail row, walking the rows from last to first (first to last when `reverse_vector` is true).
+  template <typename StreamType>
+  void get_detail_table(StreamType &stream, bool use_color = false, bool reverse_vector = false)
+  {
+    auto details_copy = tables.details.rows;
+    if (reverse_vector)
+    {
+      std::reverse(details_copy.begin(), details_copy.end());
+    }
+    for (int i = static_cast<int>(details_copy.size()) - 1; i >= 0; i--)
+    {
+      const auto &detail = details_copy[i];
+
+      BeautifulTable info({"filename", "function", "line", "time acc", "sd", "cv", "calls", "threads"},
+                          use_color, default_colors);
+      info.addRow({
+        BeautifulTable::stable_shortenPath(detail.filename),
+        detail.function_name,
+        BeautifulTable::table_string(detail.line),
+        BeautifulTable::table_time(static_cast<uint_fast64_t>(detail.time_acc.count())),
+        BeautifulTable::table_time(static_cast<uint_fast64_t>(detail.sd.count())),
+        BeautifulTable::table_string(detail.cv),
+        BeautifulTable::table_string(detail.calls),
+        BeautifulTable::table_string(detail.threads)
+      });
+
+      const auto fastest_header = "fastest[0-" + std::to_string(detail.fastest_range)  + "]%";
+      const auto center_header  = "center" + center_intervall_str                  + "%";
+      const auto slowest_header = "slowest[" + std::to_string(detail.slowest_range)  + "-100]%";
+
+      BeautifulTable table(
+        {"min", "mean", "min", "mean", "med", "time a", "time ae", "max", "mean", "max"},
+        use_color,
+        default_colors,
+        {
+          {fastest_header, 2},
+          {center_header,  6},
+          {slowest_header, 2}
+        }
+      );
+
+      table.addRow({
+        BeautifulTable::table_time(static_cast<uint_fast64_t>(detail.fastest_min.count())),
+        BeautifulTable::table_time(static_cast<uint_fast64_t>(detail.fastest_mean.count())),
+        BeautifulTable::table_time(static_cast<uint_fast64_t>(detail.center_min.count())),
+        BeautifulTable::table_time(static_cast<uint_fast64_t>(detail.center_mean.count())),
+        BeautifulTable::table_time(static_cast<uint_fast64_t>(detail.center_med.count())),
+        BeautifulTable::table_time(static_cast<uint_fast64_t>(detail.center_time_a.count())),
+        BeautifulTable::table_time(static_cast<uint_fast64_t>(detail.center_time_ae.count())),
+        BeautifulTable::table_time(static_cast<uint_fast64_t>(detail.center_max.count())),
+        BeautifulTable::table_time(static_cast<uint_fast64_t>(detail.slowest_mean.count())),
+        BeautifulTable::table_time(static_cast<uint_fast64_t>(detail.slowest_max.count()))
+      });
+      info.print(stream);
+      table.print(stream);
+
+      stream << std::endl;
+    }
+  }
+  // Computes the statistics of every group in f_res, applies the two filters from `settings` and builds `tables`.
+  void calculate_stats()
+  {
+    std::vector<Simple_Event> grouped_events{};
+    for (auto &[filename, filename_entry] : f_res)
+    {
+      ctracked_files++;
+      for (auto &[function, function_entry] : filename_entry)
+      {
+        ctracked_functions++;
+        for (auto &[line, line_entry] : function_entry)
+        {
+          ctracked_uses++;
+          line_entry.filename = filename;
+          line_entry.function_name = function;
+          line_entry.line = line;
+          line_entry.calculateStats(settings.non_center_percent, a_events, child_graph);
+          sorted_events.push_back(&line_entry);
+          grouped_events.insert(grouped_events.end(), line_entry.all_grouped.begin(), line_entry.all_grouped.end());
+        }
+      }
+    }
+
+    std::sort(OPT_EXEC_POLICY grouped_events.begin(), grouped_events.end(), cmp_simple_event_by_start_time_asc);
+    auto all_grouped = sorted_create_grouped_simple_events(grouped_events);
+    sum_time_active_exclusive = sum_field(all_grouped, &Simple_Event::duration);
+
+    order_pointer_vector_by_field(sorted_events, &EventGroup::all_time_active_exclusive, false);
+
+    const double exclude_percent = std::min(std::max(settings.percent_exclude_fastest_active_exclusive, 0.0), 100.0);
+    const int fastest_events = static_cast<int>(sorted_events.size() * exclude_percent / 100);
+    // Clamping the setting to 0-100 keeps the erase range inside the vector (a value above 100 used to
+    // place its start before begin()). Per the original note, the fastest elements are at the back.
+    if (fastest_events > 0)
+      sorted_events.erase(sorted_events.end() - fastest_events, sorted_events.end());
+
+    uint_fast64_t min_time_active_exclusive = static_cast<uint_fast64_t>(time_total * settings.min_percent_active_exclusive / 100);
+    // Drop every group whose active-exclusive time is below the configured share of the total time span.
+    if (min_time_active_exclusive > 0)
+      sorted_events.erase(
+        std::remove_if(sorted_events.begin(), sorted_events.end(), [min_time_active_exclusive](EventGroup *e) { return e->all_time_active_exclusive < min_time_active_exclusive; }),
+        sorted_events.end());
+
+    // Build the structured result tables
+    build_result_tables();
+  }
+  // Takes ownership of the recorded events; populate_maps() stores pointers into this storage.
+  void move_events_from_store(std::deque<t_events> &events)
+  {
+    m_events_storage = std::move(events);
+  }
+  // Indexes every stored event twice: by call site in f_res and by unique id in a_events.
+  void populate_maps()
+  {
+    size_t total_events = 0;
+    for (const auto &event_vec : m_events_storage)
+    {
+      total_events += event_vec.size();
+    }
+    a_events.reserve(total_events);
+
+    for (const auto &event_vec : m_events_storage)
+    {
+      for (const auto &event : event_vec)
+      {
+        f_res[event.filename][event.function][event.line].all_events.push_back(&event);
+        a_events.insert({get_unique_event_id(event.thread_id, event.event_id), &event});
+      }
+    }
+  }
+  // Adds one thread's parent -> children relations to child_graph, translating the per-thread ids into unique ids.
+  void add_sub_events(const sub_events &s_events, const unsigned int thread_id_)
+  {
+
+    for (auto const &[key, val] : s_events)
+    {
+      int_fast64_t parent_id = get_unique_event_id(thread_id_, key);
+      for (const auto &child : val)
+      {
+        child_graph[parent_id].push_back(get_unique_event_id(thread_id_, child));
+      }
+    }
+  }
+
+  std::unordered_map<int_fast64_t, const Event *> a_events{}; // unique id -> event (points into m_events_storage)
+  filename_result f_res{}; // file -> function -> line -> EventGroup
+
+  std::unordered_map<int_fast64_t, std::vector<int_fast64_t>> child_graph{}; // unique id of a parent -> unique ids of its children
+  ctrack_result_settings settings;
+  ActiveClock::time_point track_start_time, track_end_time;
+  uint_fast64_t time_total; // ActiveClock::duration_ns(track_start_time, track_end_time), set in the constructor
+  uint_fast64_t sum_time_active_exclusive = 0; // summed duration of the merged active spans of all groups; reported as "time ctracked"
+
+  uint_fast64_t ctracked_files = 0;     // number of distinct files seen by calculate_stats
+  uint_fast64_t ctracked_functions = 0; // number of (file, function) pairs seen by calculate_stats
+  uint_fast64_t ctracked_uses = 0;      // number of (file, function, line) groups seen by calculate_stats
+
+  std::vector<EventGroup *> sorted_events{}; // groups ordered by all_time_active_exclusive and filtered; point into f_res
+  std::string center_intervall_str; // "[N-M]" label of the center range used in table headers
+  ctrack_result_tables tables{};
+
+private:
+  std::deque<t_events> m_events_storage; // owns the events that a_events and f_res point to
+  // Copies the meta data and one summary row plus one detail row per entry of sorted_events into `tables`.
+  void build_result_tables()
+  {
+    // Populate meta information
+    tables.start_time = track_start_time;
+    tables.end_time = track_end_time;
+    tables.time_total = std::chrono::nanoseconds(time_total);
+    tables.time_ctracked = std::chrono::nanoseconds(sum_time_active_exclusive);
+    tables.settings = settings;
+
+    // Clear existing data
+    tables.summary.rows.clear();
+    tables.details.rows.clear();
+
+    // Reserve space for efficiency
+    tables.summary.rows.reserve(sorted_events.size());
+    tables.details.rows.reserve(sorted_events.size());
+
+    // Build summary and detail rows from sorted_events
+    for (const auto &entry : sorted_events)
+    {
+      // Build summary row
+      summary_row sum_row;
+      sum_row.filename = std::string(entry->filename);
+      sum_row.function_name = std::string(entry->function_name);
+      sum_row.line = entry->line;
+      sum_row.calls = entry->all_cnt;
+      sum_row.percent_ae_bracket = (time_total > 0) ? (static_cast<double>(entry->center_time_active_exclusive) / time_total * 100.0) : 0.0;
+      sum_row.percent_ae_all = (time_total > 0) ? (static_cast<double>(entry->all_time_active_exclusive) / time_total * 100.0) : 0.0;
+      sum_row.time_ae_all = std::chrono::nanoseconds(entry->all_time_active_exclusive);
+      sum_row.time_a_all  = std::chrono::nanoseconds(entry->all_time_active);
+      tables.summary.rows.push_back(sum_row);
+
+      // Build detail row
+      detail_stats detail_row;
+      detail_row.filename = std::string(entry->filename);
+      detail_row.function_name = std::string(entry->function_name);
+      detail_row.line = entry->line;
+      detail_row.time_acc = std::chrono::nanoseconds(entry->all_time_acc);
+      detail_row.sd = std::chrono::nanoseconds(static_cast<uint_fast64_t>(entry->all_st));
+      detail_row.cv = entry->all_cv;
+      detail_row.calls = entry->all_cnt;
+      detail_row.threads = entry->all_thread_cnt;
+
+      // Summary-like fields (same calculations as summary row)
+      detail_row.percent_ae_bracket = (time_total > 0) ? (static_cast<double>(entry->center_time_active_exclusive) / time_total * 100.0) : 0.0;
+      detail_row.percent_ae_all = (time_total > 0) ? (static_cast<double>(entry->all_time_active_exclusive) / time_total * 100.0) : 0.0;
+      detail_row.time_ae_all  = std::chrono::nanoseconds(entry->all_time_active_exclusive);
+      detail_row.time_a_all   = std::chrono::nanoseconds(entry->all_time_active);
+      detail_row.fastest_min  = std::chrono::nanoseconds(entry->fastest_min); // Fastest/Center/Slowest stats start here
+      detail_row.fastest_mean = std::chrono::nanoseconds(static_cast<uint_fast64_t>(entry->fastest_mean));
+      detail_row.center_min   = std::chrono::nanoseconds(entry->center_min);
+      detail_row.center_mean  = std::chrono::nanoseconds(static_cast<uint_fast64_t>(entry->center_mean));
+      detail_row.center_med   = std::chrono::nanoseconds(entry->center_med);
+      detail_row.center_time_a  = std::chrono::nanoseconds(entry->center_time_active);
+      detail_row.center_time_ae = std::chrono::nanoseconds(entry->center_time_active_exclusive);
+      detail_row.center_max   = std::chrono::nanoseconds(entry->center_max);
+      detail_row.slowest_mean = std::chrono::nanoseconds(static_cast<uint_fast64_t>(entry->slowest_mean));
+      detail_row.slowest_max  = std::chrono::nanoseconds(entry->slowest_max);
+
+      detail_row.fastest_range = entry->fastest_range;
+      detail_row.slowest_range = entry->slowest_range;
+
+      tables.details.rows.push_back(detail_row);
+    }
+  }
+
+public:
+  const ctrack_result_tables &get_tables() const { return tables; } // read-only access to the tables built by calculate_stats()
+};
+
+inline int fetch_event_t_id()
+{ // Returns the calling thread's id; on first use (or when the stored id is -1) registers the thread and wires up its thread-local pointers.
+  if (thread_id == nullptr || *thread_id == -1)
+  {
+    std::scoped_lock lock(store::event_mutex); // registration modifies the shared deques in store
+
+    if (thread_id == nullptr)
+    {
+      store::a_thread_ids.emplace_back(++store::thread_cnt);
+      thread_id = &store::a_thread_ids[store::a_thread_ids.size() - 1]; // remember the slot that was just appended
+    }
+    else
+  {
+      *thread_id = ++store::thread_cnt; // slot already exists but holds -1: hand out a fresh id
+    }
+
+    store::a_events.emplace_back(t_events{});
+    store::a_sub_events.emplace_back(sub_events{});
+    store::a_current_event_id.emplace_back(0);
+    store::a_current_event_cnt.emplace_back(0);
+    store::a_string_id.emplace_back(0);
+
+    event_ptr = &store::a_events[*thread_id]; // NOTE(review): indexing by id assumes each deque holds at least id + 1 entries — confirm against the clear logic
+    sub_events_ptr = &store::a_sub_events[*thread_id];
+
+    current_event_id = &store::a_current_event_id[*thread_id];
+    current_event_cnt = &store::a_current_event_cnt[*thread_id];
+    string_id = &store::a_string_id[*thread_id];
+
+    event_ptr->reserve(100);
+  }
+  return *thread_id;
+}
+
+class EventHandler
+{ // Scope guard that times its own lifetime: the constructor registers an event and takes the start time, the destructor stores the finished Event.
+public:
+  EventHandler(int line = __builtin_LINE(), // the builtin defaults supply the source location; presumably that of the call site — confirm for the targeted compilers
+               const char *filename = __builtin_FILE(),
+               const char *function = __builtin_FUNCTION()) : line(line)
+  {
+#if defined(CTRACK_CLOCK_RDTSC) || defined(CTRACK_CLOCK_RDTSCP) || defined(CTRACK_CLOCK_RDTSCP_LFENCE)
+    static const bool _ = (calibrate_tsc(), true); // function-local static: calibrate_tsc() runs once, on first construction
+#endif
+    previous_store_clear_cnt = store::store_clear_cnt; // remembered so the destructor can detect a change of the counter
+    this->filename = filename;
+    this->function = function;
+    while (store::write_events_locked) {} // busy-wait until the flag is false again
+
+    register_event();
+    this->start_time = ActiveClock::NOW(); // needs calibration done
+  }
+  ~EventHandler()
+  {
+    auto end_time = ActiveClock::NOW(); // taken first so the waiting below is not part of the measured span
+    while (store::write_events_locked)
+    {
+    }
+
+    if (store::store_clear_cnt != previous_store_clear_cnt)
+    { // the counter changed during this scope: obtain fresh ids before writing the event
+      register_event();
+    }
+
+    event_ptr->emplace_back(Event{start_time, end_time, filename, line, function, t_id, event_id});
+
+    *current_event_id = previous_event_id; // the enclosing event becomes the current one again
+    if (previous_event_id > 0)
+    { // id 0 means "no enclosing event" (ids start at 1); otherwise record this event as a child of the enclosing one
+      auto &children = (*sub_events_ptr)[previous_event_id];
+      if (children.size() == children.capacity())
+        children.reserve(children.capacity() < 4 ? 4 : children.capacity() * 4); // grow to 4 first, then by a factor of 4
+      children.push_back(event_id);
+    }
+  }
+
+private:
+  void register_event()
+  { // Ensures the thread is registered, assigns the next per-thread event id and makes this event the current one.
+    t_id = fetch_event_t_id();
+    previous_event_id = *current_event_id;
+    event_id = ++(*current_event_cnt);
+    *current_event_id = event_id;
+  }
+  ActiveClock::time_point start_time;
+  int line;
+  unsigned int previous_store_clear_cnt; // value of store::store_clear_cnt seen by the constructor
+
+  std::string_view filename, function; // views of the strings passed to the constructor; not copied
+
+  int t_id; // id returned by fetch_event_t_id for the current thread
+  unsigned int event_id; // this event's per-thread id
+  unsigned int previous_event_id; // id of the event that was current when this one was registered (0 if none)
+};
+
+inline void clear_a_store() // Empties the store containers and resets ids, counters and the cached pointers; takes no lock itself.
+{
+  store::a_current_event_id.clear(); // each container is emptied, then asked to give up its spare capacity
+  store::a_current_event_id.shrink_to_fit();
+
+  store::a_current_event_cnt.clear();
+  store::a_current_event_cnt.shrink_to_fit();
+
+  store::a_string_id.clear();
+  store::a_string_id.shrink_to_fit();
+
+  store::a_events.clear();
+  store::a_events.shrink_to_fit();
+
+  store::a_sub_events.clear();
+  store::a_sub_events.shrink_to_fit();
+
+  store::thread_cnt = -1; // the next ++thread_cnt in fetch_event_t_id() yields 0
+  for (auto &entry : store::a_thread_ids)
+  {
+    entry = -1; // -1 makes fetch_event_t_id() assign a new id through the existing slot
+  }
+
+  event_ptr = nullptr; // drop the cached pointers into the containers cleared above
+  sub_events_ptr = nullptr;
+  current_event_id = nullptr;
+  current_event_cnt = nullptr;
+  string_id = nullptr;
+  thread_id = nullptr;
+
+  store::store_clear_cnt++; // EventHandler compares this counter in its destructor to detect the reset
+  store::track_start_time = ActiveClock::NOW(); // restart the tracking window
+}
+
+inline ctrack_result calc_stats_and_clear(ctrack_result_settings settings = {}) // Moves the recorded events into a ctrack_result, clears the store, and returns the computed result.
+{
+  auto end = ActiveClock::NOW(); // end of the tracking window being reported
+  ctrack_result res{settings, store::track_start_time, end};
+
+  // copy data
+  {
+    store::write_events_locked = true; // EventHandler busy-waits on this flag in its constructor and destructor
+    std::this_thread::sleep_for(std::chrono::milliseconds(100)); // fixed 100 ms pause; NOTE(review): presumably lets in-flight writers finish — confirm
+    std::scoped_lock lock(store::event_mutex); // held until the end of this inner scope
+
+    res.move_events_from_store(store::a_events);
+    res.populate_maps();
+
+    for (int thread_id_ = 0; thread_id_ <= store::thread_cnt; thread_id_++) // ids 0 through thread_cnt, inclusive
+    {
+      auto &t_sub_events = store::a_sub_events[thread_id_];
+      res.add_sub_events(t_sub_events, thread_id_);
+    }
+    clear_a_store(); // runs while event_mutex is still held
+    store::write_events_locked = false; // let waiting EventHandlers proceed
+  }
+
+  res.calculate_stats(); // executed after the lock and the flag have been released
+  store::track_start_time = ActiveClock::NOW(); // set again here, after clear_a_store() already set it once
+
+  return res;
+}
+
+inline void result_print(ctrack_result_settings settings = {}) // Computes the results via calc_stats_and_clear() and writes them to std::cout.
+{
+  auto res = calc_stats_and_clear(settings); // this also clears the store
+#if defined(CTRACK_CLOCK_RDTSC) || defined(CTRACK_CLOCK_RDTSCP) || defined(CTRACK_CLOCK_RDTSCP_LFENCE)
+  std::cout << "TSC frequency: " << cycles_per_ns << " GHz\n";
+#endif
+  std::cout << "Details" << std::endl; // the detail tables are printed before the summary
+  res.get_detail_table(std::cout, true); // NOTE(review): second argument is presumably use_color, as in the pre-change signature — confirm
+  std::cout << "Summary" << std::endl;
+  res.get_summary_table(std::cout, true); // NOTE(review): same assumption about the second argument — confirm
+}
+
+inline std::string result_as_string(ctrack_result_settings settings = {})
+{
+  auto res = calc_stats_and_clear(settings);
+  std::stringstream ss;
+#if defined(CTRACK_CLOCK_RDTSC) || defined(CTRACK_CLOCK_RDTSCP) || defined(CTRACK_CLOCK_RDTSCP_LFENCE)
+  ss << "TSC frequency: " << cycles_per_ns << " GHz\n";
 #endif
+  ss << "Summary\n";
+  res.get_summary_table(ss, false);
+  ss << "Details\n";
+  res.get_detail_table(ss, false, true);
 
-				std::ostringstream oss;
-				oss << std::put_time(&tm, "%Y-%m-%d %H:%M:%S");
-				return oss.str();
-			}
-
-			static inline std::string stable_shortenPath(const std::string &fullPath, size_t maxLength = 35)
-			{
-				namespace fs = std::filesystem;
-
-				fs::path path(fullPath);
-				std::string filename = path.filename().string();
-
-				if (filename.length() <= maxLength)
-				{
-					return filename;
-				}
-
-				// If filename is too long, truncate it and add ...
-				return filename.substr(0, maxLength - 3) + "...";
-			}
-
-			using bt = BeautifulTable;
-		};
-
-		struct Event
-		{
-			std::chrono::high_resolution_clock::time_point start_time;
-			std::chrono::high_resolution_clock::time_point end_time;
-			int line;
-			int thread_id;
-			std::string_view filename;
-			std::string_view function;
-			unsigned int event_id;
-			Event(const std::chrono::high_resolution_clock::time_point &start_time, const std::chrono::high_resolution_clock::time_point &end_time, const std::string_view filename, const int line, const std::string_view function, const int thread_id, const unsigned int event_id)
-				: start_time(start_time), end_time(end_time), line(line), thread_id(thread_id), filename(filename), function(function), event_id(event_id)
-			{
-			}
-		};
-
-		struct Simple_Event
-		{
-			uint_fast64_t duration = 0;
-			std::chrono::high_resolution_clock::time_point start_time{};
-			int_fast64_t unique_id = 0;
-			std::chrono::high_resolution_clock::time_point end_time{};
-			Simple_Event(const std::chrono::high_resolution_clock::time_point &start_time, const std::chrono::high_resolution_clock::time_point &end_time, const uint_fast64_t duration, const int_fast64_t unique_id) : duration(duration), start_time(start_time), unique_id(unique_id), end_time(end_time) {}
-			Simple_Event() {}
-		};
-
-		inline bool cmp_simple_event_by_duration_asc(const Simple_Event &a, const Simple_Event &b)
-		{
-			return a.duration < b.duration;
-		}
-		inline bool cmp_simple_event_by_start_time_asc(const Simple_Event &a, const Simple_Event &b)
-		{
-			return a.start_time < b.start_time;
-		}
-
-		inline uint_fast64_t get_unique_event_id(unsigned int thread_id, unsigned int event_id)
-		{
-			uint_fast64_t uniqueId = static_cast<uint_fast64_t>(thread_id);
-			uniqueId = uniqueId << 32;
-			uniqueId += static_cast<uint_fast64_t>(event_id);
-			return uniqueId;
-		}
-
-		inline std::vector<Simple_Event> create_simple_events(const std::vector<Event> &events)
-		{
-			std::vector<Simple_Event> simple_events{};
-			simple_events.resize(events.size());
-			std::transform(
-				OPT_EXEC_POLICY
-					events.begin(),
-				events.end(),
-				simple_events.begin(),
-				[](const Event &event)
-				{
-					Simple_Event simple_event(event.start_time, event.end_time, std::chrono::duration_cast<std::chrono::nanoseconds>(event.end_time - event.start_time).count(), get_unique_event_id(event.thread_id, event.event_id));
-					return simple_event;
-				});
-			return simple_events;
-		}
-
-		inline std::vector<Simple_Event> create_simple_events(const std::vector<const Event *> &events)
-		{
-			std::vector<Simple_Event> simple_events{};
-			simple_events.resize(events.size());
-			std::transform(
-				OPT_EXEC_POLICY
-					events.begin(),
-				events.end(),
-				simple_events.begin(),
-				[](const Event *event)
-				{
-					Simple_Event simple_event(event->start_time, event->end_time, std::chrono::duration_cast<std::chrono::nanoseconds>(event->end_time - event->start_time).count(), get_unique_event_id(event->thread_id, event->event_id));
-					return simple_event;
-				});
-			return simple_events;
-		}
-
-		// requires already sorted
-		inline std::vector<Simple_Event> sorted_create_grouped_simple_events(const std::vector<Simple_Event> &events)
-		{
-			std::vector<Simple_Event> result{};
-			if (events.size() == 0)
-				return result;
-			result.push_back(events[0]);
-			unsigned int current_idx = 0;
-
-			for (size_t i = 1; i < events.size(); i++)
-			{
-				if (result[current_idx].end_time >= events[i].start_time)
-				{
-					result[current_idx].end_time = std::max<std::chrono::high_resolution_clock::time_point>(result[current_idx].end_time, events[i].end_time);
-				}
-				else
-				{
-					result.push_back(events[i]);
-					current_idx++;
-				}
-			}
-
-			for (auto &entry : result)
-			{
-				entry.duration = std::chrono::duration_cast<std::chrono::nanoseconds>(entry.end_time - entry.start_time).count();
-			}
-
-			return result;
-		}
-
-		inline std::vector<Simple_Event> load_child_events_simple(const std::vector<Simple_Event> &parent_events_simple,
-																  const std::unordered_map<int_fast64_t, const Event *> &events_map, const std::unordered_map<int_fast64_t, std::vector<int_fast64_t>> &child_graph)
-		{
-			std::vector<const Event *> child_events{};
-
-			// std::set< int_fast64_t> parent_ids = get_distinct_field_values(parent_events_simple, &Simple_Event::unique_id);
-			for (const auto &simple_parent_event : parent_events_simple)
-			{
-				auto it = child_graph.find(simple_parent_event.unique_id);
-				if (it != child_graph.end())
-				{
-					for (auto &child_id : it->second)
-					{
-						auto &child_event = events_map.at(child_id);
-						auto &parent_event = events_map.at(simple_parent_event.unique_id);
-						if (child_event->filename == parent_event->filename &&
-							child_event->function == parent_event->function &&
-							child_event->line == parent_event->line)
-							continue;
-
-						child_events.push_back(child_event);
-					}
-				}
-			}
-
-			return create_simple_events(child_events);
-		};
-
-		class EventGroup
-		{
-		public:
-			void calculateStats(unsigned int non_center_percent, const std::unordered_map<int_fast64_t, const Event *> &events_map, const std::unordered_map<int_fast64_t, std::vector<int_fast64_t>> &child_graph)
-			{
-				if (all_events.size() == 0)
-					return;
-
-				auto all_events_simple = create_simple_events(all_events);
-				std::sort(OPT_EXEC_POLICY all_events_simple.begin(), all_events_simple.end(), cmp_simple_event_by_duration_asc);
-				all_cnt = static_cast<unsigned int>(all_events_simple.size());
-				const double factor = (1.0 / static_cast<double>(all_cnt));
-
-				auto all_child_events_simple = load_child_events_simple(all_events_simple, events_map, child_graph);
-
-				all_time_acc = sum_field(all_events_simple, &Simple_Event::duration);
-
-				const double all_mean = all_time_acc * factor;
-				if (std::fpclassify(all_mean) == FP_ZERO)
-					return;
-
-				all_st = calculate_std_dev_field(all_events_simple, &Simple_Event::duration, all_mean); // std::sqrt(all_variance);
-				all_cv = all_st / all_mean;
-
-				all_thread_cnt = static_cast<unsigned int>(get_distinct_field_values(all_events, &Event::thread_id).size());
-				unsigned int amount_non_center = all_cnt * non_center_percent / 100;
-
-				fastest_range = non_center_percent;
-				slowest_range = 100 - non_center_percent;
-
-				std::vector<Simple_Event> fastest_events_simple, slowest_events_simple, center_events_simple;
-				fastest_events_simple.reserve(amount_non_center);
-				slowest_events_simple.reserve(amount_non_center);
-				if (all_cnt > 2)
-					center_events_simple.reserve(all_cnt - 2 * amount_non_center);
-
-				for (unsigned int i = 0; i < all_events_simple.size(); i++)
-				{
-					if (i < amount_non_center)
-					{
-						fastest_events_simple.push_back(all_events_simple[i]);
-					}
-					else if (i >= all_cnt - amount_non_center)
-					{
-						slowest_events_simple.push_back(all_events_simple[i]);
-					}
-					else
-					{
-						center_events_simple.push_back(all_events_simple[i]);
-					}
-				}
-				if (amount_non_center > 0)
-				{
-					// fastest
-					fastest_min = fastest_events_simple[0].duration;
-					fastest_mean = sum_field(fastest_events_simple, &Simple_Event::duration) / static_cast<double>(amount_non_center);
-
-					// slowest
-					slowest_max = slowest_events_simple[slowest_events_simple.size() - 1].duration;
-					slowest_mean = sum_field(slowest_events_simple, &Simple_Event::duration) / static_cast<double>(amount_non_center);
-				}
-
-				// center
-				center_min = center_events_simple[0].duration;
-				center_max = center_events_simple[center_events_simple.size() - 1].duration;
-				center_mean = sum_field(center_events_simple, &Simple_Event::duration) / static_cast<double>(center_events_simple.size());
-				if (center_events_simple.size() % 2 == 1)
-					center_med = center_events_simple[center_events_simple.size() / 2].duration;
-				else
-					center_med = (center_events_simple[center_events_simple.size() / 2].duration + center_events_simple[center_events_simple.size() / 2 - 1].duration) / 2;
-
-				auto center_child_events_simple = load_child_events_simple(center_events_simple, events_map, child_graph);
-
-				std::sort(OPT_EXEC_POLICY center_events_simple.begin(), center_events_simple.end(), cmp_simple_event_by_start_time_asc);
-				center_grouped = sorted_create_grouped_simple_events(center_events_simple);
-				center_time_active = sum_field(center_grouped, &Simple_Event::duration);
-
-				std::sort(OPT_EXEC_POLICY center_child_events_simple.begin(), center_child_events_simple.end(), cmp_simple_event_by_start_time_asc);
-				auto center_child_events_grouped = sorted_create_grouped_simple_events(center_child_events_simple);
-				center_time_active_exclusive = center_time_active - sum_field(center_child_events_grouped, &Simple_Event::duration);
-
-				std::sort(OPT_EXEC_POLICY all_events_simple.begin(), all_events_simple.end(), cmp_simple_event_by_start_time_asc);
-				all_grouped = sorted_create_grouped_simple_events(all_events_simple);
-				all_time_active = sum_field(all_grouped, &Simple_Event::duration);
-
-				std::sort(OPT_EXEC_POLICY all_child_events_simple.begin(), all_child_events_simple.end(), cmp_simple_event_by_start_time_asc);
-				auto all_child_events_grouped = sorted_create_grouped_simple_events(all_child_events_simple);
-				all_time_active_exclusive = all_time_active - sum_field(all_child_events_grouped, &Simple_Event::duration);
-			}
-
-			// all_group
-
-			double all_cv = 0.0;
-			double all_st = 0.0;
-
-			unsigned int all_cnt = 0;
-			uint_fast64_t all_time_acc = 0;
-			uint_fast64_t all_time_active = 0;
-			uint_fast64_t all_time_active_exclusive = 0;
-			unsigned int all_thread_cnt = 0;
-			std::vector<Simple_Event> all_grouped = {};
-			std::vector<const Event *> all_events = {};
-
-			// fastest_group
-			unsigned int fastest_range = 0;
-			uint_fast64_t fastest_min = 0;
-			double fastest_mean = 0.0;
-
-			// slowest group
-			unsigned int slowest_range = 0;
-			uint_fast64_t slowest_max = 0;
-			double slowest_mean = 0.0;
-
-			// center group
-
-			uint_fast64_t center_min = 0;
-			uint_fast64_t center_max = 0;
-			uint_fast64_t center_med = 0;
-			double center_mean = 0;
-			uint_fast64_t center_time_active = 0;
-			uint_fast64_t center_time_active_exclusive = 0;
-			std::vector<Simple_Event> center_grouped = {};
-
-			std::string filename = {};
-			std::string function_name = {};
-			int line = 0;
-
-		private:
-		};
-
-		typedef std::vector<Event> t_events;
-		typedef std::map<unsigned int, std::vector<unsigned int>> sub_events;
-
-		struct store
-		{
-			inline static std::atomic<bool> write_events_locked = false;
-			inline static std::mutex event_mutex;
-			inline static std::chrono::high_resolution_clock::time_point track_start_time = std::chrono::high_resolution_clock::now();
-			inline static std::atomic<unsigned int> store_clear_cnt = 0;
-
-			inline static std::atomic<int> thread_cnt = -1;
-			inline static std::deque<t_events> a_events{};
-			inline static std::deque<sub_events> a_sub_events{};
-
-			inline static std::deque<unsigned int> a_current_event_id{}, a_current_event_cnt{}, a_string_id{};
-
-			inline static std::deque<int> a_thread_ids{};
-		};
-
-		inline thread_local t_events *event_ptr = nullptr;
-		inline thread_local sub_events *sub_events_ptr = nullptr;
-
-		inline thread_local unsigned int *current_event_id = nullptr;
-		inline thread_local unsigned int *current_event_cnt = nullptr;
-		inline thread_local unsigned int *string_id = nullptr;
-
-		inline thread_local int *thread_id = nullptr;
-
-		typedef std::map<int, EventGroup> line_result;
-		typedef std::map<std::string_view, line_result> function_result;
-		typedef std::map<std::string_view, function_result> filename_result;
-
-		struct ctrack_result_settings
-		{
-			unsigned int non_center_percent = 1;
-			double min_percent_active_exclusive = 0.0;			   // between 0-100
-			double percent_exclude_fastest_active_exclusive = 0.0; // between 0-100
-		};
-
-		struct summary_row
-		{
-			std::string filename;
-			std::string function_name;
-			int line{};
-			int calls{};
-			double percent_ae_bracket{}; // ae[center]% by configuration
-			double percent_ae_all{};	 // ae[0-100]%
-			std::chrono::nanoseconds time_ae_all{};
-			std::chrono::nanoseconds time_a_all{};
-		};
-
-		struct summary_table
-		{
-			std::vector<summary_row> rows;
-		};
-
-		struct detail_stats
-		{
-			// Info fields
-			std::string filename;
-			std::string function_name;
-			int line{};
-			std::chrono::nanoseconds time_acc{}; // Simple sum of all execution times (can exceed wall clock in MT)
-			std::chrono::nanoseconds sd{};		 // Standard deviation
-			double cv{};						 // Coefficient of variation (sd/mean)
-			int calls{};						 // Total number of calls
-			int threads{};						 // Number of different threads that called this function
-
-			// Summary-like fields (for unified access)
-			double percent_ae_bracket{};			// ae[center]% as percentage of total time
-			double percent_ae_all{};				// ae[0-100]% as percentage of total time
-			std::chrono::nanoseconds time_ae_all{}; // Active exclusive time (wall clock minus child functions)
-			std::chrono::nanoseconds time_a_all{};	// Active time (actual wall clock time, handles MT overlap)
-
-			// Fastest/Center/Slowest stats
-			std::chrono::nanoseconds fastest_min{};
-			std::chrono::nanoseconds fastest_mean{};
-			std::chrono::nanoseconds center_min{};
-			std::chrono::nanoseconds center_mean{};
-			std::chrono::nanoseconds center_med{};
-			std::chrono::nanoseconds center_time_a{};  // Active time for center range
-			std::chrono::nanoseconds center_time_ae{}; // Active exclusive time for center range
-			std::chrono::nanoseconds center_max{};
-			std::chrono::nanoseconds slowest_mean{};
-			std::chrono::nanoseconds slowest_max{};
-
-			// Percentile ranges for reference
-			unsigned int fastest_range{};
-			unsigned int slowest_range{};
-		};
-
-		struct detail_table
-		{
-			std::vector<detail_stats> rows;
-		};
-
-		struct ctrack_result_tables
-		{
-			// Meta information
-			std::chrono::high_resolution_clock::time_point start_time;
-			std::chrono::high_resolution_clock::time_point end_time;
-			std::chrono::nanoseconds time_total{};
-			std::chrono::nanoseconds time_ctracked{};
-
-			// Table data
-			summary_table summary;
-			detail_table details;
-
-			// Settings used
-			ctrack_result_settings settings;
-		};
-
-		class ctrack_result
-		{
-		public:
-			ctrack_result(const ctrack_result_settings &settings, const std::chrono::high_resolution_clock::time_point &track_start_time, const std::chrono::high_resolution_clock::time_point &track_end_time) : settings(settings), track_start_time(track_start_time), track_end_time(track_end_time)
-			{
-				time_total = std::chrono::duration_cast<std::chrono::nanoseconds>(
-								 track_end_time - track_start_time)
-								 .count();
-				center_intervall_str = "[" + std::to_string(settings.non_center_percent) + "-" + std::to_string(100 - settings.non_center_percent) + "]";
-			}
-
-			template <typename StreamType>
-			void get_summary_table(StreamType &stream, bool use_color = false)
-			{
-				BeautifulTable info({
-										"Start",
-										"End",
-										"time total",
-										"time ctracked",
-										"time ctracked %",
-									},
-									use_color, alternate_colors);
-				info.addRow({BeautifulTable::table_timepoint(tables.start_time), BeautifulTable::table_timepoint(tables.end_time),
-							 BeautifulTable::table_time(static_cast<uint_fast64_t>(tables.time_total.count())), BeautifulTable::table_time(static_cast<uint_fast64_t>(tables.time_ctracked.count())),
-							 BeautifulTable::table_percentage(static_cast<uint_fast64_t>(tables.time_ctracked.count()), static_cast<uint_fast64_t>(tables.time_total.count()))});
-
-				info.print(stream);
-				BeautifulTable table({"filename", "function", "line", "calls", "ae" + center_intervall_str + "%", "ae[0-100]%",
-									  "time ae[0-100]", "time a[0-100]"},
-									 use_color, alternate_colors);
-				for (const auto &row : tables.summary.rows)
-				{
-					table.addRow({BeautifulTable::stable_shortenPath(row.filename), row.function_name, BeautifulTable::table_string(row.line),
-								  BeautifulTable::table_string(row.calls),
-								  BeautifulTable::table_percentage(static_cast<uint_fast64_t>(row.percent_ae_bracket * tables.time_total.count() / 100.0), static_cast<uint_fast64_t>(tables.time_total.count())),
-								  BeautifulTable::table_percentage(static_cast<uint_fast64_t>(row.percent_ae_all * tables.time_total.count() / 100.0), static_cast<uint_fast64_t>(tables.time_total.count())),
-								  BeautifulTable::table_time(static_cast<uint_fast64_t>(row.time_ae_all.count())),
-								  BeautifulTable::table_time(static_cast<uint_fast64_t>(row.time_a_all.count()))});
-				}
-
-				table.print(stream);
-			}
-
-			template <typename StreamType>
-			void get_detail_table(StreamType &stream, bool use_color = false, bool reverse_vector = false)
-			{
-				auto details_copy = tables.details.rows;
-				if (reverse_vector)
-				{
-					std::reverse(details_copy.begin(), details_copy.end());
-				}
-				for (int i = static_cast<int>(details_copy.size()) - 1; i >= 0; i--)
-				{
-					const auto &detail = details_copy[i];
-
-					BeautifulTable info({"filename", "function", "line", "time acc", "sd", "cv", "calls", "threads"}, use_color, default_colors);
-					info.addRow({BeautifulTable::stable_shortenPath(detail.filename), detail.function_name, BeautifulTable::table_string(detail.line),
-								 BeautifulTable::table_time(static_cast<uint_fast64_t>(detail.time_acc.count())),
-								 BeautifulTable::table_time(static_cast<uint_fast64_t>(detail.sd.count())), BeautifulTable::table_string(detail.cv),
-								 BeautifulTable::table_string(detail.calls), BeautifulTable::table_string(detail.threads)});
-
-					BeautifulTable table({"min", "mean", "min", "mean", "med", "time a", "time ae", "max", "mean", "max"}, use_color, default_colors,
-										 {{"fastest[0-" + std::to_string(detail.fastest_range) + "]%", 2}, {"center" + center_intervall_str + "%", 6}, {"slowest[" + std::to_string(detail.slowest_range) + "-100]%", 2}});
-
-					table.addRow({BeautifulTable::table_time(static_cast<uint_fast64_t>(detail.fastest_min.count())), BeautifulTable::table_time(static_cast<uint_fast64_t>(detail.fastest_mean.count())),
-								  BeautifulTable::table_time(static_cast<uint_fast64_t>(detail.center_min.count())), BeautifulTable::table_time(static_cast<uint_fast64_t>(detail.center_mean.count())),
-								  BeautifulTable::table_time(static_cast<uint_fast64_t>(detail.center_med.count())), BeautifulTable::table_time(static_cast<uint_fast64_t>(detail.center_time_a.count())),
-								  BeautifulTable::table_time(static_cast<uint_fast64_t>(detail.center_time_ae.count())),
-								  BeautifulTable::table_time(static_cast<uint_fast64_t>(detail.center_max.count())),
-								  BeautifulTable::table_time(static_cast<uint_fast64_t>(detail.slowest_mean.count())), BeautifulTable::table_time(static_cast<uint_fast64_t>(detail.slowest_max.count()))});
-
-					info.print(stream);
-					table.print(stream);
-
-					stream << std::endl;
-				}
-			}
-
-			void calculate_stats()
-			{
-				std::vector<Simple_Event> grouped_events{};
-				for (auto &[filename, filename_entry] : f_res)
-				{
-					ctracked_files++;
-					for (auto &[function, function_entry] : filename_entry)
-					{
-						ctracked_functions++;
-						for (auto &[line, line_entry] : function_entry)
-						{
-							ctracked_uses++;
-							line_entry.filename = filename;
-							line_entry.function_name = function;
-							line_entry.line = line;
-							line_entry.calculateStats(settings.non_center_percent, a_events, child_graph);
-							sorted_events.push_back(&line_entry);
-							grouped_events.insert(grouped_events.end(), line_entry.all_grouped.begin(), line_entry.all_grouped.end());
-						}
-					}
-				}
-
-				std::sort(OPT_EXEC_POLICY grouped_events.begin(), grouped_events.end(), cmp_simple_event_by_start_time_asc);
-				auto all_grouped = sorted_create_grouped_simple_events(grouped_events);
-				sum_time_active_exclusive = sum_field(all_grouped, &Simple_Event::duration);
-
-				order_pointer_vector_by_field(sorted_events, &EventGroup::all_time_active_exclusive, false);
-
-				int fastest_events = static_cast<int>(sorted_events.size() * settings.percent_exclude_fastest_active_exclusive / 100);
-				// remove fastest keep in mind fastest elements are at the back
-				if (fastest_events > 0)
-					sorted_events.erase(sorted_events.end() - fastest_events, sorted_events.end());
-
-				uint_fast64_t min_time_active_exclusive = static_cast<uint_fast64_t>(time_total * settings.min_percent_active_exclusive / 100);
-				// remove fastest keep in mind fastest elements are at the back
-				if (min_time_active_exclusive > 0)
-					sorted_events.erase(std::remove_if(sorted_events.begin(), sorted_events.end(), [min_time_active_exclusive](EventGroup *e)
-													   { return e->all_time_active_exclusive < min_time_active_exclusive; }),
-										sorted_events.end());
-
-				// Build the structured result tables
-				build_result_tables();
-			}
-
-			void move_events_from_store(std::deque<t_events> &events)
-			{
-				m_events_storage = std::move(events);
-			}
-
-			void populate_maps()
-			{
-				size_t total_events = 0;
-				for (const auto &event_vec : m_events_storage)
-				{
-					total_events += event_vec.size();
-				}
-				a_events.reserve(total_events);
-
-				for (const auto &event_vec : m_events_storage)
-				{
-					for (const auto &event : event_vec)
-					{
-						f_res[event.filename][event.function][event.line].all_events.push_back(&event);
-						a_events.insert({get_unique_event_id(event.thread_id, event.event_id), &event});
-					}
-				}
-			}
-
-			void add_sub_events(const sub_events &s_events, const unsigned int thread_id_)
-			{
-
-				for (auto const &[key, val] : s_events)
-				{
-					int_fast64_t parent_id = get_unique_event_id(thread_id_, key);
-					for (const auto &child : val)
-					{
-						child_graph[parent_id].push_back(get_unique_event_id(thread_id_, child));
-					}
-				}
-			}
-
-			std::unordered_map<int_fast64_t, const Event *> a_events{};
-			filename_result f_res{};
-
-			std::unordered_map<int_fast64_t, std::vector<int_fast64_t>> child_graph{};
-			ctrack_result_settings settings;
-			std::chrono::high_resolution_clock::time_point track_start_time, track_end_time;
-			uint_fast64_t time_total;
-			uint_fast64_t sum_time_active_exclusive = 0;
-
-			uint_fast64_t ctracked_files = 0;
-			uint_fast64_t ctracked_functions = 0;
-			uint_fast64_t ctracked_uses = 0;
-
-			std::vector<EventGroup *> sorted_events{};
-			std::string center_intervall_str;
-			ctrack_result_tables tables{};
-
-		private:
-			std::deque<t_events> m_events_storage;
-
-			void build_result_tables()
-			{
-				// Populate meta information
-				tables.start_time = track_start_time;
-				tables.end_time = track_end_time;
-				tables.time_total = std::chrono::nanoseconds(time_total);
-				tables.time_ctracked = std::chrono::nanoseconds(sum_time_active_exclusive);
-				tables.settings = settings;
-
-				// Clear existing data
-				tables.summary.rows.clear();
-				tables.details.rows.clear();
-
-				// Reserve space for efficiency
-				tables.summary.rows.reserve(sorted_events.size());
-				tables.details.rows.reserve(sorted_events.size());
-
-				// Build summary and detail rows from sorted_events
-				for (const auto &entry : sorted_events)
-				{
-					// Build summary row
-					summary_row sum_row;
-					sum_row.filename = std::string(entry->filename);
-					sum_row.function_name = std::string(entry->function_name);
-					sum_row.line = entry->line;
-					sum_row.calls = entry->all_cnt;
-					sum_row.percent_ae_bracket = (time_total > 0) ? (static_cast<double>(entry->center_time_active_exclusive) / time_total * 100.0) : 0.0;
-					sum_row.percent_ae_all = (time_total > 0) ? (static_cast<double>(entry->all_time_active_exclusive) / time_total * 100.0) : 0.0;
-					sum_row.time_ae_all = std::chrono::nanoseconds(entry->all_time_active_exclusive);
-					sum_row.time_a_all = std::chrono::nanoseconds(entry->all_time_active);
-					tables.summary.rows.push_back(sum_row);
-
-					// Build detail row
-					detail_stats detail_row;
-					detail_row.filename = std::string(entry->filename);
-					detail_row.function_name = std::string(entry->function_name);
-					detail_row.line = entry->line;
-					detail_row.time_acc = std::chrono::nanoseconds(entry->all_time_acc);
-					detail_row.sd = std::chrono::nanoseconds(static_cast<uint_fast64_t>(entry->all_st));
-					detail_row.cv = entry->all_cv;
-					detail_row.calls = entry->all_cnt;
-					detail_row.threads = entry->all_thread_cnt;
-
-					// Summary-like fields (same calculations as summary row)
-					detail_row.percent_ae_bracket = (time_total > 0) ? (static_cast<double>(entry->center_time_active_exclusive) / time_total * 100.0) : 0.0;
-					detail_row.percent_ae_all = (time_total > 0) ? (static_cast<double>(entry->all_time_active_exclusive) / time_total * 100.0) : 0.0;
-					detail_row.time_ae_all = std::chrono::nanoseconds(entry->all_time_active_exclusive);
-					detail_row.time_a_all = std::chrono::nanoseconds(entry->all_time_active);
-
-					// Fastest/Center/Slowest stats
-					detail_row.fastest_min = std::chrono::nanoseconds(entry->fastest_min);
-					detail_row.fastest_mean = std::chrono::nanoseconds(static_cast<uint_fast64_t>(entry->fastest_mean));
-					detail_row.center_min = std::chrono::nanoseconds(entry->center_min);
-					detail_row.center_mean = std::chrono::nanoseconds(static_cast<uint_fast64_t>(entry->center_mean));
-					detail_row.center_med = std::chrono::nanoseconds(entry->center_med);
-					detail_row.center_time_a = std::chrono::nanoseconds(entry->center_time_active);
-					detail_row.center_time_ae = std::chrono::nanoseconds(entry->center_time_active_exclusive);
-					detail_row.center_max = std::chrono::nanoseconds(entry->center_max);
-					detail_row.slowest_mean = std::chrono::nanoseconds(static_cast<uint_fast64_t>(entry->slowest_mean));
-					detail_row.slowest_max = std::chrono::nanoseconds(entry->slowest_max);
-
-					detail_row.fastest_range = entry->fastest_range;
-					detail_row.slowest_range = entry->slowest_range;
-
-					tables.details.rows.push_back(detail_row);
-				}
-			}
-
-		public:
-			const ctrack_result_tables &get_tables() const { return tables; }
-		};
-
-		inline int fetch_event_t_id()
-		{
-			if (thread_id == nullptr || *thread_id == -1)
-			{
-				std::scoped_lock lock(store::event_mutex);
-
-				if (thread_id == nullptr)
-				{
-					store::a_thread_ids.emplace_back(++store::thread_cnt);
-					thread_id = &store::a_thread_ids[store::a_thread_ids.size() - 1];
-				}
-				else
-				{
-					*thread_id = ++store::thread_cnt;
-				}
-
-				store::a_events.emplace_back(t_events{});
-				store::a_sub_events.emplace_back(sub_events{});
-				store::a_current_event_id.emplace_back(0);
-				store::a_current_event_cnt.emplace_back(0);
-				store::a_string_id.emplace_back(0);
-
-				event_ptr = &store::a_events[*thread_id];
-				sub_events_ptr = &store::a_sub_events[*thread_id];
-
-				current_event_id = &store::a_current_event_id[*thread_id];
-				current_event_cnt = &store::a_current_event_cnt[*thread_id];
-				string_id = &store::a_string_id[*thread_id];
-
-				event_ptr->reserve(100);
-			}
-			return *thread_id;
-		}
-
-		class EventHandler
-		{
-		public:
-			EventHandler(int line = __builtin_LINE(), const char *filename = __builtin_FILE(), const char *function = __builtin_FUNCTION(), std::chrono::high_resolution_clock::time_point start_time = std::chrono::high_resolution_clock::now()) : line(line)
-
-			{
-
-				previous_store_clear_cnt = store::store_clear_cnt;
-				this->filename = filename;
-				this->function = function;
-				while (store::write_events_locked)
-				{
-				}
-
-				register_event();
-				this->start_time = start_time;
-			}
-			~EventHandler()
-			{
-				auto end_time = std::chrono::high_resolution_clock::now();
-				while (store::write_events_locked)
-				{
-				}
-
-				if (store::store_clear_cnt != previous_store_clear_cnt)
-				{
-					register_event();
-				}
-
-				if (event_ptr->capacity() - event_ptr->size() < 1)
-					event_ptr->reserve(event_ptr->capacity() * 4);
-
-				event_ptr->emplace_back(Event{start_time, end_time, filename, line, function, t_id, event_id});
-
-				*current_event_id = previous_event_id;
-				if (previous_event_id > 0)
-				{
-					if ((*sub_events_ptr)[previous_event_id].capacity() - (*sub_events_ptr)[previous_event_id].size() < 1)
-						(*sub_events_ptr)[previous_event_id].reserve((*sub_events_ptr)[previous_event_id].capacity() * 4);
-					(*sub_events_ptr)[previous_event_id].push_back(event_id);
-				}
-			}
-
-		private:
-			void register_event()
-			{
-				t_id = fetch_event_t_id();
-				previous_event_id = *current_event_id;
-				event_id = ++(*current_event_cnt);
-				*current_event_id = event_id;
-			}
-			std::chrono::high_resolution_clock::time_point start_time;
-			int line;
-			unsigned int previous_store_clear_cnt;
-
-			std::string_view filename, function;
-
-			int t_id;
-			unsigned int event_id;
-			unsigned int previous_event_id;
-		};
-
-		inline void clear_a_store()
-		{
-			store::a_current_event_id.clear();
-			store::a_current_event_id.shrink_to_fit();
-
-			store::a_current_event_cnt.clear();
-			store::a_current_event_cnt.shrink_to_fit();
-
-			store::a_string_id.clear();
-			store::a_string_id.shrink_to_fit();
-
-			store::a_events.clear();
-			store::a_events.shrink_to_fit();
-
-			store::a_sub_events.clear();
-			store::a_sub_events.shrink_to_fit();
-
-			store::thread_cnt = -1;
-			for (auto &entry : store::a_thread_ids)
-			{
-				entry = -1;
-			}
-
-			event_ptr = nullptr;
-			sub_events_ptr = nullptr;
-			current_event_id = nullptr;
-			current_event_cnt = nullptr;
-			string_id = nullptr;
-			thread_id = nullptr;
-
-			store::store_clear_cnt++;
-			store::track_start_time = std::chrono::high_resolution_clock::now();
-		}
-
-		inline ctrack_result calc_stats_and_clear(ctrack_result_settings settings = {})
-		{
-			auto end = std::chrono::high_resolution_clock::now();
-			ctrack_result res{settings, store::track_start_time, end};
-
-			// copy data
-			{
-				store::write_events_locked = true;
-				std::this_thread::sleep_for(std::chrono::milliseconds(100));
-				std::scoped_lock lock(store::event_mutex);
-
-				res.move_events_from_store(store::a_events);
-				res.populate_maps();
-
-				for (int thread_id_ = 0; thread_id_ <= store::thread_cnt; thread_id_++)
-				{
-					auto &t_sub_events = store::a_sub_events[thread_id_];
-					res.add_sub_events(t_sub_events, thread_id_);
-				}
-				clear_a_store();
-				store::write_events_locked = false;
-			}
-
-			res.calculate_stats();
-			store::track_start_time = std::chrono::high_resolution_clock::now();
-
-			return res;
-		}
-
-		inline void result_print(ctrack_result_settings settings = {})
-		{
-			auto res = calc_stats_and_clear(settings);
-			std::cout << "Details" << std::endl;
-			res.get_detail_table(std::cout, true);
-			std::cout << "Summary" << std::endl;
-			res.get_summary_table(std::cout, true);
-		}
-
-		inline std::string result_as_string(ctrack_result_settings settings = {})
-		{
-			auto res = calc_stats_and_clear(settings);
-			std::stringstream ss;
-			ss << "Summary\n";
-			res.get_summary_table(ss, false);
-			ss << "Details\n";
-			res.get_detail_table(ss, false, true);
-
-			return ss.str();
-		}
-
-		inline ctrack_result_tables result_get_tables(ctrack_result_settings settings = {})
-		{
-			auto res = calc_stats_and_clear(settings);
-			return res.get_tables();
-		}
-
-		inline summary_table result_get_summary_table(ctrack_result_settings settings = {})
-		{
-			auto res = calc_stats_and_clear(settings);
-			return res.get_tables().summary;
-		}
-
-		inline detail_table result_get_detail_table(ctrack_result_settings settings = {})
-		{
-			auto res = calc_stats_and_clear(settings);
-			return res.get_tables().details;
-		}
-	}
+  return ss.str();
+}
+
+inline ctrack_result_tables result_get_tables(ctrack_result_settings settings = {})
+{
+  auto res = calc_stats_and_clear(settings);
+  return res.get_tables();
+}
+
+inline summary_table result_get_summary_table(ctrack_result_settings settings = {})
+{
+  auto res = calc_stats_and_clear(settings);
+  return res.get_tables().summary;
+}
+
+inline detail_table result_get_detail_table(ctrack_result_settings settings = {})
+{
+  auto res = calc_stats_and_clear(settings);
+  return res.get_tables().details;
+}
+}
 }
 
 #ifndef CTRACK_DISABLE
@@ -1260,9 +1533,9 @@ namespace ctrack
 #define CTRACK_UNIQUE_NAME(prefix) CTRACK_CONCAT(prefix, __COUNTER__)
 
 #define CTRACK_IMPL \
-	ctrack::EventHandler CTRACK_UNIQUE_NAME(ctrack_instance_) { __builtin_LINE(), __builtin_FILE(), __builtin_FUNCTION() }
+ctrack::EventHandler CTRACK_UNIQUE_NAME(ctrack_instance_) { __builtin_LINE(), __builtin_FILE(), __builtin_FUNCTION() }
 #define CTRACK_IMPL_NAME(name) \
-	ctrack::EventHandler CTRACK_UNIQUE_NAME(ctrack_instance_) { __builtin_LINE(), __builtin_FILE(), name }
+ctrack::EventHandler CTRACK_UNIQUE_NAME(ctrack_instance_) { __builtin_LINE(), __builtin_FILE(), name }
 #if defined(CTRACK_DISABLE_DEV)
 #define CTRACK_PROD CTRACK_IMPL
 #define CTRACK_PROD_NAME(name) CTRACK_IMPL_NAME(name)
@@ -1293,4 +1566,4 @@ namespace ctrack
 #define CTRACK_NAME(name)
 #endif // CTRACK_DISABLE
 
-#endif
\ No newline at end of file
+#endif