diff --git a/CMakeLists.txt b/CMakeLists.txt index 4e8cb55..9c0194f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -20,15 +20,15 @@ option(ENABLE_WARNINGS "Enable warnings" OFF) # Check for TBB if(NOT MSVC AND NOT DISABLE_PAR) - find_package(TBB QUIET) - if(TBB_FOUND) - message(STATUS "TBB found. Enabling parallel execution.") - else() - message(STATUS "TBB not found. Disabling parallel execution.") - set(DISABLE_PAR ON) - endif() + find_package(TBB QUIET) + if(TBB_FOUND) + message(STATUS "TBB found. Enabling parallel execution.") + else() + message(STATUS "TBB not found. Disabling parallel execution.") + set(DISABLE_PAR ON) + endif() elseif(DISABLE_PAR) - message(STATUS "DISABLE_PAR set. Disabling parallel execution.") + message(STATUS "DISABLE_PAR set. Disabling parallel execution.") endif() # Create the ctrack library @@ -40,41 +40,41 @@ target_include_directories(ctrack INTERFACE # Configure ctrack based on TBB availability if(DISABLE_PAR) - target_compile_definitions(ctrack INTERFACE CTRACK_DISABLE_EXECUTION_POLICY) + target_compile_definitions(ctrack INTERFACE CTRACK_DISABLE_EXECUTION_POLICY) elseif(NOT MSVC AND TBB_FOUND) - target_link_libraries(ctrack INTERFACE TBB::tbb) + target_link_libraries(ctrack INTERFACE TBB::tbb) endif() set(CMAKE_EXPORT_COMPILE_COMMANDS ON) if(ENABLE_WARNINGS) - if (NOT MSVC) - include(cmake/add_warning.cmake) - include(cmake/warnings.cmake) - endif() + if (NOT MSVC) + include(cmake/add_warning.cmake) + include(cmake/warnings.cmake) + endif() endif() # Add the examples subdirectory if not disabled if(NOT DISABLE_EXAMPLES) - add_subdirectory(examples) + add_subdirectory(examples) else() - message(STATUS "Building examples disabled.") + message(STATUS "Building examples disabled.") endif() # Add the benchmark subdirectory if enabled if(BUILD_BENCHMARK) - add_subdirectory(benchmark) - message(STATUS "Building benchmark enabled.") + add_subdirectory(benchmark) + message(STATUS "Building benchmark enabled.") else() - message(STATUS 
"Building benchmark disabled.") + message(STATUS "Building benchmark disabled.") endif() # Add the test subdirectory if enabled if(BUILD_TESTS) - add_subdirectory(test) - enable_testing() - message(STATUS "Building tests enabled.") + add_subdirectory(test) + enable_testing() + message(STATUS "Building tests enabled.") else() - message(STATUS "Building tests disabled.") + message(STATUS "Building tests disabled.") endif() # Installation @@ -109,4 +109,4 @@ install(FILES "${CMAKE_CURRENT_BINARY_DIR}/ctrackConfig.cmake" "${CMAKE_CURRENT_BINARY_DIR}/ctrackConfigVersion.cmake" DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/ctrack -) \ No newline at end of file +) diff --git a/benchmark/CMakeLists.txt b/benchmark/CMakeLists.txt index 1e014d6..9d04305 100644 --- a/benchmark/CMakeLists.txt +++ b/benchmark/CMakeLists.txt @@ -1,24 +1,29 @@ -add_executable(ctrack_benchmark ctrack_benchmark.cpp) -target_link_libraries(ctrack_benchmark PRIVATE ctrack) - -# Enable threading support -set(THREADS_PREFER_PTHREAD_FLAG ON) -find_package(Threads REQUIRED) -target_link_libraries(ctrack_benchmark PRIVATE Threads::Threads) - -# Add filesystem library if needed (for older compilers) -if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU") +# Helper macro — avoids repetition +macro(add_ctrack_benchmark target_name clock_define) + add_executable(${target_name} ctrack_benchmark.cpp) + target_link_libraries(${target_name} PRIVATE ctrack) + target_compile_options(${target_name} PRIVATE -O3) + if(NOT "${clock_define}" STREQUAL "") + target_compile_definitions(${target_name} PRIVATE ${clock_define}) + endif() + set(THREADS_PREFER_PTHREAD_FLAG ON) + find_package(Threads REQUIRED) + target_link_libraries(${target_name} PRIVATE Threads::Threads) + if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU") if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 9.0) - target_link_libraries(ctrack_benchmark PRIVATE stdc++fs) + target_link_libraries(${target_name} PRIVATE stdc++fs) endif() -elseif(CMAKE_CXX_COMPILER_ID STREQUAL "Clang") + 
elseif(CMAKE_CXX_COMPILER_ID STREQUAL "Clang") if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 9.0) - target_link_libraries(ctrack_benchmark PRIVATE c++fs) + target_link_libraries(${target_name} PRIVATE c++fs) endif() -endif() - -# Set output directory -set_target_properties(ctrack_benchmark - PROPERTIES + endif() + set_target_properties(${target_name} PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/benchmark" -) \ No newline at end of file + ) +endmacro() + +add_ctrack_benchmark(ctrack_benchmark "") +add_ctrack_benchmark(ctrack_benchmark_rdtsc CTRACK_CLOCK_RDTSC) +add_ctrack_benchmark(ctrack_benchmark_rdtscp CTRACK_CLOCK_RDTSCP) +add_ctrack_benchmark(ctrack_benchmark_rdtscp_lfence CTRACK_CLOCK_RDTSCP_LFENCE) diff --git a/benchmark/bench_results.svg b/benchmark/bench_results.svg new file mode 100644 index 0000000..36068fa --- /dev/null +++ b/benchmark/bench_results.svg @@ -0,0 +1,114 @@ + + + +Timer variant benchmark: accuracy error vs overhead +Scatter plot of 4 timer variants. X: accuracy error %, Y: overhead %. Lower is better on both axes. 
+ + + + + + + + + + + + + + + + + + + + + + + + + overhead % (lower → better) + + + + + accuracy error % (lower → better) + + + + + + + + + + + + +0% +2% +4% +6% +8% +10% +12% +14% + + + + + + + + +0% +5% +10% +15% +20% + + + + + +chrono + + 12.84% err · 17.79% ovhd + + + + +RDTSC + + 5.85% err · 8.55% ovhd + + + + +RDTSCP + + 1.57% err · 15.36% ovhd + + + + +RDTSCP + LFENCE + + 0.31% err · 19.73% ovhd + + + + +chrono + +RDTSC + +RDTSCP + +RDTSCP + LFENCE + diff --git a/benchmark/ctrack_benchmark.cpp b/benchmark/ctrack_benchmark.cpp index e6ff4be..75c60f2 100644 --- a/benchmark/ctrack_benchmark.cpp +++ b/benchmark/ctrack_benchmark.cpp @@ -1,811 +1,870 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#ifdef _WIN32 -#include -#include -#else -#include -#include -#endif - -// Configuration -struct BenchmarkConfig -{ - size_t total_events = 50'000'000; // Default 50 million events - size_t thread_count = std::thread::hardware_concurrency(); - bool record_baseline = false; - bool compare_baseline = false; - std::string baseline_file = "ctrack_baseline.json"; - bool verbose = false; -}; - -// Baseline data structure -struct BaselineData -{ - double accuracy_error_percent; - double accuracy_error_ms_per_event; - double overhead_percent; - double overhead_ms; - double overhead_ns_per_event; - double memory_bytes_per_event; - double calculation_time_ms; - double peak_calc_memory_mb; - size_t total_events; - size_t thread_count; - std::string timestamp; - std::string platform; -}; - -// Global config -BenchmarkConfig g_config; - -// Get current memory usage in bytes -size_t get_memory_usage() -{ -#ifdef _WIN32 - PROCESS_MEMORY_COUNTERS_EX pmc; - GetProcessMemoryInfo(GetCurrentProcess(), (PROCESS_MEMORY_COUNTERS *)&pmc, sizeof(pmc)); - return pmc.WorkingSetSize; -#else - struct rusage usage; - getrusage(RUSAGE_SELF, &usage); - return usage.ru_maxrss * 1024; // Convert KB 
to bytes on Linux -#endif -} - -// Precise busy wait function - waits for specified nanoseconds -void busy_wait_ns(int64_t nanoseconds) -{ - auto start = std::chrono::high_resolution_clock::now(); - auto target_duration = std::chrono::nanoseconds(nanoseconds); - - while (true) - { - auto now = std::chrono::high_resolution_clock::now(); - auto elapsed = now - start; - if (elapsed >= target_duration) - { - break; - } - } -} - -// Benchmark functions with predictable timing -void leaf_function(int depth) -{ - CTRACK_NAME("leaf_function"); - // Busy wait for 1 microsecond (1000 ns) - busy_wait_ns(1000); -} - -void level_3_function(int depth) -{ - CTRACK_NAME("level_3_function"); - // Busy wait for 500 ns - busy_wait_ns(500); - - // Call leaf function twice - leaf_function(depth + 1); - leaf_function(depth + 1); -} - -void level_2_function(int depth, int iterations) -{ - CTRACK_NAME("level_2_function"); - // Busy wait for 300 ns - busy_wait_ns(300); - - for (int i = 0; i < iterations; ++i) - { - level_3_function(depth + 1); - } -} - -void level_1_function(int iterations) -{ - CTRACK_NAME("level_1_function"); - // Busy wait for 200 ns - busy_wait_ns(200); - - level_2_function(1, iterations); -} - -// Version without CTRACK for overhead measurement -void leaf_function_no_track(int depth) -{ - busy_wait_ns(1000); -} - -void level_3_function_no_track(int depth) -{ - busy_wait_ns(500); - leaf_function_no_track(depth + 1); - leaf_function_no_track(depth + 1); -} - -void level_2_function_no_track(int depth, int iterations) -{ - busy_wait_ns(300); - for (int i = 0; i < iterations; ++i) - { - level_3_function_no_track(depth + 1); - } -} - -void level_1_function_no_track(int iterations) -{ - busy_wait_ns(200); - level_2_function_no_track(1, iterations); -} - -// Worker thread function -void benchmark_worker(size_t events_per_thread, std::atomic &start_flag) -{ - // Wait for start signal - while (!start_flag.load()) - { - std::this_thread::yield(); - } - - // Calculate iterations 
to reach target event count - // Each level_1 call generates: 1 + 1 + iterations * (1 + 2) events - // For iterations=10: 1 + 1 + 10 * 3 = 32 events per call - const int iterations = 10; - const int events_per_call = 2 + iterations * 3; - size_t calls_needed = events_per_thread / events_per_call; - - for (size_t i = 0; i < calls_needed; ++i) - { - level_1_function(iterations); - } -} - -// Worker thread function without tracking -void benchmark_worker_no_track(size_t events_per_thread, std::atomic &start_flag) -{ - while (!start_flag.load()) - { - std::this_thread::yield(); - } - - const int iterations = 10; - const int events_per_call = 2 + iterations * 3; - size_t calls_needed = events_per_thread / events_per_call; - - for (size_t i = 0; i < calls_needed; ++i) - { - level_1_function_no_track(iterations); - } -} - -// Parse timing from CTRACK results string for a specific function -double parse_function_timing(const std::string &results, const std::string &function_name) -{ - // Look for the Details section first - size_t details_pos = results.find("Details"); - if (details_pos == std::string::npos) - { - return -1.0; // Details section not found - } - - // Look for the function name after the Details section - size_t func_pos = results.find(function_name, details_pos); - if (func_pos == std::string::npos) - { - return -1.0; // Function not found in Details section - } - - // Find the line containing this function in the Details section - size_t line_start = results.rfind('\n', func_pos); - if (line_start == std::string::npos) - line_start = details_pos; - else - line_start++; // Skip the newline - - size_t line_end = results.find('\n', func_pos); - if (line_end == std::string::npos) - line_end = results.length(); - - std::string line = results.substr(line_start, line_end - line_start); - - // Look for the "time acc" column value (4th column after filename, function, line) - // Split by | and find the 4th field - std::vector fields; - std::istringstream iss(line); 
- std::string field; - - while (std::getline(iss, field, '|')) - { - // Trim whitespace - field.erase(0, field.find_first_not_of(" \t")); - field.erase(field.find_last_not_of(" \t") + 1); - if (!field.empty()) - { - fields.push_back(field); - } - } - - // The time acc should be in the 4th field (0-indexed: filename=0, function=1, line=2, time_acc=3) - if (fields.size() > 3) - { - std::string time_acc = fields[3]; - - // Parse value and unit from time_acc (e.g., "2.09 ms") - std::istringstream time_iss(time_acc); - double value; - std::string unit; - - if (time_iss >> value >> unit) - { - // Convert to nanoseconds based on unit - if (unit == "s") - return value * 1e9; - else if (unit == "ms") - return value * 1e6; - else if (unit == "mcs") - return value * 1e3; - else if (unit == "ns") - return value; - } - } - - return -1.0; // Could not parse -} - -// Measure accuracy by comparing known timings with CTRACK measurements -std::pair measure_accuracy() -{ - std::cout << "\n=== Measuring Accuracy ===" << std::endl; - - // Clear any previous tracking data by getting and discarding results - ctrack::result_as_string(); - - // Run a controlled test with known timings - const int test_iterations = 100; - for (int i = 0; i < test_iterations; ++i) - { - level_1_function(10); - } - - // Get results - auto results = ctrack::result_as_string(); - - // Expected timings per iteration (in nanoseconds): - // leaf_function: 1000ns (called 20 times per iteration) = 20,000ns total per iteration - // level_3_function: 500ns + 2*1000ns = 2500ns (called 10 times per iteration) = 25,000ns total per iteration - // level_2_function: 300ns + 10*2500ns = 25,300ns (called 1 time per iteration) = 25,300ns total per iteration - // level_1_function: 200ns + 25,300ns = 25,500ns (called 1 time per iteration) = 25,500ns total per iteration - - struct ExpectedTiming - { - std::string name; - double expected_total_ns; - int call_count; - }; - - std::vector expected_timings = { - {"leaf_function", 
1000.0 * 20 * test_iterations, 20 * test_iterations}, - {"level_3_function", 2500.0 * 10 * test_iterations, 10 * test_iterations}, - {"level_2_function", 25300.0 * 1 * test_iterations, 1 * test_iterations}, - {"level_1_function", 25500.0 * 1 * test_iterations, 1 * test_iterations}}; - - double total_expected_time = 0.0; - double total_actual_time = 0.0; - double max_absolute_error = 0.0; - - if (g_config.verbose) - { - std::cout << "Function accuracy analysis:" << std::endl; - } - - for (const auto &timing : expected_timings) - { - double actual_ns = parse_function_timing(results, timing.name); - if (actual_ns > 0) - { - double expected_ns = timing.expected_total_ns; - double absolute_error = std::abs(actual_ns - expected_ns); - double percent_error = (absolute_error / expected_ns) * 100.0; - - total_expected_time += expected_ns; - total_actual_time += actual_ns; - max_absolute_error = (std::max)(max_absolute_error, absolute_error); - - if (g_config.verbose) - { - std::cout << " " << timing.name << ": expected " << expected_ns / 1e6 << " ms, got " - << actual_ns / 1e6 << " ms (error: " << percent_error << "%)" << std::endl; - } - } - else if (g_config.verbose) - { - std::cout << " " << timing.name << ": could not parse timing" << std::endl; - } - } - - double overall_error_percent = 0.0; - double overall_error_ms = 0.0; - - if (total_expected_time > 0) - { - double total_absolute_error = std::abs(total_actual_time - total_expected_time); - overall_error_percent = (total_absolute_error / total_expected_time) * 100.0; - - // Calculate total number of events across all functions - double total_events = 0; - for (const auto &timing : expected_timings) - { - total_events += timing.call_count; - } - - // Convert to milliseconds per event - overall_error_ms = (total_absolute_error / 1e6) / total_events; // Convert to milliseconds per event - } - - if (g_config.verbose) - { - std::cout << "Overall accuracy error: " << overall_error_percent << "% (" << overall_error_ms << " 
ms per event)" << std::endl; - } - - return {overall_error_percent, overall_error_ms}; -} - -// Measure overhead by comparing with and without CTRACK -std::tuple measure_overhead() -{ - std::cout << "\n=== Measuring Overhead ===" << std::endl; - - const size_t overhead_events = 1'000'000; // 1M events for overhead test - size_t events_per_thread = overhead_events / g_config.thread_count; - - // Measure without CTRACK - auto start_no_track = std::chrono::high_resolution_clock::now(); - { - std::vector threads; - std::atomic start_flag{false}; - - for (size_t i = 0; i < g_config.thread_count; ++i) - { - threads.emplace_back(benchmark_worker_no_track, events_per_thread, std::ref(start_flag)); - } - - start_flag = true; - - for (auto &t : threads) - { - t.join(); - } - } - auto end_no_track = std::chrono::high_resolution_clock::now(); - auto duration_no_track = std::chrono::duration_cast(end_no_track - start_no_track).count(); - - // Clear tracking data by getting and discarding results - ctrack::result_as_string(); - - // Measure with CTRACK - auto start_track = std::chrono::high_resolution_clock::now(); - { - std::vector threads; - std::atomic start_flag{false}; - - for (size_t i = 0; i < g_config.thread_count; ++i) - { - threads.emplace_back(benchmark_worker, events_per_thread, std::ref(start_flag)); - } - - start_flag = true; - - for (auto &t : threads) - { - t.join(); - } - } - auto end_track = std::chrono::high_resolution_clock::now(); - auto duration_track = std::chrono::duration_cast(end_track - start_track).count(); - - double overhead_percent = ((double)(duration_track - duration_no_track) / duration_no_track) * 100.0; - double overhead_ms = (duration_track - duration_no_track) / 1000.0; // Convert microseconds to milliseconds - double overhead_ns_per_event = ((duration_track - duration_no_track) * 1000.0) / overhead_events; // nanoseconds per event - - if (g_config.verbose) - { - std::cout << "Without CTRACK: " << duration_no_track << " µs" << std::endl; - 
std::cout << "With CTRACK: " << duration_track << " µs" << std::endl; - std::cout << "Overhead: " << overhead_percent << "% (" << overhead_ms << " ms total, " - << overhead_ns_per_event << " ns per event)" << std::endl; - } - - return {overhead_percent, overhead_ms, overhead_ns_per_event}; -} - -// Measure memory usage and calculation time -std::tuple measure_memory_and_calculation_time() -{ - std::cout << "\n=== Measuring Memory Usage and Calculation Time ===" << std::endl; - - // Clear any previous tracking data by getting and discarding results - ctrack::result_as_string(); - - // Measure initial memory - size_t initial_memory = get_memory_usage(); - - // Generate events - size_t events_per_thread = g_config.total_events / g_config.thread_count; - - if (g_config.verbose) - { - std::cout << "Generating " << g_config.total_events << " events across " - << g_config.thread_count << " threads..." << std::endl; - } - - auto gen_start = std::chrono::high_resolution_clock::now(); - { - std::vector threads; - std::atomic start_flag{false}; - - for (size_t i = 0; i < g_config.thread_count; ++i) - { - threads.emplace_back(benchmark_worker, events_per_thread, std::ref(start_flag)); - } - - start_flag = true; - - for (auto &t : threads) - { - t.join(); - } - } - auto gen_end = std::chrono::high_resolution_clock::now(); - - // Measure memory after event generation - size_t post_event_memory = get_memory_usage(); - size_t memory_used = post_event_memory - initial_memory; - double bytes_per_event = (double)memory_used / g_config.total_events; - - if (g_config.verbose) - { - auto gen_duration = std::chrono::duration_cast(gen_end - gen_start).count(); - std::cout << "Event generation took: " << gen_duration << " ms" << std::endl; - std::cout << "Memory used: " << memory_used / (1024.0 * 1024.0) << " MB" << std::endl; - std::cout << "Memory per event: " << bytes_per_event << " bytes" << std::endl; - } - - // Measure calculation time and peak memory usage - std::atomic 
monitoring{true}; - std::atomic peak_memory{post_event_memory}; - - // Start memory monitoring thread - std::thread monitor_thread([&monitoring, &peak_memory, initial_memory]() - { - while (monitoring.load()) { - size_t current_memory = get_memory_usage(); - size_t current_peak = peak_memory.load(); - while (current_memory > current_peak && - !peak_memory.compare_exchange_weak(current_peak, current_memory)) {} - std::this_thread::sleep_for(std::chrono::milliseconds(10)); // Poll every 10ms - } }); - - auto calc_start = std::chrono::high_resolution_clock::now(); - auto results = ctrack::result_as_string(); - auto calc_end = std::chrono::high_resolution_clock::now(); - - // Stop monitoring - monitoring = false; - monitor_thread.join(); - - auto calc_duration = std::chrono::duration_cast(calc_end - calc_start).count() / 1000.0; - double peak_calc_memory_mb = (peak_memory.load() - initial_memory) / (1024.0 * 1024.0); - - if (g_config.verbose) - { - std::cout << "Result calculation took: " << calc_duration << " ms" << std::endl; - std::cout << "Peak memory during calculation: " << peak_calc_memory_mb << " MB" << std::endl; - } - - return {bytes_per_event, calc_duration, peak_calc_memory_mb}; -} - -// Save baseline to file -void save_baseline(const BaselineData &data) -{ - std::ofstream file(g_config.baseline_file); - if (!file) - { - std::cerr << "Error: Could not open baseline file for writing: " << g_config.baseline_file << std::endl; - return; - } - - // Simple JSON format - file << "{\n"; - file << " \"accuracy_error_percent\": " << data.accuracy_error_percent << ",\n"; - file << " \"accuracy_error_ms_per_event\": " << data.accuracy_error_ms_per_event << ",\n"; - file << " \"overhead_percent\": " << data.overhead_percent << ",\n"; - file << " \"overhead_ms\": " << data.overhead_ms << ",\n"; - file << " \"overhead_ns_per_event\": " << data.overhead_ns_per_event << ",\n"; - file << " \"memory_bytes_per_event\": " << data.memory_bytes_per_event << ",\n"; - file << " 
\"calculation_time_ms\": " << data.calculation_time_ms << ",\n"; - file << " \"peak_calc_memory_mb\": " << data.peak_calc_memory_mb << ",\n"; - file << " \"total_events\": " << data.total_events << ",\n"; - file << " \"thread_count\": " << data.thread_count << ",\n"; - file << " \"timestamp\": \"" << data.timestamp << "\",\n"; - file << " \"platform\": \"" << data.platform << "\"\n"; - file << "}\n"; - - std::cout << "\nBaseline saved to: " << g_config.baseline_file << std::endl; -} - -// Load baseline from file -bool load_baseline(BaselineData &data) -{ - std::ifstream file(g_config.baseline_file); - if (!file) - { - return false; - } - - // Simple JSON parsing (production code would use a proper JSON library) - std::string line; - while (std::getline(file, line)) - { - if (line.find("\"accuracy_error_percent\":") != std::string::npos) - { - size_t pos = line.find(": ") + 2; - size_t end = line.find(",", pos); - data.accuracy_error_percent = std::stod(line.substr(pos, end - pos)); - } - else if (line.find("\"accuracy_error_ms_per_event\":") != std::string::npos) - { - size_t pos = line.find(": ") + 2; - size_t end = line.find(",", pos); - data.accuracy_error_ms_per_event = std::stod(line.substr(pos, end - pos)); - } - else if (line.find("\"overhead_percent\":") != std::string::npos) - { - size_t pos = line.find(": ") + 2; - size_t end = line.find(",", pos); - data.overhead_percent = std::stod(line.substr(pos, end - pos)); - } - else if (line.find("\"overhead_ms\":") != std::string::npos) - { - size_t pos = line.find(": ") + 2; - size_t end = line.find(",", pos); - data.overhead_ms = std::stod(line.substr(pos, end - pos)); - } - else if (line.find("\"overhead_ns_per_event\":") != std::string::npos) - { - size_t pos = line.find(": ") + 2; - size_t end = line.find(",", pos); - data.overhead_ns_per_event = std::stod(line.substr(pos, end - pos)); - } - else if (line.find("\"memory_bytes_per_event\":") != std::string::npos) - { - size_t pos = line.find(": ") + 2; - 
size_t end = line.find(",", pos); - data.memory_bytes_per_event = std::stod(line.substr(pos, end - pos)); - } - else if (line.find("\"calculation_time_ms\":") != std::string::npos) - { - size_t pos = line.find(": ") + 2; - size_t end = line.find(",", pos); - data.calculation_time_ms = std::stod(line.substr(pos, end - pos)); - } - else if (line.find("\"peak_calc_memory_mb\":") != std::string::npos) - { - size_t pos = line.find(": ") + 2; - size_t end = line.find(",", pos); - data.peak_calc_memory_mb = std::stod(line.substr(pos, end - pos)); - } - else if (line.find("\"total_events\":") != std::string::npos) - { - size_t pos = line.find(": ") + 2; - size_t end = line.find(",", pos); - data.total_events = std::stoull(line.substr(pos, end - pos)); - } - else if (line.find("\"thread_count\":") != std::string::npos) - { - size_t pos = line.find(": ") + 2; - size_t end = line.find(",", pos); - data.thread_count = std::stoull(line.substr(pos, end - pos)); - } - } - - return true; -} - -// Compare current results with baseline -void compare_with_baseline(const BaselineData ¤t) -{ - BaselineData baseline; - if (!load_baseline(baseline)) - { - std::cerr << "Error: Could not load baseline file: " << g_config.baseline_file << std::endl; - return; - } - - std::cout << "\n=== Baseline Comparison ===" << std::endl; - std::cout << std::fixed << std::setprecision(2); - auto print_comparison = [](const std::string &metric, double baseline_val, double current_val, bool lower_is_better = true) - { - double diff = current_val - baseline_val; - double percent_change = (diff / baseline_val) * 100.0; - - std::string direction = (diff > 0) ? "increased" : "decreased"; - std::string indicator = (lower_is_better ? (diff > 0 ? "worse" : "better") : (diff > 0 ? 
"better" : "worse")); - - std::cout << metric << ":\n"; - std::cout << " Baseline: " << baseline_val << "\n"; - std::cout << " Current: " << current_val << "\n"; - std::cout << " Change: " << indicator << " - " << std::abs(percent_change) << "% " << direction << "\n\n"; - }; - - print_comparison("Accuracy Error %", baseline.accuracy_error_percent, current.accuracy_error_percent); - print_comparison("Accuracy Error (ms/event)", baseline.accuracy_error_ms_per_event, current.accuracy_error_ms_per_event); - print_comparison("Overhead %", std::abs(baseline.overhead_percent), std::abs(current.overhead_percent)); - print_comparison("Overhead Time (ms)", std::abs(baseline.overhead_ms), std::abs(current.overhead_ms)); - print_comparison("Overhead per Event (ns)", baseline.overhead_ns_per_event, current.overhead_ns_per_event); - print_comparison("Memory/Event (bytes)", baseline.memory_bytes_per_event, current.memory_bytes_per_event); - print_comparison("Calculation Time (ms)", baseline.calculation_time_ms, current.calculation_time_ms); - print_comparison("Peak Calc Memory (MB)", baseline.peak_calc_memory_mb, current.peak_calc_memory_mb); -} - -// Get platform string -std::string get_platform() -{ -#ifdef _WIN32 - return "Windows"; -#elif __APPLE__ - return "macOS"; -#elif __linux__ - return "Linux"; -#else - return "Unknown"; -#endif -} - -// Get current timestamp -std::string get_timestamp() -{ - auto now = std::chrono::system_clock::now(); - auto time_t = std::chrono::system_clock::to_time_t(now); - std::stringstream ss; -#ifdef _WIN32 - struct tm time_info; - localtime_s(&time_info, &time_t); - ss << std::put_time(&time_info, "%Y-%m-%d %H:%M:%S"); -#else - ss << std::put_time(std::localtime(&time_t), "%Y-%m-%d %H:%M:%S"); -#endif - return ss.str(); -} - -// Print usage -void print_usage(const char *program_name) -{ - std::cout << "Usage: " << program_name << " [options]\n"; - std::cout << "Options:\n"; - std::cout << " --events Number of events to generate (default: 
50000000)\n"; - std::cout << " --threads Number of threads to use (default: hardware concurrency)\n"; - std::cout << " --baseline Baseline file path (default: ctrack_baseline.json)\n"; - std::cout << " --record-baseline Record current results as baseline\n"; - std::cout << " --compare-baseline Compare results with baseline\n"; - std::cout << " --verbose Enable verbose output\n"; - std::cout << " --help Show this help message\n"; -} - -// Parse command line arguments -bool parse_args(int argc, char *argv[]) -{ - for (int i = 1; i < argc; ++i) - { - std::string arg = argv[i]; - - if (arg == "--help") - { - print_usage(argv[0]); - return false; - } - else if (arg == "--events" && i + 1 < argc) - { - g_config.total_events = std::stoull(argv[++i]); - } - else if (arg == "--threads" && i + 1 < argc) - { - g_config.thread_count = std::stoull(argv[++i]); - } - else if (arg == "--baseline" && i + 1 < argc) - { - g_config.baseline_file = argv[++i]; - } - else if (arg == "--record-baseline") - { - g_config.record_baseline = true; - } - else if (arg == "--compare-baseline") - { - g_config.compare_baseline = true; - } - else if (arg == "--verbose") - { - g_config.verbose = true; - } - else - { - std::cerr << "Unknown option: " << arg << std::endl; - print_usage(argv[0]); - return false; - } - } - - return true; -} - -int main(int argc, char *argv[]) -{ - if (!parse_args(argc, argv)) - { - return 1; - } - - std::cout << "CTRACK Comprehensive Benchmark\n"; - std::cout << "==============================\n"; - std::cout << "Total events: " << g_config.total_events << "\n"; - std::cout << "Thread count: " << g_config.thread_count << "\n"; - std::cout << "Events per thread: " << g_config.total_events / g_config.thread_count << "\n"; - - // Run benchmarks - auto [accuracy_error_percent, accuracy_error_ms_per_event] = measure_accuracy(); - auto [overhead_percent, overhead_ms, overhead_ns_per_event] = measure_overhead(); - auto [bytes_per_event, calc_time, peak_calc_memory] = 
measure_memory_and_calculation_time(); - - // Prepare results - BaselineData current_data; - current_data.accuracy_error_percent = accuracy_error_percent; - current_data.accuracy_error_ms_per_event = accuracy_error_ms_per_event; - current_data.overhead_percent = overhead_percent; - current_data.overhead_ms = overhead_ms; - current_data.overhead_ns_per_event = overhead_ns_per_event; - current_data.memory_bytes_per_event = bytes_per_event; - current_data.calculation_time_ms = calc_time; - current_data.peak_calc_memory_mb = peak_calc_memory; - current_data.total_events = g_config.total_events; - current_data.thread_count = g_config.thread_count; - current_data.timestamp = get_timestamp(); - current_data.platform = get_platform(); - - // Print summary - std::cout << "\n=== Benchmark Results ===" << std::endl; - std::cout << std::fixed << std::setprecision(2); - std::cout << "Accuracy error: " << accuracy_error_percent << "% (" << accuracy_error_ms_per_event << " ms per event)" << std::endl; - std::cout << "Overhead: " << overhead_percent << "% (" << overhead_ms << " ms total, " - << overhead_ns_per_event << " ns per event)" << std::endl; - std::cout << "Memory per event: " << bytes_per_event << " bytes" << std::endl; - std::cout << "Calculation time: " << calc_time << " ms" << std::endl; - std::cout << "Peak calculation memory: " << peak_calc_memory << " MB" << std::endl; - - // Handle baseline operations - if (g_config.record_baseline) - { - save_baseline(current_data); - } - - if (g_config.compare_baseline) - { - compare_with_baseline(current_data); - } - - return 0; -} \ No newline at end of file +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef _WIN32 +#include +#include +#else +#include +#include +#endif + +// Prevent the compiler from inlining or collapsing calls across call-sites. 
+// With -O3 the _no_track helpers would otherwise be fully inlined into the +// worker loop, letting the optimiser merge/eliminate busy-wait iterations and +// producing artificially low (even negative) overhead measurements. +#if defined(_MSC_VER) +#define BENCHMARK_NOINLINE __declspec(noinline) +#else +#define BENCHMARK_NOINLINE __attribute__((noinline)) +#endif + +// --------------------------------------------------------------------------- +// Orthogonal wall-clock: does NOT share the vDSO/TSC path used by either +// std::chrono or ctrack's internal clocks, so it can measure overhead without +// self-measurement bias regardless of which ctrack clock variant is compiled. +// +// On Windows we fall back to QueryPerformanceCounter which goes through the +// HAL and is independent of both RDTSC and the C++ runtime clock. +// --------------------------------------------------------------------------- +inline int64_t raw_clock_ns() +{ +#ifdef _WIN32 + LARGE_INTEGER freq, cnt; + QueryPerformanceFrequency(&freq); + QueryPerformanceCounter(&cnt); + return static_cast(cnt.QuadPart * 1'000'000'000LL / freq.QuadPart); +#else + struct timespec ts; + clock_gettime(CLOCK_MONOTONIC_RAW, &ts); + return static_cast(ts.tv_sec) * 1'000'000'000LL + ts.tv_nsec; +#endif +} + + +// Configuration +struct BenchmarkConfig +{ + size_t total_events = 50'000'000; // Default 50 million events + size_t thread_count = std::thread::hardware_concurrency(); + bool record_baseline = false; + bool compare_baseline = false; + std::string baseline_file = "ctrack_baseline.json"; + bool verbose = false; +}; + +// Baseline data structure +struct BaselineData +{ + double accuracy_error_percent; + double accuracy_error_us_per_event; + double overhead_percent; + double overhead_ms; + double overhead_ns_per_event; + double memory_bytes_per_event; + double calculation_time_ms; + double peak_calc_memory_mb; + size_t total_events; + size_t thread_count; + std::string timestamp; + std::string platform; +}; + 
+// Global config
+BenchmarkConfig g_config;
+
+// Number of level_3 calls made by every level_1 call, and the resulting number
+// of tracked events per level_1 call:
+//   1 (level_1) + 1 (level_2) + iterations * (1 level_3 + 2 leaf) = 32.
+constexpr int kIterationsPerCall = 10;
+constexpr int kEventsPerCall = 2 + kIterationsPerCall * 3;
+
+// Returns `after - before`, clamped at 0 so that a shrinking memory reading
+// cannot wrap around in unsigned arithmetic.
+static size_t memory_delta(size_t after, size_t before)
+{
+    return after > before ? after - before : 0;
+}
+
+// Get memory usage of this process in bytes (0 if the OS query fails).
+// Windows reports the current working set; POSIX reports ru_maxrss, i.e. the
+// *peak* resident set size so far.
+size_t get_memory_usage()
+{
+#ifdef _WIN32
+    PROCESS_MEMORY_COUNTERS_EX pmc{};
+    if (!GetProcessMemoryInfo(GetCurrentProcess(),
+                              reinterpret_cast<PROCESS_MEMORY_COUNTERS *>(&pmc), sizeof(pmc)))
+    {
+        return 0;
+    }
+    return pmc.WorkingSetSize;
+#else
+    struct rusage usage{};
+    if (getrusage(RUSAGE_SELF, &usage) != 0)
+    {
+        return 0;
+    }
+#ifdef __APPLE__
+    return static_cast<size_t>(usage.ru_maxrss); // macOS reports bytes
+#else
+    return static_cast<size_t>(usage.ru_maxrss) * 1024; // Linux reports kilobytes
+#endif
+#endif
+}
+
+// Busy wait: spins until at least `nanoseconds` have elapsed on
+// std::chrono::high_resolution_clock. Never yields or sleeps.
+BENCHMARK_NOINLINE void busy_wait_ns(int64_t nanoseconds)
+{
+    const auto start = std::chrono::high_resolution_clock::now();
+    const auto target_duration = std::chrono::nanoseconds(nanoseconds);
+
+    while (std::chrono::high_resolution_clock::now() - start < target_duration)
+    {
+        // spin
+    }
+}
+
+// Benchmark functions with predictable timing
+void leaf_function(int /*depth*/)
+{
+    CTRACK_NAME("leaf_function");
+    // Busy wait for 1 microsecond (1000 ns)
+    busy_wait_ns(1000);
+}
+
+void level_3_function(int depth)
+{
+    CTRACK_NAME("level_3_function");
+    // Busy wait for 500 ns
+    busy_wait_ns(500);
+
+    // Call leaf function twice
+    leaf_function(depth + 1);
+    leaf_function(depth + 1);
+}
+
+void level_2_function(int depth, int iterations)
+{
+    CTRACK_NAME("level_2_function");
+    // Busy wait for 300 ns
+    busy_wait_ns(300);
+
+    for (int i = 0; i < iterations; ++i)
+    {
+        level_3_function(depth + 1);
+    }
+}
+
+void level_1_function(int iterations)
+{
+    CTRACK_NAME("level_1_function");
+    // Busy wait for 200 ns
+    busy_wait_ns(200);
+
+    level_2_function(1, iterations);
+}
+
+// Versions without CTRACK for overhead measurement. They mirror the tracked
+// call tree exactly and are kept out-of-line via BENCHMARK_NOINLINE.
+BENCHMARK_NOINLINE void leaf_function_no_track(int /*depth*/)
+{
+    busy_wait_ns(1000);
+}
+
+BENCHMARK_NOINLINE void level_3_function_no_track(int depth)
+{
+    busy_wait_ns(500);
+    leaf_function_no_track(depth + 1);
+    leaf_function_no_track(depth + 1);
+}
+
+BENCHMARK_NOINLINE void level_2_function_no_track(int depth, int iterations)
+{
+    busy_wait_ns(300);
+    for (int i = 0; i < iterations; ++i)
+    {
+        level_3_function_no_track(depth + 1);
+    }
+}
+
+BENCHMARK_NOINLINE void level_1_function_no_track(int iterations)
+{
+    busy_wait_ns(200);
+    level_2_function_no_track(1, iterations);
+}
+
+// Worker thread function: waits for the start signal, then issues enough
+// level_1 calls to produce roughly `events_per_thread` tracked events
+// (rounded down to a whole number of calls).
+void benchmark_worker(size_t events_per_thread, std::atomic<bool> &start_flag)
+{
+    // Wait for start signal
+    while (!start_flag.load())
+    {
+        std::this_thread::yield();
+    }
+
+    const size_t calls_needed = events_per_thread / kEventsPerCall;
+    for (size_t i = 0; i < calls_needed; ++i)
+    {
+        level_1_function(kIterationsPerCall);
+    }
+}
+
+// Worker thread function without tracking: same workload as benchmark_worker
+// but through the *_no_track call tree.
+void benchmark_worker_no_track(size_t events_per_thread, std::atomic<bool> &start_flag)
+{
+    while (!start_flag.load())
+    {
+        std::this_thread::yield();
+    }
+
+    const size_t calls_needed = events_per_thread / kEventsPerCall;
+    for (size_t i = 0; i < calls_needed; ++i)
+    {
+        level_1_function_no_track(kIterationsPerCall);
+    }
+}
+
+// Parse timing from CTRACK results string for a specific function.
+//
+// Looks for the first line after the "Details" marker that mentions
+// `function_name`, splits it on '|' and interprets the 4th non-empty field as
+// "<value> <unit>". Returns the value in nanoseconds, or -1.0 when the section,
+// the function or a recognisable value/unit cannot be found.
+double parse_function_timing(const std::string &results, const std::string &function_name)
+{
+    // Look for the Details section first
+    const size_t details_pos = results.find("Details");
+    if (details_pos == std::string::npos)
+    {
+        return -1.0; // Details section not found
+    }
+
+    // Look for the function name after the Details section
+    const size_t func_pos = results.find(function_name, details_pos);
+    if (func_pos == std::string::npos)
+    {
+        return -1.0; // Function not found in Details section
+    }
+
+    // Isolate the line containing this function
+    const size_t prev_newline = results.rfind('\n', func_pos);
+    const size_t line_start = (prev_newline == std::string::npos) ? details_pos : prev_newline + 1;
+    size_t line_end = results.find('\n', func_pos);
+    if (line_end == std::string::npos)
+    {
+        line_end = results.length();
+    }
+    const std::string line = results.substr(line_start, line_end - line_start);
+
+    // Split by '|', trim each field, and keep the non-empty ones
+    std::vector<std::string> fields;
+    std::istringstream iss(line);
+    std::string field;
+    while (std::getline(iss, field, '|'))
+    {
+        field.erase(0, field.find_first_not_of(" \t"));
+        field.erase(field.find_last_not_of(" \t") + 1);
+        if (!field.empty())
+        {
+            fields.push_back(field);
+        }
+    }
+
+    // The time acc should be in the 4th field
+    // (0-indexed: filename=0, function=1, line=2, time_acc=3)
+    if (fields.size() <= 3)
+    {
+        return -1.0;
+    }
+
+    // Parse value and unit from time_acc (e.g., "2.09 ms")
+    std::istringstream time_iss(fields[3]);
+    double value = 0.0;
+    std::string unit;
+    if (!(time_iss >> value >> unit))
+    {
+        return -1.0;
+    }
+
+    // Convert to nanoseconds based on unit
+    if (unit == "s")
+        return value * 1e9;
+    if (unit == "ms")
+        return value * 1e6;
+    // NOTE(review): "mcs" is accepted as an additional microsecond spelling in
+    // case ctrack prints it that way - confirm against ctrack's formatter.
+    if (unit == "us" || unit == "mcs")
+        return value * 1e3;
+    if (unit == "ns")
+        return value;
+
+    return -1.0; // Could not parse
+}
+
+// Measure accuracy by comparing known timings with CTRACK measurements.
+// Returns {overall error in percent, overall error in microseconds per event}.
+std::pair<double, double> measure_accuracy()
+{
+    std::cout << "\n=== Measuring Accuracy ===" << std::endl;
+
+    // Clear any previous tracking data by getting and discarding results
+    ctrack::result_as_string();
+
+    // Run a controlled test with known timings
+    const int test_iterations = 100;
+    for (int i = 0; i < test_iterations; ++i)
+    {
+        level_1_function(kIterationsPerCall);
+    }
+
+    // Get results
+    const auto results = ctrack::result_as_string();
+
+    // Expected timings per iteration (in nanoseconds):
+    // leaf_function:    1000ns                       (20 calls per iteration)
+    // level_3_function: 500ns + 2*1000ns  =  2,500ns (10 calls per iteration)
+    // level_2_function: 300ns + 10*2500ns = 25,300ns (1 call per iteration)
+    // level_1_function: 200ns + 25,300ns  = 25,500ns (1 call per iteration)
+    struct ExpectedTiming
+    {
+        std::string name;
+        double expected_total_ns;
+        int call_count;
+    };
+
+    const std::vector<ExpectedTiming> expected_timings = {
+        {"leaf_function", 1000.0 * 20 * test_iterations, 20 * test_iterations},
+        {"level_3_function", 2500.0 * 10 * test_iterations, 10 * test_iterations},
+        {"level_2_function", 25300.0 * 1 * test_iterations, 1 * test_iterations},
+        {"level_1_function", 25500.0 * 1 * test_iterations, 1 * test_iterations}};
+
+    double total_expected_time = 0.0;
+    double total_actual_time = 0.0;
+
+    if (g_config.verbose)
+    {
+        std::cout << "Function accuracy analysis:" << std::endl;
+    }
+
+    for (const auto &timing : expected_timings)
+    {
+        const double actual_ns = parse_function_timing(results, timing.name);
+        if (actual_ns > 0)
+        {
+            const double expected_ns = timing.expected_total_ns;
+            total_expected_time += expected_ns;
+            total_actual_time += actual_ns;
+
+            if (g_config.verbose)
+            {
+                const double percent_error = (std::abs(actual_ns - expected_ns) / expected_ns) * 100.0;
+                std::cout << "  " << timing.name << ": expected " << expected_ns / 1e6 << " ms, got "
+                          << actual_ns / 1e6 << " ms (error: " << percent_error << "%)" << std::endl;
+            }
+        }
+        else if (g_config.verbose)
+        {
+            std::cout << "  " << timing.name << ": could not parse timing" << std::endl;
+        }
+    }
+
+    double overall_error_percent = 0.0;
+    double overall_error_us_per_event = 0.0;
+
+    if (total_expected_time > 0)
+    {
+        const double total_absolute_error = std::abs(total_actual_time - total_expected_time);
+        overall_error_percent = (total_absolute_error / total_expected_time) * 100.0;
+
+        // Total number of events across all functions
+        double total_events = 0;
+        for (const auto &timing : expected_timings)
+        {
+            total_events += timing.call_count;
+        }
+
+        // ns -> us, then per event
+        overall_error_us_per_event = (total_absolute_error / 1e3) / total_events;
+    }
+
+    if (g_config.verbose)
+    {
+        std::cout << "Overall accuracy error: " << overall_error_percent << "% ("
+                  << overall_error_us_per_event << " us per event)" << std::endl;
+    }
+
+    return {overall_error_percent, overall_error_us_per_event};
+}
+
+// ---------------------------------------------------------------------------
+// measure_overhead: times the tracked and the untracked workload with
+// raw_clock_ns() as the outer timer, over several trials in alternating order,
+// and compares the medians.
+// Returns {overhead in percent, overhead in ms, overhead in ns per event};
+// a negative difference is treated as measurement noise and clamped to 0.
+// ---------------------------------------------------------------------------
+std::tuple<double, double, double> measure_overhead()
+{
+    std::cout << "\n=== Measuring Overhead ===" << std::endl;
+
+    const size_t overhead_events = 1'000'000;
+    // A zero thread count would divide by zero and run no work at all.
+    const size_t thread_count = g_config.thread_count > 0 ? g_config.thread_count : 1;
+    const size_t events_per_thread = overhead_events / thread_count;
+
+    // Helper: spawn threads, release them together, wait for join.
+    // NOTE: result_as_string() is intentionally NOT called here.
+    // It must stay outside the timed window.
+    auto run_variant = [&](bool with_track)
+    {
+        std::vector<std::thread> threads;
+        threads.reserve(thread_count);
+        std::atomic<bool> start_flag{false};
+        for (size_t i = 0; i < thread_count; ++i)
+        {
+            if (with_track)
+                threads.emplace_back(benchmark_worker, events_per_thread, std::ref(start_flag));
+            else
+                threads.emplace_back(benchmark_worker_no_track, events_per_thread, std::ref(start_flag));
+        }
+        start_flag = true;
+        for (auto &t : threads)
+            t.join();
+    };
+
+    // measure(with_track): clear ctrack state BEFORE t0, time pure work,
+    // discard results AFTER t1. Returns the elapsed time in microseconds.
+    auto measure = [&](bool with_track) -> double
+    {
+        // Pre-clear: outside timed window
+        ctrack::result_as_string();
+
+        const int64_t t0 = raw_clock_ns();
+        run_variant(with_track);
+        const int64_t t1 = raw_clock_ns();
+
+        // Post-clear: outside timed window
+        if (with_track)
+            ctrack::result_as_string();
+
+        return static_cast<double>(t1 - t0) / 1'000.0; // ns -> µs
+    };
+
+    // Warmup
+    run_variant(false);
+    ctrack::result_as_string(); // clear accumulated state
+    run_variant(true);
+    ctrack::result_as_string(); // clear accumulated state
+
+    // Multi-trial with alternating order
+    const int NUM_TRIALS = 5;
+    std::vector<double> no_track_times, track_times;
+    no_track_times.reserve(NUM_TRIALS);
+    track_times.reserve(NUM_TRIALS);
+
+    for (int trial = 0; trial < NUM_TRIALS; ++trial)
+    {
+        const bool no_track_first = (trial % 2 == 0);
+        if (no_track_first)
+        {
+            no_track_times.push_back(measure(false));
+            track_times.push_back(measure(true));
+        }
+        else
+        {
+            track_times.push_back(measure(true));
+            no_track_times.push_back(measure(false));
+        }
+    }
+
+    // Median to reject scheduler outliers
+    auto median = [](std::vector<double> v) -> double
+    {
+        std::sort(v.begin(), v.end());
+        return v[v.size() / 2];
+    };
+
+    const double dur_no_track = median(no_track_times);
+    const double dur_track = median(track_times);
+    const double raw_diff = dur_track - dur_no_track; // µs
+    // Parenthesised like elsewhere in this file so a max() macro cannot interfere.
+    const double clamped_diff = (std::max)(0.0, raw_diff);
+
+    // With a zero-length baseline run there is nothing to relate the diff to.
+    const double overhead_percent = dur_no_track > 0.0 ? (clamped_diff / dur_no_track) * 100.0 : 0.0;
+    const double overhead_ms = clamped_diff / 1'000.0;
+    const double overhead_ns_per_event = (clamped_diff * 1'000.0) / overhead_events;
+
+    if (g_config.verbose)
+    {
+        std::cout << "Without ctrack (median): " << dur_no_track << " µs\n";
+        std::cout << "With ctrack (median): " << dur_track << " µs\n";
+        if (raw_diff < 0)
+            std::cout << "Raw diff: " << raw_diff << " µs (negative — clamped to 0, measurement noise)\n";
+        std::cout << "Overhead: " << overhead_percent << "% ("
+                  << overhead_ms << " ms, " << overhead_ns_per_event << " ns/event)\n";
+    }
+
+    return {overhead_percent, overhead_ms, overhead_ns_per_event};
+}
+
+// Generates g_config.total_events tracked events across the configured threads,
+// then times ctrack::result_as_string() while a helper thread polls memory.
+// Returns {memory bytes per event, calculation time in ms,
+//          peak memory during calculation in MB (relative to the start)}.
+std::tuple<double, double, double> measure_memory_and_calculation_time()
+{
+    std::cout << "\n=== Measuring Memory Usage and Calculation Time ===" << std::endl;
+    ctrack::result_as_string();
+    const size_t initial_memory = get_memory_usage();
+    const size_t thread_count = g_config.thread_count > 0 ? g_config.thread_count : 1;
+    const size_t events_per_thread = g_config.total_events / thread_count;
+
+    if (g_config.verbose)
+    {
+        std::cout << "Generating " << g_config.total_events << " events across "
+                  << thread_count << " threads..." << std::endl;
+    }
+
+    const auto gen_start = std::chrono::high_resolution_clock::now();
+    {
+        std::vector<std::thread> threads;
+        threads.reserve(thread_count);
+        std::atomic<bool> start_flag{false};
+
+        for (size_t i = 0; i < thread_count; ++i)
+        {
+            threads.emplace_back(benchmark_worker, events_per_thread, std::ref(start_flag));
+        }
+
+        start_flag = true;
+
+        for (auto &t : threads)
+        {
+            t.join();
+        }
+    }
+    const auto gen_end = std::chrono::high_resolution_clock::now();
+
+    // Measure memory after event generation
+    const size_t post_event_memory = get_memory_usage();
+    const size_t memory_used = memory_delta(post_event_memory, initial_memory);
+    const double bytes_per_event = static_cast<double>(memory_used) / g_config.total_events;
+
+    if (g_config.verbose)
+    {
+        const auto gen_duration =
+            std::chrono::duration_cast<std::chrono::milliseconds>(gen_end - gen_start).count();
+        std::cout << "Event generation took: " << gen_duration << " ms" << std::endl;
+        std::cout << "Memory used: " << memory_used / (1024.0 * 1024.0) << " MB" << std::endl;
+        std::cout << "Memory per event: " << bytes_per_event << " bytes" << std::endl;
+    }
+
+    // Measure calculation time and peak memory usage
+    std::atomic<bool> monitoring{true};
+    std::atomic<size_t> peak_memory{post_event_memory};
+
+    // Memory monitoring thread: raises peak_memory to the largest reading seen
+    std::thread monitor_thread([&monitoring, &peak_memory]()
+    {
+        while (monitoring.load())
+        {
+            const size_t current_memory = get_memory_usage();
+            size_t current_peak = peak_memory.load();
+            while (current_memory > current_peak &&
+                   !peak_memory.compare_exchange_weak(current_peak, current_memory))
+            {
+            }
+            std::this_thread::sleep_for(std::chrono::milliseconds(10)); // Poll every 10ms
+        }
+    });
+
+    const auto calc_start = std::chrono::high_resolution_clock::now();
+    const auto results = ctrack::result_as_string();
+    const auto calc_end = std::chrono::high_resolution_clock::now();
+
+    // Stop monitoring
+    monitoring = false;
+    monitor_thread.join();
+
+    // Microseconds / 1000 -> milliseconds with sub-millisecond resolution
+    const double calc_duration =
+        std::chrono::duration_cast<std::chrono::microseconds>(calc_end - calc_start).count() / 1000.0;
+    const double peak_calc_memory_mb =
+        memory_delta(peak_memory.load(), initial_memory) / (1024.0 * 1024.0);
+
+    if (g_config.verbose)
+    {
+        std::cout << "Result calculation took: " << calc_duration << " ms" << std::endl;
+        std::cout << "Peak memory during calculation: " << peak_calc_memory_mb << " MB" << std::endl;
+    }
+
+    return {bytes_per_event, calc_duration, peak_calc_memory_mb};
+}
+
+// Save baseline to g_config.baseline_file as a flat JSON object, one key per
+// line (the layout load_baseline expects).
+void save_baseline(const BaselineData &data)
+{
+    std::ofstream file(g_config.baseline_file);
+    if (!file)
+    {
+        std::cerr << "Error: Could not open baseline file for writing: " << g_config.baseline_file << std::endl;
+        return;
+    }
+
+    // Simple JSON format
+    file << "{\n";
+    file << "  \"accuracy_error_percent\": " << data.accuracy_error_percent << ",\n";
+    // The value is in microseconds; older files used the misnamed key
+    // "accuracy_error_ms_per_event", which load_baseline still accepts.
+    file << "  \"accuracy_error_us_per_event\": " << data.accuracy_error_us_per_event << ",\n";
+    file << "  \"overhead_percent\": " << data.overhead_percent << ",\n";
+    file << "  \"overhead_ms\": " << data.overhead_ms << ",\n";
+    file << "  \"overhead_ns_per_event\": " << data.overhead_ns_per_event << ",\n";
+    file << "  \"memory_bytes_per_event\": " << data.memory_bytes_per_event << ",\n";
+    file << "  \"calculation_time_ms\": " << data.calculation_time_ms << ",\n";
+    file << "  \"peak_calc_memory_mb\": " << data.peak_calc_memory_mb << ",\n";
+    file << "  \"total_events\": " << data.total_events << ",\n";
+    file << "  \"thread_count\": " << data.thread_count << ",\n";
+    file << "  \"timestamp\": \"" << data.timestamp << "\",\n";
+    file << "  \"platform\": \"" << data.platform << "\"\n";
+    file << "}\n";
+
+    std::cout << "\nBaseline saved to: " << g_config.baseline_file << std::endl;
+}
+
+// Load baseline from g_config.baseline_file into `data`.
+// Only the numeric keys are read; timestamp and platform are left untouched.
+// Returns false when the file cannot be opened or a numeric value is malformed.
+bool load_baseline(BaselineData &data)
+{
+    std::ifstream file(g_config.baseline_file);
+    if (!file)
+    {
+        return false;
+    }
+
+    // True when `line` contains the quoted key followed by a colon.
+    auto has_key = [](const std::string &line, const char *key)
+    {
+        return line.find("\"" + std::string(key) + "\":") != std::string::npos;
+    };
+
+    // Text between the first ": " and the next ',' (or the end of the line).
+    auto value_of = [](const std::string &line) -> std::string
+    {
+        const size_t sep = line.find(": ");
+        if (sep == std::string::npos)
+        {
+            return {};
+        }
+        const size_t begin = sep + 2;
+        const size_t end = line.find(',', begin);
+        return line.substr(begin, end == std::string::npos ? std::string::npos : end - begin);
+    };
+
+    // Simple JSON parsing (production code would use a proper JSON library)
+    try
+    {
+        std::string line;
+        while (std::getline(file, line))
+        {
+            if (has_key(line, "accuracy_error_percent"))
+                data.accuracy_error_percent = std::stod(value_of(line));
+            else if (has_key(line, "accuracy_error_us_per_event") ||
+                     has_key(line, "accuracy_error_ms_per_event")) // legacy key name
+                data.accuracy_error_us_per_event = std::stod(value_of(line));
+            else if (has_key(line, "overhead_percent"))
+                data.overhead_percent = std::stod(value_of(line));
+            else if (has_key(line, "overhead_ms"))
+                data.overhead_ms = std::stod(value_of(line));
+            else if (has_key(line, "overhead_ns_per_event"))
+                data.overhead_ns_per_event = std::stod(value_of(line));
+            else if (has_key(line, "memory_bytes_per_event"))
+                data.memory_bytes_per_event = std::stod(value_of(line));
+            else if (has_key(line, "calculation_time_ms"))
+                data.calculation_time_ms = std::stod(value_of(line));
+            else if (has_key(line, "peak_calc_memory_mb"))
+                data.peak_calc_memory_mb = std::stod(value_of(line));
+            else if (has_key(line, "total_events"))
+                data.total_events = std::stoull(value_of(line));
+            else if (has_key(line, "thread_count"))
+                data.thread_count = std::stoull(value_of(line));
+        }
+    }
+    catch (const std::exception &)
+    {
+        // std::stod / std::stoull reject malformed or out-of-range numbers
+        return false;
+    }
+
+    return true;
+}
+
+// Compare current results with the baseline stored in g_config.baseline_file
+// and print one comparison block per metric.
+void compare_with_baseline(const BaselineData &current)
+{
+    BaselineData baseline{};
+    if (!load_baseline(baseline))
+    {
+        std::cerr << "Error: Could not load baseline file: " << g_config.baseline_file << std::endl;
+        return;
+    }
+
+    std::cout << "\n=== Baseline Comparison ===" << std::endl;
+    std::cout << std::fixed << std::setprecision(2);
+
+    auto print_comparison = [](const std::string &metric, double baseline_val, double current_val,
+                               bool lower_is_better = true)
+    {
+        const double diff = current_val - baseline_val;
+
+        std::cout << metric << ":\n";
+        std::cout << "  Baseline: " << baseline_val << "\n";
+        std::cout << "  Current:  " << current_val << "\n";
+
+        if (diff == 0.0)
+        {
+            std::cout << "  Change:   unchanged\n\n";
+            return;
+        }
+
+        const std::string direction = (diff > 0) ? "increased" : "decreased";
+        const bool improved = lower_is_better ? (diff < 0) : (diff > 0);
+        const std::string indicator = improved ? "better" : "worse";
+
+        if (baseline_val == 0.0)
+        {
+            // A relative change against a zero baseline is undefined.
+            std::cout << "  Change:   " << indicator << " - " << direction
+                      << " (baseline is 0, no percentage)\n\n";
+            return;
+        }
+
+        const double percent_change = (diff / baseline_val) * 100.0;
+        std::cout << "  Change:   " << indicator << " - " << std::abs(percent_change) << "% "
+                  << direction << "\n\n";
+    };
+
+    print_comparison("Accuracy Error %", baseline.accuracy_error_percent, current.accuracy_error_percent);
+    print_comparison("Accuracy Error (us/event)", baseline.accuracy_error_us_per_event, current.accuracy_error_us_per_event);
+    print_comparison("Overhead %", std::abs(baseline.overhead_percent), std::abs(current.overhead_percent));
+    print_comparison("Overhead Time (ms)", std::abs(baseline.overhead_ms), std::abs(current.overhead_ms));
+    print_comparison("Overhead per Event (ns)", baseline.overhead_ns_per_event, current.overhead_ns_per_event);
+    print_comparison("Memory/Event (bytes)", baseline.memory_bytes_per_event, current.memory_bytes_per_event);
+    print_comparison("Calculation Time (ms)", baseline.calculation_time_ms, current.calculation_time_ms);
+    print_comparison("Peak Calc Memory (MB)", baseline.peak_calc_memory_mb, current.peak_calc_memory_mb);
+}
+
+// Get platform string ("Windows", "macOS", "Linux" or "Unknown")
+std::string get_platform()
+{
+#if defined(_WIN32)
+    return "Windows";
+#elif defined(__APPLE__)
+    return "macOS";
+#elif defined(__linux__)
+    return "Linux";
+#else
+    return "Unknown";
+#endif
+}
+
+// Get current local time formatted as "YYYY-MM-DD HH:MM:SS"
+std::string get_timestamp()
+{
+    const auto now = std::chrono::system_clock::now();
+    const std::time_t now_c = std::chrono::system_clock::to_time_t(now);
+    std::tm time_info{};
+#ifdef _WIN32
+    localtime_s(&time_info, &now_c);
+#else
+    localtime_r(&now_c, &time_info); // re-entrant, unlike std::localtime
+#endif
+    std::stringstream ss;
+    ss << std::put_time(&time_info, "%Y-%m-%d %H:%M:%S");
+    return ss.str();
+}
+
+// Print usage
+void print_usage(const char *program_name)
+{
+    std::cout << "Usage: " << program_name << " [options]\n";
+    std::cout << "Options:\n";
+    std::cout << "  --events <count>     Number of events to generate (default: 50000000)\n";
+    std::cout << "  --threads <count>    Number of threads to use (default: hardware concurrency)\n";
+    std::cout << "  --baseline <file>    Baseline file path (default: ctrack_baseline.json)\n";
+    std::cout << "  --record-baseline    Record current results as baseline\n";
+    std::cout << "  --compare-baseline   Compare results with baseline\n";
+    std::cout << "  --verbose            Enable verbose output\n";
+    std::cout << "  --help               Show this help message\n";
+}
+
+// Parse command line arguments into g_config.
+// Returns false when the program should not continue: --help was requested,
+// an option is unknown, or an option value is missing or invalid.
+bool parse_args(int argc, char *argv[])
+{
+    // Parses a strictly positive integer; rejects trailing garbage and 0.
+    auto parse_positive = [](const std::string &text, size_t &out) -> bool
+    {
+        try
+        {
+            size_t consumed = 0;
+            const unsigned long long value = std::stoull(text, &consumed);
+            if (consumed != text.size() || value == 0)
+            {
+                return false;
+            }
+            out = static_cast<size_t>(value);
+            return true;
+        }
+        catch (const std::exception &)
+        {
+            return false;
+        }
+    };
+
+    auto fail = [&](const std::string &message) -> bool
+    {
+        std::cerr << message << std::endl;
+        print_usage(argv[0]);
+        return false;
+    };
+
+    for (int i = 1; i < argc; ++i)
+    {
+        const std::string arg = argv[i];
+
+        const bool takes_value = (arg == "--events" || arg == "--threads" || arg == "--baseline");
+        if (takes_value && i + 1 >= argc)
+        {
+            return fail("Missing value for option: " + arg);
+        }
+
+        if (arg == "--help")
+        {
+            print_usage(argv[0]);
+            return false;
+        }
+        else if (arg == "--events")
+        {
+            if (!parse_positive(argv[++i], g_config.total_events))
+            {
+                return fail("Invalid value for --events: " + std::string(argv[i]));
+            }
+        }
+        else if (arg == "--threads")
+        {
+            if (!parse_positive(argv[++i], g_config.thread_count))
+            {
+                return fail("Invalid value for --threads: " + std::string(argv[i]));
+            }
+        }
+        else if (arg == "--baseline")
+        {
+            g_config.baseline_file = argv[++i];
+        }
+        else if (arg == "--record-baseline")
+        {
+            g_config.record_baseline = true;
+        }
+        else if (arg == "--compare-baseline")
+        {
+            g_config.compare_baseline = true;
+        }
+        else if (arg == "--verbose")
+        {
+            g_config.verbose = true;
+        }
+        else
+        {
+            return fail("Unknown option: " + arg);
+        }
+    }
+
+    return true;
+}
+
+int main(int argc, char *argv[])
+{
+    if (!parse_args(argc, argv))
+    {
+        return 1;
+    }
+
+    // hardware_concurrency() may report 0; every benchmark divides by this.
+    if (g_config.thread_count == 0)
+    {
+        g_config.thread_count = 1;
+    }
+
+    std::cout << "CTRACK Comprehensive Benchmark\n";
+    std::cout << "==============================\n";
+    std::cout << "Total events: " << g_config.total_events << "\n";
+    std::cout << "Thread count: " << g_config.thread_count << "\n";
+    std::cout << "Events per thread: " << g_config.total_events / g_config.thread_count << "\n";
+
+    // Run benchmarks
+    auto [accuracy_error_percent, accuracy_error_us_per_event] = measure_accuracy();
+    auto [overhead_percent, overhead_ms, overhead_ns_per_event] = measure_overhead();
+    auto [bytes_per_event, calc_time, peak_calc_memory] = measure_memory_and_calculation_time();
+
+    // Prepare results
+    BaselineData current_data{};
+    current_data.accuracy_error_percent = accuracy_error_percent;
+    current_data.accuracy_error_us_per_event = accuracy_error_us_per_event;
+    current_data.overhead_percent = overhead_percent;
+    current_data.overhead_ms = overhead_ms;
+    current_data.overhead_ns_per_event = overhead_ns_per_event;
+    current_data.memory_bytes_per_event = bytes_per_event;
+    current_data.calculation_time_ms = calc_time;
+    current_data.peak_calc_memory_mb = peak_calc_memory;
+    current_data.total_events = g_config.total_events;
+    current_data.thread_count = g_config.thread_count;
+    current_data.timestamp = get_timestamp();
+    current_data.platform = get_platform();
+
+    // Print summary
+    std::cout << "\n=== Benchmark Results ===" << std::endl;
+    std::cout << std::fixed << std::setprecision(2);
+    std::cout << "Accuracy error: " << accuracy_error_percent << "% (" << accuracy_error_us_per_event << " us per event)" << std::endl;
+    std::cout << "Overhead: " << overhead_percent << "% (" << overhead_ms << " ms total, "
+              << overhead_ns_per_event << " ns per event)" << std::endl;
+    std::cout << "Memory per event: " << bytes_per_event << " bytes" << std::endl;
+    std::cout << "Calculation time: " << calc_time << " ms" << std::endl;
+    std::cout << "Peak calculation memory: " << peak_calc_memory << " MB" << std::endl;
+
+    // Handle baseline operations
+    if (g_config.record_baseline)
+    {
+        save_baseline(current_data);
+    }
+
+    if (g_config.compare_baseline)
+    {
+        compare_with_baseline(current_data);
+    }
+
+    return 0;
+}
diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt
index 2e82e61..ea3cc76 100644
--- a/examples/CMakeLists.txt
+++ b/examples/CMakeLists.txt
@@ -1,4 +1,5 @@
+#add_compile_definitions(CTRACK_CLOCK_RDTSC) # ""=chrono
 # Create executables for each example
 add_executable(basic_singlethreaded basic_singlethreaded.cpp)
 add_executable(multithreaded_prime_counter multithreaded_prime_counter.cpp)
diff --git a/examples/basic_singlethreaded.cpp b/examples/basic_singlethreaded.cpp index 27fcd81..802d314 100644 --- a/examples/basic_singlethreaded.cpp +++ b/examples/basic_singlethreaded.cpp @@ -43,4 +43,4 @@ int main() { ctrack::result_print(); //std::cout << ctrack::result_as_string() << std::endl; return 0; -} \ No newline at end of file +} diff --git a/include/ctrack.hpp b/include/ctrack.hpp index 52d309c..527504c 100644 --- a/include/ctrack.hpp +++ b/include/ctrack.hpp @@ -27,6 +27,7 @@ #include #include #include +#include #define CTRACK_VERSION_MAJOR 1 #define CTRACK_VERSION_MINOR 1 @@ -38,8 +39,8 @@ // Create a string version #define CTRACK_VERSION_STRING \ - TOSTRING(CTRACK_VERSION_MAJOR) \ - "_" TOSTRING(CTRACK_VERSION_MINOR) "_" TOSTRING(CTRACK_VERSION_PATCH) +TOSTRING(CTRACK_VERSION_MAJOR) \ +"_" TOSTRING(CTRACK_VERSION_MINOR) "_" TOSTRING(CTRACK_VERSION_PATCH) // Use the version string as the namespace name #define CTRACK_VERSION_NAMESPACE v##CTRACK_VERSION_MAJOR##_##CTRACK_VERSION_MINOR##_##CTRACK_VERSION_PATCH @@ -47,1211 +48,1483 @@ namespace ctrack { - inline namespace CTRACK_VERSION_NAMESPACE - { + +// Cross-platform inline + intrinsic shims (to survive from compiler optim) +#if defined(_MSC_VER) +#define CTRACK_ALWAYS_INLINE __forceinline +#elif defined(__GNUC__) || defined(__clang__) +#define CTRACK_ALWAYS_INLINE inline __attribute__((always_inline)) +#else +#define CTRACK_ALWAYS_INLINE inline +#endif + +// TSC clock backends (x86_64 only) +// otherwise, only Clock_Chrono compiles +#if defined(__x86_64__) || defined(_M_X64) + +#if defined(_MSC_VER) +#include +#ifndef WIN32_LEAN_AND_MEAN +#define WIN32_LEAN_AND_MEAN +#endif +#include +#else +#include +#include +#endif + +// TSC -> ns conversion state +// Defined once inside EventHandler constructor. 
Then read only +inline double cycles_per_ns = 3000.0; // 3Ghz +inline std::chrono::system_clock::time_point tsc_anchor_system{}; +inline uint64_t tsc_anchor_cycles = 0; + +// CPUID wrapper +inline void ctrack_cpuid( + uint32_t leaf, + uint32_t subleaf, + uint32_t& eax, + uint32_t& ebx, + uint32_t& ecx, + uint32_t& edx) +{ +#if defined(_MSC_VER) + int regs[4]; + __cpuidex(regs, static_cast(leaf), static_cast(subleaf)); + eax = regs[0]; ebx = regs[1]; ecx = regs[2]; edx = regs[3]; +#else + __cpuid_count(leaf, subleaf, eax, ebx, ecx, edx); +#endif +} + +// C1: CPUID 0x15, exact TSC frequency. Intel Skylake+ (2015+) +inline double tsc_ghz_from_cpuid_15h() { + uint32_t a, b, c, d; + ctrack_cpuid(0, 0, a, b, c, d); + if (a < 0x15) return 0.0; + + ctrack_cpuid(0x15, 0, a, b, c, d); + // EAX = denominator, EBX = numerator, ECX = core crystal Hz + if (a == 0 || b == 0 || c == 0) return 0.0; + return (static_cast(c) * b / a) / 1e9; +} + +// C2: CPUID 0x16, base frequency in MHz. Intel Haswell+ (2013+) +inline double tsc_ghz_from_cpuid_16h() { + uint32_t a, b, c, d; + ctrack_cpuid(0, 0, a, b, c, d); + if (a < 0x16) return 0.0; + + ctrack_cpuid(0x16, 0, a, b, c, d); + uint32_t base_mhz = a & 0xFFFF; + if (base_mhz == 0) return 0.0; + return static_cast(base_mhz) / 1000.0; +} + +// C3 (Linux): intel_pstate base_frequency Intel CPU only +inline double tsc_ghz_from_sysfs_base() { +#if defined(__linux__) + std::ifstream f("/sys/devices/system/cpu/cpu0/cpufreq/base_frequency"); + if (!f) return 0.0; + double khz; + if (!(f >> khz) || khz <= 0.0) return 0.0; + return khz / 1e6; +#else + return 0.0; +#endif +} + +// C4 (Windows): registry ~MHz, set at boot from CPUID +inline double tsc_ghz_from_windows_registry() { +#if defined(_WIN32) + HKEY key; + if (RegOpenKeyExA(HKEY_LOCAL_MACHINE, + "HARDWARE\\DESCRIPTION\\System\\CentralProcessor\\0", 0, KEY_READ, &key) != ERROR_SUCCESS) + return 0.0; + DWORD mhz = 0, size = sizeof(DWORD); + LONG status = RegQueryValueExA(key, "~MHz", nullptr, 
nullptr, reinterpret_cast(&mhz), &size); + RegCloseKey(key); + if (status != ERROR_SUCCESS || mhz == 0) return 0.0; + return static_cast(mhz) / 1000.0; +#else + return 0.0; +#endif +} + +// Calibration fallback: lightweight runtime calibration (~3ms) +// +// Last-resort fallback for AMD bare-metal and virtualized environments +// where no static frequency source is available. Three 1ms samples, +// median wins. This is the *only* path that pays a startup cost; users +// on Intel hardware will exit at C1 or C2 before reaching here. +inline double tsc_ghz_from_calibration() { + constexpr int N = 3; + double samples[N]; + + for (int i = 0; i < N; ++i) { + auto wall_t0 = std::chrono::steady_clock::now(); + uint64_t tsc_t0 = __rdtsc(); + std::this_thread::sleep_for(std::chrono::milliseconds(1)); + uint64_t tsc_t1 = __rdtsc(); + auto wall_t1 = std::chrono::steady_clock::now(); + + double ns = std::chrono::duration(wall_t1 - wall_t0).count(); + if (ns <= 0.0) { samples[i] = 0.0; continue; } + samples[i] = static_cast(tsc_t1 - tsc_t0) / ns; // cycles/ns = GHz + } + + std::sort(samples, samples + N); + return samples[N / 2]; // median rejects the worst scheduler hiccup +} + +// Master calibration: try sources in order, abort if all fail +inline void calibrate_tsc() { + double ghz = tsc_ghz_from_cpuid_15h(); + if (ghz <= 0.0) ghz = tsc_ghz_from_cpuid_16h(); + if (ghz <= 0.0) ghz = tsc_ghz_from_sysfs_base(); + if (ghz <= 0.0) ghz = tsc_ghz_from_windows_registry(); + if (ghz <= 0.0) ghz = tsc_ghz_from_calibration(); + + if (ghz <= 0.0) { + std::cerr << + "[ctrack] FATAL: TSC clock backend selected at compile time but no usable frequency source found.\n" + "[ctrack] Rebuild without CTRACK_CLOCK_RDTSC / RDTSCP / RDTSCP_LFENCE to use the chrono fallback.\n"; + std::abort(); + } + + cycles_per_ns = ghz; + tsc_anchor_cycles = __rdtsc(); + tsc_anchor_system = std::chrono::system_clock::now(); +} + +inline uint_fast64_t cycles_to_ns(uint64_t cycles) { + return static_cast(cycles / 
cycles_per_ns); +} + +inline std::string cycles_to_timestring(uint64_t tp) { + int64_t delta_cycles = static_cast(tp) - static_cast(tsc_anchor_cycles); + auto delta_ns = std::chrono::nanoseconds(static_cast(delta_cycles / cycles_per_ns)); + auto system_tp = tsc_anchor_system + delta_ns; + auto tt = std::chrono::system_clock::to_time_t(system_tp); + std::tm tm{}; +#if defined(_WIN32) + localtime_s(&tm, &tt); +#else + localtime_r(&tt, &tm); +#endif + std::ostringstream oss; + oss << std::put_time(&tm, "%Y-%m-%d %H:%M:%S"); + return oss.str(); +} + +#if defined(CTRACK_CLOCK_RDTSC) +struct Clock_RDTSC { + using time_point = uint64_t; + CTRACK_ALWAYS_INLINE static time_point NOW() { return __rdtsc(); } + static inline uint_fast64_t duration_ns(time_point s, time_point e) { return cycles_to_ns(e - s); } + static inline std::string to_string(const time_point &tp) { return cycles_to_timestring(tp); } +}; +using ActiveClock = Clock_RDTSC; +#elif defined(CTRACK_CLOCK_RDTSCP) +struct Clock_RDTSCP { + using time_point = uint64_t; + CTRACK_ALWAYS_INLINE static time_point NOW() { unsigned int aux; return __rdtscp(&aux); } + static inline uint_fast64_t duration_ns(time_point s, time_point e) { return cycles_to_ns(e - s); } + static inline std::string to_string(const time_point &tp) { return cycles_to_timestring(tp); } +}; +using ActiveClock = Clock_RDTSCP; +#elif defined(CTRACK_CLOCK_RDTSCP_LFENCE) +struct Clock_RDTSCP_LFENCE { + using time_point = uint64_t; + CTRACK_ALWAYS_INLINE static time_point NOW() { _mm_lfence(); unsigned int aux; return __rdtscp(&aux); } + static inline uint_fast64_t duration_ns(time_point s, time_point e) { return cycles_to_ns(e - s); } + static inline std::string to_string(const time_point &tp) { return cycles_to_timestring(tp); } +}; +using ActiveClock = Clock_RDTSCP_LFENCE; +#endif + +#else // not x86_64 + +// Hard-fail at compile time if a TSC backend is requested on a non-x86 build. 
+#if defined(CTRACK_CLOCK_RDTSC) || defined(CTRACK_CLOCK_RDTSCP) || defined(CTRACK_CLOCK_RDTSCP_LFENCE) +#error "CTRACK_CLOCK_RDTSC* requires x86_64. Remove the macro to use Clock_Chrono." +#endif + +#endif // x86_64 + +// ── Chrono fallback (default if no TSC backend selected) ───────────────── +#if !defined(CTRACK_CLOCK_RDTSC) && !defined(CTRACK_CLOCK_RDTSCP) && !defined(CTRACK_CLOCK_RDTSCP_LFENCE) +struct Clock_Chrono { + using time_point = std::chrono::high_resolution_clock::time_point; + CTRACK_ALWAYS_INLINE static time_point NOW() { + return std::chrono::high_resolution_clock::now(); + } + static inline uint_fast64_t duration_ns(time_point s, time_point e) { + return std::chrono::duration_cast(e - s).count(); + } + static inline std::string to_string(const time_point &tp) { + auto system_tp = std::chrono::system_clock::now() + + std::chrono::duration_cast( + tp - std::chrono::high_resolution_clock::now()); + auto tt = std::chrono::system_clock::to_time_t(system_tp); + std::tm tm{}; +#if defined(_WIN32) + localtime_s(&tm, &tt); +#else + localtime_r(&tt, &tm); +#endif + std::ostringstream oss; + oss << std::put_time(&tm, "%Y-%m-%d %H:%M:%S"); + return oss.str(); + } +}; +using ActiveClock = Clock_Chrono; +#endif // chrono + + +inline namespace CTRACK_VERSION_NAMESPACE +{ #ifndef CTRACK_DISABLE_EXECUTION_POLICY - constexpr auto execution_policy = std::execution::par_unseq; +constexpr auto execution_policy = std::execution::par_unseq; #define OPT_EXEC_POLICY execution_policy, #else #define OPT_EXEC_POLICY #endif - template - auto sum_field(const std::vector &vec, Field T::*field) - { - using FieldType = std::decay_t().*field)>; - return std::transform_reduce( - OPT_EXEC_POLICY - vec.begin(), - vec.end(), - FieldType{}, - std::plus<>(), - [field](const auto &item) - { return item.*field; }); - } - - template - auto sum_squared_field(const std::vector &values, Field T::*field) - { - using FieldType = std::decay_t().*field)>; - return std::transform_reduce( - 
OPT_EXEC_POLICY - values.begin(), - values.end(), - FieldType{}, - std::plus<>(), - [field](const T &v) - { - return (v.*field) * (v.*field); - }); - } - - template - double calculate_std_dev_field(std::vector &values, Field T::*field, const double mean) - { - double res = std::transform_reduce( - OPT_EXEC_POLICY - values.begin(), - values.end(), - 0.0, - std::plus<>(), - [mean, field](const T &v) - { - return std::pow(static_cast(v.*field) - mean, 2); - }); - - return sqrt(res / values.size()); - } - - template - auto get_distinct_field_values(const std::vector &vec, Field T::*field) - { - std::set().*field)>> distinct_values; - - std::transform(vec.begin(), vec.end(), - std::inserter(distinct_values, distinct_values.end()), - [field](const T *item) - { return item->*field; }); - return distinct_values; - } - - template - auto get_distinct_field_values(const std::vector &vec, Field T::*field) - { - std::set().*field)>> distinct_values; - - std::transform(vec.begin(), vec.end(), - std::inserter(distinct_values, distinct_values.end()), - [field](const T &item) - { return item.*field; }); - return distinct_values; - } - - template - size_t count_distinct_field_values(const std::vector &vec, Field T::*field) - { - return get_distinct_field_values(vec, field).size(); - } - - template - void order_pointer_vector_by_field(std::vector &vec, MemberType StructType::*member, bool asc = true) - { - std::sort(OPT_EXEC_POLICY vec.begin(), vec.end(), - [member, asc](const StructType *a, const StructType *b) - { - if (asc) - return (a->*member) < (b->*member); - else - return (a->*member) > (b->*member); - }); - } - - template - size_t countAllEvents(const std::deque> &events) - { - return std::transform_reduce( - OPT_EXEC_POLICY - events.begin(), - events.end(), - size_t(0), - std::plus<>(), - [](const auto &vec) - { - return vec.size(); - }); - } - - struct ColorScheme - { - std::string border_color; - std::string header_color; - std::string top_header_color; - std::string 
row_color; - - ColorScheme(const std::string &border, - const std::string &header, - const std::string &top_header, - const std::string &row) - : border_color(border), - header_color(header), - top_header_color(top_header), - row_color(row) {} - }; - - static inline const ColorScheme default_colors{ - "\033[38;5;24m", // Darker Blue (Border) - "\033[1;38;5;135m", // Purple (Header) - "\033[1;38;5;92m", // Darker Purple (Top Header) - "\033[38;5;39m" // Light Blue (Row) - }; - - // Alternate color scheme (still nice to read on terminals) - static inline const ColorScheme alternate_colors{ - "\033[38;5;28m", // Dark Green (Border) - "\033[1;38;5;208m", // Orange (Header) - "\033[1;38;5;130m", // Dark Orange (Top Header) - "\033[38;5;71m" // Light Green (Row) - }; - - class BeautifulTable - { - private: - std::vector> top_header; - std::vector header; - std::vector> rows; - std::vector columnWidths; - bool useColor; - ColorScheme colors; - static inline const std::string RESET_COLOR = "\033[0m"; - - void updateColumnWidths(const std::vector &row) - { - for (size_t i = 0; i < row.size(); ++i) - { - if (i >= columnWidths.size()) - { - columnWidths.push_back(row[i].length()); - } - else - { - columnWidths[i] = std::max(columnWidths[i], row[i].length()); - } - } - } - - template - void printHorizontalLine(StreamType &stream) const - { - if (useColor) - stream << colors.border_color; - stream << "+"; - for (size_t width : columnWidths) - { - stream << std::string(width + 2, '-') << "+"; - } - if (useColor) - stream << RESET_COLOR; - stream << "\n"; - } - - template - void printRow(StreamType &stream, const std::vector &row, const std::string &color, bool center = false) const - { - if (useColor) - stream << colors.border_color; - stream << "|"; - if (useColor) - stream << RESET_COLOR << color; - for (size_t i = 0; i < row.size(); ++i) - { - if (center) - { - size_t padding = columnWidths[i] - row[i].length(); - size_t leftPadding = padding / 2; - size_t rightPadding = 
padding - leftPadding; - stream << std::string(leftPadding + 1, ' ') << row[i] << std::string(rightPadding + 1, ' '); - } - else - { - stream << " " << std::setw(static_cast(columnWidths[i])) << std::right << row[i] << " "; - } - if (useColor) - stream << RESET_COLOR << colors.border_color; - stream << "|"; - if (useColor) - stream << RESET_COLOR << color; - } - if (useColor) - stream << RESET_COLOR; - stream << "\n"; - } - - template - void printRow(StreamType &stream, const std::vector> &row, const std::string &color) const - { - if (useColor) - stream << colors.border_color; - stream << "|"; - if (useColor) - stream << RESET_COLOR << color; - int y = 0; - for (size_t i = 0; i < row.size(); ++i) - { - size_t sum = row[i].second - 1; - for (int x = y; x < y + row[i].second; x++) - { - sum += columnWidths[x] + 2; - } - y += row[i].second; - - size_t textWidth = row[i].first.length(); - size_t totalPadding = sum - textWidth; - size_t leftPadding = totalPadding / 2; - size_t rightPadding = totalPadding - leftPadding; - - // Print left padding - stream << std::string(leftPadding, ' '); - - // Print text - stream << row[i].first; - - // Print right padding - stream << std::string(rightPadding, ' '); - if (useColor) - stream << RESET_COLOR << colors.border_color; - stream << "|"; - if (useColor) - stream << RESET_COLOR << color; - } - if (useColor) - stream << RESET_COLOR; - stream << "\n"; - } - - public: - BeautifulTable(const std::vector &headerColumns, bool enableColor = false, const ColorScheme &colors = default_colors, const std::vector> &top_header = {}) - : top_header(top_header), header(headerColumns), useColor(enableColor), colors(colors) - { - updateColumnWidths(header); - } - - void addRow(const std::vector &row) - { - if (row.size() != header.size()) - { - throw std::invalid_argument("Row size must match header size"); - } - rows.push_back(row); - updateColumnWidths(row); - } - - template - void print(StreamType &stream) const - { - if (top_header.size() > 
0) - { - printHorizontalLine(stream); - printRow(stream, top_header, colors.top_header_color); - } - printHorizontalLine(stream); - printRow(stream, header, colors.header_color, true); - printHorizontalLine(stream); - for (const auto &row : rows) - { - printRow(stream, row, colors.row_color); - printHorizontalLine(stream); - } - } - - template - static inline std::string table_string(const T &value) - { - std::ostringstream oss; - oss << value; - return oss.str(); - } - - static inline std::string table_time(uint_fast64_t nanoseconds) - { - return table_time(static_cast(nanoseconds)); - } - - static inline std::string table_time(double nanoseconds) - { - const char *units[] = {"ns", "mcs", "ms", "s"}; - int unit = 0; - double value = static_cast(nanoseconds); - while (value >= 1000 && unit < 3) - { - value /= 1000; - unit++; - } - std::ostringstream oss; - oss << std::fixed << std::setprecision(2) << value << " " << units[unit]; - return oss.str(); - } - - static inline std::string table_percentage(uint_fast64_t value, uint_fast64_t total) - { - if (total == 0) - { - return "nan%"; - } - - // Calculate the percentage - double percentage = (static_cast(value) / total) * 100.0; - - // Format the percentage as a string with 2 decimal places - std::ostringstream ss; - ss << std::fixed << std::setprecision(2) << percentage << "%"; - - return ss.str(); - } - - static inline std::string table_timepoint(const std::chrono::high_resolution_clock::time_point &tp) - { - auto system_tp = std::chrono::system_clock::now() + - std::chrono::duration_cast( - tp - std::chrono::high_resolution_clock::now()); - - auto tt = std::chrono::system_clock::to_time_t(system_tp); - std::tm tm{}; +template +auto sum_field(const std::vector &vec, Field T::*field) +{ + using FieldType = std::decay_t().*field)>; + return std::transform_reduce( + OPT_EXEC_POLICY + vec.begin(), + vec.end(), + FieldType{}, + std::plus<>(), + [field](const auto &item) + { return item.*field; } + ); +} -#if 
defined(_WIN32) - localtime_s(&tm, &tt); -#else - localtime_r(&tt, &tm); +template +auto sum_squared_field(const std::vector &values, Field T::*field) +{ + using FieldType = std::decay_t().*field)>; + return std::transform_reduce( + OPT_EXEC_POLICY + values.begin(), + values.end(), + FieldType{}, + std::plus<>(), + [field](const T &v) + { + return (v.*field) * (v.*field); + } + ); +} + +template +double calculate_std_dev_field(std::vector &values, Field T::*field, const double mean) +{ + double res = std::transform_reduce( + OPT_EXEC_POLICY + values.begin(), + values.end(), + 0.0, + std::plus<>(), + [mean, field](const T &v) + { + return std::pow(static_cast(v.*field) - mean, 2); + } + ); + + return sqrt(res / values.size()); +} + +template +auto get_distinct_field_values(const std::vector &vec, Field T::*field) +{ + std::set().*field)>> distinct_values; + + std::transform( + vec.begin(), vec.end(), + std::inserter(distinct_values, distinct_values.end()), + [field](const T *item) + { return item->*field; } + ); + return distinct_values; +} + +template +auto get_distinct_field_values(const std::vector &vec, Field T::*field) +{ + std::set().*field)>> distinct_values; + + std::transform( + vec.begin(), vec.end(), + std::inserter(distinct_values, distinct_values.end()), + [field](const T &item) + { return item.*field; } + ); + return distinct_values; +} + +template +size_t count_distinct_field_values(const std::vector &vec, Field T::*field) +{ + return get_distinct_field_values(vec, field).size(); +} + +template +void order_pointer_vector_by_field(std::vector &vec, MemberType StructType::*member, bool asc = true) +{ + std::sort( + OPT_EXEC_POLICY vec.begin(), vec.end(), + [member, asc](const StructType *a, const StructType *b){ + if (asc) + return (a->*member) < (b->*member); + else + return (a->*member) > (b->*member); + }); +} + +template +size_t countAllEvents(const std::deque> &events) +{ + return std::transform_reduce( + OPT_EXEC_POLICY + events.begin(), + 
events.end(), + size_t(0), + std::plus<>(), + [](const auto &vec) + { + return vec.size(); + }); +} + +struct ColorScheme +{ + std::string border_color; + std::string header_color; + std::string top_header_color; + std::string row_color; + + ColorScheme(const std::string &border, + const std::string &header, + const std::string &top_header, + const std::string &row) + : border_color(border), + header_color(header), + top_header_color(top_header), + row_color(row) + {} +}; + +static inline const ColorScheme default_colors{ + "\033[38;5;24m", // Darker Blue (Border) + "\033[1;38;5;135m", // Purple (Header) + "\033[1;38;5;92m", // Darker Purple (Top Header) + "\033[38;5;39m" // Light Blue (Row) +}; + +// Alternate color scheme (still nice to read on terminals) +static inline const ColorScheme alternate_colors{ + "\033[38;5;28m", // Dark Green (Border) + "\033[1;38;5;208m", // Orange (Header) + "\033[1;38;5;130m", // Dark Orange (Top Header) + "\033[38;5;71m" // Light Green (Row) +}; + +class BeautifulTable +{ +private: + std::vector> top_header; + std::vector header; + std::vector> rows; + std::vector columnWidths; + bool useColor; + ColorScheme colors; + static inline const std::string RESET_COLOR = "\033[0m"; + + void updateColumnWidths(const std::vector &row) + { + for (size_t i = 0; i < row.size(); ++i) + { + if (i >= columnWidths.size()) + { + columnWidths.push_back(row[i].length()); + } + else + { + columnWidths[i] = std::max(columnWidths[i], row[i].length()); + } + } + } + + template + void printHorizontalLine(StreamType &stream) const + { + if (useColor) + stream << colors.border_color; + stream << "+"; + for (size_t width : columnWidths) + { + stream << std::string(width + 2, '-') << "+"; + } + if (useColor) + stream << RESET_COLOR; + stream << "\n"; + } + + template + void printRow(StreamType &stream, const std::vector &row, const std::string &color, bool center = false) const + { + if (useColor) + stream << colors.border_color; + stream << "|"; + if 
(useColor) + stream << RESET_COLOR << color; + for (size_t i = 0; i < row.size(); ++i) + { + if (center) + { + size_t padding = columnWidths[i] - row[i].length(); + size_t leftPadding = padding / 2; + size_t rightPadding = padding - leftPadding; + stream << std::string(leftPadding + 1, ' ') << row[i] << std::string(rightPadding + 1, ' '); + } + else + { + stream << " " << std::setw(static_cast(columnWidths[i])) << std::right << row[i] << " "; + } + if (useColor) + stream << RESET_COLOR << colors.border_color; + stream << "|"; + if (useColor) + stream << RESET_COLOR << color; + } + if (useColor) + stream << RESET_COLOR; + stream << "\n"; + } + + template + void printRow(StreamType &stream, const std::vector> &row, const std::string &color) const + { + if (useColor) + stream << colors.border_color; + stream << "|"; + if (useColor) + stream << RESET_COLOR << color; + int y = 0; + for (size_t i = 0; i < row.size(); ++i) + { + size_t sum = row[i].second - 1; + for (int x = y; x < y + row[i].second; x++) + { + sum += columnWidths[x] + 2; + } + y += row[i].second; + + size_t textWidth = row[i].first.length(); + size_t totalPadding = sum - textWidth; + size_t leftPadding = totalPadding / 2; + size_t rightPadding = totalPadding - leftPadding; + + // Print left padding + stream << std::string(leftPadding, ' '); + + // Print text + stream << row[i].first; + + // Print right padding + stream << std::string(rightPadding, ' '); + if (useColor) + stream << RESET_COLOR << colors.border_color; + stream << "|"; + if (useColor) + stream << RESET_COLOR << color; + } + if (useColor) + stream << RESET_COLOR; + stream << "\n"; + } + +public: + BeautifulTable(const std::vector &headerColumns, bool enableColor = false, const ColorScheme &colors = default_colors, const std::vector> &top_header = {}) + : top_header(top_header), header(headerColumns), useColor(enableColor), colors(colors) + { + updateColumnWidths(header); + } + + void addRow(const std::vector &row) + { + if (row.size() != 
header.size()) + { + throw std::invalid_argument("Row size must match header size"); + } + rows.push_back(row); + updateColumnWidths(row); + } + + template + void print(StreamType &stream) const + { + if (top_header.size() > 0) + { + printHorizontalLine(stream); + printRow(stream, top_header, colors.top_header_color); + } + printHorizontalLine(stream); + printRow(stream, header, colors.header_color, true); + printHorizontalLine(stream); + for (const auto &row : rows) + { + printRow(stream, row, colors.row_color); + printHorizontalLine(stream); + } + } + + template + static inline std::string table_string(const T &value) + { + std::ostringstream oss; + oss << value; + return oss.str(); + } + + static inline std::string table_time(uint_fast64_t nanoseconds) + { + return table_time(static_cast(nanoseconds)); + } + + static inline std::string table_time(double nanoseconds) + { + const char *units[] = {"ns", "us", "ms", "s"}; + int unit = 0; + double value = static_cast(nanoseconds); + while (value >= 1000 && unit < 3) + { + value /= 1000; + unit++; + } + std::ostringstream oss; + oss << std::fixed << std::setprecision(2) << value << " " << units[unit]; + return oss.str(); + } + + static inline std::string table_percentage(uint_fast64_t value, uint_fast64_t total) + { + if (total == 0) + { + return "nan%"; + } + + // Calculate the percentage + double percentage = (static_cast(value) / total) * 100.0; + + // Format the percentage as a string with 2 decimal places + std::ostringstream ss; + ss << std::fixed << std::setprecision(2) << percentage << "%"; + + return ss.str(); + } + + static inline std::string table_timepoint(const ActiveClock::time_point &tp) + { + return ActiveClock::to_string(tp); + } + + static inline std::string stable_shortenPath(const std::string &fullPath, size_t maxLength = 35) + { + namespace fs = std::filesystem; + + fs::path path(fullPath); + std::string filename = path.filename().string(); + + if (filename.length() <= maxLength) + { + return 
filename; + } + + // If filename is too long, truncate it and add ... + return filename.substr(0, maxLength - 3) + "..."; + } + + using bt = BeautifulTable; +}; + + + + + + +struct Event +{ + ActiveClock::time_point start_time; + ActiveClock::time_point end_time; + int line; + int thread_id; + std::string_view filename; + std::string_view function; + unsigned int event_id; + + Event(const ActiveClock::time_point &start_time, const ActiveClock::time_point &end_time, const std::string_view filename, const int line, const std::string_view function, const int thread_id, const unsigned int event_id) + : start_time(start_time), end_time(end_time), line(line), thread_id(thread_id), filename(filename), function(function), event_id(event_id) + {} +}; + +struct Simple_Event +{ + uint_fast64_t duration = 0; + ActiveClock::time_point start_time{}; + int_fast64_t unique_id = 0; + ActiveClock::time_point end_time{}; + Simple_Event(const ActiveClock::time_point &start_time, const ActiveClock::time_point &end_time, const uint_fast64_t duration, const int_fast64_t unique_id) : duration(duration), start_time(start_time), unique_id(unique_id), end_time(end_time) {} + Simple_Event() {} +}; + +inline bool cmp_simple_event_by_duration_asc(const Simple_Event &a, const Simple_Event &b) +{ + return a.duration < b.duration; +} +inline bool cmp_simple_event_by_start_time_asc(const Simple_Event &a, const Simple_Event &b) +{ + return a.start_time < b.start_time; +} + +inline uint_fast64_t get_unique_event_id(unsigned int thread_id, unsigned int event_id) +{ + uint_fast64_t uniqueId = static_cast(thread_id); + uniqueId = uniqueId << 32; + uniqueId += static_cast(event_id); + return uniqueId; +} + +inline std::vector create_simple_events(const std::vector &events) +{ + std::vector simple_events{}; + simple_events.resize(events.size()); + std::transform( + OPT_EXEC_POLICY + events.begin(), + events.end(), + simple_events.begin(), + [](const Event &event) + { + Simple_Event 
simple_event(event.start_time, event.end_time, ActiveClock::duration_ns(event.start_time, event.end_time), get_unique_event_id(event.thread_id, event.event_id)); + return simple_event; + }); + return simple_events; +} + +inline std::vector create_simple_events(const std::vector &events) +{ + std::vector simple_events{}; + simple_events.resize(events.size()); + std::transform( + OPT_EXEC_POLICY + events.begin(), + events.end(), + simple_events.begin(), + [](const Event *event){ + Simple_Event simple_event(event->start_time, event->end_time, ActiveClock::duration_ns(event->start_time, event->end_time), get_unique_event_id(event->thread_id, event->event_id)); + return simple_event; + }); + return simple_events; +} + +// requires already sorted +inline std::vector sorted_create_grouped_simple_events(const std::vector &events) +{ + std::vector result{}; + if (events.size() == 0) + return result; + result.push_back(events[0]); + unsigned int current_idx = 0; + + for (size_t i = 1; i < events.size(); i++) + { + if (result[current_idx].end_time >= events[i].start_time) + { + result[current_idx].end_time = std::max(result[current_idx].end_time, events[i].end_time); + } + else + { + result.push_back(events[i]); + current_idx++; + } + } + + for (auto &entry : result) + { + entry.duration = ActiveClock::duration_ns(entry.start_time, entry.end_time); + } + + return result; +} + +inline std::vector load_child_events_simple(const std::vector &parent_events_simple, + const std::unordered_map &events_map, + const std::unordered_map> &child_graph) +{ + std::vector child_events{}; + + for (const auto &simple_parent_event : parent_events_simple) + { + auto it = child_graph.find(simple_parent_event.unique_id); + if (it != child_graph.end()) + { + auto &parent_event = events_map.at(simple_parent_event.unique_id); + for (auto &child_id : it->second) + { + auto &child_event = events_map.at(child_id); + if (child_event->filename == parent_event->filename && + child_event->function == 
parent_event->function && + child_event->line == parent_event->line) + continue; + + child_events.push_back(child_event); + } + } + } + + return create_simple_events(child_events); +}; + +class EventGroup +{ +public: + void calculateStats(unsigned int non_center_percent, const std::unordered_map &events_map, const std::unordered_map> &child_graph) + { + if (all_events.size() == 0) + return; + + + auto all_events_simple = create_simple_events(all_events); + std::sort(OPT_EXEC_POLICY all_events_simple.begin(), all_events_simple.end(), cmp_simple_event_by_duration_asc); + all_cnt = static_cast(all_events_simple.size()); + const double factor = (1.0 / static_cast(all_cnt)); + + auto all_child_events_simple = load_child_events_simple(all_events_simple, events_map, child_graph); + + all_time_acc = sum_field(all_events_simple, &Simple_Event::duration); + + const double all_mean = all_time_acc * factor; + if (std::fpclassify(all_mean) == FP_ZERO) + return; + + all_st = calculate_std_dev_field(all_events_simple, &Simple_Event::duration, all_mean); // std::sqrt(all_variance); + all_cv = all_st / all_mean; + + all_thread_cnt = static_cast(get_distinct_field_values(all_events, &Event::thread_id).size()); + unsigned int amount_non_center = all_cnt * non_center_percent / 100; + + fastest_range = non_center_percent; + slowest_range = 100 - non_center_percent; + + std::vector fastest_events_simple, slowest_events_simple, center_events_simple; + fastest_events_simple.reserve(amount_non_center); + slowest_events_simple.reserve(amount_non_center); + if (all_cnt > 2) + center_events_simple.reserve(all_cnt - 2 * amount_non_center); + + for (unsigned int i = 0; i < all_events_simple.size(); i++) + { + if (i < amount_non_center) + { + fastest_events_simple.push_back(all_events_simple[i]); + } + else if (i >= all_cnt - amount_non_center) + { + slowest_events_simple.push_back(all_events_simple[i]); + } + else + { + center_events_simple.push_back(all_events_simple[i]); + } + } + if 
(amount_non_center > 0) + { + // fastest + fastest_min = fastest_events_simple[0].duration; + fastest_mean = sum_field(fastest_events_simple, &Simple_Event::duration) / static_cast(amount_non_center); + + // slowest + slowest_max = slowest_events_simple[slowest_events_simple.size() - 1].duration; + slowest_mean = sum_field(slowest_events_simple, &Simple_Event::duration) / static_cast(amount_non_center); + } + + // center + center_min = center_events_simple[0].duration; + center_max = center_events_simple[center_events_simple.size() - 1].duration; + center_mean = sum_field(center_events_simple, &Simple_Event::duration) / static_cast(center_events_simple.size()); + if (center_events_simple.size() % 2 == 1) + center_med = center_events_simple[center_events_simple.size() / 2].duration; + else + center_med = (center_events_simple[center_events_simple.size() / 2].duration + center_events_simple[center_events_simple.size() / 2 - 1].duration) / 2; + + auto center_child_events_simple = load_child_events_simple(center_events_simple, events_map, child_graph); + + std::sort(OPT_EXEC_POLICY center_events_simple.begin(), center_events_simple.end(), cmp_simple_event_by_start_time_asc); + center_grouped = sorted_create_grouped_simple_events(center_events_simple); + center_time_active = sum_field(center_grouped, &Simple_Event::duration); + + std::sort(OPT_EXEC_POLICY center_child_events_simple.begin(), center_child_events_simple.end(), cmp_simple_event_by_start_time_asc); + auto center_child_events_grouped = sorted_create_grouped_simple_events(center_child_events_simple); + center_time_active_exclusive = center_time_active - sum_field(center_child_events_grouped, &Simple_Event::duration); + + std::sort(OPT_EXEC_POLICY all_events_simple.begin(), all_events_simple.end(), cmp_simple_event_by_start_time_asc); + all_grouped = sorted_create_grouped_simple_events(all_events_simple); + all_time_active = sum_field(all_grouped, &Simple_Event::duration); + + std::sort(OPT_EXEC_POLICY 
all_child_events_simple.begin(), all_child_events_simple.end(), cmp_simple_event_by_start_time_asc); + auto all_child_events_grouped = sorted_create_grouped_simple_events(all_child_events_simple); + all_time_active_exclusive = all_time_active - sum_field(all_child_events_grouped, &Simple_Event::duration); + } + + // all_group + + double all_cv = 0.0; + double all_st = 0.0; + + unsigned int all_cnt = 0; + uint_fast64_t all_time_acc = 0; + uint_fast64_t all_time_active = 0; + uint_fast64_t all_time_active_exclusive = 0; + unsigned int all_thread_cnt = 0; + std::vector all_grouped = {}; + std::vector all_events = {}; + + // fastest_group + unsigned int fastest_range = 0; + uint_fast64_t fastest_min = 0; + double fastest_mean = 0.0; + + // slowest group + unsigned int slowest_range = 0; + uint_fast64_t slowest_max = 0; + double slowest_mean = 0.0; + + // center group + + uint_fast64_t center_min = 0; + uint_fast64_t center_max = 0; + uint_fast64_t center_med = 0; + double center_mean = 0; + uint_fast64_t center_time_active = 0; + uint_fast64_t center_time_active_exclusive = 0; + std::vector center_grouped = {}; + + std::string filename = {}; + std::string function_name = {}; + int line = 0; + +private: +}; + +typedef std::vector t_events; +typedef std::map> sub_events; + +struct store +{ + inline static std::atomic write_events_locked = false; + inline static std::mutex event_mutex; + inline static ActiveClock::time_point track_start_time = ActiveClock::NOW(); + inline static std::atomic store_clear_cnt = 0; + + inline static std::atomic thread_cnt = -1; + inline static std::deque a_events{}; + inline static std::deque a_sub_events{}; + + inline static std::deque a_current_event_id{}, a_current_event_cnt{}, a_string_id{}; + + inline static std::deque a_thread_ids{}; +}; +inline thread_local t_events *event_ptr = nullptr; +inline thread_local sub_events *sub_events_ptr = nullptr; + +inline thread_local unsigned int *current_event_id = nullptr; +inline thread_local 
unsigned int *current_event_cnt = nullptr; +inline thread_local unsigned int *string_id = nullptr; + +inline thread_local int *thread_id = nullptr; + +typedef std::map line_result; +typedef std::map function_result; +typedef std::map filename_result; + +struct ctrack_result_settings +{ + unsigned int non_center_percent = 1; + double min_percent_active_exclusive = 0.0; // between 0-100 + double percent_exclude_fastest_active_exclusive = 0.0; // between 0-100 +}; + +struct summary_row +{ + std::string filename; + std::string function_name; + int line{}; + int calls{}; + double percent_ae_bracket{}; // ae[center]% by configuration + double percent_ae_all{}; // ae[0-100]% + std::chrono::nanoseconds time_ae_all{}; + std::chrono::nanoseconds time_a_all{}; +}; + +struct summary_table +{ + std::vector rows; +}; + +struct detail_stats +{ + // Info fields + std::string filename; + std::string function_name; + int line{}; + std::chrono::nanoseconds time_acc{}; // Simple sum of all execution times (can exceed wall clock in MT) + std::chrono::nanoseconds sd{}; // Standard deviation + double cv{}; // Coefficient of variation (sd/mean) + int calls{}; // Total number of calls + int threads{}; // Number of different threads that called this function + + // Summary-like fields (for unified access) + double percent_ae_bracket{}; // ae[center]% as percentage of total time + double percent_ae_all{}; // ae[0-100]% as percentage of total time + std::chrono::nanoseconds time_ae_all{}; // Active exclusive time (wall clock minus child functions) + std::chrono::nanoseconds time_a_all{}; // Active time (actual wall clock time, handles MT overlap) + + // Fastest/Center/Slowest stats + std::chrono::nanoseconds fastest_min{}; + std::chrono::nanoseconds fastest_mean{}; + std::chrono::nanoseconds center_min{}; + std::chrono::nanoseconds center_mean{}; + std::chrono::nanoseconds center_med{}; + std::chrono::nanoseconds center_time_a{}; // Active time for center range + std::chrono::nanoseconds 
center_time_ae{}; // Active exclusive time for center range + std::chrono::nanoseconds center_max{}; + std::chrono::nanoseconds slowest_mean{}; + std::chrono::nanoseconds slowest_max{}; + + // Percentile ranges for reference + unsigned int fastest_range{}; + unsigned int slowest_range{}; +}; + +struct detail_table +{ + std::vector rows; +}; + +struct ctrack_result_tables +{ + // Meta information + ActiveClock::time_point start_time; + ActiveClock::time_point end_time; + std::chrono::nanoseconds time_total{}; + std::chrono::nanoseconds time_ctracked{}; + + // Table data + summary_table summary; + detail_table details; + + // Settings used + ctrack_result_settings settings; +}; + +class ctrack_result +{ +public: + ctrack_result(const ctrack_result_settings &settings, const ActiveClock::time_point &track_start_time, const ActiveClock::time_point &track_end_time) : settings(settings), track_start_time(track_start_time), track_end_time(track_end_time) + { + time_total = ActiveClock::duration_ns(track_start_time, track_end_time); + center_intervall_str = "[" + std::to_string(settings.non_center_percent) + "-" + std::to_string(100 - settings.non_center_percent) + "]"; + } + + template + void get_summary_table(StreamType &stream, bool use_color = false) + { + BeautifulTable info({ + "Start", + "End", + "time total", + "time ctracked", + "time ctracked %", + }, use_color, alternate_colors); + + info.addRow({BeautifulTable::table_timepoint(tables.start_time), + BeautifulTable::table_timepoint(tables.end_time), + BeautifulTable::table_time(static_cast(tables.time_total.count())), + BeautifulTable::table_time(static_cast(tables.time_ctracked.count())), + BeautifulTable::table_percentage(static_cast(tables.time_ctracked.count()), static_cast(tables.time_total.count()))}); + + info.print(stream); + BeautifulTable table({ + "filename", + "function", + "line", + "calls", + "ae" + center_intervall_str + "%", + "ae[0-100]%", + "time ae[0-100]", + "time a[0-100]"}, use_color, 
alternate_colors); + + for (const auto &row : tables.summary.rows) + { + table.addRow({ + BeautifulTable::stable_shortenPath(row.filename), + row.function_name, + BeautifulTable::table_string(row.line), + BeautifulTable::table_string(row.calls), + BeautifulTable::table_percentage(static_cast(row.percent_ae_bracket * tables.time_total.count() / 100.0), tables.time_total.count()), + BeautifulTable::table_percentage(static_cast(row.percent_ae_all * tables.time_total.count() / 100.0), tables.time_total.count()), + BeautifulTable::table_time(static_cast(row.time_ae_all.count())), + BeautifulTable::table_time(static_cast(row.time_a_all.count())) + }); + } + + table.print(stream); + } + + template + void get_detail_table(StreamType &stream, bool use_color = false, bool reverse_vector = false) + { + auto details_copy = tables.details.rows; + if (reverse_vector) + { + std::reverse(details_copy.begin(), details_copy.end()); + } + for (int i = static_cast(details_copy.size()) - 1; i >= 0; i--) + { + const auto &detail = details_copy[i]; + + BeautifulTable info({"filename", "function", "line", "time acc", "sd", "cv", "calls", "threads"}, + use_color, default_colors); + info.addRow({ + BeautifulTable::stable_shortenPath(detail.filename), + detail.function_name, + BeautifulTable::table_string(detail.line), + BeautifulTable::table_time(static_cast(detail.time_acc.count())), + BeautifulTable::table_time(static_cast(detail.sd.count())), + BeautifulTable::table_string(detail.cv), + BeautifulTable::table_string(detail.calls), + BeautifulTable::table_string(detail.threads) + }); + + const auto fastest_header = "fastest[0-" + std::to_string(detail.fastest_range) + "]%"; + const auto center_header = "center" + center_intervall_str + "%"; + const auto slowest_header = "slowest[" + std::to_string(detail.slowest_range) + "-100]%"; + + BeautifulTable table( + {"min", "mean", "min", "mean", "med", "time a", "time ae", "max", "mean", "max"}, + use_color, + default_colors, + { + 
{fastest_header, 2}, + {center_header, 6}, + {slowest_header, 2} + } + ); + + table.addRow({ + BeautifulTable::table_time(static_cast(detail.fastest_min.count())), + BeautifulTable::table_time(static_cast(detail.fastest_mean.count())), + BeautifulTable::table_time(static_cast(detail.center_min.count())), + BeautifulTable::table_time(static_cast(detail.center_mean.count())), + BeautifulTable::table_time(static_cast(detail.center_med.count())), + BeautifulTable::table_time(static_cast(detail.center_time_a.count())), + BeautifulTable::table_time(static_cast(detail.center_time_ae.count())), + BeautifulTable::table_time(static_cast(detail.center_max.count())), + BeautifulTable::table_time(static_cast(detail.slowest_mean.count())), + BeautifulTable::table_time(static_cast(detail.slowest_max.count())) + }); + info.print(stream); + table.print(stream); + + stream << std::endl; + } + } + + void calculate_stats() + { + std::vector grouped_events{}; + for (auto &[filename, filename_entry] : f_res) + { + ctracked_files++; + for (auto &[function, function_entry] : filename_entry) + { + ctracked_functions++; + for (auto &[line, line_entry] : function_entry) + { + ctracked_uses++; + line_entry.filename = filename; + line_entry.function_name = function; + line_entry.line = line; + line_entry.calculateStats(settings.non_center_percent, a_events, child_graph); + sorted_events.push_back(&line_entry); + grouped_events.insert(grouped_events.end(), line_entry.all_grouped.begin(), line_entry.all_grouped.end()); + } + } + } + + std::sort(OPT_EXEC_POLICY grouped_events.begin(), grouped_events.end(), cmp_simple_event_by_start_time_asc); + auto all_grouped = sorted_create_grouped_simple_events(grouped_events); + sum_time_active_exclusive = sum_field(all_grouped, &Simple_Event::duration); + + order_pointer_vector_by_field(sorted_events, &EventGroup::all_time_active_exclusive, false); + + int fastest_events = static_cast(sorted_events.size() * settings.percent_exclude_fastest_active_exclusive 
/ 100); + // remove fastest keep in mind fastest elements are at the back + if (fastest_events > 0) + sorted_events.erase(sorted_events.end() - fastest_events, sorted_events.end()); + + uint_fast64_t min_time_active_exclusive = static_cast(time_total * settings.min_percent_active_exclusive / 100); + // remove fastest keep in mind fastest elements are at the back + if (min_time_active_exclusive > 0) + sorted_events.erase( + std::remove_if(sorted_events.begin(), sorted_events.end(), [min_time_active_exclusive](EventGroup *e) + + { return e->all_time_active_exclusive < min_time_active_exclusive; }), + sorted_events.end()); + + // Build the structured result tables + build_result_tables(); + } + + void move_events_from_store(std::deque &events) + { + m_events_storage = std::move(events); + } + + void populate_maps() + { + size_t total_events = 0; + for (const auto &event_vec : m_events_storage) + { + total_events += event_vec.size(); + } + a_events.reserve(total_events); + + for (const auto &event_vec : m_events_storage) + { + for (const auto &event : event_vec) + { + f_res[event.filename][event.function][event.line].all_events.push_back(&event); + a_events.insert({get_unique_event_id(event.thread_id, event.event_id), &event}); + } + } + } + + void add_sub_events(const sub_events &s_events, const unsigned int thread_id_) + { + + for (auto const &[key, val] : s_events) + { + int_fast64_t parent_id = get_unique_event_id(thread_id_, key); + for (const auto &child : val) + { + child_graph[parent_id].push_back(get_unique_event_id(thread_id_, child)); + } + } + } + + std::unordered_map a_events{}; + filename_result f_res{}; + + std::unordered_map> child_graph{}; + ctrack_result_settings settings; + ActiveClock::time_point track_start_time, track_end_time; + uint_fast64_t time_total; + uint_fast64_t sum_time_active_exclusive = 0; + + uint_fast64_t ctracked_files = 0; + uint_fast64_t ctracked_functions = 0; + uint_fast64_t ctracked_uses = 0; + + std::vector sorted_events{}; + 
std::string center_intervall_str; + ctrack_result_tables tables{}; + +private: + std::deque m_events_storage; + + void build_result_tables() + { + // Populate meta information + tables.start_time = track_start_time; + tables.end_time = track_end_time; + tables.time_total = std::chrono::nanoseconds(time_total); + tables.time_ctracked = std::chrono::nanoseconds(sum_time_active_exclusive); + tables.settings = settings; + + // Clear existing data + tables.summary.rows.clear(); + tables.details.rows.clear(); + + // Reserve space for efficiency + tables.summary.rows.reserve(sorted_events.size()); + tables.details.rows.reserve(sorted_events.size()); + + // Build summary and detail rows from sorted_events + for (const auto &entry : sorted_events) + { + // Build summary row + summary_row sum_row; + sum_row.filename = std::string(entry->filename); + sum_row.function_name = std::string(entry->function_name); + sum_row.line = entry->line; + sum_row.calls = entry->all_cnt; + sum_row.percent_ae_bracket = (time_total > 0) ? (static_cast(entry->center_time_active_exclusive) / time_total * 100.0) : 0.0; + sum_row.percent_ae_all = (time_total > 0) ? 
(static_cast(entry->all_time_active_exclusive) / time_total * 100.0) : 0.0; + sum_row.time_ae_all = std::chrono::nanoseconds(entry->all_time_active_exclusive); + sum_row.time_a_all = std::chrono::nanoseconds(entry->all_time_active); + tables.summary.rows.push_back(sum_row); + + // Build detail row + detail_stats detail_row; + detail_row.filename = std::string(entry->filename); + detail_row.function_name = std::string(entry->function_name); + detail_row.line = entry->line; + detail_row.time_acc = std::chrono::nanoseconds(entry->all_time_acc); + detail_row.sd = std::chrono::nanoseconds(static_cast(entry->all_st)); + detail_row.cv = entry->all_cv; + detail_row.calls = entry->all_cnt; + detail_row.threads = entry->all_thread_cnt; + + // Summary-like fields (same calculations as summary row) + detail_row.percent_ae_bracket = (time_total > 0) ? (static_cast(entry->center_time_active_exclusive) / time_total * 100.0) : 0.0; + detail_row.percent_ae_all = (time_total > 0) ? (static_cast(entry->all_time_active_exclusive) / time_total * 100.0) : 0.0; + detail_row.time_ae_all = std::chrono::nanoseconds(entry->all_time_active_exclusive); + detail_row.time_a_all = std::chrono::nanoseconds(entry->all_time_active); // Fastest/Center/Slowest stats + detail_row.fastest_min = std::chrono::nanoseconds(entry->fastest_min); + detail_row.fastest_mean = std::chrono::nanoseconds(static_cast(entry->fastest_mean)); + detail_row.center_min = std::chrono::nanoseconds(entry->center_min); + detail_row.center_mean = std::chrono::nanoseconds(static_cast(entry->center_mean)); + detail_row.center_med = std::chrono::nanoseconds(entry->center_med); + detail_row.center_time_a = std::chrono::nanoseconds(entry->center_time_active); + detail_row.center_time_ae = std::chrono::nanoseconds(entry->center_time_active_exclusive); + detail_row.center_max = std::chrono::nanoseconds(entry->center_max); + detail_row.slowest_mean = std::chrono::nanoseconds(static_cast(entry->slowest_mean)); + detail_row.slowest_max = 
std::chrono::nanoseconds(entry->slowest_max); + + detail_row.fastest_range = entry->fastest_range; + detail_row.slowest_range = entry->slowest_range; + + tables.details.rows.push_back(detail_row); + } + } + +public: + const ctrack_result_tables &get_tables() const { return tables; } +}; + +inline int fetch_event_t_id() +{ + if (thread_id == nullptr || *thread_id == -1) + { + std::scoped_lock lock(store::event_mutex); + + if (thread_id == nullptr) + { + store::a_thread_ids.emplace_back(++store::thread_cnt); + thread_id = &store::a_thread_ids[store::a_thread_ids.size() - 1]; + } + else + { + *thread_id = ++store::thread_cnt; + } + + store::a_events.emplace_back(t_events{}); + store::a_sub_events.emplace_back(sub_events{}); + store::a_current_event_id.emplace_back(0); + store::a_current_event_cnt.emplace_back(0); + store::a_string_id.emplace_back(0); + + event_ptr = &store::a_events[*thread_id]; + sub_events_ptr = &store::a_sub_events[*thread_id]; + + current_event_id = &store::a_current_event_id[*thread_id]; + current_event_cnt = &store::a_current_event_cnt[*thread_id]; + string_id = &store::a_string_id[*thread_id]; + + event_ptr->reserve(100); + } + return *thread_id; +} + +class EventHandler +{ +public: + EventHandler(int line = __builtin_LINE(), + const char *filename = __builtin_FILE(), + const char *function = __builtin_FUNCTION()) : line(line) + { +#if defined(CTRACK_CLOCK_RDTSC) || defined(CTRACK_CLOCK_RDTSCP) || defined(CTRACK_CLOCK_RDTSCP_LFENCE) + static const bool _ = (calibrate_tsc(), true); +#endif + previous_store_clear_cnt = store::store_clear_cnt; + this->filename = filename; + this->function = function; + while (store::write_events_locked) {} + + register_event(); + this->start_time = ActiveClock::NOW(); // needs calibration done + } + ~EventHandler() + { + auto end_time = ActiveClock::NOW(); + while (store::write_events_locked) + { + } + + if (store::store_clear_cnt != previous_store_clear_cnt) + { + register_event(); + } + + 
event_ptr->emplace_back(Event{start_time, end_time, filename, line, function, t_id, event_id}); + + *current_event_id = previous_event_id; + if (previous_event_id > 0) + { + auto &children = (*sub_events_ptr)[previous_event_id]; + if (children.size() == children.capacity()) + children.reserve(children.capacity() < 4 ? 4 : children.capacity() * 4); + children.push_back(event_id); + } + } + +private: + void register_event() + { + t_id = fetch_event_t_id(); + previous_event_id = *current_event_id; + event_id = ++(*current_event_cnt); + *current_event_id = event_id; + } + ActiveClock::time_point start_time; + int line; + unsigned int previous_store_clear_cnt; + + std::string_view filename, function; + + int t_id; + unsigned int event_id; + unsigned int previous_event_id; +}; + +inline void clear_a_store() +{ + store::a_current_event_id.clear(); + store::a_current_event_id.shrink_to_fit(); + + store::a_current_event_cnt.clear(); + store::a_current_event_cnt.shrink_to_fit(); + + store::a_string_id.clear(); + store::a_string_id.shrink_to_fit(); + + store::a_events.clear(); + store::a_events.shrink_to_fit(); + + store::a_sub_events.clear(); + store::a_sub_events.shrink_to_fit(); + + store::thread_cnt = -1; + for (auto &entry : store::a_thread_ids) + { + entry = -1; + } + + event_ptr = nullptr; + sub_events_ptr = nullptr; + current_event_id = nullptr; + current_event_cnt = nullptr; + string_id = nullptr; + thread_id = nullptr; + + store::store_clear_cnt++; + store::track_start_time = ActiveClock::NOW(); +} + +inline ctrack_result calc_stats_and_clear(ctrack_result_settings settings = {}) +{ + auto end = ActiveClock::NOW(); + ctrack_result res{settings, store::track_start_time, end}; + + // copy data + { + store::write_events_locked = true; + std::this_thread::sleep_for(std::chrono::milliseconds(100)); + std::scoped_lock lock(store::event_mutex); + + res.move_events_from_store(store::a_events); + res.populate_maps(); + + for (int thread_id_ = 0; thread_id_ <= 
store::thread_cnt; thread_id_++) + { + auto &t_sub_events = store::a_sub_events[thread_id_]; + res.add_sub_events(t_sub_events, thread_id_); + } + clear_a_store(); + store::write_events_locked = false; + } + + res.calculate_stats(); + store::track_start_time = ActiveClock::NOW(); + + return res; +} + +inline void result_print(ctrack_result_settings settings = {}) +{ + auto res = calc_stats_and_clear(settings); +#if defined(CTRACK_CLOCK_RDTSC) || defined(CTRACK_CLOCK_RDTSCP) || defined(CTRACK_CLOCK_RDTSCP_LFENCE) + std::cout << "TSC frequency: " << cycles_per_ns << " GHz\n"; +#endif + std::cout << "Details" << std::endl; + res.get_detail_table(std::cout, true); + std::cout << "Summary" << std::endl; + res.get_summary_table(std::cout, true); +} + +inline std::string result_as_string(ctrack_result_settings settings = {}) +{ + auto res = calc_stats_and_clear(settings); + std::stringstream ss; +#if defined(CTRACK_CLOCK_RDTSC) || defined(CTRACK_CLOCK_RDTSCP) || defined(CTRACK_CLOCK_RDTSCP_LFENCE) + ss << "TSC frequency: " << cycles_per_ns << " GHz\n"; #endif + ss << "Summary\n"; + res.get_summary_table(ss, false); + ss << "Details\n"; + res.get_detail_table(ss, false, true); - std::ostringstream oss; - oss << std::put_time(&tm, "%Y-%m-%d %H:%M:%S"); - return oss.str(); - } - - static inline std::string stable_shortenPath(const std::string &fullPath, size_t maxLength = 35) - { - namespace fs = std::filesystem; - - fs::path path(fullPath); - std::string filename = path.filename().string(); - - if (filename.length() <= maxLength) - { - return filename; - } - - // If filename is too long, truncate it and add ... 
- return filename.substr(0, maxLength - 3) + "..."; - } - - using bt = BeautifulTable; - }; - - struct Event - { - std::chrono::high_resolution_clock::time_point start_time; - std::chrono::high_resolution_clock::time_point end_time; - int line; - int thread_id; - std::string_view filename; - std::string_view function; - unsigned int event_id; - Event(const std::chrono::high_resolution_clock::time_point &start_time, const std::chrono::high_resolution_clock::time_point &end_time, const std::string_view filename, const int line, const std::string_view function, const int thread_id, const unsigned int event_id) - : start_time(start_time), end_time(end_time), line(line), thread_id(thread_id), filename(filename), function(function), event_id(event_id) - { - } - }; - - struct Simple_Event - { - uint_fast64_t duration = 0; - std::chrono::high_resolution_clock::time_point start_time{}; - int_fast64_t unique_id = 0; - std::chrono::high_resolution_clock::time_point end_time{}; - Simple_Event(const std::chrono::high_resolution_clock::time_point &start_time, const std::chrono::high_resolution_clock::time_point &end_time, const uint_fast64_t duration, const int_fast64_t unique_id) : duration(duration), start_time(start_time), unique_id(unique_id), end_time(end_time) {} - Simple_Event() {} - }; - - inline bool cmp_simple_event_by_duration_asc(const Simple_Event &a, const Simple_Event &b) - { - return a.duration < b.duration; - } - inline bool cmp_simple_event_by_start_time_asc(const Simple_Event &a, const Simple_Event &b) - { - return a.start_time < b.start_time; - } - - inline uint_fast64_t get_unique_event_id(unsigned int thread_id, unsigned int event_id) - { - uint_fast64_t uniqueId = static_cast(thread_id); - uniqueId = uniqueId << 32; - uniqueId += static_cast(event_id); - return uniqueId; - } - - inline std::vector create_simple_events(const std::vector &events) - { - std::vector simple_events{}; - simple_events.resize(events.size()); - std::transform( - OPT_EXEC_POLICY - 
events.begin(), - events.end(), - simple_events.begin(), - [](const Event &event) - { - Simple_Event simple_event(event.start_time, event.end_time, std::chrono::duration_cast(event.end_time - event.start_time).count(), get_unique_event_id(event.thread_id, event.event_id)); - return simple_event; - }); - return simple_events; - } - - inline std::vector create_simple_events(const std::vector &events) - { - std::vector simple_events{}; - simple_events.resize(events.size()); - std::transform( - OPT_EXEC_POLICY - events.begin(), - events.end(), - simple_events.begin(), - [](const Event *event) - { - Simple_Event simple_event(event->start_time, event->end_time, std::chrono::duration_cast(event->end_time - event->start_time).count(), get_unique_event_id(event->thread_id, event->event_id)); - return simple_event; - }); - return simple_events; - } - - // requires already sorted - inline std::vector sorted_create_grouped_simple_events(const std::vector &events) - { - std::vector result{}; - if (events.size() == 0) - return result; - result.push_back(events[0]); - unsigned int current_idx = 0; - - for (size_t i = 1; i < events.size(); i++) - { - if (result[current_idx].end_time >= events[i].start_time) - { - result[current_idx].end_time = std::max(result[current_idx].end_time, events[i].end_time); - } - else - { - result.push_back(events[i]); - current_idx++; - } - } - - for (auto &entry : result) - { - entry.duration = std::chrono::duration_cast(entry.end_time - entry.start_time).count(); - } - - return result; - } - - inline std::vector load_child_events_simple(const std::vector &parent_events_simple, - const std::unordered_map &events_map, const std::unordered_map> &child_graph) - { - std::vector child_events{}; - - // std::set< int_fast64_t> parent_ids = get_distinct_field_values(parent_events_simple, &Simple_Event::unique_id); - for (const auto &simple_parent_event : parent_events_simple) - { - auto it = child_graph.find(simple_parent_event.unique_id); - if (it != 
child_graph.end()) - { - for (auto &child_id : it->second) - { - auto &child_event = events_map.at(child_id); - auto &parent_event = events_map.at(simple_parent_event.unique_id); - if (child_event->filename == parent_event->filename && - child_event->function == parent_event->function && - child_event->line == parent_event->line) - continue; - - child_events.push_back(child_event); - } - } - } - - return create_simple_events(child_events); - }; - - class EventGroup - { - public: - void calculateStats(unsigned int non_center_percent, const std::unordered_map &events_map, const std::unordered_map> &child_graph) - { - if (all_events.size() == 0) - return; - - auto all_events_simple = create_simple_events(all_events); - std::sort(OPT_EXEC_POLICY all_events_simple.begin(), all_events_simple.end(), cmp_simple_event_by_duration_asc); - all_cnt = static_cast(all_events_simple.size()); - const double factor = (1.0 / static_cast(all_cnt)); - - auto all_child_events_simple = load_child_events_simple(all_events_simple, events_map, child_graph); - - all_time_acc = sum_field(all_events_simple, &Simple_Event::duration); - - const double all_mean = all_time_acc * factor; - if (std::fpclassify(all_mean) == FP_ZERO) - return; - - all_st = calculate_std_dev_field(all_events_simple, &Simple_Event::duration, all_mean); // std::sqrt(all_variance); - all_cv = all_st / all_mean; - - all_thread_cnt = static_cast(get_distinct_field_values(all_events, &Event::thread_id).size()); - unsigned int amount_non_center = all_cnt * non_center_percent / 100; - - fastest_range = non_center_percent; - slowest_range = 100 - non_center_percent; - - std::vector fastest_events_simple, slowest_events_simple, center_events_simple; - fastest_events_simple.reserve(amount_non_center); - slowest_events_simple.reserve(amount_non_center); - if (all_cnt > 2) - center_events_simple.reserve(all_cnt - 2 * amount_non_center); - - for (unsigned int i = 0; i < all_events_simple.size(); i++) - { - if (i < 
amount_non_center) - { - fastest_events_simple.push_back(all_events_simple[i]); - } - else if (i >= all_cnt - amount_non_center) - { - slowest_events_simple.push_back(all_events_simple[i]); - } - else - { - center_events_simple.push_back(all_events_simple[i]); - } - } - if (amount_non_center > 0) - { - // fastest - fastest_min = fastest_events_simple[0].duration; - fastest_mean = sum_field(fastest_events_simple, &Simple_Event::duration) / static_cast(amount_non_center); - - // slowest - slowest_max = slowest_events_simple[slowest_events_simple.size() - 1].duration; - slowest_mean = sum_field(slowest_events_simple, &Simple_Event::duration) / static_cast(amount_non_center); - } - - // center - center_min = center_events_simple[0].duration; - center_max = center_events_simple[center_events_simple.size() - 1].duration; - center_mean = sum_field(center_events_simple, &Simple_Event::duration) / static_cast(center_events_simple.size()); - if (center_events_simple.size() % 2 == 1) - center_med = center_events_simple[center_events_simple.size() / 2].duration; - else - center_med = (center_events_simple[center_events_simple.size() / 2].duration + center_events_simple[center_events_simple.size() / 2 - 1].duration) / 2; - - auto center_child_events_simple = load_child_events_simple(center_events_simple, events_map, child_graph); - - std::sort(OPT_EXEC_POLICY center_events_simple.begin(), center_events_simple.end(), cmp_simple_event_by_start_time_asc); - center_grouped = sorted_create_grouped_simple_events(center_events_simple); - center_time_active = sum_field(center_grouped, &Simple_Event::duration); - - std::sort(OPT_EXEC_POLICY center_child_events_simple.begin(), center_child_events_simple.end(), cmp_simple_event_by_start_time_asc); - auto center_child_events_grouped = sorted_create_grouped_simple_events(center_child_events_simple); - center_time_active_exclusive = center_time_active - sum_field(center_child_events_grouped, &Simple_Event::duration); - - 
std::sort(OPT_EXEC_POLICY all_events_simple.begin(), all_events_simple.end(), cmp_simple_event_by_start_time_asc); - all_grouped = sorted_create_grouped_simple_events(all_events_simple); - all_time_active = sum_field(all_grouped, &Simple_Event::duration); - - std::sort(OPT_EXEC_POLICY all_child_events_simple.begin(), all_child_events_simple.end(), cmp_simple_event_by_start_time_asc); - auto all_child_events_grouped = sorted_create_grouped_simple_events(all_child_events_simple); - all_time_active_exclusive = all_time_active - sum_field(all_child_events_grouped, &Simple_Event::duration); - } - - // all_group - - double all_cv = 0.0; - double all_st = 0.0; - - unsigned int all_cnt = 0; - uint_fast64_t all_time_acc = 0; - uint_fast64_t all_time_active = 0; - uint_fast64_t all_time_active_exclusive = 0; - unsigned int all_thread_cnt = 0; - std::vector all_grouped = {}; - std::vector all_events = {}; - - // fastest_group - unsigned int fastest_range = 0; - uint_fast64_t fastest_min = 0; - double fastest_mean = 0.0; - - // slowest group - unsigned int slowest_range = 0; - uint_fast64_t slowest_max = 0; - double slowest_mean = 0.0; - - // center group - - uint_fast64_t center_min = 0; - uint_fast64_t center_max = 0; - uint_fast64_t center_med = 0; - double center_mean = 0; - uint_fast64_t center_time_active = 0; - uint_fast64_t center_time_active_exclusive = 0; - std::vector center_grouped = {}; - - std::string filename = {}; - std::string function_name = {}; - int line = 0; - - private: - }; - - typedef std::vector t_events; - typedef std::map> sub_events; - - struct store - { - inline static std::atomic write_events_locked = false; - inline static std::mutex event_mutex; - inline static std::chrono::high_resolution_clock::time_point track_start_time = std::chrono::high_resolution_clock::now(); - inline static std::atomic store_clear_cnt = 0; - - inline static std::atomic thread_cnt = -1; - inline static std::deque a_events{}; - inline static std::deque a_sub_events{}; - 
- inline static std::deque a_current_event_id{}, a_current_event_cnt{}, a_string_id{}; - - inline static std::deque a_thread_ids{}; - }; - - inline thread_local t_events *event_ptr = nullptr; - inline thread_local sub_events *sub_events_ptr = nullptr; - - inline thread_local unsigned int *current_event_id = nullptr; - inline thread_local unsigned int *current_event_cnt = nullptr; - inline thread_local unsigned int *string_id = nullptr; - - inline thread_local int *thread_id = nullptr; - - typedef std::map line_result; - typedef std::map function_result; - typedef std::map filename_result; - - struct ctrack_result_settings - { - unsigned int non_center_percent = 1; - double min_percent_active_exclusive = 0.0; // between 0-100 - double percent_exclude_fastest_active_exclusive = 0.0; // between 0-100 - }; - - struct summary_row - { - std::string filename; - std::string function_name; - int line{}; - int calls{}; - double percent_ae_bracket{}; // ae[center]% by configuration - double percent_ae_all{}; // ae[0-100]% - std::chrono::nanoseconds time_ae_all{}; - std::chrono::nanoseconds time_a_all{}; - }; - - struct summary_table - { - std::vector rows; - }; - - struct detail_stats - { - // Info fields - std::string filename; - std::string function_name; - int line{}; - std::chrono::nanoseconds time_acc{}; // Simple sum of all execution times (can exceed wall clock in MT) - std::chrono::nanoseconds sd{}; // Standard deviation - double cv{}; // Coefficient of variation (sd/mean) - int calls{}; // Total number of calls - int threads{}; // Number of different threads that called this function - - // Summary-like fields (for unified access) - double percent_ae_bracket{}; // ae[center]% as percentage of total time - double percent_ae_all{}; // ae[0-100]% as percentage of total time - std::chrono::nanoseconds time_ae_all{}; // Active exclusive time (wall clock minus child functions) - std::chrono::nanoseconds time_a_all{}; // Active time (actual wall clock time, handles MT 
overlap) - - // Fastest/Center/Slowest stats - std::chrono::nanoseconds fastest_min{}; - std::chrono::nanoseconds fastest_mean{}; - std::chrono::nanoseconds center_min{}; - std::chrono::nanoseconds center_mean{}; - std::chrono::nanoseconds center_med{}; - std::chrono::nanoseconds center_time_a{}; // Active time for center range - std::chrono::nanoseconds center_time_ae{}; // Active exclusive time for center range - std::chrono::nanoseconds center_max{}; - std::chrono::nanoseconds slowest_mean{}; - std::chrono::nanoseconds slowest_max{}; - - // Percentile ranges for reference - unsigned int fastest_range{}; - unsigned int slowest_range{}; - }; - - struct detail_table - { - std::vector rows; - }; - - struct ctrack_result_tables - { - // Meta information - std::chrono::high_resolution_clock::time_point start_time; - std::chrono::high_resolution_clock::time_point end_time; - std::chrono::nanoseconds time_total{}; - std::chrono::nanoseconds time_ctracked{}; - - // Table data - summary_table summary; - detail_table details; - - // Settings used - ctrack_result_settings settings; - }; - - class ctrack_result - { - public: - ctrack_result(const ctrack_result_settings &settings, const std::chrono::high_resolution_clock::time_point &track_start_time, const std::chrono::high_resolution_clock::time_point &track_end_time) : settings(settings), track_start_time(track_start_time), track_end_time(track_end_time) - { - time_total = std::chrono::duration_cast( - track_end_time - track_start_time) - .count(); - center_intervall_str = "[" + std::to_string(settings.non_center_percent) + "-" + std::to_string(100 - settings.non_center_percent) + "]"; - } - - template - void get_summary_table(StreamType &stream, bool use_color = false) - { - BeautifulTable info({ - "Start", - "End", - "time total", - "time ctracked", - "time ctracked %", - }, - use_color, alternate_colors); - info.addRow({BeautifulTable::table_timepoint(tables.start_time), BeautifulTable::table_timepoint(tables.end_time), 
- BeautifulTable::table_time(static_cast(tables.time_total.count())), BeautifulTable::table_time(static_cast(tables.time_ctracked.count())), - BeautifulTable::table_percentage(static_cast(tables.time_ctracked.count()), static_cast(tables.time_total.count()))}); - - info.print(stream); - BeautifulTable table({"filename", "function", "line", "calls", "ae" + center_intervall_str + "%", "ae[0-100]%", - "time ae[0-100]", "time a[0-100]"}, - use_color, alternate_colors); - for (const auto &row : tables.summary.rows) - { - table.addRow({BeautifulTable::stable_shortenPath(row.filename), row.function_name, BeautifulTable::table_string(row.line), - BeautifulTable::table_string(row.calls), - BeautifulTable::table_percentage(static_cast(row.percent_ae_bracket * tables.time_total.count() / 100.0), static_cast(tables.time_total.count())), - BeautifulTable::table_percentage(static_cast(row.percent_ae_all * tables.time_total.count() / 100.0), static_cast(tables.time_total.count())), - BeautifulTable::table_time(static_cast(row.time_ae_all.count())), - BeautifulTable::table_time(static_cast(row.time_a_all.count()))}); - } - - table.print(stream); - } - - template - void get_detail_table(StreamType &stream, bool use_color = false, bool reverse_vector = false) - { - auto details_copy = tables.details.rows; - if (reverse_vector) - { - std::reverse(details_copy.begin(), details_copy.end()); - } - for (int i = static_cast(details_copy.size()) - 1; i >= 0; i--) - { - const auto &detail = details_copy[i]; - - BeautifulTable info({"filename", "function", "line", "time acc", "sd", "cv", "calls", "threads"}, use_color, default_colors); - info.addRow({BeautifulTable::stable_shortenPath(detail.filename), detail.function_name, BeautifulTable::table_string(detail.line), - BeautifulTable::table_time(static_cast(detail.time_acc.count())), - BeautifulTable::table_time(static_cast(detail.sd.count())), BeautifulTable::table_string(detail.cv), - BeautifulTable::table_string(detail.calls), 
BeautifulTable::table_string(detail.threads)}); - - BeautifulTable table({"min", "mean", "min", "mean", "med", "time a", "time ae", "max", "mean", "max"}, use_color, default_colors, - {{"fastest[0-" + std::to_string(detail.fastest_range) + "]%", 2}, {"center" + center_intervall_str + "%", 6}, {"slowest[" + std::to_string(detail.slowest_range) + "-100]%", 2}}); - - table.addRow({BeautifulTable::table_time(static_cast(detail.fastest_min.count())), BeautifulTable::table_time(static_cast(detail.fastest_mean.count())), - BeautifulTable::table_time(static_cast(detail.center_min.count())), BeautifulTable::table_time(static_cast(detail.center_mean.count())), - BeautifulTable::table_time(static_cast(detail.center_med.count())), BeautifulTable::table_time(static_cast(detail.center_time_a.count())), - BeautifulTable::table_time(static_cast(detail.center_time_ae.count())), - BeautifulTable::table_time(static_cast(detail.center_max.count())), - BeautifulTable::table_time(static_cast(detail.slowest_mean.count())), BeautifulTable::table_time(static_cast(detail.slowest_max.count()))}); - - info.print(stream); - table.print(stream); - - stream << std::endl; - } - } - - void calculate_stats() - { - std::vector grouped_events{}; - for (auto &[filename, filename_entry] : f_res) - { - ctracked_files++; - for (auto &[function, function_entry] : filename_entry) - { - ctracked_functions++; - for (auto &[line, line_entry] : function_entry) - { - ctracked_uses++; - line_entry.filename = filename; - line_entry.function_name = function; - line_entry.line = line; - line_entry.calculateStats(settings.non_center_percent, a_events, child_graph); - sorted_events.push_back(&line_entry); - grouped_events.insert(grouped_events.end(), line_entry.all_grouped.begin(), line_entry.all_grouped.end()); - } - } - } - - std::sort(OPT_EXEC_POLICY grouped_events.begin(), grouped_events.end(), cmp_simple_event_by_start_time_asc); - auto all_grouped = sorted_create_grouped_simple_events(grouped_events); - 
sum_time_active_exclusive = sum_field(all_grouped, &Simple_Event::duration); - - order_pointer_vector_by_field(sorted_events, &EventGroup::all_time_active_exclusive, false); - - int fastest_events = static_cast(sorted_events.size() * settings.percent_exclude_fastest_active_exclusive / 100); - // remove fastest keep in mind fastest elements are at the back - if (fastest_events > 0) - sorted_events.erase(sorted_events.end() - fastest_events, sorted_events.end()); - - uint_fast64_t min_time_active_exclusive = static_cast(time_total * settings.min_percent_active_exclusive / 100); - // remove fastest keep in mind fastest elements are at the back - if (min_time_active_exclusive > 0) - sorted_events.erase(std::remove_if(sorted_events.begin(), sorted_events.end(), [min_time_active_exclusive](EventGroup *e) - { return e->all_time_active_exclusive < min_time_active_exclusive; }), - sorted_events.end()); - - // Build the structured result tables - build_result_tables(); - } - - void move_events_from_store(std::deque &events) - { - m_events_storage = std::move(events); - } - - void populate_maps() - { - size_t total_events = 0; - for (const auto &event_vec : m_events_storage) - { - total_events += event_vec.size(); - } - a_events.reserve(total_events); - - for (const auto &event_vec : m_events_storage) - { - for (const auto &event : event_vec) - { - f_res[event.filename][event.function][event.line].all_events.push_back(&event); - a_events.insert({get_unique_event_id(event.thread_id, event.event_id), &event}); - } - } - } - - void add_sub_events(const sub_events &s_events, const unsigned int thread_id_) - { - - for (auto const &[key, val] : s_events) - { - int_fast64_t parent_id = get_unique_event_id(thread_id_, key); - for (const auto &child : val) - { - child_graph[parent_id].push_back(get_unique_event_id(thread_id_, child)); - } - } - } - - std::unordered_map a_events{}; - filename_result f_res{}; - - std::unordered_map> child_graph{}; - ctrack_result_settings settings; - 
std::chrono::high_resolution_clock::time_point track_start_time, track_end_time; - uint_fast64_t time_total; - uint_fast64_t sum_time_active_exclusive = 0; - - uint_fast64_t ctracked_files = 0; - uint_fast64_t ctracked_functions = 0; - uint_fast64_t ctracked_uses = 0; - - std::vector sorted_events{}; - std::string center_intervall_str; - ctrack_result_tables tables{}; - - private: - std::deque m_events_storage; - - void build_result_tables() - { - // Populate meta information - tables.start_time = track_start_time; - tables.end_time = track_end_time; - tables.time_total = std::chrono::nanoseconds(time_total); - tables.time_ctracked = std::chrono::nanoseconds(sum_time_active_exclusive); - tables.settings = settings; - - // Clear existing data - tables.summary.rows.clear(); - tables.details.rows.clear(); - - // Reserve space for efficiency - tables.summary.rows.reserve(sorted_events.size()); - tables.details.rows.reserve(sorted_events.size()); - - // Build summary and detail rows from sorted_events - for (const auto &entry : sorted_events) - { - // Build summary row - summary_row sum_row; - sum_row.filename = std::string(entry->filename); - sum_row.function_name = std::string(entry->function_name); - sum_row.line = entry->line; - sum_row.calls = entry->all_cnt; - sum_row.percent_ae_bracket = (time_total > 0) ? (static_cast(entry->center_time_active_exclusive) / time_total * 100.0) : 0.0; - sum_row.percent_ae_all = (time_total > 0) ? 
(static_cast(entry->all_time_active_exclusive) / time_total * 100.0) : 0.0; - sum_row.time_ae_all = std::chrono::nanoseconds(entry->all_time_active_exclusive); - sum_row.time_a_all = std::chrono::nanoseconds(entry->all_time_active); - tables.summary.rows.push_back(sum_row); - - // Build detail row - detail_stats detail_row; - detail_row.filename = std::string(entry->filename); - detail_row.function_name = std::string(entry->function_name); - detail_row.line = entry->line; - detail_row.time_acc = std::chrono::nanoseconds(entry->all_time_acc); - detail_row.sd = std::chrono::nanoseconds(static_cast(entry->all_st)); - detail_row.cv = entry->all_cv; - detail_row.calls = entry->all_cnt; - detail_row.threads = entry->all_thread_cnt; - - // Summary-like fields (same calculations as summary row) - detail_row.percent_ae_bracket = (time_total > 0) ? (static_cast(entry->center_time_active_exclusive) / time_total * 100.0) : 0.0; - detail_row.percent_ae_all = (time_total > 0) ? (static_cast(entry->all_time_active_exclusive) / time_total * 100.0) : 0.0; - detail_row.time_ae_all = std::chrono::nanoseconds(entry->all_time_active_exclusive); - detail_row.time_a_all = std::chrono::nanoseconds(entry->all_time_active); - - // Fastest/Center/Slowest stats - detail_row.fastest_min = std::chrono::nanoseconds(entry->fastest_min); - detail_row.fastest_mean = std::chrono::nanoseconds(static_cast(entry->fastest_mean)); - detail_row.center_min = std::chrono::nanoseconds(entry->center_min); - detail_row.center_mean = std::chrono::nanoseconds(static_cast(entry->center_mean)); - detail_row.center_med = std::chrono::nanoseconds(entry->center_med); - detail_row.center_time_a = std::chrono::nanoseconds(entry->center_time_active); - detail_row.center_time_ae = std::chrono::nanoseconds(entry->center_time_active_exclusive); - detail_row.center_max = std::chrono::nanoseconds(entry->center_max); - detail_row.slowest_mean = std::chrono::nanoseconds(static_cast(entry->slowest_mean)); - 
detail_row.slowest_max = std::chrono::nanoseconds(entry->slowest_max); - - detail_row.fastest_range = entry->fastest_range; - detail_row.slowest_range = entry->slowest_range; - - tables.details.rows.push_back(detail_row); - } - } - - public: - const ctrack_result_tables &get_tables() const { return tables; } - }; - - inline int fetch_event_t_id() - { - if (thread_id == nullptr || *thread_id == -1) - { - std::scoped_lock lock(store::event_mutex); - - if (thread_id == nullptr) - { - store::a_thread_ids.emplace_back(++store::thread_cnt); - thread_id = &store::a_thread_ids[store::a_thread_ids.size() - 1]; - } - else - { - *thread_id = ++store::thread_cnt; - } - - store::a_events.emplace_back(t_events{}); - store::a_sub_events.emplace_back(sub_events{}); - store::a_current_event_id.emplace_back(0); - store::a_current_event_cnt.emplace_back(0); - store::a_string_id.emplace_back(0); - - event_ptr = &store::a_events[*thread_id]; - sub_events_ptr = &store::a_sub_events[*thread_id]; - - current_event_id = &store::a_current_event_id[*thread_id]; - current_event_cnt = &store::a_current_event_cnt[*thread_id]; - string_id = &store::a_string_id[*thread_id]; - - event_ptr->reserve(100); - } - return *thread_id; - } - - class EventHandler - { - public: - EventHandler(int line = __builtin_LINE(), const char *filename = __builtin_FILE(), const char *function = __builtin_FUNCTION(), std::chrono::high_resolution_clock::time_point start_time = std::chrono::high_resolution_clock::now()) : line(line) - - { - - previous_store_clear_cnt = store::store_clear_cnt; - this->filename = filename; - this->function = function; - while (store::write_events_locked) - { - } - - register_event(); - this->start_time = start_time; - } - ~EventHandler() - { - auto end_time = std::chrono::high_resolution_clock::now(); - while (store::write_events_locked) - { - } - - if (store::store_clear_cnt != previous_store_clear_cnt) - { - register_event(); - } - - if (event_ptr->capacity() - event_ptr->size() < 1) - 
event_ptr->reserve(event_ptr->capacity() * 4); - - event_ptr->emplace_back(Event{start_time, end_time, filename, line, function, t_id, event_id}); - - *current_event_id = previous_event_id; - if (previous_event_id > 0) - { - if ((*sub_events_ptr)[previous_event_id].capacity() - (*sub_events_ptr)[previous_event_id].size() < 1) - (*sub_events_ptr)[previous_event_id].reserve((*sub_events_ptr)[previous_event_id].capacity() * 4); - (*sub_events_ptr)[previous_event_id].push_back(event_id); - } - } - - private: - void register_event() - { - t_id = fetch_event_t_id(); - previous_event_id = *current_event_id; - event_id = ++(*current_event_cnt); - *current_event_id = event_id; - } - std::chrono::high_resolution_clock::time_point start_time; - int line; - unsigned int previous_store_clear_cnt; - - std::string_view filename, function; - - int t_id; - unsigned int event_id; - unsigned int previous_event_id; - }; - - inline void clear_a_store() - { - store::a_current_event_id.clear(); - store::a_current_event_id.shrink_to_fit(); - - store::a_current_event_cnt.clear(); - store::a_current_event_cnt.shrink_to_fit(); - - store::a_string_id.clear(); - store::a_string_id.shrink_to_fit(); - - store::a_events.clear(); - store::a_events.shrink_to_fit(); - - store::a_sub_events.clear(); - store::a_sub_events.shrink_to_fit(); - - store::thread_cnt = -1; - for (auto &entry : store::a_thread_ids) - { - entry = -1; - } - - event_ptr = nullptr; - sub_events_ptr = nullptr; - current_event_id = nullptr; - current_event_cnt = nullptr; - string_id = nullptr; - thread_id = nullptr; - - store::store_clear_cnt++; - store::track_start_time = std::chrono::high_resolution_clock::now(); - } - - inline ctrack_result calc_stats_and_clear(ctrack_result_settings settings = {}) - { - auto end = std::chrono::high_resolution_clock::now(); - ctrack_result res{settings, store::track_start_time, end}; - - // copy data - { - store::write_events_locked = true; - 
std::this_thread::sleep_for(std::chrono::milliseconds(100)); - std::scoped_lock lock(store::event_mutex); - - res.move_events_from_store(store::a_events); - res.populate_maps(); - - for (int thread_id_ = 0; thread_id_ <= store::thread_cnt; thread_id_++) - { - auto &t_sub_events = store::a_sub_events[thread_id_]; - res.add_sub_events(t_sub_events, thread_id_); - } - clear_a_store(); - store::write_events_locked = false; - } - - res.calculate_stats(); - store::track_start_time = std::chrono::high_resolution_clock::now(); - - return res; - } - - inline void result_print(ctrack_result_settings settings = {}) - { - auto res = calc_stats_and_clear(settings); - std::cout << "Details" << std::endl; - res.get_detail_table(std::cout, true); - std::cout << "Summary" << std::endl; - res.get_summary_table(std::cout, true); - } - - inline std::string result_as_string(ctrack_result_settings settings = {}) - { - auto res = calc_stats_and_clear(settings); - std::stringstream ss; - ss << "Summary\n"; - res.get_summary_table(ss, false); - ss << "Details\n"; - res.get_detail_table(ss, false, true); - - return ss.str(); - } - - inline ctrack_result_tables result_get_tables(ctrack_result_settings settings = {}) - { - auto res = calc_stats_and_clear(settings); - return res.get_tables(); - } - - inline summary_table result_get_summary_table(ctrack_result_settings settings = {}) - { - auto res = calc_stats_and_clear(settings); - return res.get_tables().summary; - } - - inline detail_table result_get_detail_table(ctrack_result_settings settings = {}) - { - auto res = calc_stats_and_clear(settings); - return res.get_tables().details; - } - } + return ss.str(); +} + +inline ctrack_result_tables result_get_tables(ctrack_result_settings settings = {}) +{ + auto res = calc_stats_and_clear(settings); + return res.get_tables(); +} + +inline summary_table result_get_summary_table(ctrack_result_settings settings = {}) +{ + auto res = calc_stats_and_clear(settings); + return res.get_tables().summary; 
+} + +inline detail_table result_get_detail_table(ctrack_result_settings settings = {}) +{ + auto res = calc_stats_and_clear(settings); + return res.get_tables().details; +} +} } #ifndef CTRACK_DISABLE @@ -1260,9 +1533,9 @@ namespace ctrack #define CTRACK_UNIQUE_NAME(prefix) CTRACK_CONCAT(prefix, __COUNTER__) #define CTRACK_IMPL \ - ctrack::EventHandler CTRACK_UNIQUE_NAME(ctrack_instance_) { __builtin_LINE(), __builtin_FILE(), __builtin_FUNCTION() } +ctrack::EventHandler CTRACK_UNIQUE_NAME(ctrack_instance_) { __builtin_LINE(), __builtin_FILE(), __builtin_FUNCTION() } #define CTRACK_IMPL_NAME(name) \ - ctrack::EventHandler CTRACK_UNIQUE_NAME(ctrack_instance_) { __builtin_LINE(), __builtin_FILE(), name } +ctrack::EventHandler CTRACK_UNIQUE_NAME(ctrack_instance_) { __builtin_LINE(), __builtin_FILE(), name } #if defined(CTRACK_DISABLE_DEV) #define CTRACK_PROD CTRACK_IMPL #define CTRACK_PROD_NAME(name) CTRACK_IMPL_NAME(name) @@ -1293,4 +1566,4 @@ namespace ctrack #define CTRACK_NAME(name) #endif // CTRACK_DISABLE -#endif \ No newline at end of file +#endif