From 584784e6c614f23d1307a6030bee936853a25550 Mon Sep 17 00:00:00 2001 From: Michael Vandeberg Date: Mon, 20 Apr 2026 12:10:15 -0600 Subject: [PATCH] perf/bench: remove unnecessary heap allocs in asio accept_churn benches Replace unique_ptr with value-typed sockets across all asio accept_churn benchmarks (coroutine and callback variants). Vectors are pre-reserved so no reallocation invalidates references held by pending async operations. This matches the corosio benchmark's allocation pattern for an apples-to-apples comparison. --- .../asio/callback/accept_churn_bench.cpp | 56 ++++++------ .../asio/coroutine/accept_churn_bench.cpp | 85 +++++++++---------- 2 files changed, 73 insertions(+), 68 deletions(-) diff --git a/perf/bench/asio/callback/accept_churn_bench.cpp b/perf/bench/asio/callback/accept_churn_bench.cpp index 1d7bd8ca..297c1f7f 100644 --- a/perf/bench/asio/callback/accept_churn_bench.cpp +++ b/perf/bench/asio/callback/accept_churn_bench.cpp @@ -77,8 +77,8 @@ struct sequential_churn_op std::atomic& running; perf::statistics& latency_stats; std::atomic& ops; - std::unique_ptr client; - std::unique_ptr server; + tcp_socket client; + tcp_socket server; perf::stopwatch sw; char byte = 'X'; char recv_byte = 0; @@ -93,19 +93,19 @@ struct sequential_churn_op sw.reset(); connect_done = false; accept_done = false; - client = std::make_unique( ioc.get_executor() ); - server = std::make_unique( ioc.get_executor() ); + client = tcp_socket( ioc.get_executor() ); + server = tcp_socket( ioc.get_executor() ); boost::system::error_code ec; - ec = client->open( tcp::v4(), ec ); + ec = client.open( tcp::v4(), ec ); if( ec ) { asio::post( ioc, [this]() { start(); } ); return; } - configure_churn_socket( *client ); + configure_churn_socket( client ); - client->async_connect(ep, [this](boost::system::error_code ec) { + client.async_connect(ep, [this](boost::system::error_code ec) { if (ec) return; connect_done = true; @@ -113,7 +113,7 @@ struct sequential_churn_op do_write(); }); - acc.async_accept(*server, [this](boost::system::error_code ec) { + acc.async_accept(server, [this](boost::system::error_code ec) { if (ec) return; accept_done = true; @@ -126,7 +126,7 @@ struct sequential_churn_op { byte = 'X'; asio::async_write( - *client, asio::buffer(&byte, 1), + client, asio::buffer(&byte, 1), [this](boost::system::error_code ec, std::size_t) { if (ec) return; @@ -138,7 +138,7 @@ struct sequential_churn_op { recv_byte = 0; asio::async_read( - *server, asio::buffer(&recv_byte, 1), + server, asio::buffer(&recv_byte, 1), [this](boost::system::error_code ec, std::size_t) { if (ec) return; @@ -148,8 +148,8 @@ struct sequential_churn_op void finish() { - client->close(); - server->close(); + client.close(); + server.close(); latency_stats.add(sw.elapsed_ns()); ops.fetch_add(1, std::memory_order_relaxed); @@ -169,7 +169,9 @@ bench_sequential_churn(bench::state& state) std::atomic running{true}; sequential_churn_op op{ioc, acc, ep, running, state.latency(), - state.ops(), {}, {}, {}}; + state.ops(), + tcp_socket(ioc.get_executor()), + tcp_socket(ioc.get_executor()), {}}; perf::stopwatch total_sw; @@ -198,7 +200,9 @@ bench_sequential_churn_lockless(bench::state& state) std::atomic running{true}; sequential_churn_op op{ioc, acc, ep, running, state.latency(), - state.ops(), {}, {}, {}}; + state.ops(), + tcp_socket(ioc.get_executor()), + tcp_socket(ioc.get_executor()), {}}; perf::stopwatch total_sw; @@ -244,7 +248,9 @@ bench_concurrent_churn(bench::state& state) asio::ip::address_v4::loopback(), acceptors[i].local_endpoint().port() ); ops.push_back( std::make_unique( sequential_churn_op{ ioc, acceptors[i], ep, running, - state.latency(), state.ops(), {}, {}, {} } ) ); + state.latency(), state.ops(), + tcp_socket(ioc.get_executor()), + tcp_socket(ioc.get_executor()), {} } ) ); ops.back()->start(); } @@ -273,8 +279,8 @@ struct burst_churn_op std::atomic& ops; int burst_size; - std::vector> clients; - std::vector> servers; + std::vector clients; + std::vector servers; int accepted_count = 0; perf::stopwatch sw; @@ -295,27 +301,27 @@ struct burst_churn_op // partial failure doesn't leave dangling async_accept operations. for( int i = 0; i < burst_size; ++i ) { - clients.push_back( std::make_unique( ioc.get_executor() ) ); + clients.emplace_back( ioc.get_executor() ); boost::system::error_code ec; - ec = clients.back()->open( tcp::v4(), ec ); + ec = clients.back().open( tcp::v4(), ec ); if( ec ) { clients.clear(); asio::post( ioc, [this]() { start(); } ); return; } - configure_churn_socket( *clients.back() ); + configure_churn_socket( clients.back() ); } // Initiate all connects and accepts for( int i = 0; i < burst_size; ++i ) { - clients[i]->async_connect( ep, + clients[i].async_connect( ep, [](boost::system::error_code) {} ); - servers.push_back(std::make_unique(ioc.get_executor())); + servers.emplace_back( ioc.get_executor() ); acc.async_accept( - *servers.back(), [this](boost::system::error_code ec) { + servers.back(), [this](boost::system::error_code ec) { if (ec) return; ++accepted_count; @@ -328,9 +334,9 @@ struct burst_churn_op void close_all() { for (auto& c : clients) - c->close(); + c.close(); for (auto& s : servers) - s->close(); + s.close(); burst_stats.add(sw.elapsed_ns()); ops.fetch_add(1, std::memory_order_relaxed); diff --git a/perf/bench/asio/coroutine/accept_churn_bench.cpp b/perf/bench/asio/coroutine/accept_churn_bench.cpp index 53008ac9..eafa532b 100644 --- a/perf/bench/asio/coroutine/accept_churn_bench.cpp +++ b/perf/bench/asio/coroutine/accept_churn_bench.cpp @@ -21,7 +21,6 @@ #include #include -#include #include #include @@ -81,34 +80,34 @@ bench_sequential_churn(bench::state& state) { auto lp = state.lap(); - auto client = std::make_unique(ioc); - auto server = std::make_unique(ioc); + tcp_socket client(ioc.get_executor()); + tcp_socket server(ioc.get_executor()); boost::system::error_code ec; - ec = client->open(tcp::v4(), ec); + ec = client.open(tcp::v4(), ec); if (ec) continue; - configure_churn_socket(*client); + configure_churn_socket(client); asio::co_spawn( ioc, [](tcp_socket& c, tcp::endpoint ep) -> asio::awaitable { co_await c.async_connect(ep, asio::deferred); - }(*client, ep), + }(client, ep), asio::detached); - *server = co_await acc.async_accept(asio::deferred); + server = co_await acc.async_accept(asio::deferred); char byte = 'X'; co_await asio::async_write( - *client, asio::buffer(&byte, 1), asio::deferred); + client, asio::buffer(&byte, 1), asio::deferred); char recv = 0; co_await asio::async_read( - *server, asio::buffer(&recv, 1), asio::deferred); + server, asio::buffer(&recv, 1), asio::deferred); - client->close(); - server->close(); + client.close(); + server.close(); } } catch (std::exception const&) @@ -151,34 +150,34 @@ bench_sequential_churn_lockless(bench::state& state) { auto lp = state.lap(); - auto client = std::make_unique(ioc); - auto server = std::make_unique(ioc); + tcp_socket client(ioc.get_executor()); + tcp_socket server(ioc.get_executor()); boost::system::error_code ec; - ec = client->open(tcp::v4(), ec); + ec = client.open(tcp::v4(), ec); if (ec) continue; - configure_churn_socket(*client); + configure_churn_socket(client); asio::co_spawn( ioc, [](tcp_socket& c, tcp::endpoint ep) -> asio::awaitable { co_await c.async_connect(ep, asio::deferred); - }(*client, ep), + }(client, ep), asio::detached); - *server = co_await acc.async_accept(asio::deferred); + server = co_await acc.async_accept(asio::deferred); char byte = 'X'; co_await asio::async_write( - *client, asio::buffer(&byte, 1), asio::deferred); + client, asio::buffer(&byte, 1), asio::deferred); char recv = 0; co_await asio::async_read( - *server, asio::buffer(&recv, 1), asio::deferred); + server, asio::buffer(&recv, 1), asio::deferred); - client->close(); - server->close(); + client.close(); + server.close(); } } catch (std::exception const&) @@ -230,34 +229,34 @@ bench_concurrent_churn(bench::state& state) { auto lp = state.lap(); - auto client = std::make_unique(ioc); - auto server = std::make_unique(ioc); + tcp_socket client(ioc.get_executor()); + tcp_socket server(ioc.get_executor()); boost::system::error_code ec; - ec = client->open(tcp::v4(), ec); + ec = client.open(tcp::v4(), ec); if (ec) continue; - configure_churn_socket(*client); + configure_churn_socket(client); asio::co_spawn( ioc, [](tcp_socket& c, tcp::endpoint ep) -> asio::awaitable { co_await c.async_connect(ep, asio::deferred); - }(*client, ep), + }(client, ep), asio::detached); - *server = co_await acc.async_accept(asio::deferred); + server = co_await acc.async_accept(asio::deferred); char byte = 'X'; co_await asio::async_write( - *client, asio::buffer(&byte, 1), asio::deferred); + client, asio::buffer(&byte, 1), asio::deferred); char recv = 0; co_await asio::async_read( - *server, asio::buffer(&recv, 1), asio::deferred); + server, asio::buffer(&recv, 1), asio::deferred); - client->close(); - server->close(); + client.close(); + server.close(); } } catch (std::exception const&) @@ -306,7 +305,7 @@ bench_burst_churn(bench::state& state) { auto lp = state.lap(); - std::vector> clients; + std::vector clients; std::vector servers; clients.reserve(burst_size); servers.reserve(burst_size); @@ -314,16 +313,16 @@ bench_burst_churn(bench::state& state) bool open_ok = true; for (int i = 0; i < burst_size; ++i) { - clients.push_back(std::make_unique(ioc)); + clients.emplace_back(ioc.get_executor()); boost::system::error_code ec; - ec = clients.back()->open(tcp::v4(), ec); + ec = clients.back().open(tcp::v4(), ec); if (ec) { clients.clear(); open_ok = false; break; } - configure_churn_socket(*clients.back()); + configure_churn_socket(clients.back()); } if (!open_ok) continue; @@ -335,7 +334,7 @@ bench_burst_churn(bench::state& state) [](tcp_socket& c, tcp::endpoint ep) -> asio::awaitable { co_await c.async_connect(ep, asio::deferred); - }(*clients[i], ep), + }(clients[i], ep), asio::detached); } @@ -346,7 +345,7 @@ bench_burst_churn(bench::state& state) } for (auto& c : clients) - c->close(); + c.close(); for (auto& s : servers) s.close(); } @@ -394,7 +393,7 @@ bench_burst_churn_lockless(bench::state& state) { auto lp = state.lap(); - std::vector> clients; + std::vector clients; std::vector servers; clients.reserve(burst_size); servers.reserve(burst_size); @@ -402,16 +401,16 @@ bench_burst_churn_lockless(bench::state& state) bool open_ok = true; for (int i = 0; i < burst_size; ++i) { - clients.push_back(std::make_unique(ioc)); + clients.emplace_back(ioc.get_executor()); boost::system::error_code ec; - ec = clients.back()->open(tcp::v4(), ec); + ec = clients.back().open(tcp::v4(), ec); if (ec) { clients.clear(); open_ok = false; break; } - configure_churn_socket(*clients.back()); + configure_churn_socket(clients.back()); } if (!open_ok) continue; @@ -423,7 +422,7 @@ bench_burst_churn_lockless(bench::state& state) [](tcp_socket& c, tcp::endpoint ep) -> asio::awaitable { co_await c.async_connect(ep, asio::deferred); - }(*clients[i], ep), + }(clients[i], ep), asio::detached); } @@ -434,7 +433,7 @@ bench_burst_churn_lockless(bench::state& state) } for (auto& c : clients) - c->close(); + c.close(); for (auto& s : servers) s.close(); }