diff --git a/src/perftest_communication.c b/src/perftest_communication.c index 27439c2a..533599a9 100755 --- a/src/perftest_communication.c +++ b/src/perftest_communication.c @@ -1294,9 +1294,10 @@ int rdma_client_connect(struct pingpong_context *ctx,struct perftest_parameters } if (event->event != RDMA_CM_EVENT_ESTABLISHED) { - fprintf(stderr, "Unexpected CM event bl blka %d\n", event->event); + fprintf(stderr, "Unexpected CM event %s; error: %d.\n", + rdma_event_str(event->event), event->status); rdma_ack_cm_event(event); - return FAILURE; + return FAILURE; } if (user_param->connection_type == UD) { diff --git a/src/perftest_resources.c b/src/perftest_resources.c index b7b7e5b7..18423f8e 100755 --- a/src/perftest_resources.c +++ b/src/perftest_resources.c @@ -110,6 +110,7 @@ static __always_inline int poll_completions( struct perftest_parameters* duration_param; struct check_alive_data check_alive_data; +volatile sig_atomic_t g_sigalarm_fired = 0; /****************************************************************************** * Beginning @@ -6621,6 +6622,7 @@ uint16_t ctx_get_local_lid(struct ibv_context *context,int port) ******************************************************************************/ void catch_alarm(int sig) { + g_sigalarm_fired = 1; switch (duration_param->state) { case START_STATE: duration_param->state = SAMPLE_STATE; @@ -6648,6 +6650,7 @@ void catch_alarm(int sig) void check_alive(int sig) { + g_sigalarm_fired = 1; if (check_alive_data.current_totrcnt > check_alive_data.last_totrcnt) { check_alive_data.last_totrcnt = check_alive_data.current_totrcnt; alarm(60); diff --git a/src/perftest_resources.h b/src/perftest_resources.h index 95c43a04..3ba105f7 100644 --- a/src/perftest_resources.h +++ b/src/perftest_resources.h @@ -72,6 +72,9 @@ #include #include #include +#include +#include +#include #include "perftest_parameters.h" #ifdef HAVE_CUDA @@ -201,6 +204,7 @@ static inline uint64_t build_wr_id(uint32_t wr_index, uint16_t qp_index) 
return ((uint64_t)wr_index) | ((uint64_t)qp_index << WR_ID_QP_INDEX_OFFSET); } +extern volatile sig_atomic_t g_sigalarm_fired; /****************************************************************************** * Perftest resources Structures and data types. ******************************************************************************/ @@ -942,15 +946,35 @@ static __inline void increase_rem_addr(struct ibv_send_wr *wr,int size,uint64_t static __inline int ctx_notify_send_recv_events(struct pingpong_context *ctx) { fd_set rfds; + int ret; + int saved_errno; - FD_ZERO(&rfds); - FD_SET(ctx->recv_channel->fd, &rfds); - FD_SET(ctx->send_channel->fd, &rfds); + /* Retry select() whenever a signal handler interrupts it (EINTR). */ + do { + FD_ZERO(&rfds); + FD_SET(ctx->recv_channel->fd, &rfds); + FD_SET(ctx->send_channel->fd, &rfds); - if (select(MAX(ctx->recv_channel->fd, - ctx->send_channel->fd) + 1, - &rfds, NULL, NULL, NULL) == -1) { - fprintf(stderr, "Failed to get completion events\n"); + g_sigalarm_fired = 0; + + ret = select(MAX(ctx->recv_channel->fd, + ctx->send_channel->fd) + 1, + &rfds, NULL, NULL, NULL); + /* Snapshot errno now: the fprintf() calls below may overwrite it before the retry test runs. */ + saved_errno = errno; + + if (ret == -1 && saved_errno == EINTR) { + if (g_sigalarm_fired) { + fprintf(stderr, "Confirmed: select() was interrupted by SIGALRM. Retrying...\n"); + } else { + fprintf(stderr, "Warning: select() interrupted by another signal. Retrying...\n"); + } + } + + } while (ret == -1 && saved_errno == EINTR); + + if (ret == -1) { + fprintf(stderr, "Failed to get completion events: %s\n", strerror(saved_errno)); return FAILURE; }