Fix: perform relayd socket pair cleanup on control socket error
author Jonathan Rajotte <jonathan.rajotte-julien@efficios.com>
Wed, 6 Jun 2018 01:00:28 +0000 (21:00 -0400)
committer Jonathan Rajotte <jonathan.rajotte-julien@efficios.com>
Fri, 21 Sep 2018 04:15:24 +0000 (00:15 -0400)
A reference to the local context for the socket pair is used to "force" an
evaluation of the data and metadata streams since we changed the endpoint
status. This imitates what is currently done for the data socket.

This prevents hitting network timeouts multiple times in a row when an
error occurs. For now, there is no retry mechanism, hence
"terminating" all communication makes sense and prevents unwanted delays
in operation.

Signed-off-by: Jonathan Rajotte <jonathan.rajotte-julien@efficios.com>
Signed-off-by: Jérémie Galarneau <jeremie.galarneau@efficios.com>
src/common/consumer/consumer.c

index 0539eeb7be17143a98d7741d5448552d7fe2e862..3556b789760eca81a4074193670d18a902f7607b 100644 (file)
@@ -792,13 +792,12 @@ int consumer_send_relayd_stream(struct lttng_consumer_stream *stream,
                ret = relayd_add_stream(&relayd->control_sock, stream->name,
                                path, &stream->relayd_stream_id,
                                stream->chan->tracefile_size, stream->chan->tracefile_count);
                ret = relayd_add_stream(&relayd->control_sock, stream->name,
                                path, &stream->relayd_stream_id,
                                stream->chan->tracefile_size, stream->chan->tracefile_count);
+               pthread_mutex_unlock(&relayd->ctrl_sock_mutex);
                if (ret < 0) {
                        ERR("Relayd add stream failed. Cleaning up relayd %" PRIu64".", relayd->net_seq_idx);
                        lttng_consumer_cleanup_relayd(relayd);
                if (ret < 0) {
                        ERR("Relayd add stream failed. Cleaning up relayd %" PRIu64".", relayd->net_seq_idx);
                        lttng_consumer_cleanup_relayd(relayd);
-                       pthread_mutex_unlock(&relayd->ctrl_sock_mutex);
                        goto end;
                }
                        goto end;
                }
-               pthread_mutex_unlock(&relayd->ctrl_sock_mutex);
 
                uatomic_inc(&relayd->refcount);
                stream->sent_to_relayd = 1;
 
                uatomic_inc(&relayd->refcount);
                stream->sent_to_relayd = 1;
@@ -836,13 +835,12 @@ int consumer_send_relayd_streams_sent(uint64_t net_seq_idx)
                /* Add stream on the relayd */
                pthread_mutex_lock(&relayd->ctrl_sock_mutex);
                ret = relayd_streams_sent(&relayd->control_sock);
                /* Add stream on the relayd */
                pthread_mutex_lock(&relayd->ctrl_sock_mutex);
                ret = relayd_streams_sent(&relayd->control_sock);
+               pthread_mutex_unlock(&relayd->ctrl_sock_mutex);
                if (ret < 0) {
                        ERR("Relayd streams sent failed. Cleaning up relayd %" PRIu64".", relayd->net_seq_idx);
                        lttng_consumer_cleanup_relayd(relayd);
                if (ret < 0) {
                        ERR("Relayd streams sent failed. Cleaning up relayd %" PRIu64".", relayd->net_seq_idx);
                        lttng_consumer_cleanup_relayd(relayd);
-                       pthread_mutex_unlock(&relayd->ctrl_sock_mutex);
                        goto end;
                }
                        goto end;
                }
-               pthread_mutex_unlock(&relayd->ctrl_sock_mutex);
        } else {
                ERR("Relayd ID %" PRIu64 " unknown. Can't send streams_sent.",
                                net_seq_idx);
        } else {
                ERR("Relayd ID %" PRIu64 " unknown. Can't send streams_sent.",
                                net_seq_idx);
@@ -3640,14 +3638,13 @@ int consumer_data_pending(uint64_t id)
                pthread_mutex_lock(&relayd->ctrl_sock_mutex);
                ret = relayd_begin_data_pending(&relayd->control_sock,
                                relayd->relayd_session_id);
                pthread_mutex_lock(&relayd->ctrl_sock_mutex);
                ret = relayd_begin_data_pending(&relayd->control_sock,
                                relayd->relayd_session_id);
+               pthread_mutex_unlock(&relayd->ctrl_sock_mutex);
                if (ret < 0) {
                        /* Communication error thus the relayd so no data pending. */
                        ERR("Relayd begin data pending failed. Cleaning up relayd %" PRIu64".", relayd->net_seq_idx);
                        lttng_consumer_cleanup_relayd(relayd);
                if (ret < 0) {
                        /* Communication error thus the relayd so no data pending. */
                        ERR("Relayd begin data pending failed. Cleaning up relayd %" PRIu64".", relayd->net_seq_idx);
                        lttng_consumer_cleanup_relayd(relayd);
-                       pthread_mutex_unlock(&relayd->ctrl_sock_mutex);
                        goto data_not_pending;
                }
                        goto data_not_pending;
                }
-               pthread_mutex_unlock(&relayd->ctrl_sock_mutex);
        }
 
        cds_lfht_for_each_entry_duplicate(ht->ht,
        }
 
        cds_lfht_for_each_entry_duplicate(ht->ht,
@@ -3710,13 +3707,12 @@ int consumer_data_pending(uint64_t id)
                pthread_mutex_lock(&relayd->ctrl_sock_mutex);
                ret = relayd_end_data_pending(&relayd->control_sock,
                                relayd->relayd_session_id, &is_data_inflight);
                pthread_mutex_lock(&relayd->ctrl_sock_mutex);
                ret = relayd_end_data_pending(&relayd->control_sock,
                                relayd->relayd_session_id, &is_data_inflight);
+               pthread_mutex_unlock(&relayd->ctrl_sock_mutex);
                if (ret < 0) {
                        ERR("Relayd end data pending failed. Cleaning up relayd %" PRIu64".", relayd->net_seq_idx);
                        lttng_consumer_cleanup_relayd(relayd);
                if (ret < 0) {
                        ERR("Relayd end data pending failed. Cleaning up relayd %" PRIu64".", relayd->net_seq_idx);
                        lttng_consumer_cleanup_relayd(relayd);
-                       pthread_mutex_unlock(&relayd->ctrl_sock_mutex);
                        goto data_not_pending;
                }
                        goto data_not_pending;
                }
-               pthread_mutex_unlock(&relayd->ctrl_sock_mutex);
                if (is_data_inflight) {
                        goto data_pending;
                }
                if (is_data_inflight) {
                        goto data_pending;
                }
This page took 0.028122 seconds and 5 git commands to generate.