Fix deadlock: don't take channel lock in timer
authorMathieu Desnoyers <mathieu.desnoyers@efficios.com>
Tue, 16 Jul 2013 13:53:33 +0000 (09:53 -0400)
committerMathieu Desnoyers <mathieu.desnoyers@efficios.com>
Tue, 16 Jul 2013 18:33:47 +0000 (14:33 -0400)
Reviewed-by: Julien Desfossez <julien.desfossez@efficios.com>
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
src/common/consumer-metadata-cache.c
src/common/consumer-metadata-cache.h
src/common/consumer-stream.c
src/common/consumer-timer.c
src/common/consumer.c
src/common/ust-consumer/ust-consumer.c
src/common/ust-consumer/ust-consumer.h

index 5967a8eb296a51041f754622f65bb0b706ffe393..d597e64e37883c91182bcaf9c54327cd9cc0d54f 100644 (file)
@@ -190,7 +190,7 @@ void consumer_metadata_cache_destroy(struct lttng_consumer_channel *channel)
  * Return 0 if everything has been flushed, 1 if there is data not flushed.
  */
 int consumer_metadata_cache_flushed(struct lttng_consumer_channel *channel,
-               uint64_t offset)
+               uint64_t offset, int timer)
 {
        int ret = 0;
        struct lttng_consumer_stream *metadata_stream;
@@ -199,12 +199,15 @@ int consumer_metadata_cache_flushed(struct lttng_consumer_channel *channel,
        assert(channel->metadata_cache);
 
        /*
-        * XXX This consumer_data.lock should eventually be replaced by
-        * a channel lock. It protects metadata_stream read and endpoint
-        * status check.
+        * If not called from a timer handler, we have to take the
+        * channel lock to be mutually exclusive with channel teardown.
+        * Timer handler does not need to take this lock because it is
+        * already synchronized by timer stop (and, more importantly,
+        * taking this lock in a timer handler would cause a deadlock).
         */
-       pthread_mutex_lock(&consumer_data.lock);
-       pthread_mutex_lock(&channel->lock);
+       if (!timer) {
+               pthread_mutex_lock(&channel->lock);
+       }
        pthread_mutex_lock(&channel->timer_lock);
        pthread_mutex_lock(&channel->metadata_cache->lock);
 
@@ -229,8 +232,9 @@ int consumer_metadata_cache_flushed(struct lttng_consumer_channel *channel,
 
        pthread_mutex_unlock(&channel->metadata_cache->lock);
        pthread_mutex_unlock(&channel->timer_lock);
-       pthread_mutex_unlock(&channel->lock);
-       pthread_mutex_unlock(&consumer_data.lock);
+       if (!timer) {
+               pthread_mutex_unlock(&channel->lock);
+       }
 
        return ret;
 }
index 8f485d6390feda1e399de76348aa15fdf371ef5d..aaf9f24d2a5a978b4c0024d290be86910a1ae0df 100644 (file)
@@ -54,6 +54,6 @@ int consumer_metadata_cache_write(struct lttng_consumer_channel *channel,
 int consumer_metadata_cache_allocate(struct lttng_consumer_channel *channel);
 void consumer_metadata_cache_destroy(struct lttng_consumer_channel *channel);
 int consumer_metadata_cache_flushed(struct lttng_consumer_channel *channel,
-               uint64_t offset);
+               uint64_t offset, int timer);
 
 #endif /* CONSUMER_METADATA_CACHE_H */
index 02887fcc457aaa3deb435d94bfc3ccb04fa2496b..717e0a7351a49461e4702c872fc91914b8efaffe 100644 (file)
@@ -281,7 +281,6 @@ void consumer_stream_destroy(struct lttng_consumer_stream *stream,
                if (stream->globally_visible) {
                        pthread_mutex_lock(&consumer_data.lock);
                        pthread_mutex_lock(&stream->chan->lock);
-                       pthread_mutex_lock(&stream->chan->timer_lock);
                        pthread_mutex_lock(&stream->lock);
                        /* Remove every reference of the stream in the consumer. */
                        consumer_stream_delete(stream, ht);
@@ -295,7 +294,6 @@ void consumer_stream_destroy(struct lttng_consumer_stream *stream,
                        consumer_data.need_update = 1;
 
                        pthread_mutex_unlock(&stream->lock);
-                       pthread_mutex_unlock(&stream->chan->timer_lock);
                        pthread_mutex_unlock(&stream->chan->lock);
                        pthread_mutex_unlock(&consumer_data.lock);
                } else {
index a7f4536a25883cde8bec0aa8f4a3d9a13196e379..37c9861b906a0a81c6b08a302c73e486e6c6d3dd 100644 (file)
@@ -84,15 +84,15 @@ static void metadata_switch_timer(struct lttng_consumer_local_data *ctx,
                 *   - Calling lttng_ustconsumer_recv_metadata():
                 *     - channel->metadata_cache->lock
                 *     - Calling consumer_metadata_cache_flushed():
-                *       - consumer_data.lock
-                *         - channel->lock
-                *           - channel->metadata_cache->lock
+                *       - channel->timer_lock
+                *         - channel->metadata_cache->lock
                 *
-                * Both consumer_data.lock and channel->lock currently
-                * cause a deadlock, since they are held while
-                * consumer_timer_switch_stop() is called.
+                * Ensure that neither consumer_data.lock nor
+                * channel->lock are taken within this function, since
+                * they are held while consumer_timer_switch_stop() is
+                * called.
                 */
-               ret = lttng_ustconsumer_request_metadata(ctx, channel);
+               ret = lttng_ustconsumer_request_metadata(ctx, channel, 1);
                if (ret < 0) {
                        channel->switch_timer_error = 1;
                }
index a0fb2384cd566fe59b58bbca771b3a279f8670c2..6cf85d96a75b0afa69994886d9a94d91a7faaa29 100644 (file)
@@ -292,7 +292,6 @@ void consumer_del_channel(struct lttng_consumer_channel *channel)
 
        pthread_mutex_lock(&consumer_data.lock);
        pthread_mutex_lock(&channel->lock);
-       pthread_mutex_lock(&channel->timer_lock);
 
        /* Delete streams that might have been left in the stream list. */
        cds_list_for_each_entry_safe(stream, stmp, &channel->streams.head,
@@ -326,7 +325,6 @@ void consumer_del_channel(struct lttng_consumer_channel *channel)
 
        call_rcu(&channel->node.head, free_channel_rcu);
 end:
-       pthread_mutex_unlock(&channel->timer_lock);
        pthread_mutex_unlock(&channel->lock);
        pthread_mutex_unlock(&consumer_data.lock);
 }
@@ -1869,7 +1867,6 @@ void consumer_del_metadata_stream(struct lttng_consumer_stream *stream,
 
        pthread_mutex_lock(&consumer_data.lock);
        pthread_mutex_lock(&stream->chan->lock);
-       pthread_mutex_lock(&stream->chan->timer_lock);
        pthread_mutex_lock(&stream->lock);
 
        switch (consumer_data.type) {
@@ -1964,13 +1961,12 @@ void consumer_del_metadata_stream(struct lttng_consumer_stream *stream,
 end:
        /*
         * Nullify the stream reference so it is not used after deletion. The
-        * consumer data lock MUST be acquired before being able to check for a
-        * NULL pointer value.
+        * channel lock MUST be acquired before being able to check for
+        * NULL pointer value.
         */
        stream->chan->metadata_stream = NULL;
 
        pthread_mutex_unlock(&stream->lock);
-       pthread_mutex_unlock(&stream->chan->timer_lock);
        pthread_mutex_unlock(&stream->chan->lock);
        pthread_mutex_unlock(&consumer_data.lock);
 
index 61b220ee012a76930d3149be0eba07ba2fef919b..f8c02ee72cf03adf6eeca28548aa4f0aff98afcb 100644 (file)
@@ -614,7 +614,6 @@ static int close_metadata(uint64_t chan_key)
 
        pthread_mutex_lock(&consumer_data.lock);
        pthread_mutex_lock(&channel->lock);
-       pthread_mutex_lock(&channel->timer_lock);
 
        if (cds_lfht_is_node_deleted(&channel->node.node)) {
                goto error_unlock;
@@ -642,7 +641,6 @@ static int close_metadata(uint64_t chan_key)
        }
 
 error_unlock:
-       pthread_mutex_unlock(&channel->timer_lock);
        pthread_mutex_unlock(&channel->lock);
        pthread_mutex_unlock(&consumer_data.lock);
 error:
@@ -759,7 +757,7 @@ static int snapshot_metadata(uint64_t key, char *path, uint64_t relayd_id,
         * Ask the sessiond if we have new metadata waiting and update the
         * consumer metadata cache.
         */
-       ret = lttng_ustconsumer_request_metadata(ctx, metadata_channel);
+       ret = lttng_ustconsumer_request_metadata(ctx, metadata_channel, 0);
        if (ret < 0) {
                goto error;
        }
@@ -982,7 +980,8 @@ error:
  * Receive the metadata updates from the sessiond.
  */
 int lttng_ustconsumer_recv_metadata(int sock, uint64_t key, uint64_t offset,
-               uint64_t len, struct lttng_consumer_channel *channel)
+               uint64_t len, struct lttng_consumer_channel *channel,
+               int timer)
 {
        int ret, ret_code = LTTNG_OK;
        char *metadata_str;
@@ -1019,7 +1018,7 @@ int lttng_ustconsumer_recv_metadata(int sock, uint64_t key, uint64_t offset,
        }
        pthread_mutex_unlock(&channel->metadata_cache->lock);
 
-       while (consumer_metadata_cache_flushed(channel, offset + len)) {
+       while (consumer_metadata_cache_flushed(channel, offset + len, timer)) {
                DBG("Waiting for metadata to be flushed");
                usleep(DEFAULT_METADATA_AVAILABILITY_WAIT_TIME);
        }
@@ -1354,7 +1353,7 @@ int lttng_ustconsumer_recv_cmd(struct lttng_consumer_local_data *ctx,
                }
 
                ret = lttng_ustconsumer_recv_metadata(sock, key, offset,
-                               len, channel);
+                               len, channel, 0);
                if (ret < 0) {
                        /* error receiving from sessiond */
                        goto error_fatal;
@@ -1813,7 +1812,7 @@ void lttng_ustconsumer_close_stream_wakeup(struct lttng_consumer_stream *stream)
  * introduces deadlocks.
  */
 int lttng_ustconsumer_request_metadata(struct lttng_consumer_local_data *ctx,
-               struct lttng_consumer_channel *channel)
+               struct lttng_consumer_channel *channel, int timer)
 {
        struct lttcomm_metadata_request_msg request;
        struct lttcomm_consumer_msg msg;
@@ -1901,7 +1900,7 @@ int lttng_ustconsumer_request_metadata(struct lttng_consumer_local_data *ctx,
        }
 
        ret_code = lttng_ustconsumer_recv_metadata(ctx->consumer_metadata_socket,
-                       key, offset, len, channel);
+                       key, offset, len, channel, timer);
        if (ret_code >= 0) {
                /*
                 * Only send the status msg if the sessiond is alive meaning a positive
index e65acac0b99a25630688ceb5ae3cec7650dac80b..c10cd13a2876152ed305c94c72ec64154276a373 100644 (file)
@@ -54,9 +54,10 @@ int lttng_ustconsumer_data_pending(struct lttng_consumer_stream *stream);
 void lttng_ustconsumer_close_metadata(struct lttng_ht *ht);
 void lttng_ustconsumer_close_stream_wakeup(struct lttng_consumer_stream *stream);
 int lttng_ustconsumer_recv_metadata(int sock, uint64_t key, uint64_t offset,
-               uint64_t len, struct lttng_consumer_channel *channel);
+               uint64_t len, struct lttng_consumer_channel *channel,
+               int timer);
 int lttng_ustconsumer_request_metadata(struct lttng_consumer_local_data *ctx,
-               struct lttng_consumer_channel *channel);
+               struct lttng_consumer_channel *channel, int timer);
 
 #else /* HAVE_LIBLTTNG_UST_CTL */
 
@@ -164,13 +165,14 @@ void lttng_ustconsumer_close_stream_wakeup(struct lttng_consumer_stream *stream)
 }
 static inline
 int lttng_ustconsumer_recv_metadata(int sock, uint64_t key, uint64_t offset,
-               uint64_t len, struct lttng_consumer_channel *channel)
+               uint64_t len, struct lttng_consumer_channel *channel,
+               int timer)
 {
        return -ENOSYS;
 }
 static inline
 int lttng_ustconsumer_request_metadata(struct lttng_consumer_local_data *ctx,
-               struct lttng_consumer_channel *channel)
+               struct lttng_consumer_channel *channel, int timer)
 {
        return -ENOSYS;
 }
This page took 0.033219 seconds and 5 git commands to generate.