From 8bcf120eff0f6f94d04d4bd6db6b337fed495723 Mon Sep 17 00:00:00 2001 From: =?utf8?q?J=C3=A9r=C3=A9mie=20Galarneau?= Date: Fri, 6 Jul 2018 19:14:43 -0400 Subject: [PATCH 01/16] Fix: remove inode from inode registry ht MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Signed-off-by: Jérémie Galarneau --- src/common/fd-tracker/inode.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/common/fd-tracker/inode.c b/src/common/fd-tracker/inode.c index e0d8bbfcc..f02f0a812 100644 --- a/src/common/fd-tracker/inode.c +++ b/src/common/fd-tracker/inode.c @@ -103,6 +103,9 @@ void lttng_inode_destroy(struct lttng_inode *inode) PERROR("Failed to unlink %s", inode->path); } } + rcu_read_lock(); + cds_lfht_del(inode->registry_ht, &inode->registry_node); + rcu_read_unlock(); call_rcu(&inode->rcu_head, lttng_inode_delete); } @@ -249,6 +252,7 @@ struct lttng_inode *lttng_inode_create(const struct inode_id *id, cds_lfht_node_init(&inode->registry_node); inode->id = *id; inode->path = strdup(path); + inode->registry_ht = ht; if (!inode->path) { goto error; } -- 2.34.1 From 2292792212c7f3c9d9bdef413f4230d9ea70ab68 Mon Sep 17 00:00:00 2001 From: Jonathan Rajotte Date: Mon, 9 Jul 2018 11:32:37 -0400 Subject: [PATCH 02/16] Rev5: Update extra_version information --- version/extra_version_description | 1 + version/extra_version_name | 1 + 2 files changed, 2 insertions(+) create mode 100644 version/extra_version_description create mode 100644 version/extra_version_name diff --git a/version/extra_version_description b/version/extra_version_description new file mode 100644 index 000000000..97ee0a857 --- /dev/null +++ b/version/extra_version_description @@ -0,0 +1 @@ +EfficiOS Revision 5 diff --git a/version/extra_version_name b/version/extra_version_name new file mode 100644 index 000000000..b29e2ee0a --- /dev/null +++ b/version/extra_version_name @@ -0,0 +1 @@ +Backport - 2.9.9 -- 2.34.1 From 089623df79c3215cb00d8f35aeb82eea1914eca4 Mon Sep 17 
00:00:00 2001 From: =?utf8?q?J=C3=A9r=C3=A9mie=20Galarneau?= Date: Thu, 13 Sep 2018 17:04:45 -0400 Subject: [PATCH 03/16] Fix: relayd control socket mutex is not destroyed MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Signed-off-by: Jérémie Galarneau --- src/common/consumer/consumer.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/common/consumer/consumer.c b/src/common/consumer/consumer.c index c2a2b1587..0539eeb7b 100644 --- a/src/common/consumer/consumer.c +++ b/src/common/consumer/consumer.c @@ -320,6 +320,7 @@ static void free_relayd_rcu(struct rcu_head *head) (void) relayd_close(&relayd->control_sock); (void) relayd_close(&relayd->data_sock); + pthread_mutex_destroy(&relayd->ctrl_sock_mutex); free(relayd); } -- 2.34.1 From 0027222c507a42315b0c1ed82e02f7e1718014d6 Mon Sep 17 00:00:00 2001 From: Jonathan Rajotte Date: Tue, 5 Jun 2018 21:00:28 -0400 Subject: [PATCH 04/16] Fix: perform relayd socket pair cleanup on control socket error MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit A reference to the local context for the socket pair is used to "force" an evaluation of the data and metadata streams since we changed the endpoint status. This imitates what is currently done for the data socket. This prevents hitting network timeouts multiple times in a row when an error occurs. For now, there is no mechanism for retry, hence "terminating" all communication makes sense and prevents unwanted delays on operations. 
Signed-off-by: Jonathan Rajotte Signed-off-by: Jérémie Galarneau --- src/common/consumer/consumer.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/src/common/consumer/consumer.c b/src/common/consumer/consumer.c index 0539eeb7b..3556b7897 100644 --- a/src/common/consumer/consumer.c +++ b/src/common/consumer/consumer.c @@ -792,13 +792,12 @@ int consumer_send_relayd_stream(struct lttng_consumer_stream *stream, ret = relayd_add_stream(&relayd->control_sock, stream->name, path, &stream->relayd_stream_id, stream->chan->tracefile_size, stream->chan->tracefile_count); + pthread_mutex_unlock(&relayd->ctrl_sock_mutex); if (ret < 0) { ERR("Relayd add stream failed. Cleaning up relayd %" PRIu64".", relayd->net_seq_idx); lttng_consumer_cleanup_relayd(relayd); - pthread_mutex_unlock(&relayd->ctrl_sock_mutex); goto end; } - pthread_mutex_unlock(&relayd->ctrl_sock_mutex); uatomic_inc(&relayd->refcount); stream->sent_to_relayd = 1; @@ -836,13 +835,12 @@ int consumer_send_relayd_streams_sent(uint64_t net_seq_idx) /* Add stream on the relayd */ pthread_mutex_lock(&relayd->ctrl_sock_mutex); ret = relayd_streams_sent(&relayd->control_sock); + pthread_mutex_unlock(&relayd->ctrl_sock_mutex); if (ret < 0) { ERR("Relayd streams sent failed. Cleaning up relayd %" PRIu64".", relayd->net_seq_idx); lttng_consumer_cleanup_relayd(relayd); - pthread_mutex_unlock(&relayd->ctrl_sock_mutex); goto end; } - pthread_mutex_unlock(&relayd->ctrl_sock_mutex); } else { ERR("Relayd ID %" PRIu64 " unknown. Can't send streams_sent.", net_seq_idx); @@ -3640,14 +3638,13 @@ int consumer_data_pending(uint64_t id) pthread_mutex_lock(&relayd->ctrl_sock_mutex); ret = relayd_begin_data_pending(&relayd->control_sock, relayd->relayd_session_id); + pthread_mutex_unlock(&relayd->ctrl_sock_mutex); if (ret < 0) { /* Communication error thus the relayd so no data pending. */ ERR("Relayd begin data pending failed. 
Cleaning up relayd %" PRIu64".", relayd->net_seq_idx); lttng_consumer_cleanup_relayd(relayd); - pthread_mutex_unlock(&relayd->ctrl_sock_mutex); goto data_not_pending; } - pthread_mutex_unlock(&relayd->ctrl_sock_mutex); } cds_lfht_for_each_entry_duplicate(ht->ht, @@ -3710,13 +3707,12 @@ int consumer_data_pending(uint64_t id) pthread_mutex_lock(&relayd->ctrl_sock_mutex); ret = relayd_end_data_pending(&relayd->control_sock, relayd->relayd_session_id, &is_data_inflight); + pthread_mutex_unlock(&relayd->ctrl_sock_mutex); if (ret < 0) { ERR("Relayd end data pending failed. Cleaning up relayd %" PRIu64".", relayd->net_seq_idx); lttng_consumer_cleanup_relayd(relayd); - pthread_mutex_unlock(&relayd->ctrl_sock_mutex); goto data_not_pending; } - pthread_mutex_unlock(&relayd->ctrl_sock_mutex); if (is_data_inflight) { goto data_pending; } -- 2.34.1 From 7c4de72fc87029fc79fdb4b1585a3c326fc9d735 Mon Sep 17 00:00:00 2001 From: Jonathan Rajotte Date: Fri, 7 Sep 2018 15:18:37 -0400 Subject: [PATCH 05/16] Fix: use LTTNG_VIEWER_ATTACH_UNK to report a closed session MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit LTTNG_VIEWER_NEW_STREAMS_HUP is not a valid error number for the LTTNG_VIEWER_ATTACH_SESSION command. This results in erroneous error reporting on the client side. 
Signed-off-by: Jonathan Rajotte Signed-off-by: Jérémie Galarneau --- src/bin/lttng-relayd/live.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/bin/lttng-relayd/live.c b/src/bin/lttng-relayd/live.c index 066dc879e..d0b8abdae 100644 --- a/src/bin/lttng-relayd/live.c +++ b/src/bin/lttng-relayd/live.c @@ -1170,7 +1170,7 @@ int viewer_attach_session(struct relay_connection *conn) if (closed) { send_streams = 0; response.streams_count = 0; - response.status = htobe32(LTTNG_VIEWER_NEW_STREAMS_HUP); + response.status = htobe32(LTTNG_VIEWER_ATTACH_UNK); goto send_reply; } -- 2.34.1 From 4213c2279637b397d7be3764288b5b26749ad6d1 Mon Sep 17 00:00:00 2001 From: Jonathan Rajotte Date: Fri, 7 Sep 2018 15:18:38 -0400 Subject: [PATCH 06/16] Fix: skip closed session on viewer listing MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit There is no value in listing a closed session. A viewer cannot hook itself to a closed session in live mode and the session is about to be removed from the sessions hash table. 
Signed-off-by: Jonathan Rajotte Signed-off-by: Jérémie Galarneau --- src/bin/lttng-relayd/live.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/bin/lttng-relayd/live.c b/src/bin/lttng-relayd/live.c index d0b8abdae..3921f251f 100644 --- a/src/bin/lttng-relayd/live.c +++ b/src/bin/lttng-relayd/live.c @@ -916,6 +916,11 @@ int viewer_list_sessions(struct relay_connection *conn) health_code_update(); + if (session->connection_closed) { + /* Skip closed session */ + continue; + } + if (count >= buf_count) { struct lttng_viewer_session *newbuf; uint32_t new_buf_count = buf_count << 1; -- 2.34.1 From a34de389971fdcb6d367efab780710a2f8e1c703 Mon Sep 17 00:00:00 2001 From: =?utf8?q?J=C3=A9r=C3=A9mie=20Galarneau?= Date: Mon, 10 Sep 2018 20:09:12 -0400 Subject: [PATCH 07/16] Fix: acquire stream lock during kernel metadata snapshot MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit The stream lock is not taken when interacting with the kernel metadata stream that is created at the time a snapshot is taken. This was noticed while reviewing the code for an unrelated reason, so there is no known problem caused by this. Nevertheless, this is incorrect as the stream is globally visible in the consumer. Moreover, the stream was not cleaned-up which can cause a leak whenever a metadata snapshot fails. 
Signed-off-by: Jérémie Galarneau Signed-off-by: Jonathan Rajotte --- src/common/kernel-consumer/kernel-consumer.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/src/common/kernel-consumer/kernel-consumer.c b/src/common/kernel-consumer/kernel-consumer.c index 0afa143a5..ccca69e0e 100644 --- a/src/common/kernel-consumer/kernel-consumer.c +++ b/src/common/kernel-consumer/kernel-consumer.c @@ -331,7 +331,7 @@ end: * * Returns 0 on success, < 0 on error */ -int lttng_kconsumer_snapshot_metadata(uint64_t key, char *path, +static int lttng_kconsumer_snapshot_metadata(uint64_t key, char *path, uint64_t relayd_id, struct lttng_consumer_local_data *ctx) { int ret, use_relayd = 0; @@ -350,11 +350,12 @@ int lttng_kconsumer_snapshot_metadata(uint64_t key, char *path, if (!metadata_channel) { ERR("Kernel snapshot metadata not found for key %" PRIu64, key); ret = -1; - goto error; + goto error_no_channel; } metadata_stream = metadata_channel->metadata_stream; assert(metadata_stream); + pthread_mutex_lock(&metadata_stream->lock); /* Flag once that we have a valid relayd for the stream. 
*/ if (relayd_id != (uint64_t) -1ULL) { @@ -364,7 +365,7 @@ int lttng_kconsumer_snapshot_metadata(uint64_t key, char *path, if (use_relayd) { ret = consumer_send_relayd_stream(metadata_stream, path); if (ret < 0) { - goto error; + goto error_snapshot; } } else { ret = utils_create_stream_file(path, metadata_stream->name, @@ -372,7 +373,7 @@ int lttng_kconsumer_snapshot_metadata(uint64_t key, char *path, metadata_stream->tracefile_count_current, metadata_stream->uid, metadata_stream->gid, NULL); if (ret < 0) { - goto error; + goto error_snapshot; } metadata_stream->out_fd = ret; } @@ -385,7 +386,8 @@ int lttng_kconsumer_snapshot_metadata(uint64_t key, char *path, if (ret_read != -EAGAIN) { ERR("Kernel snapshot reading metadata subbuffer (ret: %zd)", ret_read); - goto error; + ret = ret_read; + goto error_snapshot; } /* ret_read is negative at this point so we will exit the loop. */ continue; @@ -410,11 +412,12 @@ int lttng_kconsumer_snapshot_metadata(uint64_t key, char *path, } ret = 0; - +error_snapshot: + pthread_mutex_unlock(&metadata_stream->lock); cds_list_del(&metadata_stream->send_node); consumer_stream_destroy(metadata_stream, NULL); metadata_channel->metadata_stream = NULL; -error: +error_no_channel: rcu_read_unlock(); return ret; } -- 2.34.1 From ef749d37dd98b5b22ac6588cd6cc9b86c7c04f7f Mon Sep 17 00:00:00 2001 From: Jonathan Rajotte Date: Mon, 10 Sep 2018 20:09:13 -0400 Subject: [PATCH 08/16] Fix: skip uid registry when metadata key is 0 MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit A value of zero for the metadata key indicates that metadata was never created/pushed to the consumer. This can occur in scenarios where a tracker is present since metadata might never be created/pushed. 
Signed-off-by: Jonathan Rajotte Signed-off-by: Jérémie Galarneau --- src/bin/lttng-sessiond/ust-app.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/bin/lttng-sessiond/ust-app.c b/src/bin/lttng-sessiond/ust-app.c index ed9c9fe35..d3369fe39 100644 --- a/src/bin/lttng-sessiond/ust-app.c +++ b/src/bin/lttng-sessiond/ust-app.c @@ -5866,6 +5866,11 @@ int ust_app_snapshot_record(struct ltt_ust_session *usess, struct buffer_reg_channel *reg_chan; struct consumer_socket *socket; + if (!reg->registry->reg.ust->metadata_key) { + /* Skip since no metadata is present */ + continue; + } + /* Get consumer socket to use to push the metadata.*/ socket = consumer_find_socket_by_bitness(reg->bits_per_long, usess->consumer); -- 2.34.1 From 097da1af8827ae4c038e0ede0f4efbe339e9b9fd Mon Sep 17 00:00:00 2001 From: Jonathan Rajotte Date: Mon, 10 Sep 2018 20:09:14 -0400 Subject: [PATCH 09/16] Fix: holding the stream lock does not equate to having data pending MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit The live timer can hold the stream lock while sending empty beacon. An empty beacon does not mean that data is still pending for the stream. Signed-off-by: Jonathan Rajotte Signed-off-by: Jérémie Galarneau --- src/common/consumer/consumer.c | 34 +--------------------------------- 1 file changed, 1 insertion(+), 33 deletions(-) diff --git a/src/common/consumer/consumer.c b/src/common/consumer/consumer.c index 3556b7897..7b87a85fe 100644 --- a/src/common/consumer/consumer.c +++ b/src/common/consumer/consumer.c @@ -3538,34 +3538,6 @@ error_nosignal: return ret; } -/* - * Try to lock the stream mutex. - * - * On success, 1 is returned else 0 indicating that the mutex is NOT lock. - */ -static int stream_try_lock(struct lttng_consumer_stream *stream) -{ - int ret; - - assert(stream); - - /* - * Try to lock the stream mutex. On failure, we know that the stream is - * being used else where hence there is data still being extracted. 
- */ - ret = pthread_mutex_trylock(&stream->lock); - if (ret) { - /* For both EBUSY and EINVAL error, the mutex is NOT locked. */ - ret = 0; - goto end; - } - - ret = 1; - -end: - return ret; -} - /* * Search for a relayd associated to the session id and return the reference. * @@ -3651,11 +3623,7 @@ int consumer_data_pending(uint64_t id) ht->hash_fct(&id, lttng_ht_seed), ht->match_fct, &id, &iter.iter, stream, node_session_id.node) { - /* If this call fails, the stream is being used hence data pending. */ - ret = stream_try_lock(stream); - if (!ret) { - goto data_pending; - } + pthread_mutex_lock(&stream->lock); /* * A removed node from the hash table indicates that the stream has -- 2.34.1 From c27c3f5a6a9ead7ffc150a557354cbc644c17dc6 Mon Sep 17 00:00:00 2001 From: Jonathan Rajotte Date: Mon, 10 Sep 2018 20:09:11 -0400 Subject: [PATCH 10/16] Fix: double put on error path MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Let relay_index_try_flush be responsible for the self-reference put on error path. Code flow of relay_index_try_flush is a bit tricky but the only error flow (via relay_index_file_write) will always mark the index as flushed and perform the self-reference put. Signed-off-by: Jonathan Rajotte Signed-off-by: Jérémie Galarneau --- src/bin/lttng-relayd/main.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/src/bin/lttng-relayd/main.c b/src/bin/lttng-relayd/main.c index 87a22c525..7f091c2b7 100644 --- a/src/bin/lttng-relayd/main.c +++ b/src/bin/lttng-relayd/main.c @@ -2286,8 +2286,13 @@ static int relay_recv_index(const struct lttcomm_relayd_hdr *recv_hdr, /* no flush. */ ret = 0; } else { + /* + * ret < 0 + * + * relay_index_try_flush is responsible for the self-reference + * put of the index object on error. 
+ */ ERR("relay_index_try_flush error %d", ret); - relay_index_put(index); ret = -1; } @@ -2691,9 +2696,13 @@ static int handle_index_data(struct relay_stream *stream, uint64_t net_seq_num, /* No flush. */ ret = 0; } else { - /* Put self-ref for this index due to error. */ - relay_index_put(index); - index = NULL; + /* + * ret < 0 + * + * relay_index_try_flush is responsible for the self-reference + * put of the index object on error. + */ + ERR("relay_index_try_flush error %d", ret); ret = -1; } end: -- 2.34.1 From f5a2cd657eb3a81a8788438d796e906148a1b3b1 Mon Sep 17 00:00:00 2001 From: Jonathan Rajotte Date: Wed, 12 Sep 2018 11:55:57 -0400 Subject: [PATCH 11/16] fd-tracker Fix: error path lead to null pointer dereference of handle Upstream status: pending review and upstream merge of the fd-tracker feature. Signed-off-by: Jonathan Rajotte --- src/common/fd-tracker/fd-tracker.c | 29 ++++++++++++----------------- 1 file changed, 12 insertions(+), 17 deletions(-) diff --git a/src/common/fd-tracker/fd-tracker.c b/src/common/fd-tracker/fd-tracker.c index 8af38c29c..2cf26f723 100644 --- a/src/common/fd-tracker/fd-tracker.c +++ b/src/common/fd-tracker/fd-tracker.c @@ -500,18 +500,17 @@ struct fs_handle *fd_tracker_open_fs_handle(struct fd_tracker *tracker, if (tracker->count.suspendable.active > 0) { ret = fd_tracker_suspend_handles(tracker, 1); if (ret) { - goto error_destroy; + goto end; } } else { /* * There are not enough active suspendable file - * descriptors to open a new fd and still accomodate the - * tracker's capacity. + * descriptors to open a new fd and still accommodate + * the tracker's capacity. 
*/ WARN("Cannot open file system handle, too many unsuspendable file descriptors are opened (%u)", tracker->count.unsuspendable); - ret = -EMFILE; - goto error_destroy; + goto end; } } @@ -524,15 +523,13 @@ struct fs_handle *fd_tracker_open_fs_handle(struct fd_tracker *tracker, ret = pthread_mutex_init(&handle->lock, NULL); if (ret) { PERROR("Failed to initialize handle mutex while creating fs handle"); - free(handle); - goto error_free; + goto error_mutex_init; } handle->fd = open_from_properties(path, &properties); if (handle->fd < 0) { PERROR("Failed to open fs handle to %s, open() returned", path); - ret = -errno; - goto error_destroy; + goto error; } handle->properties = properties; @@ -542,28 +539,26 @@ struct fs_handle *fd_tracker_open_fs_handle(struct fd_tracker *tracker, if (!handle->inode) { ERR("Failed to get lttng_inode corresponding to file %s", path); - goto error_destroy; + goto error; } if (fstat(handle->fd, &fd_stat)) { PERROR("Failed to retrieve file descriptor inode while creating fs handle, fstat() returned"); - ret = -errno; - goto error_destroy; + goto error; } handle->ino = fd_stat.st_ino; fd_tracker_track(tracker, handle); - pthread_mutex_unlock(&tracker->lock); end: + pthread_mutex_unlock(&tracker->lock); return handle; -error_destroy: - pthread_mutex_destroy(&handle->lock); -error_free: +error: if (handle->inode) { lttng_inode_put(handle->inode); } + pthread_mutex_destroy(&handle->lock); +error_mutex_init: free(handle); - pthread_mutex_unlock(&tracker->lock); handle = NULL; goto end; } -- 2.34.1 From 9e3d760f2bf927be2c59d0ccb3b64e2663ae7263 Mon Sep 17 00:00:00 2001 From: Jonathan Rajotte Date: Sun, 5 Aug 2018 21:38:10 -0400 Subject: [PATCH 12/16] fd-tracker Fix: do not warn on index file not found Upstream status pending on fd-tracker merge Signed-off-by: Jonathan Rajotte --- src/bin/lttng-relayd/index-file.c | 10 ++++++++-- src/common/fd-tracker/fd-tracker.c | 18 +++++++++++++++--- 2 files changed, 23 insertions(+), 5 deletions(-) diff 
--git a/src/bin/lttng-relayd/index-file.c b/src/bin/lttng-relayd/index-file.c index 04dac4068..4a69f39bc 100644 --- a/src/bin/lttng-relayd/index-file.c +++ b/src/bin/lttng-relayd/index-file.c @@ -65,8 +65,9 @@ int unlink_through_handle(const char *path) DBG("Unlinking index at %s through a filesystem handle", path); handle = fd_tracker_open_fs_handle(the_fd_tracker, path, flags, NULL); if (!handle) { - /* There is nothing to do. */ - DBG("File %s does not exist, ignoring unlink", path); + if (errno == ENOENT) { + DBG("File %s does not exist, ignoring unlink", path); + } goto end; } @@ -151,12 +152,14 @@ struct relay_index_file *relay_index_file_create(const char *path_name, fs_handle = fd_tracker_open_fs_handle(the_fd_tracker, idx_file_path, flags, &mode); if (!fs_handle) { + PERROR("Failed to open index file at %s", idx_file_path); goto error; } index_file->handle = fs_handle; fd = fs_handle_get_fd(fs_handle); if (fd < 0) { + PERROR("Failed to get fd of index file at %s", idx_file_path); goto error; } @@ -302,6 +305,7 @@ int relay_index_file_write(const struct relay_index_file *index_file, fd = fs_handle_get_fd(index_file->handle); if (fd < 0) { + PERROR("Failed to get fd from handle"); ret = fd; goto end; } @@ -329,6 +333,7 @@ int relay_index_file_read(const struct relay_index_file *index_file, fd = fs_handle_get_fd(index_file->handle); if (fd < 0) { + PERROR("Failed to get fd of handle %p", index_file->handle); ret = fd; goto end; } @@ -352,6 +357,7 @@ int relay_index_file_seek_end(struct relay_index_file *index_file) fd = fs_handle_get_fd(index_file->handle); if (fd < 0) { + PERROR("Failed to get fd of handle %p", index_file->handle); ret = fd; goto end; } diff --git a/src/common/fd-tracker/fd-tracker.c b/src/common/fd-tracker/fd-tracker.c index 2cf26f723..eddf5ada6 100644 --- a/src/common/fd-tracker/fd-tracker.c +++ b/src/common/fd-tracker/fd-tracker.c @@ -305,9 +305,9 @@ int fs_handle_restore(struct fs_handle *handle) ret = open_from_properties(path, 
&handle->properties); if (ret < 0) { + errno = -ret; PERROR("Failed to restore filesystem handle to %s, open() failed", path); - ret = -errno; goto end; } fd = ret; @@ -483,10 +483,13 @@ end: return ret; } +/* + * If return NULL check errno for error. + */ struct fs_handle *fd_tracker_open_fs_handle(struct fd_tracker *tracker, const char *path, int flags, mode_t *mode) { - int ret; + int ret = 0; struct fs_handle *handle = NULL; struct stat fd_stat; struct open_properties properties = { @@ -500,6 +503,8 @@ struct fs_handle *fd_tracker_open_fs_handle(struct fd_tracker *tracker, if (tracker->count.suspendable.active > 0) { ret = fd_tracker_suspend_handles(tracker, 1); if (ret) { + ERR("Suspend handled failed"); + ret = EMFILE; goto end; } } else { @@ -510,12 +515,14 @@ struct fs_handle *fd_tracker_open_fs_handle(struct fd_tracker *tracker, */ WARN("Cannot open file system handle, too many unsuspendable file descriptors are opened (%u)", tracker->count.unsuspendable); + ret = EMFILE; goto end; } } handle = zmalloc(sizeof(*handle)); if (!handle) { + ret = ENOMEM; goto end; } handle->tracker = tracker; @@ -523,12 +530,14 @@ struct fs_handle *fd_tracker_open_fs_handle(struct fd_tracker *tracker, ret = pthread_mutex_init(&handle->lock, NULL); if (ret) { PERROR("Failed to initialize handle mutex while creating fs handle"); + ret = errno; goto error_mutex_init; } handle->fd = open_from_properties(path, &properties); if (handle->fd < 0) { - PERROR("Failed to open fs handle to %s, open() returned", path); + /* ret contains -errno on error. 
*/ + ret = -ret; goto error; } @@ -537,6 +546,7 @@ struct fs_handle *fd_tracker_open_fs_handle(struct fd_tracker *tracker, handle->inode = lttng_inode_registry_get_inode(tracker->inode_registry, handle->fd, path); if (!handle->inode) { + ret = errno; ERR("Failed to get lttng_inode corresponding to file %s", path); goto error; @@ -544,6 +554,7 @@ struct fs_handle *fd_tracker_open_fs_handle(struct fd_tracker *tracker, if (fstat(handle->fd, &fd_stat)) { PERROR("Failed to retrieve file descriptor inode while creating fs handle, fstat() returned"); + ret = errno; goto error; } handle->ino = fd_stat.st_ino; @@ -551,6 +562,7 @@ struct fs_handle *fd_tracker_open_fs_handle(struct fd_tracker *tracker, fd_tracker_track(tracker, handle); end: pthread_mutex_unlock(&tracker->lock); + errno = ret; return handle; error: if (handle->inode) { -- 2.34.1 From 86a047c71886157d4e4c292f295b09ae70391097 Mon Sep 17 00:00:00 2001 From: Jonathan Rajotte Date: Sun, 22 Jul 2018 23:38:34 -0400 Subject: [PATCH 13/16] Perform local data pending check then relayd Signed-off-by: Jonathan Rajotte --- src/common/consumer/consumer.c | 61 ++++++++++++++++------------------ 1 file changed, 29 insertions(+), 32 deletions(-) diff --git a/src/common/consumer/consumer.c b/src/common/consumer/consumer.c index 7b87a85fe..482d4a63b 100644 --- a/src/common/consumer/consumer.c +++ b/src/common/consumer/consumer.c @@ -3604,21 +3604,6 @@ int consumer_data_pending(uint64_t id) /* Ease our life a bit */ ht = consumer_data.stream_list_ht; - relayd = find_relayd_by_session_id(id); - if (relayd) { - /* Send init command for data pending. */ - pthread_mutex_lock(&relayd->ctrl_sock_mutex); - ret = relayd_begin_data_pending(&relayd->control_sock, - relayd->relayd_session_id); - pthread_mutex_unlock(&relayd->ctrl_sock_mutex); - if (ret < 0) { - /* Communication error thus the relayd so no data pending. */ - ERR("Relayd begin data pending failed. 
Cleaning up relayd %" PRIu64".", relayd->net_seq_idx); - lttng_consumer_cleanup_relayd(relayd); - goto data_not_pending; - } - } - cds_lfht_for_each_entry_duplicate(ht->ht, ht->hash_fct(&id, lttng_ht_seed), ht->match_fct, &id, @@ -3641,9 +3626,29 @@ int consumer_data_pending(uint64_t id) } } - /* Relayd check */ - if (relayd) { - pthread_mutex_lock(&relayd->ctrl_sock_mutex); + pthread_mutex_unlock(&stream->lock); + } + + relayd = find_relayd_by_session_id(id); + if (relayd) { + unsigned int is_data_inflight = 0; + + /* Send init command for data pending. */ + pthread_mutex_lock(&relayd->ctrl_sock_mutex); + ret = relayd_begin_data_pending(&relayd->control_sock, + relayd->relayd_session_id); + if (ret < 0) { + pthread_mutex_unlock(&relayd->ctrl_sock_mutex); + /* Communication error thus the relayd so no data pending. */ + ERR("Relayd begin data pending failed. Cleaning up relayd %" PRIu64".", relayd->net_seq_idx); + lttng_consumer_cleanup_relayd(relayd); + goto data_not_pending; + } + + cds_lfht_for_each_entry_duplicate(ht->ht, + ht->hash_fct(&id, lttng_ht_seed), + ht->match_fct, &id, + &iter.iter, stream, node_session_id.node) { if (stream->metadata_flag) { ret = relayd_quiescent_control(&relayd->control_sock, stream->relayd_stream_id); @@ -3652,27 +3657,19 @@ int consumer_data_pending(uint64_t id) stream->relayd_stream_id, stream->next_net_seq_num - 1); } - if (ret < 0) { - ERR("Relayd data pending failed. 
Cleaning up relayd %" PRIu64".", relayd->net_seq_idx); - lttng_consumer_cleanup_relayd(relayd); + if (ret == 1) { pthread_mutex_unlock(&relayd->ctrl_sock_mutex); pthread_mutex_unlock(&stream->lock); - goto data_not_pending; + goto data_pending; } - pthread_mutex_unlock(&relayd->ctrl_sock_mutex); - if (ret == 1) { + if (ret < 0) { + pthread_mutex_unlock(&relayd->ctrl_sock_mutex); pthread_mutex_unlock(&stream->lock); - goto data_pending; + goto data_not_pending; } } - pthread_mutex_unlock(&stream->lock); - } - - if (relayd) { - unsigned int is_data_inflight = 0; - /* Send init command for data pending. */ - pthread_mutex_lock(&relayd->ctrl_sock_mutex); + /* Send end command for data pending. */ ret = relayd_end_data_pending(&relayd->control_sock, relayd->relayd_session_id, &is_data_inflight); pthread_mutex_unlock(&relayd->ctrl_sock_mutex); -- 2.34.1 From 80db18da0437527e6cf3cc9e96f713a7ac21ae00 Mon Sep 17 00:00:00 2001 From: =?utf8?q?J=C3=A9r=C3=A9mie=20Galarneau?= Date: Fri, 20 Jul 2018 18:41:49 -0400 Subject: [PATCH 14/16] Set consumer's verbosity to the max level on --verbose-consumer MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit The consumer's verbosity is set to '1' when --verbose-consumer is used when launching the session daemon. This means that all DBG2/3() statements are ignored. This commit always sets the consumer's verbosity to the maximal level. 
Signed-off-by: Jérémie Galarneau --- src/bin/lttng-consumerd/lttng-consumerd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/bin/lttng-consumerd/lttng-consumerd.c b/src/bin/lttng-consumerd/lttng-consumerd.c index 50e2f4648..78316056d 100644 --- a/src/bin/lttng-consumerd/lttng-consumerd.c +++ b/src/bin/lttng-consumerd/lttng-consumerd.c @@ -252,7 +252,7 @@ static int parse_args(int argc, char **argv) lttng_opt_quiet = 1; break; case 'v': - lttng_opt_verbose = 1; + lttng_opt_verbose = 3; break; case 'V': fprintf(stdout, "%s\n", VERSION); -- 2.34.1 From 9a951630b590d14966e19b8d54f078fe174c1f47 Mon Sep 17 00:00:00 2001 From: Jonathan Rajotte Date: Tue, 28 Aug 2018 21:19:53 -0400 Subject: [PATCH 15/16] Teardown relayd on communication error during data pending Signed-off-by: Jonathan Rajotte --- src/common/consumer/consumer.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/common/consumer/consumer.c b/src/common/consumer/consumer.c index 482d4a63b..292bd3938 100644 --- a/src/common/consumer/consumer.c +++ b/src/common/consumer/consumer.c @@ -3638,8 +3638,8 @@ int consumer_data_pending(uint64_t id) ret = relayd_begin_data_pending(&relayd->control_sock, relayd->relayd_session_id); if (ret < 0) { - pthread_mutex_unlock(&relayd->ctrl_sock_mutex); /* Communication error thus the relayd so no data pending. */ + pthread_mutex_unlock(&relayd->ctrl_sock_mutex); ERR("Relayd begin data pending failed. Cleaning up relayd %" PRIu64".", relayd->net_seq_idx); lttng_consumer_cleanup_relayd(relayd); goto data_not_pending; @@ -3663,6 +3663,8 @@ int consumer_data_pending(uint64_t id) goto data_pending; } if (ret < 0) { + ERR("Relayd data pending failed. 
Cleaning up relayd %" PRIu64".", relayd->net_seq_idx); + lttng_consumer_cleanup_relayd(relayd); pthread_mutex_unlock(&relayd->ctrl_sock_mutex); pthread_mutex_unlock(&stream->lock); goto data_not_pending; -- 2.34.1 From ac8d0a59d7cb787ef38a18c03f385b8be14f4d86 Mon Sep 17 00:00:00 2001 From: Jonathan Rajotte Date: Fri, 21 Sep 2018 04:57:16 -0400 Subject: [PATCH 16/16] EfficiOS backport 2.9 revision 6 Signed-off-by: Jonathan Rajotte --- version/extra_version_description | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/version/extra_version_description b/version/extra_version_description index 97ee0a857..2cfc12a2e 100644 --- a/version/extra_version_description +++ b/version/extra_version_description @@ -1 +1 @@ -EfficiOS Revision 5 +EfficiOS Revision 6 -- 2.34.1