#include <sys/stat.h>
#include <sys/types.h>
#include <sys/wait.h>
+#include <sys/resource.h>
#include <inttypes.h>
#include <urcu/futex.h>
#include <urcu/uatomic.h>
#include <unistd.h>
#include <fcntl.h>
+#include <ctype.h>
#include <lttng/lttng.h>
#include <common/common.h>
*/
static struct relay_conn_queue relay_conn_queue;
+/* Cap of file descriptors to be in simultaneous use by the relay daemon. */
+static unsigned int lttng_opt_fd_cap;
+
/* Global relay stream hash table. */
struct lttng_ht *relay_streams_ht;
/* Relayd health monitoring */
struct health_app *health_relayd;
+/* Global fd tracker. */
+struct fd_tracker *the_fd_tracker;
+
static struct option long_options[] = {
{ "control-port", 1, 0, 'C', },
{ "data-port", 1, 0, 'D', },
{ "daemonize", 0, 0, 'd', },
{ "background", 0, 0, 'b', },
{ "group", 1, 0, 'g', },
+ { "fd-cap", 1, 0, '\0', },
{ "help", 0, 0, 'h', },
{ "output", 1, 0, 'o', },
{ "verbose", 0, 0, 'v', },
switch (opt) {
case 0:
- fprintf(stderr, "option %s", optname);
- if (arg) {
- fprintf(stderr, " with arg %s\n", arg);
+ if (!strcmp(optname, "fd-cap")) {
+ unsigned long v;
+
+ errno = 0;
+ v = strtoul(arg, NULL, 0);
+ if (errno != 0 || !isdigit(arg[0])) {
+ ERR("Wrong value in --fd-cap parameter: %s", arg);
+ ret = -1;
+ goto end;
+ }
+ if (v < DEFAULT_RELAYD_MINIMAL_FD_CAP) {
+ ERR("File descriptor cap must be set to at least %d",
+ DEFAULT_RELAYD_MINIMAL_FD_CAP);
+ }
+ if (v >= UINT_MAX) {
+ ERR("File descriptor cap overflow in --fd-cap parameter: %s", arg);
+ ret = -1;
+ goto end;
+ }
+ lttng_opt_fd_cap = (unsigned int) v;
+ DBG3("File descriptor cap set to %u", lttng_opt_fd_cap);
+
+ } else {
+ fprintf(stderr, "unknown option %s", optname);
+ if (arg) {
+ fprintf(stderr, " with arg %s\n", arg);
+ }
}
break;
case 'C':
goto exit;
}
}
+ if (lttng_opt_fd_cap == 0) {
+ int ret;
+ struct rlimit rlimit;
+
+ ret = getrlimit(RLIMIT_NOFILE, &rlimit);
+ if (ret) {
+ PERROR("Failed to get file descriptor limit");
+ retval = -1;
+ }
+
+ lttng_opt_fd_cap = rlimit.rlim_cur;
+ }
exit:
free(optstring);
/*
 * Dump the global relay objects by calling, in order,
 * print_viewer_streams(), print_relay_streams() and print_sessions().
 *
 * This block is shown in patch form: the lines prefixed with '-' (the
 * rcu_register_thread()/rcu_unregister_thread() pair and the blank lines
 * around it) are removals, so the resulting function no longer registers
 * the calling thread with RCU itself.
 * NOTE(review): presumably the caller is now expected to already be a
 * registered RCU thread -- confirm against the call sites.
 */
static void print_global_objects(void)
{
- rcu_register_thread();
-
print_viewer_streams();
print_relay_streams();
print_sessions();
-
- rcu_unregister_thread();
}
/*
if (tracing_group_name_override) {
free((void *) tracing_group_name);
}
+ fd_tracker_log(the_fd_tracker);
}
/*
memset(&reply, 0, sizeof(reply));
- switch (conn->minor) {
- case 1:
- case 2:
- case 3:
- break;
- case 4: /* LTTng sessiond 2.4 */
- default:
+ if (conn->minor < 4) {
+ /* From 2.1 to 2.3 */
+ ret = 0;
+ } else if (conn->minor >= 4 && conn->minor < 11) {
+ /* From 2.4 to 2.10 */
ret = cmd_create_session_2_4(payload, session_name,
hostname, &live_timer, &snapshot);
+ } else {
+ /* From 2.11 to ... */
+ ret = cmd_create_session_2_11(payload, session_name,
+ hostname, &live_timer, &snapshot);
}
+
if (ret < 0) {
goto send_reply;
}
uint64_t stream_handle = -1ULL;
char *path_name = NULL, *channel_name = NULL;
uint64_t tracefile_size = 0, tracefile_count = 0;
+ struct relay_stream_chunk_id stream_chunk_id = { 0 };
if (!session || !conn->version_check_done) {
ERR("Trying to add a stream before version check");
goto end_no_session;
}
- switch (session->minor) {
- case 1: /* LTTng sessiond 2.1. Allocates path_name and channel_name. */
+ if (session->minor == 1) {
+ /* For 2.1 */
ret = cmd_recv_stream_2_1(payload, &path_name,
&channel_name);
- break;
- case 2: /* LTTng sessiond 2.2. Allocates path_name and channel_name. */
- default:
+ } else if (session->minor > 1 && session->minor < 11) {
+ /* From 2.2 to 2.10 */
ret = cmd_recv_stream_2_2(payload, &path_name,
&channel_name, &tracefile_size, &tracefile_count);
- break;
+ } else {
+ /* From 2.11 to ... */
+ ret = cmd_recv_stream_2_11(payload, &path_name,
+ &channel_name, &tracefile_size, &tracefile_count,
+ &stream_chunk_id.value);
+ stream_chunk_id.is_set = true;
}
+
if (ret < 0) {
goto send_reply;
}
/* We pass ownership of path_name and channel_name. */
stream = stream_create(trace, stream_handle, path_name,
- channel_name, tracefile_size, tracefile_count);
+ channel_name, tracefile_size, tracefile_count,
+ &stream_chunk_id);
path_name = NULL;
channel_name = NULL;
goto end_stream_unlock;
}
- stream->chunk_id = stream_info.new_chunk_id;
+ assert(stream->current_chunk_id.is_set);
+ stream->current_chunk_id.value = stream_info.new_chunk_id;
if (stream->is_metadata) {
/*
chunk_id = be64toh(msg.chunk_id);
- DBG("Evaluating rotate pending for chunk id %" PRIu64, chunk_id);
+ DBG("Evaluating rotate pending for session \"%s\" and chunk id %" PRIu64,
+ session->session_name, chunk_id);
/*
* Iterate over all the streams in the session and check if they are
rotate_pending = true;
DBG("Stream %" PRIu64 " is still rotating",
stream->stream_handle);
- } else if (stream->chunk_id < chunk_id) {
+ } else if (stream->current_chunk_id.value < chunk_id) {
/*
* Stream closed on the consumer but still active on the
* relay.
DBG3("Partial reception of data connection header (received %" PRIu64 " bytes, %" PRIu64 " bytes left to receive, fd = %i)",
state->received, state->left_to_receive,
conn->sock->fd);
- ret = 0;
goto end;
}
conn->protocol.data.state.receive_payload.rotate_index = true;
}
- ret = 0;
end_stream_unlock:
pthread_mutex_unlock(&stream->lock);
stream_put(stream);
uint64_t left_to_receive = state->left_to_receive;
struct relay_session *session;
+ DBG3("Receiving data for stream id %" PRIu64 " seqnum %" PRIu64 ", %" PRIu64" bytes received, %" PRIu64 " bytes left to receive",
+ state->header.stream_id, state->header.net_seq_num,
+ state->received, left_to_receive);
+
stream = stream_get_by_id(state->header.stream_id);
if (!stream) {
/* Protocol error. */
- DBG("relay_process_data_receive_payload: Cannot find stream %" PRIu64,
+ ERR("relay_process_data_receive_payload: cannot find stream %" PRIu64,
state->header.stream_id);
status = RELAY_CONNECTION_STATUS_ERROR;
goto end;
pthread_mutex_lock(&stream->lock);
session = stream->trace->session;
-
- DBG3("Receiving data for stream id %" PRIu64 " seqnum %" PRIu64 ", %" PRIu64" bytes received, %" PRIu64 " bytes left to receive",
- state->header.stream_id, state->header.net_seq_num,
- state->received, left_to_receive);
+ if (!conn->session) {
+ ret = connection_set_session(conn, session);
+ if (ret) {
+ status = RELAY_CONNECTION_STATUS_ERROR;
+ goto end_stream_unlock;
+ }
+ }
/*
* The size of the "chunk" received on any iteration is bounded by:
ret = write_padding_to_file(stream->stream_fd->fd,
state->header.padding_size);
- if (ret < 0) {
+ if ((int64_t) ret < (int64_t) state->header.padding_size) {
ERR("write_padding_to_file: fail stream %" PRIu64 " net_seq_num %" PRIu64 " ret %d",
stream->stream_handle,
state->header.net_seq_num, ret);
status = relay_process_control(ctrl_conn);
if (status != RELAY_CONNECTION_STATUS_OK) {
+ /*
+ * On socket error, flag the session as aborted to force
+ * the cleanup of its streams; otherwise they can leak
+ * during the lifetime of the relayd.
+ *
+ * This prevents situations in which streams can be
+ * left open because an index was received, the
+ * control connection is closed, and the data
+ * connection is closed (uncleanly) before the packet's
+ * data is provided.
+ *
+ * Since the control connection encountered an error,
+ * it is okay to be conservative and close the
+ * session right now as we can't rely on the protocol
+ * being respected anymore.
+ */
+ if (status == RELAY_CONNECTION_STATUS_ERROR) {
+ session_abort(ctrl_conn->session);
+ }
+
/* Clear the connection on error or close. */
relay_thread_close_connection(&events,
pollfd,
status = relay_process_data(data_conn);
/* Connection closed or error. */
if (status != RELAY_CONNECTION_STATUS_OK) {
+ /*
+ * On socket error, flag the session as aborted to force
+ * the cleanup of its streams; otherwise they can leak
+ * during the lifetime of the relayd.
+ *
+ * This prevents situations in which streams can be
+ * left open because an index was received, the
+ * control connection is closed, and the data
+ * connection is closed (uncleanly) before the packet's
+ * data is provided.
+ *
+ * Since the data connection encountered an error,
+ * it is okay to be conservative and close the
+ * session right now as we can't rely on the protocol
+ * being respected anymore.
+ */
+ if (status == RELAY_CONNECTION_STATUS_ERROR) {
+ session_abort(data_conn->session);
+ }
relay_thread_close_connection(&events, pollfd,
data_conn);
/*
exit:
error:
- /* Cleanup reamaining connection object. */
+ /* Cleanup remaining connection object. */
rcu_read_lock();
cds_lfht_for_each_entry(relay_connections_ht->ht, &iter.iter,
destroy_conn,
sock_n.node) {
health_code_update();
- if (session_abort(destroy_conn->session)) {
- assert(0);
- }
+ session_abort(destroy_conn->session);
/*
* No need to grab another ref, because we own
goto exit_options;
}
+ ret = fclose(stdin);
+ if (ret) {
+ PERROR("Failed to close stdin");
+ goto exit_options;
+ }
/* Try to create directory if -o, --output is specified. */
if (opt_output_path) {
if (*opt_output_path != '/') {
/* Daemonize */
if (opt_daemon || opt_background) {
- int i;
-
ret = lttng_daemonize(&child_ppid, &recv_child_signal,
!opt_background);
if (ret < 0) {
retval = -1;
goto exit_options;
}
+ }
- /*
- * We are in the child. Make sure all other file
- * descriptors are closed, in case we are called with
- * more opened file descriptors than the standard ones.
- */
- for (i = 3; i < sysconf(_SC_OPEN_MAX); i++) {
- (void) close(i);
- }
+ /*
+ * The RCU thread registration (and use, through the fd-tracker's
+ * creation) is done after the daemonization so that we do not have
+ * to deal with liburcu's fork() management, as call_rcu would
+ * otherwise need to be restored.
+ */
+ rcu_register_thread();
+
+ the_fd_tracker = fd_tracker_create(lttng_opt_fd_cap);
+ if (!the_fd_tracker) {
+ retval = -1;
+ goto exit_options;
}
/* Initialize thread health monitoring */
/* Ensure all prior call_rcu are done. */
rcu_barrier();
+ fd_tracker_destroy(the_fd_tracker);
+ rcu_unregister_thread();
+
if (!retval) {
exit(EXIT_SUCCESS);
} else {