Fix: rotation may never complete in per-PID buffering mode
[lttng-tools.git] / src / bin / lttng-sessiond / sessiond-timer.c
index 0d500aa5522ae92bd41a645dc587cd05c44faba7..b8cf4825aa4e67ed6c325a07f2cc2723ed5519a5 100644 (file)
@@ -1,5 +1,6 @@
 /*
  * Copyright (C) 2017 - Julien Desfossez <jdesfossez@efficios.com>
+ * Copyright (C) 2018 - Jérémie Galarneau <jeremie.galarneau@efficios.com>
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License, version 2 only, as
 #include "health-sessiond.h"
 #include "rotation-thread.h"
 
+#define LTTNG_SESSIOND_SIG_QS                          (SIGRTMIN + 10)
+#define LTTNG_SESSIOND_SIG_EXIT                                (SIGRTMIN + 11)
+#define LTTNG_SESSIOND_SIG_PENDING_ROTATION_CHECK      (SIGRTMIN + 12)
+#define LTTNG_SESSIOND_SIG_SCHEDULED_ROTATION          (SIGRTMIN + 13)
+
+#define UINT_TO_PTR(value)                             \
+       ({                                              \
+               assert((value) <= UINTPTR_MAX);         \
+               (void *) (uintptr_t) (value);           \
+       })
+#define PTR_TO_UINT(ptr) ((uintptr_t) (ptr))
+
+/*
+ * Handle timer teardown race wrt memory free of private data by sessiond:
+ * signals are handled by a single thread, which permits a synchronization
+ * point between handling of each signal. Internal lock ensures mutual
+ * exclusion.
+ */
 static
-struct timer_signal_data timer_signal = {
+struct timer_signal_data {
+       /* Thread managing signals. */
+       pthread_t tid;
+       int qs_done;
+       pthread_mutex_t lock;
+} timer_signal = {
        .tid = 0,
        .qs_done = 0,
        .lock = PTHREAD_MUTEX_INITIALIZER,
@@ -43,7 +67,7 @@ void setmask(sigset_t *mask)
        if (ret) {
                PERROR("sigemptyset");
        }
-       ret = sigaddset(mask, LTTNG_SESSIOND_SIG_TEARDOWN);
+       ret = sigaddset(mask, LTTNG_SESSIOND_SIG_QS);
        if (ret) {
                PERROR("sigaddset teardown");
        }
@@ -51,19 +75,23 @@ void setmask(sigset_t *mask)
        if (ret) {
                PERROR("sigaddset exit");
        }
-       ret = sigaddset(mask, LTTNG_SESSIOND_SIG_ROTATE_PENDING);
+       ret = sigaddset(mask, LTTNG_SESSIOND_SIG_PENDING_ROTATION_CHECK);
        if (ret) {
-               PERROR("sigaddset switch");
+               PERROR("sigaddset pending rotation check");
+       }
+       ret = sigaddset(mask, LTTNG_SESSIOND_SIG_SCHEDULED_ROTATION);
+       if (ret) {
+               PERROR("sigaddset scheduled rotation");
        }
 }
 
 /*
- * This is the same function as consumer_timer_signal_thread_qs, when it
+ * This mirrors consumer_timer_signal_thread_qs: when it
  * returns, it means that no timer signr is currently pending or being handled
  * by the timer thread. This cannot be called from the timer thread.
  */
 static
-void sessiond_timer_signal_thread_qs(unsigned int signr)
+void timer_signal_thread_qs(unsigned int signr)
 {
        sigset_t pending_set;
        int ret;
@@ -100,10 +128,10 @@ void sessiond_timer_signal_thread_qs(unsigned int signr)
        cmm_smp_mb();
 
        /*
-        * Kill with LTTNG_SESSIOND_SIG_TEARDOWN, so signal management thread
+        * Kill with LTTNG_SESSIOND_SIG_QS, so signal management thread
         * wakes up.
         */
-       kill(getpid(), LTTNG_SESSIOND_SIG_TEARDOWN);
+       kill(getpid(), LTTNG_SESSIOND_SIG_QS);
 
        while (!CMM_LOAD_SHARED(timer_signal.qs_done)) {
                caa_cpu_relax();
@@ -121,19 +149,17 @@ void sessiond_timer_signal_thread_qs(unsigned int signr)
  * a positive value if no timer was created (not an error).
  */
 static
-int session_timer_start(timer_t *timer_id, struct ltt_session *session,
+int timer_start(timer_t *timer_id, uint64_t session_id,
                unsigned int timer_interval_us, int signal, bool one_shot)
 {
        int ret = 0, delete_ret;
        struct sigevent sev;
        struct itimerspec its;
 
-       assert(session);
-
        sev.sigev_notify = SIGEV_SIGNAL;
        sev.sigev_signo = signal;
-       sev.sigev_value.sival_ptr = session;
-       ret = timer_create(CLOCKID, &sev, timer_id);
+       sev.sigev_value.sival_ptr = UINT_TO_PTR(session_id);
+       ret = timer_create(CLOCK_MONOTONIC, &sev, timer_id);
        if (ret == -1) {
                PERROR("timer_create");
                goto end;
@@ -167,7 +193,7 @@ end:
 }
 
 static
-int session_timer_stop(timer_t *timer_id, int signal)
+int timer_stop(timer_t *timer_id, int signal)
 {
        int ret = 0;
 
@@ -177,191 +203,133 @@ int session_timer_stop(timer_t *timer_id, int signal)
                goto end;
        }
 
-       sessiond_timer_signal_thread_qs(signal);
+       timer_signal_thread_qs(signal);
        *timer_id = 0;
 end:
        return ret;
 }
 
-int sessiond_timer_rotate_pending_start(struct ltt_session *session,
+int timer_session_rotation_pending_check_start(struct ltt_session *session,
                unsigned int interval_us)
 {
        int ret;
 
-       DBG("Enabling rotate pending timer on session %" PRIu64, session->id);
+       DBG("Enabling session rotation pending check timer on session %" PRIu64,
+                       session->id);
        /*
         * We arm this timer in a one-shot mode so we don't have to disable it
-        * explicitly (which could deadlock if the timer thread is blocked writing
-        * in the rotation_timer_pipe).
+        * explicitly (which could deadlock if the timer thread is blocked
+        * writing in the rotation_timer_pipe).
+        *
         * Instead, we re-arm it if needed after the rotation_pending check as
-        * returned. Also, this timer is usually only needed once, so there is no
-        * need to go through the whole signal teardown scheme everytime.
+        * returned. Also, this timer is usually only needed once, so there is
+        * no need to go through the whole signal teardown scheme everytime.
         */
-       ret = session_timer_start(&session->rotate_relay_pending_timer,
-                       session, interval_us,
-                       LTTNG_SESSIOND_SIG_ROTATE_PENDING,
+       ret = timer_start(&session->rotation_pending_check_timer,
+                       session->id, interval_us,
+                       LTTNG_SESSIOND_SIG_PENDING_ROTATION_CHECK,
                        /* one-shot */ true);
        if (ret == 0) {
-               session->rotate_relay_pending_timer_enabled = true;
+               session->rotation_pending_check_timer_enabled = true;
        }
 
        return ret;
 }
 
 /*
- * Stop and delete the channel's live timer.
- * Called with session and session_list locks held.
+ * Call with session and session_list locks held.
  */
-void sessiond_timer_rotate_pending_stop(struct ltt_session *session)
+int timer_session_rotation_pending_check_stop(struct ltt_session *session)
 {
        int ret;
 
        assert(session);
 
-       DBG("Disabling timer rotate pending on session %" PRIu64, session->id);
-       ret = session_timer_stop(&session->rotate_relay_pending_timer,
-                       LTTNG_SESSIOND_SIG_ROTATE_PENDING);
+       DBG("Disabling session rotation pending check timer on session %" PRIu64,
+                       session->id);
+       ret = timer_stop(&session->rotation_pending_check_timer,
+                       LTTNG_SESSIOND_SIG_PENDING_ROTATION_CHECK);
        if (ret == -1) {
-               ERR("Failed to stop rotate_pending timer");
+               ERR("Failed to stop rotate_pending_check timer");
+       } else {
+               session->rotation_pending_check_timer_enabled = false;
        }
-
-       session->rotate_relay_pending_timer_enabled = false;
+       return ret;
 }
 
 /*
- * Block the RT signals for the entire process. It must be called from the
- * sessiond main before creating the threads
+ * Call with session and session_list locks held.
  */
-int sessiond_timer_signal_init(void)
+int timer_session_rotation_schedule_timer_start(struct ltt_session *session,
+               unsigned int interval_us)
 {
        int ret;
-       sigset_t mask;
-
-       /* Block signal for entire process, so only our thread processes it. */
-       setmask(&mask);
-       ret = pthread_sigmask(SIG_BLOCK, &mask, NULL);
-       if (ret) {
-               errno = ret;
-               PERROR("pthread_sigmask");
-               return -1;
-       }
-       return 0;
-}
 
-/*
- * Called with the rotation_timer_queue lock held.
- * Return true if the same timer job already exists in the queue, false if not.
- */
-static
-bool check_duplicate_timer_job(struct timer_thread_parameters *ctx,
-               struct ltt_session *session, unsigned int signal)
-{
-       bool ret = false;
-       struct sessiond_rotation_timer *node;
-
-       rcu_read_lock();
-       cds_list_for_each_entry(node, &ctx->rotation_timer_queue->list, head) {
-               if (node->session_id == session->id && node->signal == signal) {
-                       ret = true;
-                       goto end;
-               }
+       DBG("Enabling scheduled rotation timer on session \"%s\" (%u µs)", session->name,
+                       interval_us);
+       ret = timer_start(&session->rotation_schedule_timer, session->id,
+                       interval_us, LTTNG_SESSIOND_SIG_SCHEDULED_ROTATION,
+                       /* one-shot */ false);
+       if (ret < 0) {
+               goto end;
        }
-
+       session->rotation_schedule_timer_enabled = true;
 end:
-       rcu_read_unlock();
        return ret;
 }
 
 /*
- * Add the session ID and signal value to the rotation_timer_queue if it is
- * not already there and wakeup the rotation thread. The rotation thread
- * empties the whole queue everytime it is woken up. The event_pipe is
- * non-blocking, if it would block, we just return because we know the
- * rotation thread will be awaken anyway.
+ * Call with session and session_list locks held.
  */
-static
-int enqueue_timer_rotate_job(struct timer_thread_parameters *ctx,
-               struct ltt_session *session, unsigned int signal)
+int timer_session_rotation_schedule_timer_stop(struct ltt_session *session)
 {
-       int ret;
-       bool has_duplicate_timer_job;
-       char *c = "!";
-
-       pthread_mutex_lock(&ctx->rotation_timer_queue->lock);
-       has_duplicate_timer_job = check_duplicate_timer_job(ctx, session,
-                       signal);
+       int ret = 0;
 
-       if (!has_duplicate_timer_job) {
-               struct sessiond_rotation_timer *timer_data = NULL;
+       assert(session);
 
-               timer_data = zmalloc(sizeof(struct sessiond_rotation_timer));
-               if (!timer_data) {
-                       PERROR("Allocation of timer data");
-                       goto error;
-               }
-               timer_data->session_id = session->id;
-               timer_data->signal = signal;
-               cds_list_add_tail(&timer_data->head,
-                               &ctx->rotation_timer_queue->list);
-       } else {
-               /*
-                * This timer job is already pending, we don't need to add
-                * it.
-                */
-               pthread_mutex_unlock(&ctx->rotation_timer_queue->lock);
-               ret = 0;
+       if (!session->rotation_schedule_timer_enabled) {
                goto end;
        }
-       pthread_mutex_unlock(&ctx->rotation_timer_queue->lock);
 
-       ret = lttng_write(
-                       lttng_pipe_get_writefd(ctx->rotation_timer_queue->event_pipe),
-                       c, 1);
+       DBG("Disabling scheduled rotation timer on session %s", session->name);
+       ret = timer_stop(&session->rotation_schedule_timer,
+                       LTTNG_SESSIOND_SIG_SCHEDULED_ROTATION);
        if (ret < 0) {
-               /*
-                * We do not want to block in the timer handler, the job has been
-                * enqueued in the list, the wakeup pipe is probably full, the job
-                * will be processed when the rotation_thread catches up.
-                */
-               if (errno == EAGAIN || errno == EWOULDBLOCK) {
-                       ret = 0;
-                       goto end;
-               }
-               PERROR("Timer wakeup rotation thread");
-               goto error;
+               ERR("Failed to stop scheduled rotation timer of session \"%s\"",
+                               session->name);
+               goto end;
        }
 
+       session->rotation_schedule_timer_enabled = false;
        ret = 0;
-       goto end;
-
-error:
-       ret = -1;
 end:
        return ret;
 }
 
 /*
- * Ask the rotation thread to check if the last rotation started in this
- * session is still pending on the relay.
+ * Block the RT signals for the entire process. It must be called from the
+ * sessiond main before creating the threads
  */
-static
-void relay_rotation_pending_timer(struct timer_thread_parameters *ctx,
-               int sig, siginfo_t *si)
+int timer_signal_init(void)
 {
        int ret;
-       struct ltt_session *session = si->si_value.sival_ptr;
-       assert(session);
+       sigset_t mask;
 
-       ret = enqueue_timer_rotate_job(ctx, session, LTTNG_SESSIOND_SIG_ROTATE_PENDING);
+       /* Block signal for entire process, so only our thread processes it. */
+       setmask(&mask);
+       ret = pthread_sigmask(SIG_BLOCK, &mask, NULL);
        if (ret) {
-               PERROR("wakeup rotate pipe");
+               errno = ret;
+               PERROR("pthread_sigmask");
+               return -1;
        }
+       return 0;
 }
 
 /*
  * This thread is the sighandler for the timer signals.
  */
-void *sessiond_timer_thread(void *data)
+void *timer_thread_func(void *data)
 {
        int signr;
        sigset_t mask;
@@ -372,7 +340,6 @@ void *sessiond_timer_thread(void *data)
        rcu_thread_online();
 
        health_register(health_sessiond, HEALTH_SESSIOND_TYPE_TIMER);
-
        health_code_update();
 
        /* Only self thread will receive signal mask. */
@@ -396,15 +363,20 @@ void *sessiond_timer_thread(void *data)
                                PERROR("sigwaitinfo");
                        }
                        continue;
-               } else if (signr == LTTNG_SESSIOND_SIG_TEARDOWN) {
+               } else if (signr == LTTNG_SESSIOND_SIG_QS) {
                        cmm_smp_mb();
                        CMM_STORE_SHARED(timer_signal.qs_done, 1);
                        cmm_smp_mb();
-                       DBG("Signal timer metadata thread teardown");
                } else if (signr == LTTNG_SESSIOND_SIG_EXIT) {
                        goto end;
-               } else if (signr == LTTNG_SESSIOND_SIG_ROTATE_PENDING) {
-                       relay_rotation_pending_timer(ctx, info.si_signo, &info);
+               } else if (signr == LTTNG_SESSIOND_SIG_PENDING_ROTATION_CHECK) {
+                       rotation_thread_enqueue_job(ctx->rotation_thread_job_queue,
+                                       ROTATION_THREAD_JOB_TYPE_CHECK_PENDING_ROTATION,
+                                       /* session_id */ PTR_TO_UINT(info.si_value.sival_ptr));
+               } else if (signr == LTTNG_SESSIOND_SIG_SCHEDULED_ROTATION) {
+                       rotation_thread_enqueue_job(ctx->rotation_thread_job_queue,
+                                       ROTATION_THREAD_JOB_TYPE_SCHEDULED_ROTATION,
+                                       /* session_id */ PTR_TO_UINT(info.si_value.sival_ptr));
                } else {
                        ERR("Unexpected signal %d\n", info.si_signo);
                }
@@ -417,3 +389,8 @@ end:
        rcu_unregister_thread();
        return NULL;
 }
+
+void timer_exit(void)
+{
+       kill(getpid(), LTTNG_SESSIOND_SIG_EXIT);
+}
This page took 0.029156 seconds and 5 git commands to generate.