Fix: race between kconsumerd and sessiond on tear down
authorJonathan Rajotte <jonathan.rajotte-julien@efficios.com>
Thu, 3 Sep 2015 21:48:36 +0000 (17:48 -0400)
committerJérémie Galarneau <jeremie.galarneau@efficios.com>
Sat, 5 Sep 2015 19:55:41 +0000 (15:55 -0400)
v2: minimize indentation by using return on condition.

Kconsumerd and sessiond both have reference on lttng-module. This can lead to a race
on modprobe_remove_lttng_all which might fail to unload modules due to
certain modules not having a ref count equal to zero at the time.

waitpid is used to force a synchronization on the child (kconsumer) termination.

This also have been applied to ust consumers for the sake of consistency.

Fixes: #878
Signed-off-by: Jonathan Rajotte <jonathan.rajotte-julien@efficios.com>
Signed-off-by: Jérémie Galarneau <jeremie.galarneau@efficios.com>
src/bin/lttng-sessiond/main.c

index d4dae16fd43656db074bb064b2c48720710ebc7c..2983cbf8c1da67dbed1c2092db68b3631532cb24 100644 (file)
@@ -586,6 +586,34 @@ static int generate_lock_file_path(char *path, size_t len)
        return ret;
 }
 
+/*
+ * Wait on consumer process termination.
+ *
+ * Need to be called with the consumer data lock held or from a context
+ * ensuring no concurrent access to data (e.g: cleanup).
+ */
+static void wait_consumer(struct consumer_data *consumer_data)
+{
+       pid_t ret;
+       int status;
+
+       if (consumer_data->pid <= 0) {
+               return;
+       }
+
+       DBG("Waiting for complete teardown of consumerd (PID: %d)",
+                       consumer_data->pid);
+       ret = waitpid(consumer_data->pid, &status, 0);
+       if (ret == -1) {
+               PERROR("consumerd waitpid pid: %d", consumer_data->pid)
+       }
+       if (!WIFEXITED(status)) {
+               ERR("consumerd termination with error: %d",
+                               WEXITSTATUS(ret));
+       }
+       consumer_data->pid = 0;
+}
+
 /*
  * Cleanup the session daemon's data structures.
  */
@@ -680,6 +708,11 @@ static void sessiond_cleanup(void)
                }
        }
 
+       wait_consumer(&kconsumer_data);
+       wait_consumer(&ustconsumer64_data);
+       wait_consumer(&ustconsumer32_data);
+
+
        DBG("Cleaning up all agent apps");
        agent_app_ht_clean();
 
@@ -1500,7 +1533,6 @@ error:
 
        unlink(consumer_data->err_unix_sock_path);
        unlink(consumer_data->cmd_unix_sock_path);
-       consumer_data->pid = 0;
        pthread_mutex_unlock(&consumer_data->lock);
 
        /* Cleanup metadata socket mutex. */
This page took 0.028372 seconds and 5 git commands to generate.