X-Git-Url: http://git.efficios.com/?p=lttng-tools.git;a=blobdiff_plain;f=src%2Fbin%2Flttng-sessiond%2Fhealth.c;h=7e1d4731a71de92702f200c9cbb96359fb8a58ab;hp=fafc9095df57e89d88baeede25e4adfa8b634fe9;hb=8782cc7477fae212607b9fd6395a4b2e2d3357ed;hpb=67e056446003680180ed4f384217facd90b1a776 diff --git a/src/bin/lttng-sessiond/health.c b/src/bin/lttng-sessiond/health.c index fafc9095d..7e1d4731a 100644 --- a/src/bin/lttng-sessiond/health.c +++ b/src/bin/lttng-sessiond/health.c @@ -1,5 +1,6 @@ /* * Copyright (C) 2012 - David Goulet + * Copyright (C) 2013 - Mathieu Desnoyers * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License, version 2 only, as @@ -29,48 +30,76 @@ #include "health.h" -static struct timespec time_delta = { - .tv_sec = DEFAULT_HEALTH_CHECK_DELTA_S, - .tv_nsec = DEFAULT_HEALTH_CHECK_DELTA_NS, +/* + * An application-specific error state for unregistered thread keeps + * track of thread errors. A thread reporting a health error, normally + * unregisters and quits. This makes the TLS health state not available + * to the health_check_state() call so on unregister we update this + * global error array so we can keep track of which thread was on error + * if the TLS health state has been removed. + */ +struct health_app { + /* List of health state, for each application thread */ + struct cds_list_head list; + /* + * This lock ensures that TLS memory used for the node and its + * container structure don't get reclaimed after the TLS owner + * thread exits until we have finished using it. + */ + pthread_mutex_t lock; + int nr_types; + struct timespec time_delta; + /* Health flags containing thread type error state */ + enum health_flags *flags; }; /* Define TLS health state. 
*/ DEFINE_URCU_TLS(struct health_state, health_state); -/* - * It ensures that TLS memory used for the node and its container structure - * don't get reclaimed after the TLS owner thread exits until we have finished - * using it. - */ -static pthread_mutex_t health_mutex = PTHREAD_MUTEX_INITIALIZER; +struct health_app *health_app_create(int nr_types) +{ + struct health_app *ha; -static struct health_tls_state_list health_state_list = { - .head = CDS_LIST_HEAD_INIT(health_state_list.head), -}; + ha = zmalloc(sizeof(*ha)); + if (!ha) { + return NULL; + } + ha->flags = zmalloc(sizeof(*ha->flags)); + if (!ha->flags) { + goto error_flags; + } + CDS_INIT_LIST_HEAD(&ha->list); + pthread_mutex_init(&ha->lock, NULL); + ha->nr_types = nr_types; + ha->time_delta.tv_sec = DEFAULT_HEALTH_CHECK_DELTA_S; + ha->time_delta.tv_nsec = DEFAULT_HEALTH_CHECK_DELTA_NS; + return ha; + +error_flags: + free(ha); + return NULL; +} -/* - * This keeps track of the error state for unregistered thread. A thread - * reporting a health error, normally unregisters and quits. This makes the TLS - * health state not available to the health_check_state() call so on unregister - * we update this global error array so we can keep track of which thread was - * on error if the TLS health state has been removed. - */ -static enum health_flags global_error_state[HEALTH_NUM_TYPE]; +void health_app_destroy(struct health_app *ha) +{ + free(ha->flags); + free(ha); +} /* * Lock health state global list mutex. */ -static void state_lock(void) +static void state_lock(struct health_app *ha) { - pthread_mutex_lock(&health_mutex); + pthread_mutex_lock(&ha->lock); } /* * Unlock health state global list mutex. */ -static void state_unlock(void) +static void state_unlock(struct health_app *ha) { - pthread_mutex_unlock(&health_mutex); + pthread_mutex_unlock(&ha->lock); } /* @@ -113,7 +142,7 @@ static int time_diff_gt(const struct timespec *time_a, * * Return 0 if health is bad or else 1. 
*/ -static int validate_state(struct health_state *state) +static int validate_state(struct health_app *ha, struct health_state *state) { int retval = 1, ret; unsigned long current, last; @@ -152,7 +181,8 @@ static int validate_state(struct health_state *state) state->last = current; memcpy(&state->last_time, &current_time, sizeof(current_time)); } else { - if (time_diff_gt(&current_time, &state->last_time, &time_delta)) { + if (time_diff_gt(&current_time, &state->last_time, + &ha->time_delta)) { if (current == last && !HEALTH_IS_IN_POLL(current)) { /* error */ retval = 0; @@ -181,23 +211,23 @@ end: * * Return 0 if health is bad or else 1. */ -int health_check_state(enum health_type type) +int health_check_state(struct health_app *ha, int type) { int retval = 1; struct health_state *state; - assert(type < HEALTH_NUM_TYPE); + assert(type < ha->nr_types); - state_lock(); + state_lock(ha); - cds_list_for_each_entry(state, &health_state_list.head, node) { + cds_list_for_each_entry(state, &ha->list, node) { int ret; if (state->type != type) { continue; } - ret = validate_state(state); + ret = validate_state(ha, state); if (!ret) { retval = 0; goto end; @@ -205,12 +235,12 @@ int health_check_state(enum health_type type) } /* Check the global state since some state might not be visible anymore. */ - if (global_error_state[type] & HEALTH_ERROR) { + if (ha->flags[type] & HEALTH_ERROR) { retval = 0; } end: - state_unlock(); + state_unlock(ha); DBG("Health check for type %d is %s", (int) type, (retval == 0) ? "BAD" : "GOOD"); @@ -220,9 +250,9 @@ end: /* * Init health state. */ -void health_register(enum health_type type) +void health_register(struct health_app *ha, int type) { - assert(type < HEALTH_NUM_TYPE); + assert(type < ha->nr_types); /* Init TLS state. */ uatomic_set(&URCU_TLS(health_state).last, 0); @@ -233,41 +263,42 @@ void health_register(enum health_type type) uatomic_set(&URCU_TLS(health_state).type, type); /* Add it to the global TLS state list. 
*/ - state_lock(); - cds_list_add(&URCU_TLS(health_state).node, &health_state_list.head); - state_unlock(); + state_lock(ha); + cds_list_add(&URCU_TLS(health_state).node, &ha->list); + state_unlock(ha); } /* * Remove node from global list. */ -void health_unregister(void) +void health_unregister(struct health_app *ha) { - state_lock(); + state_lock(ha); /* * On error, set the global_error_state since we are about to remove * the node from the global list. */ if (uatomic_read(&URCU_TLS(health_state).flags) & HEALTH_ERROR) { - uatomic_set(&global_error_state[URCU_TLS(health_state).type], + uatomic_set(&ha->flags[URCU_TLS(health_state).type], HEALTH_ERROR); } cds_list_del(&URCU_TLS(health_state).node); - state_unlock(); + state_unlock(ha); } /* * Initiliazie health check subsytem. This should be called before any health * register occurs. */ -void health_init(void) +void health_init(struct health_app *ha) { /* * Get the maximum value between the default delta value and the TCP * timeout with a safety net of the default health check delta. */ - time_delta.tv_sec = max_t(unsigned long, + ha->time_delta.tv_sec = max_t(unsigned long, lttcomm_inet_tcp_timeout + DEFAULT_HEALTH_CHECK_DELTA_S, - time_delta.tv_sec); - DBG("Health check time delta in seconds set to %lu", time_delta.tv_sec); + ha->time_delta.tv_sec); + DBG("Health check time delta in seconds set to %lu", + ha->time_delta.tv_sec); }