SoW-2020-0002: Trace Hit Counters: trigger error reporting integration
[lttng-tools.git] / tests / regression / tools / health / test_health.sh
CommitLineData
9d16b343
MJ
# Copyright (C) 2012 Christian Babeux <christian.babeux@efficios.com>
# Copyright (C) 2014 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
#
# SPDX-License-Identifier: GPL-2.0-only

# Root of the tests tree, resolved relative to CURDIR. CURDIR is not set
# in this file; it must already be defined when this code runs.
TESTDIR=${CURDIR}/../../..

# Event and channel names used when a session is created for the
# consumer daemon scenarios.
UST_EVENT_NAME="tp:tptest"
KERNEL_EVENT_NAME="sched_switch"
CHANNEL_NAME="testchan"

# Health check program, looked up under CURDIR.
HEALTH_CHECK_BIN="health_check"

# Number of TAP results announced to plan_tests.
NUM_TESTS=106

# Delay, in seconds, between the first and the second health check.
SLEEP_TIME=30

# Quoted so that a tests tree located under a path containing whitespace
# can still be sourced.
source "$TESTDIR/utils/utils.sh"

# Validate the output captured from the last health check and report
# exactly one TAP result through pass/fail.
#
# Arguments:
#   $1 - error message expected on the health check's stdout; matched
#        as a literal string, not as a regular expression
#   $2 - non-zero when the relay daemon is part of the scenario
#        (defaults to 0 when omitted or empty)
# Globals read:
#   STDOUT_PATH, STDERR_PATH - files holding the captured output
# Outputs:
#   On failure, both captured files are dumped line by line with diag.
function report_errors
{
	local expected_error="$1"
	local relayd_tested="${2:-0}"
	local err_no_relayd_match="Error querying relayd health"
	local out outerr line

	# Check for health errors
	# Include inability to contact relayd health as an expected
	# error, since this can happen whenever the relayd shuts down due
	# to an error in any thread.
	#
	# grep -c prints nothing when the file cannot be read, hence the
	# fallback to 0 so the numeric comparisons below stay valid.
	out=$(grep -F -c -- "${expected_error}" "${STDOUT_PATH}")
	out=${out:-0}
	if [ "${relayd_tested}" -ne 0 ]; then
		outerr=$(grep -F -c -- "${err_no_relayd_match}" "${STDERR_PATH}")
		outerr=${outerr:-0}
	else
		outerr=0
	fi

	if [ "${out}" -eq 0 ] && [ "${outerr}" -eq 0 ]; then
		fail "Validation failure"
		diag "Health returned:"
		diag "stdout:"
		# IFS= and -r keep leading whitespace and backslashes intact;
		# the extra -n test emits a last line lacking a newline.
		while IFS= read -r line || [ -n "${line}" ]; do
			diag "${line}"
		done < "${STDOUT_PATH}"

		diag "stderr:"
		while IFS= read -r line || [ -n "${line}" ]; do
			diag "${line}"
		done < "${STDERR_PATH}"
	else
		pass "Validation OK"
	fi
}

# Run one health problem detection scenario for a single thread.
#
# Arguments:
#   $1 - suffix appended to the thread name to form the environment
#        variable that activates the testpoint
#   $2 - thread name (prefix of that environment variable)
#   $3 - error message the health check is expected to print
#   $4 - 1 if validating this thread requires root
#   $5 - 1 to create a session and start tracing before the check
#   $6 - 1 to start a relay daemon and query its health as well
# Globals read:
#   CURDIR, SESSIOND_PRELOAD, HEALTH_CHECK_BIN, HEALTH_PATH, TRACE_PATH,
#   STDOUT_PATH, STDERR_PATH, SLEEP_TIME, SESSION_NAME, UST_EVENT_NAME,
#   KERNEL_EVENT_NAME, CHANNEL_NAME, KILL_SIGNAL, isroot
# Globals written:
#   RELAYD_ARGS, plus environment variables that are exported for the
#   duration of the scenario and unset before returning.
function test_health
{
	local test_suffix="$1"
	local test_thread_name="$2"
	local test_thread_error_string="$3"
	local test_needs_root="$4"
	local test_consumerd="$5"
	local test_relayd="$6"
	# Name of the environment variable activating the thread's testpoint.
	local testpoint_var="${test_thread_name}_${test_suffix}"
	# Arguments of the health check program, kept as an array so that a
	# relayd path containing whitespace stays a single argument.
	local -a health_check_args=()

	diag "Test health problem detection with ${test_thread_name}"

	# Set the socket timeout to 5 so the health check detection
	# happens within 25 s
	export LTTNG_NETWORK_SOCKET_TIMEOUT=5
	export LTTNG_RELAYD_HEALTH="${HEALTH_PATH}/test-health"

	# Activate testpoints
	export LTTNG_TESTPOINT_ENABLE=1

	# Activate specific thread test
	export "${testpoint_var}=1"

	# Spawn sessiond with preloaded testpoint override lib
	export LD_PRELOAD="$CURDIR/$SESSIOND_PRELOAD"

	diag "Start session daemon"
	start_lttng_sessiond

	if [ "${test_consumerd}" -eq 1 ]; then
		create_lttng_session_no_output "$SESSION_NAME"

		diag "With UST consumer daemons"
		enable_ust_lttng_event_ok "$SESSION_NAME" "$UST_EVENT_NAME" "$CHANNEL_NAME"

		# The kernel event is only enabled when skip returns non-zero.
		skip "$isroot" "Root access is needed. Skipping kernel consumer health check test." "1" ||
		{
			diag "With kernel consumer daemon"
			lttng_enable_kernel_event "$SESSION_NAME" "$KERNEL_EVENT_NAME" "$CHANNEL_NAME"
		}
		start_lttng_tracing_ok "$SESSION_NAME"
	fi

	if [ "${test_relayd}" -eq 1 ]; then
		diag "With relay daemon"
		RELAYD_ARGS="--relayd-path=${LTTNG_RELAYD_HEALTH}"
		health_check_args=("${RELAYD_ARGS}")

		start_lttng_relayd "-o $TRACE_PATH"
	else
		RELAYD_ARGS=
	fi

	# Check health status, not caring about result
	"$CURDIR/$HEALTH_CHECK_BIN" "${health_check_args[@]}" \
		> /dev/null

	# Wait
	diag "Check after running for ${SLEEP_TIME} seconds"
	sleep "${SLEEP_TIME}"

	# Check health status
	"$CURDIR/$HEALTH_CHECK_BIN" "${health_check_args[@]}" \
		> "${STDOUT_PATH}" 2> "${STDERR_PATH}"

	if [ "${test_needs_root}" -eq 1 ]; then
		skip "${isroot}" "Root access needed for test \"${test_thread_name}\"." "1" ||
		{
			report_errors "${test_thread_error_string}" "${test_relayd}"
		}
	else
		report_errors "${test_thread_error_string}" "${test_relayd}"
	fi

	# KILL_SIGNAL is deliberately left unquoted below: when it is unset
	# or empty the stop helpers receive no argument at all instead of
	# an empty one.
	if [ "${test_relayd}" -eq 1 ]; then
		# We may fail to stop relayd here, and this is OK, since
		# it may have been killed voluntarily by testpoint.
		stop_lttng_relayd_notap $KILL_SIGNAL
	fi
	stop_lttng_consumerd $KILL_SIGNAL
	stop_lttng_sessiond $KILL_SIGNAL

	unset LTTNG_TESTPOINT_ENABLE
	unset "${testpoint_var}"
	unset LD_PRELOAD
	unset LTTNG_NETWORK_SOCKET_TIMEOUT
	unset LTTNG_RELAYD_HEALTH
}

# Announce the number of TAP results and print the test banner.
plan_tests "$NUM_TESTS"

print_test_banner "$TEST_DESC"

# The scenarios need the testpoint shared object that gets preloaded
# into the session daemon; record whether it is present.
if [ -f "$CURDIR/$SESSIOND_PRELOAD" ]; then
	foundobj=1
else
	foundobj=0
fi

# When skip returns success, every planned test has been skipped and
# there is nothing left to run.
skip "$foundobj" "No shared object generated. Skipping all tests." "$NUM_TESTS" && exit 0

# Scenario tables.
#
# The five arrays below are parallel: the entry at a given index in each
# of them describes the same scenario. Blank lines separate the session
# daemon, consumer daemon and relay daemon groups.

# Thread names; combined with a suffix they form the environment
# variable that activates the matching testpoint.
THREAD=("LTTNG_SESSIOND_THREAD_MANAGE_CLIENTS"
	"LTTNG_SESSIOND_THREAD_MANAGE_APPS"
	"LTTNG_SESSIOND_THREAD_REG_APPS"
	"LTTNG_SESSIOND_THREAD_HT_CLEANUP"
	"LTTNG_SESSIOND_THREAD_APP_MANAGE_NOTIFY"
	"LTTNG_SESSIOND_THREAD_APP_REG_DISPATCH"
	"LTTNG_SESSIOND_THREAD_MANAGE_KERNEL"

	"LTTNG_CONSUMERD_THREAD_CHANNEL"
	"LTTNG_CONSUMERD_THREAD_METADATA"
	"LTTNG_CONSUMERD_THREAD_METADATA_TIMER"

	"LTTNG_RELAYD_THREAD_DISPATCHER"
	"LTTNG_RELAYD_THREAD_WORKER"
	"LTTNG_RELAYD_THREAD_LISTENER"
	"LTTNG_RELAYD_THREAD_LIVE_DISPATCHER"
	"LTTNG_RELAYD_THREAD_LIVE_WORKER"
	"LTTNG_RELAYD_THREAD_LIVE_LISTENER"
)

# Message expected on the health check's standard output for each thread.
ERROR_STRING=(
	"Thread \"Session daemon command\" is not responding in component \"sessiond\"."
	"Thread \"Session daemon application manager\" is not responding in component \"sessiond\"."
	"Thread \"Session daemon application registration\" is not responding in component \"sessiond\"."
	"Thread \"Session daemon hash table cleanup\" is not responding in component \"sessiond\"."
	"Thread \"Session daemon application notification manager\" is not responding in component \"sessiond\"."
	"Thread \"Session daemon application registration dispatcher\" is not responding in component \"sessiond\"."
	"Thread \"Session daemon kernel\" is not responding in component \"sessiond\"."

	"Thread \"Consumer daemon channel\" is not responding"
	"Thread \"Consumer daemon metadata\" is not responding"
	"Thread \"Consumer daemon metadata timer\" is not responding"

	"Thread \"Relay daemon dispatcher\" is not responding in component \"relayd\"."
	"Thread \"Relay daemon worker\" is not responding in component \"relayd\"."
	"Thread \"Relay daemon listener\" is not responding in component \"relayd\"."
	"Thread \"Relay daemon live dispatcher\" is not responding in component \"relayd\"."
	"Thread \"Relay daemon live worker\" is not responding in component \"relayd\"."
	"Thread \"Relay daemon live listener\" is not responding in component \"relayd\"."
)

# TODO
# "LTTNG_SESSIOND_THREAD_MANAGE_CONSUMER"
# "Thread \"Session daemon manage consumer\" is not responding in component \"sessiond\"."

# TODO: test kernel consumerd specifically in addition to UST consumerd

# TODO: need refactoring of consumerd teardown
# "LTTNG_CONSUMERD_THREAD_SESSIOND"
# "Thread \"Consumer daemon session daemon command manager\" is not responding"

# TODO: this thread is responsible for closing a file descriptor that
# triggers teardown of the metadata thread. We should revisit teardown
# of consumerd.
# "LTTNG_CONSUMERD_THREAD_DATA"
# "Thread \"Consumer daemon data\" is not responding"

# 1 when validating the scenario requires root.
NEEDS_ROOT=(
	0
	0
	0
	0
	0
	0
	1

	0
	0
	0

	0
	0
	0
	0
	0
	0
)

# 1 when a session must be created and tracing started before the check.
TEST_CONSUMERD=(
	0
	0
	0
	0
	0
	0
	0

	1
	1
	1

	1
	1
	1
	1
	1
	1
)

# 1 when a relay daemon must be started and its health queried.
TEST_RELAYD=(
	0
	0
	0
	0
	0
	0
	0

	0
	0
	0

	1
	1
	1
	1
	1
	1
)

# Scratch files receiving the health check's output, and scratch
# directories for the relayd traces and the relayd health socket path.
STDOUT_PATH=$(mktemp)
STDERR_PATH=$(mktemp)
TRACE_PATH=$(mktemp -d)
HEALTH_PATH=$(mktemp -d)

# isroot is 1 when running as uid 0, 0 otherwise.
if [ "$(id -u)" = "0" ]; then
	isroot=1
else
	isroot=0
fi

# Run every scenario; the arrays are indexed in lockstep.
THREAD_COUNT=${#THREAD[@]}
for ((i = 0; i < THREAD_COUNT; i++)); do
	test_health "${TEST_SUFFIX}" \
		"${THREAD[$i]}" \
		"${ERROR_STRING[$i]}" \
		"${NEEDS_ROOT[$i]}" \
		"${TEST_CONSUMERD[$i]}" \
		"${TEST_RELAYD[$i]}"
done

# Remove the scratch files and directories created above.
rm -rf -- "${HEALTH_PATH}"
rm -rf -- "${TRACE_PATH}"
rm -f -- "${STDOUT_PATH}"
rm -f -- "${STDERR_PATH}"
This page took 0.062681 seconds and 5 git commands to generate.