8b2febe05c7e55a53f942d2a4e2a22a917acc2c0
[lttng-tools.git] / src / bin / lttng-relayd / health-relayd.c
1 /*
2 * Copyright (C) 2013 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License, version 2 only,
6 * as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License along
14 * with this program; if not, write to the Free Software Foundation, Inc.,
15 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
16 */
17
18 #define _LGPL_SOURCE
19 #include <fcntl.h>
20 #include <getopt.h>
21 #include <grp.h>
22 #include <limits.h>
23 #include <pthread.h>
24 #include <signal.h>
25 #include <stdio.h>
26 #include <stdlib.h>
27 #include <string.h>
28 #include <sys/ipc.h>
29 #include <sys/resource.h>
30 #include <sys/shm.h>
31 #include <sys/socket.h>
32 #include <sys/stat.h>
33 #include <sys/types.h>
34 #include <urcu/list.h>
35 #include <poll.h>
36 #include <unistd.h>
37 #include <sys/mman.h>
38 #include <assert.h>
39 #include <urcu/compiler.h>
40 #include <inttypes.h>
41
42 #include <common/defaults.h>
43 #include <common/common.h>
44 #include <common/consumer/consumer.h>
45 #include <common/consumer/consumer-timer.h>
46 #include <common/compat/poll.h>
47 #include <common/sessiond-comm/sessiond-comm.h>
48 #include <common/utils.h>
49 #include <common/compat/getenv.h>
50
51 #include "lttng-relayd.h"
52 #include "health-relayd.h"
53
/*
 * Path of the health check Unix socket.
 *
 * Empty until parse_health_env() copies an override from the environment
 * or setup_health_path() formats the default path.
 */
static char health_unix_sock_path[PATH_MAX];

/*
 * Quit pipe of the health thread; both ends start out as -1.
 *
 * thread_manage_health() polls index 0 and exits once it becomes readable.
 */
int health_quit_pipe[2] = { [0] = -1, [1] = -1 };
59
60 /*
61 * Check if the thread quit pipe was triggered.
62 *
63 * Return 1 if it was triggered else 0;
64 */
65 static
66 int check_health_quit_pipe(int fd, uint32_t events)
67 {
68 if (fd == health_quit_pipe[0] && (events & LPOLLIN)) {
69 return 1;
70 }
71
72 return 0;
73 }
74
/*
 * Send `len` bytes from `buf` on the Unix socket `sock` through
 * lttcomm_send_unix_sock().
 *
 * A zero-length request is rejected with -1 without touching the socket;
 * otherwise the value returned by lttcomm_send_unix_sock() is passed on.
 */
static int send_unix_sock(int sock, void *buf, size_t len)
{
	int status = -1;

	/* Only non-empty payloads are handed to the transport layer. */
	if (len != 0) {
		status = lttcomm_send_unix_sock(sock, buf, len);
	}

	return status;
}
89
90 static int create_lttng_rundir_with_perm(const char *rundir)
91 {
92 int ret;
93
94 DBG3("Creating LTTng run directory: %s", rundir);
95
96 ret = mkdir(rundir, S_IRWXU);
97 if (ret < 0) {
98 if (errno != EEXIST) {
99 ERR("Unable to create %s", rundir);
100 goto error;
101 } else {
102 ret = 0;
103 }
104 } else if (ret == 0) {
105 int is_root = !getuid();
106
107 if (is_root) {
108 gid_t gid;
109
110 ret = utils_get_group_id(tracing_group_name, true, &gid);
111 if (ret) {
112 /* Default to root group. */
113 gid = 0;
114 }
115
116 ret = chown(rundir, 0, gid);
117 if (ret < 0) {
118 ERR("Unable to set group on %s", rundir);
119 PERROR("chown");
120 ret = -1;
121 goto error;
122 }
123
124 ret = chmod(rundir,
125 S_IRUSR | S_IWUSR | S_IXUSR | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH);
126 if (ret < 0) {
127 ERR("Unable to set permissions on %s", health_unix_sock_path);
128 PERROR("chmod");
129 ret = -1;
130 goto error;
131 }
132 }
133 }
134
135 error:
136 return ret;
137 }
138
139 static
140 int parse_health_env(void)
141 {
142 const char *health_path;
143
144 health_path = lttng_secure_getenv(LTTNG_RELAYD_HEALTH_ENV);
145 if (health_path) {
146 strncpy(health_unix_sock_path, health_path,
147 PATH_MAX);
148 health_unix_sock_path[PATH_MAX - 1] = '\0';
149 }
150
151 return 0;
152 }
153
154 static
155 int setup_health_path(void)
156 {
157 int is_root, ret = 0;
158 const char *home_path = NULL;
159 char *rundir = NULL, *relayd_path = NULL;
160
161 ret = parse_health_env();
162 if (ret) {
163 return ret;
164 }
165
166 is_root = !getuid();
167
168 if (is_root) {
169 rundir = strdup(DEFAULT_LTTNG_RUNDIR);
170 if (!rundir) {
171 ret = -ENOMEM;
172 goto end;
173 }
174 } else {
175 /*
176 * Create rundir from home path. This will create something like
177 * $HOME/.lttng
178 */
179 home_path = utils_get_home_dir();
180
181 if (home_path == NULL) {
182 /* TODO: Add --socket PATH option */
183 ERR("Can't get HOME directory for sockets creation.");
184 ret = -EPERM;
185 goto end;
186 }
187
188 ret = asprintf(&rundir, DEFAULT_LTTNG_HOME_RUNDIR, home_path);
189 if (ret < 0) {
190 ret = -ENOMEM;
191 goto end;
192 }
193 }
194
195 ret = asprintf(&relayd_path, DEFAULT_RELAYD_PATH, rundir);
196 if (ret < 0) {
197 ret = -ENOMEM;
198 goto end;
199 }
200
201 ret = create_lttng_rundir_with_perm(rundir);
202 if (ret < 0) {
203 goto end;
204 }
205
206 ret = create_lttng_rundir_with_perm(relayd_path);
207 if (ret < 0) {
208 goto end;
209 }
210
211 if (is_root) {
212 if (strlen(health_unix_sock_path) != 0) {
213 goto end;
214 }
215 snprintf(health_unix_sock_path, sizeof(health_unix_sock_path),
216 DEFAULT_GLOBAL_RELAY_HEALTH_UNIX_SOCK,
217 (int) getpid());
218 } else {
219 /* Set health check Unix path */
220 if (strlen(health_unix_sock_path) != 0) {
221 goto end;
222 }
223
224 snprintf(health_unix_sock_path, sizeof(health_unix_sock_path),
225 DEFAULT_HOME_RELAY_HEALTH_UNIX_SOCK,
226 home_path, (int) getpid());
227 }
228
229 end:
230 free(rundir);
231 free(relayd_path);
232 return ret;
233 }
234
235 /*
236 * Thread managing health check socket.
237 */
238 void *thread_manage_health(void *data)
239 {
240 int sock = -1, new_sock = -1, ret, i, pollfd, err = -1;
241 uint32_t revents, nb_fd;
242 struct lttng_poll_event events;
243 struct health_comm_msg msg;
244 struct health_comm_reply reply;
245 int is_root;
246
247 DBG("[thread] Manage health check started");
248
249 setup_health_path();
250
251 rcu_register_thread();
252
253 /* We might hit an error path before this is created. */
254 lttng_poll_init(&events);
255
256 /* Create unix socket */
257 sock = lttcomm_create_unix_sock(health_unix_sock_path);
258 if (sock < 0) {
259 ERR("Unable to create health check Unix socket");
260 err = -1;
261 goto error;
262 }
263
264 is_root = !getuid();
265 if (is_root) {
266 /* lttng health client socket path permissions */
267 gid_t gid;
268
269 ret = utils_get_group_id(tracing_group_name, true, &gid);
270 if (ret) {
271 /* Default to root group. */
272 gid = 0;
273 }
274
275 ret = chown(health_unix_sock_path, 0, gid);
276 if (ret < 0) {
277 ERR("Unable to set group on %s", health_unix_sock_path);
278 PERROR("chown");
279 err = -1;
280 goto error;
281 }
282
283 ret = chmod(health_unix_sock_path,
284 S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP);
285 if (ret < 0) {
286 ERR("Unable to set permissions on %s", health_unix_sock_path);
287 PERROR("chmod");
288 err = -1;
289 goto error;
290 }
291 }
292
293 /*
294 * Set the CLOEXEC flag. Return code is useless because either way, the
295 * show must go on.
296 */
297 (void) utils_set_fd_cloexec(sock);
298
299 ret = lttcomm_listen_unix_sock(sock);
300 if (ret < 0) {
301 goto error;
302 }
303
304 /* Size is set to 1 for the consumer_channel pipe */
305 ret = lttng_poll_create(&events, 2, LTTNG_CLOEXEC);
306 if (ret < 0) {
307 ERR("Poll set creation failed");
308 goto error;
309 }
310
311 ret = lttng_poll_add(&events, health_quit_pipe[0], LPOLLIN);
312 if (ret < 0) {
313 goto error;
314 }
315
316 /* Add the application registration socket */
317 ret = lttng_poll_add(&events, sock, LPOLLIN | LPOLLPRI);
318 if (ret < 0) {
319 goto error;
320 }
321
322 lttng_relay_notify_ready();
323
324 while (1) {
325 DBG("Health check ready");
326
327 /* Inifinite blocking call, waiting for transmission */
328 restart:
329 ret = lttng_poll_wait(&events, -1);
330 if (ret < 0) {
331 /*
332 * Restart interrupted system call.
333 */
334 if (errno == EINTR) {
335 goto restart;
336 }
337 goto error;
338 }
339
340 nb_fd = ret;
341
342 for (i = 0; i < nb_fd; i++) {
343 /* Fetch once the poll data */
344 revents = LTTNG_POLL_GETEV(&events, i);
345 pollfd = LTTNG_POLL_GETFD(&events, i);
346
347 /* Thread quit pipe has been closed. Killing thread. */
348 ret = check_health_quit_pipe(pollfd, revents);
349 if (ret) {
350 err = 0;
351 goto exit;
352 }
353
354 /* Event on the registration socket */
355 if (pollfd == sock) {
356 if (revents & LPOLLIN) {
357 continue;
358 } else if (revents & (LPOLLERR | LPOLLHUP | LPOLLRDHUP)) {
359 ERR("Health socket poll error");
360 goto error;
361 } else {
362 ERR("Unexpected poll events %u for sock %d", revents, pollfd);
363 goto error;
364 }
365 }
366 }
367
368 new_sock = lttcomm_accept_unix_sock(sock);
369 if (new_sock < 0) {
370 goto error;
371 }
372
373 /*
374 * Set the CLOEXEC flag. Return code is useless because either way, the
375 * show must go on.
376 */
377 (void) utils_set_fd_cloexec(new_sock);
378
379 DBG("Receiving data from client for health...");
380 ret = lttcomm_recv_unix_sock(new_sock, (void *)&msg, sizeof(msg));
381 if (ret <= 0) {
382 DBG("Nothing recv() from client... continuing");
383 ret = close(new_sock);
384 if (ret) {
385 PERROR("close");
386 }
387 new_sock = -1;
388 continue;
389 }
390
391 rcu_thread_online();
392
393 assert(msg.cmd == HEALTH_CMD_CHECK);
394
395 memset(&reply, 0, sizeof(reply));
396 for (i = 0; i < NR_HEALTH_RELAYD_TYPES; i++) {
397 /*
398 * health_check_state return 0 if thread is in
399 * error.
400 */
401 if (!health_check_state(health_relayd, i)) {
402 reply.ret_code |= 1ULL << i;
403 }
404 }
405
406 DBG2("Health check return value %" PRIx64, reply.ret_code);
407
408 ret = send_unix_sock(new_sock, (void *) &reply, sizeof(reply));
409 if (ret < 0) {
410 ERR("Failed to send health data back to client");
411 }
412
413 /* End of transmission */
414 ret = close(new_sock);
415 if (ret) {
416 PERROR("close");
417 }
418 new_sock = -1;
419 }
420
421 error:
422 lttng_relay_stop_threads();
423 exit:
424 if (err) {
425 ERR("Health error occurred in %s", __func__);
426 }
427 DBG("Health check thread dying");
428 unlink(health_unix_sock_path);
429 if (sock >= 0) {
430 ret = close(sock);
431 if (ret) {
432 PERROR("close");
433 }
434 }
435
436 /*
437 * We do NOT rmdir rundir nor the relayd path because there are
438 * other processes using them.
439 */
440
441 lttng_poll_clean(&events);
442
443 rcu_unregister_thread();
444 return NULL;
445 }
This page took 0.058758 seconds and 4 git commands to generate.