Backport: relayd: track the health thread's poll fd with fd-tracker
[lttng-tools.git] / src / bin / lttng-relayd / health-relayd.c
1 /*
2 * Copyright (C) 2013 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License, version 2 only,
6 * as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License along
14 * with this program; if not, write to the Free Software Foundation, Inc.,
15 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
16 */
17
18 #define _LGPL_SOURCE
19 #include <fcntl.h>
20 #include <getopt.h>
21 #include <grp.h>
22 #include <limits.h>
23 #include <pthread.h>
24 #include <signal.h>
25 #include <stdio.h>
26 #include <stdlib.h>
27 #include <string.h>
28 #include <sys/ipc.h>
29 #include <sys/resource.h>
30 #include <sys/shm.h>
31 #include <sys/socket.h>
32 #include <sys/stat.h>
33 #include <sys/types.h>
34 #include <urcu/list.h>
35 #include <poll.h>
36 #include <unistd.h>
37 #include <sys/mman.h>
38 #include <assert.h>
39 #include <urcu/compiler.h>
40 #include <inttypes.h>
41
42 #include <common/defaults.h>
43 #include <common/common.h>
44 #include <common/consumer/consumer.h>
45 #include <common/consumer/consumer-timer.h>
46 #include <common/compat/poll.h>
47 #include <common/sessiond-comm/sessiond-comm.h>
48 #include <common/utils.h>
49 #include <common/compat/getenv.h>
50 #include <common/fd-tracker/utils.h>
51
52 #include "lttng-relayd.h"
53 #include "health-relayd.h"
54
/*
 * Global health check unix path.
 *
 * Filled by parse_health_env() from the environment when available,
 * otherwise by setup_health_path() with a default location.
 */
static
char health_unix_sock_path[PATH_MAX];

/*
 * Descriptor pair used to tell the health thread to quit: the thread polls
 * health_quit_pipe[0] for LPOLLIN (see check_health_quit_pipe()).
 */
int health_quit_pipe[2];
60
61 /*
62 * Check if the thread quit pipe was triggered.
63 *
64 * Return 1 if it was triggered else 0;
65 */
66 static
67 int check_health_quit_pipe(int fd, uint32_t events)
68 {
69 if (fd == health_quit_pipe[0] && (events & LPOLLIN)) {
70 return 1;
71 }
72
73 return 0;
74 }
75
/*
 * Send `len` bytes from `buf` on the unix socket `sock` through
 * lttcomm_send_unix_sock().
 *
 * Return -1 without touching the socket when `len` is zero, otherwise the
 * value returned by lttcomm_send_unix_sock().
 */
static int send_unix_sock(int sock, void *buf, size_t len)
{
	int status = -1;

	/* Only a non-empty payload is handed to the comm layer. */
	if (len != 0) {
		status = lttcomm_send_unix_sock(sock, buf, len);
	}

	return status;
}
90
91 static int create_lttng_rundir_with_perm(const char *rundir)
92 {
93 int ret;
94
95 DBG3("Creating LTTng run directory: %s", rundir);
96
97 ret = mkdir(rundir, S_IRWXU);
98 if (ret < 0) {
99 if (errno != EEXIST) {
100 ERR("Unable to create %s", rundir);
101 goto error;
102 } else {
103 ret = 0;
104 }
105 } else if (ret == 0) {
106 int is_root = !getuid();
107
108 if (is_root) {
109 ret = chown(rundir, 0,
110 utils_get_group_id(tracing_group_name));
111 if (ret < 0) {
112 ERR("Unable to set group on %s", rundir);
113 PERROR("chown");
114 ret = -1;
115 goto error;
116 }
117
118 ret = chmod(rundir,
119 S_IRUSR | S_IWUSR | S_IXUSR | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH);
120 if (ret < 0) {
121 ERR("Unable to set permissions on %s", health_unix_sock_path);
122 PERROR("chmod");
123 ret = -1;
124 goto error;
125 }
126 }
127 }
128
129 error:
130 return ret;
131 }
132
133 static
134 int parse_health_env(void)
135 {
136 const char *health_path;
137
138 health_path = lttng_secure_getenv(LTTNG_RELAYD_HEALTH_ENV);
139 if (health_path) {
140 strncpy(health_unix_sock_path, health_path,
141 PATH_MAX);
142 health_unix_sock_path[PATH_MAX - 1] = '\0';
143 }
144
145 return 0;
146 }
147
148 static
149 int setup_health_path(void)
150 {
151 int is_root, ret = 0;
152 char *home_path = NULL, *rundir = NULL, *relayd_path = NULL;
153
154 ret = parse_health_env();
155 if (ret) {
156 return ret;
157 }
158
159 is_root = !getuid();
160
161 if (is_root) {
162 rundir = strdup(DEFAULT_LTTNG_RUNDIR);
163 if (!rundir) {
164 ret = -ENOMEM;
165 goto end;
166 }
167 } else {
168 /*
169 * Create rundir from home path. This will create something like
170 * $HOME/.lttng
171 */
172 home_path = utils_get_home_dir();
173
174 if (home_path == NULL) {
175 /* TODO: Add --socket PATH option */
176 ERR("Can't get HOME directory for sockets creation.");
177 ret = -EPERM;
178 goto end;
179 }
180
181 ret = asprintf(&rundir, DEFAULT_LTTNG_HOME_RUNDIR, home_path);
182 if (ret < 0) {
183 ret = -ENOMEM;
184 goto end;
185 }
186 }
187
188 ret = asprintf(&relayd_path, DEFAULT_RELAYD_PATH, rundir);
189 if (ret < 0) {
190 ret = -ENOMEM;
191 goto end;
192 }
193
194 ret = create_lttng_rundir_with_perm(rundir);
195 if (ret < 0) {
196 goto end;
197 }
198
199 ret = create_lttng_rundir_with_perm(relayd_path);
200 if (ret < 0) {
201 goto end;
202 }
203
204 if (is_root) {
205 if (strlen(health_unix_sock_path) != 0) {
206 goto end;
207 }
208 snprintf(health_unix_sock_path, sizeof(health_unix_sock_path),
209 DEFAULT_GLOBAL_RELAY_HEALTH_UNIX_SOCK,
210 (int) getpid());
211 } else {
212 /* Set health check Unix path */
213 if (strlen(health_unix_sock_path) != 0) {
214 goto end;
215 }
216
217 snprintf(health_unix_sock_path, sizeof(health_unix_sock_path),
218 DEFAULT_HOME_RELAY_HEALTH_UNIX_SOCK,
219 home_path, (int) getpid());
220 }
221
222 end:
223 free(rundir);
224 free(relayd_path);
225 return ret;
226 }
227
/*
 * Callback creating a unix socket at the path passed through `data`
 * (a NUL-terminated string) with lttcomm_create_unix_sock().
 *
 * On success, store the new descriptor in `*out_fd` and return 0.
 * On failure, return the negative value reported by the comm layer and
 * leave `*out_fd` untouched.
 */
static
int open_unix_socket(void *data, int *out_fd)
{
	const char *sock_path = data;
	const int fd = lttcomm_create_unix_sock(sock_path);

	if (fd < 0) {
		return fd;
	}

	*out_fd = fd;
	return 0;
}
244
/*
 * Callback accepting a connection on the listening socket whose
 * descriptor is pointed to by `data` (an `int *`), using
 * lttcomm_accept_unix_sock().
 *
 * On success, store the accepted descriptor in `*out_fd` and return 0.
 * On failure, return the negative value reported by the comm layer and
 * leave `*out_fd` untouched.
 */
static
int accept_unix_socket(void *data, int *out_fd)
{
	const int *listening_fd = data;
	const int client_fd = lttcomm_accept_unix_sock(*listening_fd);

	if (client_fd < 0) {
		return client_fd;
	}

	*out_fd = client_fd;
	return 0;
}
261
262 /*
263 * Thread managing health check socket.
264 */
265 void *thread_manage_health(void *data)
266 {
267 int sock = -1, new_sock = -1, ret, i, pollfd, err = -1;
268 uint32_t revents, nb_fd;
269 struct lttng_poll_event events;
270 struct health_comm_msg msg;
271 struct health_comm_reply reply;
272 int is_root;
273 char *sock_name;
274
275 DBG("[thread] Manage health check started");
276
277 setup_health_path();
278
279 rcu_register_thread();
280
281 /* We might hit an error path before this is created. */
282 lttng_poll_init(&events);
283
284 /* Create unix socket */
285 ret = asprintf(&sock_name, "Unix socket @ %s", health_unix_sock_path);
286 if (ret == -1) {
287 PERROR("Failed to allocate unix socket name");
288 err = -1;
289 goto error;
290 }
291 ret = fd_tracker_open_unsuspendable_fd(the_fd_tracker, &sock,
292 (const char **) &sock_name, 1, open_unix_socket,
293 health_unix_sock_path);
294 free(sock_name);
295 if (ret < 0) {
296 ERR("Unable to create health check Unix socket");
297 err = -1;
298 goto error;
299 }
300
301 is_root = !getuid();
302 if (is_root) {
303 /* lttng health client socket path permissions */
304 ret = chown(health_unix_sock_path, 0,
305 utils_get_group_id(tracing_group_name));
306 if (ret < 0) {
307 ERR("Unable to set group on %s", health_unix_sock_path);
308 PERROR("chown");
309 err = -1;
310 goto error;
311 }
312
313 ret = chmod(health_unix_sock_path,
314 S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP);
315 if (ret < 0) {
316 ERR("Unable to set permissions on %s", health_unix_sock_path);
317 PERROR("chmod");
318 err = -1;
319 goto error;
320 }
321 }
322
323 /*
324 * Set the CLOEXEC flag. Return code is useless because either way, the
325 * show must go on.
326 */
327 (void) utils_set_fd_cloexec(sock);
328
329 ret = lttcomm_listen_unix_sock(sock);
330 if (ret < 0) {
331 goto error;
332 }
333
334 /* Size is set to 2 for the unix socket and quit pipe. */
335 ret = fd_tracker_util_poll_create(the_fd_tracker,
336 "Health management thread epoll", &events, 2,
337 LTTNG_CLOEXEC);
338 if (ret < 0) {
339 ERR("Poll set creation failed");
340 goto error;
341 }
342
343 ret = lttng_poll_add(&events, health_quit_pipe[0], LPOLLIN);
344 if (ret < 0) {
345 goto error;
346 }
347
348 /* Add the application registration socket */
349 ret = lttng_poll_add(&events, sock, LPOLLIN | LPOLLPRI);
350 if (ret < 0) {
351 goto error;
352 }
353
354 lttng_relay_notify_ready();
355
356 while (1) {
357 char *accepted_socket_name;
358
359 DBG("Health check ready");
360
361 /* Inifinite blocking call, waiting for transmission */
362 restart:
363 ret = lttng_poll_wait(&events, -1);
364 if (ret < 0) {
365 /*
366 * Restart interrupted system call.
367 */
368 if (errno == EINTR) {
369 goto restart;
370 }
371 goto error;
372 }
373
374 nb_fd = ret;
375
376 for (i = 0; i < nb_fd; i++) {
377 /* Fetch once the poll data */
378 revents = LTTNG_POLL_GETEV(&events, i);
379 pollfd = LTTNG_POLL_GETFD(&events, i);
380
381 if (!revents) {
382 /* No activity for this FD (poll implementation). */
383 continue;
384 }
385
386 /* Thread quit pipe has been closed. Killing thread. */
387 ret = check_health_quit_pipe(pollfd, revents);
388 if (ret) {
389 err = 0;
390 goto exit;
391 }
392
393 /* Event on the registration socket */
394 if (pollfd == sock) {
395 if (revents & LPOLLIN) {
396 continue;
397 } else if (revents & (LPOLLERR | LPOLLHUP | LPOLLRDHUP)) {
398 ERR("Health socket poll error");
399 goto error;
400 } else {
401 ERR("Unexpected poll events %u for sock %d", revents, pollfd);
402 goto error;
403 }
404 }
405 }
406
407 ret = asprintf(&accepted_socket_name, "Socket accepted from unix socket @ %s",
408 health_unix_sock_path);
409 if (ret == -1) {
410 PERROR("Failed to allocate name of accepted socket from unix socket @ %s",
411 health_unix_sock_path);
412 goto error;
413 }
414 ret = fd_tracker_open_unsuspendable_fd(the_fd_tracker, &new_sock,
415 (const char **) &accepted_socket_name, 1,
416 accept_unix_socket, &sock);
417 free(accepted_socket_name);
418 if (ret < 0) {
419 goto error;
420 }
421
422 /*
423 * Set the CLOEXEC flag. Return code is useless because either way, the
424 * show must go on.
425 */
426 (void) utils_set_fd_cloexec(new_sock);
427
428 DBG("Receiving data from client for health...");
429 ret = lttcomm_recv_unix_sock(new_sock, (void *)&msg, sizeof(msg));
430 if (ret <= 0) {
431 DBG("Nothing recv() from client... continuing");
432 ret = fd_tracker_close_unsuspendable_fd(the_fd_tracker,
433 &new_sock, 1, fd_tracker_util_close_fd,
434 NULL);
435 if (ret) {
436 PERROR("close");
437 }
438 new_sock = -1;
439 continue;
440 }
441
442 rcu_thread_online();
443
444 assert(msg.cmd == HEALTH_CMD_CHECK);
445
446 memset(&reply, 0, sizeof(reply));
447 for (i = 0; i < NR_HEALTH_RELAYD_TYPES; i++) {
448 /*
449 * health_check_state return 0 if thread is in
450 * error.
451 */
452 if (!health_check_state(health_relayd, i)) {
453 reply.ret_code |= 1ULL << i;
454 }
455 }
456
457 DBG2("Health check return value %" PRIx64, reply.ret_code);
458
459 ret = send_unix_sock(new_sock, (void *) &reply, sizeof(reply));
460 if (ret < 0) {
461 ERR("Failed to send health data back to client");
462 }
463
464 /* End of transmission */
465 ret = fd_tracker_close_unsuspendable_fd(the_fd_tracker,
466 &new_sock, 1, fd_tracker_util_close_fd,
467 NULL);
468 if (ret) {
469 PERROR("close");
470 }
471 new_sock = -1;
472 }
473
474 error:
475 lttng_relay_stop_threads();
476 exit:
477 if (err) {
478 ERR("Health error occurred in %s", __func__);
479 }
480 DBG("Health check thread dying");
481 unlink(health_unix_sock_path);
482 if (sock >= 0) {
483 ret = fd_tracker_close_unsuspendable_fd(the_fd_tracker, &sock,
484 1, fd_tracker_util_close_fd, NULL);
485 if (ret) {
486 PERROR("close");
487 }
488 }
489
490 /*
491 * We do NOT rmdir rundir nor the relayd path because there are
492 * other processes using them.
493 */
494
495 (void) fd_tracker_util_poll_clean(the_fd_tracker, &events);
496
497 rcu_unregister_thread();
498 return NULL;
499 }
This page took 0.041185 seconds and 5 git commands to generate.