Fix: Build out of src tree
[lttng-tools.git] / src / common / consumer.c
CommitLineData
3bd1e081
MD
1/*
2 * Copyright (C) 2011 - Julien Desfossez <julien.desfossez@polymtl.ca>
3 * Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
00e2e675 4 * 2012 - David Goulet <dgoulet@efficios.com>
3bd1e081 5 *
d14d33bf
AM
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License, version 2 only,
8 * as published by the Free Software Foundation.
3bd1e081 9 *
d14d33bf
AM
10 * This program is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
3bd1e081 14 *
d14d33bf
AM
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
3bd1e081
MD
18 */
19
20#define _GNU_SOURCE
21#include <assert.h>
3bd1e081
MD
22#include <poll.h>
23#include <pthread.h>
24#include <stdlib.h>
25#include <string.h>
26#include <sys/mman.h>
27#include <sys/socket.h>
28#include <sys/types.h>
29#include <unistd.h>
77c7c900 30#include <inttypes.h>
3bd1e081 31
990570ed 32#include <common/common.h>
fb3a43a9
DG
33#include <common/utils.h>
34#include <common/compat/poll.h>
10a8a223 35#include <common/kernel-ctl/kernel-ctl.h>
00e2e675 36#include <common/sessiond-comm/relayd.h>
10a8a223
DG
37#include <common/sessiond-comm/sessiond-comm.h>
38#include <common/kernel-consumer/kernel-consumer.h>
00e2e675 39#include <common/relayd/relayd.h>
10a8a223
DG
40#include <common/ust-consumer/ust-consumer.h>
41
42#include "consumer.h"
3bd1e081
MD
43
44struct lttng_consumer_global_data consumer_data = {
3bd1e081
MD
45 .stream_count = 0,
46 .need_update = 1,
47 .type = LTTNG_CONSUMER_UNKNOWN,
48};
49
50/* timeout parameter, to control the polling thread grace period. */
51int consumer_poll_timeout = -1;
52
53/*
54 * Flag to inform the polling thread to quit when all fd hung up. Updated by
55 * the consumer_thread_receive_fds when it notices that all fds has hung up.
56 * Also updated by the signal handler (consumer_should_exit()). Read by the
57 * polling threads.
58 */
59volatile int consumer_quit = 0;
60
61/*
62 * Find a stream. The consumer_data.lock must be locked during this
63 * call.
64 */
65static struct lttng_consumer_stream *consumer_find_stream(int key)
66{
e4421fec
DG
67 struct lttng_ht_iter iter;
68 struct lttng_ht_node_ulong *node;
69 struct lttng_consumer_stream *stream = NULL;
3bd1e081 70
7ad0a0cb
MD
71 /* Negative keys are lookup failures */
72 if (key < 0)
73 return NULL;
e4421fec 74
6065ceec
DG
75 rcu_read_lock();
76
e4421fec
DG
77 lttng_ht_lookup(consumer_data.stream_ht, (void *)((unsigned long) key),
78 &iter);
79 node = lttng_ht_iter_get_node_ulong(&iter);
80 if (node != NULL) {
81 stream = caa_container_of(node, struct lttng_consumer_stream, node);
3bd1e081 82 }
e4421fec 83
6065ceec
DG
84 rcu_read_unlock();
85
e4421fec 86 return stream;
3bd1e081
MD
87}
88
7ad0a0cb
MD
89static void consumer_steal_stream_key(int key)
90{
91 struct lttng_consumer_stream *stream;
92
04253271 93 rcu_read_lock();
7ad0a0cb 94 stream = consumer_find_stream(key);
04253271 95 if (stream) {
7ad0a0cb 96 stream->key = -1;
04253271
MD
97 /*
98 * We don't want the lookup to match, but we still need
99 * to iterate on this stream when iterating over the hash table. Just
100 * change the node key.
101 */
102 stream->node.key = -1;
103 }
104 rcu_read_unlock();
7ad0a0cb
MD
105}
106
3bd1e081
MD
107static struct lttng_consumer_channel *consumer_find_channel(int key)
108{
e4421fec
DG
109 struct lttng_ht_iter iter;
110 struct lttng_ht_node_ulong *node;
111 struct lttng_consumer_channel *channel = NULL;
3bd1e081 112
7ad0a0cb
MD
113 /* Negative keys are lookup failures */
114 if (key < 0)
115 return NULL;
e4421fec 116
6065ceec
DG
117 rcu_read_lock();
118
e4421fec
DG
119 lttng_ht_lookup(consumer_data.channel_ht, (void *)((unsigned long) key),
120 &iter);
121 node = lttng_ht_iter_get_node_ulong(&iter);
122 if (node != NULL) {
123 channel = caa_container_of(node, struct lttng_consumer_channel, node);
3bd1e081 124 }
e4421fec 125
6065ceec
DG
126 rcu_read_unlock();
127
e4421fec 128 return channel;
3bd1e081
MD
129}
130
7ad0a0cb
MD
131static void consumer_steal_channel_key(int key)
132{
133 struct lttng_consumer_channel *channel;
134
04253271 135 rcu_read_lock();
7ad0a0cb 136 channel = consumer_find_channel(key);
04253271 137 if (channel) {
7ad0a0cb 138 channel->key = -1;
04253271
MD
139 /*
140 * We don't want the lookup to match, but we still need
141 * to iterate on this channel when iterating over the hash table. Just
142 * change the node key.
143 */
144 channel->node.key = -1;
145 }
146 rcu_read_unlock();
7ad0a0cb
MD
147}
148
702b1ea4
MD
149static
150void consumer_free_stream(struct rcu_head *head)
151{
152 struct lttng_ht_node_ulong *node =
153 caa_container_of(head, struct lttng_ht_node_ulong, head);
154 struct lttng_consumer_stream *stream =
155 caa_container_of(node, struct lttng_consumer_stream, node);
156
157 free(stream);
158}
159
00e2e675
DG
160/*
161 * RCU protected relayd socket pair free.
162 */
163static void consumer_rcu_free_relayd(struct rcu_head *head)
164{
165 struct lttng_ht_node_ulong *node =
166 caa_container_of(head, struct lttng_ht_node_ulong, head);
167 struct consumer_relayd_sock_pair *relayd =
168 caa_container_of(node, struct consumer_relayd_sock_pair, node);
169
170 free(relayd);
171}
172
173/*
174 * Destroy and free relayd socket pair object.
175 *
176 * This function MUST be called with the consumer_data lock acquired.
177 */
d09e1200 178static void destroy_relayd(struct consumer_relayd_sock_pair *relayd)
00e2e675
DG
179{
180 int ret;
181 struct lttng_ht_iter iter;
182
173af62f
DG
183 if (relayd == NULL) {
184 return;
185 }
186
00e2e675
DG
187 DBG("Consumer destroy and close relayd socket pair");
188
189 iter.iter.node = &relayd->node.node;
190 ret = lttng_ht_del(consumer_data.relayd_ht, &iter);
173af62f
DG
191 if (ret != 0) {
192 /* We assume the relayd was already destroyed */
193 return;
194 }
00e2e675
DG
195
196 /* Close all sockets */
197 pthread_mutex_lock(&relayd->ctrl_sock_mutex);
198 (void) relayd_close(&relayd->control_sock);
199 pthread_mutex_unlock(&relayd->ctrl_sock_mutex);
200 (void) relayd_close(&relayd->data_sock);
201
202 /* RCU free() call */
203 call_rcu(&relayd->node.head, consumer_rcu_free_relayd);
204}
205
a6ba4fe1
DG
206/*
207 * Flag a relayd socket pair for destruction. Destroy it if the refcount
208 * reaches zero.
209 *
210 * RCU read side lock MUST be aquired before calling this function.
211 */
212void consumer_flag_relayd_for_destroy(struct consumer_relayd_sock_pair *relayd)
213{
214 assert(relayd);
215
216 /* Set destroy flag for this object */
217 uatomic_set(&relayd->destroy_flag, 1);
218
219 /* Destroy the relayd if refcount is 0 */
220 if (uatomic_read(&relayd->refcount) == 0) {
d09e1200 221 destroy_relayd(relayd);
a6ba4fe1
DG
222 }
223}
224
3bd1e081
MD
225/*
226 * Remove a stream from the global list protected by a mutex. This
227 * function is also responsible for freeing its data structures.
228 */
229void consumer_del_stream(struct lttng_consumer_stream *stream)
230{
231 int ret;
e4421fec 232 struct lttng_ht_iter iter;
3bd1e081 233 struct lttng_consumer_channel *free_chan = NULL;
00e2e675
DG
234 struct consumer_relayd_sock_pair *relayd;
235
236 assert(stream);
3bd1e081
MD
237
238 pthread_mutex_lock(&consumer_data.lock);
239
240 switch (consumer_data.type) {
241 case LTTNG_CONSUMER_KERNEL:
242 if (stream->mmap_base != NULL) {
243 ret = munmap(stream->mmap_base, stream->mmap_len);
244 if (ret != 0) {
245 perror("munmap");
246 }
247 }
248 break;
7753dea8
MD
249 case LTTNG_CONSUMER32_UST:
250 case LTTNG_CONSUMER64_UST:
3bd1e081
MD
251 lttng_ustconsumer_del_stream(stream);
252 break;
253 default:
254 ERR("Unknown consumer_data type");
255 assert(0);
256 goto end;
257 }
258
6065ceec 259 rcu_read_lock();
04253271
MD
260 iter.iter.node = &stream->node.node;
261 ret = lttng_ht_del(consumer_data.stream_ht, &iter);
262 assert(!ret);
e4421fec 263
6065ceec
DG
264 rcu_read_unlock();
265
3bd1e081
MD
266 if (consumer_data.stream_count <= 0) {
267 goto end;
268 }
269 consumer_data.stream_count--;
270 if (!stream) {
271 goto end;
272 }
273 if (stream->out_fd >= 0) {
4c462e79
MD
274 ret = close(stream->out_fd);
275 if (ret) {
276 PERROR("close");
277 }
3bd1e081 278 }
b5c5fc29 279 if (stream->wait_fd >= 0 && !stream->wait_fd_is_copy) {
4c462e79
MD
280 ret = close(stream->wait_fd);
281 if (ret) {
282 PERROR("close");
283 }
3bd1e081 284 }
2c1dd183 285 if (stream->shm_fd >= 0 && stream->wait_fd != stream->shm_fd) {
4c462e79
MD
286 ret = close(stream->shm_fd);
287 if (ret) {
288 PERROR("close");
289 }
3bd1e081 290 }
00e2e675
DG
291
292 /* Check and cleanup relayd */
b0b335c8 293 rcu_read_lock();
00e2e675
DG
294 relayd = consumer_find_relayd(stream->net_seq_idx);
295 if (relayd != NULL) {
b0b335c8
MD
296 uatomic_dec(&relayd->refcount);
297 assert(uatomic_read(&relayd->refcount) >= 0);
173af62f 298
3f8e211f
DG
299 /* Closing streams requires to lock the control socket. */
300 pthread_mutex_lock(&relayd->ctrl_sock_mutex);
173af62f
DG
301 ret = relayd_send_close_stream(&relayd->control_sock,
302 stream->relayd_stream_id,
303 stream->next_net_seq_num - 1);
3f8e211f 304 pthread_mutex_unlock(&relayd->ctrl_sock_mutex);
173af62f 305 if (ret < 0) {
a4b92340
DG
306 DBG("Unable to close stream on the relayd. Continuing");
307 /*
308 * Continue here. There is nothing we can do for the relayd.
309 * Chances are that the relayd has closed the socket so we just
310 * continue cleaning up.
311 */
173af62f
DG
312 }
313
314 /* Both conditions are met, we destroy the relayd. */
315 if (uatomic_read(&relayd->refcount) == 0 &&
316 uatomic_read(&relayd->destroy_flag)) {
d09e1200 317 destroy_relayd(relayd);
00e2e675 318 }
00e2e675 319 }
b0b335c8 320 rcu_read_unlock();
00e2e675 321
c30aaa51
MD
322 uatomic_dec(&stream->chan->refcount);
323 if (!uatomic_read(&stream->chan->refcount)
324 && !uatomic_read(&stream->chan->nb_init_streams)) {
3bd1e081 325 free_chan = stream->chan;
00e2e675
DG
326 }
327
702b1ea4 328 call_rcu(&stream->node.head, consumer_free_stream);
3bd1e081
MD
329end:
330 consumer_data.need_update = 1;
331 pthread_mutex_unlock(&consumer_data.lock);
332
c30aaa51 333 if (free_chan) {
3bd1e081 334 consumer_del_channel(free_chan);
c30aaa51 335 }
3bd1e081
MD
336}
337
338struct lttng_consumer_stream *consumer_allocate_stream(
339 int channel_key, int stream_key,
340 int shm_fd, int wait_fd,
341 enum lttng_consumer_stream_state state,
342 uint64_t mmap_len,
343 enum lttng_event_output output,
6df2e2c9
MD
344 const char *path_name,
345 uid_t uid,
00e2e675
DG
346 gid_t gid,
347 int net_index,
c80048c6
MD
348 int metadata_flag,
349 int *alloc_ret)
3bd1e081
MD
350{
351 struct lttng_consumer_stream *stream;
352 int ret;
353
effcf122 354 stream = zmalloc(sizeof(*stream));
3bd1e081
MD
355 if (stream == NULL) {
356 perror("malloc struct lttng_consumer_stream");
c80048c6
MD
357 *alloc_ret = -ENOMEM;
358 return NULL;
3bd1e081
MD
359 }
360 stream->chan = consumer_find_channel(channel_key);
361 if (!stream->chan) {
c80048c6
MD
362 *alloc_ret = -ENOENT;
363 goto error;
3bd1e081
MD
364 }
365 stream->chan->refcount++;
366 stream->key = stream_key;
367 stream->shm_fd = shm_fd;
368 stream->wait_fd = wait_fd;
369 stream->out_fd = -1;
370 stream->out_fd_offset = 0;
371 stream->state = state;
372 stream->mmap_len = mmap_len;
373 stream->mmap_base = NULL;
374 stream->output = output;
6df2e2c9
MD
375 stream->uid = uid;
376 stream->gid = gid;
00e2e675
DG
377 stream->net_seq_idx = net_index;
378 stream->metadata_flag = metadata_flag;
379 strncpy(stream->path_name, path_name, sizeof(stream->path_name));
380 stream->path_name[sizeof(stream->path_name) - 1] = '\0';
e4421fec 381 lttng_ht_node_init_ulong(&stream->node, stream->key);
00e2e675 382 lttng_ht_node_init_ulong(&stream->waitfd_node, stream->wait_fd);
3bd1e081
MD
383
384 switch (consumer_data.type) {
385 case LTTNG_CONSUMER_KERNEL:
386 break;
7753dea8
MD
387 case LTTNG_CONSUMER32_UST:
388 case LTTNG_CONSUMER64_UST:
5af2f756 389 stream->cpu = stream->chan->cpucount++;
3bd1e081
MD
390 ret = lttng_ustconsumer_allocate_stream(stream);
391 if (ret) {
c80048c6
MD
392 *alloc_ret = -EINVAL;
393 goto error;
3bd1e081
MD
394 }
395 break;
396 default:
397 ERR("Unknown consumer_data type");
c80048c6
MD
398 *alloc_ret = -EINVAL;
399 goto error;
3bd1e081 400 }
c30aaa51
MD
401
402 /*
403 * When nb_init_streams reaches 0, we don't need to trigger any action in
404 * terms of destroying the associated channel, because the action that
405 * causes the count to become 0 also causes a stream to be added. The
406 * channel deletion will thus be triggered by the following removal of this
407 * stream.
408 */
409 if (uatomic_read(&stream->chan->nb_init_streams) > 0) {
410 uatomic_dec(&stream->chan->nb_init_streams);
411 }
412
413 DBG3("Allocated stream %s (key %d, shm_fd %d, wait_fd %d, mmap_len %llu,"
414 " out_fd %d, net_seq_idx %d)", stream->path_name, stream->key,
415 stream->shm_fd, stream->wait_fd,
416 (unsigned long long) stream->mmap_len, stream->out_fd,
00e2e675 417 stream->net_seq_idx);
3bd1e081 418 return stream;
c80048c6
MD
419
420error:
421 free(stream);
422 return NULL;
3bd1e081
MD
423}
424
425/*
426 * Add a stream to the global list protected by a mutex.
427 */
428int consumer_add_stream(struct lttng_consumer_stream *stream)
429{
430 int ret = 0;
c77fc10a
DG
431 struct lttng_ht_node_ulong *node;
432 struct lttng_ht_iter iter;
00e2e675 433 struct consumer_relayd_sock_pair *relayd;
3bd1e081
MD
434
435 pthread_mutex_lock(&consumer_data.lock);
7ad0a0cb
MD
436 /* Steal stream identifier, for UST */
437 consumer_steal_stream_key(stream->key);
c77fc10a 438
b0b335c8 439 rcu_read_lock();
c77fc10a
DG
440 lttng_ht_lookup(consumer_data.stream_ht,
441 (void *)((unsigned long) stream->key), &iter);
442 node = lttng_ht_iter_get_node_ulong(&iter);
443 if (node != NULL) {
444 rcu_read_unlock();
445 /* Stream already exist. Ignore the insertion */
446 goto end;
447 }
448
04253271 449 lttng_ht_add_unique_ulong(consumer_data.stream_ht, &stream->node);
00e2e675
DG
450
451 /* Check and cleanup relayd */
452 relayd = consumer_find_relayd(stream->net_seq_idx);
453 if (relayd != NULL) {
b0b335c8 454 uatomic_inc(&relayd->refcount);
00e2e675 455 }
b0b335c8 456 rcu_read_unlock();
00e2e675
DG
457
458 /* Update consumer data */
3bd1e081
MD
459 consumer_data.stream_count++;
460 consumer_data.need_update = 1;
461
3bd1e081
MD
462end:
463 pthread_mutex_unlock(&consumer_data.lock);
702b1ea4 464
3bd1e081
MD
465 return ret;
466}
467
00e2e675 468/*
3f8e211f
DG
469 * Add relayd socket to global consumer data hashtable. RCU read side lock MUST
470 * be acquired before calling this.
00e2e675 471 */
d09e1200 472static int add_relayd(struct consumer_relayd_sock_pair *relayd)
00e2e675
DG
473{
474 int ret = 0;
475 struct lttng_ht_node_ulong *node;
476 struct lttng_ht_iter iter;
477
478 if (relayd == NULL) {
479 ret = -1;
480 goto end;
481 }
482
00e2e675
DG
483 lttng_ht_lookup(consumer_data.relayd_ht,
484 (void *)((unsigned long) relayd->net_seq_idx), &iter);
485 node = lttng_ht_iter_get_node_ulong(&iter);
486 if (node != NULL) {
00e2e675
DG
487 /* Relayd already exist. Ignore the insertion */
488 goto end;
489 }
490 lttng_ht_add_unique_ulong(consumer_data.relayd_ht, &relayd->node);
491
00e2e675
DG
492end:
493 return ret;
494}
495
496/*
497 * Allocate and return a consumer relayd socket.
498 */
499struct consumer_relayd_sock_pair *consumer_allocate_relayd_sock_pair(
500 int net_seq_idx)
501{
502 struct consumer_relayd_sock_pair *obj = NULL;
503
504 /* Negative net sequence index is a failure */
505 if (net_seq_idx < 0) {
506 goto error;
507 }
508
509 obj = zmalloc(sizeof(struct consumer_relayd_sock_pair));
510 if (obj == NULL) {
511 PERROR("zmalloc relayd sock");
512 goto error;
513 }
514
515 obj->net_seq_idx = net_seq_idx;
516 obj->refcount = 0;
173af62f 517 obj->destroy_flag = 0;
00e2e675
DG
518 lttng_ht_node_init_ulong(&obj->node, obj->net_seq_idx);
519 pthread_mutex_init(&obj->ctrl_sock_mutex, NULL);
520
521error:
522 return obj;
523}
524
525/*
526 * Find a relayd socket pair in the global consumer data.
527 *
528 * Return the object if found else NULL.
b0b335c8
MD
529 * RCU read-side lock must be held across this call and while using the
530 * returned object.
00e2e675
DG
531 */
532struct consumer_relayd_sock_pair *consumer_find_relayd(int key)
533{
534 struct lttng_ht_iter iter;
535 struct lttng_ht_node_ulong *node;
536 struct consumer_relayd_sock_pair *relayd = NULL;
537
538 /* Negative keys are lookup failures */
539 if (key < 0) {
540 goto error;
541 }
542
00e2e675
DG
543 lttng_ht_lookup(consumer_data.relayd_ht, (void *)((unsigned long) key),
544 &iter);
545 node = lttng_ht_iter_get_node_ulong(&iter);
546 if (node != NULL) {
547 relayd = caa_container_of(node, struct consumer_relayd_sock_pair, node);
548 }
549
00e2e675
DG
550error:
551 return relayd;
552}
553
554/*
555 * Handle stream for relayd transmission if the stream applies for network
556 * streaming where the net sequence index is set.
557 *
558 * Return destination file descriptor or negative value on error.
559 */
6197aea7 560static int write_relayd_stream_header(struct lttng_consumer_stream *stream,
1d4dfdef
DG
561 size_t data_size, unsigned long padding,
562 struct consumer_relayd_sock_pair *relayd)
00e2e675
DG
563{
564 int outfd = -1, ret;
00e2e675
DG
565 struct lttcomm_relayd_data_hdr data_hdr;
566
567 /* Safety net */
568 assert(stream);
6197aea7 569 assert(relayd);
00e2e675
DG
570
571 /* Reset data header */
572 memset(&data_hdr, 0, sizeof(data_hdr));
573
00e2e675
DG
574 if (stream->metadata_flag) {
575 /* Caller MUST acquire the relayd control socket lock */
576 ret = relayd_send_metadata(&relayd->control_sock, data_size);
577 if (ret < 0) {
578 goto error;
579 }
580
581 /* Metadata are always sent on the control socket. */
582 outfd = relayd->control_sock.fd;
583 } else {
584 /* Set header with stream information */
585 data_hdr.stream_id = htobe64(stream->relayd_stream_id);
586 data_hdr.data_size = htobe32(data_size);
1d4dfdef 587 data_hdr.padding_size = htobe32(padding);
173af62f 588 data_hdr.net_seq_num = htobe64(stream->next_net_seq_num++);
00e2e675
DG
589 /* Other fields are zeroed previously */
590
591 ret = relayd_send_data_hdr(&relayd->data_sock, &data_hdr,
592 sizeof(data_hdr));
593 if (ret < 0) {
594 goto error;
595 }
596
597 /* Set to go on data socket */
598 outfd = relayd->data_sock.fd;
599 }
600
601error:
602 return outfd;
603}
604
3bd1e081
MD
605/*
606 * Update a stream according to what we just received.
607 */
608void consumer_change_stream_state(int stream_key,
609 enum lttng_consumer_stream_state state)
610{
611 struct lttng_consumer_stream *stream;
612
613 pthread_mutex_lock(&consumer_data.lock);
614 stream = consumer_find_stream(stream_key);
615 if (stream) {
616 stream->state = state;
617 }
618 consumer_data.need_update = 1;
619 pthread_mutex_unlock(&consumer_data.lock);
620}
621
702b1ea4
MD
622static
623void consumer_free_channel(struct rcu_head *head)
624{
625 struct lttng_ht_node_ulong *node =
626 caa_container_of(head, struct lttng_ht_node_ulong, head);
627 struct lttng_consumer_channel *channel =
628 caa_container_of(node, struct lttng_consumer_channel, node);
629
630 free(channel);
631}
632
3bd1e081
MD
633/*
634 * Remove a channel from the global list protected by a mutex. This
635 * function is also responsible for freeing its data structures.
636 */
637void consumer_del_channel(struct lttng_consumer_channel *channel)
638{
639 int ret;
e4421fec 640 struct lttng_ht_iter iter;
3bd1e081
MD
641
642 pthread_mutex_lock(&consumer_data.lock);
643
644 switch (consumer_data.type) {
645 case LTTNG_CONSUMER_KERNEL:
646 break;
7753dea8
MD
647 case LTTNG_CONSUMER32_UST:
648 case LTTNG_CONSUMER64_UST:
3bd1e081
MD
649 lttng_ustconsumer_del_channel(channel);
650 break;
651 default:
652 ERR("Unknown consumer_data type");
653 assert(0);
654 goto end;
655 }
656
6065ceec 657 rcu_read_lock();
04253271
MD
658 iter.iter.node = &channel->node.node;
659 ret = lttng_ht_del(consumer_data.channel_ht, &iter);
660 assert(!ret);
6065ceec
DG
661 rcu_read_unlock();
662
3bd1e081
MD
663 if (channel->mmap_base != NULL) {
664 ret = munmap(channel->mmap_base, channel->mmap_len);
665 if (ret != 0) {
666 perror("munmap");
667 }
668 }
b5c5fc29 669 if (channel->wait_fd >= 0 && !channel->wait_fd_is_copy) {
4c462e79
MD
670 ret = close(channel->wait_fd);
671 if (ret) {
672 PERROR("close");
673 }
3bd1e081 674 }
2c1dd183 675 if (channel->shm_fd >= 0 && channel->wait_fd != channel->shm_fd) {
4c462e79
MD
676 ret = close(channel->shm_fd);
677 if (ret) {
678 PERROR("close");
679 }
3bd1e081 680 }
702b1ea4
MD
681
682 call_rcu(&channel->node.head, consumer_free_channel);
3bd1e081
MD
683end:
684 pthread_mutex_unlock(&consumer_data.lock);
685}
686
687struct lttng_consumer_channel *consumer_allocate_channel(
688 int channel_key,
689 int shm_fd, int wait_fd,
690 uint64_t mmap_len,
c30aaa51
MD
691 uint64_t max_sb_size,
692 unsigned int nb_init_streams)
3bd1e081
MD
693{
694 struct lttng_consumer_channel *channel;
695 int ret;
696
276b26d1 697 channel = zmalloc(sizeof(*channel));
3bd1e081
MD
698 if (channel == NULL) {
699 perror("malloc struct lttng_consumer_channel");
700 goto end;
701 }
702 channel->key = channel_key;
703 channel->shm_fd = shm_fd;
704 channel->wait_fd = wait_fd;
705 channel->mmap_len = mmap_len;
706 channel->max_sb_size = max_sb_size;
707 channel->refcount = 0;
c30aaa51 708 channel->nb_init_streams = nb_init_streams;
e4421fec 709 lttng_ht_node_init_ulong(&channel->node, channel->key);
3bd1e081
MD
710
711 switch (consumer_data.type) {
712 case LTTNG_CONSUMER_KERNEL:
713 channel->mmap_base = NULL;
714 channel->mmap_len = 0;
715 break;
7753dea8
MD
716 case LTTNG_CONSUMER32_UST:
717 case LTTNG_CONSUMER64_UST:
3bd1e081
MD
718 ret = lttng_ustconsumer_allocate_channel(channel);
719 if (ret) {
720 free(channel);
721 return NULL;
722 }
723 break;
724 default:
725 ERR("Unknown consumer_data type");
726 assert(0);
727 goto end;
728 }
729 DBG("Allocated channel (key %d, shm_fd %d, wait_fd %d, mmap_len %llu, max_sb_size %llu)",
00e2e675 730 channel->key, channel->shm_fd, channel->wait_fd,
3bd1e081
MD
731 (unsigned long long) channel->mmap_len,
732 (unsigned long long) channel->max_sb_size);
733end:
734 return channel;
735}
736
737/*
738 * Add a channel to the global list protected by a mutex.
739 */
740int consumer_add_channel(struct lttng_consumer_channel *channel)
741{
c77fc10a
DG
742 struct lttng_ht_node_ulong *node;
743 struct lttng_ht_iter iter;
744
3bd1e081 745 pthread_mutex_lock(&consumer_data.lock);
7ad0a0cb
MD
746 /* Steal channel identifier, for UST */
747 consumer_steal_channel_key(channel->key);
6065ceec 748 rcu_read_lock();
c77fc10a
DG
749
750 lttng_ht_lookup(consumer_data.channel_ht,
751 (void *)((unsigned long) channel->key), &iter);
752 node = lttng_ht_iter_get_node_ulong(&iter);
753 if (node != NULL) {
754 /* Channel already exist. Ignore the insertion */
755 goto end;
756 }
757
04253271 758 lttng_ht_add_unique_ulong(consumer_data.channel_ht, &channel->node);
c77fc10a
DG
759
760end:
6065ceec 761 rcu_read_unlock();
3bd1e081 762 pthread_mutex_unlock(&consumer_data.lock);
702b1ea4 763
7ad0a0cb 764 return 0;
3bd1e081
MD
765}
766
767/*
768 * Allocate the pollfd structure and the local view of the out fds to avoid
769 * doing a lookup in the linked list and concurrency issues when writing is
770 * needed. Called with consumer_data.lock held.
771 *
772 * Returns the number of fds in the structures.
773 */
774int consumer_update_poll_array(
775 struct lttng_consumer_local_data *ctx, struct pollfd **pollfd,
fb3a43a9 776 struct lttng_consumer_stream **local_stream)
3bd1e081 777{
3bd1e081 778 int i = 0;
e4421fec
DG
779 struct lttng_ht_iter iter;
780 struct lttng_consumer_stream *stream;
3bd1e081
MD
781
782 DBG("Updating poll fd array");
481d6c57 783 rcu_read_lock();
e4421fec
DG
784 cds_lfht_for_each_entry(consumer_data.stream_ht->ht, &iter.iter, stream,
785 node.node) {
786 if (stream->state != LTTNG_CONSUMER_ACTIVE_STREAM) {
3bd1e081
MD
787 continue;
788 }
e4421fec
DG
789 DBG("Active FD %d", stream->wait_fd);
790 (*pollfd)[i].fd = stream->wait_fd;
3bd1e081 791 (*pollfd)[i].events = POLLIN | POLLPRI;
e4421fec 792 local_stream[i] = stream;
3bd1e081
MD
793 i++;
794 }
481d6c57 795 rcu_read_unlock();
3bd1e081
MD
796
797 /*
798 * Insert the consumer_poll_pipe at the end of the array and don't
799 * increment i so nb_fd is the number of real FD.
800 */
801 (*pollfd)[i].fd = ctx->consumer_poll_pipe[0];
509bb1cf 802 (*pollfd)[i].events = POLLIN | POLLPRI;
3bd1e081
MD
803 return i;
804}
805
/*
 * Block in poll() on the two descriptors of consumer_sockpoll, restarting the
 * call whenever it is interrupted by a signal.
 *
 * Entry 0 is the should_quit pipe and the other entry is the command socket.
 * Returns -1 when poll() fails or when entry 0 is readable (the consumer
 * should exit), 0 otherwise, meaning data is available on the command socket.
 */
int lttng_consumer_poll_socket(struct pollfd *consumer_sockpoll)
{
	int num_rdy;

	/* Restart interrupted system call. */
	do {
		num_rdy = poll(consumer_sockpoll, 2, -1);
	} while (num_rdy == -1 && errno == EINTR);

	if (num_rdy == -1) {
		perror("Poll error");
		return -1;
	}

	if (consumer_sockpoll[0].revents & (POLLIN | POLLPRI)) {
		DBG("consumer_should_quit wake up");
		return -1;
	}

	return 0;
}
835
836/*
837 * Set the error socket.
838 */
839void lttng_consumer_set_error_sock(
840 struct lttng_consumer_local_data *ctx, int sock)
841{
842 ctx->consumer_error_socket = sock;
843}
844
845/*
846 * Set the command socket path.
847 */
3bd1e081
MD
848void lttng_consumer_set_command_sock_path(
849 struct lttng_consumer_local_data *ctx, char *sock)
850{
851 ctx->consumer_command_sock_path = sock;
852}
853
854/*
855 * Send return code to the session daemon.
856 * If the socket is not defined, we return 0, it is not a fatal error
857 */
858int lttng_consumer_send_error(
859 struct lttng_consumer_local_data *ctx, int cmd)
860{
861 if (ctx->consumer_error_socket > 0) {
862 return lttcomm_send_unix_sock(ctx->consumer_error_socket, &cmd,
863 sizeof(enum lttcomm_sessiond_command));
864 }
865
866 return 0;
867}
868
869/*
870 * Close all the tracefiles and stream fds, should be called when all instances
871 * are destroyed.
872 */
873void lttng_consumer_cleanup(void)
874{
e4421fec 875 struct lttng_ht_iter iter;
6065ceec
DG
876 struct lttng_ht_node_ulong *node;
877
878 rcu_read_lock();
3bd1e081
MD
879
880 /*
6065ceec
DG
881 * close all outfd. Called when there are no more threads running (after
882 * joining on the threads), no need to protect list iteration with mutex.
3bd1e081 883 */
6065ceec
DG
884 cds_lfht_for_each_entry(consumer_data.stream_ht->ht, &iter.iter, node,
885 node) {
702b1ea4
MD
886 struct lttng_consumer_stream *stream =
887 caa_container_of(node, struct lttng_consumer_stream, node);
888 consumer_del_stream(stream);
3bd1e081 889 }
e4421fec 890
6065ceec
DG
891 cds_lfht_for_each_entry(consumer_data.channel_ht->ht, &iter.iter, node,
892 node) {
702b1ea4
MD
893 struct lttng_consumer_channel *channel =
894 caa_container_of(node, struct lttng_consumer_channel, node);
895 consumer_del_channel(channel);
3bd1e081 896 }
6065ceec
DG
897
898 rcu_read_unlock();
d6ce1df2
MD
899
900 lttng_ht_destroy(consumer_data.stream_ht);
901 lttng_ht_destroy(consumer_data.channel_ht);
3bd1e081
MD
902}
903
904/*
905 * Called from signal handler.
906 */
907void lttng_consumer_should_exit(struct lttng_consumer_local_data *ctx)
908{
909 int ret;
910 consumer_quit = 1;
6f94560a
MD
911 do {
912 ret = write(ctx->consumer_should_quit[1], "4", 1);
913 } while (ret < 0 && errno == EINTR);
3bd1e081
MD
914 if (ret < 0) {
915 perror("write consumer quit");
916 }
917}
918
00e2e675
DG
919void lttng_consumer_sync_trace_file(struct lttng_consumer_stream *stream,
920 off_t orig_offset)
3bd1e081
MD
921{
922 int outfd = stream->out_fd;
923
924 /*
925 * This does a blocking write-and-wait on any page that belongs to the
926 * subbuffer prior to the one we just wrote.
927 * Don't care about error values, as these are just hints and ways to
928 * limit the amount of page cache used.
929 */
930 if (orig_offset < stream->chan->max_sb_size) {
931 return;
932 }
b9182dd9 933 lttng_sync_file_range(outfd, orig_offset - stream->chan->max_sb_size,
3bd1e081
MD
934 stream->chan->max_sb_size,
935 SYNC_FILE_RANGE_WAIT_BEFORE
936 | SYNC_FILE_RANGE_WRITE
937 | SYNC_FILE_RANGE_WAIT_AFTER);
938 /*
939 * Give hints to the kernel about how we access the file:
940 * POSIX_FADV_DONTNEED : we won't re-access data in a near future after
941 * we write it.
942 *
943 * We need to call fadvise again after the file grows because the
944 * kernel does not seem to apply fadvise to non-existing parts of the
945 * file.
946 *
947 * Call fadvise _after_ having waited for the page writeback to
948 * complete because the dirty page writeback semantic is not well
949 * defined. So it can be expected to lead to lower throughput in
950 * streaming.
951 */
952 posix_fadvise(outfd, orig_offset - stream->chan->max_sb_size,
953 stream->chan->max_sb_size, POSIX_FADV_DONTNEED);
954}
955
956/*
957 * Initialise the necessary environnement :
958 * - create a new context
959 * - create the poll_pipe
960 * - create the should_quit pipe (for signal handler)
961 * - create the thread pipe (for splice)
962 *
963 * Takes a function pointer as argument, this function is called when data is
964 * available on a buffer. This function is responsible to do the
965 * kernctl_get_next_subbuf, read the data with mmap or splice depending on the
966 * buffer configuration and then kernctl_put_next_subbuf at the end.
967 *
968 * Returns a pointer to the new context or NULL on error.
969 */
970struct lttng_consumer_local_data *lttng_consumer_create(
971 enum lttng_consumer_type type,
4078b776 972 ssize_t (*buffer_ready)(struct lttng_consumer_stream *stream,
d41f73b7 973 struct lttng_consumer_local_data *ctx),
3bd1e081
MD
974 int (*recv_channel)(struct lttng_consumer_channel *channel),
975 int (*recv_stream)(struct lttng_consumer_stream *stream),
976 int (*update_stream)(int stream_key, uint32_t state))
977{
978 int ret, i;
979 struct lttng_consumer_local_data *ctx;
980
981 assert(consumer_data.type == LTTNG_CONSUMER_UNKNOWN ||
982 consumer_data.type == type);
983 consumer_data.type = type;
984
effcf122 985 ctx = zmalloc(sizeof(struct lttng_consumer_local_data));
3bd1e081
MD
986 if (ctx == NULL) {
987 perror("allocating context");
988 goto error;
989 }
990
991 ctx->consumer_error_socket = -1;
992 /* assign the callbacks */
993 ctx->on_buffer_ready = buffer_ready;
994 ctx->on_recv_channel = recv_channel;
995 ctx->on_recv_stream = recv_stream;
996 ctx->on_update_stream = update_stream;
997
998 ret = pipe(ctx->consumer_poll_pipe);
999 if (ret < 0) {
1000 perror("Error creating poll pipe");
1001 goto error_poll_pipe;
1002 }
1003
04fdd819
MD
1004 /* set read end of the pipe to non-blocking */
1005 ret = fcntl(ctx->consumer_poll_pipe[0], F_SETFL, O_NONBLOCK);
1006 if (ret < 0) {
1007 perror("fcntl O_NONBLOCK");
1008 goto error_poll_fcntl;
1009 }
1010
1011 /* set write end of the pipe to non-blocking */
1012 ret = fcntl(ctx->consumer_poll_pipe[1], F_SETFL, O_NONBLOCK);
1013 if (ret < 0) {
1014 perror("fcntl O_NONBLOCK");
1015 goto error_poll_fcntl;
1016 }
1017
3bd1e081
MD
1018 ret = pipe(ctx->consumer_should_quit);
1019 if (ret < 0) {
1020 perror("Error creating recv pipe");
1021 goto error_quit_pipe;
1022 }
1023
1024 ret = pipe(ctx->consumer_thread_pipe);
1025 if (ret < 0) {
1026 perror("Error creating thread pipe");
1027 goto error_thread_pipe;
1028 }
1029
fb3a43a9
DG
1030 ret = utils_create_pipe(ctx->consumer_metadata_pipe);
1031 if (ret < 0) {
1032 goto error_metadata_pipe;
1033 }
3bd1e081 1034
fb3a43a9
DG
1035 ret = utils_create_pipe(ctx->consumer_splice_metadata_pipe);
1036 if (ret < 0) {
1037 goto error_splice_pipe;
1038 }
1039
1040 return ctx;
3bd1e081 1041
fb3a43a9
DG
1042error_splice_pipe:
1043 utils_close_pipe(ctx->consumer_metadata_pipe);
1044error_metadata_pipe:
1045 utils_close_pipe(ctx->consumer_thread_pipe);
3bd1e081
MD
1046error_thread_pipe:
1047 for (i = 0; i < 2; i++) {
1048 int err;
1049
1050 err = close(ctx->consumer_should_quit[i]);
4c462e79
MD
1051 if (err) {
1052 PERROR("close");
1053 }
3bd1e081 1054 }
04fdd819 1055error_poll_fcntl:
3bd1e081
MD
1056error_quit_pipe:
1057 for (i = 0; i < 2; i++) {
1058 int err;
1059
1060 err = close(ctx->consumer_poll_pipe[i]);
4c462e79
MD
1061 if (err) {
1062 PERROR("close");
1063 }
3bd1e081
MD
1064 }
1065error_poll_pipe:
1066 free(ctx);
1067error:
1068 return NULL;
1069}
1070
1071/*
1072 * Close all fds associated with the instance and free the context.
1073 */
1074void lttng_consumer_destroy(struct lttng_consumer_local_data *ctx)
1075{
4c462e79
MD
1076 int ret;
1077
1078 ret = close(ctx->consumer_error_socket);
1079 if (ret) {
1080 PERROR("close");
1081 }
1082 ret = close(ctx->consumer_thread_pipe[0]);
1083 if (ret) {
1084 PERROR("close");
1085 }
1086 ret = close(ctx->consumer_thread_pipe[1]);
1087 if (ret) {
1088 PERROR("close");
1089 }
1090 ret = close(ctx->consumer_poll_pipe[0]);
1091 if (ret) {
1092 PERROR("close");
1093 }
1094 ret = close(ctx->consumer_poll_pipe[1]);
1095 if (ret) {
1096 PERROR("close");
1097 }
1098 ret = close(ctx->consumer_should_quit[0]);
1099 if (ret) {
1100 PERROR("close");
1101 }
1102 ret = close(ctx->consumer_should_quit[1]);
1103 if (ret) {
1104 PERROR("close");
1105 }
fb3a43a9
DG
1106 utils_close_pipe(ctx->consumer_splice_metadata_pipe);
1107
3bd1e081
MD
1108 unlink(ctx->consumer_command_sock_path);
1109 free(ctx);
1110}
1111
6197aea7
DG
1112/*
1113 * Write the metadata stream id on the specified file descriptor.
1114 */
1115static int write_relayd_metadata_id(int fd,
1116 struct lttng_consumer_stream *stream,
1d4dfdef
DG
1117 struct consumer_relayd_sock_pair *relayd,
1118 unsigned long padding)
6197aea7
DG
1119{
1120 int ret;
1d4dfdef 1121 struct lttcomm_relayd_metadata_payload hdr;
6197aea7 1122
1d4dfdef
DG
1123 hdr.stream_id = htobe64(stream->relayd_stream_id);
1124 hdr.padding_size = htobe32(padding);
6197aea7 1125 do {
1d4dfdef 1126 ret = write(fd, (void *) &hdr, sizeof(hdr));
6197aea7
DG
1127 } while (ret < 0 && errno == EINTR);
1128 if (ret < 0) {
1129 PERROR("write metadata stream id");
1130 goto end;
1131 }
1d4dfdef
DG
1132 DBG("Metadata stream id %" PRIu64 " with padding %lu written before data",
1133 stream->relayd_stream_id, padding);
6197aea7
DG
1134
1135end:
1136 return ret;
1137}
1138
3bd1e081 1139/*
09e26845
DG
1140 * Mmap the ring buffer, read it and write the data to the tracefile. This is a
1141 * core function for writing trace buffers to either the local filesystem or
1142 * the network.
1143 *
1144 * Careful review MUST be put if any changes occur!
3bd1e081
MD
1145 *
1146 * Returns the number of bytes written
1147 */
4078b776 1148ssize_t lttng_consumer_on_read_subbuffer_mmap(
3bd1e081 1149 struct lttng_consumer_local_data *ctx,
1d4dfdef
DG
1150 struct lttng_consumer_stream *stream, unsigned long len,
1151 unsigned long padding)
3bd1e081 1152{
f02e1e8a
DG
1153 unsigned long mmap_offset;
1154 ssize_t ret = 0, written = 0;
1155 off_t orig_offset = stream->out_fd_offset;
1156 /* Default is on the disk */
1157 int outfd = stream->out_fd;
f02e1e8a
DG
1158 struct consumer_relayd_sock_pair *relayd = NULL;
1159
1160 /* RCU lock for the relayd pointer */
1161 rcu_read_lock();
1162
1163 /* Flag that the current stream if set for network streaming. */
1164 if (stream->net_seq_idx != -1) {
1165 relayd = consumer_find_relayd(stream->net_seq_idx);
1166 if (relayd == NULL) {
1167 goto end;
1168 }
1169 }
1170
1171 /* get the offset inside the fd to mmap */
3bd1e081
MD
1172 switch (consumer_data.type) {
1173 case LTTNG_CONSUMER_KERNEL:
f02e1e8a
DG
1174 ret = kernctl_get_mmap_read_offset(stream->wait_fd, &mmap_offset);
1175 break;
7753dea8
MD
1176 case LTTNG_CONSUMER32_UST:
1177 case LTTNG_CONSUMER64_UST:
f02e1e8a
DG
1178 ret = lttng_ustctl_get_mmap_read_offset(stream->chan->handle,
1179 stream->buf, &mmap_offset);
1180 break;
3bd1e081
MD
1181 default:
1182 ERR("Unknown consumer_data type");
1183 assert(0);
1184 }
f02e1e8a
DG
1185 if (ret != 0) {
1186 errno = -ret;
1187 PERROR("tracer ctl get_mmap_read_offset");
1188 written = ret;
1189 goto end;
1190 }
b9182dd9 1191
f02e1e8a
DG
1192 /* Handle stream on the relayd if the output is on the network */
1193 if (relayd) {
1194 unsigned long netlen = len;
1195
1196 /*
1197 * Lock the control socket for the complete duration of the function
1198 * since from this point on we will use the socket.
1199 */
1200 if (stream->metadata_flag) {
1201 /* Metadata requires the control socket. */
1202 pthread_mutex_lock(&relayd->ctrl_sock_mutex);
1d4dfdef 1203 netlen += sizeof(struct lttcomm_relayd_metadata_payload);
f02e1e8a
DG
1204 }
1205
1d4dfdef 1206 ret = write_relayd_stream_header(stream, netlen, padding, relayd);
f02e1e8a
DG
1207 if (ret >= 0) {
1208 /* Use the returned socket. */
1209 outfd = ret;
1210
1211 /* Write metadata stream id before payload */
1212 if (stream->metadata_flag) {
1d4dfdef 1213 ret = write_relayd_metadata_id(outfd, stream, relayd, padding);
f02e1e8a 1214 if (ret < 0) {
f02e1e8a
DG
1215 written = ret;
1216 goto end;
1217 }
f02e1e8a
DG
1218 }
1219 }
1220 /* Else, use the default set before which is the filesystem. */
1d4dfdef
DG
1221 } else {
1222 /* No streaming, we have to set the len with the full padding */
1223 len += padding;
f02e1e8a
DG
1224 }
1225
1226 while (len > 0) {
1227 do {
1228 ret = write(outfd, stream->mmap_base + mmap_offset, len);
1229 } while (ret < 0 && errno == EINTR);
1d4dfdef 1230 DBG("Consumer mmap write() ret %zd (len %lu)", ret, len);
f02e1e8a
DG
1231 if (ret < 0) {
1232 PERROR("Error in file write");
1233 if (written == 0) {
1234 written = ret;
1235 }
1236 goto end;
1237 } else if (ret > len) {
77c7c900 1238 PERROR("Error in file write (ret %zd > len %lu)", ret, len);
f02e1e8a
DG
1239 written += ret;
1240 goto end;
1241 } else {
1242 len -= ret;
1243 mmap_offset += ret;
1244 }
f02e1e8a
DG
1245
1246 /* This call is useless on a socket so better save a syscall. */
1247 if (!relayd) {
1248 /* This won't block, but will start writeout asynchronously */
1249 lttng_sync_file_range(outfd, stream->out_fd_offset, ret,
1250 SYNC_FILE_RANGE_WRITE);
1251 stream->out_fd_offset += ret;
1252 }
1253 written += ret;
1254 }
1255 lttng_consumer_sync_trace_file(stream, orig_offset);
1256
1257end:
1258 /* Unlock only if ctrl socket used */
1259 if (relayd && stream->metadata_flag) {
1260 pthread_mutex_unlock(&relayd->ctrl_sock_mutex);
1261 }
1262
1263 rcu_read_unlock();
1264 return written;
3bd1e081
MD
1265}
1266
1267/*
1268 * Splice the data from the ring buffer to the tracefile.
1269 *
1270 * Returns the number of bytes spliced.
1271 */
4078b776 1272ssize_t lttng_consumer_on_read_subbuffer_splice(
3bd1e081 1273 struct lttng_consumer_local_data *ctx,
1d4dfdef
DG
1274 struct lttng_consumer_stream *stream, unsigned long len,
1275 unsigned long padding)
3bd1e081 1276{
f02e1e8a
DG
1277 ssize_t ret = 0, written = 0, ret_splice = 0;
1278 loff_t offset = 0;
1279 off_t orig_offset = stream->out_fd_offset;
1280 int fd = stream->wait_fd;
1281 /* Default is on the disk */
1282 int outfd = stream->out_fd;
f02e1e8a 1283 struct consumer_relayd_sock_pair *relayd = NULL;
fb3a43a9 1284 int *splice_pipe;
f02e1e8a 1285
3bd1e081
MD
1286 switch (consumer_data.type) {
1287 case LTTNG_CONSUMER_KERNEL:
f02e1e8a 1288 break;
7753dea8
MD
1289 case LTTNG_CONSUMER32_UST:
1290 case LTTNG_CONSUMER64_UST:
f02e1e8a 1291 /* Not supported for user space tracing */
3bd1e081
MD
1292 return -ENOSYS;
1293 default:
1294 ERR("Unknown consumer_data type");
1295 assert(0);
3bd1e081
MD
1296 }
1297
f02e1e8a
DG
1298 /* RCU lock for the relayd pointer */
1299 rcu_read_lock();
1300
1301 /* Flag that the current stream if set for network streaming. */
1302 if (stream->net_seq_idx != -1) {
1303 relayd = consumer_find_relayd(stream->net_seq_idx);
1304 if (relayd == NULL) {
1305 goto end;
1306 }
1307 }
1308
fb3a43a9
DG
1309 /*
1310 * Choose right pipe for splice. Metadata and trace data are handled by
1311 * different threads hence the use of two pipes in order not to race or
1312 * corrupt the written data.
1313 */
1314 if (stream->metadata_flag) {
1315 splice_pipe = ctx->consumer_splice_metadata_pipe;
1316 } else {
1317 splice_pipe = ctx->consumer_thread_pipe;
1318 }
1319
f02e1e8a 1320 /* Write metadata stream id before payload */
1d4dfdef
DG
1321 if (relayd) {
1322 int total_len = len;
f02e1e8a 1323
1d4dfdef
DG
1324 if (stream->metadata_flag) {
1325 /*
1326 * Lock the control socket for the complete duration of the function
1327 * since from this point on we will use the socket.
1328 */
1329 pthread_mutex_lock(&relayd->ctrl_sock_mutex);
1330
1331 ret = write_relayd_metadata_id(splice_pipe[1], stream, relayd,
1332 padding);
1333 if (ret < 0) {
1334 written = ret;
1335 goto end;
1336 }
1337
1338 total_len += sizeof(struct lttcomm_relayd_metadata_payload);
1339 }
1340
1341 ret = write_relayd_stream_header(stream, total_len, padding, relayd);
1342 if (ret >= 0) {
1343 /* Use the returned socket. */
1344 outfd = ret;
1345 } else {
1346 ERR("Remote relayd disconnected. Stopping");
f02e1e8a
DG
1347 goto end;
1348 }
1d4dfdef
DG
1349 } else {
1350 /* No streaming, we have to set the len with the full padding */
1351 len += padding;
f02e1e8a
DG
1352 }
1353
1354 while (len > 0) {
1d4dfdef
DG
1355 DBG("splice chan to pipe offset %lu of len %lu (fd : %d, pipe: %d)",
1356 (unsigned long)offset, len, fd, splice_pipe[1]);
fb3a43a9 1357 ret_splice = splice(fd, &offset, splice_pipe[1], NULL, len,
f02e1e8a
DG
1358 SPLICE_F_MOVE | SPLICE_F_MORE);
1359 DBG("splice chan to pipe, ret %zd", ret_splice);
1360 if (ret_splice < 0) {
1361 PERROR("Error in relay splice");
1362 if (written == 0) {
1363 written = ret_splice;
1364 }
1365 ret = errno;
1366 goto splice_error;
1367 }
1368
1369 /* Handle stream on the relayd if the output is on the network */
1370 if (relayd) {
1371 if (stream->metadata_flag) {
1d4dfdef
DG
1372 size_t metadata_payload_size =
1373 sizeof(struct lttcomm_relayd_metadata_payload);
1374
f02e1e8a 1375 /* Update counter to fit the spliced data */
1d4dfdef
DG
1376 ret_splice += metadata_payload_size;
1377 len += metadata_payload_size;
f02e1e8a
DG
1378 /*
1379 * We do this so the return value can match the len passed as
1380 * argument to this function.
1381 */
1d4dfdef 1382 written -= metadata_payload_size;
f02e1e8a
DG
1383 }
1384 }
1385
1386 /* Splice data out */
fb3a43a9 1387 ret_splice = splice(splice_pipe[0], NULL, outfd, NULL,
f02e1e8a 1388 ret_splice, SPLICE_F_MOVE | SPLICE_F_MORE);
1d4dfdef 1389 DBG("Consumer splice pipe to file, ret %zd", ret_splice);
f02e1e8a
DG
1390 if (ret_splice < 0) {
1391 PERROR("Error in file splice");
1392 if (written == 0) {
1393 written = ret_splice;
1394 }
1395 ret = errno;
1396 goto splice_error;
1397 } else if (ret_splice > len) {
1398 errno = EINVAL;
1399 PERROR("Wrote more data than requested %zd (len: %lu)",
1400 ret_splice, len);
1401 written += ret_splice;
1402 ret = errno;
1403 goto splice_error;
1404 }
1405 len -= ret_splice;
1406
1407 /* This call is useless on a socket so better save a syscall. */
1408 if (!relayd) {
1409 /* This won't block, but will start writeout asynchronously */
1410 lttng_sync_file_range(outfd, stream->out_fd_offset, ret_splice,
1411 SYNC_FILE_RANGE_WRITE);
1412 stream->out_fd_offset += ret_splice;
1413 }
1414 written += ret_splice;
1415 }
1416 lttng_consumer_sync_trace_file(stream, orig_offset);
1417
1418 ret = ret_splice;
1419
1420 goto end;
1421
1422splice_error:
1423 /* send the appropriate error description to sessiond */
1424 switch (ret) {
1425 case EBADF:
f73fabfd 1426 lttng_consumer_send_error(ctx, LTTCOMM_CONSUMERD_SPLICE_EBADF);
f02e1e8a
DG
1427 break;
1428 case EINVAL:
f73fabfd 1429 lttng_consumer_send_error(ctx, LTTCOMM_CONSUMERD_SPLICE_EINVAL);
f02e1e8a
DG
1430 break;
1431 case ENOMEM:
f73fabfd 1432 lttng_consumer_send_error(ctx, LTTCOMM_CONSUMERD_SPLICE_ENOMEM);
f02e1e8a
DG
1433 break;
1434 case ESPIPE:
f73fabfd 1435 lttng_consumer_send_error(ctx, LTTCOMM_CONSUMERD_SPLICE_ESPIPE);
f02e1e8a
DG
1436 break;
1437 }
1438
1439end:
1440 if (relayd && stream->metadata_flag) {
1441 pthread_mutex_unlock(&relayd->ctrl_sock_mutex);
1442 }
1443
1444 rcu_read_unlock();
1445 return written;
3bd1e081
MD
1446}
1447
1448/*
1449 * Take a snapshot for a specific fd
1450 *
1451 * Returns 0 on success, < 0 on error
1452 */
1453int lttng_consumer_take_snapshot(struct lttng_consumer_local_data *ctx,
1454 struct lttng_consumer_stream *stream)
1455{
1456 switch (consumer_data.type) {
1457 case LTTNG_CONSUMER_KERNEL:
1458 return lttng_kconsumer_take_snapshot(ctx, stream);
7753dea8
MD
1459 case LTTNG_CONSUMER32_UST:
1460 case LTTNG_CONSUMER64_UST:
3bd1e081
MD
1461 return lttng_ustconsumer_take_snapshot(ctx, stream);
1462 default:
1463 ERR("Unknown consumer_data type");
1464 assert(0);
1465 return -ENOSYS;
1466 }
1467
1468}
1469
1470/*
1471 * Get the produced position
1472 *
1473 * Returns 0 on success, < 0 on error
1474 */
1475int lttng_consumer_get_produced_snapshot(
1476 struct lttng_consumer_local_data *ctx,
1477 struct lttng_consumer_stream *stream,
1478 unsigned long *pos)
1479{
1480 switch (consumer_data.type) {
1481 case LTTNG_CONSUMER_KERNEL:
1482 return lttng_kconsumer_get_produced_snapshot(ctx, stream, pos);
7753dea8
MD
1483 case LTTNG_CONSUMER32_UST:
1484 case LTTNG_CONSUMER64_UST:
3bd1e081
MD
1485 return lttng_ustconsumer_get_produced_snapshot(ctx, stream, pos);
1486 default:
1487 ERR("Unknown consumer_data type");
1488 assert(0);
1489 return -ENOSYS;
1490 }
1491}
1492
1493int lttng_consumer_recv_cmd(struct lttng_consumer_local_data *ctx,
1494 int sock, struct pollfd *consumer_sockpoll)
1495{
1496 switch (consumer_data.type) {
1497 case LTTNG_CONSUMER_KERNEL:
1498 return lttng_kconsumer_recv_cmd(ctx, sock, consumer_sockpoll);
7753dea8
MD
1499 case LTTNG_CONSUMER32_UST:
1500 case LTTNG_CONSUMER64_UST:
3bd1e081
MD
1501 return lttng_ustconsumer_recv_cmd(ctx, sock, consumer_sockpoll);
1502 default:
1503 ERR("Unknown consumer_data type");
1504 assert(0);
1505 return -ENOSYS;
1506 }
1507}
1508
fb3a43a9
DG
1509/*
1510 * Iterate over all stream element of the hashtable and free them. This is race
1511 * free since the hashtable received MUST be in a race free synchronization
1512 * state. It's the caller responsability to make sure of that.
1513 */
1514static void destroy_stream_ht(struct lttng_ht *ht)
1515{
1516 int ret;
1517 struct lttng_ht_iter iter;
1518 struct lttng_consumer_stream *stream;
1519
1520 if (ht == NULL) {
1521 return;
1522 }
1523
d09e1200 1524 rcu_read_lock();
fb3a43a9
DG
1525 cds_lfht_for_each_entry(ht->ht, &iter.iter, stream, node.node) {
1526 ret = lttng_ht_del(ht, &iter);
1527 assert(!ret);
1528
1529 free(stream);
1530 }
d09e1200 1531 rcu_read_unlock();
fb3a43a9
DG
1532
1533 lttng_ht_destroy(ht);
1534}
1535
1536/*
1537 * Clean up a metadata stream and free its memory.
1538 */
1539static void consumer_del_metadata_stream(struct lttng_consumer_stream *stream)
1540{
1541 int ret;
fb3a43a9
DG
1542 struct consumer_relayd_sock_pair *relayd;
1543
1544 assert(stream);
1545 /*
1546 * This call should NEVER receive regular stream. It must always be
1547 * metadata stream and this is crucial for data structure synchronization.
1548 */
1549 assert(stream->metadata_flag);
1550
1551 pthread_mutex_lock(&consumer_data.lock);
1552 switch (consumer_data.type) {
1553 case LTTNG_CONSUMER_KERNEL:
1554 if (stream->mmap_base != NULL) {
1555 ret = munmap(stream->mmap_base, stream->mmap_len);
1556 if (ret != 0) {
1557 PERROR("munmap metadata stream");
1558 }
1559 }
1560 break;
1561 case LTTNG_CONSUMER32_UST:
1562 case LTTNG_CONSUMER64_UST:
1563 lttng_ustconsumer_del_stream(stream);
1564 break;
1565 default:
1566 ERR("Unknown consumer_data type");
1567 assert(0);
1568 }
1569 pthread_mutex_unlock(&consumer_data.lock);
1570
1571 if (stream->out_fd >= 0) {
1572 ret = close(stream->out_fd);
1573 if (ret) {
1574 PERROR("close");
1575 }
1576 }
1577
1578 if (stream->wait_fd >= 0 && !stream->wait_fd_is_copy) {
1579 ret = close(stream->wait_fd);
1580 if (ret) {
1581 PERROR("close");
1582 }
1583 }
1584
1585 if (stream->shm_fd >= 0 && stream->wait_fd != stream->shm_fd) {
1586 ret = close(stream->shm_fd);
1587 if (ret) {
1588 PERROR("close");
1589 }
1590 }
1591
1592 /* Check and cleanup relayd */
1593 rcu_read_lock();
1594 relayd = consumer_find_relayd(stream->net_seq_idx);
1595 if (relayd != NULL) {
1596 uatomic_dec(&relayd->refcount);
1597 assert(uatomic_read(&relayd->refcount) >= 0);
1598
1599 /* Closing streams requires to lock the control socket. */
1600 pthread_mutex_lock(&relayd->ctrl_sock_mutex);
1601 ret = relayd_send_close_stream(&relayd->control_sock,
1602 stream->relayd_stream_id, stream->next_net_seq_num - 1);
1603 pthread_mutex_unlock(&relayd->ctrl_sock_mutex);
1604 if (ret < 0) {
1605 DBG("Unable to close stream on the relayd. Continuing");
1606 /*
1607 * Continue here. There is nothing we can do for the relayd.
1608 * Chances are that the relayd has closed the socket so we just
1609 * continue cleaning up.
1610 */
1611 }
1612
1613 /* Both conditions are met, we destroy the relayd. */
1614 if (uatomic_read(&relayd->refcount) == 0 &&
1615 uatomic_read(&relayd->destroy_flag)) {
d09e1200 1616 destroy_relayd(relayd);
fb3a43a9
DG
1617 }
1618 }
1619 rcu_read_unlock();
1620
1621 /* Atomically decrement channel refcount since other threads can use it. */
1622 uatomic_dec(&stream->chan->refcount);
c30aaa51
MD
1623 if (!uatomic_read(&stream->chan->refcount)
1624 && !uatomic_read(&stream->chan->nb_init_streams)) {
1625 /* Go for channel deletion! */
1626 consumer_del_channel(stream->chan);
fb3a43a9
DG
1627 }
1628
1629 free(stream);
1630}
1631
1632/*
1633 * Action done with the metadata stream when adding it to the consumer internal
1634 * data structures to handle it.
1635 */
1636static void consumer_add_metadata_stream(struct lttng_consumer_stream *stream)
1637{
1638 struct consumer_relayd_sock_pair *relayd;
1639
1640 /* Find relayd and, if one is found, increment refcount. */
1641 rcu_read_lock();
1642 relayd = consumer_find_relayd(stream->net_seq_idx);
1643 if (relayd != NULL) {
1644 uatomic_inc(&relayd->refcount);
1645 }
1646 rcu_read_unlock();
1647}
1648
1649/*
1650 * Thread polls on metadata file descriptor and write them on disk or on the
1651 * network.
1652 */
1653void *lttng_consumer_thread_poll_metadata(void *data)
1654{
1655 int ret, i, pollfd;
1656 uint32_t revents, nb_fd;
1657 struct lttng_consumer_stream *stream;
1658 struct lttng_ht_iter iter;
1659 struct lttng_ht_node_ulong *node;
1660 struct lttng_ht *metadata_ht = NULL;
1661 struct lttng_poll_event events;
1662 struct lttng_consumer_local_data *ctx = data;
1663 ssize_t len;
1664
1665 rcu_register_thread();
1666
1667 DBG("Thread metadata poll started");
1668
1669 metadata_ht = lttng_ht_new(0, LTTNG_HT_TYPE_ULONG);
1670 if (metadata_ht == NULL) {
1671 goto end;
1672 }
1673
1674 /* Size is set to 1 for the consumer_metadata pipe */
1675 ret = lttng_poll_create(&events, 2, LTTNG_CLOEXEC);
1676 if (ret < 0) {
1677 ERR("Poll set creation failed");
1678 goto end;
1679 }
1680
1681 ret = lttng_poll_add(&events, ctx->consumer_metadata_pipe[0], LPOLLIN);
1682 if (ret < 0) {
1683 goto end;
1684 }
1685
1686 /* Main loop */
1687 DBG("Metadata main loop started");
1688
1689 while (1) {
1690 lttng_poll_reset(&events);
1691
1692 nb_fd = LTTNG_POLL_GETNB(&events);
1693
1694 /* Only the metadata pipe is set */
1695 if (nb_fd == 0 && consumer_quit == 1) {
1696 goto end;
1697 }
1698
1699restart:
1700 DBG("Metadata poll wait with %d fd(s)", nb_fd);
1701 ret = lttng_poll_wait(&events, -1);
1702 DBG("Metadata event catched in thread");
1703 if (ret < 0) {
1704 if (errno == EINTR) {
1705 goto restart;
1706 }
1707 goto error;
1708 }
1709
1710 for (i = 0; i < nb_fd; i++) {
1711 revents = LTTNG_POLL_GETEV(&events, i);
1712 pollfd = LTTNG_POLL_GETFD(&events, i);
1713
1714 /* Check the metadata pipe for incoming metadata. */
1715 if (pollfd == ctx->consumer_metadata_pipe[0]) {
4adabd61 1716 if (revents & (LPOLLERR | LPOLLHUP )) {
fb3a43a9
DG
1717 DBG("Metadata thread pipe hung up");
1718 /*
1719 * Remove the pipe from the poll set and continue the loop
1720 * since their might be data to consume.
1721 */
1722 lttng_poll_del(&events, ctx->consumer_metadata_pipe[0]);
1723 close(ctx->consumer_metadata_pipe[0]);
1724 continue;
1725 } else if (revents & LPOLLIN) {
fb3a43a9 1726 do {
633d0084
DG
1727 /* Get the stream pointer received */
1728 ret = read(pollfd, &stream, sizeof(stream));
fb3a43a9 1729 } while (ret < 0 && errno == EINTR);
633d0084
DG
1730 if (ret < 0 ||
1731 ret < sizeof(struct lttng_consumer_stream *)) {
fb3a43a9 1732 PERROR("read metadata stream");
fb3a43a9
DG
1733 /*
1734 * Let's continue here and hope we can still work
1735 * without stopping the consumer. XXX: Should we?
1736 */
1737 continue;
1738 }
1739
1740 DBG("Adding metadata stream %d to poll set",
1741 stream->wait_fd);
1742
d09e1200 1743 rcu_read_lock();
fb3a43a9
DG
1744 /* The node should be init at this point */
1745 lttng_ht_add_unique_ulong(metadata_ht,
1746 &stream->waitfd_node);
d09e1200 1747 rcu_read_unlock();
fb3a43a9
DG
1748
1749 /* Add metadata stream to the global poll events list */
1750 lttng_poll_add(&events, stream->wait_fd,
1751 LPOLLIN | LPOLLPRI);
1752
1753 consumer_add_metadata_stream(stream);
1754 }
1755
1756 /* Metadata pipe handled. Continue handling the others */
1757 continue;
1758 }
1759
1760 /* From here, the event is a metadata wait fd */
1761
d09e1200 1762 rcu_read_lock();
fb3a43a9
DG
1763 lttng_ht_lookup(metadata_ht, (void *)((unsigned long) pollfd),
1764 &iter);
1765 node = lttng_ht_iter_get_node_ulong(&iter);
1766 if (node == NULL) {
1767 /* FD not found, continue loop */
d09e1200 1768 rcu_read_unlock();
fb3a43a9
DG
1769 continue;
1770 }
1771
1772 stream = caa_container_of(node, struct lttng_consumer_stream,
1773 waitfd_node);
1774
1775 /* Get the data out of the metadata file descriptor */
1776 if (revents & (LPOLLIN | LPOLLPRI)) {
1777 DBG("Metadata available on fd %d", pollfd);
1778 assert(stream->wait_fd == pollfd);
1779
1780 len = ctx->on_buffer_ready(stream, ctx);
1781 /* It's ok to have an unavailable sub-buffer */
1782 if (len < 0 && len != -EAGAIN) {
d09e1200 1783 rcu_read_unlock();
fb3a43a9
DG
1784 goto end;
1785 } else if (len > 0) {
1786 stream->data_read = 1;
1787 }
1788 }
1789
1790 /*
1791 * Remove the stream from the hash table since there is no data
1792 * left on the fd because we previously did a read on the buffer.
1793 */
4adabd61 1794 if (revents & (LPOLLERR | LPOLLHUP)) {
fb3a43a9
DG
1795 DBG("Metadata fd %d is hup|err|nval.", pollfd);
1796 if (!stream->hangup_flush_done
1797 && (consumer_data.type == LTTNG_CONSUMER32_UST
1798 || consumer_data.type == LTTNG_CONSUMER64_UST)) {
1799 DBG("Attempting to flush and consume the UST buffers");
1800 lttng_ustconsumer_on_stream_hangup(stream);
1801
1802 /* We just flushed the stream now read it. */
1803 len = ctx->on_buffer_ready(stream, ctx);
1804 /* It's ok to have an unavailable sub-buffer */
1805 if (len < 0 && len != -EAGAIN) {
d09e1200 1806 rcu_read_unlock();
fb3a43a9
DG
1807 goto end;
1808 }
1809 }
1810
1811 /* Removing it from hash table, poll set and free memory */
1812 lttng_ht_del(metadata_ht, &iter);
d09e1200 1813
fb3a43a9
DG
1814 lttng_poll_del(&events, stream->wait_fd);
1815 consumer_del_metadata_stream(stream);
1816 }
d09e1200 1817 rcu_read_unlock();
fb3a43a9
DG
1818 }
1819 }
1820
1821error:
1822end:
1823 DBG("Metadata poll thread exiting");
1824 lttng_poll_clean(&events);
1825
1826 if (metadata_ht) {
1827 destroy_stream_ht(metadata_ht);
1828 }
1829
1830 rcu_unregister_thread();
1831 return NULL;
1832}
1833
3bd1e081 1834/*
e4421fec 1835 * This thread polls the fds in the set to consume the data and write
3bd1e081
MD
1836 * it to tracefile if necessary.
1837 */
1838void *lttng_consumer_thread_poll_fds(void *data)
1839{
1840 int num_rdy, num_hup, high_prio, ret, i;
1841 struct pollfd *pollfd = NULL;
1842 /* local view of the streams */
1843 struct lttng_consumer_stream **local_stream = NULL;
1844 /* local view of consumer_data.fds_count */
1845 int nb_fd = 0;
3bd1e081 1846 struct lttng_consumer_local_data *ctx = data;
00e2e675 1847 ssize_t len;
fb3a43a9
DG
1848 pthread_t metadata_thread;
1849 void *status;
3bd1e081 1850
e7b994a3
DG
1851 rcu_register_thread();
1852
fb3a43a9
DG
1853 /* Start metadata polling thread */
1854 ret = pthread_create(&metadata_thread, NULL,
1855 lttng_consumer_thread_poll_metadata, (void *) ctx);
1856 if (ret < 0) {
1857 PERROR("pthread_create metadata thread");
1858 goto end;
1859 }
1860
effcf122 1861 local_stream = zmalloc(sizeof(struct lttng_consumer_stream));
3bd1e081
MD
1862
1863 while (1) {
1864 high_prio = 0;
1865 num_hup = 0;
1866
1867 /*
e4421fec 1868 * the fds set has been updated, we need to update our
3bd1e081
MD
1869 * local array as well
1870 */
1871 pthread_mutex_lock(&consumer_data.lock);
1872 if (consumer_data.need_update) {
1873 if (pollfd != NULL) {
1874 free(pollfd);
1875 pollfd = NULL;
1876 }
1877 if (local_stream != NULL) {
1878 free(local_stream);
1879 local_stream = NULL;
1880 }
1881
1882 /* allocate for all fds + 1 for the consumer_poll_pipe */
effcf122 1883 pollfd = zmalloc((consumer_data.stream_count + 1) * sizeof(struct pollfd));
3bd1e081
MD
1884 if (pollfd == NULL) {
1885 perror("pollfd malloc");
1886 pthread_mutex_unlock(&consumer_data.lock);
1887 goto end;
1888 }
1889
1890 /* allocate for all fds + 1 for the consumer_poll_pipe */
effcf122 1891 local_stream = zmalloc((consumer_data.stream_count + 1) *
3bd1e081
MD
1892 sizeof(struct lttng_consumer_stream));
1893 if (local_stream == NULL) {
1894 perror("local_stream malloc");
1895 pthread_mutex_unlock(&consumer_data.lock);
1896 goto end;
1897 }
fb3a43a9 1898 ret = consumer_update_poll_array(ctx, &pollfd, local_stream);
3bd1e081
MD
1899 if (ret < 0) {
1900 ERR("Error in allocating pollfd or local_outfds");
f73fabfd 1901 lttng_consumer_send_error(ctx, LTTCOMM_CONSUMERD_POLL_ERROR);
3bd1e081
MD
1902 pthread_mutex_unlock(&consumer_data.lock);
1903 goto end;
1904 }
1905 nb_fd = ret;
1906 consumer_data.need_update = 0;
1907 }
1908 pthread_mutex_unlock(&consumer_data.lock);
1909
4078b776
MD
1910 /* No FDs and consumer_quit, consumer_cleanup the thread */
1911 if (nb_fd == 0 && consumer_quit == 1) {
1912 goto end;
1913 }
3bd1e081 1914 /* poll on the array of fds */
88f2b785 1915 restart:
3bd1e081
MD
1916 DBG("polling on %d fd", nb_fd + 1);
1917 num_rdy = poll(pollfd, nb_fd + 1, consumer_poll_timeout);
1918 DBG("poll num_rdy : %d", num_rdy);
1919 if (num_rdy == -1) {
88f2b785
MD
1920 /*
1921 * Restart interrupted system call.
1922 */
1923 if (errno == EINTR) {
1924 goto restart;
1925 }
3bd1e081 1926 perror("Poll error");
f73fabfd 1927 lttng_consumer_send_error(ctx, LTTCOMM_CONSUMERD_POLL_ERROR);
3bd1e081
MD
1928 goto end;
1929 } else if (num_rdy == 0) {
1930 DBG("Polling thread timed out");
1931 goto end;
1932 }
1933
3bd1e081 1934 /*
00e2e675
DG
1935 * If the consumer_poll_pipe triggered poll go directly to the
1936 * beginning of the loop to update the array. We want to prioritize
1937 * array update over low-priority reads.
3bd1e081 1938 */
509bb1cf 1939 if (pollfd[nb_fd].revents & (POLLIN | POLLPRI)) {
04fdd819
MD
1940 size_t pipe_readlen;
1941 char tmp;
1942
3bd1e081 1943 DBG("consumer_poll_pipe wake up");
04fdd819
MD
1944 /* Consume 1 byte of pipe data */
1945 do {
1946 pipe_readlen = read(ctx->consumer_poll_pipe[0], &tmp, 1);
1947 } while (pipe_readlen == -1 && errno == EINTR);
3bd1e081
MD
1948 continue;
1949 }
1950
1951 /* Take care of high priority channels first. */
1952 for (i = 0; i < nb_fd; i++) {
fb3a43a9 1953 if (pollfd[i].revents & POLLPRI) {
d41f73b7
MD
1954 DBG("Urgent read on fd %d", pollfd[i].fd);
1955 high_prio = 1;
4078b776 1956 len = ctx->on_buffer_ready(local_stream[i], ctx);
d41f73b7 1957 /* it's ok to have an unavailable sub-buffer */
4078b776
MD
1958 if (len < 0 && len != -EAGAIN) {
1959 goto end;
1960 } else if (len > 0) {
1961 local_stream[i]->data_read = 1;
d41f73b7 1962 }
3bd1e081
MD
1963 }
1964 }
1965
4078b776
MD
1966 /*
1967 * If we read high prio channel in this loop, try again
1968 * for more high prio data.
1969 */
1970 if (high_prio) {
3bd1e081
MD
1971 continue;
1972 }
1973
1974 /* Take care of low priority channels. */
4078b776
MD
1975 for (i = 0; i < nb_fd; i++) {
1976 if ((pollfd[i].revents & POLLIN) ||
1977 local_stream[i]->hangup_flush_done) {
4078b776
MD
1978 DBG("Normal read on fd %d", pollfd[i].fd);
1979 len = ctx->on_buffer_ready(local_stream[i], ctx);
1980 /* it's ok to have an unavailable sub-buffer */
1981 if (len < 0 && len != -EAGAIN) {
1982 goto end;
1983 } else if (len > 0) {
1984 local_stream[i]->data_read = 1;
1985 }
1986 }
1987 }
1988
1989 /* Handle hangup and errors */
1990 for (i = 0; i < nb_fd; i++) {
1991 if (!local_stream[i]->hangup_flush_done
1992 && (pollfd[i].revents & (POLLHUP | POLLERR | POLLNVAL))
1993 && (consumer_data.type == LTTNG_CONSUMER32_UST
1994 || consumer_data.type == LTTNG_CONSUMER64_UST)) {
1995 DBG("fd %d is hup|err|nval. Attempting flush and read.",
1996 pollfd[i].fd);
1997 lttng_ustconsumer_on_stream_hangup(local_stream[i]);
1998 /* Attempt read again, for the data we just flushed. */
1999 local_stream[i]->data_read = 1;
2000 }
2001 /*
2002 * If the poll flag is HUP/ERR/NVAL and we have
2003 * read no data in this pass, we can remove the
2004 * stream from its hash table.
2005 */
2006 if ((pollfd[i].revents & POLLHUP)) {
2007 DBG("Polling fd %d tells it has hung up.", pollfd[i].fd);
2008 if (!local_stream[i]->data_read) {
702b1ea4 2009 consumer_del_stream(local_stream[i]);
4078b776
MD
2010 num_hup++;
2011 }
2012 } else if (pollfd[i].revents & POLLERR) {
2013 ERR("Error returned in polling fd %d.", pollfd[i].fd);
2014 if (!local_stream[i]->data_read) {
702b1ea4 2015 consumer_del_stream(local_stream[i]);
4078b776
MD
2016 num_hup++;
2017 }
2018 } else if (pollfd[i].revents & POLLNVAL) {
2019 ERR("Polling fd %d tells fd is not open.", pollfd[i].fd);
2020 if (!local_stream[i]->data_read) {
702b1ea4 2021 consumer_del_stream(local_stream[i]);
4078b776 2022 num_hup++;
3bd1e081
MD
2023 }
2024 }
4078b776 2025 local_stream[i]->data_read = 0;
3bd1e081
MD
2026 }
2027 }
2028end:
2029 DBG("polling thread exiting");
2030 if (pollfd != NULL) {
2031 free(pollfd);
2032 pollfd = NULL;
2033 }
2034 if (local_stream != NULL) {
2035 free(local_stream);
2036 local_stream = NULL;
2037 }
fb3a43a9
DG
2038
2039 /*
2040 * Close the write side of the pipe so epoll_wait() in
2041 * lttng_consumer_thread_poll_metadata can catch it. The thread is
2042 * monitoring the read side of the pipe. If we close them both, epoll_wait
2043 * strangely does not return and could create a endless wait period if the
2044 * pipe is the only tracked fd in the poll set. The thread will take care
2045 * of closing the read side.
2046 */
2047 close(ctx->consumer_metadata_pipe[1]);
2048 if (ret) {
2049 ret = pthread_join(metadata_thread, &status);
2050 if (ret < 0) {
2051 PERROR("pthread_join metadata thread");
2052 }
2053 }
2054
e7b994a3 2055 rcu_unregister_thread();
3bd1e081
MD
2056 return NULL;
2057}
2058
2059/*
2060 * This thread listens on the consumerd socket and receives the file
2061 * descriptors from the session daemon.
2062 */
2063void *lttng_consumer_thread_receive_fds(void *data)
2064{
2065 int sock, client_socket, ret;
2066 /*
2067 * structure to poll for incoming data on communication socket avoids
2068 * making blocking sockets.
2069 */
2070 struct pollfd consumer_sockpoll[2];
2071 struct lttng_consumer_local_data *ctx = data;
2072
e7b994a3
DG
2073 rcu_register_thread();
2074
3bd1e081
MD
2075 DBG("Creating command socket %s", ctx->consumer_command_sock_path);
2076 unlink(ctx->consumer_command_sock_path);
2077 client_socket = lttcomm_create_unix_sock(ctx->consumer_command_sock_path);
2078 if (client_socket < 0) {
2079 ERR("Cannot create command socket");
2080 goto end;
2081 }
2082
2083 ret = lttcomm_listen_unix_sock(client_socket);
2084 if (ret < 0) {
2085 goto end;
2086 }
2087
32258573 2088 DBG("Sending ready command to lttng-sessiond");
f73fabfd 2089 ret = lttng_consumer_send_error(ctx, LTTCOMM_CONSUMERD_COMMAND_SOCK_READY);
3bd1e081
MD
2090 /* return < 0 on error, but == 0 is not fatal */
2091 if (ret < 0) {
32258573 2092 ERR("Error sending ready command to lttng-sessiond");
3bd1e081
MD
2093 goto end;
2094 }
2095
2096 ret = fcntl(client_socket, F_SETFL, O_NONBLOCK);
2097 if (ret < 0) {
2098 perror("fcntl O_NONBLOCK");
2099 goto end;
2100 }
2101
2102 /* prepare the FDs to poll : to client socket and the should_quit pipe */
2103 consumer_sockpoll[0].fd = ctx->consumer_should_quit[0];
2104 consumer_sockpoll[0].events = POLLIN | POLLPRI;
2105 consumer_sockpoll[1].fd = client_socket;
2106 consumer_sockpoll[1].events = POLLIN | POLLPRI;
2107
2108 if (lttng_consumer_poll_socket(consumer_sockpoll) < 0) {
2109 goto end;
2110 }
2111 DBG("Connection on client_socket");
2112
2113 /* Blocking call, waiting for transmission */
2114 sock = lttcomm_accept_unix_sock(client_socket);
2115 if (sock <= 0) {
2116 WARN("On accept");
2117 goto end;
2118 }
2119 ret = fcntl(sock, F_SETFL, O_NONBLOCK);
2120 if (ret < 0) {
2121 perror("fcntl O_NONBLOCK");
2122 goto end;
2123 }
2124
2125 /* update the polling structure to poll on the established socket */
2126 consumer_sockpoll[1].fd = sock;
2127 consumer_sockpoll[1].events = POLLIN | POLLPRI;
2128
2129 while (1) {
2130 if (lttng_consumer_poll_socket(consumer_sockpoll) < 0) {
2131 goto end;
2132 }
2133 DBG("Incoming command on sock");
2134 ret = lttng_consumer_recv_cmd(ctx, sock, consumer_sockpoll);
2135 if (ret == -ENOENT) {
2136 DBG("Received STOP command");
2137 goto end;
2138 }
4cbc1a04
DG
2139 if (ret <= 0) {
2140 /*
2141 * This could simply be a session daemon quitting. Don't output
2142 * ERR() here.
2143 */
2144 DBG("Communication interrupted on command socket");
3bd1e081
MD
2145 goto end;
2146 }
2147 if (consumer_quit) {
2148 DBG("consumer_thread_receive_fds received quit from signal");
2149 goto end;
2150 }
2151 DBG("received fds on sock");
2152 }
2153end:
2154 DBG("consumer_thread_receive_fds exiting");
2155
2156 /*
2157 * when all fds have hung up, the polling thread
2158 * can exit cleanly
2159 */
2160 consumer_quit = 1;
2161
2162 /*
2163 * 2s of grace period, if no polling events occur during
2164 * this period, the polling thread will exit even if there
2165 * are still open FDs (should not happen, but safety mechanism).
2166 */
2167 consumer_poll_timeout = LTTNG_CONSUMER_POLL_TIMEOUT;
2168
04fdd819
MD
2169 /*
2170 * Wake-up the other end by writing a null byte in the pipe
2171 * (non-blocking). Important note: Because writing into the
2172 * pipe is non-blocking (and therefore we allow dropping wakeup
2173 * data, as long as there is wakeup data present in the pipe
2174 * buffer to wake up the other end), the other end should
2175 * perform the following sequence for waiting:
2176 * 1) empty the pipe (reads).
2177 * 2) perform update operation.
2178 * 3) wait on the pipe (poll).
2179 */
2180 do {
2181 ret = write(ctx->consumer_poll_pipe[1], "", 1);
6f94560a 2182 } while (ret < 0 && errno == EINTR);
e7b994a3 2183 rcu_unregister_thread();
3bd1e081
MD
2184 return NULL;
2185}
d41f73b7 2186
4078b776 2187ssize_t lttng_consumer_read_subbuffer(struct lttng_consumer_stream *stream,
d41f73b7
MD
2188 struct lttng_consumer_local_data *ctx)
2189{
2190 switch (consumer_data.type) {
2191 case LTTNG_CONSUMER_KERNEL:
2192 return lttng_kconsumer_read_subbuffer(stream, ctx);
7753dea8
MD
2193 case LTTNG_CONSUMER32_UST:
2194 case LTTNG_CONSUMER64_UST:
d41f73b7
MD
2195 return lttng_ustconsumer_read_subbuffer(stream, ctx);
2196 default:
2197 ERR("Unknown consumer_data type");
2198 assert(0);
2199 return -ENOSYS;
2200 }
2201}
2202
2203int lttng_consumer_on_recv_stream(struct lttng_consumer_stream *stream)
2204{
2205 switch (consumer_data.type) {
2206 case LTTNG_CONSUMER_KERNEL:
2207 return lttng_kconsumer_on_recv_stream(stream);
7753dea8
MD
2208 case LTTNG_CONSUMER32_UST:
2209 case LTTNG_CONSUMER64_UST:
d41f73b7
MD
2210 return lttng_ustconsumer_on_recv_stream(stream);
2211 default:
2212 ERR("Unknown consumer_data type");
2213 assert(0);
2214 return -ENOSYS;
2215 }
2216}
e4421fec
DG
2217
2218/*
2219 * Allocate and set consumer data hash tables.
2220 */
2221void lttng_consumer_init(void)
2222{
2223 consumer_data.stream_ht = lttng_ht_new(0, LTTNG_HT_TYPE_ULONG);
2224 consumer_data.channel_ht = lttng_ht_new(0, LTTNG_HT_TYPE_ULONG);
00e2e675 2225 consumer_data.relayd_ht = lttng_ht_new(0, LTTNG_HT_TYPE_ULONG);
e4421fec 2226}
7735ef9e
DG
2227
2228/*
2229 * Process the ADD_RELAYD command receive by a consumer.
2230 *
2231 * This will create a relayd socket pair and add it to the relayd hash table.
2232 * The caller MUST acquire a RCU read side lock before calling it.
2233 */
2234int consumer_add_relayd_socket(int net_seq_idx, int sock_type,
2235 struct lttng_consumer_local_data *ctx, int sock,
2236 struct pollfd *consumer_sockpoll, struct lttcomm_sock *relayd_sock)
2237{
2238 int fd, ret = -1;
2239 struct consumer_relayd_sock_pair *relayd;
2240
2241 DBG("Consumer adding relayd socket (idx: %d)", net_seq_idx);
2242
2243 /* Get relayd reference if exists. */
2244 relayd = consumer_find_relayd(net_seq_idx);
2245 if (relayd == NULL) {
2246 /* Not found. Allocate one. */
2247 relayd = consumer_allocate_relayd_sock_pair(net_seq_idx);
2248 if (relayd == NULL) {
f73fabfd 2249 lttng_consumer_send_error(ctx, LTTCOMM_CONSUMERD_OUTFD_ERROR);
7735ef9e
DG
2250 goto error;
2251 }
2252 }
2253
2254 /* Poll on consumer socket. */
2255 if (lttng_consumer_poll_socket(consumer_sockpoll) < 0) {
2256 ret = -EINTR;
2257 goto error;
2258 }
2259
2260 /* Get relayd socket from session daemon */
2261 ret = lttcomm_recv_fds_unix_sock(sock, &fd, 1);
2262 if (ret != sizeof(fd)) {
f73fabfd 2263 lttng_consumer_send_error(ctx, LTTCOMM_CONSUMERD_ERROR_RECV_FD);
7735ef9e
DG
2264 ret = -1;
2265 goto error;
2266 }
2267
2268 /* Copy socket information and received FD */
2269 switch (sock_type) {
2270 case LTTNG_STREAM_CONTROL:
2271 /* Copy received lttcomm socket */
2272 lttcomm_copy_sock(&relayd->control_sock, relayd_sock);
2273 ret = lttcomm_create_sock(&relayd->control_sock);
2274 if (ret < 0) {
2275 goto error;
2276 }
2277
2278 /* Close the created socket fd which is useless */
2279 close(relayd->control_sock.fd);
2280
2281 /* Assign new file descriptor */
2282 relayd->control_sock.fd = fd;
2283 break;
2284 case LTTNG_STREAM_DATA:
2285 /* Copy received lttcomm socket */
2286 lttcomm_copy_sock(&relayd->data_sock, relayd_sock);
2287 ret = lttcomm_create_sock(&relayd->data_sock);
2288 if (ret < 0) {
2289 goto error;
2290 }
2291
2292 /* Close the created socket fd which is useless */
2293 close(relayd->data_sock.fd);
2294
2295 /* Assign new file descriptor */
2296 relayd->data_sock.fd = fd;
2297 break;
2298 default:
2299 ERR("Unknown relayd socket type (%d)", sock_type);
2300 goto error;
2301 }
2302
2303 DBG("Consumer %s socket created successfully with net idx %d (fd: %d)",
2304 sock_type == LTTNG_STREAM_CONTROL ? "control" : "data",
2305 relayd->net_seq_idx, fd);
2306
2307 /*
2308 * Add relayd socket pair to consumer data hashtable. If object already
2309 * exists or on error, the function gracefully returns.
2310 */
d09e1200 2311 add_relayd(relayd);
7735ef9e
DG
2312
2313 /* All good! */
2314 ret = 0;
2315
2316error:
2317 return ret;
2318}
This page took 0.140083 seconds and 5 git commands to generate.