Merge mmap/splice fct. for both consumers
[lttng-tools.git] / src / common / consumer.c
CommitLineData
3bd1e081
MD
1/*
2 * Copyright (C) 2011 - Julien Desfossez <julien.desfossez@polymtl.ca>
3 * Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
00e2e675 4 * 2012 - David Goulet <dgoulet@efficios.com>
3bd1e081 5 *
d14d33bf
AM
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License, version 2 only,
8 * as published by the Free Software Foundation.
3bd1e081 9 *
d14d33bf
AM
10 * This program is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
3bd1e081 14 *
d14d33bf
AM
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
3bd1e081
MD
18 */
19
20#define _GNU_SOURCE
21#include <assert.h>
3bd1e081
MD
22#include <poll.h>
23#include <pthread.h>
24#include <stdlib.h>
25#include <string.h>
26#include <sys/mman.h>
27#include <sys/socket.h>
28#include <sys/types.h>
29#include <unistd.h>
30
990570ed 31#include <common/common.h>
10a8a223 32#include <common/kernel-ctl/kernel-ctl.h>
00e2e675 33#include <common/sessiond-comm/relayd.h>
10a8a223
DG
34#include <common/sessiond-comm/sessiond-comm.h>
35#include <common/kernel-consumer/kernel-consumer.h>
00e2e675 36#include <common/relayd/relayd.h>
10a8a223
DG
37#include <common/ust-consumer/ust-consumer.h>
38
39#include "consumer.h"
3bd1e081
MD
40
41struct lttng_consumer_global_data consumer_data = {
3bd1e081
MD
42 .stream_count = 0,
43 .need_update = 1,
44 .type = LTTNG_CONSUMER_UNKNOWN,
45};
46
47/* timeout parameter, to control the polling thread grace period. */
48int consumer_poll_timeout = -1;
49
50/*
51 * Flag to inform the polling thread to quit when all fd hung up. Updated by
52 * the consumer_thread_receive_fds when it notices that all fds has hung up.
53 * Also updated by the signal handler (consumer_should_exit()). Read by the
54 * polling threads.
55 */
56volatile int consumer_quit = 0;
57
58/*
59 * Find a stream. The consumer_data.lock must be locked during this
60 * call.
61 */
62static struct lttng_consumer_stream *consumer_find_stream(int key)
63{
e4421fec
DG
64 struct lttng_ht_iter iter;
65 struct lttng_ht_node_ulong *node;
66 struct lttng_consumer_stream *stream = NULL;
3bd1e081 67
7ad0a0cb
MD
68 /* Negative keys are lookup failures */
69 if (key < 0)
70 return NULL;
e4421fec 71
6065ceec
DG
72 rcu_read_lock();
73
e4421fec
DG
74 lttng_ht_lookup(consumer_data.stream_ht, (void *)((unsigned long) key),
75 &iter);
76 node = lttng_ht_iter_get_node_ulong(&iter);
77 if (node != NULL) {
78 stream = caa_container_of(node, struct lttng_consumer_stream, node);
3bd1e081 79 }
e4421fec 80
6065ceec
DG
81 rcu_read_unlock();
82
e4421fec 83 return stream;
3bd1e081
MD
84}
85
7ad0a0cb
MD
86static void consumer_steal_stream_key(int key)
87{
88 struct lttng_consumer_stream *stream;
89
04253271 90 rcu_read_lock();
7ad0a0cb 91 stream = consumer_find_stream(key);
04253271 92 if (stream) {
7ad0a0cb 93 stream->key = -1;
04253271
MD
94 /*
95 * We don't want the lookup to match, but we still need
96 * to iterate on this stream when iterating over the hash table. Just
97 * change the node key.
98 */
99 stream->node.key = -1;
100 }
101 rcu_read_unlock();
7ad0a0cb
MD
102}
103
3bd1e081
MD
104static struct lttng_consumer_channel *consumer_find_channel(int key)
105{
e4421fec
DG
106 struct lttng_ht_iter iter;
107 struct lttng_ht_node_ulong *node;
108 struct lttng_consumer_channel *channel = NULL;
3bd1e081 109
7ad0a0cb
MD
110 /* Negative keys are lookup failures */
111 if (key < 0)
112 return NULL;
e4421fec 113
6065ceec
DG
114 rcu_read_lock();
115
e4421fec
DG
116 lttng_ht_lookup(consumer_data.channel_ht, (void *)((unsigned long) key),
117 &iter);
118 node = lttng_ht_iter_get_node_ulong(&iter);
119 if (node != NULL) {
120 channel = caa_container_of(node, struct lttng_consumer_channel, node);
3bd1e081 121 }
e4421fec 122
6065ceec
DG
123 rcu_read_unlock();
124
e4421fec 125 return channel;
3bd1e081
MD
126}
127
7ad0a0cb
MD
128static void consumer_steal_channel_key(int key)
129{
130 struct lttng_consumer_channel *channel;
131
04253271 132 rcu_read_lock();
7ad0a0cb 133 channel = consumer_find_channel(key);
04253271 134 if (channel) {
7ad0a0cb 135 channel->key = -1;
04253271
MD
136 /*
137 * We don't want the lookup to match, but we still need
138 * to iterate on this channel when iterating over the hash table. Just
139 * change the node key.
140 */
141 channel->node.key = -1;
142 }
143 rcu_read_unlock();
7ad0a0cb
MD
144}
145
702b1ea4
MD
146static
147void consumer_free_stream(struct rcu_head *head)
148{
149 struct lttng_ht_node_ulong *node =
150 caa_container_of(head, struct lttng_ht_node_ulong, head);
151 struct lttng_consumer_stream *stream =
152 caa_container_of(node, struct lttng_consumer_stream, node);
153
154 free(stream);
155}
156
00e2e675
DG
157/*
158 * RCU protected relayd socket pair free.
159 */
160static void consumer_rcu_free_relayd(struct rcu_head *head)
161{
162 struct lttng_ht_node_ulong *node =
163 caa_container_of(head, struct lttng_ht_node_ulong, head);
164 struct consumer_relayd_sock_pair *relayd =
165 caa_container_of(node, struct consumer_relayd_sock_pair, node);
166
167 free(relayd);
168}
169
170/*
171 * Destroy and free relayd socket pair object.
172 *
173 * This function MUST be called with the consumer_data lock acquired.
174 */
175void consumer_destroy_relayd(struct consumer_relayd_sock_pair *relayd)
176{
177 int ret;
178 struct lttng_ht_iter iter;
179
173af62f
DG
180 if (relayd == NULL) {
181 return;
182 }
183
00e2e675
DG
184 DBG("Consumer destroy and close relayd socket pair");
185
186 iter.iter.node = &relayd->node.node;
187 ret = lttng_ht_del(consumer_data.relayd_ht, &iter);
173af62f
DG
188 if (ret != 0) {
189 /* We assume the relayd was already destroyed */
190 return;
191 }
00e2e675
DG
192
193 /* Close all sockets */
194 pthread_mutex_lock(&relayd->ctrl_sock_mutex);
195 (void) relayd_close(&relayd->control_sock);
196 pthread_mutex_unlock(&relayd->ctrl_sock_mutex);
197 (void) relayd_close(&relayd->data_sock);
198
199 /* RCU free() call */
200 call_rcu(&relayd->node.head, consumer_rcu_free_relayd);
201}
202
3bd1e081
MD
203/*
204 * Remove a stream from the global list protected by a mutex. This
205 * function is also responsible for freeing its data structures.
206 */
207void consumer_del_stream(struct lttng_consumer_stream *stream)
208{
209 int ret;
e4421fec 210 struct lttng_ht_iter iter;
3bd1e081 211 struct lttng_consumer_channel *free_chan = NULL;
00e2e675
DG
212 struct consumer_relayd_sock_pair *relayd;
213
214 assert(stream);
3bd1e081
MD
215
216 pthread_mutex_lock(&consumer_data.lock);
217
218 switch (consumer_data.type) {
219 case LTTNG_CONSUMER_KERNEL:
220 if (stream->mmap_base != NULL) {
221 ret = munmap(stream->mmap_base, stream->mmap_len);
222 if (ret != 0) {
223 perror("munmap");
224 }
225 }
226 break;
7753dea8
MD
227 case LTTNG_CONSUMER32_UST:
228 case LTTNG_CONSUMER64_UST:
3bd1e081
MD
229 lttng_ustconsumer_del_stream(stream);
230 break;
231 default:
232 ERR("Unknown consumer_data type");
233 assert(0);
234 goto end;
235 }
236
6065ceec 237 rcu_read_lock();
04253271
MD
238 iter.iter.node = &stream->node.node;
239 ret = lttng_ht_del(consumer_data.stream_ht, &iter);
240 assert(!ret);
e4421fec 241
6065ceec
DG
242 rcu_read_unlock();
243
3bd1e081
MD
244 if (consumer_data.stream_count <= 0) {
245 goto end;
246 }
247 consumer_data.stream_count--;
248 if (!stream) {
249 goto end;
250 }
251 if (stream->out_fd >= 0) {
4c462e79
MD
252 ret = close(stream->out_fd);
253 if (ret) {
254 PERROR("close");
255 }
3bd1e081 256 }
b5c5fc29 257 if (stream->wait_fd >= 0 && !stream->wait_fd_is_copy) {
4c462e79
MD
258 ret = close(stream->wait_fd);
259 if (ret) {
260 PERROR("close");
261 }
3bd1e081 262 }
2c1dd183 263 if (stream->shm_fd >= 0 && stream->wait_fd != stream->shm_fd) {
4c462e79
MD
264 ret = close(stream->shm_fd);
265 if (ret) {
266 PERROR("close");
267 }
3bd1e081 268 }
00e2e675
DG
269
270 /* Check and cleanup relayd */
b0b335c8 271 rcu_read_lock();
00e2e675
DG
272 relayd = consumer_find_relayd(stream->net_seq_idx);
273 if (relayd != NULL) {
b0b335c8
MD
274 uatomic_dec(&relayd->refcount);
275 assert(uatomic_read(&relayd->refcount) >= 0);
173af62f
DG
276
277 ret = relayd_send_close_stream(&relayd->control_sock,
278 stream->relayd_stream_id,
279 stream->next_net_seq_num - 1);
280 if (ret < 0) {
281 ERR("Unable to close stream on the relayd. Continuing");
282 /* Continue here. There is nothing we can do for the relayd.*/
283 }
284
285 /* Both conditions are met, we destroy the relayd. */
286 if (uatomic_read(&relayd->refcount) == 0 &&
287 uatomic_read(&relayd->destroy_flag)) {
00e2e675
DG
288 consumer_destroy_relayd(relayd);
289 }
00e2e675 290 }
b0b335c8 291 rcu_read_unlock();
00e2e675
DG
292
293 if (!--stream->chan->refcount) {
3bd1e081 294 free_chan = stream->chan;
00e2e675
DG
295 }
296
702b1ea4
MD
297
298 call_rcu(&stream->node.head, consumer_free_stream);
3bd1e081
MD
299end:
300 consumer_data.need_update = 1;
301 pthread_mutex_unlock(&consumer_data.lock);
302
303 if (free_chan)
304 consumer_del_channel(free_chan);
305}
306
307struct lttng_consumer_stream *consumer_allocate_stream(
308 int channel_key, int stream_key,
309 int shm_fd, int wait_fd,
310 enum lttng_consumer_stream_state state,
311 uint64_t mmap_len,
312 enum lttng_event_output output,
6df2e2c9
MD
313 const char *path_name,
314 uid_t uid,
00e2e675
DG
315 gid_t gid,
316 int net_index,
317 int metadata_flag)
3bd1e081
MD
318{
319 struct lttng_consumer_stream *stream;
320 int ret;
321
effcf122 322 stream = zmalloc(sizeof(*stream));
3bd1e081
MD
323 if (stream == NULL) {
324 perror("malloc struct lttng_consumer_stream");
325 goto end;
326 }
327 stream->chan = consumer_find_channel(channel_key);
328 if (!stream->chan) {
329 perror("Unable to find channel key");
330 goto end;
331 }
332 stream->chan->refcount++;
333 stream->key = stream_key;
334 stream->shm_fd = shm_fd;
335 stream->wait_fd = wait_fd;
336 stream->out_fd = -1;
337 stream->out_fd_offset = 0;
338 stream->state = state;
339 stream->mmap_len = mmap_len;
340 stream->mmap_base = NULL;
341 stream->output = output;
6df2e2c9
MD
342 stream->uid = uid;
343 stream->gid = gid;
00e2e675
DG
344 stream->net_seq_idx = net_index;
345 stream->metadata_flag = metadata_flag;
346 strncpy(stream->path_name, path_name, sizeof(stream->path_name));
347 stream->path_name[sizeof(stream->path_name) - 1] = '\0';
e4421fec 348 lttng_ht_node_init_ulong(&stream->node, stream->key);
00e2e675 349 lttng_ht_node_init_ulong(&stream->waitfd_node, stream->wait_fd);
3bd1e081
MD
350
351 switch (consumer_data.type) {
352 case LTTNG_CONSUMER_KERNEL:
353 break;
7753dea8
MD
354 case LTTNG_CONSUMER32_UST:
355 case LTTNG_CONSUMER64_UST:
5af2f756 356 stream->cpu = stream->chan->cpucount++;
3bd1e081
MD
357 ret = lttng_ustconsumer_allocate_stream(stream);
358 if (ret) {
359 free(stream);
360 return NULL;
361 }
362 break;
363 default:
364 ERR("Unknown consumer_data type");
365 assert(0);
366 goto end;
367 }
00e2e675 368 DBG("Allocated stream %s (key %d, shm_fd %d, wait_fd %d, mmap_len %llu, out_fd %d, net_seq_idx %d)",
3bd1e081
MD
369 stream->path_name, stream->key,
370 stream->shm_fd,
371 stream->wait_fd,
372 (unsigned long long) stream->mmap_len,
00e2e675
DG
373 stream->out_fd,
374 stream->net_seq_idx);
3bd1e081
MD
375end:
376 return stream;
377}
378
379/*
380 * Add a stream to the global list protected by a mutex.
381 */
382int consumer_add_stream(struct lttng_consumer_stream *stream)
383{
384 int ret = 0;
c77fc10a
DG
385 struct lttng_ht_node_ulong *node;
386 struct lttng_ht_iter iter;
00e2e675 387 struct consumer_relayd_sock_pair *relayd;
3bd1e081
MD
388
389 pthread_mutex_lock(&consumer_data.lock);
7ad0a0cb
MD
390 /* Steal stream identifier, for UST */
391 consumer_steal_stream_key(stream->key);
c77fc10a 392
b0b335c8 393 rcu_read_lock();
c77fc10a
DG
394 lttng_ht_lookup(consumer_data.stream_ht,
395 (void *)((unsigned long) stream->key), &iter);
396 node = lttng_ht_iter_get_node_ulong(&iter);
397 if (node != NULL) {
398 rcu_read_unlock();
399 /* Stream already exist. Ignore the insertion */
400 goto end;
401 }
402
04253271 403 lttng_ht_add_unique_ulong(consumer_data.stream_ht, &stream->node);
00e2e675
DG
404
405 /* Check and cleanup relayd */
406 relayd = consumer_find_relayd(stream->net_seq_idx);
407 if (relayd != NULL) {
b0b335c8 408 uatomic_inc(&relayd->refcount);
00e2e675 409 }
b0b335c8 410 rcu_read_unlock();
00e2e675
DG
411
412 /* Update consumer data */
3bd1e081
MD
413 consumer_data.stream_count++;
414 consumer_data.need_update = 1;
415
416 switch (consumer_data.type) {
417 case LTTNG_CONSUMER_KERNEL:
418 break;
7753dea8
MD
419 case LTTNG_CONSUMER32_UST:
420 case LTTNG_CONSUMER64_UST:
3bd1e081
MD
421 /* Streams are in CPU number order (we rely on this) */
422 stream->cpu = stream->chan->nr_streams++;
423 break;
424 default:
425 ERR("Unknown consumer_data type");
426 assert(0);
427 goto end;
428 }
429
430end:
431 pthread_mutex_unlock(&consumer_data.lock);
702b1ea4 432
3bd1e081
MD
433 return ret;
434}
435
00e2e675
DG
436/*
437 * Add relayd socket to global consumer data hashtable.
438 */
439int consumer_add_relayd(struct consumer_relayd_sock_pair *relayd)
440{
441 int ret = 0;
442 struct lttng_ht_node_ulong *node;
443 struct lttng_ht_iter iter;
444
445 if (relayd == NULL) {
446 ret = -1;
447 goto end;
448 }
449
450 rcu_read_lock();
451
452 lttng_ht_lookup(consumer_data.relayd_ht,
453 (void *)((unsigned long) relayd->net_seq_idx), &iter);
454 node = lttng_ht_iter_get_node_ulong(&iter);
455 if (node != NULL) {
456 rcu_read_unlock();
457 /* Relayd already exist. Ignore the insertion */
458 goto end;
459 }
460 lttng_ht_add_unique_ulong(consumer_data.relayd_ht, &relayd->node);
461
462 rcu_read_unlock();
463
464end:
465 return ret;
466}
467
468/*
469 * Allocate and return a consumer relayd socket.
470 */
471struct consumer_relayd_sock_pair *consumer_allocate_relayd_sock_pair(
472 int net_seq_idx)
473{
474 struct consumer_relayd_sock_pair *obj = NULL;
475
476 /* Negative net sequence index is a failure */
477 if (net_seq_idx < 0) {
478 goto error;
479 }
480
481 obj = zmalloc(sizeof(struct consumer_relayd_sock_pair));
482 if (obj == NULL) {
483 PERROR("zmalloc relayd sock");
484 goto error;
485 }
486
487 obj->net_seq_idx = net_seq_idx;
488 obj->refcount = 0;
173af62f 489 obj->destroy_flag = 0;
00e2e675
DG
490 lttng_ht_node_init_ulong(&obj->node, obj->net_seq_idx);
491 pthread_mutex_init(&obj->ctrl_sock_mutex, NULL);
492
493error:
494 return obj;
495}
496
497/*
498 * Find a relayd socket pair in the global consumer data.
499 *
500 * Return the object if found else NULL.
b0b335c8
MD
501 * RCU read-side lock must be held across this call and while using the
502 * returned object.
00e2e675
DG
503 */
504struct consumer_relayd_sock_pair *consumer_find_relayd(int key)
505{
506 struct lttng_ht_iter iter;
507 struct lttng_ht_node_ulong *node;
508 struct consumer_relayd_sock_pair *relayd = NULL;
509
510 /* Negative keys are lookup failures */
511 if (key < 0) {
512 goto error;
513 }
514
00e2e675
DG
515 lttng_ht_lookup(consumer_data.relayd_ht, (void *)((unsigned long) key),
516 &iter);
517 node = lttng_ht_iter_get_node_ulong(&iter);
518 if (node != NULL) {
519 relayd = caa_container_of(node, struct consumer_relayd_sock_pair, node);
520 }
521
00e2e675
DG
522error:
523 return relayd;
524}
525
526/*
527 * Handle stream for relayd transmission if the stream applies for network
528 * streaming where the net sequence index is set.
529 *
530 * Return destination file descriptor or negative value on error.
531 */
532int consumer_handle_stream_before_relayd(struct lttng_consumer_stream *stream,
533 size_t data_size)
534{
535 int outfd = -1, ret;
536 struct consumer_relayd_sock_pair *relayd;
537 struct lttcomm_relayd_data_hdr data_hdr;
538
539 /* Safety net */
540 assert(stream);
541
542 /* Reset data header */
543 memset(&data_hdr, 0, sizeof(data_hdr));
544
b0b335c8 545 rcu_read_lock();
00e2e675
DG
546 /* Get relayd reference of the stream. */
547 relayd = consumer_find_relayd(stream->net_seq_idx);
548 if (relayd == NULL) {
549 /* Stream is either local or corrupted */
550 goto error;
551 }
552
553 DBG("Consumer found relayd socks with index %d", stream->net_seq_idx);
554 if (stream->metadata_flag) {
555 /* Caller MUST acquire the relayd control socket lock */
556 ret = relayd_send_metadata(&relayd->control_sock, data_size);
557 if (ret < 0) {
558 goto error;
559 }
560
561 /* Metadata are always sent on the control socket. */
562 outfd = relayd->control_sock.fd;
563 } else {
564 /* Set header with stream information */
565 data_hdr.stream_id = htobe64(stream->relayd_stream_id);
566 data_hdr.data_size = htobe32(data_size);
173af62f 567 data_hdr.net_seq_num = htobe64(stream->next_net_seq_num++);
00e2e675
DG
568 /* Other fields are zeroed previously */
569
570 ret = relayd_send_data_hdr(&relayd->data_sock, &data_hdr,
571 sizeof(data_hdr));
572 if (ret < 0) {
573 goto error;
574 }
575
576 /* Set to go on data socket */
577 outfd = relayd->data_sock.fd;
578 }
579
580error:
b0b335c8 581 rcu_read_unlock();
00e2e675
DG
582 return outfd;
583}
584
3bd1e081
MD
585/*
586 * Update a stream according to what we just received.
587 */
588void consumer_change_stream_state(int stream_key,
589 enum lttng_consumer_stream_state state)
590{
591 struct lttng_consumer_stream *stream;
592
593 pthread_mutex_lock(&consumer_data.lock);
594 stream = consumer_find_stream(stream_key);
595 if (stream) {
596 stream->state = state;
597 }
598 consumer_data.need_update = 1;
599 pthread_mutex_unlock(&consumer_data.lock);
600}
601
702b1ea4
MD
602static
603void consumer_free_channel(struct rcu_head *head)
604{
605 struct lttng_ht_node_ulong *node =
606 caa_container_of(head, struct lttng_ht_node_ulong, head);
607 struct lttng_consumer_channel *channel =
608 caa_container_of(node, struct lttng_consumer_channel, node);
609
610 free(channel);
611}
612
3bd1e081
MD
613/*
614 * Remove a channel from the global list protected by a mutex. This
615 * function is also responsible for freeing its data structures.
616 */
617void consumer_del_channel(struct lttng_consumer_channel *channel)
618{
619 int ret;
e4421fec 620 struct lttng_ht_iter iter;
3bd1e081
MD
621
622 pthread_mutex_lock(&consumer_data.lock);
623
624 switch (consumer_data.type) {
625 case LTTNG_CONSUMER_KERNEL:
626 break;
7753dea8
MD
627 case LTTNG_CONSUMER32_UST:
628 case LTTNG_CONSUMER64_UST:
3bd1e081
MD
629 lttng_ustconsumer_del_channel(channel);
630 break;
631 default:
632 ERR("Unknown consumer_data type");
633 assert(0);
634 goto end;
635 }
636
6065ceec 637 rcu_read_lock();
04253271
MD
638 iter.iter.node = &channel->node.node;
639 ret = lttng_ht_del(consumer_data.channel_ht, &iter);
640 assert(!ret);
6065ceec
DG
641 rcu_read_unlock();
642
3bd1e081
MD
643 if (channel->mmap_base != NULL) {
644 ret = munmap(channel->mmap_base, channel->mmap_len);
645 if (ret != 0) {
646 perror("munmap");
647 }
648 }
b5c5fc29 649 if (channel->wait_fd >= 0 && !channel->wait_fd_is_copy) {
4c462e79
MD
650 ret = close(channel->wait_fd);
651 if (ret) {
652 PERROR("close");
653 }
3bd1e081 654 }
2c1dd183 655 if (channel->shm_fd >= 0 && channel->wait_fd != channel->shm_fd) {
4c462e79
MD
656 ret = close(channel->shm_fd);
657 if (ret) {
658 PERROR("close");
659 }
3bd1e081 660 }
702b1ea4
MD
661
662 call_rcu(&channel->node.head, consumer_free_channel);
3bd1e081
MD
663end:
664 pthread_mutex_unlock(&consumer_data.lock);
665}
666
667struct lttng_consumer_channel *consumer_allocate_channel(
668 int channel_key,
669 int shm_fd, int wait_fd,
670 uint64_t mmap_len,
671 uint64_t max_sb_size)
672{
673 struct lttng_consumer_channel *channel;
674 int ret;
675
276b26d1 676 channel = zmalloc(sizeof(*channel));
3bd1e081
MD
677 if (channel == NULL) {
678 perror("malloc struct lttng_consumer_channel");
679 goto end;
680 }
681 channel->key = channel_key;
682 channel->shm_fd = shm_fd;
683 channel->wait_fd = wait_fd;
684 channel->mmap_len = mmap_len;
685 channel->max_sb_size = max_sb_size;
686 channel->refcount = 0;
687 channel->nr_streams = 0;
e4421fec 688 lttng_ht_node_init_ulong(&channel->node, channel->key);
3bd1e081
MD
689
690 switch (consumer_data.type) {
691 case LTTNG_CONSUMER_KERNEL:
692 channel->mmap_base = NULL;
693 channel->mmap_len = 0;
694 break;
7753dea8
MD
695 case LTTNG_CONSUMER32_UST:
696 case LTTNG_CONSUMER64_UST:
3bd1e081
MD
697 ret = lttng_ustconsumer_allocate_channel(channel);
698 if (ret) {
699 free(channel);
700 return NULL;
701 }
702 break;
703 default:
704 ERR("Unknown consumer_data type");
705 assert(0);
706 goto end;
707 }
708 DBG("Allocated channel (key %d, shm_fd %d, wait_fd %d, mmap_len %llu, max_sb_size %llu)",
00e2e675 709 channel->key, channel->shm_fd, channel->wait_fd,
3bd1e081
MD
710 (unsigned long long) channel->mmap_len,
711 (unsigned long long) channel->max_sb_size);
712end:
713 return channel;
714}
715
716/*
717 * Add a channel to the global list protected by a mutex.
718 */
719int consumer_add_channel(struct lttng_consumer_channel *channel)
720{
c77fc10a
DG
721 struct lttng_ht_node_ulong *node;
722 struct lttng_ht_iter iter;
723
3bd1e081 724 pthread_mutex_lock(&consumer_data.lock);
7ad0a0cb
MD
725 /* Steal channel identifier, for UST */
726 consumer_steal_channel_key(channel->key);
6065ceec 727 rcu_read_lock();
c77fc10a
DG
728
729 lttng_ht_lookup(consumer_data.channel_ht,
730 (void *)((unsigned long) channel->key), &iter);
731 node = lttng_ht_iter_get_node_ulong(&iter);
732 if (node != NULL) {
733 /* Channel already exist. Ignore the insertion */
734 goto end;
735 }
736
04253271 737 lttng_ht_add_unique_ulong(consumer_data.channel_ht, &channel->node);
c77fc10a
DG
738
739end:
6065ceec 740 rcu_read_unlock();
3bd1e081 741 pthread_mutex_unlock(&consumer_data.lock);
702b1ea4 742
7ad0a0cb 743 return 0;
3bd1e081
MD
744}
745
746/*
747 * Allocate the pollfd structure and the local view of the out fds to avoid
748 * doing a lookup in the linked list and concurrency issues when writing is
749 * needed. Called with consumer_data.lock held.
750 *
751 * Returns the number of fds in the structures.
752 */
753int consumer_update_poll_array(
754 struct lttng_consumer_local_data *ctx, struct pollfd **pollfd,
00e2e675
DG
755 struct lttng_consumer_stream **local_stream,
756 struct lttng_ht *metadata_ht)
3bd1e081 757{
3bd1e081 758 int i = 0;
e4421fec
DG
759 struct lttng_ht_iter iter;
760 struct lttng_consumer_stream *stream;
3bd1e081
MD
761
762 DBG("Updating poll fd array");
481d6c57 763 rcu_read_lock();
e4421fec
DG
764 cds_lfht_for_each_entry(consumer_data.stream_ht->ht, &iter.iter, stream,
765 node.node) {
766 if (stream->state != LTTNG_CONSUMER_ACTIVE_STREAM) {
3bd1e081
MD
767 continue;
768 }
e4421fec
DG
769 DBG("Active FD %d", stream->wait_fd);
770 (*pollfd)[i].fd = stream->wait_fd;
3bd1e081 771 (*pollfd)[i].events = POLLIN | POLLPRI;
00e2e675
DG
772 if (stream->metadata_flag && metadata_ht) {
773 lttng_ht_add_unique_ulong(metadata_ht, &stream->waitfd_node);
774 DBG("Active FD added to metadata hash table");
775 }
e4421fec 776 local_stream[i] = stream;
3bd1e081
MD
777 i++;
778 }
481d6c57 779 rcu_read_unlock();
3bd1e081
MD
780
781 /*
782 * Insert the consumer_poll_pipe at the end of the array and don't
783 * increment i so nb_fd is the number of real FD.
784 */
785 (*pollfd)[i].fd = ctx->consumer_poll_pipe[0];
509bb1cf 786 (*pollfd)[i].events = POLLIN | POLLPRI;
3bd1e081
MD
787 return i;
788}
789
/*
 * Wait, without timeout, on the two entries of consumer_sockpoll: the
 * should_quit pipe (entry 0) and the command socket.
 *
 * Returns -1 on poll error or when the should_quit pipe is readable (the
 * caller should exit), 0 otherwise, i.e. when data is available on the
 * command socket.
 */
int lttng_consumer_poll_socket(struct pollfd *consumer_sockpoll)
{
	int num_rdy;

	/* Restart the system call for as long as it gets interrupted. */
	do {
		num_rdy = poll(consumer_sockpoll, 2, -1);
	} while (num_rdy == -1 && errno == EINTR);

	if (num_rdy == -1) {
		perror("Poll error");
		return -1;
	}

	if (consumer_sockpoll[0].revents & (POLLIN | POLLPRI)) {
		DBG("consumer_should_quit wake up");
		return -1;
	}

	return 0;
}
819
820/*
821 * Set the error socket.
822 */
823void lttng_consumer_set_error_sock(
824 struct lttng_consumer_local_data *ctx, int sock)
825{
826 ctx->consumer_error_socket = sock;
827}
828
829/*
830 * Set the command socket path.
831 */
3bd1e081
MD
832void lttng_consumer_set_command_sock_path(
833 struct lttng_consumer_local_data *ctx, char *sock)
834{
835 ctx->consumer_command_sock_path = sock;
836}
837
838/*
839 * Send return code to the session daemon.
840 * If the socket is not defined, we return 0, it is not a fatal error
841 */
842int lttng_consumer_send_error(
843 struct lttng_consumer_local_data *ctx, int cmd)
844{
845 if (ctx->consumer_error_socket > 0) {
846 return lttcomm_send_unix_sock(ctx->consumer_error_socket, &cmd,
847 sizeof(enum lttcomm_sessiond_command));
848 }
849
850 return 0;
851}
852
853/*
854 * Close all the tracefiles and stream fds, should be called when all instances
855 * are destroyed.
856 */
857void lttng_consumer_cleanup(void)
858{
e4421fec 859 struct lttng_ht_iter iter;
6065ceec
DG
860 struct lttng_ht_node_ulong *node;
861
862 rcu_read_lock();
3bd1e081
MD
863
864 /*
6065ceec
DG
865 * close all outfd. Called when there are no more threads running (after
866 * joining on the threads), no need to protect list iteration with mutex.
3bd1e081 867 */
6065ceec
DG
868 cds_lfht_for_each_entry(consumer_data.stream_ht->ht, &iter.iter, node,
869 node) {
702b1ea4
MD
870 struct lttng_consumer_stream *stream =
871 caa_container_of(node, struct lttng_consumer_stream, node);
872 consumer_del_stream(stream);
3bd1e081 873 }
e4421fec 874
6065ceec
DG
875 cds_lfht_for_each_entry(consumer_data.channel_ht->ht, &iter.iter, node,
876 node) {
702b1ea4
MD
877 struct lttng_consumer_channel *channel =
878 caa_container_of(node, struct lttng_consumer_channel, node);
879 consumer_del_channel(channel);
3bd1e081 880 }
6065ceec
DG
881
882 rcu_read_unlock();
d6ce1df2
MD
883
884 lttng_ht_destroy(consumer_data.stream_ht);
885 lttng_ht_destroy(consumer_data.channel_ht);
3bd1e081
MD
886}
887
888/*
889 * Called from signal handler.
890 */
891void lttng_consumer_should_exit(struct lttng_consumer_local_data *ctx)
892{
893 int ret;
894 consumer_quit = 1;
6f94560a
MD
895 do {
896 ret = write(ctx->consumer_should_quit[1], "4", 1);
897 } while (ret < 0 && errno == EINTR);
3bd1e081
MD
898 if (ret < 0) {
899 perror("write consumer quit");
900 }
901}
902
00e2e675
DG
903void lttng_consumer_sync_trace_file(struct lttng_consumer_stream *stream,
904 off_t orig_offset)
3bd1e081
MD
905{
906 int outfd = stream->out_fd;
907
908 /*
909 * This does a blocking write-and-wait on any page that belongs to the
910 * subbuffer prior to the one we just wrote.
911 * Don't care about error values, as these are just hints and ways to
912 * limit the amount of page cache used.
913 */
914 if (orig_offset < stream->chan->max_sb_size) {
915 return;
916 }
b9182dd9 917 lttng_sync_file_range(outfd, orig_offset - stream->chan->max_sb_size,
3bd1e081
MD
918 stream->chan->max_sb_size,
919 SYNC_FILE_RANGE_WAIT_BEFORE
920 | SYNC_FILE_RANGE_WRITE
921 | SYNC_FILE_RANGE_WAIT_AFTER);
922 /*
923 * Give hints to the kernel about how we access the file:
924 * POSIX_FADV_DONTNEED : we won't re-access data in a near future after
925 * we write it.
926 *
927 * We need to call fadvise again after the file grows because the
928 * kernel does not seem to apply fadvise to non-existing parts of the
929 * file.
930 *
931 * Call fadvise _after_ having waited for the page writeback to
932 * complete because the dirty page writeback semantic is not well
933 * defined. So it can be expected to lead to lower throughput in
934 * streaming.
935 */
936 posix_fadvise(outfd, orig_offset - stream->chan->max_sb_size,
937 stream->chan->max_sb_size, POSIX_FADV_DONTNEED);
938}
939
940/*
941 * Initialise the necessary environnement :
942 * - create a new context
943 * - create the poll_pipe
944 * - create the should_quit pipe (for signal handler)
945 * - create the thread pipe (for splice)
946 *
947 * Takes a function pointer as argument, this function is called when data is
948 * available on a buffer. This function is responsible to do the
949 * kernctl_get_next_subbuf, read the data with mmap or splice depending on the
950 * buffer configuration and then kernctl_put_next_subbuf at the end.
951 *
952 * Returns a pointer to the new context or NULL on error.
953 */
954struct lttng_consumer_local_data *lttng_consumer_create(
955 enum lttng_consumer_type type,
4078b776 956 ssize_t (*buffer_ready)(struct lttng_consumer_stream *stream,
d41f73b7 957 struct lttng_consumer_local_data *ctx),
3bd1e081
MD
958 int (*recv_channel)(struct lttng_consumer_channel *channel),
959 int (*recv_stream)(struct lttng_consumer_stream *stream),
960 int (*update_stream)(int stream_key, uint32_t state))
961{
962 int ret, i;
963 struct lttng_consumer_local_data *ctx;
964
965 assert(consumer_data.type == LTTNG_CONSUMER_UNKNOWN ||
966 consumer_data.type == type);
967 consumer_data.type = type;
968
effcf122 969 ctx = zmalloc(sizeof(struct lttng_consumer_local_data));
3bd1e081
MD
970 if (ctx == NULL) {
971 perror("allocating context");
972 goto error;
973 }
974
975 ctx->consumer_error_socket = -1;
976 /* assign the callbacks */
977 ctx->on_buffer_ready = buffer_ready;
978 ctx->on_recv_channel = recv_channel;
979 ctx->on_recv_stream = recv_stream;
980 ctx->on_update_stream = update_stream;
981
982 ret = pipe(ctx->consumer_poll_pipe);
983 if (ret < 0) {
984 perror("Error creating poll pipe");
985 goto error_poll_pipe;
986 }
987
04fdd819
MD
988 /* set read end of the pipe to non-blocking */
989 ret = fcntl(ctx->consumer_poll_pipe[0], F_SETFL, O_NONBLOCK);
990 if (ret < 0) {
991 perror("fcntl O_NONBLOCK");
992 goto error_poll_fcntl;
993 }
994
995 /* set write end of the pipe to non-blocking */
996 ret = fcntl(ctx->consumer_poll_pipe[1], F_SETFL, O_NONBLOCK);
997 if (ret < 0) {
998 perror("fcntl O_NONBLOCK");
999 goto error_poll_fcntl;
1000 }
1001
3bd1e081
MD
1002 ret = pipe(ctx->consumer_should_quit);
1003 if (ret < 0) {
1004 perror("Error creating recv pipe");
1005 goto error_quit_pipe;
1006 }
1007
1008 ret = pipe(ctx->consumer_thread_pipe);
1009 if (ret < 0) {
1010 perror("Error creating thread pipe");
1011 goto error_thread_pipe;
1012 }
1013
1014 return ctx;
1015
1016
1017error_thread_pipe:
1018 for (i = 0; i < 2; i++) {
1019 int err;
1020
1021 err = close(ctx->consumer_should_quit[i]);
4c462e79
MD
1022 if (err) {
1023 PERROR("close");
1024 }
3bd1e081 1025 }
04fdd819 1026error_poll_fcntl:
3bd1e081
MD
1027error_quit_pipe:
1028 for (i = 0; i < 2; i++) {
1029 int err;
1030
1031 err = close(ctx->consumer_poll_pipe[i]);
4c462e79
MD
1032 if (err) {
1033 PERROR("close");
1034 }
3bd1e081
MD
1035 }
1036error_poll_pipe:
1037 free(ctx);
1038error:
1039 return NULL;
1040}
1041
1042/*
1043 * Close all fds associated with the instance and free the context.
1044 */
1045void lttng_consumer_destroy(struct lttng_consumer_local_data *ctx)
1046{
4c462e79
MD
1047 int ret;
1048
1049 ret = close(ctx->consumer_error_socket);
1050 if (ret) {
1051 PERROR("close");
1052 }
1053 ret = close(ctx->consumer_thread_pipe[0]);
1054 if (ret) {
1055 PERROR("close");
1056 }
1057 ret = close(ctx->consumer_thread_pipe[1]);
1058 if (ret) {
1059 PERROR("close");
1060 }
1061 ret = close(ctx->consumer_poll_pipe[0]);
1062 if (ret) {
1063 PERROR("close");
1064 }
1065 ret = close(ctx->consumer_poll_pipe[1]);
1066 if (ret) {
1067 PERROR("close");
1068 }
1069 ret = close(ctx->consumer_should_quit[0]);
1070 if (ret) {
1071 PERROR("close");
1072 }
1073 ret = close(ctx->consumer_should_quit[1]);
1074 if (ret) {
1075 PERROR("close");
1076 }
3bd1e081
MD
1077 unlink(ctx->consumer_command_sock_path);
1078 free(ctx);
1079}
1080
1081/*
1082 * Mmap the ring buffer, read it and write the data to the tracefile.
1083 *
1084 * Returns the number of bytes written
1085 */
4078b776 1086ssize_t lttng_consumer_on_read_subbuffer_mmap(
3bd1e081
MD
1087 struct lttng_consumer_local_data *ctx,
1088 struct lttng_consumer_stream *stream, unsigned long len)
1089{
f02e1e8a
DG
1090 unsigned long mmap_offset;
1091 ssize_t ret = 0, written = 0;
1092 off_t orig_offset = stream->out_fd_offset;
1093 /* Default is on the disk */
1094 int outfd = stream->out_fd;
1095 uint64_t metadata_id;
1096 struct consumer_relayd_sock_pair *relayd = NULL;
1097
1098 /* RCU lock for the relayd pointer */
1099 rcu_read_lock();
1100
1101 /* Flag that the current stream if set for network streaming. */
1102 if (stream->net_seq_idx != -1) {
1103 relayd = consumer_find_relayd(stream->net_seq_idx);
1104 if (relayd == NULL) {
1105 goto end;
1106 }
1107 }
1108
1109 /* get the offset inside the fd to mmap */
3bd1e081
MD
1110 switch (consumer_data.type) {
1111 case LTTNG_CONSUMER_KERNEL:
f02e1e8a
DG
1112 ret = kernctl_get_mmap_read_offset(stream->wait_fd, &mmap_offset);
1113 break;
7753dea8
MD
1114 case LTTNG_CONSUMER32_UST:
1115 case LTTNG_CONSUMER64_UST:
f02e1e8a
DG
1116 ret = lttng_ustctl_get_mmap_read_offset(stream->chan->handle,
1117 stream->buf, &mmap_offset);
1118 break;
3bd1e081
MD
1119 default:
1120 ERR("Unknown consumer_data type");
1121 assert(0);
1122 }
f02e1e8a
DG
1123 if (ret != 0) {
1124 errno = -ret;
1125 PERROR("tracer ctl get_mmap_read_offset");
1126 written = ret;
1127 goto end;
1128 }
b9182dd9 1129
f02e1e8a
DG
1130 /* Handle stream on the relayd if the output is on the network */
1131 if (relayd) {
1132 unsigned long netlen = len;
1133
1134 /*
1135 * Lock the control socket for the complete duration of the function
1136 * since from this point on we will use the socket.
1137 */
1138 if (stream->metadata_flag) {
1139 /* Metadata requires the control socket. */
1140 pthread_mutex_lock(&relayd->ctrl_sock_mutex);
1141 netlen += sizeof(stream->relayd_stream_id);
1142 }
1143
1144 ret = consumer_handle_stream_before_relayd(stream, netlen);
1145 if (ret >= 0) {
1146 /* Use the returned socket. */
1147 outfd = ret;
1148
1149 /* Write metadata stream id before payload */
1150 if (stream->metadata_flag) {
1151 metadata_id = htobe64(stream->relayd_stream_id);
1152 do {
1153 ret = write(outfd, (void *) &metadata_id,
1154 sizeof(stream->relayd_stream_id));
1155 } while (ret < 0 && errno == EINTR);
1156 if (ret < 0) {
1157 PERROR("write metadata stream id");
1158 written = ret;
1159 goto end;
1160 }
1161 DBG("Metadata stream id %zu written before data",
1162 stream->relayd_stream_id);
1163 /*
1164 * We do this so the return value can match the len passed as
1165 * argument to this function.
1166 */
1167 written -= sizeof(stream->relayd_stream_id);
1168 }
1169 }
1170 /* Else, use the default set before which is the filesystem. */
1171 }
1172
1173 while (len > 0) {
1174 do {
1175 ret = write(outfd, stream->mmap_base + mmap_offset, len);
1176 } while (ret < 0 && errno == EINTR);
1177 if (ret < 0) {
1178 PERROR("Error in file write");
1179 if (written == 0) {
1180 written = ret;
1181 }
1182 goto end;
1183 } else if (ret > len) {
1184 PERROR("Error in file write (ret %ld > len %lu)", ret, len);
1185 written += ret;
1186 goto end;
1187 } else {
1188 len -= ret;
1189 mmap_offset += ret;
1190 }
1191 DBG("Consumer mmap write() ret %ld (len %lu)", ret, len);
1192
1193 /* This call is useless on a socket so better save a syscall. */
1194 if (!relayd) {
1195 /* This won't block, but will start writeout asynchronously */
1196 lttng_sync_file_range(outfd, stream->out_fd_offset, ret,
1197 SYNC_FILE_RANGE_WRITE);
1198 stream->out_fd_offset += ret;
1199 }
1200 written += ret;
1201 }
1202 lttng_consumer_sync_trace_file(stream, orig_offset);
1203
1204end:
1205 /* Unlock only if ctrl socket used */
1206 if (relayd && stream->metadata_flag) {
1207 pthread_mutex_unlock(&relayd->ctrl_sock_mutex);
1208 }
1209
1210 rcu_read_unlock();
1211 return written;
3bd1e081
MD
1212}
1213
1214/*
1215 * Splice the data from the ring buffer to the tracefile.
1216 *
1217 * Returns the number of bytes spliced.
1218 */
4078b776 1219ssize_t lttng_consumer_on_read_subbuffer_splice(
3bd1e081
MD
1220 struct lttng_consumer_local_data *ctx,
1221 struct lttng_consumer_stream *stream, unsigned long len)
1222{
f02e1e8a
DG
1223 ssize_t ret = 0, written = 0, ret_splice = 0;
1224 loff_t offset = 0;
1225 off_t orig_offset = stream->out_fd_offset;
1226 int fd = stream->wait_fd;
1227 /* Default is on the disk */
1228 int outfd = stream->out_fd;
1229 uint64_t metadata_id;
1230 struct consumer_relayd_sock_pair *relayd = NULL;
1231
3bd1e081
MD
1232 switch (consumer_data.type) {
1233 case LTTNG_CONSUMER_KERNEL:
f02e1e8a 1234 break;
7753dea8
MD
1235 case LTTNG_CONSUMER32_UST:
1236 case LTTNG_CONSUMER64_UST:
f02e1e8a 1237 /* Not supported for user space tracing */
3bd1e081
MD
1238 return -ENOSYS;
1239 default:
1240 ERR("Unknown consumer_data type");
1241 assert(0);
3bd1e081
MD
1242 }
1243
f02e1e8a
DG
1244 /* RCU lock for the relayd pointer */
1245 rcu_read_lock();
1246
1247 /* Flag that the current stream if set for network streaming. */
1248 if (stream->net_seq_idx != -1) {
1249 relayd = consumer_find_relayd(stream->net_seq_idx);
1250 if (relayd == NULL) {
1251 goto end;
1252 }
1253 }
1254
1255 /* Write metadata stream id before payload */
1256 if (stream->metadata_flag && relayd) {
1257 /*
1258 * Lock the control socket for the complete duration of the function
1259 * since from this point on we will use the socket.
1260 */
1261 pthread_mutex_lock(&relayd->ctrl_sock_mutex);
1262
1263 metadata_id = htobe64(stream->relayd_stream_id);
1264 do {
1265 ret = write(ctx->consumer_thread_pipe[1], (void *) &metadata_id,
1266 sizeof(stream->relayd_stream_id));
1267 } while (ret < 0 && errno == EINTR);
1268 if (ret < 0) {
1269 PERROR("write metadata stream id");
1270 written = ret;
1271 goto end;
1272 }
1273 DBG("Metadata stream id %zu written before data",
1274 stream->relayd_stream_id);
1275 }
1276
1277 while (len > 0) {
1278 DBG("splice chan to pipe offset %lu of len %lu (fd : %d)",
1279 (unsigned long)offset, len, fd);
1280 ret_splice = splice(fd, &offset, ctx->consumer_thread_pipe[1], NULL, len,
1281 SPLICE_F_MOVE | SPLICE_F_MORE);
1282 DBG("splice chan to pipe, ret %zd", ret_splice);
1283 if (ret_splice < 0) {
1284 PERROR("Error in relay splice");
1285 if (written == 0) {
1286 written = ret_splice;
1287 }
1288 ret = errno;
1289 goto splice_error;
1290 }
1291
1292 /* Handle stream on the relayd if the output is on the network */
1293 if (relayd) {
1294 if (stream->metadata_flag) {
1295 /* Update counter to fit the spliced data */
1296 ret_splice += sizeof(stream->relayd_stream_id);
1297 len += sizeof(stream->relayd_stream_id);
1298 /*
1299 * We do this so the return value can match the len passed as
1300 * argument to this function.
1301 */
1302 written -= sizeof(stream->relayd_stream_id);
1303 }
1304
1305 ret = consumer_handle_stream_before_relayd(stream, ret_splice);
1306 if (ret >= 0) {
1307 /* Use the returned socket. */
1308 outfd = ret;
1309 } else {
1310 if (outfd == -1) {
1311 ERR("Remote relayd disconnected. Stopping");
1312 goto end;
1313 }
1314 }
1315 }
1316
1317 /* Splice data out */
1318 ret_splice = splice(ctx->consumer_thread_pipe[0], NULL, outfd, NULL,
1319 ret_splice, SPLICE_F_MOVE | SPLICE_F_MORE);
1320 DBG("Kernel consumer splice pipe to file, ret %zd", ret_splice);
1321 if (ret_splice < 0) {
1322 PERROR("Error in file splice");
1323 if (written == 0) {
1324 written = ret_splice;
1325 }
1326 ret = errno;
1327 goto splice_error;
1328 } else if (ret_splice > len) {
1329 errno = EINVAL;
1330 PERROR("Wrote more data than requested %zd (len: %lu)",
1331 ret_splice, len);
1332 written += ret_splice;
1333 ret = errno;
1334 goto splice_error;
1335 }
1336 len -= ret_splice;
1337
1338 /* This call is useless on a socket so better save a syscall. */
1339 if (!relayd) {
1340 /* This won't block, but will start writeout asynchronously */
1341 lttng_sync_file_range(outfd, stream->out_fd_offset, ret_splice,
1342 SYNC_FILE_RANGE_WRITE);
1343 stream->out_fd_offset += ret_splice;
1344 }
1345 written += ret_splice;
1346 }
1347 lttng_consumer_sync_trace_file(stream, orig_offset);
1348
1349 ret = ret_splice;
1350
1351 goto end;
1352
1353splice_error:
1354 /* send the appropriate error description to sessiond */
1355 switch (ret) {
1356 case EBADF:
1357 lttng_consumer_send_error(ctx, CONSUMERD_SPLICE_EBADF);
1358 break;
1359 case EINVAL:
1360 lttng_consumer_send_error(ctx, CONSUMERD_SPLICE_EINVAL);
1361 break;
1362 case ENOMEM:
1363 lttng_consumer_send_error(ctx, CONSUMERD_SPLICE_ENOMEM);
1364 break;
1365 case ESPIPE:
1366 lttng_consumer_send_error(ctx, CONSUMERD_SPLICE_ESPIPE);
1367 break;
1368 }
1369
1370end:
1371 if (relayd && stream->metadata_flag) {
1372 pthread_mutex_unlock(&relayd->ctrl_sock_mutex);
1373 }
1374
1375 rcu_read_unlock();
1376 return written;
3bd1e081
MD
1377}
1378
1379/*
1380 * Take a snapshot for a specific fd
1381 *
1382 * Returns 0 on success, < 0 on error
1383 */
1384int lttng_consumer_take_snapshot(struct lttng_consumer_local_data *ctx,
1385 struct lttng_consumer_stream *stream)
1386{
1387 switch (consumer_data.type) {
1388 case LTTNG_CONSUMER_KERNEL:
1389 return lttng_kconsumer_take_snapshot(ctx, stream);
7753dea8
MD
1390 case LTTNG_CONSUMER32_UST:
1391 case LTTNG_CONSUMER64_UST:
3bd1e081
MD
1392 return lttng_ustconsumer_take_snapshot(ctx, stream);
1393 default:
1394 ERR("Unknown consumer_data type");
1395 assert(0);
1396 return -ENOSYS;
1397 }
1398
1399}
1400
1401/*
1402 * Get the produced position
1403 *
1404 * Returns 0 on success, < 0 on error
1405 */
1406int lttng_consumer_get_produced_snapshot(
1407 struct lttng_consumer_local_data *ctx,
1408 struct lttng_consumer_stream *stream,
1409 unsigned long *pos)
1410{
1411 switch (consumer_data.type) {
1412 case LTTNG_CONSUMER_KERNEL:
1413 return lttng_kconsumer_get_produced_snapshot(ctx, stream, pos);
7753dea8
MD
1414 case LTTNG_CONSUMER32_UST:
1415 case LTTNG_CONSUMER64_UST:
3bd1e081
MD
1416 return lttng_ustconsumer_get_produced_snapshot(ctx, stream, pos);
1417 default:
1418 ERR("Unknown consumer_data type");
1419 assert(0);
1420 return -ENOSYS;
1421 }
1422}
1423
1424int lttng_consumer_recv_cmd(struct lttng_consumer_local_data *ctx,
1425 int sock, struct pollfd *consumer_sockpoll)
1426{
1427 switch (consumer_data.type) {
1428 case LTTNG_CONSUMER_KERNEL:
1429 return lttng_kconsumer_recv_cmd(ctx, sock, consumer_sockpoll);
7753dea8
MD
1430 case LTTNG_CONSUMER32_UST:
1431 case LTTNG_CONSUMER64_UST:
3bd1e081
MD
1432 return lttng_ustconsumer_recv_cmd(ctx, sock, consumer_sockpoll);
1433 default:
1434 ERR("Unknown consumer_data type");
1435 assert(0);
1436 return -ENOSYS;
1437 }
1438}
1439
1440/*
e4421fec 1441 * This thread polls the fds in the set to consume the data and write
3bd1e081
MD
1442 * it to tracefile if necessary.
1443 */
1444void *lttng_consumer_thread_poll_fds(void *data)
1445{
1446 int num_rdy, num_hup, high_prio, ret, i;
1447 struct pollfd *pollfd = NULL;
1448 /* local view of the streams */
1449 struct lttng_consumer_stream **local_stream = NULL;
1450 /* local view of consumer_data.fds_count */
1451 int nb_fd = 0;
3bd1e081 1452 struct lttng_consumer_local_data *ctx = data;
00e2e675
DG
1453 struct lttng_ht *metadata_ht;
1454 struct lttng_ht_iter iter;
1455 struct lttng_ht_node_ulong *node;
1456 struct lttng_consumer_stream *metadata_stream;
1457 ssize_t len;
1458
1459 metadata_ht = lttng_ht_new(0, LTTNG_HT_TYPE_ULONG);
3bd1e081 1460
e7b994a3
DG
1461 rcu_register_thread();
1462
effcf122 1463 local_stream = zmalloc(sizeof(struct lttng_consumer_stream));
3bd1e081
MD
1464
1465 while (1) {
1466 high_prio = 0;
1467 num_hup = 0;
1468
1469 /*
e4421fec 1470 * the fds set has been updated, we need to update our
3bd1e081
MD
1471 * local array as well
1472 */
1473 pthread_mutex_lock(&consumer_data.lock);
1474 if (consumer_data.need_update) {
1475 if (pollfd != NULL) {
1476 free(pollfd);
1477 pollfd = NULL;
1478 }
1479 if (local_stream != NULL) {
1480 free(local_stream);
1481 local_stream = NULL;
1482 }
1483
1484 /* allocate for all fds + 1 for the consumer_poll_pipe */
effcf122 1485 pollfd = zmalloc((consumer_data.stream_count + 1) * sizeof(struct pollfd));
3bd1e081
MD
1486 if (pollfd == NULL) {
1487 perror("pollfd malloc");
1488 pthread_mutex_unlock(&consumer_data.lock);
1489 goto end;
1490 }
1491
1492 /* allocate for all fds + 1 for the consumer_poll_pipe */
effcf122 1493 local_stream = zmalloc((consumer_data.stream_count + 1) *
3bd1e081
MD
1494 sizeof(struct lttng_consumer_stream));
1495 if (local_stream == NULL) {
1496 perror("local_stream malloc");
1497 pthread_mutex_unlock(&consumer_data.lock);
1498 goto end;
1499 }
00e2e675
DG
1500 ret = consumer_update_poll_array(ctx, &pollfd, local_stream,
1501 metadata_ht);
3bd1e081
MD
1502 if (ret < 0) {
1503 ERR("Error in allocating pollfd or local_outfds");
1504 lttng_consumer_send_error(ctx, CONSUMERD_POLL_ERROR);
1505 pthread_mutex_unlock(&consumer_data.lock);
1506 goto end;
1507 }
1508 nb_fd = ret;
1509 consumer_data.need_update = 0;
1510 }
1511 pthread_mutex_unlock(&consumer_data.lock);
1512
4078b776
MD
1513 /* No FDs and consumer_quit, consumer_cleanup the thread */
1514 if (nb_fd == 0 && consumer_quit == 1) {
1515 goto end;
1516 }
3bd1e081 1517 /* poll on the array of fds */
88f2b785 1518 restart:
3bd1e081
MD
1519 DBG("polling on %d fd", nb_fd + 1);
1520 num_rdy = poll(pollfd, nb_fd + 1, consumer_poll_timeout);
1521 DBG("poll num_rdy : %d", num_rdy);
1522 if (num_rdy == -1) {
88f2b785
MD
1523 /*
1524 * Restart interrupted system call.
1525 */
1526 if (errno == EINTR) {
1527 goto restart;
1528 }
3bd1e081
MD
1529 perror("Poll error");
1530 lttng_consumer_send_error(ctx, CONSUMERD_POLL_ERROR);
1531 goto end;
1532 } else if (num_rdy == 0) {
1533 DBG("Polling thread timed out");
1534 goto end;
1535 }
1536
3bd1e081 1537 /*
00e2e675
DG
1538 * If the consumer_poll_pipe triggered poll go directly to the
1539 * beginning of the loop to update the array. We want to prioritize
1540 * array update over low-priority reads.
3bd1e081 1541 */
509bb1cf 1542 if (pollfd[nb_fd].revents & (POLLIN | POLLPRI)) {
04fdd819
MD
1543 size_t pipe_readlen;
1544 char tmp;
1545
3bd1e081 1546 DBG("consumer_poll_pipe wake up");
04fdd819
MD
1547 /* Consume 1 byte of pipe data */
1548 do {
1549 pipe_readlen = read(ctx->consumer_poll_pipe[0], &tmp, 1);
1550 } while (pipe_readlen == -1 && errno == EINTR);
3bd1e081
MD
1551 continue;
1552 }
1553
1554 /* Take care of high priority channels first. */
1555 for (i = 0; i < nb_fd; i++) {
00e2e675
DG
1556 /* Lookup for metadata which is the highest priority */
1557 lttng_ht_lookup(metadata_ht,
1558 (void *)((unsigned long) pollfd[i].fd), &iter);
1559 node = lttng_ht_iter_get_node_ulong(&iter);
1560 if (node != NULL &&
1561 (pollfd[i].revents & (POLLIN | POLLPRI))) {
1562 DBG("Urgent metadata read on fd %d", pollfd[i].fd);
1563 metadata_stream = caa_container_of(node,
1564 struct lttng_consumer_stream, waitfd_node);
1565 high_prio = 1;
1566 len = ctx->on_buffer_ready(metadata_stream, ctx);
1567 /* it's ok to have an unavailable sub-buffer */
1568 if (len < 0 && len != -EAGAIN) {
1569 goto end;
1570 } else if (len > 0) {
1571 metadata_stream->data_read = 1;
1572 }
1573 } else if (pollfd[i].revents & POLLPRI) {
d41f73b7
MD
1574 DBG("Urgent read on fd %d", pollfd[i].fd);
1575 high_prio = 1;
4078b776 1576 len = ctx->on_buffer_ready(local_stream[i], ctx);
d41f73b7 1577 /* it's ok to have an unavailable sub-buffer */
4078b776
MD
1578 if (len < 0 && len != -EAGAIN) {
1579 goto end;
1580 } else if (len > 0) {
1581 local_stream[i]->data_read = 1;
d41f73b7 1582 }
3bd1e081
MD
1583 }
1584 }
1585
4078b776
MD
1586 /*
1587 * If we read high prio channel in this loop, try again
1588 * for more high prio data.
1589 */
1590 if (high_prio) {
3bd1e081
MD
1591 continue;
1592 }
1593
1594 /* Take care of low priority channels. */
4078b776
MD
1595 for (i = 0; i < nb_fd; i++) {
1596 if ((pollfd[i].revents & POLLIN) ||
1597 local_stream[i]->hangup_flush_done) {
4078b776
MD
1598 DBG("Normal read on fd %d", pollfd[i].fd);
1599 len = ctx->on_buffer_ready(local_stream[i], ctx);
1600 /* it's ok to have an unavailable sub-buffer */
1601 if (len < 0 && len != -EAGAIN) {
1602 goto end;
1603 } else if (len > 0) {
1604 local_stream[i]->data_read = 1;
1605 }
1606 }
1607 }
1608
1609 /* Handle hangup and errors */
1610 for (i = 0; i < nb_fd; i++) {
1611 if (!local_stream[i]->hangup_flush_done
1612 && (pollfd[i].revents & (POLLHUP | POLLERR | POLLNVAL))
1613 && (consumer_data.type == LTTNG_CONSUMER32_UST
1614 || consumer_data.type == LTTNG_CONSUMER64_UST)) {
1615 DBG("fd %d is hup|err|nval. Attempting flush and read.",
1616 pollfd[i].fd);
1617 lttng_ustconsumer_on_stream_hangup(local_stream[i]);
1618 /* Attempt read again, for the data we just flushed. */
1619 local_stream[i]->data_read = 1;
1620 }
1621 /*
1622 * If the poll flag is HUP/ERR/NVAL and we have
1623 * read no data in this pass, we can remove the
1624 * stream from its hash table.
1625 */
1626 if ((pollfd[i].revents & POLLHUP)) {
1627 DBG("Polling fd %d tells it has hung up.", pollfd[i].fd);
1628 if (!local_stream[i]->data_read) {
00e2e675
DG
1629 if (local_stream[i]->metadata_flag) {
1630 iter.iter.node = &local_stream[i]->waitfd_node.node;
1631 ret = lttng_ht_del(metadata_ht, &iter);
1632 assert(!ret);
1633 }
702b1ea4 1634 consumer_del_stream(local_stream[i]);
4078b776
MD
1635 num_hup++;
1636 }
1637 } else if (pollfd[i].revents & POLLERR) {
1638 ERR("Error returned in polling fd %d.", pollfd[i].fd);
1639 if (!local_stream[i]->data_read) {
00e2e675
DG
1640 if (local_stream[i]->metadata_flag) {
1641 iter.iter.node = &local_stream[i]->waitfd_node.node;
1642 ret = lttng_ht_del(metadata_ht, &iter);
1643 assert(!ret);
1644 }
702b1ea4 1645 consumer_del_stream(local_stream[i]);
4078b776
MD
1646 num_hup++;
1647 }
1648 } else if (pollfd[i].revents & POLLNVAL) {
1649 ERR("Polling fd %d tells fd is not open.", pollfd[i].fd);
1650 if (!local_stream[i]->data_read) {
00e2e675
DG
1651 if (local_stream[i]->metadata_flag) {
1652 iter.iter.node = &local_stream[i]->waitfd_node.node;
1653 ret = lttng_ht_del(metadata_ht, &iter);
1654 assert(!ret);
1655 }
702b1ea4 1656 consumer_del_stream(local_stream[i]);
4078b776 1657 num_hup++;
3bd1e081
MD
1658 }
1659 }
4078b776 1660 local_stream[i]->data_read = 0;
3bd1e081
MD
1661 }
1662 }
1663end:
1664 DBG("polling thread exiting");
1665 if (pollfd != NULL) {
1666 free(pollfd);
1667 pollfd = NULL;
1668 }
1669 if (local_stream != NULL) {
1670 free(local_stream);
1671 local_stream = NULL;
1672 }
e7b994a3 1673 rcu_unregister_thread();
3bd1e081
MD
1674 return NULL;
1675}
1676
1677/*
1678 * This thread listens on the consumerd socket and receives the file
1679 * descriptors from the session daemon.
1680 */
1681void *lttng_consumer_thread_receive_fds(void *data)
1682{
1683 int sock, client_socket, ret;
1684 /*
1685 * structure to poll for incoming data on communication socket avoids
1686 * making blocking sockets.
1687 */
1688 struct pollfd consumer_sockpoll[2];
1689 struct lttng_consumer_local_data *ctx = data;
1690
e7b994a3
DG
1691 rcu_register_thread();
1692
3bd1e081
MD
1693 DBG("Creating command socket %s", ctx->consumer_command_sock_path);
1694 unlink(ctx->consumer_command_sock_path);
1695 client_socket = lttcomm_create_unix_sock(ctx->consumer_command_sock_path);
1696 if (client_socket < 0) {
1697 ERR("Cannot create command socket");
1698 goto end;
1699 }
1700
1701 ret = lttcomm_listen_unix_sock(client_socket);
1702 if (ret < 0) {
1703 goto end;
1704 }
1705
32258573 1706 DBG("Sending ready command to lttng-sessiond");
3bd1e081
MD
1707 ret = lttng_consumer_send_error(ctx, CONSUMERD_COMMAND_SOCK_READY);
1708 /* return < 0 on error, but == 0 is not fatal */
1709 if (ret < 0) {
32258573 1710 ERR("Error sending ready command to lttng-sessiond");
3bd1e081
MD
1711 goto end;
1712 }
1713
1714 ret = fcntl(client_socket, F_SETFL, O_NONBLOCK);
1715 if (ret < 0) {
1716 perror("fcntl O_NONBLOCK");
1717 goto end;
1718 }
1719
1720 /* prepare the FDs to poll : to client socket and the should_quit pipe */
1721 consumer_sockpoll[0].fd = ctx->consumer_should_quit[0];
1722 consumer_sockpoll[0].events = POLLIN | POLLPRI;
1723 consumer_sockpoll[1].fd = client_socket;
1724 consumer_sockpoll[1].events = POLLIN | POLLPRI;
1725
1726 if (lttng_consumer_poll_socket(consumer_sockpoll) < 0) {
1727 goto end;
1728 }
1729 DBG("Connection on client_socket");
1730
1731 /* Blocking call, waiting for transmission */
1732 sock = lttcomm_accept_unix_sock(client_socket);
1733 if (sock <= 0) {
1734 WARN("On accept");
1735 goto end;
1736 }
1737 ret = fcntl(sock, F_SETFL, O_NONBLOCK);
1738 if (ret < 0) {
1739 perror("fcntl O_NONBLOCK");
1740 goto end;
1741 }
1742
1743 /* update the polling structure to poll on the established socket */
1744 consumer_sockpoll[1].fd = sock;
1745 consumer_sockpoll[1].events = POLLIN | POLLPRI;
1746
1747 while (1) {
1748 if (lttng_consumer_poll_socket(consumer_sockpoll) < 0) {
1749 goto end;
1750 }
1751 DBG("Incoming command on sock");
1752 ret = lttng_consumer_recv_cmd(ctx, sock, consumer_sockpoll);
1753 if (ret == -ENOENT) {
1754 DBG("Received STOP command");
1755 goto end;
1756 }
1757 if (ret < 0) {
1758 ERR("Communication interrupted on command socket");
1759 goto end;
1760 }
1761 if (consumer_quit) {
1762 DBG("consumer_thread_receive_fds received quit from signal");
1763 goto end;
1764 }
1765 DBG("received fds on sock");
1766 }
1767end:
1768 DBG("consumer_thread_receive_fds exiting");
1769
1770 /*
1771 * when all fds have hung up, the polling thread
1772 * can exit cleanly
1773 */
1774 consumer_quit = 1;
1775
1776 /*
1777 * 2s of grace period, if no polling events occur during
1778 * this period, the polling thread will exit even if there
1779 * are still open FDs (should not happen, but safety mechanism).
1780 */
1781 consumer_poll_timeout = LTTNG_CONSUMER_POLL_TIMEOUT;
1782
04fdd819
MD
1783 /*
1784 * Wake-up the other end by writing a null byte in the pipe
1785 * (non-blocking). Important note: Because writing into the
1786 * pipe is non-blocking (and therefore we allow dropping wakeup
1787 * data, as long as there is wakeup data present in the pipe
1788 * buffer to wake up the other end), the other end should
1789 * perform the following sequence for waiting:
1790 * 1) empty the pipe (reads).
1791 * 2) perform update operation.
1792 * 3) wait on the pipe (poll).
1793 */
1794 do {
1795 ret = write(ctx->consumer_poll_pipe[1], "", 1);
6f94560a 1796 } while (ret < 0 && errno == EINTR);
e7b994a3 1797 rcu_unregister_thread();
3bd1e081
MD
1798 return NULL;
1799}
d41f73b7 1800
4078b776 1801ssize_t lttng_consumer_read_subbuffer(struct lttng_consumer_stream *stream,
d41f73b7
MD
1802 struct lttng_consumer_local_data *ctx)
1803{
1804 switch (consumer_data.type) {
1805 case LTTNG_CONSUMER_KERNEL:
1806 return lttng_kconsumer_read_subbuffer(stream, ctx);
7753dea8
MD
1807 case LTTNG_CONSUMER32_UST:
1808 case LTTNG_CONSUMER64_UST:
d41f73b7
MD
1809 return lttng_ustconsumer_read_subbuffer(stream, ctx);
1810 default:
1811 ERR("Unknown consumer_data type");
1812 assert(0);
1813 return -ENOSYS;
1814 }
1815}
1816
1817int lttng_consumer_on_recv_stream(struct lttng_consumer_stream *stream)
1818{
1819 switch (consumer_data.type) {
1820 case LTTNG_CONSUMER_KERNEL:
1821 return lttng_kconsumer_on_recv_stream(stream);
7753dea8
MD
1822 case LTTNG_CONSUMER32_UST:
1823 case LTTNG_CONSUMER64_UST:
d41f73b7
MD
1824 return lttng_ustconsumer_on_recv_stream(stream);
1825 default:
1826 ERR("Unknown consumer_data type");
1827 assert(0);
1828 return -ENOSYS;
1829 }
1830}
e4421fec
DG
1831
1832/*
1833 * Allocate and set consumer data hash tables.
1834 */
1835void lttng_consumer_init(void)
1836{
1837 consumer_data.stream_ht = lttng_ht_new(0, LTTNG_HT_TYPE_ULONG);
1838 consumer_data.channel_ht = lttng_ht_new(0, LTTNG_HT_TYPE_ULONG);
00e2e675 1839 consumer_data.relayd_ht = lttng_ht_new(0, LTTNG_HT_TYPE_ULONG);
e4421fec 1840}
This page took 0.120416 seconds and 5 git commands to generate.