SoW-2020-0002: Trace Hit Counters: trigger error reporting integration
[lttng-tools.git] / src / bin / lttng-sessiond / trigger-error-accounting.c
1 /*
2 * Copyright (C) 2020 Francis Deslauriers <francis.deslauriers@efficios.com>
3 *
4 * SPDX-License-Identifier: GPL-2.0-only
5 *
6 */
7
8 #include <fcntl.h>
9 #include <sys/mman.h>
10 #include <sys/stat.h>
11 #include <unistd.h>
12 #include <urcu/compiler.h>
13 #include <pthread.h>
14
15 #include <common/error.h>
16 #include <common/hashtable/hashtable.h>
17 #include <common/index-allocator.h>
18 #include <common/kernel-ctl/kernel-ctl.h>
19 #include <common/shm.h>
20 #include <lttng/trigger/trigger-internal.h>
21
22 #include "lttng-ust-error.h"
23 #include "trigger-error-accounting.h"
24 #include "ust-app.h"
25
/*
 * Element of `error_counter_indexes_ht`. Associates a trigger's tracer token
 * (the key `node` is initialized with) to the error counter index that was
 * allocated for it.
 */
struct index_ht_entry {
	/* Hash table node, keyed by tracer token. */
	struct lttng_ht_node_u64 node;
	/* Index obtained from `index_allocator` for this token. */
	uint64_t error_counter_index;
	/* Allows the entry to be freed through call_rcu(). */
	struct rcu_head rcu_head;
};
31
/*
 * Element of `error_counter_uid_ht`. Holds the user space error counter
 * created for a given user id (the key `node` is initialized with).
 */
struct error_account_entry {
	/* Hash table node, keyed by application uid. */
	struct lttng_ht_node_u64 node;
	/* Allows the entry to be destroyed through call_rcu(). */
	struct rcu_head rcu_head;
	/* Object data produced by ustctl_create_counter_data(). */
	struct lttng_ust_object_data *counter;
	/*
	 * Array of ustctl_get_nr_cpu_per_counter() elements, each produced by
	 * ustctl_create_counter_cpu_data().
	 */
	struct lttng_ust_object_data **cpu_counters;
	/* Counter returned by ustctl_create_counter(). */
	struct ustctl_daemon_counter *daemon_counter;
};
39
/* Error accounting state of the kernel tracer. */
struct kernel_error_account_entry {
	/*
	 * File descriptor returned by
	 * kernctl_create_trigger_group_error_counter(). The rest of this file
	 * treats the value 0 as "no counter registered".
	 */
	int kernel_trigger_error_counter_fd;
};
43
/* Kernel tracer error counter; its fd is 0 until a counter is registered. */
static struct kernel_error_account_entry kernel_error_accountant = { 0 };

/* Hashtable mapping trigger token to index_ht_entry */
static struct lttng_ht *error_counter_indexes_ht;

/* Hashtable mapping uid to error_account_entry */
static struct lttng_ht *error_counter_uid_ht;

/*
 * Size of the single dimension of every error counter; set from the
 * `nb_bucket` argument of trigger_error_accounting_init().
 */
static uint64_t error_counter_size = 0;
/*
 * Allocator of error counter indexes; created by
 * trigger_error_accounting_init().
 */
struct lttng_index_allocator *index_allocator;
54
55
56 void trigger_error_accounting_init(uint64_t nb_bucket)
57 {
58 struct lttng_index_allocator *error_counter_index_allocator;
59
60 error_counter_index_allocator = lttng_index_allocator_create(nb_bucket);
61 if (!error_counter_index_allocator) {
62 ERR("Failed to allocate trigger error counter index");
63 goto error_index_allocator;
64 }
65
66 index_allocator = error_counter_index_allocator;
67
68 error_counter_indexes_ht = lttng_ht_new(16, LTTNG_HT_TYPE_U64);
69 error_counter_uid_ht = lttng_ht_new(16, LTTNG_HT_TYPE_U64);
70 error_counter_size = nb_bucket;
71
72 error_index_allocator:
73 return;
74 }
75
76 static
77 enum trigger_error_accounting_status get_error_counter_index_for_token(
78 uint64_t tracer_token, uint64_t *error_counter_index)
79 {
80 struct lttng_ht_node_u64 *node;
81 struct lttng_ht_iter iter;
82 struct index_ht_entry *index_entry;;
83 enum trigger_error_accounting_status status;
84
85 lttng_ht_lookup(error_counter_indexes_ht, &tracer_token, &iter);
86 node = lttng_ht_iter_get_node_u64(&iter);
87 if (node) {
88 index_entry = caa_container_of(node, struct index_ht_entry, node);
89 *error_counter_index = index_entry->error_counter_index;
90 status = TRIGGER_ERROR_ACCOUNTING_STATUS_OK;
91 } else {
92 status = TRIGGER_ERROR_ACCOUNTING_STATUS_NOT_FOUND;
93 }
94
95 return status;
96 }
97
98 #ifdef HAVE_LIBLTTNG_UST_CTL
99 static
100 struct error_account_entry *get_uid_accounting_entry(const struct ust_app *app)
101 {
102 struct error_account_entry *entry;
103 struct lttng_ht_node_u64 *node;
104 struct lttng_ht_iter iter;
105 uint64_t key = app->uid;
106
107 lttng_ht_lookup(error_counter_uid_ht, &key, &iter);
108 node = lttng_ht_iter_get_node_u64(&iter);
109 if(node == NULL) {
110 entry = NULL;
111 } else {
112 entry = caa_container_of(node, struct error_account_entry, node);
113 }
114
115 return entry;
116 }
117
118 static
119 struct error_account_entry *create_uid_accounting_entry(
120 const struct ust_app *app)
121 {
122 int i, ret, nr_counter_cpu_fds;
123 struct ustctl_counter_dimension dimension[1] = {0};
124 struct ustctl_daemon_counter *daemon_counter;
125 struct lttng_ust_object_data *counter, **counter_cpus;
126 int *counter_cpu_fds;
127 struct error_account_entry *entry = NULL;
128
129 entry = zmalloc(sizeof(struct error_account_entry));
130 if (!entry) {
131 PERROR("Allocating trigger error acounting entry")
132 goto error;
133 }
134
135 nr_counter_cpu_fds = ustctl_get_nr_cpu_per_counter();
136 counter_cpu_fds = zmalloc(nr_counter_cpu_fds * sizeof(*counter_cpu_fds));
137 if (!counter_cpu_fds) {
138 ret = -1;
139 goto error_counter_cpu_fds_alloc;
140 }
141
142 counter_cpus = zmalloc(nr_counter_cpu_fds * sizeof(*counter_cpus));
143 if (!counter_cpus) {
144 ret = -1;
145 goto error_counter_cpus_alloc;
146 }
147
148 for (i = 0; i < nr_counter_cpu_fds; i++) {
149 counter_cpu_fds[i] = shm_create_anonymous("trigger-error-accounting");
150 //FIXME error handling
151 }
152
153
154 dimension[0].size = error_counter_size;
155 dimension[0].has_underflow = false;
156 dimension[0].has_overflow = false;
157
158 daemon_counter = ustctl_create_counter(1, dimension, 0, -1,
159 nr_counter_cpu_fds, counter_cpu_fds,
160 USTCTL_COUNTER_BITNESS_32,
161 USTCTL_COUNTER_ARITHMETIC_MODULAR,
162 USTCTL_COUNTER_ALLOC_PER_CPU);
163 assert(daemon_counter);
164
165 ret = ustctl_create_counter_data(daemon_counter, &counter);
166 assert(ret == 0);
167
168 for (i = 0; i < nr_counter_cpu_fds; i++) {
169 ret = ustctl_create_counter_cpu_data(daemon_counter, i,
170 &counter_cpus[i]);
171 assert(ret == 0);
172 }
173
174 entry->daemon_counter = daemon_counter;
175 entry->counter = counter;
176 entry->cpu_counters = counter_cpus;
177
178 lttng_ht_node_init_u64(&entry->node, app->uid);
179 lttng_ht_add_unique_u64(error_counter_uid_ht, &entry->node);
180
181 goto end;
182
183 error_counter_cpus_alloc:
184 free(counter_cpu_fds);
185 error_counter_cpu_fds_alloc:
186 free(entry);
187 error:
188 entry = NULL;
189 end:
190 return entry;
191 }
192
193 static
194 enum trigger_error_accounting_status send_counter_data_to_ust(
195 struct ust_app *app,
196 struct lttng_ust_object_data *new_counter)
197 {
198 int ret;
199 enum trigger_error_accounting_status status;
200
201 /* Attach counter to trigger group */
202 pthread_mutex_lock(&app->sock_lock);
203 ret = ustctl_send_counter_data_to_ust(app->sock,
204 app->token_communication.handle->handle, new_counter);
205 pthread_mutex_unlock(&app->sock_lock);
206 if (ret < 0) {
207 if (ret != -EPIPE && ret != -LTTNG_UST_ERR_EXITING) {
208 ERR("Error ustctl send counter data to app pid: %d with ret %d",
209 app->pid, ret);
210 status = TRIGGER_ERROR_ACCOUNTING_STATUS_ERR;
211 } else {
212 DBG3("UST app send counter data to ust failed. Application is dead.");
213 status = TRIGGER_ERROR_ACCOUNTING_STATUS_APP_DEAD;
214 }
215 goto end;
216 }
217
218 status = TRIGGER_ERROR_ACCOUNTING_STATUS_OK;
219 end:
220 return status;
221 }
222
223 static
224 enum trigger_error_accounting_status send_counter_cpu_data_to_ust(
225 struct ust_app *app,
226 struct lttng_ust_object_data *new_counter,
227 struct lttng_ust_object_data *new_counter_cpu)
228 {
229 int ret;
230 enum trigger_error_accounting_status status;
231
232 pthread_mutex_lock(&app->sock_lock);
233 ret = ustctl_send_counter_cpu_data_to_ust(app->sock,
234 new_counter, new_counter_cpu);
235 pthread_mutex_unlock(&app->sock_lock);
236 if (ret < 0) {
237 if (ret != -EPIPE && ret != -LTTNG_UST_ERR_EXITING) {
238 ERR("Error ustctl send counter cpu data to app pid: %d with ret %d",
239 app->pid, ret);
240 status = TRIGGER_ERROR_ACCOUNTING_STATUS_ERR;
241 } else {
242 DBG3("UST app send counter cpu data to ust failed. Application is dead.");
243 status = TRIGGER_ERROR_ACCOUNTING_STATUS_APP_DEAD;
244 }
245 goto end;
246 }
247
248 status = TRIGGER_ERROR_ACCOUNTING_STATUS_OK;
249 end:
250 return status;
251 }
252
253 enum trigger_error_accounting_status trigger_error_accounting_register_app(
254 struct ust_app *app)
255 {
256 int ret;
257 uint64_t i, nr_counter_cpu_fds;
258 struct lttng_ust_object_data *new_counter;
259 struct error_account_entry *entry;
260 enum trigger_error_accounting_status status;
261
262 /*
263 * Check if we already have a error counter for the user id of this
264 * app. If not, create one.
265 */
266 entry = get_uid_accounting_entry(app);
267 if (entry == NULL) {
268 entry = create_uid_accounting_entry(app);
269 }
270
271 /* Duplicate counter object data*/
272 ret = ustctl_duplicate_ust_object_data(&new_counter,
273 entry->counter);
274 assert(ret == 0);
275
276 status = send_counter_data_to_ust(app, new_counter);
277 if (status != TRIGGER_ERROR_ACCOUNTING_STATUS_OK) {
278 goto end;
279 }
280
281 nr_counter_cpu_fds = ustctl_get_nr_cpu_per_counter();
282 for (i = 0; i < nr_counter_cpu_fds; i++) {
283 struct lttng_ust_object_data *new_counter_cpu = NULL;
284
285 ret = ustctl_duplicate_ust_object_data(&new_counter_cpu,
286 entry->cpu_counters[i]);
287 assert(ret == 0);
288
289 status = send_counter_cpu_data_to_ust(app,
290 new_counter, new_counter_cpu);
291 if (status != TRIGGER_ERROR_ACCOUNTING_STATUS_OK) {
292 goto end;
293 }
294 }
295
296 end:
297 return status;
298 }
299
300 static
301 enum trigger_error_accounting_status trigger_error_accounting_ust_get_count(
302 const struct lttng_trigger *trigger, uint64_t *count)
303 {
304 struct lttng_ht_iter iter;
305 struct error_account_entry *uid_entry;
306 uint64_t error_counter_index, global_sum = 0;
307 enum trigger_error_accounting_status status;
308 size_t dimension_indexes[1];
309
310 /*
311 * Go over all error counters (ignoring uid) as a trigger (and trigger
312 * errors) can be generated from any applications that this session
313 * daemon is managing.
314 */
315
316 status = get_error_counter_index_for_token(
317 lttng_trigger_get_tracer_token(trigger), &error_counter_index);
318 if (status != TRIGGER_ERROR_ACCOUNTING_STATUS_OK) {
319 goto end;
320 }
321
322 dimension_indexes[0] = error_counter_index;
323
324 rcu_read_lock();
325
326 cds_lfht_for_each_entry(error_counter_uid_ht->ht, &iter.iter,
327 uid_entry, node.node) {
328 int ret;
329 int64_t local_value = 0;;
330 bool overflow = 0, underflow = 0;
331 ret = ustctl_counter_aggregate(uid_entry->daemon_counter,
332 dimension_indexes, &local_value, &overflow,
333 &underflow);
334 assert(ret == 0);
335
336 /* should always be zero or above. */
337 assert(local_value >= 0);
338 global_sum += (uint64_t) local_value;
339
340 }
341
342 rcu_read_unlock();
343
344 *count = global_sum;
345 status = TRIGGER_ERROR_ACCOUNTING_STATUS_OK;
346
347 end:
348 return status;
349 }
350
351 static
352 enum trigger_error_accounting_status trigger_error_accounting_ust_clear(
353 const struct lttng_trigger *trigger)
354 {
355 struct lttng_ht_iter iter;
356 struct error_account_entry *uid_entry;
357 uint64_t error_counter_index;
358 enum trigger_error_accounting_status status;
359 size_t dimension_indexes[1];
360
361 /*
362 * Go over all error counters (ignoring uid) as a trigger (and trigger
363 * errors) can be generated from any applications that this session
364 * daemon is managing.
365 */
366
367 status = get_error_counter_index_for_token(
368 lttng_trigger_get_tracer_token(trigger),
369 &error_counter_index);
370 if (status != TRIGGER_ERROR_ACCOUNTING_STATUS_OK) {
371 ERR("Error getting trigger error counter index");
372 goto end;
373 }
374
375 dimension_indexes[0] = error_counter_index;
376
377 rcu_read_lock();
378 cds_lfht_for_each_entry(error_counter_uid_ht->ht, &iter.iter,
379 uid_entry, node.node) {
380 int ret;
381 ret = ustctl_counter_clear(uid_entry->daemon_counter,
382 dimension_indexes);
383 assert(ret == 0);
384 }
385
386 rcu_read_unlock();
387 status = TRIGGER_ERROR_ACCOUNTING_STATUS_OK;
388 end:
389 return status;
390 }
391
392 #endif /* HAVE_LIBLTTNG_UST_CTL */
393
394 void trigger_error_accounting_register_kernel(int kernel_trigger_group_fd)
395 {
396 int local_fd = -1, ret;
397 struct lttng_kernel_counter_conf error_counter_conf;
398
399
400 error_counter_conf.arithmetic = LTTNG_KERNEL_COUNTER_ARITHMETIC_MODULAR;
401 error_counter_conf.bitness = LTTNG_KERNEL_COUNTER_BITNESS_64BITS;
402 error_counter_conf.global_sum_step = 0;
403 error_counter_conf.number_dimensions = 1;
404 error_counter_conf.dimensions[0].size = error_counter_size;
405 error_counter_conf.dimensions[0].has_underflow = false;
406 error_counter_conf.dimensions[0].has_overflow = false;
407
408 ret = kernctl_create_trigger_group_error_counter(
409 kernel_trigger_group_fd, &error_counter_conf);
410 if (ret < 0) {
411 PERROR("ioctl kernel create trigger group error counter");
412 goto error;
413 }
414
415 /* Store locally */
416 local_fd = ret;
417
418 /* Prevent fd duplication after execlp() */
419 ret = fcntl(local_fd, F_SETFD, FD_CLOEXEC);
420 if (ret < 0) {
421 PERROR("fcntl trigger error counter fd");
422 goto error;
423 }
424
425 DBG("Kernel trigger group error counter (fd: %d)", local_fd);
426
427 kernel_error_accountant.kernel_trigger_error_counter_fd = local_fd;
428
429 error:
430 return;
431 }
432
433 static
434 enum trigger_error_accounting_status create_error_counter_index_for_token(
435 uint64_t tracer_token, uint64_t *error_counter_index)
436 {
437 struct index_ht_entry *index_entry;;
438 enum lttng_index_allocator_status index_alloc_status;
439 uint64_t local_error_counter_index;
440 enum trigger_error_accounting_status status;
441
442 /* Allocate a new index for that counter. */
443 index_alloc_status = lttng_index_allocator_alloc(index_allocator,
444 &local_error_counter_index);
445 switch (index_alloc_status) {
446 case LTTNG_INDEX_ALLOCATOR_STATUS_EMPTY:
447 ERR("No more index available in the configured trigger error counter: number-of-indices=%"PRIu64,
448 lttng_index_allocator_get_index_count(
449 index_allocator));
450 status = TRIGGER_ERROR_ACCOUNTING_STATUS_NO_INDEX_AVAILABLE;
451 goto end;
452 case LTTNG_INDEX_ALLOCATOR_STATUS_OK:
453 break;
454 default:
455 status = TRIGGER_ERROR_ACCOUNTING_STATUS_ERR;
456 goto end;
457 }
458
459 index_entry = zmalloc(sizeof(*index_entry));
460 if (index_entry == NULL) {
461 PERROR("Trigger error counter hashtable entry zmalloc");
462 status = TRIGGER_ERROR_ACCOUNTING_STATUS_NOMEM;
463 goto end;
464 }
465
466 index_entry->error_counter_index = local_error_counter_index;
467 lttng_ht_node_init_u64(&index_entry->node, tracer_token);
468
469 lttng_ht_add_unique_u64(error_counter_indexes_ht, &index_entry->node);
470
471 *error_counter_index = local_error_counter_index;
472 status = TRIGGER_ERROR_ACCOUNTING_STATUS_OK;
473 end:
474 return status;
475 }
476
477 enum trigger_error_accounting_status trigger_error_accounting_register_trigger(
478 const struct lttng_trigger *trigger,
479 uint64_t *error_counter_index)
480 {
481 enum trigger_error_accounting_status status;
482 uint64_t local_error_counter_index;
483
484 /* Check if this trigger already has a error counter index assigned. */
485 status = get_error_counter_index_for_token(
486 lttng_trigger_get_tracer_token(trigger),
487 &local_error_counter_index);
488 switch (status) {
489 case TRIGGER_ERROR_ACCOUNTING_STATUS_NOT_FOUND:
490 DBG("Trigger error counter index for this tracer token not found. Allocating a new one.");
491 status = create_error_counter_index_for_token(
492 lttng_trigger_get_tracer_token(trigger),
493 &local_error_counter_index);
494 if (status != TRIGGER_ERROR_ACCOUNTING_STATUS_OK) {
495 goto end;
496 }
497 case TRIGGER_ERROR_ACCOUNTING_STATUS_OK:
498 *error_counter_index = local_error_counter_index;
499 status = TRIGGER_ERROR_ACCOUNTING_STATUS_OK;
500 break;
501 default:
502 break;
503 }
504
505 end:
506 return status;
507 }
508
509 static
510 enum trigger_error_accounting_status trigger_error_accounting_kernel_get_count(
511 const struct lttng_trigger *trigger, uint64_t *count)
512 {
513 struct lttng_kernel_counter_value counter_value;
514 enum trigger_error_accounting_status status;
515 uint64_t error_counter_index;
516 int ret;
517
518 status = get_error_counter_index_for_token(
519 lttng_trigger_get_tracer_token(trigger), &error_counter_index);
520 if (status != TRIGGER_ERROR_ACCOUNTING_STATUS_OK) {
521 goto end;
522 }
523
524 counter_value.number_dimensions = 1;
525 counter_value.dimension_indexes[0] = error_counter_index;
526
527 assert(kernel_error_accountant.kernel_trigger_error_counter_fd);
528
529 ret = kernctl_counter_get_value(
530 kernel_error_accountant.kernel_trigger_error_counter_fd,
531 &counter_value);
532 if (ret) {
533 ERR("Error getting trigger error count.");
534 status = TRIGGER_ERROR_ACCOUNTING_STATUS_ERR;
535 goto end;
536 }
537
538 if (counter_value.value < 0) {
539 ERR("Trigger error counter less than zero.");
540 status = TRIGGER_ERROR_ACCOUNTING_STATUS_ERR;
541 goto end;
542 }
543
544 /* Error count can't be negative. */
545 assert(counter_value.value >= 0);
546 *count = (uint64_t) counter_value.value;
547
548 status = TRIGGER_ERROR_ACCOUNTING_STATUS_OK;
549
550 end:
551 return status;
552 }
553
554 enum trigger_error_accounting_status trigger_error_accounting_get_count(
555 const struct lttng_trigger *trigger, uint64_t *count)
556 {
557 switch (lttng_trigger_get_underlying_domain_type_restriction(trigger)) {
558 case LTTNG_DOMAIN_KERNEL:
559 return trigger_error_accounting_kernel_get_count(trigger, count);
560 #ifdef HAVE_LIBLTTNG_UST_CTL
561 case LTTNG_DOMAIN_UST:
562 return trigger_error_accounting_ust_get_count(trigger, count);
563 #endif /* HAVE_LIBLTTNG_UST_CTL */
564 default:
565 abort();
566 }
567 }
568
569 static
570 enum trigger_error_accounting_status trigger_error_accounting_clear(
571 const struct lttng_trigger *trigger)
572 {
573 switch (lttng_trigger_get_underlying_domain_type_restriction(trigger)) {
574 case LTTNG_DOMAIN_KERNEL:
575 // FIXME: Should we clear it here? Right now I believe it's clear when
576 // we create a new trigger in the kernel.
577 return TRIGGER_ERROR_ACCOUNTING_STATUS_OK;
578 #ifdef HAVE_LIBLTTNG_UST_CTL
579 case LTTNG_DOMAIN_UST:
580 return trigger_error_accounting_ust_clear(trigger);
581 #endif /* HAVE_LIBLTTNG_UST_CTL */
582 default:
583 abort();
584 }
585 }
586
587 void trigger_error_accounting_unregister_trigger(
588 const struct lttng_trigger *trigger)
589 {
590 struct lttng_ht_iter iter;
591 struct lttng_ht_node_u64 *node;
592 struct index_ht_entry *index_entry;
593 enum trigger_error_accounting_status status;
594 enum lttng_index_allocator_status index_alloc_status;
595 uint64_t tracer_token = lttng_trigger_get_tracer_token(trigger);
596
597
598 status = trigger_error_accounting_clear(trigger);
599 if (status != TRIGGER_ERROR_ACCOUNTING_STATUS_OK) {
600 ERR("Error clearing trigger error counter index");
601 }
602
603 lttng_ht_lookup(error_counter_indexes_ht, &tracer_token, &iter);
604 node = lttng_ht_iter_get_node_u64(&iter);
605 if(node) {
606 index_entry = caa_container_of(node, struct index_ht_entry, node);
607 index_alloc_status = lttng_index_allocator_release(
608 index_allocator,
609 index_entry->error_counter_index);
610 if (index_alloc_status != LTTNG_INDEX_ALLOCATOR_STATUS_OK) {
611 ERR("Error releasing trigger error counter index");
612 }
613
614 lttng_ht_del(error_counter_indexes_ht, &iter);
615 free(index_entry);
616 }
617 }
618
619 static void free_error_account_entry(struct rcu_head *head)
620 {
621 struct error_account_entry *entry = caa_container_of(head,
622 struct error_account_entry, rcu_head);
623 #ifdef HAVE_LIBLTTNG_UST_CTL
624 ustctl_destroy_counter(entry->daemon_counter);
625 #endif /* HAVE_LIBLTTNG_UST_CTL */
626
627 free(entry);
628 }
629
630 static void free_index_ht_entry(struct rcu_head *head)
631 {
632 struct index_ht_entry *entry = caa_container_of(head,
633 struct index_ht_entry, rcu_head);
634 free(entry);
635 }
636
637 void trigger_error_accounting_fini(void)
638 {
639 struct lttng_ht_iter iter;
640 struct index_ht_entry *index_entry;
641 struct error_account_entry *uid_entry;
642
643 if (kernel_error_accountant.kernel_trigger_error_counter_fd) {
644 int ret = close(kernel_error_accountant.kernel_trigger_error_counter_fd);
645 if (ret) {
646 PERROR("Closing kernel trigger error counter");
647 }
648 }
649
650 rcu_read_lock();
651
652 cds_lfht_for_each_entry(error_counter_uid_ht->ht, &iter.iter,
653 uid_entry, node.node) {
654 cds_lfht_del(error_counter_uid_ht->ht, &uid_entry->node.node);
655 call_rcu(&uid_entry->rcu_head, free_error_account_entry);
656 }
657
658 cds_lfht_for_each_entry(error_counter_indexes_ht->ht, &iter.iter,
659 index_entry, node.node) {
660 cds_lfht_del(error_counter_indexes_ht->ht, &index_entry->node.node);
661 call_rcu(&index_entry->rcu_head, free_index_ht_entry);
662 }
663
664 rcu_read_unlock();
665
666 lttng_ht_destroy(error_counter_uid_ht);
667 lttng_ht_destroy(error_counter_indexes_ht);
668 }
This page took 0.042916 seconds and 5 git commands to generate.