Fix: concurrent exec(2) file descriptor leak
[lttng-ust.git] / libringbuffer / shm.c
CommitLineData
1d498196
MD
/*
 * libringbuffer/shm.c
 *
 * Copyright (C) 2005-2012 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; only
 * version 2.1 of the License.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
20
3fbec7dc 21#define _LGPL_SOURCE
bfcda6ce 22#include <config.h>
1d498196
MD
23#include "shm.h"
24#include <unistd.h>
25#include <fcntl.h>
26#include <sys/mman.h>
a9ff648c 27#include <sys/types.h>
1d498196
MD
28#include <sys/stat.h> /* For mode constants */
29#include <fcntl.h> /* For O_* constants */
30#include <assert.h>
8da6cd6d
MD
31#include <stdio.h>
32#include <signal.h>
33#include <dirent.h>
4318ae1b 34#include <lttng/align.h>
96e80018 35#include <limits.h>
8a208943 36#include <stdbool.h>
bfcda6ce 37#ifdef HAVE_LIBNUMA
4b68c31f 38#include <numa.h>
8a208943 39#include <numaif.h>
bfcda6ce 40#endif
3a81f31d 41#include <helper.h>
6548fca4 42#include <ust-fd.h>
4d4838ba 43#include "mmap.h"
3a81f31d
MD
44
/*
 * Ensure we have the required amount of space available by writing 0
 * into the entire buffer. Not doing so can trigger SIGBUS when going
 * beyond the available shm space.
 *
 * Writes @len zero bytes to @fd starting at its current file offset, at
 * most one page per write(2) call. Writes interrupted by a signal
 * (EINTR) are retried.
 *
 * Returns 0 on success. On failure, returns a negative value: -ENOMEM
 * if the zero page could not be allocated, the negated errno of the
 * failing write(2), or the sysconf(3) return value if the page size
 * could not be queried. errno is left describing the failure so the
 * caller can report it.
 */
static
int zero_file(int fd, size_t len)
{
	size_t written = 0;
	char *zeropage;
	long pagelen;
	int ret = 0, saved_errno;

	pagelen = sysconf(_SC_PAGESIZE);
	if (pagelen < 0)
		return (int) pagelen;
	zeropage = calloc(pagelen, 1);
	if (!zeropage)
		return -ENOMEM;

	while (written < len) {
		size_t chunk = len - written;
		ssize_t retlen;

		/* Never write more than the single zeroed page we own. */
		if (chunk > (size_t) pagelen)
			chunk = (size_t) pagelen;
		do {
			retlen = write(fd, zeropage, chunk);
		} while (retlen < 0 && errno == EINTR);
		if (retlen < 0) {
			ret = -errno;
			break;
		}
		written += (size_t) retlen;
	}

	/*
	 * free() is allowed to modify errno on some C libraries; keep the
	 * value set by the failing write() visible to the caller.
	 */
	saved_errno = errno;
	free(zeropage);
	errno = saved_errno;
	return ret;
}
1d498196
MD
82
83struct shm_object_table *shm_object_table_create(size_t max_nb_obj)
84{
85 struct shm_object_table *table;
86
87 table = zmalloc(sizeof(struct shm_object_table) +
88 max_nb_obj * sizeof(table->objects[0]));
74d48abe
MD
89 if (!table)
90 return NULL;
1d498196
MD
91 table->size = max_nb_obj;
92 return table;
93}
94
74d81a6c
MD
95static
96struct shm_object *_shm_object_table_alloc_shm(struct shm_object_table *table,
a9ff648c 97 size_t memory_map_size,
5ea386c3 98 int stream_fd)
1d498196 99{
5ea386c3 100 int shmfd, waitfd[2], ret, i;
1d498196
MD
101 struct shm_object *obj;
102 char *memory_map;
103
5ea386c3
MD
104 if (stream_fd < 0)
105 return NULL;
1d498196
MD
106 if (table->allocated_len >= table->size)
107 return NULL;
7a9c21bd 108 obj = &table->objects[table->allocated_len];
1d498196
MD
109
110 /* wait_fd: create pipe */
37cdae11 111 ret = pipe2(waitfd, O_CLOEXEC);
1d498196
MD
112 if (ret < 0) {
113 PERROR("pipe");
114 goto error_pipe;
115 }
5d61a504
MD
116 /* The write end of the pipe needs to be non-blocking */
117 ret = fcntl(waitfd[1], F_SETFL, O_NONBLOCK);
118 if (ret < 0) {
119 PERROR("fcntl");
120 goto error_fcntl;
121 }
7a9c21bd 122 memcpy(obj->wait_fd, waitfd, sizeof(waitfd));
1d498196 123
5ea386c3 124 /* create shm */
a9ff648c 125
5ea386c3 126 shmfd = stream_fd;
3a81f31d
MD
127 ret = zero_file(shmfd, memory_map_size);
128 if (ret) {
129 PERROR("zero_file");
130 goto error_zero_file;
131 }
1d498196
MD
132 ret = ftruncate(shmfd, memory_map_size);
133 if (ret) {
134 PERROR("ftruncate");
135 goto error_ftruncate;
136 }
d0f6cf57
MD
137 /*
138 * Also ensure the file metadata is synced with the storage by using
139 * fsync(2).
140 */
141 ret = fsync(shmfd);
142 if (ret) {
143 PERROR("fsync");
144 goto error_fsync;
145 }
5ea386c3 146 obj->shm_fd_ownership = 0;
1d498196
MD
147 obj->shm_fd = shmfd;
148
149 /* memory_map: mmap */
150 memory_map = mmap(NULL, memory_map_size, PROT_READ | PROT_WRITE,
4d4838ba 151 MAP_SHARED | LTTNG_MAP_POPULATE, shmfd, 0);
1d498196
MD
152 if (memory_map == MAP_FAILED) {
153 PERROR("mmap");
154 goto error_mmap;
155 }
74d81a6c 156 obj->type = SHM_OBJECT_SHM;
1d498196
MD
157 obj->memory_map = memory_map;
158 obj->memory_map_size = memory_map_size;
159 obj->allocated_len = 0;
dc613eb9 160 obj->index = table->allocated_len++;
7a9c21bd 161
1d498196
MD
162 return obj;
163
164error_mmap:
d0f6cf57 165error_fsync:
1d498196 166error_ftruncate:
3a81f31d 167error_zero_file:
1d498196
MD
168error_fcntl:
169 for (i = 0; i < 2; i++) {
170 ret = close(waitfd[i]);
171 if (ret) {
172 PERROR("close");
173 assert(0);
174 }
175 }
176error_pipe:
1d498196 177 return NULL;
1d498196
MD
178}
179
74d81a6c
MD
180static
181struct shm_object *_shm_object_table_alloc_mem(struct shm_object_table *table,
182 size_t memory_map_size)
183{
184 struct shm_object *obj;
185 void *memory_map;
ff0f5728 186 int waitfd[2], i, ret;
74d81a6c
MD
187
188 if (table->allocated_len >= table->size)
189 return NULL;
190 obj = &table->objects[table->allocated_len];
191
192 memory_map = zmalloc(memory_map_size);
193 if (!memory_map)
194 goto alloc_error;
195
ff0f5728 196 /* wait_fd: create pipe */
37cdae11 197 ret = pipe2(waitfd, O_CLOEXEC);
ff0f5728
MD
198 if (ret < 0) {
199 PERROR("pipe");
200 goto error_pipe;
201 }
ff0f5728
MD
202 /* The write end of the pipe needs to be non-blocking */
203 ret = fcntl(waitfd[1], F_SETFL, O_NONBLOCK);
204 if (ret < 0) {
205 PERROR("fcntl");
206 goto error_fcntl;
207 }
208 memcpy(obj->wait_fd, waitfd, sizeof(waitfd));
209
210 /* no shm_fd */
74d81a6c 211 obj->shm_fd = -1;
5ea386c3 212 obj->shm_fd_ownership = 0;
74d81a6c
MD
213
214 obj->type = SHM_OBJECT_MEM;
215 obj->memory_map = memory_map;
216 obj->memory_map_size = memory_map_size;
217 obj->allocated_len = 0;
218 obj->index = table->allocated_len++;
219
220 return obj;
221
ff0f5728
MD
222error_fcntl:
223 for (i = 0; i < 2; i++) {
224 ret = close(waitfd[i]);
225 if (ret) {
226 PERROR("close");
227 assert(0);
228 }
229 }
230error_pipe:
231 free(memory_map);
74d81a6c
MD
232alloc_error:
233 return NULL;
234}
235
8a208943
MD
/*
 * libnuma prints errors on the console even for numa_available().
 * Work-around this limitation by using get_mempolicy() directly to
 * check whether the kernel supports mempolicy.
 *
 * Returns true when the kernel implements get_mempolicy() and libnuma
 * reports itself usable, false otherwise.
 */
#ifdef HAVE_LIBNUMA
static bool lttng_is_numa_available(void)
{
	int ret;

	ret = get_mempolicy(NULL, NULL, 0, NULL, 0);
	if (ret && errno == ENOSYS) {
		return false;
	}
	/*
	 * numa_available() returns -1 when libnuma cannot be used and a
	 * non-negative value (0) otherwise; testing for "> 0" would never
	 * report NUMA as available.
	 */
	return numa_available() != -1;
}
#endif
253
74d81a6c
MD
254struct shm_object *shm_object_table_alloc(struct shm_object_table *table,
255 size_t memory_map_size,
a9ff648c 256 enum shm_object_type type,
4b68c31f
MD
257 int stream_fd,
258 int cpu)
74d81a6c 259{
4b68c31f 260 struct shm_object *shm_object;
bfcda6ce 261#ifdef HAVE_LIBNUMA
8a208943
MD
262 int oldnode = 0, node;
263 bool numa_avail;
4b68c31f 264
8a208943
MD
265 numa_avail = lttng_is_numa_available();
266 if (numa_avail) {
267 oldnode = numa_preferred();
268 if (cpu >= 0) {
269 node = numa_node_of_cpu(cpu);
270 if (node >= 0)
271 numa_set_preferred(node);
272 }
273 if (cpu < 0 || node < 0)
274 numa_set_localalloc();
4b68c31f 275 }
bfcda6ce 276#endif /* HAVE_LIBNUMA */
74d81a6c
MD
277 switch (type) {
278 case SHM_OBJECT_SHM:
4b68c31f 279 shm_object = _shm_object_table_alloc_shm(table, memory_map_size,
5ea386c3 280 stream_fd);
4b68c31f 281 break;
74d81a6c 282 case SHM_OBJECT_MEM:
4b68c31f
MD
283 shm_object = _shm_object_table_alloc_mem(table, memory_map_size);
284 break;
74d81a6c
MD
285 default:
286 assert(0);
287 }
bfcda6ce 288#ifdef HAVE_LIBNUMA
8a208943
MD
289 if (numa_avail)
290 numa_set_preferred(oldnode);
bfcda6ce 291#endif /* HAVE_LIBNUMA */
4b68c31f 292 return shm_object;
74d81a6c
MD
293}
294
295struct shm_object *shm_object_table_append_shm(struct shm_object_table *table,
296 int shm_fd, int wakeup_fd, uint32_t stream_nr,
297 size_t memory_map_size)
193183fb
MD
298{
299 struct shm_object *obj;
300 char *memory_map;
74d81a6c 301 int ret;
193183fb
MD
302
303 if (table->allocated_len >= table->size)
304 return NULL;
74d81a6c
MD
305 /* streams _must_ be received in sequential order, else fail. */
306 if (stream_nr + 1 != table->allocated_len)
307 return NULL;
308
193183fb
MD
309 obj = &table->objects[table->allocated_len];
310
74d81a6c
MD
311 /* wait_fd: set write end of the pipe. */
312 obj->wait_fd[0] = -1; /* read end is unset */
313 obj->wait_fd[1] = wakeup_fd;
193183fb 314 obj->shm_fd = shm_fd;
5ea386c3 315 obj->shm_fd_ownership = 1;
193183fb 316
74d81a6c
MD
317 /* The write end of the pipe needs to be non-blocking */
318 ret = fcntl(obj->wait_fd[1], F_SETFL, O_NONBLOCK);
319 if (ret < 0) {
320 PERROR("fcntl");
321 goto error_fcntl;
322 }
323
193183fb
MD
324 /* memory_map: mmap */
325 memory_map = mmap(NULL, memory_map_size, PROT_READ | PROT_WRITE,
4d4838ba 326 MAP_SHARED | LTTNG_MAP_POPULATE, shm_fd, 0);
193183fb
MD
327 if (memory_map == MAP_FAILED) {
328 PERROR("mmap");
329 goto error_mmap;
330 }
74d81a6c 331 obj->type = SHM_OBJECT_SHM;
193183fb
MD
332 obj->memory_map = memory_map;
333 obj->memory_map_size = memory_map_size;
334 obj->allocated_len = memory_map_size;
335 obj->index = table->allocated_len++;
336
337 return obj;
338
74d81a6c 339error_fcntl:
193183fb
MD
340error_mmap:
341 return NULL;
342}
343
74d81a6c
MD
344/*
345 * Passing ownership of mem to object.
346 */
347struct shm_object *shm_object_table_append_mem(struct shm_object_table *table,
ff0f5728 348 void *mem, size_t memory_map_size, int wakeup_fd)
74d81a6c
MD
349{
350 struct shm_object *obj;
ff0f5728 351 int ret;
74d81a6c
MD
352
353 if (table->allocated_len >= table->size)
354 return NULL;
355 obj = &table->objects[table->allocated_len];
356
ff0f5728
MD
357 obj->wait_fd[0] = -1; /* read end is unset */
358 obj->wait_fd[1] = wakeup_fd;
74d81a6c 359 obj->shm_fd = -1;
5ea386c3 360 obj->shm_fd_ownership = 0;
74d81a6c 361
ff0f5728
MD
362 /* The write end of the pipe needs to be non-blocking */
363 ret = fcntl(obj->wait_fd[1], F_SETFL, O_NONBLOCK);
364 if (ret < 0) {
365 PERROR("fcntl");
366 goto error_fcntl;
367 }
368
74d81a6c
MD
369 obj->type = SHM_OBJECT_MEM;
370 obj->memory_map = mem;
371 obj->memory_map_size = memory_map_size;
372 obj->allocated_len = memory_map_size;
373 obj->index = table->allocated_len++;
374
375 return obj;
ff0f5728
MD
376
377error_fcntl:
378 return NULL;
74d81a6c
MD
379}
380
1d498196 381static
6548fca4 382void shmp_object_destroy(struct shm_object *obj, int consumer)
1d498196 383{
74d81a6c
MD
384 switch (obj->type) {
385 case SHM_OBJECT_SHM:
386 {
387 int ret, i;
1d498196 388
7a784989
MD
389 ret = munmap(obj->memory_map, obj->memory_map_size);
390 if (ret) {
391 PERROR("umnmap");
392 assert(0);
393 }
6548fca4 394
5ea386c3 395 if (obj->shm_fd_ownership) {
6548fca4
MD
396 /* Delete FDs only if called from app (not consumer). */
397 if (!consumer) {
398 lttng_ust_lock_fd_tracker();
399 ret = close(obj->shm_fd);
400 if (!ret) {
401 lttng_ust_delete_fd_from_tracker(obj->shm_fd);
402 } else {
403 PERROR("close");
404 assert(0);
405 }
406 lttng_ust_unlock_fd_tracker();
407 } else {
408 ret = close(obj->shm_fd);
409 if (ret) {
410 PERROR("close");
411 assert(0);
412 }
a9ff648c
MD
413 }
414 }
74d81a6c
MD
415 for (i = 0; i < 2; i++) {
416 if (obj->wait_fd[i] < 0)
417 continue;
6548fca4
MD
418 if (!consumer) {
419 lttng_ust_lock_fd_tracker();
420 ret = close(obj->wait_fd[i]);
421 if (!ret) {
422 lttng_ust_delete_fd_from_tracker(obj->wait_fd[i]);
423 } else {
424 PERROR("close");
425 assert(0);
426 }
427 lttng_ust_unlock_fd_tracker();
428 } else {
429 ret = close(obj->wait_fd[i]);
430 if (ret) {
431 PERROR("close");
432 assert(0);
433 }
74d81a6c 434 }
1d498196 435 }
74d81a6c
MD
436 break;
437 }
438 case SHM_OBJECT_MEM:
ff0f5728
MD
439 {
440 int ret, i;
441
442 for (i = 0; i < 2; i++) {
443 if (obj->wait_fd[i] < 0)
444 continue;
6548fca4
MD
445 if (!consumer) {
446 lttng_ust_lock_fd_tracker();
447 ret = close(obj->wait_fd[i]);
448 if (!ret) {
449 lttng_ust_delete_fd_from_tracker(obj->wait_fd[i]);
450 } else {
451 PERROR("close");
452 assert(0);
453 }
454 lttng_ust_unlock_fd_tracker();
455 } else {
456 ret = close(obj->wait_fd[i]);
457 if (ret) {
458 PERROR("close");
459 assert(0);
460 }
ff0f5728
MD
461 }
462 }
74d81a6c
MD
463 free(obj->memory_map);
464 break;
ff0f5728 465 }
74d81a6c
MD
466 default:
467 assert(0);
1d498196
MD
468 }
469}
470
6548fca4 471void shm_object_table_destroy(struct shm_object_table *table, int consumer)
1d498196
MD
472{
473 int i;
474
475 for (i = 0; i < table->allocated_len; i++)
6548fca4 476 shmp_object_destroy(&table->objects[i], consumer);
1d498196
MD
477 free(table);
478}
479
480/*
481 * zalloc_shm - allocate memory within a shm object.
482 *
483 * Shared memory is already zeroed by shmget.
484 * *NOT* multithread-safe (should be protected by mutex).
485 * Returns a -1, -1 tuple on error.
486 */
487struct shm_ref zalloc_shm(struct shm_object *obj, size_t len)
488{
489 struct shm_ref ref;
490 struct shm_ref shm_ref_error = { -1, -1 };
491
492 if (obj->memory_map_size - obj->allocated_len < len)
493 return shm_ref_error;
494 ref.index = obj->index;
495 ref.offset = obj->allocated_len;
496 obj->allocated_len += len;
497 return ref;
498}
499
500void align_shm(struct shm_object *obj, size_t align)
501{
502 size_t offset_len = offset_align(obj->allocated_len, align);
503 obj->allocated_len += offset_len;
504}
This page took 0.060097 seconds and 5 git commands to generate.