/*
 * Copyright (C) 2018 - Jérémie Galarneau <jeremie.galarneau@efficios.com>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License, version 2 only, as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 51
 * Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 */
19 #include <urcu/list.h>
20 #include <urcu/rculfhash.h>
23 #include <sys/types.h>
29 #include "common/macros.h"
30 #include "common/error.h"
31 #include "common/defaults.h"
32 #include "common/hashtable/utils.h"
33 #include "common/hashtable/hashtable.h"
35 #include "fd-tracker.h"
/*
 * Counter accessors. Each macro takes a pointer to a struct fd_tracker and
 * the tracker's lock must be taken by the user before evaluating it.
 *
 * The argument is parenthesized so that any pointer-valued expression
 * (e.g. `&some_tracker`) expands correctly.
 */

/* Every tracked fd: active + suspended suspendable fds, plus unsuspendable fds. */
#define TRACKED_COUNT(tracker)                      \
	((tracker)->count.suspendable.active +      \
	 (tracker)->count.suspendable.suspended +   \
	 (tracker)->count.unsuspendable)

/* Fds that are currently opened: active suspendable fds plus unsuspendable fds. */
#define ACTIVE_COUNT(tracker)                       \
	((tracker)->count.suspendable.active +      \
	 (tracker)->count.unsuspendable)

/* Suspendable fds that are currently suspended. */
#define SUSPENDED_COUNT(tracker)                    \
	((tracker)->count.suspendable.suspended)

/* Suspendable fds, whether active or suspended. */
#define SUSPENDABLE_COUNT(tracker)                  \
	((tracker)->count.suspendable.active +      \
	 (tracker)->count.suspendable.suspended)

/* Fds that can never be suspended. */
#define UNSUSPENDABLE_COUNT(tracker)                \
	((tracker)->count.unsuspendable)
66 unsigned int suspended
;
68 unsigned int unsuspendable
;
70 unsigned int capacity
;
74 /* Failures to suspend or restore fs handles. */
78 * The head of the active_handles list is always the least recently
79 * used active handle. When an handle is used, it is removed from the
80 * list and added to the end. When a file has to be suspended, the
81 * first element in the list is "popped", suspended, and added to the
82 * list of suspended handles.
84 struct cds_list_head active_handles
;
85 struct cds_list_head suspended_handles
;
86 struct cds_lfht
*unsuspendable_fds
;
/*
 * Arguments needed to (re-)open the file backing a fs_handle.
 */
struct open_properties {
	/* Heap-allocated copy of the path passed to open(). */
	char *path;
	/* open() flags. */
	int flags;
	/* Optional creation mode; `value` is only meaningful when `is_set`. */
	struct {
		bool is_set;
		mode_t value;
	} mode;
};
99 * A fs_handle is not ref-counted. Therefore, it is assumed that a
100 * handle is never in-use while it is being reclaimed. It can be
101 * shared by multiple threads, but external synchronization is required
102 * to ensure it is not still being used when it is reclaimed (close method).
103 * In this respect, it is not different from a regular file descriptor.
105 * The fs_handle lock always nests _within_ the tracker's lock.
108 pthread_mutex_t lock
;
110 * Weak reference to the tracker. All fs_handles are assumed to have
111 * been closed at the moment of the destruction of the fd_tracker.
113 struct fd_tracker
*tracker
;
114 struct open_properties properties
;
116 /* inode number of the file at the time of the handle's creation. */
119 /* Offset to which the file should be restored. */
121 struct cds_list_head handles_list_node
;
124 struct unsuspendable_fd
{
126 * Accesses are only performed through the tracker, which is protected
131 struct cds_lfht_node tracker_node
;
132 struct rcu_head rcu_head
;
/*
 * Hash seed shared by every tracker. It is lazily initialized, under
 * `lock`, the first time a tracker is created.
 */
static struct {
	pthread_mutex_t lock;
	bool initialized;
	unsigned long value;
} seed = {
	.lock = PTHREAD_MUTEX_INITIALIZER,
};
/* Helpers operating on entries of the unsuspendable fd hash table. */
static int match_fd(struct cds_lfht_node *node, const void *key);
static void unsuspendable_fd_destroy(struct unsuspendable_fd *entry);
static struct unsuspendable_fd *unsuspendable_fd_create(const char *name,
		int fd);

/* Helpers operating on a single filesystem handle. */
static int open_from_properties(struct open_properties *properties);
static void fs_handle_log(struct fs_handle *handle);
static int fs_handle_suspend(struct fs_handle *handle);
static int fs_handle_restore(struct fs_handle *handle);

/* Helpers operating on the tracker's handle lists and counters. */
static void fd_tracker_track(struct fd_tracker *tracker,
		struct fs_handle *handle);
static void fd_tracker_untrack(struct fd_tracker *tracker,
		struct fs_handle *handle);
static int fd_tracker_suspend_handles(struct fd_tracker *tracker,
		unsigned int count);
static int fd_tracker_restore_handle(struct fd_tracker *tracker,
		struct fs_handle *handle);
162 /* Match function of the tracker's unsuspendable_fds hash table. */
164 int match_fd(struct cds_lfht_node
*node
, const void *key
)
166 struct unsuspendable_fd
*entry
=
167 caa_container_of(node
, struct unsuspendable_fd
, tracker_node
);
169 return hash_match_key_ulong((void *) (unsigned long) entry
->fd
,
174 void delete_unsuspendable_fd(struct rcu_head
*head
)
176 struct unsuspendable_fd
*fd
= caa_container_of(head
,
177 struct unsuspendable_fd
, rcu_head
);
184 void unsuspendable_fd_destroy(struct unsuspendable_fd
*entry
)
189 call_rcu(&entry
->rcu_head
, delete_unsuspendable_fd
);
193 struct unsuspendable_fd
*unsuspendable_fd_create(const char *name
, int fd
)
195 struct unsuspendable_fd
*entry
=
196 zmalloc(sizeof(*entry
));
202 entry
->name
= strdup(name
);
207 cds_lfht_node_init(&entry
->tracker_node
);
211 unsuspendable_fd_destroy(entry
);
216 void fs_handle_log(struct fs_handle
*handle
)
218 pthread_mutex_lock(&handle
->lock
);
219 if (handle
->fd
>= 0) {
220 DBG_NO_LOC(" %s [active, fd %d%s]",
221 handle
->properties
.path
,
223 handle
->in_use
? ", in use" : "");
225 DBG_NO_LOC(" %s [suspended]", handle
->properties
.path
);
227 pthread_mutex_unlock(&handle
->lock
);
231 int fs_handle_suspend(struct fs_handle
*handle
)
236 pthread_mutex_lock(&handle
->lock
);
237 assert(handle
->fd
>= 0);
238 if (handle
->in_use
) {
239 /* This handle can't be suspended as it is currently in use. */
244 ret
= stat(handle
->properties
.path
, &fs_stat
);
246 PERROR("Filesystem handle to %s cannot be suspended as stat() failed",
247 handle
->properties
.path
);
252 if (fs_stat
.st_ino
!= handle
->ino
) {
253 /* Don't suspend as the handle would not be restorable. */
254 WARN("Filesystem handle to %s cannot be suspended as its inode changed",
255 handle
->properties
.path
);
260 handle
->offset
= lseek(handle
->fd
, 0, SEEK_CUR
);
261 if (handle
->offset
== -1) {
262 WARN("Filesystem handle to %s cannot be suspended as lseek() failed to sample its current position",
263 handle
->properties
.path
);
268 ret
= close(handle
->fd
);
270 PERROR("Filesystem handle to %s cannot be suspended as close() failed",
271 handle
->properties
.path
);
275 DBG("Suspended filesystem handle to %s (fd %i) at position %" PRId64
,
276 handle
->properties
.path
, handle
->fd
, handle
->offset
);
280 handle
->tracker
->stats
.errors
++;
282 pthread_mutex_unlock(&handle
->lock
);
286 /* Caller must hold the tracker and handle's locks. */
288 int fs_handle_restore(struct fs_handle
*handle
)
292 assert(handle
->fd
== -1);
293 ret
= open_from_properties(&handle
->properties
);
295 PERROR("Failed to restore filesystem handle to %s, open() failed",
296 handle
->properties
.path
);
302 ret
= lseek(fd
, handle
->offset
, SEEK_SET
);
304 PERROR("Failed to restore filesystem handle to %s, lseek() failed",
305 handle
->properties
.path
);
309 DBG("Restored filesystem handle to %s (fd %i) at position %" PRId64
,
310 handle
->properties
.path
, fd
, handle
->offset
);
322 int open_from_properties(struct open_properties
*properties
)
327 * open() ignores the 'flags' parameter unless the O_CREAT or O_TMPFILE
328 * flags are set. O_TMPFILE would not make sense in the context of a
329 * suspendable fs_handle as it would not be restorable (see OPEN(2)),
330 * thus it is ignored here.
332 if ((properties
->flags
& O_CREAT
) && properties
->mode
.is_set
) {
333 ret
= open(properties
->path
, properties
->flags
,
334 properties
->mode
.value
);
336 ret
= open(properties
->path
, properties
->flags
);
339 * Some flags should not be used beyond the initial open() of a
340 * restorable file system handle. O_CREAT and O_TRUNC must
341 * be cleared since it would be unexpected to re-use them
342 * when the handle is retored:
343 * - O_CREAT should not be needed as the file has been created
344 * on the initial call to open(),
345 * - O_TRUNC would destroy the file's contents by truncating it
348 properties
->flags
&= ~(O_CREAT
| O_TRUNC
);
357 struct fd_tracker
*fd_tracker_create(unsigned int capacity
)
359 struct fd_tracker
*tracker
= zmalloc(sizeof(struct fd_tracker
));
365 pthread_mutex_lock(&seed
.lock
);
366 if (!seed
.initialized
) {
367 seed
.value
= (unsigned long) time(NULL
);
368 seed
.initialized
= true;
370 pthread_mutex_unlock(&seed
.lock
);
372 CDS_INIT_LIST_HEAD(&tracker
->active_handles
);
373 CDS_INIT_LIST_HEAD(&tracker
->suspended_handles
);
374 tracker
->capacity
= capacity
;
375 tracker
->unsuspendable_fds
= cds_lfht_new(DEFAULT_HT_SIZE
, 1, 0,
376 CDS_LFHT_AUTO_RESIZE
| CDS_LFHT_ACCOUNTING
, NULL
);
377 DBG("File descriptor tracker created with a limit of %u simultaneously-opened FDs",
383 void fd_tracker_log(struct fd_tracker
*tracker
)
385 struct fs_handle
*handle
;
386 struct unsuspendable_fd
*unsuspendable_fd
;
387 struct cds_lfht_iter iter
;
389 pthread_mutex_lock(&tracker
->lock
);
390 DBG_NO_LOC("File descriptor tracker");
391 DBG_NO_LOC(" Stats:");
392 DBG_NO_LOC(" uses: %" PRIu64
, tracker
->stats
.uses
);
393 DBG_NO_LOC(" misses: %" PRIu64
, tracker
->stats
.misses
);
394 DBG_NO_LOC(" errors: %" PRIu64
, tracker
->stats
.errors
);
395 DBG_NO_LOC(" Tracked: %u", TRACKED_COUNT(tracker
));
396 DBG_NO_LOC(" active: %u", ACTIVE_COUNT(tracker
));
397 DBG_NO_LOC(" suspendable: %u", SUSPENDABLE_COUNT(tracker
));
398 DBG_NO_LOC(" unsuspendable: %u", UNSUSPENDABLE_COUNT(tracker
));
399 DBG_NO_LOC(" suspended: %u", SUSPENDED_COUNT(tracker
));
400 DBG_NO_LOC(" capacity: %u", tracker
->capacity
);
402 DBG_NO_LOC(" Tracked suspendable file descriptors");
403 cds_list_for_each_entry(handle
, &tracker
->active_handles
,
405 fs_handle_log(handle
);
407 cds_list_for_each_entry(handle
, &tracker
->suspended_handles
,
409 fs_handle_log(handle
);
411 if (!SUSPENDABLE_COUNT(tracker
)) {
415 DBG_NO_LOC(" Tracked unsuspendable file descriptors");
417 cds_lfht_for_each_entry(tracker
->unsuspendable_fds
, &iter
,
418 unsuspendable_fd
, tracker_node
) {
419 DBG_NO_LOC(" %s [active, fd %d]", unsuspendable_fd
->name
? : "Unnamed",
420 unsuspendable_fd
->fd
);
423 if (!UNSUSPENDABLE_COUNT(tracker
)) {
427 pthread_mutex_unlock(&tracker
->lock
);
430 int fd_tracker_destroy(struct fd_tracker
*tracker
)
435 * Refuse to destroy the tracker as fs_handles may still old
436 * weak references to the tracker.
438 pthread_mutex_lock(&tracker
->lock
);
439 if (TRACKED_COUNT(tracker
)) {
440 ERR("A file descriptor leak has been detected: %u tracked file descriptors are still being tracked",
441 TRACKED_COUNT(tracker
));
442 pthread_mutex_unlock(&tracker
->lock
);
443 fd_tracker_log(tracker
);
447 pthread_mutex_unlock(&tracker
->lock
);
449 ret
= cds_lfht_destroy(tracker
->unsuspendable_fds
, NULL
);
451 pthread_mutex_destroy(&tracker
->lock
);
457 struct fs_handle
*fd_tracker_open_fs_handle(struct fd_tracker
*tracker
,
458 const char *path
, int flags
, mode_t
*mode
)
461 struct fs_handle
*handle
= NULL
;
463 struct open_properties properties
= {
464 .path
= strdup(path
),
466 .mode
.is_set
= !!mode
,
467 .mode
.value
= mode
? *mode
: 0,
470 if (!properties
.path
) {
474 pthread_mutex_lock(&tracker
->lock
);
475 if (ACTIVE_COUNT(tracker
) == tracker
->capacity
) {
476 if (tracker
->count
.suspendable
.active
> 0) {
477 ret
= fd_tracker_suspend_handles(tracker
, 1);
483 * There are not enough active suspendable file
484 * descriptors to open a new fd and still accomodate the
485 * tracker's capacity.
487 WARN("Cannot open file system handle, too many unsuspendable file descriptors are opened (%u)",
488 tracker
->count
.unsuspendable
);
494 handle
= zmalloc(sizeof(*handle
));
499 ret
= pthread_mutex_init(&handle
->lock
, NULL
);
501 PERROR("Failed to initialize handle mutex while creating fs handle");
506 handle
->fd
= open_from_properties(&properties
);
507 if (handle
->fd
< 0) {
508 PERROR("Failed to open fs handle to %s, open() returned", path
);
514 * Clear the create flag from the open flags as it would make no sense
515 * to use it when restoring a fs handle.
517 properties
.flags
&= ~O_CREAT
;
518 handle
->properties
= properties
;
519 properties
.path
= NULL
;
521 if (fstat(handle
->fd
, &fd_stat
)) {
522 PERROR("Failed to retrieve file descriptor inode while creating fs handle, fstat() returned");
526 handle
->ino
= fd_stat
.st_ino
;
528 fd_tracker_track(tracker
, handle
);
529 handle
->tracker
= tracker
;
530 pthread_mutex_unlock(&tracker
->lock
);
532 free(properties
.path
);
535 pthread_mutex_unlock(&tracker
->lock
);
536 (void) fs_handle_close(handle
);
541 /* Caller must hold the tracker's lock. */
543 int fd_tracker_suspend_handles(struct fd_tracker
*tracker
,
546 unsigned int left_to_close
= count
;
547 struct fs_handle
*handle
, *tmp
;
549 cds_list_for_each_entry_safe(handle
, tmp
, &tracker
->active_handles
,
553 fd_tracker_untrack(tracker
, handle
);
554 ret
= fs_handle_suspend(handle
);
555 fd_tracker_track(tracker
, handle
);
560 if (!left_to_close
) {
564 return left_to_close
? -EMFILE
: 0;
567 int fd_tracker_open_unsuspendable_fd(struct fd_tracker
*tracker
,
568 int *out_fds
, const char **names
, unsigned int fd_count
,
569 fd_open_cb open
, void *user_data
)
571 int ret
, user_ret
, i
, fds_to_suspend
;
572 unsigned int active_fds
;
573 struct unsuspendable_fd
*entries
[fd_count
];
575 memset(entries
, 0, sizeof(entries
));
577 pthread_mutex_lock(&tracker
->lock
);
579 active_fds
= ACTIVE_COUNT(tracker
);
580 fds_to_suspend
= (int) active_fds
+ (int) fd_count
- (int) tracker
->capacity
;
581 if (fds_to_suspend
> 0) {
582 if (fds_to_suspend
<= tracker
->count
.suspendable
.active
) {
583 ret
= fd_tracker_suspend_handles(tracker
, fds_to_suspend
);
589 * There are not enough active suspendable file
590 * descriptors to open a new fd and still accomodate the
591 * tracker's capacity.
593 WARN("Cannot open unsuspendable fd, too many unsuspendable file descriptors are opened (%u)",
594 tracker
->count
.unsuspendable
);
600 user_ret
= open(user_data
, out_fds
);
607 * Add the fds returned by the user's callback to the hashtable
608 * of unsuspendable fds.
610 for (i
= 0; i
< fd_count
; i
++) {
611 struct unsuspendable_fd
*entry
=
612 unsuspendable_fd_create(names
? names
[i
] : NULL
,
617 goto end_free_entries
;
623 for (i
= 0; i
< fd_count
; i
++) {
624 struct cds_lfht_node
*node
;
625 struct unsuspendable_fd
*entry
= entries
[i
];
627 node
= cds_lfht_add_unique(
628 tracker
->unsuspendable_fds
,
629 hash_key_ulong((void *) (unsigned long) out_fds
[i
],
632 (void *) (unsigned long) out_fds
[i
],
633 &entry
->tracker_node
);
635 if (node
!= &entry
->tracker_node
) {
638 goto end_free_entries
;
642 tracker
->count
.unsuspendable
+= fd_count
;
646 pthread_mutex_unlock(&tracker
->lock
);
649 for (i
= 0; i
< fd_count
; i
++) {
650 unsuspendable_fd_destroy(entries
[i
]);
655 int fd_tracker_close_unsuspendable_fd(struct fd_tracker
*tracker
,
656 int *fds_in
, unsigned int fd_count
, fd_close_cb close
,
659 int i
, ret
, user_ret
;
663 * Maintain a local copy of fds_in as the user's callback may modify its
664 * contents (e.g. setting the fd(s) to -1 after close).
666 memcpy(fds
, fds_in
, sizeof(*fds
) * fd_count
);
668 pthread_mutex_lock(&tracker
->lock
);
671 /* Let the user close the file descriptors. */
672 user_ret
= close(user_data
, fds_in
);
678 /* Untrack the fds that were just closed by the user's callback. */
679 for (i
= 0; i
< fd_count
; i
++) {
680 struct cds_lfht_node
*node
;
681 struct cds_lfht_iter iter
;
682 struct unsuspendable_fd
*entry
;
684 cds_lfht_lookup(tracker
->unsuspendable_fds
,
685 hash_key_ulong((void *) (unsigned long) fds
[i
],
688 (void *) (unsigned long) fds
[i
],
690 node
= cds_lfht_iter_get_node(&iter
);
692 /* Unknown file descriptor. */
693 WARN("Untracked file descriptor %d passed to fd_tracker_close_unsuspendable_fd()",
698 entry
= caa_container_of(node
,
699 struct unsuspendable_fd
,
702 cds_lfht_del(tracker
->unsuspendable_fds
, node
);
703 unsuspendable_fd_destroy(entry
);
707 tracker
->count
.unsuspendable
-= fd_count
;
711 pthread_mutex_unlock(&tracker
->lock
);
715 /* Caller must have taken the tracker's and handle's locks. */
717 void fd_tracker_track(struct fd_tracker
*tracker
, struct fs_handle
*handle
)
719 if (handle
->fd
>= 0) {
720 tracker
->count
.suspendable
.active
++;
721 cds_list_add_tail(&handle
->handles_list_node
,
722 &tracker
->active_handles
);
724 tracker
->count
.suspendable
.suspended
++;
725 cds_list_add_tail(&handle
->handles_list_node
,
726 &tracker
->suspended_handles
);
730 /* Caller must have taken the tracker's and handle's locks. */
732 void fd_tracker_untrack(struct fd_tracker
*tracker
, struct fs_handle
*handle
)
734 if (handle
->fd
>= 0) {
735 tracker
->count
.suspendable
.active
--;
737 tracker
->count
.suspendable
.suspended
--;
739 cds_list_del(&handle
->handles_list_node
);
742 /* Caller must have taken the tracker's and handle's locks. */
744 int fd_tracker_restore_handle(struct fd_tracker
*tracker
,
745 struct fs_handle
*handle
)
749 fd_tracker_untrack(tracker
, handle
);
750 if (ACTIVE_COUNT(tracker
) >= tracker
->capacity
) {
751 ret
= fd_tracker_suspend_handles(tracker
, 1);
756 ret
= fs_handle_restore(handle
);
758 fd_tracker_track(tracker
, handle
);
759 return ret
? ret
: handle
->fd
;
762 int fs_handle_get_fd(struct fs_handle
*handle
)
767 * TODO This should be optimized as it is a fairly hot path.
768 * The fd-tracker's lock should only be taken when a fs_handle is
769 * restored (slow path). On the fast path (fs_handle is active),
770 * the only effect on the fd_tracker is marking the handle as the
771 * most recently used. Currently, it is done by a call to the
772 * track/untrack helpers, but it should be done atomically.
774 * Note that the lock's nesting order must still be respected here.
775 * The handle's lock nests inside the tracker's lock.
777 pthread_mutex_lock(&handle
->tracker
->lock
);
778 pthread_mutex_lock(&handle
->lock
);
779 assert(!handle
->in_use
);
781 handle
->tracker
->stats
.uses
++;
782 if (handle
->fd
>= 0) {
784 /* Mark as most recently used. */
785 fd_tracker_untrack(handle
->tracker
, handle
);
786 fd_tracker_track(handle
->tracker
, handle
);
788 handle
->tracker
->stats
.misses
++;
789 ret
= fd_tracker_restore_handle(handle
->tracker
, handle
);
791 handle
->tracker
->stats
.errors
++;
795 handle
->in_use
= true;
797 pthread_mutex_unlock(&handle
->lock
);
798 pthread_mutex_unlock(&handle
->tracker
->lock
);
802 void fs_handle_put_fd(struct fs_handle
*handle
)
804 pthread_mutex_lock(&handle
->lock
);
805 handle
->in_use
= false;
806 pthread_mutex_unlock(&handle
->lock
);
809 int fs_handle_unlink(struct fs_handle
*handle
)
813 pthread_mutex_lock(&handle
->tracker
->lock
);
814 pthread_mutex_lock(&handle
->lock
);
815 ret
= lttng_inode_defer_unlink(handle
->inode
);
816 pthread_mutex_unlock(&handle
->lock
);
817 pthread_mutex_unlock(&handle
->tracker
->lock
);
821 int fs_handle_close(struct fs_handle
*handle
)
830 pthread_mutex_lock(&handle
->tracker
->lock
);
831 pthread_mutex_lock(&handle
->lock
);
832 fd_tracker_untrack(handle
->tracker
, handle
);
833 if (handle
->fd
>= 0) {
834 assert(!handle
->in_use
);
836 * The return value of close() is not propagated as there
837 * isn't much the user can do about it.
839 if (close(handle
->fd
)) {
840 PERROR("Failed to close the file descritptor (%d) of fs handle to %s, close() returned",
841 handle
->fd
, handle
->properties
.path
);
845 pthread_mutex_unlock(&handle
->lock
);
846 pthread_mutex_destroy(&handle
->lock
);
847 pthread_mutex_unlock(&handle
->tracker
->lock
);
848 free(handle
->properties
.path
);