From df038819ef174fb8b0d5a76c293a3b94ce2a43b9 Mon Sep 17 00:00:00 2001 From: =?utf8?q?J=C3=A9r=C3=A9mie=20Galarneau?= Date: Fri, 22 Nov 2019 18:41:09 -0500 Subject: [PATCH] fd-tracker: add an fd-tracker util to common MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit This commit adds an fd-tracker utility to the common libs. This interface allows a process to keep track of its open file descriptors and enforce a limit on the number of file descriptors that may be simultaneously opened. The intent is to use this interface as part of the relay daemon to mitigate file descriptor exhaustion problems that are encountered when the relay has to handle a large number of streams. The fd-tracker defines two classes of file descriptors: suspendable and unsuspendable file descriptors. Suspendable file descriptors are handles to filesystem objects (e.g. regular files) that may be closed and re-opened later without affecting the application. A suspendable file descriptor can be opened by creating a filesystem handle (fs_handle) using the fd-tracker. The raw file descriptor must then be obtained and released using that handle. Closing the handle will effectively ensure that the file descriptor is closed. Unsuspendable file descriptors are file descriptors that cannot be closed without affecting the application's state. For instance, it is not possible to close and re-open a pipe, a TCP socket, or an epoll fd without involving some app-specific logic. Thus, the fd-tracker considers those file descriptors as unsuspendable. Opening an unsuspendable file descriptor will return a raw file descriptor to the application. It is the application's responsibility to notify the fd-tracker of the file descriptor's closing to ensure the number of active file descriptors can be tracked accurately. 
If a request to open a new file descriptor is made to the fd-tracker and the process has already reached its maximal count of simultaneously opened file descriptors, an attempt will be made to suspend a suspendable file descriptor to release a slot. Suspending a file descriptor involves: - verifying that the file is still available on the FS (restorable), - sampling its current position, - closing the file descriptor. Note that suspending a file descriptor eliminates the POSIX guarantee that a file may be unlinked at any time without affecting the application (provided that it holds an open FD to that file). Applications using the fd-tracker that need to maintain this guarantee should open those files as unsuspendable file descriptors. To protect against unlinking and file replacement scenarios, the fd-tracker samples the file's inode number when a fs_handle is created. This inode number will then be checked any time the handle is suspended or restored to ensure that the application is made aware of the file's unavailability. This is preferable to inadvertently opening another file of the same name if the original file was unlinked and/or replaced between a fs_handle's suspension and restoration. 
Signed-off-by: Jérémie Galarneau --- configure.ac | 1 + src/common/Makefile.am | 4 +- src/common/fd-tracker/Makefile.am | 3 + src/common/fd-tracker/fd-tracker.c | 834 +++++++++++++++++++++++++++++ src/common/fd-tracker/fd-tracker.h | 164 ++++++ src/common/fd-tracker/utils.c | 24 + src/common/fd-tracker/utils.h | 27 + 7 files changed, 1055 insertions(+), 2 deletions(-) create mode 100644 src/common/fd-tracker/Makefile.am create mode 100644 src/common/fd-tracker/fd-tracker.c create mode 100644 src/common/fd-tracker/fd-tracker.h create mode 100644 src/common/fd-tracker/utils.c create mode 100644 src/common/fd-tracker/utils.h diff --git a/configure.ac b/configure.ac index 7b05c823b..217d47893 100644 --- a/configure.ac +++ b/configure.ac @@ -1133,6 +1133,7 @@ AC_CONFIG_FILES([ src/common/health/Makefile src/common/config/Makefile src/common/string-utils/Makefile + src/common/fd-tracker/Makefile src/lib/Makefile src/lib/lttng-ctl/Makefile src/lib/lttng-ctl/filter/Makefile diff --git a/src/common/Makefile.am b/src/common/Makefile.am index 896e2d6a1..e6628f8a4 100644 --- a/src/common/Makefile.am +++ b/src/common/Makefile.am @@ -6,7 +6,7 @@ SUBDIRS = string-utils # since SUBDIRS is decided at configure time. 
DIST_SUBDIRS = compat health hashtable kernel-ctl sessiond-comm relayd \ kernel-consumer ust-consumer testpoint index config consumer \ - string-utils + string-utils fd-tracker # # Common library noinst_LTLIBRARIES = libcommon.la @@ -63,7 +63,7 @@ SUBDIRS += sessiond-comm endif if BUILD_LIB_RELAYD -SUBDIRS += relayd +SUBDIRS += relayd fd-tracker endif if BUILD_LIB_KERNEL_CONSUMER diff --git a/src/common/fd-tracker/Makefile.am b/src/common/fd-tracker/Makefile.am new file mode 100644 index 000000000..8453f4b10 --- /dev/null +++ b/src/common/fd-tracker/Makefile.am @@ -0,0 +1,3 @@ +noinst_LTLIBRARIES = libfd-tracker.la + +libfd_tracker_la_SOURCES = fd-tracker.h fd-tracker.c utils.h utils.c diff --git a/src/common/fd-tracker/fd-tracker.c b/src/common/fd-tracker/fd-tracker.c new file mode 100644 index 000000000..67da7eab4 --- /dev/null +++ b/src/common/fd-tracker/fd-tracker.c @@ -0,0 +1,834 @@ +/* + * Copyright (C) 2018 - Jérémie Galarneau + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License, version 2 only, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "common/macros.h" +#include "common/error.h" +#include "common/defaults.h" +#include "common/hashtable/utils.h" +#include "common/hashtable/hashtable.h" + +#include "fd-tracker.h" + +/* Tracker lock must be taken by the user. 
*/ +#define TRACKED_COUNT(tracker) \ + (tracker->count.suspendable.active + \ + tracker->count.suspendable.suspended + \ + tracker->count.unsuspendable) + +/* Tracker lock must be taken by the user. */ +#define ACTIVE_COUNT(tracker) \ + (tracker->count.suspendable.active + \ + tracker->count.unsuspendable) + +/* Tracker lock must be taken by the user. */ +#define SUSPENDED_COUNT(tracker) \ + (tracker->count.suspendable.suspended) + +/* Tracker lock must be taken by the user. */ +#define SUSPENDABLE_COUNT(tracker) \ + (tracker->count.suspendable.active + \ + tracker->count.suspendable.suspended) + +/* Tracker lock must be taken by the user. */ +#define UNSUSPENDABLE_COUNT(tracker) \ + (tracker->count.unsuspendable) + +struct fd_tracker { + pthread_mutex_t lock; + struct { + struct { + unsigned int active; + unsigned int suspended; + } suspendable; + unsigned int unsuspendable; + } count; + unsigned int capacity; + struct { + uint64_t uses; + uint64_t misses; + /* Failures to suspend or restore fs handles. */ + uint64_t errors; + } stats; + /* + * The head of the active_handles list is always the least recently + * used active handle. When an handle is used, it is removed from the + * list and added to the end. When a file has to be suspended, the + * first element in the list is "popped", suspended, and added to the + * list of suspended handles. + */ + struct cds_list_head active_handles; + struct cds_list_head suspended_handles; + struct cds_lfht *unsuspendable_fds; +}; + +struct open_properties { + char *path; + int flags; + struct { + bool is_set; + mode_t value; + } mode; +}; + +/* + * A fs_handle is not ref-counted. Therefore, it is assumed that a + * handle is never in-use while it is being reclaimed. It can be + * shared by multiple threads, but external synchronization is required + * to ensure it is not still being used when it is reclaimed (close method). + * In this respect, it is not different from a regular file descriptor. 
+ * + * The fs_handle lock always nests _within_ the tracker's lock. + */ +struct fs_handle { + pthread_mutex_t lock; + /* + * Weak reference to the tracker. All fs_handles are assumed to have + * been closed at the moment of the destruction of the fd_tracker. + */ + struct fd_tracker *tracker; + struct open_properties properties; + int fd; + /* inode number of the file at the time of the handle's creation. */ + uint64_t ino; + bool in_use; + /* Offset to which the file should be restored. */ + off_t offset; + struct cds_list_head handles_list_node; +}; + +struct unsuspendable_fd { + /* + * Accesses are only performed through the tracker, which is protected + * by its own lock. + */ + int fd; + char *name; + struct cds_lfht_node tracker_node; + struct rcu_head rcu_head; +}; + +static struct { + pthread_mutex_t lock; + bool initialized; + unsigned long value; +} seed = { + .lock = PTHREAD_MUTEX_INITIALIZER, +}; + +static int match_fd(struct cds_lfht_node *node, const void *key); +static void unsuspendable_fd_destroy(struct unsuspendable_fd *entry); +static struct unsuspendable_fd *unsuspendable_fd_create(const char *name, + int fd); +static int open_from_properties(struct open_properties *properties); + +static void fs_handle_log(struct fs_handle *handle); +static int fs_handle_suspend(struct fs_handle *handle); +static int fs_handle_restore(struct fs_handle *handle); + +static void fd_tracker_track(struct fd_tracker *tracker, + struct fs_handle *handle); +static void fd_tracker_untrack(struct fd_tracker *tracker, + struct fs_handle *handle); +static int fd_tracker_suspend_handles(struct fd_tracker *tracker, + unsigned int count); +static int fd_tracker_restore_handle(struct fd_tracker *tracker, + struct fs_handle *handle); + +/* Match function of the tracker's unsuspendable_fds hash table. 
*/ +static +int match_fd(struct cds_lfht_node *node, const void *key) +{ + struct unsuspendable_fd *entry = + caa_container_of(node, struct unsuspendable_fd, tracker_node); + + return hash_match_key_ulong((void *) (unsigned long) entry->fd, + (void *) key); +} + +static +void delete_unsuspendable_fd(struct rcu_head *head) +{ + struct unsuspendable_fd *fd = caa_container_of(head, + struct unsuspendable_fd, rcu_head); + + free(fd->name); + free(fd); +} + +static +void unsuspendable_fd_destroy(struct unsuspendable_fd *entry) +{ + if (!entry) { + return; + } + call_rcu(&entry->rcu_head, delete_unsuspendable_fd); +} + +static +struct unsuspendable_fd *unsuspendable_fd_create(const char *name, int fd) +{ + struct unsuspendable_fd *entry = + zmalloc(sizeof(*entry)); + + if (!entry) { + goto error; + } + if (name) { + entry->name = strdup(name); + if (!entry->name) { + goto error; + } + } + cds_lfht_node_init(&entry->tracker_node); + entry->fd = fd; + return entry; +error: + unsuspendable_fd_destroy(entry); + return NULL; +} + +static +void fs_handle_log(struct fs_handle *handle) +{ + pthread_mutex_lock(&handle->lock); + if (handle->fd >= 0) { + DBG_NO_LOC(" %s [active, fd %d%s]", + handle->properties.path, + handle->fd, + handle->in_use ? ", in use" : ""); + } else { + DBG_NO_LOC(" %s [suspended]", handle->properties.path); + } + pthread_mutex_unlock(&handle->lock); +} + +static +int fs_handle_suspend(struct fs_handle *handle) +{ + int ret = 0; + struct stat fs_stat; + + pthread_mutex_lock(&handle->lock); + assert(handle->fd >= 0); + if (handle->in_use) { + /* This handle can't be suspended as it is currently in use. */ + ret = -EAGAIN; + goto end; + } + + ret = stat(handle->properties.path, &fs_stat); + if (ret) { + PERROR("Filesystem handle to %s cannot be suspended as stat() failed", + handle->properties.path); + ret = -errno; + goto end; + } + + if (fs_stat.st_ino != handle->ino) { + /* Don't suspend as the handle would not be restorable. 
*/ + WARN("Filesystem handle to %s cannot be suspended as its inode changed", + handle->properties.path); + ret = -ENOENT; + goto end; + } + + handle->offset = lseek(handle->fd, 0, SEEK_CUR); + if (handle->offset == -1) { + WARN("Filesystem handle to %s cannot be suspended as lseek() failed to sample its current position", + handle->properties.path); + ret = -errno; + goto end; + } + + ret = close(handle->fd); + if (ret) { + PERROR("Filesystem handle to %s cannot be suspended as close() failed", + handle->properties.path); + ret = -errno; + goto end; + } + DBG("Suspended filesystem handle to %s (fd %i) at position %" PRId64, + handle->properties.path, handle->fd, handle->offset); + handle->fd = -1; +end: + if (ret) { + handle->tracker->stats.errors++; + } + pthread_mutex_unlock(&handle->lock); + return ret; +} + +/* Caller must hold the tracker and handle's locks. */ +static +int fs_handle_restore(struct fs_handle *handle) +{ + int ret, fd = -1; + + assert(handle->fd == -1); + ret = open_from_properties(&handle->properties); + if (ret < 0) { + PERROR("Failed to restore filesystem handle to %s, open() failed", + handle->properties.path); + ret = -errno; + goto end; + } + fd = ret; + + ret = lseek(fd, handle->offset, SEEK_SET); + if (ret < 0) { + PERROR("Failed to restore filesystem handle to %s, lseek() failed", + handle->properties.path); + ret = -errno; + goto end; + } + DBG("Restored filesystem handle to %s (fd %i) at position %" PRId64, + handle->properties.path, fd, handle->offset); + ret = 0; + handle->fd = fd; + fd = -1; +end: + if (fd >= 0) { + (void) close(fd); + } + return ret; +} + +static +int open_from_properties(struct open_properties *properties) +{ + int ret; + + /* + * open() ignores the 'flags' parameter unless the O_CREAT or O_TMPFILE + * flags are set. O_TMPFILE would not make sense in the context of a + * suspendable fs_handle as it would not be restorable (see OPEN(2)), + * thus it is ignored here. 
+ */ + if ((properties->flags & O_CREAT) && properties->mode.is_set) { + ret = open(properties->path, properties->flags, + properties->mode.value); + } else { + ret = open(properties->path, properties->flags); + } + /* + * Some flags should not be used beyond the initial open() of a + * restorable file system handle. O_CREAT and O_TRUNC must + * be cleared since it would be unexpected to re-use them + * when the handle is retored: + * - O_CREAT should not be needed as the file has been created + * on the initial call to open(), + * - O_TRUNC would destroy the file's contents by truncating it + * to length 0. + */ + properties->flags &= ~(O_CREAT | O_TRUNC); + if (ret < 0) { + ret = -errno; + goto end; + } +end: + return ret; +} + +struct fd_tracker *fd_tracker_create(unsigned int capacity) +{ + struct fd_tracker *tracker = zmalloc(sizeof(struct fd_tracker)); + + if (!tracker) { + goto end; + } + + pthread_mutex_lock(&seed.lock); + if (!seed.initialized) { + seed.value = (unsigned long) time(NULL); + seed.initialized = true; + } + pthread_mutex_unlock(&seed.lock); + + CDS_INIT_LIST_HEAD(&tracker->active_handles); + CDS_INIT_LIST_HEAD(&tracker->suspended_handles); + tracker->capacity = capacity; + tracker->unsuspendable_fds = cds_lfht_new(DEFAULT_HT_SIZE, 1, 0, + CDS_LFHT_AUTO_RESIZE | CDS_LFHT_ACCOUNTING, NULL); + DBG("File descriptor tracker created with a limit of %u simultaneously-opened FDs", + capacity); +end: + return tracker; +} + +void fd_tracker_log(struct fd_tracker *tracker) +{ + struct fs_handle *handle; + struct unsuspendable_fd *unsuspendable_fd; + struct cds_lfht_iter iter; + + pthread_mutex_lock(&tracker->lock); + DBG_NO_LOC("File descriptor tracker"); + DBG_NO_LOC(" Stats:"); + DBG_NO_LOC(" uses: %" PRIu64, tracker->stats.uses); + DBG_NO_LOC(" misses: %" PRIu64, tracker->stats.misses); + DBG_NO_LOC(" errors: %" PRIu64, tracker->stats.errors); + DBG_NO_LOC(" Tracked: %u", TRACKED_COUNT(tracker)); + DBG_NO_LOC(" active: %u", ACTIVE_COUNT(tracker)); + 
DBG_NO_LOC(" suspendable: %u", SUSPENDABLE_COUNT(tracker)); + DBG_NO_LOC(" unsuspendable: %u", UNSUSPENDABLE_COUNT(tracker)); + DBG_NO_LOC(" suspended: %u", SUSPENDED_COUNT(tracker)); + DBG_NO_LOC(" capacity: %u", tracker->capacity); + + DBG_NO_LOC(" Tracked suspendable file descriptors"); + cds_list_for_each_entry(handle, &tracker->active_handles, + handles_list_node) { + fs_handle_log(handle); + } + cds_list_for_each_entry(handle, &tracker->suspended_handles, + handles_list_node) { + fs_handle_log(handle); + } + if (!SUSPENDABLE_COUNT(tracker)) { + DBG_NO_LOC(" None"); + } + + DBG_NO_LOC(" Tracked unsuspendable file descriptors"); + rcu_read_lock(); + cds_lfht_for_each_entry(tracker->unsuspendable_fds, &iter, + unsuspendable_fd, tracker_node) { + DBG_NO_LOC(" %s [active, fd %d]", unsuspendable_fd->name ? : "Unnamed", + unsuspendable_fd->fd); + } + rcu_read_unlock(); + if (!UNSUSPENDABLE_COUNT(tracker)) { + DBG_NO_LOC(" None"); + } + + pthread_mutex_unlock(&tracker->lock); +} + +int fd_tracker_destroy(struct fd_tracker *tracker) +{ + int ret = 0; + + /* + * Refuse to destroy the tracker as fs_handles may still old + * weak references to the tracker. + */ + pthread_mutex_lock(&tracker->lock); + if (TRACKED_COUNT(tracker)) { + ERR("A file descriptor leak has been detected: %u tracked file descriptors are still being tracked", + TRACKED_COUNT(tracker)); + pthread_mutex_unlock(&tracker->lock); + fd_tracker_log(tracker); + ret = -1; + goto end; + } + pthread_mutex_unlock(&tracker->lock); + + ret = cds_lfht_destroy(tracker->unsuspendable_fds, NULL); + assert(!ret); + pthread_mutex_destroy(&tracker->lock); + free(tracker); +end: + return ret; +} + +struct fs_handle *fd_tracker_open_fs_handle(struct fd_tracker *tracker, + const char *path, int flags, mode_t *mode) +{ + int ret; + struct fs_handle *handle = NULL; + struct stat fd_stat; + struct open_properties properties = { + .path = strdup(path), + .flags = flags, + .mode.is_set = !!mode, + .mode.value = mode ? 
*mode : 0, + }; + + if (!properties.path) { + goto end; + } + + pthread_mutex_lock(&tracker->lock); + if (ACTIVE_COUNT(tracker) == tracker->capacity) { + if (tracker->count.suspendable.active > 0) { + ret = fd_tracker_suspend_handles(tracker, 1); + if (ret) { + goto error_destroy; + } + } else { + /* + * There are not enough active suspendable file + * descriptors to open a new fd and still accomodate the + * tracker's capacity. + */ + WARN("Cannot open file system handle, too many unsuspendable file descriptors are opened (%u)", + tracker->count.unsuspendable); + ret = -EMFILE; + goto error_destroy; + } + } + + handle = zmalloc(sizeof(*handle)); + if (!handle) { + goto end; + } + + ret = pthread_mutex_init(&handle->lock, NULL); + if (ret) { + PERROR("Failed to initialize handle mutex while creating fs handle"); + free(handle); + goto end; + } + + handle->fd = open_from_properties(&properties); + if (handle->fd < 0) { + PERROR("Failed to open fs handle to %s, open() returned", path); + ret = -errno; + goto error_destroy; + } + + handle->properties = properties; + properties.path = NULL; + + if (fstat(handle->fd, &fd_stat)) { + PERROR("Failed to retrieve file descriptor inode while creating fs handle, fstat() returned"); + ret = -errno; + goto error_destroy; + } + handle->ino = fd_stat.st_ino; + + fd_tracker_track(tracker, handle); + handle->tracker = tracker; + pthread_mutex_unlock(&tracker->lock); +end: + free(properties.path); + return handle; +error_destroy: + pthread_mutex_unlock(&tracker->lock); + (void) fs_handle_close(handle); + handle = NULL; + goto end; +} + +/* Caller must hold the tracker's lock. 
*/ +static +int fd_tracker_suspend_handles(struct fd_tracker *tracker, + unsigned int count) +{ + unsigned int left_to_close = count; + struct fs_handle *handle, *tmp; + + cds_list_for_each_entry_safe(handle, tmp, &tracker->active_handles, + handles_list_node) { + int ret; + + fd_tracker_untrack(tracker, handle); + ret = fs_handle_suspend(handle); + fd_tracker_track(tracker, handle); + if (!ret) { + left_to_close--; + } + + if (!left_to_close) { + break; + } + } + return left_to_close ? -EMFILE : 0; +} + +int fd_tracker_open_unsuspendable_fd(struct fd_tracker *tracker, + int *out_fds, const char **names, unsigned int fd_count, + fd_open_cb open, void *user_data) +{ + int ret, user_ret, i, fds_to_suspend; + unsigned int active_fds; + struct unsuspendable_fd *entries[fd_count]; + + memset(entries, 0, sizeof(entries)); + + pthread_mutex_lock(&tracker->lock); + + active_fds = ACTIVE_COUNT(tracker); + fds_to_suspend = (int) active_fds + (int) fd_count - (int) tracker->capacity; + if (fds_to_suspend > 0) { + if (fds_to_suspend <= tracker->count.suspendable.active) { + ret = fd_tracker_suspend_handles(tracker, fds_to_suspend); + if (ret) { + goto end; + } + } else { + /* + * There are not enough active suspendable file + * descriptors to open a new fd and still accomodate the + * tracker's capacity. + */ + WARN("Cannot open unsuspendable fd, too many unsuspendable file descriptors are opened (%u)", + tracker->count.unsuspendable); + ret = -EMFILE; + goto end; + } + } + + user_ret = open(user_data, out_fds); + if (user_ret) { + ret = user_ret; + goto end; + } + + /* + * Add the fds returned by the user's callback to the hashtable + * of unsuspendable fds. + */ + for (i = 0; i < fd_count; i++) { + struct unsuspendable_fd *entry = + unsuspendable_fd_create(names ? 
names[i] : NULL, + out_fds[i]); + + if (!entry) { + ret = -1; + goto end_free_entries; + } + entries[i] = entry; + } + + rcu_read_lock(); + for (i = 0; i < fd_count; i++) { + struct cds_lfht_node *node; + struct unsuspendable_fd *entry = entries[i]; + + node = cds_lfht_add_unique( + tracker->unsuspendable_fds, + hash_key_ulong((void *) (unsigned long) out_fds[i], + seed.value), + match_fd, + (void *) (unsigned long) out_fds[i], + &entry->tracker_node); + + if (node != &entry->tracker_node) { + ret = -EEXIST; + rcu_read_unlock(); + goto end_free_entries; + } + entries[i] = NULL; + } + tracker->count.unsuspendable += fd_count; + rcu_read_unlock(); + ret = user_ret; +end: + pthread_mutex_unlock(&tracker->lock); + return ret; +end_free_entries: + for (i = 0; i < fd_count; i++) { + unsuspendable_fd_destroy(entries[i]); + } + goto end; +} + +int fd_tracker_close_unsuspendable_fd(struct fd_tracker *tracker, + int *fds_in, unsigned int fd_count, fd_close_cb close, + void *user_data) +{ + int i, ret, user_ret; + int fds[fd_count]; + + /* + * Maintain a local copy of fds_in as the user's callback may modify its + * contents (e.g. setting the fd(s) to -1 after close). + */ + memcpy(fds, fds_in, sizeof(*fds) * fd_count); + + pthread_mutex_lock(&tracker->lock); + rcu_read_lock(); + + /* Let the user close the file descriptors. */ + user_ret = close(user_data, fds_in); + if (user_ret) { + ret = user_ret; + goto end; + } + + /* Untrack the fds that were just closed by the user's callback. */ + for (i = 0; i < fd_count; i++) { + struct cds_lfht_node *node; + struct cds_lfht_iter iter; + struct unsuspendable_fd *entry; + + cds_lfht_lookup(tracker->unsuspendable_fds, + hash_key_ulong((void *) (unsigned long) fds[i], + seed.value), + match_fd, + (void *) (unsigned long) fds[i], + &iter); + node = cds_lfht_iter_get_node(&iter); + if (!node) { + /* Unknown file descriptor. 
*/ + WARN("Untracked file descriptor %d passed to fd_tracker_close_unsuspendable_fd()", + fds[i]); + ret = -EINVAL; + goto end; + } + entry = caa_container_of(node, + struct unsuspendable_fd, + tracker_node); + + cds_lfht_del(tracker->unsuspendable_fds, node); + unsuspendable_fd_destroy(entry); + fds[i] = -1; + } + + tracker->count.unsuspendable -= fd_count; + ret = 0; +end: + rcu_read_unlock(); + pthread_mutex_unlock(&tracker->lock); + return ret; +} + +/* Caller must have taken the tracker's and handle's locks. */ +static +void fd_tracker_track(struct fd_tracker *tracker, struct fs_handle *handle) +{ + if (handle->fd >= 0) { + tracker->count.suspendable.active++; + cds_list_add_tail(&handle->handles_list_node, + &tracker->active_handles); + } else { + tracker->count.suspendable.suspended++; + cds_list_add_tail(&handle->handles_list_node, + &tracker->suspended_handles); + } +} + +/* Caller must have taken the tracker's and handle's locks. */ +static +void fd_tracker_untrack(struct fd_tracker *tracker, struct fs_handle *handle) +{ + if (handle->fd >= 0) { + tracker->count.suspendable.active--; + } else { + tracker->count.suspendable.suspended--; + } + cds_list_del(&handle->handles_list_node); +} + +/* Caller must have taken the tracker's and handle's locks. */ +static +int fd_tracker_restore_handle(struct fd_tracker *tracker, + struct fs_handle *handle) +{ + int ret; + + fd_tracker_untrack(tracker, handle); + if (ACTIVE_COUNT(tracker) >= tracker->capacity) { + ret = fd_tracker_suspend_handles(tracker, 1); + if (ret) { + goto end; + } + } + ret = fs_handle_restore(handle); +end: + fd_tracker_track(tracker, handle); + return ret ? ret : handle->fd; +} + +int fs_handle_get_fd(struct fs_handle *handle) +{ + int ret; + + /* + * TODO This should be optimized as it is a fairly hot path. + * The fd-tracker's lock should only be taken when a fs_handle is + * restored (slow path). 
On the fast path (fs_handle is active), + * the only effect on the fd_tracker is marking the handle as the + * most recently used. Currently, it is done by a call to the + * track/untrack helpers, but it should be done atomically. + * + * Note that the lock's nesting order must still be respected here. + * The handle's lock nests inside the tracker's lock. + */ + pthread_mutex_lock(&handle->tracker->lock); + pthread_mutex_lock(&handle->lock); + assert(!handle->in_use); + + handle->tracker->stats.uses++; + if (handle->fd >= 0) { + ret = handle->fd; + /* Mark as most recently used. */ + fd_tracker_untrack(handle->tracker, handle); + fd_tracker_track(handle->tracker, handle); + } else { + handle->tracker->stats.misses++; + ret = fd_tracker_restore_handle(handle->tracker, handle); + if (ret < 0) { + handle->tracker->stats.errors++; + goto end; + } + } + handle->in_use = true; +end: + pthread_mutex_unlock(&handle->lock); + pthread_mutex_unlock(&handle->tracker->lock); + return ret; +} + +void fs_handle_put_fd(struct fs_handle *handle) +{ + pthread_mutex_lock(&handle->lock); + handle->in_use = false; + pthread_mutex_unlock(&handle->lock); +} + +int fs_handle_close(struct fs_handle *handle) +{ + int ret = 0; + + if (!handle) { + ret = -EINVAL; + goto end; + } + + pthread_mutex_lock(&handle->tracker->lock); + pthread_mutex_lock(&handle->lock); + fd_tracker_untrack(handle->tracker, handle); + if (handle->fd >= 0) { + /* + * The return value of close() is not propagated as there + * isn't much the user can do about it. 
+ */ + if (close(handle->fd)) { + PERROR("Failed to close the file descritptor (%d) of fs handle to %s, close() returned", + handle->fd, handle->properties.path); + } + handle->fd = -1; + } + pthread_mutex_unlock(&handle->lock); + pthread_mutex_destroy(&handle->lock); + pthread_mutex_unlock(&handle->tracker->lock); + free(handle->properties.path); + free(handle); +end: + return ret; +} diff --git a/src/common/fd-tracker/fd-tracker.h b/src/common/fd-tracker/fd-tracker.h new file mode 100644 index 000000000..ff5f49a1b --- /dev/null +++ b/src/common/fd-tracker/fd-tracker.h @@ -0,0 +1,164 @@ +/* + * Copyright (C) 2018 - Jérémie Galarneau + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License, version 2 only, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#ifndef FD_TRACKER_H +#define FD_TRACKER_H + +#include +#include + +struct fs_handle; +struct fd_tracker; + +/* + * Callback which returns a file descriptor to track through the fd + * tracker. This callback must not make use of the fd_tracker as a deadlock + * may occur. + * + * The int pointer argument is an output parameter that should be used to return + * the advertised number of file descriptors. + * + * Must return zero on success. Negative values should map to a UNIX error code. + */ +typedef int (*fd_open_cb)(void *, int *out_fds); + +/* + * Callback to allow the user to close a now-untracked file descriptor. 
This + * callback must not make use of the fd_tracker as a deadlock may occur. + * + * The callback can freely modify the in_fds argument as it is copied by the + * fd_tracker before being used. The fd tracker assumes in_fds to be closed by + * the time the callback returns. + * + * Must return zero on success. Negative values should map to a UNIX error code. + */ +typedef int (*fd_close_cb)(void *, int *in_fds); + +/* + * Set the maximal number of fds that the process should be allowed to open at + * any given time. This function must be called before any other of this + * interface. + */ +struct fd_tracker *fd_tracker_create(unsigned int capacity); + +/* Returns an error if file descriptors are leaked. */ +int fd_tracker_destroy(struct fd_tracker *tracker); + +/* + * Open a handle to a suspendable filesystem file descriptor. + * + * See OPEN(3) for an explanation of flags and mode. NULL is returned in case of + * error and errno is left untouched. Note that passing NULL as mode will result + * in open()'s default behaviour being used (using the process' umask). + * + * A fs_handle wraps a file descriptor created by OPEN(3). It is suspendable + * meaning that the underlying file may be closed at any time unless the + * handle is marked as being in-use (see fs_handle_get_fd() and + * fs_handle_put_fd()). + * + * If the tracker opted to close the underlying file descriptor, it will + * be restored to its last known state when it is obtained through + * the fs_handle's fs_handle_get_fd() method. + * + * Note that a suspendable file descriptor can be closed by the fd tracker at + * anytime when it is not in use. This means that the user should not rely on it + * being safe to unlink the file. Moreover, concurent modifications to the file + * (e.g. truncation) may react differently than if the file descriptor was kept + * open. 
+ */ +struct fs_handle *fd_tracker_open_fs_handle(struct fd_tracker *tracker, + const char *path, int flags, mode_t *mode); + +/* + * Open a tracked unsuspendable file descriptor. + * + * This function allows the fd tracker to keep track of unsuspendable + * file descriptors. A callback, open, is passed to allow the tracker + * to atomically reserve an entry for a given count of new file descriptors, + * suspending file descriptors as needed, and invoke the provided callback + * without ever exceeding the tracker's capacity. + * + * fd_count indicates the count of file descriptors that will be opened and + * returned by the open callback. The storage location at out_fds is assumed + * to be large enough to hold 'fd_count * sizeof(int)'. + * + * Names may be provided to allow easier debugging of file descriptor + * exhaustions. + * + * The callback's return value is returned to the user. Additionally, two + * negative tracker-specific codes may be returned: + * - ENOMEM: allocation of a new entry failed, + * - EMFILE: too many unsuspendable fds are opened and the tracker can't + * accomodate the request for a new unsuspendable entry. + */ +int fd_tracker_open_unsuspendable_fd(struct fd_tracker *tracker, + int *out_fds, const char **names, unsigned int fd_count, + fd_open_cb open, void *data); + +/* + * Close a tracked unsuspendable file descriptor. + * + * This function allows the fd tracker to keep track of unsuspendable + * file descriptors. A callback, close, is passed to allow the tracker + * to atomically release a file descriptor entry. + * + * Returns 0 if the close callback returned success. Returns the value returned + * by the close callback if it is negative. Additionally, a tracker-specific + * code may be returned: + * - EINVAL: a file descriptor was unknown to the tracker + * + * Closed fds are set to -1 in the fds array which, in the event of an error, + * allows the user to know which file descriptors are no longer being tracked. 
+ */ +int fd_tracker_close_unsuspendable_fd(struct fd_tracker *tracker, + int *fds, unsigned int fd_count, fd_close_cb close, + void *data); + +/* + * Log the contents of the fd_tracker. + */ +void fd_tracker_log(struct fd_tracker *tracker); + +/* + * Marks the handle as the most recently used and marks the 'fd' as + * "in-use". This prevents the tracker from recycling the underlying + * file descriptor while it is actively being used by a thread. + * + * Don't forget that the tracker may be initiating an fd 'suspension' + * from another thread as the need to free an fd slot may arise from any + * thread within the daemon. + * + * Note that a restorable fd should never be held for longer than + * strictly necessary (e.g. the duration of a syscall()). + * + * Returns the fd on success, otherwise a negative value may be returned + * if the restoration of the fd failed. + */ +int fs_handle_get_fd(struct fs_handle *handle); + +/* + * Used by the application to signify that it is no longer using the + * underlying fd and that it may be suspended. + */ +void fs_handle_put_fd(struct fs_handle *handle); + +/* + * Frees the handle and discards the underlying fd. + */ +int fs_handle_close(struct fs_handle *handle); + +#endif /* FD_TRACKER_H */ diff --git a/src/common/fd-tracker/utils.c b/src/common/fd-tracker/utils.c new file mode 100644 index 000000000..ac4e882c7 --- /dev/null +++ b/src/common/fd-tracker/utils.c @@ -0,0 +1,24 @@ +/* + * Copyright (C) 2018 - Jérémie Galarneau + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License, version 2 only, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include +#include + +int fd_tracker_util_close_fd(void *unused, int *fd) +{ + return close(*fd); +} diff --git a/src/common/fd-tracker/utils.h b/src/common/fd-tracker/utils.h new file mode 100644 index 000000000..e1c3b790d --- /dev/null +++ b/src/common/fd-tracker/utils.h @@ -0,0 +1,27 @@ +/* + * Copyright (C) 2018 - Jérémie Galarneau + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License, version 2 only, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#ifndef FD_TRACKER_UTILS_H +#define FD_TRACKER_UTILS_H + +/* + * Utility implementing a close_fd callback which receives one file descriptor + * and closes it, returning close()'s return value. + */ +int fd_tracker_util_close_fd(void *, int *fd); + +#endif /* FD_TRACKER_UTILS_H */ -- 2.34.1