From 26cc635c6066aea8d0c4109c63da574f0448f4fa Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Mon, 21 Nov 2016 16:15:36 -0500 Subject: [PATCH] Use rseq for cpu_id in libringbuffer Signed-off-by: Mathieu Desnoyers --- include/lttng/ringbuffer-config.h | 7 ++ liblttng-ust/lttng-ring-buffer-client.h | 29 ++++++-- liblttng-ust/lttng-ust-comm.c | 3 + libringbuffer/frontend_api.h | 21 ++++-- libringbuffer/rseq-arm.h | 2 +- libringbuffer/rseq-ppc.h | 4 +- libringbuffer/rseq-x86.h | 9 ++- libringbuffer/rseq.c | 99 +++++++++++++++++++++---- libringbuffer/rseq.h | 81 +++++++++++--------- 9 files changed, 184 insertions(+), 71 deletions(-) diff --git a/include/lttng/ringbuffer-config.h b/include/lttng/ringbuffer-config.h index 9ed9a34d..ef152621 100644 --- a/include/lttng/ringbuffer-config.h +++ b/include/lttng/ringbuffer-config.h @@ -216,6 +216,13 @@ struct lttng_ust_lib_ring_buffer_config { char padding[LTTNG_UST_RING_BUFFER_CONFIG_PADDING]; }; +/* State returned by rseq_start, passed as argument to rseq_finish. */ +struct lttng_rseq_state { + volatile struct rseq *rseqp; + int32_t cpu_id; /* cpu_id at start. */ + uint32_t event_counter; /* event_counter at start. */ +}; + /* * ring buffer context * diff --git a/liblttng-ust/lttng-ring-buffer-client.h b/liblttng-ust/lttng-ring-buffer-client.h index 5e95244f..b397a5e0 100644 --- a/liblttng-ust/lttng-ring-buffer-client.h +++ b/liblttng-ust/lttng-ring-buffer-client.h @@ -695,12 +695,27 @@ int lttng_event_reserve(struct lttng_ust_lib_ring_buffer_ctx *ctx, uint32_t event_id) { struct lttng_channel *lttng_chan = channel_get_private(ctx->chan); + struct lttng_rseq_state rseq_state; int ret, cpu; - //TODO register (lazy) - cpu = lib_ring_buffer_get_cpu(&client_config); - if (cpu < 0) + if (lib_ring_buffer_begin(&client_config)) return -EPERM; +retry: + rseq_state = rseq_start(); + if (caa_unlikely(rseq_cpu_at_start(rseq_state) < 0)) { + if (caa_unlikely(rseq_cpu_at_start(rseq_state) == -1)) { + if (!rseq_register_current_thread()) + goto retry; + } + /* rseq is unavailable. 
*/ + cpu = lib_ring_buffer_get_cpu(&client_config); + if (caa_unlikely(cpu < 0)) { + ret = -EPERM; + goto end; + } + } else { + cpu = rseq_cpu_at_start(rseq_state); + } ctx->cpu = cpu; switch (lttng_chan->header_type) { @@ -724,13 +739,13 @@ int lttng_event_reserve(struct lttng_ust_lib_ring_buffer_ctx *ctx, if (lib_ring_buffer_backend_get_pages(&client_config, ctx, &ctx->backend_pages)) { ret = -EPERM; - goto put; + goto end; } } lttng_write_event_header(&client_config, ctx, event_id); return 0; -put: - lib_ring_buffer_put_cpu(&client_config); +end: + lib_ring_buffer_end(&client_config); return ret; } @@ -738,7 +753,7 @@ static void lttng_event_commit(struct lttng_ust_lib_ring_buffer_ctx *ctx) { lib_ring_buffer_commit(&client_config, ctx); - lib_ring_buffer_put_cpu(&client_config); + lib_ring_buffer_end(&client_config); } static diff --git a/liblttng-ust/lttng-ust-comm.c b/liblttng-ust/lttng-ust-comm.c index 7cd6a227..effb7bd8 100644 --- a/liblttng-ust/lttng-ust-comm.c +++ b/liblttng-ust/lttng-ust-comm.c @@ -56,6 +56,7 @@ #include "lttng-ust-statedump.h" #include "clock.h" #include "../libringbuffer/getcpu.h" +#include "../libringbuffer/rseq.h" #include "getenv.h" /* @@ -1659,6 +1660,7 @@ void __attribute__((constructor)) lttng_ust_init(void) lttng_ust_clock_init(); lttng_ust_getcpu_init(); lttng_ust_statedump_init(); + rseq_init(); lttng_ring_buffer_metadata_client_init(); lttng_ring_buffer_client_overwrite_init(); lttng_ring_buffer_client_overwrite_rt_init(); @@ -1795,6 +1797,7 @@ void lttng_ust_cleanup(int exiting) lttng_ring_buffer_client_overwrite_rt_exit(); lttng_ring_buffer_client_overwrite_exit(); lttng_ring_buffer_metadata_client_exit(); + rseq_destroy(); lttng_ust_statedump_destroy(); exit_tracepoint(); if (!exiting) { diff --git a/libringbuffer/frontend_api.h b/libringbuffer/frontend_api.h index 14015973..19fc09e9 100644 --- a/libringbuffer/frontend_api.h +++ b/libringbuffer/frontend_api.h @@ -34,8 +34,14 @@ #include #include +static inline +int lib_ring_buffer_get_cpu(const struct lttng_ust_lib_ring_buffer_config *config) +{ + return lttng_ust_get_cpu(); +} + /** - * lib_ring_buffer_get_cpu - Precedes ring buffer reserve/commit. + * lib_ring_buffer_begin - Precedes ring buffer reserve/commit. * * Keeps a ring buffer nesting count as supplementary safety net to * ensure tracer client code will never trigger an endless recursion. @@ -49,11 +55,10 @@ * section. */ static inline -int lib_ring_buffer_get_cpu(const struct lttng_ust_lib_ring_buffer_config *config) +int lib_ring_buffer_begin(const struct lttng_ust_lib_ring_buffer_config *config) { - int cpu, nesting; + int nesting; - cpu = lttng_ust_get_cpu(); nesting = ++URCU_TLS(lib_ring_buffer_nesting); cmm_barrier(); @@ -61,15 +66,15 @@ int lib_ring_buffer_get_cpu(const struct lttng_ust_lib_ring_buffer_config *confi WARN_ON_ONCE(1); URCU_TLS(lib_ring_buffer_nesting)--; return -EPERM; - } else - return cpu; + } + return 0; } /** - * lib_ring_buffer_put_cpu - Follows ring buffer reserve/commit. + * lib_ring_buffer_end - Follows ring buffer reserve/commit. 
*/ static inline -void lib_ring_buffer_put_cpu(const struct lttng_ust_lib_ring_buffer_config *config) +void lib_ring_buffer_end(const struct lttng_ust_lib_ring_buffer_config *config) { cmm_barrier(); URCU_TLS(lib_ring_buffer_nesting)--; /* TLS */ diff --git a/libringbuffer/rseq-arm.h b/libringbuffer/rseq-arm.h index c0b172c2..656b87ef 100644 --- a/libringbuffer/rseq-arm.h +++ b/libringbuffer/rseq-arm.h @@ -73,7 +73,7 @@ do { \ _teardown \ "b %l[failure]\n\t" \ "4:\n\t" \ - : /* no outputs */ \ + : /* gcc asm goto does not allow outputs */ \ : [start_event_counter]"r"((_start_value).event_counter), \ [current_event_counter]"m"((_start_value).rseqp->u.e.event_counter), \ [rseq_cs]"r"(&(_start_value).rseqp->rseq_cs) \ diff --git a/libringbuffer/rseq-ppc.h b/libringbuffer/rseq-ppc.h index 6b1b13a3..ac3c1862 100644 --- a/libringbuffer/rseq-ppc.h +++ b/libringbuffer/rseq-ppc.h @@ -76,7 +76,7 @@ _teardown \ "b %l[failure]\n\t" \ "5:\n\t" \ - : /* no outputs */ \ + : /* gcc asm goto does not allow outputs */ \ : [start_event_counter]"r"((_start_value).event_counter), \ [current_event_counter]"m"((_start_value).rseqp->u.e.event_counter), \ [rseq_cs]"b"(&(_start_value).rseqp->rseq_cs) \ @@ -182,7 +182,7 @@ _teardown \ "b %l[failure]\n\t" \ "5:\n\t" \ - : /* no outputs */ \ + : /* gcc asm goto does not allow outputs */ \ : [start_event_counter]"r"((_start_value).event_counter), \ [current_event_counter]"m"((_start_value).rseqp->u.e.event_counter), \ [rseq_cs]"b"(&(_start_value).rseqp->rseq_cs) \ diff --git a/libringbuffer/rseq-x86.h b/libringbuffer/rseq-x86.h index 29e5647d..556f4544 100644 --- a/libringbuffer/rseq-x86.h +++ b/libringbuffer/rseq-x86.h @@ -46,7 +46,8 @@ do { \ "1:\n\t" \ _setup \ RSEQ_INJECT_ASM(1) \ - "movq $3b, %[rseq_cs]\n\t" \ + "leaq 3b(%%rip), %%rax\n\t" \ + "movq %%rax, %[rseq_cs]\n\t" \ RSEQ_INJECT_ASM(2) \ "cmpl %[start_event_counter], %[current_event_counter]\n\t" \ "jnz 4f\n\t" \ @@ -63,14 +64,14 @@ do { \ _teardown \ "jmp %l[failure]\n\t" \ ".popsection\n\t" \ - : /* no outputs */ \ + : /* gcc asm goto does not allow outputs */ \ : [start_event_counter]"r"((_start_value).event_counter), \ [current_event_counter]"m"((_start_value).rseqp->u.e.event_counter), \ [rseq_cs]"m"((_start_value).rseqp->rseq_cs) \ _spec_input \ _final_input \ RSEQ_INJECT_INPUT \ - : "memory", "cc" \ + : "memory", "cc", "rax" \ _extra_clobber \ RSEQ_INJECT_CLOBBER \ : _failure \ @@ -181,7 +182,7 @@ do { \ _teardown \ "jmp %l[failure]\n\t" \ ".popsection\n\t" \ - : /* no outputs */ \ + : /* gcc asm goto does not allow outputs */ \ : [start_event_counter]"m"((_start_value).event_counter), \ [current_event_counter]"m"((_start_value).rseqp->u.e.event_counter), \ [rseq_cs]"m"((_start_value).rseqp->rseq_cs) \ diff --git a/libringbuffer/rseq.c b/libringbuffer/rseq.c index 219f416e..0be7d10b 100644 --- a/libringbuffer/rseq.c +++ b/libringbuffer/rseq.c @@ -31,10 +31,24 @@ __attribute__((weak)) __thread volatile struct rseq __rseq_abi = { .u.e.cpu_id = -1, }; +/* Own state, not shared with other libs. 
*/ +static __thread int rseq_registered; + +static pthread_key_t rseq_key; + + +#ifdef __NR_rseq static int sys_rseq(volatile struct rseq *rseq_abi, int flags) { return syscall(__NR_rseq, rseq_abi, flags); } +#else +static int sys_rseq(volatile struct rseq *rseq_abi, int flags) +{ + errno = ENOSYS; + return -1; +} +#endif static void signal_off_save(sigset_t *oldset) { @@ -56,29 +70,82 @@ static void signal_restore(sigset_t oldset) abort(); } +int rseq_unregister_current_thread(void) +{ + sigset_t oldset; + int rc, ret = 0; + + signal_off_save(&oldset); + if (rseq_registered) { + rc = sys_rseq(NULL, 0); + if (rc) { + fprintf(stderr, "Error: sys_rseq(...) failed(%d): %s\n", + errno, strerror(errno)); + ret = -1; + goto end; + } + rseq_registered = 0; + } +end: + signal_restore(oldset); + return ret; +} + +static void destroy_rseq_key(void *key) +{ + if (rseq_unregister_current_thread()) + abort(); +} + int rseq_register_current_thread(void) { - int rc; + sigset_t oldset; + int rc, ret = 0; - rc = sys_rseq(&__rseq_abi, 0); - if (rc) { - fprintf(stderr, "Error: sys_rseq(...) failed(%d): %s\n", - errno, strerror(errno)); - return -1; + signal_off_save(&oldset); + if (caa_likely(!rseq_registered)) { + rc = sys_rseq(&__rseq_abi, 0); + if (rc) { + fprintf(stderr, "Error: sys_rseq(...) failed(%d): %s\n", + errno, strerror(errno)); + __rseq_abi.u.e.cpu_id = -2; + ret = -1; + goto end; + } + rseq_registered = 1; + assert(rseq_current_cpu_raw() >= 0); + /* + * Register destroy notifier. Pointer needs to + * be non-NULL. + */ + if (pthread_setspecific(rseq_key, (void *)0x1)) + abort(); } - assert(rseq_current_cpu() >= 0); - return 0; +end: + signal_restore(oldset); + return ret; } -int rseq_unregister_current_thread(void) +void rseq_init(void) { - int rc; + int ret; - rc = sys_rseq(NULL, 0); - if (rc) { - fprintf(stderr, "Error: sys_rseq(...) failed(%d): %s\n", - errno, strerror(errno)); - return -1; + ret = pthread_key_create(&rseq_key, destroy_rseq_key); + if (ret) { + errno = -ret; + perror("pthread_key_create"); + abort(); + } +} + +void rseq_destroy(void) +{ + int ret; + + ret = pthread_key_delete(rseq_key); + if (ret) { + errno = -ret; + perror("pthread_key_delete"); + abort(); } - return 0; } diff --git a/libringbuffer/rseq.h b/libringbuffer/rseq.h index 1f07b1c5..ffb9c50a 100644 --- a/libringbuffer/rseq.h +++ b/libringbuffer/rseq.h @@ -34,9 +34,11 @@ #include #include #include +#include #include #include #include +#include /* for struct lttng_rseq_state */ #include "linux-rseq-abi.h" /* @@ -71,22 +73,24 @@ extern __thread volatile struct rseq __rseq_abi; #if defined(__x86_64__) || defined(__i386__) -#include +#include "rseq-x86.h" +#ifdef __NR_rseq +#define ARCH_HAS_RSEQ 1 +#endif #elif defined(__ARMEL__) -#include +#include "rseq-arm.h" +#ifdef __NR_rseq +#define ARCH_HAS_RSEQ 1 +#endif #elif defined(__PPC__) -#include +#include "rseq-ppc.h" +#ifdef __NR_rseq +#define ARCH_HAS_RSEQ 1 +#endif #else #error unsupported target #endif -/* State returned by rseq_start, passed as argument to rseq_finish. */ -struct rseq_state { - volatile struct rseq *rseqp; - int32_t cpu_id; /* cpu_id at start. */ - uint32_t event_counter; /* event_counter at start. */ -}; - /* * Register rseq for the current thread. This needs to be called once * by any thread which uses restartable sequences, before they start @@ -99,12 +103,10 @@ int rseq_register_current_thread(void); */ int rseq_unregister_current_thread(void); -/* - * Restartable sequence fallback for reading the current CPU number. 
- */ -int rseq_fallback_current_cpu(void); +void rseq_init(void); +void rseq_destroy(void); -static inline int32_t rseq_cpu_at_start(struct rseq_state start_value) +static inline int32_t rseq_cpu_at_start(struct lttng_rseq_state start_value) { return start_value.cpu_id; } @@ -114,20 +116,11 @@ static inline int32_t rseq_current_cpu_raw(void) return CMM_LOAD_SHARED(__rseq_abi.u.e.cpu_id); } -static inline int32_t rseq_current_cpu(void) -{ - int32_t cpu; - - cpu = rseq_current_cpu_raw(); - if (caa_unlikely(cpu < 0)) - cpu = rseq_fallback_current_cpu(); - return cpu; -} - +#ifdef ARCH_HAS_RSEQ static inline __attribute__((always_inline)) -struct rseq_state rseq_start(void) +struct lttng_rseq_state rseq_start(void) { - struct rseq_state result; + struct lttng_rseq_state result; result.rseqp = &__rseq_abi; if (has_single_copy_load_64()) { @@ -161,6 +154,16 @@ struct rseq_state rseq_start(void) cmm_barrier(); return result; } +#else +static inline __attribute__((always_inline)) +struct lttng_rseq_state rseq_start(void) +{ + struct lttng_rseq_state result = { + .cpu_id = -2, + }; + return result; +} +#endif enum rseq_finish_type { RSEQ_FINISH_SINGLE, @@ -176,11 +179,12 @@ enum rseq_finish_type { * p_final and to_write_final are used for the final write. If this * write takes place, the rseq_finish2 is guaranteed to succeed. */ +#ifdef ARCH_HAS_RSEQ static inline __attribute__((always_inline)) bool __rseq_finish(intptr_t *p_spec, intptr_t to_write_spec, void *p_memcpy, void *to_write_memcpy, size_t len_memcpy, intptr_t *p_final, intptr_t to_write_final, - struct rseq_state start_value, + struct lttng_rseq_state start_value, enum rseq_finish_type type, bool release) { RSEQ_INJECT_C(9) @@ -247,10 +251,21 @@ failure: RSEQ_INJECT_FAILED return false; } +#else +static inline __attribute__((always_inline)) +bool __rseq_finish(intptr_t *p_spec, intptr_t to_write_spec, + void *p_memcpy, void *to_write_memcpy, size_t len_memcpy, + intptr_t *p_final, intptr_t to_write_final, + struct lttng_rseq_state start_value, + enum rseq_finish_type type, bool release) +{ + return false; +} +#endif static inline __attribute__((always_inline)) bool rseq_finish(intptr_t *p, intptr_t to_write, - struct rseq_state start_value) + struct lttng_rseq_state start_value) { return __rseq_finish(NULL, 0, NULL, NULL, 0, @@ -261,7 +276,7 @@ bool rseq_finish(intptr_t *p, intptr_t to_write, static inline __attribute__((always_inline)) bool rseq_finish2(intptr_t *p_spec, intptr_t to_write_spec, intptr_t *p_final, intptr_t to_write_final, - struct rseq_state start_value) + struct lttng_rseq_state start_value) { return __rseq_finish(p_spec, to_write_spec, NULL, NULL, 0, @@ -272,7 +287,7 @@ bool rseq_finish2(intptr_t *p_spec, intptr_t to_write_spec, static inline __attribute__((always_inline)) bool rseq_finish2_release(intptr_t *p_spec, intptr_t to_write_spec, intptr_t *p_final, intptr_t to_write_final, - struct rseq_state start_value) + struct lttng_rseq_state start_value) { return __rseq_finish(p_spec, to_write_spec, NULL, NULL, 0, @@ -283,7 +298,7 @@ bool rseq_finish2_release(intptr_t *p_spec, intptr_t to_write_spec, static inline __attribute__((always_inline)) bool rseq_finish_memcpy(void *p_memcpy, void *to_write_memcpy, size_t len_memcpy, intptr_t *p_final, intptr_t to_write_final, - struct rseq_state start_value) + struct lttng_rseq_state start_value) { return __rseq_finish(NULL, 0, p_memcpy, to_write_memcpy, len_memcpy, @@ -294,7 +309,7 @@ bool rseq_finish_memcpy(void *p_memcpy, void *to_write_memcpy, static inline 
__attribute__((always_inline)) bool rseq_finish_memcpy_release(void *p_memcpy, void *to_write_memcpy, size_t len_memcpy, intptr_t *p_final, intptr_t to_write_final, - struct rseq_state start_value) + struct lttng_rseq_state start_value) { return __rseq_finish(NULL, 0, p_memcpy, to_write_memcpy, len_memcpy, -- 2.34.1
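
---

Illustrative note (not part of the patch): the stand-alone sketch below shows how the new API is meant to be combined by a caller: rseq_start() reads the current CPU, lazy registration is attempted when the thread is not yet registered, a plain getcpu-style path is used when rseq is unavailable, and rseq_finish() performs the final store only if the thread was neither preempted nor migrated since rseq_start(). It mirrors the retry logic added to lttng_event_reserve() above; the per-CPU counter, the NR_CPUS bound and the sched_getcpu() fallback are hypothetical stand-ins for the ring-buffer code.

#define _GNU_SOURCE
#include <sched.h>	/* sched_getcpu(): stand-in for the getcpu fallback */
#include <stdint.h>
#include <stdbool.h>
#include "rseq.h"	/* rseq_init(), rseq_start(), rseq_finish(), ... */

#define NR_CPUS	4096			/* illustrative upper bound */

static intptr_t per_cpu_counter[NR_CPUS];	/* hypothetical per-CPU data */

/* rseq_init() must have been called once at process start,
 * as lttng_ust_init() now does. */

static void per_cpu_counter_inc(void)
{
	for (;;) {
		struct lttng_rseq_state state;
		int32_t cpu;

		state = rseq_start();
		cpu = rseq_cpu_at_start(state);
		if (cpu < 0) {
			/* cpu == -1: thread not registered yet, try lazy registration. */
			if (cpu == -1 && !rseq_register_current_thread())
				continue;
			/* rseq unavailable (-2) or registration failed: plain fallback. */
			cpu = sched_getcpu();
			if (cpu < 0)
				return;
			__sync_fetch_and_add(&per_cpu_counter[cpu], 1);
			return;
		}
		/*
		 * Speculatively compute the new value; rseq_finish() commits
		 * the final store only if no preemption or migration occurred
		 * since rseq_start(), otherwise it fails and we retry on the
		 * (possibly new) CPU.
		 */
		if (rseq_finish(&per_cpu_counter[cpu],
				per_cpu_counter[cpu] + 1, state))
			return;
	}
}

The ring-buffer client in this patch only needs the cpu_id part of this pattern (the reserve/commit path keeps its existing commit-counter scheme), but the fallback ordering (lazy registration first, then the non-rseq path) is the same as in lttng_event_reserve() above.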