From b76e5200cf54fd88a687ebc2fca52ea530d793d6 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Sat, 17 Sep 2016 19:44:25 -0400 Subject: [PATCH] Restartable sequences: add rseq library and abi header Signed-off-by: Mathieu Desnoyers --- libringbuffer/Makefile.am | 3 +- libringbuffer/linux-rseq-abi.h | 106 ++++++++ libringbuffer/rseq-arm.h | 168 ++++++++++++ libringbuffer/rseq-ppc.h | 273 +++++++++++++++++++ libringbuffer/rseq-x86.h | 306 +++++++++++++++++++++ libringbuffer/rseq.c | 247 +++++++++++++++++ libringbuffer/rseq.h | 477 +++++++++++++++++++++++++++++++++ 7 files changed, 1579 insertions(+), 1 deletion(-) create mode 100644 libringbuffer/linux-rseq-abi.h create mode 100644 libringbuffer/rseq-arm.h create mode 100644 libringbuffer/rseq-ppc.h create mode 100644 libringbuffer/rseq-x86.h create mode 100644 libringbuffer/rseq.c create mode 100644 libringbuffer/rseq.h diff --git a/libringbuffer/Makefile.am b/libringbuffer/Makefile.am index 271c8bee..1750e42d 100644 --- a/libringbuffer/Makefile.am +++ b/libringbuffer/Makefile.am @@ -11,7 +11,8 @@ libringbuffer_la_SOURCES = \ api.h \ backend.h backend_internal.h backend_types.h \ frontend_api.h frontend.h frontend_internal.h frontend_types.h \ - nohz.h vatomic.h tlsfixup.h + nohz.h vatomic.h tlsfixup.h \ + rseq.c rseq.h rseq-x86.h rseq-arm.h rseq-ppc.h libringbuffer_la_LIBADD = \ -lpthread \ diff --git a/libringbuffer/linux-rseq-abi.h b/libringbuffer/linux-rseq-abi.h new file mode 100644 index 00000000..ee45be65 --- /dev/null +++ b/libringbuffer/linux-rseq-abi.h @@ -0,0 +1,106 @@ +#ifndef _UAPI_LINUX_RSEQ_H +#define _UAPI_LINUX_RSEQ_H + +/* + * linux/rseq.h + * + * Restartable sequences system call API + * + * Copyright (c) 2015-2016 Mathieu Desnoyers + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifdef __KERNEL__ +# include +#else /* #ifdef __KERNEL__ */ +# include +#endif /* #else #ifdef __KERNEL__ */ + +#include + +#ifdef __LP64__ +# define RSEQ_FIELD_u32_u64(field) uint64_t field +#elif defined(__BYTE_ORDER) ? \ + __BYTE_ORDER == __BIG_ENDIAN : defined(__BIG_ENDIAN) +# define RSEQ_FIELD_u32_u64(field) uint32_t _padding ## field, field +#else +# define RSEQ_FIELD_u32_u64(field) uint32_t field, _padding ## field +#endif + +enum rseq_flags { + RSEQ_FORCE_UNREGISTER = (1 << 0), +}; + +/* + * struct rseq_cs is aligned on 4 * 8 bytes to ensure it is always + * contained within a single cache-line. It is usually declared as + * link-time constant data. 
+ */ +struct rseq_cs { + RSEQ_FIELD_u32_u64(start_ip); + RSEQ_FIELD_u32_u64(post_commit_ip); + RSEQ_FIELD_u32_u64(abort_ip); +} __attribute__((aligned(4 * sizeof(uint64_t)))); + +union rseq_cpu_event { + struct { + /* + * Restartable sequences cpu_id field. + * Updated by the kernel, and read by user-space with + * single-copy atomicity semantics. Aligned on 32-bit. + * Negative values are reserved for user-space. + */ + int32_t cpu_id; + /* + * Restartable sequences event_counter field. + * Updated by the kernel, and read by user-space with + * single-copy atomicity semantics. Aligned on 32-bit. + */ + uint32_t event_counter; + } e; + /* + * On architectures with 64-bit aligned reads, both cpu_id and + * event_counter can be read with single-copy atomicity + * semantics. + */ + uint64_t v; +}; + +/* + * struct rseq is aligned on 2 * 8 bytes to ensure it is always + * contained within a single cache-line. + */ +struct rseq { + union rseq_cpu_event u; + /* + * Restartable sequences rseq_cs field. + * Contains NULL when no critical section is active for the + * current thread, or holds a pointer to the currently active + * struct rseq_cs. + * Updated by user-space at the beginning and end of assembly + * instruction sequence block, and by the kernel when it + * restarts an assembly instruction sequence block. Read by the + * kernel with single-copy atomicity semantics. Aligned on + * 64-bit. + */ + RSEQ_FIELD_u32_u64(rseq_cs); +} __attribute__((aligned(2 * sizeof(uint64_t)))); + +#endif /* _UAPI_LINUX_RSEQ_H */ diff --git a/libringbuffer/rseq-arm.h b/libringbuffer/rseq-arm.h new file mode 100644 index 00000000..289abd49 --- /dev/null +++ b/libringbuffer/rseq-arm.h @@ -0,0 +1,168 @@ +/* + * rseq-arm.h + * + * (C) Copyright 2016 - Mathieu Desnoyers + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#define smp_mb() __asm__ __volatile__ ("dmb" : : : "memory") +#define smp_rmb() __asm__ __volatile__ ("dmb" : : : "memory") +#define smp_wmb() __asm__ __volatile__ ("dmb" : : : "memory") + +#define smp_load_acquire(p) \ +__extension__ ({ \ + __typeof(*p) ____p1 = READ_ONCE(*p); \ + smp_mb(); \ + ____p1; \ +}) + +#define smp_acquire__after_ctrl_dep() smp_rmb() + +#define smp_store_release(p, v) \ +do { \ + smp_mb(); \ + WRITE_ONCE(*p, v); \ +} while (0) + +#define has_fast_acquire_release() 0 +#define has_single_copy_load_64() 1 + +/* + * The __rseq_table section can be used by debuggers to better handle + * single-stepping through the restartable critical sections. 
+ * + * Load the immediate value 0 into register r1 right after the ldr + * instruction to improve instruction-level parallelism: load the + * constant while the processor is stalled waiting for the load to + * complete, which is required by the following comparison and branch. + */ + +#define RSEQ_FINISH_ASM(_target_final, _to_write_final, _start_value, \ + _failure, _spec_store, _spec_input, \ + _final_store, _final_input, _extra_clobber, \ + _setup, _teardown, _scratch) \ +do { \ + _scratch \ + __asm__ __volatile__ goto ( \ + ".pushsection __rseq_table, \"aw\"\n\t" \ + ".balign 32\n\t" \ + ".word 1f, 0x0, 2f, 0x0, 5f, 0x0, 0x0, 0x0\n\t" \ + ".popsection\n\t" \ + "1:\n\t" \ + _setup \ + RSEQ_INJECT_ASM(1) \ + "adr r0, 3f\n\t" \ + "str r0, [%[rseq_cs]]\n\t" \ + RSEQ_INJECT_ASM(2) \ + "ldr r0, %[current_event_counter]\n\t" \ + "mov r1, #0\n\t" \ + "cmp %[start_event_counter], r0\n\t" \ + "bne 5f\n\t" \ + RSEQ_INJECT_ASM(3) \ + _spec_store \ + _final_store \ + "2:\n\t" \ + RSEQ_INJECT_ASM(5) \ + "str r1, [%[rseq_cs]]\n\t" \ + _teardown \ + "b 4f\n\t" \ + ".balign 32\n\t" \ + "3:\n\t" \ + ".word 1b, 0x0, 2b, 0x0, 5f, 0x0, 0x0, 0x0\n\t" \ + "5:\n\t" \ + "mov r1, #0\n\t" \ + "str r1, [%[rseq_cs]]\n\t" \ + _teardown \ + "b %l[failure]\n\t" \ + "4:\n\t" \ + : /* no outputs */ \ + : [start_event_counter]"r"((_start_value).event_counter), \ + [current_event_counter]"m"((_start_value).rseqp->u.e.event_counter), \ + [rseq_cs]"r"(&(_start_value).rseqp->rseq_cs) \ + _spec_input \ + _final_input \ + RSEQ_INJECT_INPUT \ + : "r0", "r1", "memory", "cc" \ + _extra_clobber \ + RSEQ_INJECT_CLOBBER \ + : _failure \ + ); \ +} while (0) + +#define RSEQ_FINISH_FINAL_STORE_ASM() \ + "str %[to_write_final], [%[target_final]]\n\t" + +#define RSEQ_FINISH_FINAL_STORE_RELEASE_ASM() \ + "dmb\n\t" \ + RSEQ_FINISH_FINAL_STORE_ASM() + +#define RSEQ_FINISH_FINAL_STORE_INPUT(_target_final, _to_write_final) \ + , [to_write_final]"r"(_to_write_final), \ + [target_final]"r"(_target_final) + +#define RSEQ_FINISH_SPECULATIVE_STORE_ASM() \ + "str %[to_write_spec], [%[target_spec]]\n\t" \ + RSEQ_INJECT_ASM(4) + +#define RSEQ_FINISH_SPECULATIVE_STORE_INPUT(_target_spec, _to_write_spec) \ + , [to_write_spec]"r"(_to_write_spec), \ + [target_spec]"r"(_target_spec) + +/* TODO: implement a faster memcpy. */ +#define RSEQ_FINISH_MEMCPY_STORE_ASM() \ + "cmp %[len_memcpy], #0\n\t" \ + "beq 333f\n\t" \ + "222:\n\t" \ + "ldrb %%r0, [%[to_write_memcpy]]\n\t" \ + "strb %%r0, [%[target_memcpy]]\n\t" \ + "adds %[to_write_memcpy], #1\n\t" \ + "adds %[target_memcpy], #1\n\t" \ + "subs %[len_memcpy], #1\n\t" \ + "bne 222b\n\t" \ + "333:\n\t" \ + RSEQ_INJECT_ASM(4) + +#define RSEQ_FINISH_MEMCPY_STORE_INPUT(_target_memcpy, _to_write_memcpy, _len_memcpy) \ + , [to_write_memcpy]"r"(_to_write_memcpy), \ + [target_memcpy]"r"(_target_memcpy), \ + [len_memcpy]"r"(_len_memcpy), \ + [rseq_scratch0]"m"(rseq_scratch[0]), \ + [rseq_scratch1]"m"(rseq_scratch[1]), \ + [rseq_scratch2]"m"(rseq_scratch[2]) + +/* We can use r0. */ +#define RSEQ_FINISH_MEMCPY_CLOBBER() + +#define RSEQ_FINISH_MEMCPY_SCRATCH() \ + uint32_t rseq_scratch[3]; + +/* + * We need to save and restore those input registers so they can be + * modified within the assembly. 
+ */ +#define RSEQ_FINISH_MEMCPY_SETUP() \ + "str %[to_write_memcpy], %[rseq_scratch0]\n\t" \ + "str %[target_memcpy], %[rseq_scratch1]\n\t" \ + "str %[len_memcpy], %[rseq_scratch2]\n\t" + +#define RSEQ_FINISH_MEMCPY_TEARDOWN() \ + "ldr %[len_memcpy], %[rseq_scratch2]\n\t" \ + "ldr %[target_memcpy], %[rseq_scratch1]\n\t" \ + "ldr %[to_write_memcpy], %[rseq_scratch0]\n\t" diff --git a/libringbuffer/rseq-ppc.h b/libringbuffer/rseq-ppc.h new file mode 100644 index 00000000..8a76d07f --- /dev/null +++ b/libringbuffer/rseq-ppc.h @@ -0,0 +1,273 @@ +/* + * rseq-ppc.h + * + * (C) Copyright 2016 - Mathieu Desnoyers + * (C) Copyright 2016 - Boqun Feng + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#define smp_mb() __asm__ __volatile__ ("sync" : : : "memory") +#define smp_lwsync() __asm__ __volatile__ ("lwsync" : : : "memory") +#define smp_rmb() smp_lwsync() +#define smp_wmb() smp_lwsync() + +#define smp_load_acquire(p) \ +__extension__ ({ \ + __typeof(*p) ____p1 = READ_ONCE(*p); \ + smp_lwsync(); \ + ____p1; \ +}) + +#define smp_acquire__after_ctrl_dep() smp_lwsync() + +#define smp_store_release(p, v) \ +do { \ + smp_lwsync(); \ + WRITE_ONCE(*p, v); \ +} while (0) + +#define has_fast_acquire_release() 0 + +#ifdef __PPC64__ +#define has_single_copy_load_64() 1 +#else +#define has_single_copy_load_64() 0 +#endif + +/* + * The __rseq_table section can be used by debuggers to better handle + * single-stepping through the restartable critical sections. 
+ */ + +#ifdef __PPC64__ + +#define RSEQ_FINISH_ASM(_target_final, _to_write_final, _start_value, \ + _failure, _spec_store, _spec_input, \ + _final_store, _final_input, _extra_clobber, \ + _setup, _teardown, _scratch) \ + __asm__ __volatile__ goto ( \ + ".pushsection __rseq_table, \"aw\"\n\t" \ + ".balign 32\n\t" \ + "3:\n\t" \ + ".quad 1f, 2f, 4f, 0x0\n\t" \ + ".popsection\n\t" \ + "1:\n\t" \ + _setup \ + RSEQ_INJECT_ASM(1) \ + "lis %%r17, (3b)@highest\n\t" \ + "ori %%r17, %%r17, (3b)@higher\n\t" \ + "rldicr %%r17, %%r17, 32, 31\n\t" \ + "oris %%r17, %%r17, (3b)@h\n\t" \ + "ori %%r17, %%r17, (3b)@l\n\t" \ + "std %%r17, 0(%[rseq_cs])\n\t" \ + RSEQ_INJECT_ASM(2) \ + "lwz %%r17, %[current_event_counter]\n\t" \ + "cmpw cr7, %[start_event_counter], %%r17\n\t" \ + "bne- cr7, 4f\n\t" \ + RSEQ_INJECT_ASM(3) \ + _spec_store \ + _final_store \ + "2:\n\t" \ + RSEQ_INJECT_ASM(5) \ + "li %%r17, 0\n\t" \ + "std %%r17, 0(%[rseq_cs])\n\t" \ + _teardown \ + "b 5f\n\t" \ + "4:\n\t" \ + "li %%r17, 0\n\t" \ + "std %%r17, 0(%[rseq_cs])\n\t" \ + _teardown \ + "b %l[failure]\n\t" \ + "5:\n\t" \ + : /* no outputs */ \ + : [start_event_counter]"r"((_start_value).event_counter), \ + [current_event_counter]"m"((_start_value).rseqp->u.e.event_counter), \ + [rseq_cs]"b"(&(_start_value).rseqp->rseq_cs) \ + _spec_input \ + _final_input \ + RSEQ_INJECT_INPUT \ + : "r17", "memory", "cc" \ + _extra_clobber \ + RSEQ_INJECT_CLOBBER \ + : _failure \ + ) + +#define RSEQ_FINISH_FINAL_STORE_ASM() \ + "std %[to_write_final], 0(%[target_final])\n\t" + +#define RSEQ_FINISH_FINAL_STORE_RELEASE_ASM() \ + "lwsync\n\t" \ + RSEQ_FINISH_FINAL_STORE_ASM() + +#define RSEQ_FINISH_FINAL_STORE_INPUT(_target_final, _to_write_final) \ + , [to_write_final]"r"(_to_write_final), \ + [target_final]"b"(_target_final) + +#define RSEQ_FINISH_SPECULATIVE_STORE_ASM() \ + "std %[to_write_spec], 0(%[target_spec])\n\t" \ + RSEQ_INJECT_ASM(4) + +#define RSEQ_FINISH_SPECULATIVE_STORE_INPUT(_target_spec, _to_write_spec) \ + , [to_write_spec]"r"(_to_write_spec), \ + [target_spec]"b"(_target_spec) + +/* TODO: implement a faster memcpy. */ +#define RSEQ_FINISH_MEMCPY_STORE_ASM() \ + "cmpdi %%r19, 0\n\t" \ + "beq 333f\n\t" \ + "addi %%r20, %%r20, -1\n\t" \ + "addi %%r21, %%r21, -1\n\t" \ + "222:\n\t" \ + "lbzu %%r18, 1(%%r20)\n\t" \ + "stbu %%r18, 1(%%r21)\n\t" \ + "addi %%r19, %%r19, -1\n\t" \ + "cmpdi %%r19, 0\n\t" \ + "bne 222b\n\t" \ + "333:\n\t" \ + RSEQ_INJECT_ASM(4) + +#define RSEQ_FINISH_MEMCPY_STORE_INPUT(_target_memcpy, _to_write_memcpy, _len_memcpy) \ + , [to_write_memcpy]"r"(_to_write_memcpy), \ + [target_memcpy]"r"(_target_memcpy), \ + [len_memcpy]"r"(_len_memcpy) + +#define RSEQ_FINISH_MEMCPY_CLOBBER() \ + , "r18", "r19", "r20", "r21" + +#define RSEQ_FINISH_MEMCPY_SCRATCH() + +/* + * We use extra registers to hold the input registers, and we don't need to + * save and restore the input registers. 
+ */ +#define RSEQ_FINISH_MEMCPY_SETUP() \ + "mr %%r19, %[len_memcpy]\n\t" \ + "mr %%r20, %[to_write_memcpy]\n\t" \ + "mr %%r21, %[target_memcpy]\n\t" \ + +#define RSEQ_FINISH_MEMCPY_TEARDOWN() + +#else /* #ifdef __PPC64__ */ + +#define RSEQ_FINISH_ASM(_target_final, _to_write_final, _start_value, \ + _failure, _spec_store, _spec_input, \ + _final_store, _final_input, _extra_clobber, \ + _setup, _teardown, _scratch) \ + __asm__ __volatile__ goto ( \ + ".pushsection __rseq_table, \"aw\"\n\t" \ + ".balign 32\n\t" \ + "3:\n\t" \ + /* 32-bit only supported on BE */ \ + ".long 0x0, 1f, 0x0, 2f, 0x0, 4f, 0x0, 0x0\n\t" \ + ".popsection\n\t" \ + "1:\n\t" \ + _setup \ + RSEQ_INJECT_ASM(1) \ + "lis %%r17, (3b)@ha\n\t" \ + "addi %%r17, %%r17, (3b)@l\n\t" \ + "stw %%r17, 0(%[rseq_cs])\n\t" \ + RSEQ_INJECT_ASM(2) \ + "lwz %%r17, %[current_event_counter]\n\t" \ + "cmpw cr7, %[start_event_counter], %%r17\n\t" \ + "bne- cr7, 4f\n\t" \ + RSEQ_INJECT_ASM(3) \ + _spec_store \ + _final_store \ + "2:\n\t" \ + RSEQ_INJECT_ASM(5) \ + "li %%r17, 0\n\t" \ + "stw %%r17, 0(%[rseq_cs])\n\t" \ + _teardown \ + "b 5f\n\t" \ + "4:\n\t" \ + "li %%r17, 0\n\t" \ + "std %%r17, 0(%[rseq_cs])\n\t" \ + _teardown \ + "b %l[failure]\n\t" \ + "5:\n\t" \ + : /* no outputs */ \ + : [start_event_counter]"r"((_start_value).event_counter), \ + [current_event_counter]"m"((_start_value).rseqp->u.e.event_counter), \ + [rseq_cs]"b"(&(_start_value).rseqp->rseq_cs) \ + _spec_input \ + _final_input \ + RSEQ_INJECT_INPUT \ + : "r17", "memory", "cc" \ + _extra_clobber \ + RSEQ_INJECT_CLOBBER \ + : _failure \ + ) + +#define RSEQ_FINISH_FINAL_STORE_ASM() \ + "stw %[to_write_final], 0(%[target_final])\n\t" + +#define RSEQ_FINISH_FINAL_STORE_RELEASE_ASM() \ + "lwsync\n\t" \ + RSEQ_FINISH_FINAL_STORE_ASM() + +#define RSEQ_FINISH_FINAL_STORE_INPUT(_target_final, _to_write_final) \ + , [to_write_final]"r"(_to_write_final), \ + [target_final]"b"(_target_final) + +#define RSEQ_FINISH_SPECULATIVE_STORE_ASM() \ + "stw %[to_write_spec], 0(%[target_spec])\n\t" \ + RSEQ_INJECT_ASM(4) + +#define RSEQ_FINISH_SPECULATIVE_STORE_INPUT(_target_spec, _to_write_spec) \ + , [to_write_spec]"r"(_to_write_spec), \ + [target_spec]"b"(_target_spec) + +/* TODO: implement a faster memcpy. */ +#define RSEQ_FINISH_MEMCPY_STORE_ASM() \ + "cmpwi %%r19, 0\n\t" \ + "beq 333f\n\t" \ + "addi %%r20, %%r20, -1\n\t" \ + "addi %%r21, %%r21, -1\n\t" \ + "222:\n\t" \ + "lbzu %%r18, 1(%%r20)\n\t" \ + "stbu %%r18, 1(%%r21)\n\t" \ + "addi %%r19, %%r19, -1\n\t" \ + "cmpwi %%r19, 0\n\t" \ + "bne 222b\n\t" \ + "333:\n\t" \ + RSEQ_INJECT_ASM(4) + +#define RSEQ_FINISH_MEMCPY_STORE_INPUT(_target_memcpy, _to_write_memcpy, _len_memcpy) \ + , [to_write_memcpy]"r"(_to_write_memcpy), \ + [target_memcpy]"r"(_target_memcpy), \ + [len_memcpy]"r"(_len_memcpy) + +#define RSEQ_FINISH_MEMCPY_CLOBBER() \ + , "r18", "r19", "r20", "r21" + +#define RSEQ_FINISH_MEMCPY_SCRATCH() + +/* + * We use extra registers to hold the input registers, and we don't need to + * save and restore the input registers. 
+ */ +#define RSEQ_FINISH_MEMCPY_SETUP() \ + "mr %%r19, %[len_memcpy]\n\t" \ + "mr %%r20, %[to_write_memcpy]\n\t" \ + "mr %%r21, %[target_memcpy]\n\t" \ + +#define RSEQ_FINISH_MEMCPY_TEARDOWN() + +#endif /* #else #ifdef __PPC64__ */ diff --git a/libringbuffer/rseq-x86.h b/libringbuffer/rseq-x86.h new file mode 100644 index 00000000..7154bfa4 --- /dev/null +++ b/libringbuffer/rseq-x86.h @@ -0,0 +1,306 @@ +/* + * rseq-x86.h + * + * (C) Copyright 2016 - Mathieu Desnoyers + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifdef __x86_64__ + +#define smp_mb() __asm__ __volatile__ ("mfence" : : : "memory") +#define smp_rmb() barrier() +#define smp_wmb() barrier() + +#define smp_load_acquire(p) \ +__extension__ ({ \ + __typeof(*p) ____p1 = READ_ONCE(*p); \ + barrier(); \ + ____p1; \ +}) + +#define smp_acquire__after_ctrl_dep() smp_rmb() + +#define smp_store_release(p, v) \ +do { \ + barrier(); \ + WRITE_ONCE(*p, v); \ +} while (0) + +#define has_fast_acquire_release() 1 +#define has_single_copy_load_64() 1 + +/* + * The __rseq_table section can be used by debuggers to better handle + * single-stepping through the restartable critical sections. 
+ */ +#define RSEQ_FINISH_ASM(_target_final, _to_write_final, _start_value, \ + _failure, _spec_store, _spec_input, \ + _final_store, _final_input, _extra_clobber, \ + _setup, _teardown, _scratch) \ +do { \ + _scratch \ + __asm__ __volatile__ goto ( \ + ".pushsection __rseq_table, \"aw\"\n\t" \ + ".balign 32\n\t" \ + "3:\n\t" \ + ".quad 1f, 2f, 4f, 0x0\n\t" \ + ".popsection\n\t" \ + "1:\n\t" \ + _setup \ + RSEQ_INJECT_ASM(1) \ + "movq $3b, %[rseq_cs]\n\t" \ + RSEQ_INJECT_ASM(2) \ + "cmpl %[start_event_counter], %[current_event_counter]\n\t" \ + "jnz 4f\n\t" \ + RSEQ_INJECT_ASM(3) \ + _spec_store \ + _final_store \ + "2:\n\t" \ + RSEQ_INJECT_ASM(5) \ + "movq $0, %[rseq_cs]\n\t" \ + _teardown \ + ".pushsection __rseq_failure, \"a\"\n\t" \ + "4:\n\t" \ + "movq $0, %[rseq_cs]\n\t" \ + _teardown \ + "jmp %l[failure]\n\t" \ + ".popsection\n\t" \ + : /* no outputs */ \ + : [start_event_counter]"r"((_start_value).event_counter), \ + [current_event_counter]"m"((_start_value).rseqp->u.e.event_counter), \ + [rseq_cs]"m"((_start_value).rseqp->rseq_cs) \ + _spec_input \ + _final_input \ + RSEQ_INJECT_INPUT \ + : "memory", "cc" \ + _extra_clobber \ + RSEQ_INJECT_CLOBBER \ + : _failure \ + ); \ +} while (0) + +#define RSEQ_FINISH_FINAL_STORE_ASM() \ + "movq %[to_write_final], %[target_final]\n\t" + +/* x86-64 is TSO */ +#define RSEQ_FINISH_FINAL_STORE_RELEASE_ASM() \ + RSEQ_FINISH_FINAL_STORE_ASM() + +#define RSEQ_FINISH_FINAL_STORE_INPUT(_target_final, _to_write_final) \ + , [to_write_final]"r"(_to_write_final), \ + [target_final]"m"(*(_target_final)) + +#define RSEQ_FINISH_SPECULATIVE_STORE_ASM() \ + "movq %[to_write_spec], %[target_spec]\n\t" \ + RSEQ_INJECT_ASM(4) + +#define RSEQ_FINISH_SPECULATIVE_STORE_INPUT(_target_spec, _to_write_spec) \ + , [to_write_spec]"r"(_to_write_spec), \ + [target_spec]"m"(*(_target_spec)) + +/* TODO: implement a faster memcpy. */ +#define RSEQ_FINISH_MEMCPY_STORE_ASM() \ + "test %[len_memcpy], %[len_memcpy]\n\t" \ + "jz 333f\n\t" \ + "222:\n\t" \ + "movb (%[to_write_memcpy]), %%al\n\t" \ + "movb %%al, (%[target_memcpy])\n\t" \ + "inc %[to_write_memcpy]\n\t" \ + "inc %[target_memcpy]\n\t" \ + "dec %[len_memcpy]\n\t" \ + "jnz 222b\n\t" \ + "333:\n\t" \ + RSEQ_INJECT_ASM(4) + +#define RSEQ_FINISH_MEMCPY_STORE_INPUT(_target_memcpy, _to_write_memcpy, _len_memcpy) \ + , [to_write_memcpy]"r"(_to_write_memcpy), \ + [target_memcpy]"r"(_target_memcpy), \ + [len_memcpy]"r"(_len_memcpy), \ + [rseq_scratch0]"m"(rseq_scratch[0]), \ + [rseq_scratch1]"m"(rseq_scratch[1]), \ + [rseq_scratch2]"m"(rseq_scratch[2]) + +#define RSEQ_FINISH_MEMCPY_CLOBBER() \ + , "rax" + +#define RSEQ_FINISH_MEMCPY_SCRATCH() \ + uint64_t rseq_scratch[3]; + +/* + * We need to save and restore those input registers so they can be + * modified within the assembly. + */ +#define RSEQ_FINISH_MEMCPY_SETUP() \ + "movq %[to_write_memcpy], %[rseq_scratch0]\n\t" \ + "movq %[target_memcpy], %[rseq_scratch1]\n\t" \ + "movq %[len_memcpy], %[rseq_scratch2]\n\t" + +#define RSEQ_FINISH_MEMCPY_TEARDOWN() \ + "movq %[rseq_scratch2], %[len_memcpy]\n\t" \ + "movq %[rseq_scratch1], %[target_memcpy]\n\t" \ + "movq %[rseq_scratch0], %[to_write_memcpy]\n\t" + +#elif __i386__ + +/* + * Support older 32-bit architectures that do not implement fence + * instructions. 
+ */ +#define smp_mb() \ + __asm__ __volatile__ ("lock; addl $0,0(%%esp)" : : : "memory") +#define smp_rmb() \ + __asm__ __volatile__ ("lock; addl $0,0(%%esp)" : : : "memory") +#define smp_wmb() \ + __asm__ __volatile__ ("lock; addl $0,0(%%esp)" : : : "memory") + +#define smp_load_acquire(p) \ +__extension__ ({ \ + __typeof(*p) ____p1 = READ_ONCE(*p); \ + smp_mb(); \ + ____p1; \ +}) + +#define smp_acquire__after_ctrl_dep() smp_rmb() + +#define smp_store_release(p, v) \ +do { \ + smp_mb(); \ + WRITE_ONCE(*p, v); \ +} while (0) + +#define has_fast_acquire_release() 0 +#define has_single_copy_load_64() 0 + +/* + * Use eax as scratch register and take memory operands as input to + * lessen register pressure. Especially needed when compiling + * do_rseq_memcpy() in O0. + */ +#define RSEQ_FINISH_ASM(_target_final, _to_write_final, _start_value, \ + _failure, _spec_store, _spec_input, \ + _final_store, _final_input, _extra_clobber, \ + _setup, _teardown, _scratch) \ +do { \ + _scratch \ + __asm__ __volatile__ goto ( \ + ".pushsection __rseq_table, \"aw\"\n\t" \ + ".balign 32\n\t" \ + "3:\n\t" \ + ".long 1f, 0x0, 2f, 0x0, 4f, 0x0, 0x0, 0x0\n\t" \ + ".popsection\n\t" \ + "1:\n\t" \ + _setup \ + RSEQ_INJECT_ASM(1) \ + "movl $3b, %[rseq_cs]\n\t" \ + RSEQ_INJECT_ASM(2) \ + "movl %[start_event_counter], %%eax\n\t" \ + "cmpl %%eax, %[current_event_counter]\n\t" \ + "jnz 4f\n\t" \ + RSEQ_INJECT_ASM(3) \ + _spec_store \ + _final_store \ + "2:\n\t" \ + RSEQ_INJECT_ASM(5) \ + "movl $0, %[rseq_cs]\n\t" \ + _teardown \ + ".pushsection __rseq_failure, \"a\"\n\t" \ + "4:\n\t" \ + "movl $0, %[rseq_cs]\n\t" \ + _teardown \ + "jmp %l[failure]\n\t" \ + ".popsection\n\t" \ + : /* no outputs */ \ + : [start_event_counter]"m"((_start_value).event_counter), \ + [current_event_counter]"m"((_start_value).rseqp->u.e.event_counter), \ + [rseq_cs]"m"((_start_value).rseqp->rseq_cs) \ + _spec_input \ + _final_input \ + RSEQ_INJECT_INPUT \ + : "memory", "cc", "eax" \ + _extra_clobber \ + RSEQ_INJECT_CLOBBER \ + : _failure \ + ); \ +} while (0) + +#define RSEQ_FINISH_FINAL_STORE_ASM() \ + "movl %[to_write_final], %%eax\n\t" \ + "movl %%eax, %[target_final]\n\t" + +#define RSEQ_FINISH_FINAL_STORE_RELEASE_ASM() \ + "lock; addl $0,0(%%esp)\n\t" \ + RSEQ_FINISH_FINAL_STORE_ASM() + +#define RSEQ_FINISH_FINAL_STORE_INPUT(_target_final, _to_write_final) \ + , [to_write_final]"m"(_to_write_final), \ + [target_final]"m"(*(_target_final)) + +#define RSEQ_FINISH_SPECULATIVE_STORE_ASM() \ + "movl %[to_write_spec], %%eax\n\t" \ + "movl %%eax, %[target_spec]\n\t" \ + RSEQ_INJECT_ASM(4) + +#define RSEQ_FINISH_SPECULATIVE_STORE_INPUT(_target_spec, _to_write_spec) \ + , [to_write_spec]"m"(_to_write_spec), \ + [target_spec]"m"(*(_target_spec)) + +/* TODO: implement a faster memcpy. 
*/ +#define RSEQ_FINISH_MEMCPY_STORE_ASM() \ + "movl %[len_memcpy], %%eax\n\t" \ + "test %%eax, %%eax\n\t" \ + "jz 333f\n\t" \ + "222:\n\t" \ + "movb (%[to_write_memcpy]), %%al\n\t" \ + "movb %%al, (%[target_memcpy])\n\t" \ + "inc %[to_write_memcpy]\n\t" \ + "inc %[target_memcpy]\n\t" \ + "decl %[rseq_scratch2]\n\t" \ + "jnz 222b\n\t" \ + "333:\n\t" \ + RSEQ_INJECT_ASM(4) + +#define RSEQ_FINISH_MEMCPY_STORE_INPUT(_target_memcpy, _to_write_memcpy, _len_memcpy) \ + , [to_write_memcpy]"r"(_to_write_memcpy), \ + [target_memcpy]"r"(_target_memcpy), \ + [len_memcpy]"m"(_len_memcpy), \ + [rseq_scratch0]"m"(rseq_scratch[0]), \ + [rseq_scratch1]"m"(rseq_scratch[1]), \ + [rseq_scratch2]"m"(rseq_scratch[2]) + +#define RSEQ_FINISH_MEMCPY_CLOBBER() + +#define RSEQ_FINISH_MEMCPY_SCRATCH() \ + uint32_t rseq_scratch[3]; + +/* + * We need to save and restore those input registers so they can be + * modified within the assembly. + */ +#define RSEQ_FINISH_MEMCPY_SETUP() \ + "movl %[to_write_memcpy], %[rseq_scratch0]\n\t" \ + "movl %[target_memcpy], %[rseq_scratch1]\n\t" \ + "movl %[len_memcpy], %%eax\n\t" \ + "movl %%eax, %[rseq_scratch2]\n\t" + +#define RSEQ_FINISH_MEMCPY_TEARDOWN() \ + "movl %[rseq_scratch1], %[target_memcpy]\n\t" \ + "movl %[rseq_scratch0], %[to_write_memcpy]\n\t" + +#endif diff --git a/libringbuffer/rseq.c b/libringbuffer/rseq.c new file mode 100644 index 00000000..c8193a37 --- /dev/null +++ b/libringbuffer/rseq.c @@ -0,0 +1,247 @@ +/* + * rseq.c + * + * Copyright (C) 2016 Mathieu Desnoyers + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; only + * version 2.1 of the License. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + */ + +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#ifdef __NR_membarrier +# define membarrier(...) syscall(__NR_membarrier, __VA_ARGS__) +#else +# define membarrier(...) -ENOSYS +#endif + +struct rseq_thread_state { + uint32_t fallback_wait_cnt; + uint32_t fallback_cnt; + sigset_t sigmask_saved; +}; + +__attribute__((weak)) __thread volatile struct rseq __rseq_abi = { + .u.e.cpu_id = -1, +}; + +static __thread volatile struct rseq_thread_state rseq_thread_state; + +int rseq_has_sys_membarrier; + +static int sys_rseq(volatile struct rseq *rseq_abi, int flags) +{ + return syscall(__NR_rseq, rseq_abi, flags); +} + +int rseq_register_current_thread(void) +{ + int rc; + + rc = sys_rseq(&__rseq_abi, 0); + if (rc) { + fprintf(stderr, "Error: sys_rseq(...) failed(%d): %s\n", + errno, strerror(errno)); + return -1; + } + assert(rseq_current_cpu() >= 0); + return 0; +} + +int rseq_unregister_current_thread(void) +{ + int rc; + + rc = sys_rseq(NULL, 0); + if (rc) { + fprintf(stderr, "Error: sys_rseq(...) 
failed(%d): %s\n", + errno, strerror(errno)); + return -1; + } + return 0; +} + +int rseq_init_lock(struct rseq_lock *rlock) +{ + int ret; + + ret = pthread_mutex_init(&rlock->lock, NULL); + if (ret) { + errno = ret; + return -1; + } + rlock->state = RSEQ_LOCK_STATE_RESTART; + return 0; +} + +int rseq_destroy_lock(struct rseq_lock *rlock) +{ + int ret; + + ret = pthread_mutex_destroy(&rlock->lock); + if (ret) { + errno = ret; + return -1; + } + return 0; +} + +static void signal_off_save(sigset_t *oldset) +{ + sigset_t set; + int ret; + + sigfillset(&set); + ret = pthread_sigmask(SIG_BLOCK, &set, oldset); + if (ret) + abort(); +} + +static void signal_restore(sigset_t oldset) +{ + int ret; + + ret = pthread_sigmask(SIG_SETMASK, &oldset, NULL); + if (ret) + abort(); +} + +static void rseq_fallback_lock(struct rseq_lock *rlock) +{ + signal_off_save((sigset_t *)&rseq_thread_state.sigmask_saved); + pthread_mutex_lock(&rlock->lock); + rseq_thread_state.fallback_cnt++; + /* + * For concurrent threads arriving before we set LOCK: + * reading cpu_id after setting the state to LOCK + * ensures they restart. + */ + ACCESS_ONCE(rlock->state) = RSEQ_LOCK_STATE_LOCK; + /* + * For concurrent threads arriving after we set LOCK: + * those will grab the lock, so we are protected by + * mutual exclusion. + */ +} + +void rseq_fallback_wait(struct rseq_lock *rlock) +{ + signal_off_save((sigset_t *)&rseq_thread_state.sigmask_saved); + pthread_mutex_lock(&rlock->lock); + rseq_thread_state.fallback_wait_cnt++; + pthread_mutex_unlock(&rlock->lock); + signal_restore(rseq_thread_state.sigmask_saved); +} + +static void rseq_fallback_unlock(struct rseq_lock *rlock, int cpu_at_start) +{ + /* + * Concurrent rseq arriving before we set state back to RESTART + * grab the lock. Those arriving after we set state back to + * RESTART will perform restartable critical sections. The next + * owner of the lock will take take of making sure it prevents + * concurrent restartable sequences from completing. We may be + * writing from another CPU, so update the state with a store + * release semantic to ensure restartable sections will see our + * side effect (writing to *p) before they enter their + * restartable critical section. + * + * In cases where we observe that we are on the right CPU after the + * critical section, program order ensures that following restartable + * critical sections will see our stores, so we don't have to use + * store-release or membarrier. + * + * Use sys_membarrier when available to remove the memory barrier + * implied by smp_load_acquire(). + */ + barrier(); + if (likely(rseq_current_cpu() == cpu_at_start)) { + ACCESS_ONCE(rlock->state) = RSEQ_LOCK_STATE_RESTART; + } else { + if (!has_fast_acquire_release() && rseq_has_sys_membarrier) { + if (membarrier(MEMBARRIER_CMD_SHARED, 0)) + abort(); + ACCESS_ONCE(rlock->state) = RSEQ_LOCK_STATE_RESTART; + } else { + /* + * Store with release semantic to ensure + * restartable sections will see our side effect + * (writing to *p) before they enter their + * restartable critical section. Matches + * smp_load_acquire() in rseq_start(). 
+ */ + smp_store_release(&rlock->state, + RSEQ_LOCK_STATE_RESTART); + } + } + pthread_mutex_unlock(&rlock->lock); + signal_restore(rseq_thread_state.sigmask_saved); +} + +int rseq_fallback_current_cpu(void) +{ + int cpu; + + cpu = sched_getcpu(); + if (cpu < 0) { + perror("sched_getcpu()"); + abort(); + } + return cpu; +} + +int rseq_fallback_begin(struct rseq_lock *rlock) +{ + rseq_fallback_lock(rlock); + return rseq_fallback_current_cpu(); +} + +void rseq_fallback_end(struct rseq_lock *rlock, int cpu) +{ + rseq_fallback_unlock(rlock, cpu); +} + +/* Handle non-initialized rseq for this thread. */ +void rseq_fallback_noinit(struct rseq_state *rseq_state) +{ + rseq_state->lock_state = RSEQ_LOCK_STATE_FAIL; + rseq_state->cpu_id = 0; +} + +uint32_t rseq_get_fallback_wait_cnt(void) +{ + return rseq_thread_state.fallback_wait_cnt; +} + +uint32_t rseq_get_fallback_cnt(void) +{ + return rseq_thread_state.fallback_cnt; +} + +void __attribute__((constructor)) rseq_init(void) +{ + int ret; + + ret = membarrier(MEMBARRIER_CMD_QUERY, 0); + if (ret >= 0 && (ret & MEMBARRIER_CMD_SHARED)) + rseq_has_sys_membarrier = 1; +} diff --git a/libringbuffer/rseq.h b/libringbuffer/rseq.h new file mode 100644 index 00000000..e76a9946 --- /dev/null +++ b/libringbuffer/rseq.h @@ -0,0 +1,477 @@ +/* + * rseq.h + * + * (C) Copyright 2016 - Mathieu Desnoyers + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef RSEQ_H +#define RSEQ_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "linux-rseq-abi.h" + +/* + * Empty code injection macros, override when testing. + * It is important to consider that the ASM injection macros need to be + * fully reentrant (e.g. do not modify the stack). 
+ */ +#ifndef RSEQ_INJECT_ASM +#define RSEQ_INJECT_ASM(n) +#endif + +#ifndef RSEQ_INJECT_C +#define RSEQ_INJECT_C(n) +#endif + +#ifndef RSEQ_INJECT_INPUT +#define RSEQ_INJECT_INPUT +#endif + +#ifndef RSEQ_INJECT_CLOBBER +#define RSEQ_INJECT_CLOBBER +#endif + +#ifndef RSEQ_INJECT_FAILED +#define RSEQ_INJECT_FAILED +#endif + +#ifndef RSEQ_FALLBACK_CNT +#define RSEQ_FALLBACK_CNT 3 +#endif + +uint32_t rseq_get_fallback_wait_cnt(void); +uint32_t rseq_get_fallback_cnt(void); + +extern __thread volatile struct rseq __rseq_abi; +extern int rseq_has_sys_membarrier; + +#define likely(x) __builtin_expect(!!(x), 1) +#define unlikely(x) __builtin_expect(!!(x), 0) +#define barrier() __asm__ __volatile__("" : : : "memory") + +#define ACCESS_ONCE(x) (*(__volatile__ __typeof__(x) *)&(x)) +#define WRITE_ONCE(x, v) __extension__ ({ ACCESS_ONCE(x) = (v); }) +#define READ_ONCE(x) ACCESS_ONCE(x) + +#if defined(__x86_64__) || defined(__i386__) +#include +#elif defined(__ARMEL__) +#include +#elif defined(__PPC__) +#include +#else +#error unsupported target +#endif + +enum rseq_lock_state { + RSEQ_LOCK_STATE_RESTART = 0, + RSEQ_LOCK_STATE_LOCK = 1, + RSEQ_LOCK_STATE_FAIL = 2, +}; + +struct rseq_lock { + pthread_mutex_t lock; + int32_t state; /* enum rseq_lock_state */ +}; + +/* State returned by rseq_start, passed as argument to rseq_finish. */ +struct rseq_state { + volatile struct rseq *rseqp; + int32_t cpu_id; /* cpu_id at start. */ + uint32_t event_counter; /* event_counter at start. */ + int32_t lock_state; /* Lock state at start. */ +}; + +/* + * Register rseq for the current thread. This needs to be called once + * by any thread which uses restartable sequences, before they start + * using restartable sequences. If initialization is not invoked, or if + * it fails, the restartable critical sections will fall-back on locking + * (rseq_lock). + */ +int rseq_register_current_thread(void); + +/* + * Unregister rseq for current thread. + */ +int rseq_unregister_current_thread(void); + +/* + * The fallback lock should be initialized before being used by any + * thread, and destroyed after all threads are done using it. This lock + * should be used by all rseq calls associated with shared data, either + * between threads, or between processes in a shared memory. + * + * There may be many rseq_lock per process, e.g. one per protected data + * structure. + */ +int rseq_init_lock(struct rseq_lock *rlock); +int rseq_destroy_lock(struct rseq_lock *rlock); + +/* + * Restartable sequence fallback prototypes. Fallback on locking when + * rseq is not initialized, not available on the system, or during + * single-stepping to ensure forward progress. + */ +int rseq_fallback_begin(struct rseq_lock *rlock); +void rseq_fallback_end(struct rseq_lock *rlock, int cpu); +void rseq_fallback_wait(struct rseq_lock *rlock); +void rseq_fallback_noinit(struct rseq_state *rseq_state); + +/* + * Restartable sequence fallback for reading the current CPU number. 
+ */ +int rseq_fallback_current_cpu(void); + +static inline int32_t rseq_cpu_at_start(struct rseq_state start_value) +{ + return start_value.cpu_id; +} + +static inline int32_t rseq_current_cpu_raw(void) +{ + return ACCESS_ONCE(__rseq_abi.u.e.cpu_id); +} + +static inline int32_t rseq_current_cpu(void) +{ + int32_t cpu; + + cpu = rseq_current_cpu_raw(); + if (unlikely(cpu < 0)) + cpu = rseq_fallback_current_cpu(); + return cpu; +} + +static inline __attribute__((always_inline)) +struct rseq_state rseq_start(struct rseq_lock *rlock) +{ + struct rseq_state result; + + result.rseqp = &__rseq_abi; + if (has_single_copy_load_64()) { + union rseq_cpu_event u; + + u.v = ACCESS_ONCE(result.rseqp->u.v); + result.event_counter = u.e.event_counter; + result.cpu_id = u.e.cpu_id; + } else { + result.event_counter = + ACCESS_ONCE(result.rseqp->u.e.event_counter); + /* load event_counter before cpu_id. */ + RSEQ_INJECT_C(6) + result.cpu_id = ACCESS_ONCE(result.rseqp->u.e.cpu_id); + } + /* + * Read event counter before lock state and cpu_id. This ensures + * that when the state changes from RESTART to LOCK, if we have + * some threads that have already seen the RESTART still in + * flight, they will necessarily be preempted/signalled before a + * thread can see the LOCK state for that same CPU. That + * preemption/signalling will cause them to restart, so they + * don't interfere with the lock. + */ + RSEQ_INJECT_C(7) + + if (!has_fast_acquire_release() && likely(rseq_has_sys_membarrier)) { + result.lock_state = ACCESS_ONCE(rlock->state); + barrier(); + } else { + /* + * Load lock state with acquire semantic. Matches + * smp_store_release() in rseq_fallback_end(). + */ + result.lock_state = smp_load_acquire(&rlock->state); + } + if (unlikely(result.cpu_id < 0)) + rseq_fallback_noinit(&result); + /* + * Ensure the compiler does not re-order loads of protected + * values before we load the event counter. + */ + barrier(); + return result; +} + +enum rseq_finish_type { + RSEQ_FINISH_SINGLE, + RSEQ_FINISH_TWO, + RSEQ_FINISH_MEMCPY, +}; + +/* + * p_spec and to_write_spec are used for a speculative write attempted + * near the end of the restartable sequence. A rseq_finish2 may fail + * even after this write takes place. + * + * p_final and to_write_final are used for the final write. If this + * write takes place, the rseq_finish2 is guaranteed to succeed. 
+ */ +static inline __attribute__((always_inline)) +bool __rseq_finish(struct rseq_lock *rlock, + intptr_t *p_spec, intptr_t to_write_spec, + void *p_memcpy, void *to_write_memcpy, size_t len_memcpy, + intptr_t *p_final, intptr_t to_write_final, + struct rseq_state start_value, + enum rseq_finish_type type, bool release) +{ + RSEQ_INJECT_C(9) + + if (unlikely(start_value.lock_state != RSEQ_LOCK_STATE_RESTART)) { + if (start_value.lock_state == RSEQ_LOCK_STATE_LOCK) + rseq_fallback_wait(rlock); + return false; + } + switch (type) { + case RSEQ_FINISH_SINGLE: + RSEQ_FINISH_ASM(p_final, to_write_final, start_value, failure, + /* no speculative write */, /* no speculative write */, + RSEQ_FINISH_FINAL_STORE_ASM(), + RSEQ_FINISH_FINAL_STORE_INPUT(p_final, to_write_final), + /* no extra clobber */, /* no arg */, /* no arg */, + /* no arg */ + ); + break; + case RSEQ_FINISH_TWO: + if (release) { + RSEQ_FINISH_ASM(p_final, to_write_final, start_value, failure, + RSEQ_FINISH_SPECULATIVE_STORE_ASM(), + RSEQ_FINISH_SPECULATIVE_STORE_INPUT(p_spec, to_write_spec), + RSEQ_FINISH_FINAL_STORE_RELEASE_ASM(), + RSEQ_FINISH_FINAL_STORE_INPUT(p_final, to_write_final), + /* no extra clobber */, /* no arg */, /* no arg */, + /* no arg */ + ); + } else { + RSEQ_FINISH_ASM(p_final, to_write_final, start_value, failure, + RSEQ_FINISH_SPECULATIVE_STORE_ASM(), + RSEQ_FINISH_SPECULATIVE_STORE_INPUT(p_spec, to_write_spec), + RSEQ_FINISH_FINAL_STORE_ASM(), + RSEQ_FINISH_FINAL_STORE_INPUT(p_final, to_write_final), + /* no extra clobber */, /* no arg */, /* no arg */, + /* no arg */ + ); + } + break; + case RSEQ_FINISH_MEMCPY: + if (release) { + RSEQ_FINISH_ASM(p_final, to_write_final, start_value, failure, + RSEQ_FINISH_MEMCPY_STORE_ASM(), + RSEQ_FINISH_MEMCPY_STORE_INPUT(p_memcpy, to_write_memcpy, len_memcpy), + RSEQ_FINISH_FINAL_STORE_RELEASE_ASM(), + RSEQ_FINISH_FINAL_STORE_INPUT(p_final, to_write_final), + RSEQ_FINISH_MEMCPY_CLOBBER(), + RSEQ_FINISH_MEMCPY_SETUP(), + RSEQ_FINISH_MEMCPY_TEARDOWN(), + RSEQ_FINISH_MEMCPY_SCRATCH() + ); + } else { + RSEQ_FINISH_ASM(p_final, to_write_final, start_value, failure, + RSEQ_FINISH_MEMCPY_STORE_ASM(), + RSEQ_FINISH_MEMCPY_STORE_INPUT(p_memcpy, to_write_memcpy, len_memcpy), + RSEQ_FINISH_FINAL_STORE_ASM(), + RSEQ_FINISH_FINAL_STORE_INPUT(p_final, to_write_final), + RSEQ_FINISH_MEMCPY_CLOBBER(), + RSEQ_FINISH_MEMCPY_SETUP(), + RSEQ_FINISH_MEMCPY_TEARDOWN(), + RSEQ_FINISH_MEMCPY_SCRATCH() + ); + } + break; + } + return true; +failure: + RSEQ_INJECT_FAILED + return false; +} + +static inline __attribute__((always_inline)) +bool rseq_finish(struct rseq_lock *rlock, + intptr_t *p, intptr_t to_write, + struct rseq_state start_value) +{ + return __rseq_finish(rlock, NULL, 0, + NULL, NULL, 0, + p, to_write, start_value, + RSEQ_FINISH_SINGLE, false); +} + +static inline __attribute__((always_inline)) +bool rseq_finish2(struct rseq_lock *rlock, + intptr_t *p_spec, intptr_t to_write_spec, + intptr_t *p_final, intptr_t to_write_final, + struct rseq_state start_value) +{ + return __rseq_finish(rlock, p_spec, to_write_spec, + NULL, NULL, 0, + p_final, to_write_final, start_value, + RSEQ_FINISH_TWO, false); +} + +static inline __attribute__((always_inline)) +bool rseq_finish2_release(struct rseq_lock *rlock, + intptr_t *p_spec, intptr_t to_write_spec, + intptr_t *p_final, intptr_t to_write_final, + struct rseq_state start_value) +{ + return __rseq_finish(rlock, p_spec, to_write_spec, + NULL, NULL, 0, + p_final, to_write_final, start_value, + RSEQ_FINISH_TWO, true); +} + +static inline 
__attribute__((always_inline)) +bool rseq_finish_memcpy(struct rseq_lock *rlock, + void *p_memcpy, void *to_write_memcpy, size_t len_memcpy, + intptr_t *p_final, intptr_t to_write_final, + struct rseq_state start_value) +{ + return __rseq_finish(rlock, NULL, 0, + p_memcpy, to_write_memcpy, len_memcpy, + p_final, to_write_final, start_value, + RSEQ_FINISH_MEMCPY, false); +} + +static inline __attribute__((always_inline)) +bool rseq_finish_memcpy_release(struct rseq_lock *rlock, + void *p_memcpy, void *to_write_memcpy, size_t len_memcpy, + intptr_t *p_final, intptr_t to_write_final, + struct rseq_state start_value) +{ + return __rseq_finish(rlock, NULL, 0, + p_memcpy, to_write_memcpy, len_memcpy, + p_final, to_write_final, start_value, + RSEQ_FINISH_MEMCPY, true); +} + +#define __rseq_store_RSEQ_FINISH_SINGLE(_targetptr_spec, _newval_spec, \ + _dest_memcpy, _src_memcpy, _len_memcpy, \ + _targetptr_final, _newval_final) \ + do { \ + *(_targetptr_final) = (_newval_final); \ + } while (0) + +#define __rseq_store_RSEQ_FINISH_TWO(_targetptr_spec, _newval_spec, \ + _dest_memcpy, _src_memcpy, _len_memcpy, \ + _targetptr_final, _newval_final) \ + do { \ + *(_targetptr_spec) = (_newval_spec); \ + *(_targetptr_final) = (_newval_final); \ + } while (0) + +#define __rseq_store_RSEQ_FINISH_MEMCPY(_targetptr_spec, \ + _newval_spec, _dest_memcpy, _src_memcpy, _len_memcpy, \ + _targetptr_final, _newval_final) \ + do { \ + memcpy(_dest_memcpy, _src_memcpy, _len_memcpy); \ + *(_targetptr_final) = (_newval_final); \ + } while (0) + +/* + * Helper macro doing two restartable critical section attempts, and if + * they fail, fallback on locking. + */ +#define __do_rseq(_type, _lock, _rseq_state, _cpu, _result, \ + _targetptr_spec, _newval_spec, \ + _dest_memcpy, _src_memcpy, _len_memcpy, \ + _targetptr_final, _newval_final, _code, _release) \ + do { \ + _rseq_state = rseq_start(_lock); \ + _cpu = rseq_cpu_at_start(_rseq_state); \ + _result = true; \ + _code \ + if (unlikely(!_result)) \ + break; \ + if (likely(__rseq_finish(_lock, \ + _targetptr_spec, _newval_spec, \ + _dest_memcpy, _src_memcpy, _len_memcpy, \ + _targetptr_final, _newval_final, \ + _rseq_state, _type, _release))) \ + break; \ + _rseq_state = rseq_start(_lock); \ + _cpu = rseq_cpu_at_start(_rseq_state); \ + _result = true; \ + _code \ + if (unlikely(!_result)) \ + break; \ + if (likely(__rseq_finish(_lock, \ + _targetptr_spec, _newval_spec, \ + _dest_memcpy, _src_memcpy, _len_memcpy, \ + _targetptr_final, _newval_final, \ + _rseq_state, _type, _release))) \ + break; \ + _cpu = rseq_fallback_begin(_lock); \ + _result = true; \ + _code \ + if (likely(_result)) \ + __rseq_store_##_type(_targetptr_spec, \ + _newval_spec, _dest_memcpy, \ + _src_memcpy, _len_memcpy, \ + _targetptr_final, _newval_final); \ + rseq_fallback_end(_lock, _cpu); \ + } while (0) + +#define do_rseq(_lock, _rseq_state, _cpu, _result, _targetptr, _newval, \ + _code) \ + __do_rseq(RSEQ_FINISH_SINGLE, _lock, _rseq_state, _cpu, _result,\ + NULL, 0, NULL, NULL, 0, _targetptr, _newval, _code, false) + +#define do_rseq2(_lock, _rseq_state, _cpu, _result, \ + _targetptr_spec, _newval_spec, \ + _targetptr_final, _newval_final, _code) \ + __do_rseq(RSEQ_FINISH_TWO, _lock, _rseq_state, _cpu, _result, \ + _targetptr_spec, _newval_spec, \ + NULL, NULL, 0, \ + _targetptr_final, _newval_final, _code, false) + +#define do_rseq2_release(_lock, _rseq_state, _cpu, _result, \ + _targetptr_spec, _newval_spec, \ + _targetptr_final, _newval_final, _code) \ + __do_rseq(RSEQ_FINISH_TWO, _lock, 
_rseq_state, _cpu, _result, \ + _targetptr_spec, _newval_spec, \ + NULL, NULL, 0, \ + _targetptr_final, _newval_final, _code, true) + +#define do_rseq_memcpy(_lock, _rseq_state, _cpu, _result, \ + _dest_memcpy, _src_memcpy, _len_memcpy, \ + _targetptr_final, _newval_final, _code) \ + __do_rseq(RSEQ_FINISH_MEMCPY, _lock, _rseq_state, _cpu, _result,\ + NULL, 0, \ + _dest_memcpy, _src_memcpy, _len_memcpy, \ + _targetptr_final, _newval_final, _code, false) + +#define do_rseq_memcpy_release(_lock, _rseq_state, _cpu, _result, \ + _dest_memcpy, _src_memcpy, _len_memcpy, \ + _targetptr_final, _newval_final, _code) \ + __do_rseq(RSEQ_FINISH_MEMCPY, _lock, _rseq_state, _cpu, _result,\ + NULL, 0, \ + _dest_memcpy, _src_memcpy, _len_memcpy, \ + _targetptr_final, _newval_final, _code, true) + +#endif /* RSEQ_H_ */ -- 2.34.1
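
As a point of reference for reviewers, a minimal usage sketch of the API this patch introduces follows. It is not part of the patch; the per-CPU counter layout, the 128-byte alignment value, and the include path are illustrative assumptions. The calls themselves — rseq_register_current_thread(), rseq_init_lock(), and the do_rseq() helper, which retries the restartable critical section twice before taking the fallback lock — are the ones declared in the rseq.h added above.

/*
 * Illustrative sketch (not part of the patch): per-CPU counter
 * increment built on the do_rseq() helper from rseq.h. The counter
 * layout, alignment value and include path are examples only.
 */
#define _GNU_SOURCE
#include <sched.h>		/* CPU_SETSIZE */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include "rseq.h"

struct percpu_count {
	intptr_t count;
} __attribute__((aligned(128)));	/* avoid false sharing (example value) */

static struct rseq_lock counter_lock;
static struct percpu_count counter[CPU_SETSIZE];

static void counter_inc(void)
{
	struct rseq_state rseq_state;
	intptr_t *targetptr, newval;
	bool result;
	int cpu;

	/*
	 * do_rseq() attempts the restartable critical section twice,
	 * then falls back on counter_lock. The code block recomputes
	 * targetptr and newval for the CPU observed by rseq_start().
	 */
	do_rseq(&counter_lock, rseq_state, cpu, result, targetptr, newval,
		{
			newval = counter[cpu].count + 1;
			targetptr = &counter[cpu].count;
		});
}

int main(void)
{
	bool registered;

	if (rseq_init_lock(&counter_lock))
		abort();
	/*
	 * Each thread using restartable sequences registers once; if
	 * registration fails, do_rseq() transparently uses the lock.
	 */
	registered = !rseq_register_current_thread();
	counter_inc();
	if (registered)
		(void) rseq_unregister_current_thread();
	rseq_destroy_lock(&counter_lock);
	return 0;
}

On kernels without the rseq system call, or when single-stepping would otherwise defeat forward progress, the same caller code degrades to the pthread mutex embedded in struct rseq_lock, as described in the comments of rseq.h.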