Restartable sequences: add rseq library and abi header
author Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Sat, 17 Sep 2016 23:44:25 +0000 (19:44 -0400)
committer Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Mon, 21 Nov 2016 21:15:07 +0000 (16:15 -0500)
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
libringbuffer/Makefile.am
libringbuffer/linux-rseq-abi.h [new file with mode: 0644]
libringbuffer/rseq-arm.h [new file with mode: 0644]
libringbuffer/rseq-ppc.h [new file with mode: 0644]
libringbuffer/rseq-x86.h [new file with mode: 0644]
libringbuffer/rseq.c [new file with mode: 0644]
libringbuffer/rseq.h [new file with mode: 0644]

index 271c8beed9ffdd385e9a39902d4c463b688d3887..1750e42d02b5fb165094e8decb02eaf063901454 100644
@@ -11,7 +11,8 @@ libringbuffer_la_SOURCES = \
        api.h \
        backend.h backend_internal.h backend_types.h \
        frontend_api.h frontend.h frontend_internal.h frontend_types.h \
-       nohz.h vatomic.h tlsfixup.h
+       nohz.h vatomic.h tlsfixup.h \
+       rseq.c rseq.h rseq-x86.h rseq-arm.h rseq-ppc.h linux-rseq-abi.h
 
 libringbuffer_la_LIBADD = \
        -lpthread \
diff --git a/libringbuffer/linux-rseq-abi.h b/libringbuffer/linux-rseq-abi.h
new file mode 100644
index 0000000..ee45be6
--- /dev/null
@@ -0,0 +1,106 @@
+#ifndef _UAPI_LINUX_RSEQ_H
+#define _UAPI_LINUX_RSEQ_H
+
+/*
+ * linux/rseq.h
+ *
+ * Restartable sequences system call API
+ *
+ * Copyright (c) 2015-2016 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifdef __KERNEL__
+# include <linux/types.h>
+#else  /* #ifdef __KERNEL__ */
+# include <stdint.h>
+#endif /* #else #ifdef __KERNEL__ */
+
+#include <asm/byteorder.h>
+
+#ifdef __LP64__
+# define RSEQ_FIELD_u32_u64(field)     uint64_t field
+#elif defined(__BYTE_ORDER) ? \
+       __BYTE_ORDER == __BIG_ENDIAN : defined(__BIG_ENDIAN)
+# define RSEQ_FIELD_u32_u64(field)     uint32_t _padding ## field, field
+#else
+# define RSEQ_FIELD_u32_u64(field)     uint32_t field, _padding ## field
+#endif
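
For illustration (not part of the patch): on an ILP32 little-endian target, RSEQ_FIELD_u32_u64(x) expands to "uint32_t x, _paddingx", keeping the 32-bit field in the low-order half of the 64-bit slot the kernel updates, while on LP64 it is a plain uint64_t. A minimal layout check, assuming a C11 compiler for _Static_assert:

    struct rseq_demo_field {
            RSEQ_FIELD_u32_u64(addr);       /* one 64-bit slot on every ABI */
    };

    _Static_assert(sizeof(struct rseq_demo_field) == sizeof(uint64_t),
                   "a u32_u64 field always occupies exactly 64 bits");
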
+
+enum rseq_flags {
+       RSEQ_FORCE_UNREGISTER = (1 << 0),
+};
+
+/*
+ * struct rseq_cs is aligned on 4 * 8 bytes to ensure it is always
+ * contained within a single cache-line. It is usually declared as
+ * link-time constant data.
+ */
+struct rseq_cs {
+       RSEQ_FIELD_u32_u64(start_ip);
+       RSEQ_FIELD_u32_u64(post_commit_ip);
+       RSEQ_FIELD_u32_u64(abort_ip);
+} __attribute__((aligned(4 * sizeof(uint64_t))));
+
+union rseq_cpu_event {
+       struct {
+               /*
+                * Restartable sequences cpu_id field.
+                * Updated by the kernel, and read by user-space with
+                * single-copy atomicity semantics. Aligned on 32-bit.
+                * Negative values are reserved for user-space.
+                */
+               int32_t cpu_id;
+               /*
+                * Restartable sequences event_counter field.
+                * Updated by the kernel, and read by user-space with
+                * single-copy atomicity semantics. Aligned on 32-bit.
+                */
+               uint32_t event_counter;
+       } e;
+       /*
+        * On architectures with 64-bit aligned reads, both cpu_id and
+        * event_counter can be read with single-copy atomicity
+        * semantics.
+        */
+       uint64_t v;
+};
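
Since both fields fit in a single 64-bit slot, they can be snapshotted with one load where 64-bit reads are single-copy atomic. A hedged sketch of that pattern, given the struct rseq defined just below (the same pattern rseq_start() uses later in this patch when has_single_copy_load_64() is true):

    /* Sketch: snapshot cpu_id and event_counter together. */
    static inline union rseq_cpu_event
    rseq_demo_snapshot(volatile struct rseq *r)
    {
            union rseq_cpu_event snap;

            snap.v = r->u.v;        /* one volatile 64-bit load */
            return snap;
    }
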
+
+/*
+ * struct rseq is aligned on 2 * 8 bytes to ensure it is always
+ * contained within a single cache-line.
+ */
+struct rseq {
+       union rseq_cpu_event u;
+       /*
+        * Restartable sequences rseq_cs field.
+        * Contains NULL when no critical section is active for the
+        * current thread, or holds a pointer to the currently active
+        * struct rseq_cs.
+        * Updated by user-space at the beginning and end of assembly
+        * instruction sequence block, and by the kernel when it
+        * restarts an assembly instruction sequence block. Read by the
+        * kernel with single-copy atomicity semantics. Aligned on
+        * 64-bit.
+        */
+       RSEQ_FIELD_u32_u64(rseq_cs);
+} __attribute__((aligned(2 * sizeof(uint64_t))));
+
+#endif /* _UAPI_LINUX_RSEQ_H */
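
The per-architecture headers below emit the struct rseq_cs descriptors directly from assembly into the __rseq_table section. Purely as an illustration of the "link-time constant data" mentioned above, an LP64 C equivalent might look as follows, with the three label symbols assumed to be exported by the critical-section assembly (all names hypothetical):

    /* Hypothetical labels marking one restartable critical section. */
    extern const char demo_start[], demo_post_commit[], demo_abort[];

    static const struct rseq_cs demo_cs
            __attribute__((used, section("__rseq_table"))) = {
            .start_ip       = (uintptr_t)demo_start,
            .post_commit_ip = (uintptr_t)demo_post_commit,
            .abort_ip       = (uintptr_t)demo_abort,
    };
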
diff --git a/libringbuffer/rseq-arm.h b/libringbuffer/rseq-arm.h
new file mode 100644
index 0000000..289abd4
--- /dev/null
@@ -0,0 +1,168 @@
+/*
+ * rseq-arm.h
+ *
+ * (C) Copyright 2016 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#define smp_mb()       __asm__ __volatile__ ("dmb" : : : "memory")
+#define smp_rmb()      __asm__ __volatile__ ("dmb" : : : "memory")
+#define smp_wmb()      __asm__ __volatile__ ("dmb" : : : "memory")
+
+#define smp_load_acquire(p)                                            \
+__extension__ ({                                                       \
+       __typeof(*p) ____p1 = READ_ONCE(*p);                            \
+       smp_mb();                                                       \
+       ____p1;                                                         \
+})
+
+#define smp_acquire__after_ctrl_dep()  smp_rmb()
+
+#define smp_store_release(p, v)                                                \
+do {                                                                   \
+       smp_mb();                                                       \
+       WRITE_ONCE(*p, v);                                              \
+} while (0)
+
+#define has_fast_acquire_release()     0
+#define has_single_copy_load_64()      1
+
+/*
+ * The __rseq_table section can be used by debuggers to better handle
+ * single-stepping through the restartable critical sections.
+ *
+ * The immediate value 0 is loaded into register r1 right after the ldr
+ * instruction to improve instruction-level parallelism: the constant is
+ * set up while the processor is stalled waiting for the ldr result,
+ * which the following comparison and branch depend on.
+ */
+
+#define RSEQ_FINISH_ASM(_target_final, _to_write_final, _start_value, \
+               _failure, _spec_store, _spec_input, \
+               _final_store, _final_input, _extra_clobber, \
+               _setup, _teardown, _scratch) \
+do { \
+       _scratch \
+       __asm__ __volatile__ goto ( \
+               ".pushsection __rseq_table, \"aw\"\n\t" \
+               ".balign 32\n\t" \
+               ".word 1f, 0x0, 2f, 0x0, 5f, 0x0, 0x0, 0x0\n\t" \
+               ".popsection\n\t" \
+               "1:\n\t" \
+               _setup \
+               RSEQ_INJECT_ASM(1) \
+               "adr r0, 3f\n\t" \
+               "str r0, [%[rseq_cs]]\n\t" \
+               RSEQ_INJECT_ASM(2) \
+               "ldr r0, %[current_event_counter]\n\t" \
+               "mov r1, #0\n\t" \
+               "cmp %[start_event_counter], r0\n\t" \
+               "bne 5f\n\t" \
+               RSEQ_INJECT_ASM(3) \
+               _spec_store \
+               _final_store \
+               "2:\n\t" \
+               RSEQ_INJECT_ASM(5) \
+               "str r1, [%[rseq_cs]]\n\t" \
+               _teardown \
+               "b 4f\n\t" \
+               ".balign 32\n\t" \
+               "3:\n\t" \
+               ".word 1b, 0x0, 2b, 0x0, 5f, 0x0, 0x0, 0x0\n\t" \
+               "5:\n\t" \
+               "mov r1, #0\n\t" \
+               "str r1, [%[rseq_cs]]\n\t" \
+               _teardown \
+               "b %l[failure]\n\t" \
+               "4:\n\t" \
+               : /* no outputs */ \
+               : [start_event_counter]"r"((_start_value).event_counter), \
+                 [current_event_counter]"m"((_start_value).rseqp->u.e.event_counter), \
+                 [rseq_cs]"r"(&(_start_value).rseqp->rseq_cs) \
+                 _spec_input \
+                 _final_input \
+                 RSEQ_INJECT_INPUT \
+               : "r0", "r1", "memory", "cc" \
+                 _extra_clobber \
+                 RSEQ_INJECT_CLOBBER \
+               : _failure \
+       ); \
+} while (0)
+
+#define RSEQ_FINISH_FINAL_STORE_ASM() \
+               "str %[to_write_final], [%[target_final]]\n\t"
+
+#define RSEQ_FINISH_FINAL_STORE_RELEASE_ASM() \
+               "dmb\n\t" \
+               RSEQ_FINISH_FINAL_STORE_ASM()
+
+#define RSEQ_FINISH_FINAL_STORE_INPUT(_target_final, _to_write_final) \
+               , [to_write_final]"r"(_to_write_final), \
+               [target_final]"r"(_target_final)
+
+#define RSEQ_FINISH_SPECULATIVE_STORE_ASM() \
+               "str %[to_write_spec], [%[target_spec]]\n\t" \
+               RSEQ_INJECT_ASM(4)
+
+#define RSEQ_FINISH_SPECULATIVE_STORE_INPUT(_target_spec, _to_write_spec) \
+               , [to_write_spec]"r"(_to_write_spec), \
+               [target_spec]"r"(_target_spec)
+
+/* TODO: implement a faster memcpy. */
+#define RSEQ_FINISH_MEMCPY_STORE_ASM() \
+               "cmp %[len_memcpy], #0\n\t" \
+               "beq 333f\n\t" \
+               "222:\n\t" \
+               "ldrb %%r0, [%[to_write_memcpy]]\n\t" \
+               "strb %%r0, [%[target_memcpy]]\n\t" \
+               "adds %[to_write_memcpy], #1\n\t" \
+               "adds %[target_memcpy], #1\n\t" \
+               "subs %[len_memcpy], #1\n\t" \
+               "bne 222b\n\t" \
+               "333:\n\t" \
+               RSEQ_INJECT_ASM(4)
+
+#define RSEQ_FINISH_MEMCPY_STORE_INPUT(_target_memcpy, _to_write_memcpy, _len_memcpy) \
+               , [to_write_memcpy]"r"(_to_write_memcpy), \
+               [target_memcpy]"r"(_target_memcpy), \
+               [len_memcpy]"r"(_len_memcpy), \
+               [rseq_scratch0]"m"(rseq_scratch[0]), \
+               [rseq_scratch1]"m"(rseq_scratch[1]), \
+               [rseq_scratch2]"m"(rseq_scratch[2])
+
+/* r0 is already in the clobber list, so no extra clobber is needed. */
+#define RSEQ_FINISH_MEMCPY_CLOBBER()
+
+#define RSEQ_FINISH_MEMCPY_SCRATCH() \
+               uint32_t rseq_scratch[3];
+
+/*
+ * We need to save and restore those input registers so they can be
+ * modified within the assembly.
+ */
+#define RSEQ_FINISH_MEMCPY_SETUP() \
+               "str %[to_write_memcpy], %[rseq_scratch0]\n\t" \
+               "str %[target_memcpy], %[rseq_scratch1]\n\t" \
+               "str %[len_memcpy], %[rseq_scratch2]\n\t"
+
+#define RSEQ_FINISH_MEMCPY_TEARDOWN() \
+               "ldr %[len_memcpy], %[rseq_scratch2]\n\t" \
+               "ldr %[target_memcpy], %[rseq_scratch1]\n\t" \
+               "ldr %[to_write_memcpy], %[rseq_scratch0]\n\t"
diff --git a/libringbuffer/rseq-ppc.h b/libringbuffer/rseq-ppc.h
new file mode 100644
index 0000000..8a76d07
--- /dev/null
@@ -0,0 +1,273 @@
+/*
+ * rseq-ppc.h
+ *
+ * (C) Copyright 2016 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+ * (C) Copyright 2016 - Boqun Feng <boqun.feng@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#define smp_mb()       __asm__ __volatile__ ("sync" : : : "memory")
+#define smp_lwsync()   __asm__ __volatile__ ("lwsync" : : : "memory")
+#define smp_rmb()      smp_lwsync()
+#define smp_wmb()      smp_lwsync()
+
+#define smp_load_acquire(p)                                            \
+__extension__ ({                                                       \
+       __typeof(*p) ____p1 = READ_ONCE(*p);                            \
+       smp_lwsync();                                                   \
+       ____p1;                                                         \
+})
+
+#define smp_acquire__after_ctrl_dep()  smp_lwsync()
+
+#define smp_store_release(p, v)                                                \
+do {                                                                   \
+       smp_lwsync();                                                   \
+       WRITE_ONCE(*p, v);                                              \
+} while (0)
+
+#define has_fast_acquire_release()     0
+
+#ifdef __PPC64__
+#define has_single_copy_load_64()      1
+#else
+#define has_single_copy_load_64()      0
+#endif
+
+/*
+ * The __rseq_table section can be used by debuggers to better handle
+ * single-stepping through the restartable critical sections.
+ */
+
+#ifdef __PPC64__
+
+#define RSEQ_FINISH_ASM(_target_final, _to_write_final, _start_value, \
+               _failure, _spec_store, _spec_input, \
+               _final_store, _final_input, _extra_clobber, \
+               _setup, _teardown, _scratch) \
+       __asm__ __volatile__ goto ( \
+               ".pushsection __rseq_table, \"aw\"\n\t" \
+               ".balign 32\n\t" \
+               "3:\n\t" \
+               ".quad 1f, 2f, 4f, 0x0\n\t" \
+               ".popsection\n\t" \
+               "1:\n\t" \
+               _setup \
+               RSEQ_INJECT_ASM(1) \
+               "lis %%r17, (3b)@highest\n\t" \
+               "ori %%r17, %%r17, (3b)@higher\n\t" \
+               "rldicr %%r17, %%r17, 32, 31\n\t" \
+               "oris %%r17, %%r17, (3b)@h\n\t" \
+               "ori %%r17, %%r17, (3b)@l\n\t" \
+               "std %%r17, 0(%[rseq_cs])\n\t" \
+               RSEQ_INJECT_ASM(2) \
+               "lwz %%r17, %[current_event_counter]\n\t" \
+               "cmpw cr7, %[start_event_counter], %%r17\n\t" \
+               "bne- cr7, 4f\n\t" \
+               RSEQ_INJECT_ASM(3) \
+               _spec_store \
+               _final_store \
+               "2:\n\t" \
+               RSEQ_INJECT_ASM(5) \
+               "li %%r17, 0\n\t" \
+               "std %%r17, 0(%[rseq_cs])\n\t" \
+               _teardown \
+               "b 5f\n\t" \
+               "4:\n\t" \
+               "li %%r17, 0\n\t" \
+               "std %%r17, 0(%[rseq_cs])\n\t" \
+               _teardown \
+               "b %l[failure]\n\t" \
+               "5:\n\t" \
+               : /* no outputs */ \
+               : [start_event_counter]"r"((_start_value).event_counter), \
+                 [current_event_counter]"m"((_start_value).rseqp->u.e.event_counter), \
+                 [rseq_cs]"b"(&(_start_value).rseqp->rseq_cs) \
+                 _spec_input \
+                 _final_input \
+                 RSEQ_INJECT_INPUT \
+               : "r17", "memory", "cc" \
+                 _extra_clobber \
+                 RSEQ_INJECT_CLOBBER \
+               : _failure \
+       )
+
+#define RSEQ_FINISH_FINAL_STORE_ASM() \
+               "std %[to_write_final], 0(%[target_final])\n\t"
+
+#define RSEQ_FINISH_FINAL_STORE_RELEASE_ASM() \
+               "lwsync\n\t" \
+               RSEQ_FINISH_FINAL_STORE_ASM()
+
+#define RSEQ_FINISH_FINAL_STORE_INPUT(_target_final, _to_write_final) \
+               , [to_write_final]"r"(_to_write_final), \
+               [target_final]"b"(_target_final)
+
+#define RSEQ_FINISH_SPECULATIVE_STORE_ASM() \
+               "std %[to_write_spec], 0(%[target_spec])\n\t" \
+               RSEQ_INJECT_ASM(4)
+
+#define RSEQ_FINISH_SPECULATIVE_STORE_INPUT(_target_spec, _to_write_spec) \
+               , [to_write_spec]"r"(_to_write_spec), \
+               [target_spec]"b"(_target_spec)
+
+/* TODO: implement a faster memcpy. */
+#define RSEQ_FINISH_MEMCPY_STORE_ASM() \
+               "cmpdi %%r19, 0\n\t" \
+               "beq 333f\n\t" \
+               "addi %%r20, %%r20, -1\n\t" \
+               "addi %%r21, %%r21, -1\n\t" \
+               "222:\n\t" \
+               "lbzu %%r18, 1(%%r20)\n\t" \
+               "stbu %%r18, 1(%%r21)\n\t" \
+               "addi %%r19, %%r19, -1\n\t" \
+               "cmpdi %%r19, 0\n\t" \
+               "bne 222b\n\t" \
+               "333:\n\t" \
+               RSEQ_INJECT_ASM(4)
+
+#define RSEQ_FINISH_MEMCPY_STORE_INPUT(_target_memcpy, _to_write_memcpy, _len_memcpy) \
+               , [to_write_memcpy]"r"(_to_write_memcpy), \
+               [target_memcpy]"r"(_target_memcpy), \
+               [len_memcpy]"r"(_len_memcpy)
+
+#define RSEQ_FINISH_MEMCPY_CLOBBER() \
+               , "r18", "r19", "r20", "r21"
+
+#define RSEQ_FINISH_MEMCPY_SCRATCH()
+
+/*
+ * We copy the inputs into extra registers, so the input registers
+ * themselves do not need to be saved and restored.
+ */
+#define RSEQ_FINISH_MEMCPY_SETUP() \
+               "mr %%r19, %[len_memcpy]\n\t" \
+               "mr %%r20, %[to_write_memcpy]\n\t" \
+               "mr %%r21, %[target_memcpy]\n\t"
+
+#define RSEQ_FINISH_MEMCPY_TEARDOWN()
+
+#else  /* #ifdef __PPC64__ */
+
+#define RSEQ_FINISH_ASM(_target_final, _to_write_final, _start_value, \
+               _failure, _spec_store, _spec_input, \
+               _final_store, _final_input, _extra_clobber, \
+               _setup, _teardown, _scratch) \
+       __asm__ __volatile__ goto ( \
+               ".pushsection __rseq_table, \"aw\"\n\t" \
+               ".balign 32\n\t" \
+               "3:\n\t" \
+               /* 32-bit only supported on BE */ \
+               ".long 0x0, 1f, 0x0, 2f, 0x0, 4f, 0x0, 0x0\n\t" \
+               ".popsection\n\t" \
+               "1:\n\t" \
+               _setup \
+               RSEQ_INJECT_ASM(1) \
+               "lis %%r17, (3b)@ha\n\t" \
+               "addi %%r17, %%r17, (3b)@l\n\t" \
+               "stw %%r17, 0(%[rseq_cs])\n\t" \
+               RSEQ_INJECT_ASM(2) \
+               "lwz %%r17, %[current_event_counter]\n\t" \
+               "cmpw cr7, %[start_event_counter], %%r17\n\t" \
+               "bne- cr7, 4f\n\t" \
+               RSEQ_INJECT_ASM(3) \
+               _spec_store \
+               _final_store \
+               "2:\n\t" \
+               RSEQ_INJECT_ASM(5) \
+               "li %%r17, 0\n\t" \
+               "stw %%r17, 0(%[rseq_cs])\n\t" \
+               _teardown \
+               "b 5f\n\t" \
+               "4:\n\t" \
+               "li %%r17, 0\n\t" \
+               "stw %%r17, 0(%[rseq_cs])\n\t" \
+               _teardown \
+               "b %l[failure]\n\t" \
+               "5:\n\t" \
+               : /* no outputs */ \
+               : [start_event_counter]"r"((_start_value).event_counter), \
+                 [current_event_counter]"m"((_start_value).rseqp->u.e.event_counter), \
+                 [rseq_cs]"b"(&(_start_value).rseqp->rseq_cs) \
+                 _spec_input \
+                 _final_input \
+                 RSEQ_INJECT_INPUT \
+               : "r17", "memory", "cc" \
+                 _extra_clobber \
+                 RSEQ_INJECT_CLOBBER \
+               : _failure \
+       )
+
+#define RSEQ_FINISH_FINAL_STORE_ASM() \
+               "stw %[to_write_final], 0(%[target_final])\n\t"
+
+#define RSEQ_FINISH_FINAL_STORE_RELEASE_ASM() \
+               "lwsync\n\t" \
+               RSEQ_FINISH_FINAL_STORE_ASM()
+
+#define RSEQ_FINISH_FINAL_STORE_INPUT(_target_final, _to_write_final) \
+               , [to_write_final]"r"(_to_write_final), \
+               [target_final]"b"(_target_final)
+
+#define RSEQ_FINISH_SPECULATIVE_STORE_ASM() \
+               "stw %[to_write_spec], 0(%[target_spec])\n\t" \
+               RSEQ_INJECT_ASM(4)
+
+#define RSEQ_FINISH_SPECULATIVE_STORE_INPUT(_target_spec, _to_write_spec) \
+               , [to_write_spec]"r"(_to_write_spec), \
+               [target_spec]"b"(_target_spec)
+
+/* TODO: implement a faster memcpy. */
+#define RSEQ_FINISH_MEMCPY_STORE_ASM() \
+               "cmpwi %%r19, 0\n\t" \
+               "beq 333f\n\t" \
+               "addi %%r20, %%r20, -1\n\t" \
+               "addi %%r21, %%r21, -1\n\t" \
+               "222:\n\t" \
+               "lbzu %%r18, 1(%%r20)\n\t" \
+               "stbu %%r18, 1(%%r21)\n\t" \
+               "addi %%r19, %%r19, -1\n\t" \
+               "cmpwi %%r19, 0\n\t" \
+               "bne 222b\n\t" \
+               "333:\n\t" \
+               RSEQ_INJECT_ASM(4)
+
+#define RSEQ_FINISH_MEMCPY_STORE_INPUT(_target_memcpy, _to_write_memcpy, _len_memcpy) \
+               , [to_write_memcpy]"r"(_to_write_memcpy), \
+               [target_memcpy]"r"(_target_memcpy), \
+               [len_memcpy]"r"(_len_memcpy)
+
+#define RSEQ_FINISH_MEMCPY_CLOBBER() \
+               , "r18", "r19", "r20", "r21"
+
+#define RSEQ_FINISH_MEMCPY_SCRATCH()
+
+/*
+ * We copy the inputs into extra registers, so the input registers
+ * themselves do not need to be saved and restored.
+ */
+#define RSEQ_FINISH_MEMCPY_SETUP() \
+               "mr %%r19, %[len_memcpy]\n\t" \
+               "mr %%r20, %[to_write_memcpy]\n\t" \
+               "mr %%r21, %[target_memcpy]\n\t"
+
+#define RSEQ_FINISH_MEMCPY_TEARDOWN()
+
+#endif /* #else #ifdef __PPC64__ */
diff --git a/libringbuffer/rseq-x86.h b/libringbuffer/rseq-x86.h
new file mode 100644
index 0000000..7154bfa
--- /dev/null
@@ -0,0 +1,306 @@
+/*
+ * rseq-x86.h
+ *
+ * (C) Copyright 2016 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifdef __x86_64__
+
+#define smp_mb()       __asm__ __volatile__ ("mfence" : : : "memory")
+#define smp_rmb()      barrier()
+#define smp_wmb()      barrier()
+
+#define smp_load_acquire(p)                                            \
+__extension__ ({                                                       \
+       __typeof(*p) ____p1 = READ_ONCE(*p);                            \
+       barrier();                                                      \
+       ____p1;                                                         \
+})
+
+#define smp_acquire__after_ctrl_dep()  smp_rmb()
+
+#define smp_store_release(p, v)                                                \
+do {                                                                   \
+       barrier();                                                      \
+       WRITE_ONCE(*p, v);                                              \
+} while (0)
+
+#define has_fast_acquire_release()     1
+#define has_single_copy_load_64()      1
+
+/*
+ * The __rseq_table section can be used by debuggers to better handle
+ * single-stepping through the restartable critical sections.
+ */
+#define RSEQ_FINISH_ASM(_target_final, _to_write_final, _start_value, \
+               _failure, _spec_store, _spec_input, \
+               _final_store, _final_input, _extra_clobber, \
+               _setup, _teardown, _scratch) \
+do { \
+       _scratch \
+       __asm__ __volatile__ goto ( \
+               ".pushsection __rseq_table, \"aw\"\n\t" \
+               ".balign 32\n\t" \
+               "3:\n\t" \
+               ".quad 1f, 2f, 4f, 0x0\n\t" \
+               ".popsection\n\t" \
+               "1:\n\t" \
+               _setup \
+               RSEQ_INJECT_ASM(1) \
+               "movq $3b, %[rseq_cs]\n\t" \
+               RSEQ_INJECT_ASM(2) \
+               "cmpl %[start_event_counter], %[current_event_counter]\n\t" \
+               "jnz 4f\n\t" \
+               RSEQ_INJECT_ASM(3) \
+               _spec_store \
+               _final_store \
+               "2:\n\t" \
+               RSEQ_INJECT_ASM(5) \
+               "movq $0, %[rseq_cs]\n\t" \
+               _teardown \
+               ".pushsection __rseq_failure, \"a\"\n\t" \
+               "4:\n\t" \
+               "movq $0, %[rseq_cs]\n\t" \
+               _teardown \
+               "jmp %l[failure]\n\t" \
+               ".popsection\n\t" \
+               : /* no outputs */ \
+               : [start_event_counter]"r"((_start_value).event_counter), \
+                 [current_event_counter]"m"((_start_value).rseqp->u.e.event_counter), \
+                 [rseq_cs]"m"((_start_value).rseqp->rseq_cs) \
+                 _spec_input \
+                 _final_input \
+                 RSEQ_INJECT_INPUT \
+               : "memory", "cc" \
+                 _extra_clobber \
+                 RSEQ_INJECT_CLOBBER \
+               : _failure \
+       ); \
+} while (0)
+
+#define RSEQ_FINISH_FINAL_STORE_ASM() \
+               "movq %[to_write_final], %[target_final]\n\t"
+
+/* x86-64 is TSO: a plain store already provides release semantics. */
+#define RSEQ_FINISH_FINAL_STORE_RELEASE_ASM() \
+               RSEQ_FINISH_FINAL_STORE_ASM()
+
+#define RSEQ_FINISH_FINAL_STORE_INPUT(_target_final, _to_write_final) \
+               , [to_write_final]"r"(_to_write_final), \
+               [target_final]"m"(*(_target_final))
+
+#define RSEQ_FINISH_SPECULATIVE_STORE_ASM() \
+               "movq %[to_write_spec], %[target_spec]\n\t" \
+               RSEQ_INJECT_ASM(4)
+
+#define RSEQ_FINISH_SPECULATIVE_STORE_INPUT(_target_spec, _to_write_spec) \
+               , [to_write_spec]"r"(_to_write_spec), \
+               [target_spec]"m"(*(_target_spec))
+
+/* TODO: implement a faster memcpy. */
+#define RSEQ_FINISH_MEMCPY_STORE_ASM() \
+               "test %[len_memcpy], %[len_memcpy]\n\t" \
+               "jz 333f\n\t" \
+               "222:\n\t" \
+               "movb (%[to_write_memcpy]), %%al\n\t" \
+               "movb %%al, (%[target_memcpy])\n\t" \
+               "inc %[to_write_memcpy]\n\t" \
+               "inc %[target_memcpy]\n\t" \
+               "dec %[len_memcpy]\n\t" \
+               "jnz 222b\n\t" \
+               "333:\n\t" \
+               RSEQ_INJECT_ASM(4)
+
+#define RSEQ_FINISH_MEMCPY_STORE_INPUT(_target_memcpy, _to_write_memcpy, _len_memcpy) \
+               , [to_write_memcpy]"r"(_to_write_memcpy), \
+               [target_memcpy]"r"(_target_memcpy), \
+               [len_memcpy]"r"(_len_memcpy), \
+               [rseq_scratch0]"m"(rseq_scratch[0]), \
+               [rseq_scratch1]"m"(rseq_scratch[1]), \
+               [rseq_scratch2]"m"(rseq_scratch[2])
+
+#define RSEQ_FINISH_MEMCPY_CLOBBER()   \
+               , "rax"
+
+#define RSEQ_FINISH_MEMCPY_SCRATCH() \
+               uint64_t rseq_scratch[3];
+
+/*
+ * We need to save and restore those input registers so they can be
+ * modified within the assembly.
+ */
+#define RSEQ_FINISH_MEMCPY_SETUP() \
+               "movq %[to_write_memcpy], %[rseq_scratch0]\n\t" \
+               "movq %[target_memcpy], %[rseq_scratch1]\n\t" \
+               "movq %[len_memcpy], %[rseq_scratch2]\n\t"
+
+#define RSEQ_FINISH_MEMCPY_TEARDOWN() \
+               "movq %[rseq_scratch2], %[len_memcpy]\n\t" \
+               "movq %[rseq_scratch1], %[target_memcpy]\n\t" \
+               "movq %[rseq_scratch0], %[to_write_memcpy]\n\t"
+
+#elif defined(__i386__)
+
+/*
+ * Support older 32-bit architectures that do not implement fence
+ * instructions.
+ */
+#define smp_mb()       \
+       __asm__ __volatile__ ("lock; addl $0,0(%%esp)" : : : "memory")
+#define smp_rmb()      \
+       __asm__ __volatile__ ("lock; addl $0,0(%%esp)" : : : "memory")
+#define smp_wmb()      \
+       __asm__ __volatile__ ("lock; addl $0,0(%%esp)" : : : "memory")
+
+#define smp_load_acquire(p)                                            \
+__extension__ ({                                                       \
+       __typeof(*p) ____p1 = READ_ONCE(*p);                            \
+       smp_mb();                                                       \
+       ____p1;                                                         \
+})
+
+#define smp_acquire__after_ctrl_dep()  smp_rmb()
+
+#define smp_store_release(p, v)                                                \
+do {                                                                   \
+       smp_mb();                                                       \
+       WRITE_ONCE(*p, v);                                              \
+} while (0)
+
+#define has_fast_acquire_release()     0
+#define has_single_copy_load_64()      0
+
+/*
+ * Use eax as scratch register and take memory operands as input to
+ * lessen register pressure. Especially needed when compiling
+ * do_rseq_memcpy() in O0.
+ */
+#define RSEQ_FINISH_ASM(_target_final, _to_write_final, _start_value, \
+               _failure, _spec_store, _spec_input, \
+               _final_store, _final_input, _extra_clobber, \
+               _setup, _teardown, _scratch) \
+do { \
+       _scratch \
+       __asm__ __volatile__ goto ( \
+               ".pushsection __rseq_table, \"aw\"\n\t" \
+               ".balign 32\n\t" \
+               "3:\n\t" \
+               ".long 1f, 0x0, 2f, 0x0, 4f, 0x0, 0x0, 0x0\n\t" \
+               ".popsection\n\t" \
+               "1:\n\t" \
+               _setup \
+               RSEQ_INJECT_ASM(1) \
+               "movl $3b, %[rseq_cs]\n\t" \
+               RSEQ_INJECT_ASM(2) \
+               "movl %[start_event_counter], %%eax\n\t" \
+               "cmpl %%eax, %[current_event_counter]\n\t" \
+               "jnz 4f\n\t" \
+               RSEQ_INJECT_ASM(3) \
+               _spec_store \
+               _final_store \
+               "2:\n\t" \
+               RSEQ_INJECT_ASM(5) \
+               "movl $0, %[rseq_cs]\n\t" \
+               _teardown \
+               ".pushsection __rseq_failure, \"a\"\n\t" \
+               "4:\n\t" \
+               "movl $0, %[rseq_cs]\n\t" \
+               _teardown \
+               "jmp %l[failure]\n\t" \
+               ".popsection\n\t" \
+               : /* no outputs */ \
+               : [start_event_counter]"m"((_start_value).event_counter), \
+                 [current_event_counter]"m"((_start_value).rseqp->u.e.event_counter), \
+                 [rseq_cs]"m"((_start_value).rseqp->rseq_cs) \
+                 _spec_input \
+                 _final_input \
+                 RSEQ_INJECT_INPUT \
+               : "memory", "cc", "eax" \
+                 _extra_clobber \
+                 RSEQ_INJECT_CLOBBER \
+               : _failure \
+       ); \
+} while (0)
+
+#define RSEQ_FINISH_FINAL_STORE_ASM() \
+               "movl %[to_write_final], %%eax\n\t" \
+               "movl %%eax, %[target_final]\n\t"
+
+#define RSEQ_FINISH_FINAL_STORE_RELEASE_ASM() \
+               "lock; addl $0,0(%%esp)\n\t" \
+               RSEQ_FINISH_FINAL_STORE_ASM()
+
+#define RSEQ_FINISH_FINAL_STORE_INPUT(_target_final, _to_write_final) \
+               , [to_write_final]"m"(_to_write_final), \
+               [target_final]"m"(*(_target_final))
+
+#define RSEQ_FINISH_SPECULATIVE_STORE_ASM() \
+               "movl %[to_write_spec], %%eax\n\t" \
+               "movl %%eax, %[target_spec]\n\t" \
+               RSEQ_INJECT_ASM(4)
+
+#define RSEQ_FINISH_SPECULATIVE_STORE_INPUT(_target_spec, _to_write_spec) \
+               , [to_write_spec]"m"(_to_write_spec), \
+               [target_spec]"m"(*(_target_spec))
+
+/* TODO: implement a faster memcpy. */
+#define RSEQ_FINISH_MEMCPY_STORE_ASM() \
+               "movl %[len_memcpy], %%eax\n\t" \
+               "test %%eax, %%eax\n\t" \
+               "jz 333f\n\t" \
+               "222:\n\t" \
+               "movb (%[to_write_memcpy]), %%al\n\t" \
+               "movb %%al, (%[target_memcpy])\n\t" \
+               "inc %[to_write_memcpy]\n\t" \
+               "inc %[target_memcpy]\n\t" \
+               "decl %[rseq_scratch2]\n\t" \
+               "jnz 222b\n\t" \
+               "333:\n\t" \
+               RSEQ_INJECT_ASM(4)
+
+#define RSEQ_FINISH_MEMCPY_STORE_INPUT(_target_memcpy, _to_write_memcpy, _len_memcpy) \
+               , [to_write_memcpy]"r"(_to_write_memcpy), \
+               [target_memcpy]"r"(_target_memcpy), \
+               [len_memcpy]"m"(_len_memcpy), \
+               [rseq_scratch0]"m"(rseq_scratch[0]), \
+               [rseq_scratch1]"m"(rseq_scratch[1]), \
+               [rseq_scratch2]"m"(rseq_scratch[2])
+
+#define RSEQ_FINISH_MEMCPY_CLOBBER()
+
+#define RSEQ_FINISH_MEMCPY_SCRATCH() \
+               uint32_t rseq_scratch[3];
+
+/*
+ * We need to save and restore those input registers so they can be
+ * modified within the assembly.
+ */
+#define RSEQ_FINISH_MEMCPY_SETUP() \
+               "movl %[to_write_memcpy], %[rseq_scratch0]\n\t" \
+               "movl %[target_memcpy], %[rseq_scratch1]\n\t" \
+               "movl %[len_memcpy], %%eax\n\t" \
+               "movl %%eax, %[rseq_scratch2]\n\t"
+
+#define RSEQ_FINISH_MEMCPY_TEARDOWN() \
+               "movl %[rseq_scratch1], %[target_memcpy]\n\t" \
+               "movl %[rseq_scratch0], %[to_write_memcpy]\n\t"
+
+#endif
diff --git a/libringbuffer/rseq.c b/libringbuffer/rseq.c
new file mode 100644
index 0000000..c8193a3
--- /dev/null
@@ -0,0 +1,247 @@
+/*
+ * rseq.c
+ *
+ * Copyright (C) 2016 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; only
+ * version 2.1 of the License.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ */
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <sched.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <syscall.h>
+#include <assert.h>
+#include <signal.h>
+#include <linux/membarrier.h>
+
+#include <rseq.h>
+
+#ifdef __NR_membarrier
+# define membarrier(...)               syscall(__NR_membarrier, __VA_ARGS__)
+#else
+# define membarrier(...)               -ENOSYS
+#endif
+
+struct rseq_thread_state {
+       uint32_t fallback_wait_cnt;
+       uint32_t fallback_cnt;
+       sigset_t sigmask_saved;
+};
+
+__attribute__((weak)) __thread volatile struct rseq __rseq_abi = {
+       .u.e.cpu_id = -1,
+};
+
+static __thread volatile struct rseq_thread_state rseq_thread_state;
+
+int rseq_has_sys_membarrier;
+
+static int sys_rseq(volatile struct rseq *rseq_abi, int flags)
+{
+       return syscall(__NR_rseq, rseq_abi, flags);
+}
+
+int rseq_register_current_thread(void)
+{
+       int rc;
+
+       rc = sys_rseq(&__rseq_abi, 0);
+       if (rc) {
+               fprintf(stderr, "Error: sys_rseq(...) failed(%d): %s\n",
+                       errno, strerror(errno));
+               return -1;
+       }
+       assert(rseq_current_cpu() >= 0);
+       return 0;
+}
+
+int rseq_unregister_current_thread(void)
+{
+       int rc;
+
+       rc = sys_rseq(NULL, 0);
+       if (rc) {
+               fprintf(stderr, "Error: sys_rseq(...) failed(%d): %s\n",
+                       errno, strerror(errno));
+               return -1;
+       }
+       return 0;
+}
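
A minimal usage sketch (not part of this patch): each thread registers once before its first restartable sequence and, if registration succeeded, unregisters before exiting. When registration fails, the critical sections transparently fall back on the rseq_lock path.

    static void *worker(void *arg)
    {
            int registered = !rseq_register_current_thread();

            (void)arg;
            /* ... per-CPU work using rseq_start()/rseq_finish() ... */

            if (registered)
                    (void)rseq_unregister_current_thread();
            return NULL;
    }
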
+
+int rseq_init_lock(struct rseq_lock *rlock)
+{
+       int ret;
+
+       ret = pthread_mutex_init(&rlock->lock, NULL);
+       if (ret) {
+               errno = ret;
+               return -1;
+       }
+       rlock->state = RSEQ_LOCK_STATE_RESTART;
+       return 0;
+}
+
+int rseq_destroy_lock(struct rseq_lock *rlock)
+{
+       int ret;
+
+       ret = pthread_mutex_destroy(&rlock->lock);
+       if (ret) {
+               errno = ret;
+               return -1;
+       }
+       return 0;
+}
+
+static void signal_off_save(sigset_t *oldset)
+{
+       sigset_t set;
+       int ret;
+
+       sigfillset(&set);
+       ret = pthread_sigmask(SIG_BLOCK, &set, oldset);
+       if (ret)
+               abort();
+}
+
+static void signal_restore(sigset_t oldset)
+{
+       int ret;
+
+       ret = pthread_sigmask(SIG_SETMASK, &oldset, NULL);
+       if (ret)
+               abort();
+}
+
+static void rseq_fallback_lock(struct rseq_lock *rlock)
+{
+       signal_off_save((sigset_t *)&rseq_thread_state.sigmask_saved);
+       pthread_mutex_lock(&rlock->lock);
+       rseq_thread_state.fallback_cnt++;
+       /*
+        * For concurrent threads arriving before we set LOCK:
+        * reading cpu_id after setting the state to LOCK
+        * ensures they restart.
+        */
+       ACCESS_ONCE(rlock->state) = RSEQ_LOCK_STATE_LOCK;
+       /*
+        * For concurrent threads arriving after we set LOCK:
+        * those will grab the lock, so we are protected by
+        * mutual exclusion.
+        */
+}
+
+void rseq_fallback_wait(struct rseq_lock *rlock)
+{
+       signal_off_save((sigset_t *)&rseq_thread_state.sigmask_saved);
+       pthread_mutex_lock(&rlock->lock);
+       rseq_thread_state.fallback_wait_cnt++;
+       pthread_mutex_unlock(&rlock->lock);
+       signal_restore(rseq_thread_state.sigmask_saved);
+}
+
+static void rseq_fallback_unlock(struct rseq_lock *rlock, int cpu_at_start)
+{
+       /*
+        * Concurrent rseq arriving before we set state back to RESTART
+        * grab the lock. Those arriving after we set state back to
+        * RESTART will perform restartable critical sections. The next
+        * owner of the lock will take care of making sure it prevents
+        * concurrent restartable sequences from completing.  We may be
+        * writing from another CPU, so update the state with a store
+        * release semantic to ensure restartable sections will see our
+        * side effect (writing to *p) before they enter their
+        * restartable critical section.
+        *
+        * In cases where we observe that we are on the right CPU after the
+        * critical section, program order ensures that following restartable
+        * critical sections will see our stores, so we don't have to use
+        * store-release or membarrier.
+        *
+        * Use sys_membarrier when available to remove the memory barrier
+        * implied by smp_load_acquire().
+        */
+       barrier();
+       if (likely(rseq_current_cpu() == cpu_at_start)) {
+               ACCESS_ONCE(rlock->state) = RSEQ_LOCK_STATE_RESTART;
+       } else {
+               if (!has_fast_acquire_release() && rseq_has_sys_membarrier) {
+                       if (membarrier(MEMBARRIER_CMD_SHARED, 0))
+                               abort();
+                       ACCESS_ONCE(rlock->state) = RSEQ_LOCK_STATE_RESTART;
+               } else {
+                       /*
+                        * Store with release semantic to ensure
+                        * restartable sections will see our side effect
+                        * (writing to *p) before they enter their
+                        * restartable critical section. Matches
+                        * smp_load_acquire() in rseq_start().
+                        */
+                       smp_store_release(&rlock->state,
+                               RSEQ_LOCK_STATE_RESTART);
+               }
+       }
+       pthread_mutex_unlock(&rlock->lock);
+       signal_restore(rseq_thread_state.sigmask_saved);
+}
+
+int rseq_fallback_current_cpu(void)
+{
+       int cpu;
+
+       cpu = sched_getcpu();
+       if (cpu < 0) {
+               perror("sched_getcpu()");
+               abort();
+       }
+       return cpu;
+}
+
+int rseq_fallback_begin(struct rseq_lock *rlock)
+{
+       rseq_fallback_lock(rlock);
+       return rseq_fallback_current_cpu();
+}
+
+void rseq_fallback_end(struct rseq_lock *rlock, int cpu)
+{
+       rseq_fallback_unlock(rlock, cpu);
+}
+
+/* Handle non-initialized rseq for this thread. */
+void rseq_fallback_noinit(struct rseq_state *rseq_state)
+{
+       rseq_state->lock_state = RSEQ_LOCK_STATE_FAIL;
+       rseq_state->cpu_id = 0;
+}
+
+uint32_t rseq_get_fallback_wait_cnt(void)
+{
+       return rseq_thread_state.fallback_wait_cnt;
+}
+
+uint32_t rseq_get_fallback_cnt(void)
+{
+       return rseq_thread_state.fallback_cnt;
+}
+
+void __attribute__((constructor)) rseq_init(void)
+{
+       int ret;
+
+       ret = membarrier(MEMBARRIER_CMD_QUERY, 0);
+       if (ret >= 0 && (ret & MEMBARRIER_CMD_SHARED))
+               rseq_has_sys_membarrier = 1;
+}
diff --git a/libringbuffer/rseq.h b/libringbuffer/rseq.h
new file mode 100644
index 0000000..e76a994
--- /dev/null
@@ -0,0 +1,477 @@
+/*
+ * rseq.h
+ *
+ * (C) Copyright 2016 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef RSEQ_H
+#define RSEQ_H
+
+#include <stdint.h>
+#include <stdbool.h>
+#include <pthread.h>
+#include <signal.h>
+#include <sched.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include "linux-rseq-abi.h"
+
+/*
+ * Empty code injection macros; override them when testing.
+ * Note that the ASM injection macros need to be fully reentrant
+ * (e.g. they must not modify the stack).
+ */
+#ifndef RSEQ_INJECT_ASM
+#define RSEQ_INJECT_ASM(n)
+#endif
+
+#ifndef RSEQ_INJECT_C
+#define RSEQ_INJECT_C(n)
+#endif
+
+#ifndef RSEQ_INJECT_INPUT
+#define RSEQ_INJECT_INPUT
+#endif
+
+#ifndef RSEQ_INJECT_CLOBBER
+#define RSEQ_INJECT_CLOBBER
+#endif
+
+#ifndef RSEQ_INJECT_FAILED
+#define RSEQ_INJECT_FAILED
+#endif
+
+#ifndef RSEQ_FALLBACK_CNT
+#define RSEQ_FALLBACK_CNT      3
+#endif
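
As the comment above says, the injection macros are empty by default and are meant to be overridden by tests before this header is included. A hedged sketch of such an override: RSEQ_INJECT_ASM() must expand to a string-literal fragment spliced into the asm templates, and must not touch the stack.

    /* Hypothetical test override: emit a no-op at each injection point so
     * a test harness can widen the window in which preemption may occur. */
    #define RSEQ_INJECT_ASM(n) \
            "nop\n\t"
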
+
+uint32_t rseq_get_fallback_wait_cnt(void);
+uint32_t rseq_get_fallback_cnt(void);
+
+extern __thread volatile struct rseq __rseq_abi;
+extern int rseq_has_sys_membarrier;
+
+#define likely(x)              __builtin_expect(!!(x), 1)
+#define unlikely(x)            __builtin_expect(!!(x), 0)
+#define barrier()              __asm__ __volatile__("" : : : "memory")
+
+#define ACCESS_ONCE(x)         (*(__volatile__  __typeof__(x) *)&(x))
+#define WRITE_ONCE(x, v)       __extension__ ({ ACCESS_ONCE(x) = (v); })
+#define READ_ONCE(x)           ACCESS_ONCE(x)
+
+#if defined(__x86_64__) || defined(__i386__)
+#include <rseq-x86.h>
+#elif defined(__ARMEL__)
+#include <rseq-arm.h>
+#elif defined(__PPC__)
+#include <rseq-ppc.h>
+#else
+#error unsupported target
+#endif
+
+enum rseq_lock_state {
+       RSEQ_LOCK_STATE_RESTART = 0,
+       RSEQ_LOCK_STATE_LOCK = 1,
+       RSEQ_LOCK_STATE_FAIL = 2,
+};
+
+struct rseq_lock {
+       pthread_mutex_t lock;
+       int32_t state;          /* enum rseq_lock_state */
+};
+
+/* State returned by rseq_start, passed as argument to rseq_finish. */
+struct rseq_state {
+       volatile struct rseq *rseqp;
+       int32_t cpu_id;         /* cpu_id at start. */
+       uint32_t event_counter; /* event_counter at start. */
+       int32_t lock_state;     /* Lock state at start. */
+};
+
+/*
+ * Register rseq for the current thread. Each thread that uses
+ * restartable sequences must call this once, before it starts using
+ * them. If registration is not performed, or if it fails, the
+ * restartable critical sections will fall back on locking
+ * (rseq_lock).
+ */
+int rseq_register_current_thread(void);
+
+/*
+ * Unregister rseq for current thread.
+ */
+int rseq_unregister_current_thread(void);
+
+/*
+ * The fallback lock must be initialized before any thread uses it,
+ * and destroyed only after all threads are done with it. The same
+ * lock should be used by all rseq calls operating on given shared
+ * data, whether shared between threads or between processes through
+ * shared memory.
+ *
+ * There may be many rseq_lock instances per process, e.g. one per
+ * protected data structure.
+ */
+int rseq_init_lock(struct rseq_lock *rlock);
+int rseq_destroy_lock(struct rseq_lock *rlock);
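
For example (a sketch, names hypothetical), a process-wide lock guarding one per-CPU data structure:

    static struct rseq_lock counter_lock;   /* one lock per protected structure */

    static void counters_setup(void)
    {
            if (rseq_init_lock(&counter_lock))
                    abort();
    }

    static void counters_teardown(void)
    {
            if (rseq_destroy_lock(&counter_lock))
                    abort();
    }
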
+
+/*
+ * Restartable sequence fallback prototypes. Fallback on locking when
+ * rseq is not initialized, not available on the system, or during
+ * single-stepping to ensure forward progress.
+ */
+int rseq_fallback_begin(struct rseq_lock *rlock);
+void rseq_fallback_end(struct rseq_lock *rlock, int cpu);
+void rseq_fallback_wait(struct rseq_lock *rlock);
+void rseq_fallback_noinit(struct rseq_state *rseq_state);
+
+/*
+ * Restartable sequence fallback for reading the current CPU number.
+ */
+int rseq_fallback_current_cpu(void);
+
+static inline int32_t rseq_cpu_at_start(struct rseq_state start_value)
+{
+       return start_value.cpu_id;
+}
+
+static inline int32_t rseq_current_cpu_raw(void)
+{
+       return ACCESS_ONCE(__rseq_abi.u.e.cpu_id);
+}
+
+static inline int32_t rseq_current_cpu(void)
+{
+       int32_t cpu;
+
+       cpu = rseq_current_cpu_raw();
+       if (unlikely(cpu < 0))
+               cpu = rseq_fallback_current_cpu();
+       return cpu;
+}
+
+static inline __attribute__((always_inline))
+struct rseq_state rseq_start(struct rseq_lock *rlock)
+{
+       struct rseq_state result;
+
+       result.rseqp = &__rseq_abi;
+       if (has_single_copy_load_64()) {
+               union rseq_cpu_event u;
+
+               u.v = ACCESS_ONCE(result.rseqp->u.v);
+               result.event_counter = u.e.event_counter;
+               result.cpu_id = u.e.cpu_id;
+       } else {
+               result.event_counter =
+                       ACCESS_ONCE(result.rseqp->u.e.event_counter);
+               /* load event_counter before cpu_id. */
+               RSEQ_INJECT_C(6)
+               result.cpu_id = ACCESS_ONCE(result.rseqp->u.e.cpu_id);
+       }
+       /*
+        * Read event counter before lock state and cpu_id. This ensures
+        * that when the state changes from RESTART to LOCK, if we have
+        * some threads that have already seen the RESTART still in
+        * flight, they will necessarily be preempted/signalled before a
+        * thread can see the LOCK state for that same CPU. That
+        * preemption/signalling will cause them to restart, so they
+        * don't interfere with the lock.
+        */
+       RSEQ_INJECT_C(7)
+
+       if (!has_fast_acquire_release() && likely(rseq_has_sys_membarrier)) {
+               result.lock_state = ACCESS_ONCE(rlock->state);
+               barrier();
+       } else {
+               /*
+                * Load lock state with acquire semantic. Matches
+                * smp_store_release() in rseq_fallback_end().
+                */
+               result.lock_state = smp_load_acquire(&rlock->state);
+       }
+       if (unlikely(result.cpu_id < 0))
+               rseq_fallback_noinit(&result);
+       /*
+        * Ensure the compiler does not re-order loads of protected
+        * values before we load the event counter.
+        */
+       barrier();
+       return result;
+}
+
+enum rseq_finish_type {
+       RSEQ_FINISH_SINGLE,
+       RSEQ_FINISH_TWO,
+       RSEQ_FINISH_MEMCPY,
+};
+
+/*
+ * p_spec and to_write_spec are used for a speculative write attempted
+ * near the end of the restartable sequence. An rseq_finish2 may fail
+ * even after this write takes place.
+ *
+ * p_final and to_write_final are used for the final write. If this
+ * write takes place, the rseq_finish2 is guaranteed to succeed.
+ */
+static inline __attribute__((always_inline))
+bool __rseq_finish(struct rseq_lock *rlock,
+               intptr_t *p_spec, intptr_t to_write_spec,
+               void *p_memcpy, void *to_write_memcpy, size_t len_memcpy,
+               intptr_t *p_final, intptr_t to_write_final,
+               struct rseq_state start_value,
+               enum rseq_finish_type type, bool release)
+{
+       RSEQ_INJECT_C(9)
+
+       if (unlikely(start_value.lock_state != RSEQ_LOCK_STATE_RESTART)) {
+               if (start_value.lock_state == RSEQ_LOCK_STATE_LOCK)
+                       rseq_fallback_wait(rlock);
+               return false;
+       }
+       switch (type) {
+       case RSEQ_FINISH_SINGLE:
+               RSEQ_FINISH_ASM(p_final, to_write_final, start_value, failure,
+                       /* no speculative write */, /* no speculative write */,
+                       RSEQ_FINISH_FINAL_STORE_ASM(),
+                       RSEQ_FINISH_FINAL_STORE_INPUT(p_final, to_write_final),
+                       /* no extra clobber */, /* no arg */, /* no arg */,
+                       /* no arg */
+               );
+               break;
+       case RSEQ_FINISH_TWO:
+               if (release) {
+                       RSEQ_FINISH_ASM(p_final, to_write_final, start_value, failure,
+                               RSEQ_FINISH_SPECULATIVE_STORE_ASM(),
+                               RSEQ_FINISH_SPECULATIVE_STORE_INPUT(p_spec, to_write_spec),
+                               RSEQ_FINISH_FINAL_STORE_RELEASE_ASM(),
+                               RSEQ_FINISH_FINAL_STORE_INPUT(p_final, to_write_final),
+                               /* no extra clobber */, /* no arg */, /* no arg */,
+                               /* no arg */
+                       );
+               } else {
+                       RSEQ_FINISH_ASM(p_final, to_write_final, start_value, failure,
+                               RSEQ_FINISH_SPECULATIVE_STORE_ASM(),
+                               RSEQ_FINISH_SPECULATIVE_STORE_INPUT(p_spec, to_write_spec),
+                               RSEQ_FINISH_FINAL_STORE_ASM(),
+                               RSEQ_FINISH_FINAL_STORE_INPUT(p_final, to_write_final),
+                               /* no extra clobber */, /* no arg */, /* no arg */,
+                               /* no arg */
+                       );
+               }
+               break;
+       case RSEQ_FINISH_MEMCPY:
+               if (release) {
+                       RSEQ_FINISH_ASM(p_final, to_write_final, start_value, failure,
+                               RSEQ_FINISH_MEMCPY_STORE_ASM(),
+                               RSEQ_FINISH_MEMCPY_STORE_INPUT(p_memcpy, to_write_memcpy, len_memcpy),
+                               RSEQ_FINISH_FINAL_STORE_RELEASE_ASM(),
+                               RSEQ_FINISH_FINAL_STORE_INPUT(p_final, to_write_final),
+                               RSEQ_FINISH_MEMCPY_CLOBBER(),
+                               RSEQ_FINISH_MEMCPY_SETUP(),
+                               RSEQ_FINISH_MEMCPY_TEARDOWN(),
+                               RSEQ_FINISH_MEMCPY_SCRATCH()
+                       );
+               } else {
+                       RSEQ_FINISH_ASM(p_final, to_write_final, start_value, failure,
+                               RSEQ_FINISH_MEMCPY_STORE_ASM(),
+                               RSEQ_FINISH_MEMCPY_STORE_INPUT(p_memcpy, to_write_memcpy, len_memcpy),
+                               RSEQ_FINISH_FINAL_STORE_ASM(),
+                               RSEQ_FINISH_FINAL_STORE_INPUT(p_final, to_write_final),
+                               RSEQ_FINISH_MEMCPY_CLOBBER(),
+                               RSEQ_FINISH_MEMCPY_SETUP(),
+                               RSEQ_FINISH_MEMCPY_TEARDOWN(),
+                               RSEQ_FINISH_MEMCPY_SCRATCH()
+                       );
+               }
+               break;
+       }
+       return true;
+failure:
+       RSEQ_INJECT_FAILED
+       return false;
+}
+
+static inline __attribute__((always_inline))
+bool rseq_finish(struct rseq_lock *rlock,
+               intptr_t *p, intptr_t to_write,
+               struct rseq_state start_value)
+{
+       return __rseq_finish(rlock, NULL, 0,
+                       NULL, NULL, 0,
+                       p, to_write, start_value,
+                       RSEQ_FINISH_SINGLE, false);
+}
+
+static inline __attribute__((always_inline))
+bool rseq_finish2(struct rseq_lock *rlock,
+               intptr_t *p_spec, intptr_t to_write_spec,
+               intptr_t *p_final, intptr_t to_write_final,
+               struct rseq_state start_value)
+{
+       return __rseq_finish(rlock, p_spec, to_write_spec,
+                       NULL, NULL, 0,
+                       p_final, to_write_final, start_value,
+                       RSEQ_FINISH_TWO, false);
+}
+
+static inline __attribute__((always_inline))
+bool rseq_finish2_release(struct rseq_lock *rlock,
+               intptr_t *p_spec, intptr_t to_write_spec,
+               intptr_t *p_final, intptr_t to_write_final,
+               struct rseq_state start_value)
+{
+       return __rseq_finish(rlock, p_spec, to_write_spec,
+                       NULL, NULL, 0,
+                       p_final, to_write_final, start_value,
+                       RSEQ_FINISH_TWO, true);
+}
+
+static inline __attribute__((always_inline))
+bool rseq_finish_memcpy(struct rseq_lock *rlock,
+               void *p_memcpy, void *to_write_memcpy, size_t len_memcpy,
+               intptr_t *p_final, intptr_t to_write_final,
+               struct rseq_state start_value)
+{
+       return __rseq_finish(rlock, NULL, 0,
+                       p_memcpy, to_write_memcpy, len_memcpy,
+                       p_final, to_write_final, start_value,
+                       RSEQ_FINISH_MEMCPY, false);
+}
+
+static inline __attribute__((always_inline))
+bool rseq_finish_memcpy_release(struct rseq_lock *rlock,
+               void *p_memcpy, void *to_write_memcpy, size_t len_memcpy,
+               intptr_t *p_final, intptr_t to_write_final,
+               struct rseq_state start_value)
+{
+       return __rseq_finish(rlock, NULL, 0,
+                       p_memcpy, to_write_memcpy, len_memcpy,
+                       p_final, to_write_final, start_value,
+                       RSEQ_FINISH_MEMCPY, true);
+}
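+
+/*
+ * Example usage sketch (illustrative only, not part of this patch's
+ * API): incrementing a per-cpu counter with rseq_start()/rseq_finish().
+ * The "lock" and "counters" objects below are hypothetical and assumed
+ * to be set up by the caller. Production code would typically prefer
+ * the do_rseq() helpers below, which fall back on locking after two
+ * failed restartable attempts.
+ *
+ *     struct rseq_state state;
+ *     intptr_t *targetptr, newval;
+ *     int cpu;
+ *
+ *     do {
+ *             state = rseq_start(&lock);
+ *             cpu = rseq_cpu_at_start(state);
+ *             targetptr = &counters[cpu].value;
+ *             newval = counters[cpu].value + 1;
+ *     } while (!rseq_finish(&lock, targetptr, newval, state));
+ */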
+
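+/*
+ * The __rseq_store_<type> macros below implement the final stores for
+ * the lock-protected fallback path. __do_rseq() selects the matching
+ * macro by token-pasting its _type argument (see __rseq_store_##_type).
+ */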
+#define __rseq_store_RSEQ_FINISH_SINGLE(_targetptr_spec, _newval_spec, \
+               _dest_memcpy, _src_memcpy, _len_memcpy,                 \
+               _targetptr_final, _newval_final)                        \
+       do {                                                            \
+               *(_targetptr_final) = (_newval_final);                  \
+       } while (0)
+
+#define __rseq_store_RSEQ_FINISH_TWO(_targetptr_spec, _newval_spec,    \
+               _dest_memcpy, _src_memcpy, _len_memcpy,                 \
+               _targetptr_final, _newval_final)                        \
+       do {                                                            \
+               *(_targetptr_spec) = (_newval_spec);                    \
+               *(_targetptr_final) = (_newval_final);                  \
+       } while (0)
+
+#define __rseq_store_RSEQ_FINISH_MEMCPY(_targetptr_spec,               \
+               _newval_spec, _dest_memcpy, _src_memcpy, _len_memcpy,   \
+               _targetptr_final, _newval_final)                        \
+       do {                                                            \
+               memcpy(_dest_memcpy, _src_memcpy, _len_memcpy);         \
+               *(_targetptr_final) = (_newval_final);                  \
+       } while (0)
+
+/*
+ * Helper macro performing two restartable critical section attempts;
+ * if both fail, it falls back on locking.
+ */
+#define __do_rseq(_type, _lock, _rseq_state, _cpu, _result,            \
+               _targetptr_spec, _newval_spec,                          \
+               _dest_memcpy, _src_memcpy, _len_memcpy,                 \
+               _targetptr_final, _newval_final, _code, _release)       \
+       do {                                                            \
+               _rseq_state = rseq_start(_lock);                        \
+               _cpu = rseq_cpu_at_start(_rseq_state);                  \
+               _result = true;                                         \
+               _code                                                   \
+               if (unlikely(!_result))                                 \
+                       break;                                          \
+               if (likely(__rseq_finish(_lock,                         \
+                               _targetptr_spec, _newval_spec,          \
+                               _dest_memcpy, _src_memcpy, _len_memcpy, \
+                               _targetptr_final, _newval_final,        \
+                               _rseq_state, _type, _release)))         \
+                       break;                                          \
+               _rseq_state = rseq_start(_lock);                        \
+               _cpu = rseq_cpu_at_start(_rseq_state);                  \
+               _result = true;                                         \
+               _code                                                   \
+               if (unlikely(!_result))                                 \
+                       break;                                          \
+               if (likely(__rseq_finish(_lock,                         \
+                               _targetptr_spec, _newval_spec,          \
+                               _dest_memcpy, _src_memcpy, _len_memcpy, \
+                               _targetptr_final, _newval_final,        \
+                               _rseq_state, _type, _release)))         \
+                       break;                                          \
+               _cpu = rseq_fallback_begin(_lock);                      \
+               _result = true;                                         \
+               _code                                                   \
+               if (likely(_result))                                    \
+                       __rseq_store_##_type(_targetptr_spec,           \
+                                _newval_spec, _dest_memcpy,            \
+                               _src_memcpy, _len_memcpy,               \
+                               _targetptr_final, _newval_final);       \
+               rseq_fallback_end(_lock, _cpu);                         \
+       } while (0)
+
+#define do_rseq(_lock, _rseq_state, _cpu, _result, _targetptr, _newval,        \
+               _code)                                                  \
+       __do_rseq(RSEQ_FINISH_SINGLE, _lock, _rseq_state, _cpu, _result,\
+               NULL, 0, NULL, NULL, 0, _targetptr, _newval, _code, false)
+
+#define do_rseq2(_lock, _rseq_state, _cpu, _result,                    \
+               _targetptr_spec, _newval_spec,                          \
+               _targetptr_final, _newval_final, _code)                 \
+       __do_rseq(RSEQ_FINISH_TWO, _lock, _rseq_state, _cpu, _result,   \
+               _targetptr_spec, _newval_spec,                          \
+               NULL, NULL, 0,                                          \
+               _targetptr_final, _newval_final, _code, false)
+
+#define do_rseq2_release(_lock, _rseq_state, _cpu, _result,            \
+               _targetptr_spec, _newval_spec,                          \
+               _targetptr_final, _newval_final, _code)                 \
+       __do_rseq(RSEQ_FINISH_TWO, _lock, _rseq_state, _cpu, _result,   \
+               _targetptr_spec, _newval_spec,                          \
+               NULL, NULL, 0,                                          \
+               _targetptr_final, _newval_final, _code, true)
+
+#define do_rseq_memcpy(_lock, _rseq_state, _cpu, _result,              \
+               _dest_memcpy, _src_memcpy, _len_memcpy,                 \
+               _targetptr_final, _newval_final, _code)                 \
+       __do_rseq(RSEQ_FINISH_MEMCPY, _lock, _rseq_state, _cpu, _result,\
+               NULL, 0,                                                \
+               _dest_memcpy, _src_memcpy, _len_memcpy,                 \
+               _targetptr_final, _newval_final, _code, false)
+
+#define do_rseq_memcpy_release(_lock, _rseq_state, _cpu, _result,      \
+               _dest_memcpy, _src_memcpy, _len_memcpy,                 \
+               _targetptr_final, _newval_final, _code)                 \
+       __do_rseq(RSEQ_FINISH_MEMCPY, _lock, _rseq_state, _cpu, _result,\
+               NULL, 0,                                                \
+               _dest_memcpy, _src_memcpy, _len_memcpy,                 \
+               _targetptr_final, _newval_final, _code, true)
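+
+/*
+ * Example use of do_rseq() (illustrative sketch; the rseq_lock and
+ * per-cpu counter structure below are hypothetical). The code block
+ * argument runs on each attempt and may observe a different cpu every
+ * time; it can abort the operation by setting result to false.
+ *
+ *     static struct rseq_lock lock;
+ *     static struct { intptr_t count; } c[CPU_SETSIZE];
+ *
+ *     static void inc_percpu_count(void)
+ *     {
+ *             struct rseq_state rseq_state;
+ *             intptr_t *targetptr, newval;
+ *             int cpu;
+ *             bool result;
+ *
+ *             do_rseq(&lock, rseq_state, cpu, result, targetptr, newval,
+ *                     {
+ *                             newval = c[cpu].count + 1;
+ *                             targetptr = &c[cpu].count;
+ *                     });
+ *     }
+ */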
+
+#endif  /* RSEQ_H_ */