// SPDX-License-Identifier: LGPL-2.1
#define _GNU_SOURCE
#include <assert.h>
#include <linux/membarrier.h>
#include <pthread.h>
#include <sched.h>
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <syscall.h>
#include <unistd.h>
#include <poll.h>
#include <sys/types.h>
#include <signal.h>
#include <errno.h>
#include <stddef.h>
#include <stdbool.h>

static inline pid_t rseq_gettid(void)
{
	return syscall(__NR_gettid);
}

#define NR_INJECT 9
static int loop_cnt[NR_INJECT + 1];

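/*
 * The per-injection-point loop counters are also exported under
 * assembly-visible names so that the x86 variants of RSEQ_INJECT_ASM()
 * below can reference them directly; the other architectures pass
 * loop_cnt[] through the RSEQ_INJECT_INPUT "m" operands instead.
 */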
static int loop_cnt_1 asm("asm_loop_cnt_1") __attribute__((used));
static int loop_cnt_2 asm("asm_loop_cnt_2") __attribute__((used));
static int loop_cnt_3 asm("asm_loop_cnt_3") __attribute__((used));
static int loop_cnt_4 asm("asm_loop_cnt_4") __attribute__((used));
static int loop_cnt_5 asm("asm_loop_cnt_5") __attribute__((used));
static int loop_cnt_6 asm("asm_loop_cnt_6") __attribute__((used));

static int opt_modulo, verbose;

static int opt_yield, opt_signal, opt_sleep,
		opt_disable_rseq, opt_threads = 200,
		opt_disable_mod = 0, opt_test = 's';

static long long opt_reps = 5000;

static __thread __attribute__((tls_model("initial-exec")))
unsigned int signals_delivered;

#ifndef BENCHMARK

static __thread __attribute__((tls_model("initial-exec"), unused))
unsigned int yield_mod_cnt, nr_abort;

#define printf_verbose(fmt, ...) \
	do { \
		if (verbose) \
			printf(fmt, ## __VA_ARGS__); \
	} while (0)

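/*
 * Each architecture below provides RSEQ_INJECT_ASM(n): a small delay loop
 * that spins loop_cnt[n] times. The rseq headers expand it at numbered
 * steps of their inline-asm critical sections, so that delays (and thus
 * preemption-, migration- or signal-induced aborts) can be injected at
 * specific points. RSEQ_INJECT_CLOBBER and RSEQ_INJECT_INPUT declare the
 * scratch register and memory operands those loops need.
 */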
#ifdef __i386__

#define INJECT_ASM_REG "eax"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"mov asm_loop_cnt_" #n ", %%" INJECT_ASM_REG "\n\t" \
	"test %%" INJECT_ASM_REG ",%%" INJECT_ASM_REG "\n\t" \
	"jz 333f\n\t" \
	"222:\n\t" \
	"dec %%" INJECT_ASM_REG "\n\t" \
	"jnz 222b\n\t" \
	"333:\n\t"

#elif defined(__x86_64__)

#define INJECT_ASM_REG_P "rax"
#define INJECT_ASM_REG "eax"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG_P \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"lea asm_loop_cnt_" #n "(%%rip), %%" INJECT_ASM_REG_P "\n\t" \
	"mov (%%" INJECT_ASM_REG_P "), %%" INJECT_ASM_REG "\n\t" \
	"test %%" INJECT_ASM_REG ",%%" INJECT_ASM_REG "\n\t" \
	"jz 333f\n\t" \
	"222:\n\t" \
	"dec %%" INJECT_ASM_REG "\n\t" \
	"jnz 222b\n\t" \
	"333:\n\t"

#elif defined(__s390__)

#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1]"m"(loop_cnt[1]) \
	, [loop_cnt_2]"m"(loop_cnt[2]) \
	, [loop_cnt_3]"m"(loop_cnt[3]) \
	, [loop_cnt_4]"m"(loop_cnt[4]) \
	, [loop_cnt_5]"m"(loop_cnt[5]) \
	, [loop_cnt_6]"m"(loop_cnt[6])

#define INJECT_ASM_REG "r12"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"l %%" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
	"ltr %%" INJECT_ASM_REG ", %%" INJECT_ASM_REG "\n\t" \
	"je 333f\n\t" \
	"222:\n\t" \
	"ahi %%" INJECT_ASM_REG ", -1\n\t" \
	"jnz 222b\n\t" \
	"333:\n\t"

#elif defined(__ARMEL__)

#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1]"m"(loop_cnt[1]) \
	, [loop_cnt_2]"m"(loop_cnt[2]) \
	, [loop_cnt_3]"m"(loop_cnt[3]) \
	, [loop_cnt_4]"m"(loop_cnt[4]) \
	, [loop_cnt_5]"m"(loop_cnt[5]) \
	, [loop_cnt_6]"m"(loop_cnt[6])

#define INJECT_ASM_REG "r4"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"ldr " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
	"cmp " INJECT_ASM_REG ", #0\n\t" \
	"beq 333f\n\t" \
	"222:\n\t" \
	"subs " INJECT_ASM_REG ", #1\n\t" \
	"bne 222b\n\t" \
	"333:\n\t"

#elif defined(__AARCH64EL__)

#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1] "Qo" (loop_cnt[1]) \
	, [loop_cnt_2] "Qo" (loop_cnt[2]) \
	, [loop_cnt_3] "Qo" (loop_cnt[3]) \
	, [loop_cnt_4] "Qo" (loop_cnt[4]) \
	, [loop_cnt_5] "Qo" (loop_cnt[5]) \
	, [loop_cnt_6] "Qo" (loop_cnt[6])

#define INJECT_ASM_REG RSEQ_ASM_TMP_REG32

#define RSEQ_INJECT_ASM(n) \
	" ldr " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n" \
	" cbz " INJECT_ASM_REG ", 333f\n" \
	"222:\n" \
	" sub " INJECT_ASM_REG ", " INJECT_ASM_REG ", #1\n" \
	" cbnz " INJECT_ASM_REG ", 222b\n" \
	"333:\n"

#elif defined(__PPC__)

#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1]"m"(loop_cnt[1]) \
	, [loop_cnt_2]"m"(loop_cnt[2]) \
	, [loop_cnt_3]"m"(loop_cnt[3]) \
	, [loop_cnt_4]"m"(loop_cnt[4]) \
	, [loop_cnt_5]"m"(loop_cnt[5]) \
	, [loop_cnt_6]"m"(loop_cnt[6])

#define INJECT_ASM_REG "r18"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"lwz %%" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
	"cmpwi %%" INJECT_ASM_REG ", 0\n\t" \
	"beq 333f\n\t" \
	"222:\n\t" \
	"subic. %%" INJECT_ASM_REG ", %%" INJECT_ASM_REG ", 1\n\t" \
	"bne 222b\n\t" \
	"333:\n\t"

#elif defined(__mips__)

#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1]"m"(loop_cnt[1]) \
	, [loop_cnt_2]"m"(loop_cnt[2]) \
	, [loop_cnt_3]"m"(loop_cnt[3]) \
	, [loop_cnt_4]"m"(loop_cnt[4]) \
	, [loop_cnt_5]"m"(loop_cnt[5]) \
	, [loop_cnt_6]"m"(loop_cnt[6])

#define INJECT_ASM_REG "$5"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"lw " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
	"beqz " INJECT_ASM_REG ", 333f\n\t" \
	"222:\n\t" \
	"addiu " INJECT_ASM_REG ", -1\n\t" \
	"bnez " INJECT_ASM_REG ", 222b\n\t" \
	"333:\n\t"
#elif defined(__riscv)

#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1]"m"(loop_cnt[1]) \
	, [loop_cnt_2]"m"(loop_cnt[2]) \
	, [loop_cnt_3]"m"(loop_cnt[3]) \
	, [loop_cnt_4]"m"(loop_cnt[4]) \
	, [loop_cnt_5]"m"(loop_cnt[5]) \
	, [loop_cnt_6]"m"(loop_cnt[6])

#define INJECT_ASM_REG "t1"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"lw " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
	"beqz " INJECT_ASM_REG ", 333f\n\t" \
	"222:\n\t" \
	"addi " INJECT_ASM_REG "," INJECT_ASM_REG ", -1\n\t" \
	"bnez " INJECT_ASM_REG ", 222b\n\t" \
	"333:\n\t"
#elif defined(__or1k__)

#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1]"m"(loop_cnt[1]) \
	, [loop_cnt_2]"m"(loop_cnt[2]) \
	, [loop_cnt_3]"m"(loop_cnt[3]) \
	, [loop_cnt_4]"m"(loop_cnt[4]) \
	, [loop_cnt_5]"m"(loop_cnt[5]) \
	, [loop_cnt_6]"m"(loop_cnt[6])

#define INJECT_ASM_REG "r31"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"l.lwz " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
	"l.sfeqi " INJECT_ASM_REG ", 0\n\t" \
	"l.bf 333f\n\t" \
	" l.nop\n\t" \
	"222:\n\t" \
	"l.addi " INJECT_ASM_REG "," INJECT_ASM_REG ", -1\n\t" \
	"l.sfeqi " INJECT_ASM_REG ", 0\n\t" \
	"l.bnf 222b\n\t" \
	" l.nop\n\t" \
	"333:\n\t"
#else
#error unsupported target
#endif

#define RSEQ_INJECT_FAILED \
	nr_abort++;

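/*
 * C-level injection point: busy-wait for loop_cnt[n] iterations (with a
 * compiler barrier each time around). When loop_cnt[n] == -1 and
 * opt_modulo is set, every opt_modulo-th pass through this point
 * optionally sleeps, yields and/or raises SIGUSR1 to provoke aborts.
 */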
#define RSEQ_INJECT_C(n) \
{ \
	int loc_i, loc_nr_loops = loop_cnt[n]; \
	\
	for (loc_i = 0; loc_i < loc_nr_loops; loc_i++) { \
		rseq_barrier(); \
	} \
	if (loc_nr_loops == -1 && opt_modulo) { \
		if (yield_mod_cnt == opt_modulo - 1) { \
			if (opt_sleep > 0) \
				poll(NULL, 0, opt_sleep); \
			if (opt_yield) \
				sched_yield(); \
			if (opt_signal) \
				raise(SIGUSR1); \
			yield_mod_cnt = 0; \
		} else { \
			yield_mod_cnt++; \
		} \
	} \
}

#else

#define printf_verbose(fmt, ...)

#endif /* BENCHMARK */

#include "rseq.h"

static enum rseq_mo opt_mo = RSEQ_MO_RELAXED;

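/*
 * The membarrier-based consistency test is only built when the
 * architecture provides the offset-deref-addv rseq operation
 * (RSEQ_ARCH_HAS_OFFSET_DEREF_ADDV).
 */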
#ifdef RSEQ_ARCH_HAS_OFFSET_DEREF_ADDV
#define TEST_MEMBARRIER

static int sys_membarrier(int cmd, int flags, int cpu_id)
{
	return syscall(__NR_membarrier, cmd, flags, cpu_id);
}
#endif

#ifdef BUILDOPT_RSEQ_PERCPU_MM_CID
# define RSEQ_PERCPU	RSEQ_PERCPU_MM_CID
static
int get_current_cpu_id(void)
{
	return rseq_current_mm_cid();
}
static
bool rseq_validate_cpu_id(void)
{
	return rseq_mm_cid_available();
}
static
bool rseq_use_cpu_index(void)
{
	return false;	/* Use mm_cid */
}
# ifdef TEST_MEMBARRIER
/*
 * Membarrier does not currently support targeting a mm_cid, so
 * issue the barrier on all cpus.
 */
static
int rseq_membarrier_expedited(int cpu)
{
	return sys_membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ,
			      0, 0);
}
# endif /* TEST_MEMBARRIER */
#else
# define RSEQ_PERCPU	RSEQ_PERCPU_CPU_ID
static
int get_current_cpu_id(void)
{
	return rseq_cpu_start();
}
static
bool rseq_validate_cpu_id(void)
{
	return rseq_current_cpu_raw() >= 0;
}
static
bool rseq_use_cpu_index(void)
{
	return true;	/* Use cpu_id as index. */
}
# ifdef TEST_MEMBARRIER
static
int rseq_membarrier_expedited(int cpu)
{
	return sys_membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ,
			      MEMBARRIER_CMD_FLAG_CPU, cpu);
}
# endif /* TEST_MEMBARRIER */
#endif

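/*
 * The tests index per-cpu data either by the memory map concurrency id
 * (mm_cid) or by the raw cpu number, behind get_current_cpu_id();
 * rseq_use_cpu_index() tells the setup/teardown code whether entries map
 * to actual CPUs, i.e. whether the allowed-CPU affinity mask applies.
 *
 * Each per-cpu entry below is aligned to 128 bytes so that it sits on
 * its own cache line(s), avoiding false sharing between CPUs.
 */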
struct percpu_lock_entry {
	intptr_t v;
} __attribute__((aligned(128)));

struct percpu_lock {
	struct percpu_lock_entry c[CPU_SETSIZE];
};

struct test_data_entry {
	intptr_t count;
} __attribute__((aligned(128)));

struct spinlock_test_data {
	struct percpu_lock lock;
	struct test_data_entry c[CPU_SETSIZE];
};

struct spinlock_thread_test_data {
	struct spinlock_test_data *data;
	long long reps;
	int reg;
};

struct inc_test_data {
	struct test_data_entry c[CPU_SETSIZE];
};

struct inc_thread_test_data {
	struct inc_test_data *data;
	long long reps;
	int reg;
};

struct percpu_list_node {
	intptr_t data;
	struct percpu_list_node *next;
};

struct percpu_list_entry {
	struct percpu_list_node *head;
} __attribute__((aligned(128)));

struct percpu_list {
	struct percpu_list_entry c[CPU_SETSIZE];
};

#define BUFFER_ITEM_PER_CPU 100

struct percpu_buffer_node {
	intptr_t data;
};

struct percpu_buffer_entry {
	intptr_t offset;
	intptr_t buflen;
	struct percpu_buffer_node **array;
} __attribute__((aligned(128)));

struct percpu_buffer {
	struct percpu_buffer_entry c[CPU_SETSIZE];
};

#define MEMCPY_BUFFER_ITEM_PER_CPU 100

struct percpu_memcpy_buffer_node {
	intptr_t data1;
	uint64_t data2;
};

struct percpu_memcpy_buffer_entry {
	intptr_t offset;
	intptr_t buflen;
	struct percpu_memcpy_buffer_node *array;
} __attribute__((aligned(128)));

struct percpu_memcpy_buffer {
	struct percpu_memcpy_buffer_entry c[CPU_SETSIZE];
};

/* A simple percpu spinlock. Grabs lock on current cpu. */
static int rseq_this_cpu_lock(struct percpu_lock *lock)
{
	int cpu;

	for (;;) {
		int ret;

		cpu = get_current_cpu_id();
		if (cpu < 0) {
			fprintf(stderr, "pid: %d: tid: %d, cpu: %d: cid: %d\n",
				getpid(), (int) rseq_gettid(), rseq_current_cpu_raw(), cpu);
			abort();
		}
		ret = rseq_cmpeqv_storev(RSEQ_MO_RELAXED, RSEQ_PERCPU,
					 &lock->c[cpu].v,
					 0, 1, cpu);
		if (rseq_likely(!ret))
			break;
		/* Retry if comparison fails or rseq aborts. */
	}
	/*
	 * Acquire semantic when taking lock after control dependency.
	 * Matches rseq_smp_store_release().
	 */
	rseq_smp_acquire__after_ctrl_dep();
	return cpu;
}

static void rseq_percpu_unlock(struct percpu_lock *lock, int cpu)
{
	assert(lock->c[cpu].v == 1);
	/*
	 * Release lock, with release semantic. Matches
	 * rseq_smp_acquire__after_ctrl_dep().
	 */
	rseq_smp_store_release(&lock->c[cpu].v, 0);
}

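/*
 * Usage sketch for the per-cpu lock, as exercised by
 * test_percpu_spinlock_thread() below:
 *
 *	cpu = rseq_this_cpu_lock(&data->lock);
 *	data->c[cpu].count++;
 *	rseq_percpu_unlock(&data->lock, cpu);
 */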
void *test_percpu_spinlock_thread(void *arg)
{
	struct spinlock_thread_test_data *thread_data = arg;
	struct spinlock_test_data *data = thread_data->data;
	long long i, reps;

	if (!opt_disable_rseq && thread_data->reg &&
	    rseq_register_current_thread())
		abort();
	reps = thread_data->reps;
	for (i = 0; i < reps; i++) {
		int cpu = rseq_this_cpu_lock(&data->lock);
		data->c[cpu].count++;
		rseq_percpu_unlock(&data->lock, cpu);
#ifndef BENCHMARK
		if (i != 0 && !(i % (reps / 10)))
			printf_verbose("tid %d: count %lld\n",
				       (int) rseq_gettid(), i);
#endif
	}
	printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
		       (int) rseq_gettid(), nr_abort, signals_delivered);
	if (!opt_disable_rseq && thread_data->reg &&
	    rseq_unregister_current_thread())
		abort();
	return NULL;
}

/*
 * A simple test which implements a sharded counter using a per-cpu
 * lock. Obviously real applications might prefer to simply use a
 * per-cpu increment; however, this is reasonable for a test and the
 * lock can be extended to synchronize more complicated operations.
 */
void test_percpu_spinlock(void)
{
	const int num_threads = opt_threads;
	int i, ret;
	uint64_t sum;
	pthread_t test_threads[num_threads];
	struct spinlock_test_data data;
	struct spinlock_thread_test_data thread_data[num_threads];

	memset(&data, 0, sizeof(data));
	for (i = 0; i < num_threads; i++) {
		thread_data[i].reps = opt_reps;
		if (opt_disable_mod <= 0 || (i % opt_disable_mod))
			thread_data[i].reg = 1;
		else
			thread_data[i].reg = 0;
		thread_data[i].data = &data;
		ret = pthread_create(&test_threads[i], NULL,
				     test_percpu_spinlock_thread,
				     &thread_data[i]);
		if (ret) {
			errno = ret;
			perror("pthread_create");
			abort();
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_join(test_threads[i], NULL);
		if (ret) {
			errno = ret;
			perror("pthread_join");
			abort();
		}
	}

	sum = 0;
	for (i = 0; i < CPU_SETSIZE; i++)
		sum += data.c[i].count;

	assert(sum == (uint64_t)opt_reps * num_threads);
}

void *test_percpu_inc_thread(void *arg)
{
	struct inc_thread_test_data *thread_data = arg;
	struct inc_test_data *data = thread_data->data;
	long long i, reps;

	if (!opt_disable_rseq && thread_data->reg &&
	    rseq_register_current_thread())
		abort();
	reps = thread_data->reps;
	for (i = 0; i < reps; i++) {
		int ret;

		do {
			int cpu;

			cpu = get_current_cpu_id();
			ret = rseq_addv(RSEQ_MO_RELAXED, RSEQ_PERCPU,
					&data->c[cpu].count, 1, cpu);
		} while (rseq_unlikely(ret));
#ifndef BENCHMARK
		if (i != 0 && !(i % (reps / 10)))
			printf_verbose("tid %d: count %lld\n",
				       (int) rseq_gettid(), i);
#endif
	}
	printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
		       (int) rseq_gettid(), nr_abort, signals_delivered);
	if (!opt_disable_rseq && thread_data->reg &&
	    rseq_unregister_current_thread())
		abort();
	return NULL;
}

void test_percpu_inc(void)
{
	const int num_threads = opt_threads;
	int i, ret;
	uint64_t sum;
	pthread_t test_threads[num_threads];
	struct inc_test_data data;
	struct inc_thread_test_data thread_data[num_threads];

	memset(&data, 0, sizeof(data));
	for (i = 0; i < num_threads; i++) {
		thread_data[i].reps = opt_reps;
		if (opt_disable_mod <= 0 || (i % opt_disable_mod))
			thread_data[i].reg = 1;
		else
			thread_data[i].reg = 0;
		thread_data[i].data = &data;
		ret = pthread_create(&test_threads[i], NULL,
				     test_percpu_inc_thread,
				     &thread_data[i]);
		if (ret) {
			errno = ret;
			perror("pthread_create");
			abort();
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_join(test_threads[i], NULL);
		if (ret) {
			errno = ret;
			perror("pthread_join");
			abort();
		}
	}

	sum = 0;
	for (i = 0; i < CPU_SETSIZE; i++)
		sum += data.c[i].count;

	assert(sum == (uint64_t)opt_reps * num_threads);
}

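/*
 * Push installs the new node as the per-cpu list head with
 * rseq_cmpeqv_storev(): the snapshot of the old head is re-checked
 * inside the critical section, so the store only succeeds if the thread
 * was neither preempted nor migrated in between; otherwise it retries on
 * the (possibly different) current CPU.
 */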
void this_cpu_list_push(struct percpu_list *list,
			struct percpu_list_node *node,
			int *_cpu)
{
	int cpu;

	for (;;) {
		intptr_t *targetptr, newval, expect;
		int ret;

		cpu = get_current_cpu_id();
		/* Load list->c[cpu].head with single-copy atomicity. */
		expect = (intptr_t)RSEQ_READ_ONCE(list->c[cpu].head);
		newval = (intptr_t)node;
		targetptr = (intptr_t *)&list->c[cpu].head;
		node->next = (struct percpu_list_node *)expect;
		ret = rseq_cmpeqv_storev(RSEQ_MO_RELAXED, RSEQ_PERCPU,
					 targetptr, expect, newval, cpu);
		if (rseq_likely(!ret))
			break;
		/* Retry if comparison fails or rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
}

/*
 * Unlike a traditional lock-less linked list, the availability of a
 * rseq primitive allows us to implement pop without concerns over
 * ABA-type races: the load of the head, the dereference of its ->next
 * and the store back to the list head all execute within one rseq
 * critical section on the owning CPU, which aborts (and retries) if the
 * thread is preempted, migrated or interrupted by a signal in between.
 */
struct percpu_list_node *this_cpu_list_pop(struct percpu_list *list,
					   int *_cpu)
{
	struct percpu_list_node *node = NULL;
	int cpu;

	for (;;) {
		struct percpu_list_node *head;
		intptr_t *targetptr, expectnot, *load;
		long offset;
		int ret;

		cpu = get_current_cpu_id();
		targetptr = (intptr_t *)&list->c[cpu].head;
		expectnot = (intptr_t)NULL;
		offset = offsetof(struct percpu_list_node, next);
		load = (intptr_t *)&head;
		ret = rseq_cmpnev_storeoffp_load(RSEQ_MO_RELAXED, RSEQ_PERCPU,
						 targetptr, expectnot,
						 offset, load, cpu);
		if (rseq_likely(!ret)) {
			node = head;
			break;
		}
		if (ret > 0)
			break;
		/* Retry if rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
	return node;
}

/*
 * __percpu_list_pop is not safe against concurrent accesses. Should
 * only be used on lists that are not concurrently modified.
 */
struct percpu_list_node *__percpu_list_pop(struct percpu_list *list, int cpu)
{
	struct percpu_list_node *node;

	node = list->c[cpu].head;
	if (!node)
		return NULL;
	list->c[cpu].head = node->next;
	return node;
}

void *test_percpu_list_thread(void *arg)
{
	long long i, reps;
	struct percpu_list *list = (struct percpu_list *)arg;

	if (!opt_disable_rseq && rseq_register_current_thread())
		abort();

	reps = opt_reps;
	for (i = 0; i < reps; i++) {
		struct percpu_list_node *node;

		node = this_cpu_list_pop(list, NULL);
		if (opt_yield)
			sched_yield();	/* encourage shuffling */
		if (node)
			this_cpu_list_push(list, node, NULL);
	}

	printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
		       (int) rseq_gettid(), nr_abort, signals_delivered);
	if (!opt_disable_rseq && rseq_unregister_current_thread())
		abort();

	return NULL;
}

/* Simultaneous modification to a per-cpu linked list from many threads. */
void test_percpu_list(void)
{
	const int num_threads = opt_threads;
	int i, j, ret;
	uint64_t sum = 0, expected_sum = 0;
	struct percpu_list list;
	pthread_t test_threads[num_threads];
	cpu_set_t allowed_cpus;

	memset(&list, 0, sizeof(list));

	/* Generate list entries for every usable cpu. */
	sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
	for (i = 0; i < CPU_SETSIZE; i++) {
		if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
			continue;
		for (j = 1; j <= 100; j++) {
			struct percpu_list_node *node;

			expected_sum += j;

			node = malloc(sizeof(*node));
			assert(node);
			node->data = j;
			node->next = list.c[i].head;
			list.c[i].head = node;
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_create(&test_threads[i], NULL,
				     test_percpu_list_thread, &list);
		if (ret) {
			errno = ret;
			perror("pthread_create");
			abort();
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_join(test_threads[i], NULL);
		if (ret) {
			errno = ret;
			perror("pthread_join");
			abort();
		}
	}

	for (i = 0; i < CPU_SETSIZE; i++) {
		struct percpu_list_node *node;

		if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
			continue;

		while ((node = __percpu_list_pop(&list, i))) {
			sum += node->data;
			free(node);
		}
	}

	/*
	 * All entries should now be accounted for (unless some external
	 * actor is interfering with our allowed affinity while this
	 * test is running).
	 */
	assert(sum == expected_sum);
}

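/*
 * Push commits in two steps within one critical section: a speculative
 * store of the node pointer into array[offset], then a final store of
 * offset + 1 which publishes it. Both are guarded by re-checking offset,
 * and opt_mo selects the memory ordering of the final store (relaxed by
 * default, release when requested).
 */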
bool this_cpu_buffer_push(struct percpu_buffer *buffer,
			  struct percpu_buffer_node *node,
			  int *_cpu)
{
	bool result = false;
	int cpu;

	for (;;) {
		intptr_t *targetptr_spec, newval_spec;
		intptr_t *targetptr_final, newval_final;
		intptr_t offset;
		int ret;

		cpu = get_current_cpu_id();
		offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
		if (offset == buffer->c[cpu].buflen)
			break;
		newval_spec = (intptr_t)node;
		targetptr_spec = (intptr_t *)&buffer->c[cpu].array[offset];
		newval_final = offset + 1;
		targetptr_final = &buffer->c[cpu].offset;
		ret = rseq_cmpeqv_trystorev_storev(opt_mo, RSEQ_PERCPU,
			targetptr_final, offset, targetptr_spec,
			newval_spec, newval_final, cpu);
		if (rseq_likely(!ret)) {
			result = true;
			break;
		}
		/* Retry if comparison fails or rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
	return result;
}

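/*
 * Pop re-checks both the offset and the array slot against the values
 * read before entering the critical section; the store of offset - 1
 * only succeeds if neither has changed, so a stale head pointer is
 * never returned.
 */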
struct percpu_buffer_node *this_cpu_buffer_pop(struct percpu_buffer *buffer,
					       int *_cpu)
{
	struct percpu_buffer_node *head;
	int cpu;

	for (;;) {
		intptr_t *targetptr, newval;
		intptr_t offset;
		int ret;

		cpu = get_current_cpu_id();
		/* Load offset with single-copy atomicity. */
		offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
		if (offset == 0) {
			head = NULL;
			break;
		}
		head = RSEQ_READ_ONCE(buffer->c[cpu].array[offset - 1]);
		newval = offset - 1;
		targetptr = (intptr_t *)&buffer->c[cpu].offset;
		ret = rseq_cmpeqv_cmpeqv_storev(RSEQ_MO_RELAXED, RSEQ_PERCPU,
			targetptr, offset,
			(intptr_t *)&buffer->c[cpu].array[offset - 1],
			(intptr_t)head, newval, cpu);
		if (rseq_likely(!ret))
			break;
		/* Retry if comparison fails or rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
	return head;
}

/*
 * __percpu_buffer_pop is not safe against concurrent accesses. Should
 * only be used on buffers that are not concurrently modified.
 */
struct percpu_buffer_node *__percpu_buffer_pop(struct percpu_buffer *buffer,
					       int cpu)
{
	struct percpu_buffer_node *head;
	intptr_t offset;

	offset = buffer->c[cpu].offset;
	if (offset == 0)
		return NULL;
	head = buffer->c[cpu].array[offset - 1];
	buffer->c[cpu].offset = offset - 1;
	return head;
}

void *test_percpu_buffer_thread(void *arg)
{
	long long i, reps;
	struct percpu_buffer *buffer = (struct percpu_buffer *)arg;

	if (!opt_disable_rseq && rseq_register_current_thread())
		abort();

	reps = opt_reps;
	for (i = 0; i < reps; i++) {
		struct percpu_buffer_node *node;

		node = this_cpu_buffer_pop(buffer, NULL);
		if (opt_yield)
			sched_yield();	/* encourage shuffling */
		if (node) {
			if (!this_cpu_buffer_push(buffer, node, NULL)) {
				/* Should increase buffer size. */
				abort();
			}
		}
	}

	printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
		       (int) rseq_gettid(), nr_abort, signals_delivered);
	if (!opt_disable_rseq && rseq_unregister_current_thread())
		abort();

	return NULL;
}

/* Simultaneous modification to a per-cpu buffer from many threads. */
void test_percpu_buffer(void)
{
	const int num_threads = opt_threads;
	int i, j, ret;
	uint64_t sum = 0, expected_sum = 0;
	struct percpu_buffer buffer;
	pthread_t test_threads[num_threads];
	cpu_set_t allowed_cpus;

	memset(&buffer, 0, sizeof(buffer));

	/* Generate list entries for every usable cpu. */
	sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
	for (i = 0; i < CPU_SETSIZE; i++) {
		if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
			continue;
		/* Worst case is every item on the same CPU. */
		buffer.c[i].array = malloc(sizeof(*buffer.c[i].array) *
					   CPU_SETSIZE * BUFFER_ITEM_PER_CPU);
		assert(buffer.c[i].array);
		buffer.c[i].buflen = CPU_SETSIZE * BUFFER_ITEM_PER_CPU;
		for (j = 1; j <= BUFFER_ITEM_PER_CPU; j++) {
			struct percpu_buffer_node *node;

			expected_sum += j;

			/*
			 * We could theoretically put the word-sized
			 * "data" directly in the buffer. However, we
			 * want to model objects that would not fit
			 * within a single word, so allocate an object
			 * for each node.
			 */
			node = malloc(sizeof(*node));
			assert(node);
			node->data = j;
			buffer.c[i].array[j - 1] = node;
			buffer.c[i].offset++;
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_create(&test_threads[i], NULL,
				     test_percpu_buffer_thread, &buffer);
		if (ret) {
			errno = ret;
			perror("pthread_create");
			abort();
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_join(test_threads[i], NULL);
		if (ret) {
			errno = ret;
			perror("pthread_join");
			abort();
		}
	}

	for (i = 0; i < CPU_SETSIZE; i++) {
		struct percpu_buffer_node *node;

		if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
			continue;

		while ((node = __percpu_buffer_pop(&buffer, i))) {
			sum += node->data;
			free(node);
		}
		free(buffer.c[i].array);
	}

	/*
	 * All entries should now be accounted for (unless some external
	 * actor is interfering with our allowed affinity while this
	 * test is running).
	 */
	assert(sum == expected_sum);
}

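/*
 * Same idea as the pointer buffer above, but the whole node is copied by
 * value with rseq_cmpeqv_trymemcpy_storev(), modelling payloads larger
 * than one machine word (the copy length must stay <= 4kB).
 */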
this_cpu_memcpy_buffer_push(struct percpu_memcpy_buffer * buffer,struct percpu_memcpy_buffer_node item,int * _cpu)1007c960e990SMathieu Desnoyers bool this_cpu_memcpy_buffer_push(struct percpu_memcpy_buffer *buffer,
1008c960e990SMathieu Desnoyers struct percpu_memcpy_buffer_node item,
1009c960e990SMathieu Desnoyers int *_cpu)
1010c960e990SMathieu Desnoyers {
1011c960e990SMathieu Desnoyers bool result = false;
1012c960e990SMathieu Desnoyers int cpu;
1013c960e990SMathieu Desnoyers
1014c960e990SMathieu Desnoyers for (;;) {
1015c960e990SMathieu Desnoyers intptr_t *targetptr_final, newval_final, offset;
1016c960e990SMathieu Desnoyers char *destptr, *srcptr;
1017c960e990SMathieu Desnoyers size_t copylen;
1018c960e990SMathieu Desnoyers int ret;
1019c960e990SMathieu Desnoyers
1020ee31fff0SMathieu Desnoyers cpu = get_current_cpu_id();
1021c960e990SMathieu Desnoyers /* Load offset with single-copy atomicity. */
1022c960e990SMathieu Desnoyers offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
1023c960e990SMathieu Desnoyers if (offset == buffer->c[cpu].buflen)
1024c960e990SMathieu Desnoyers break;
1025c960e990SMathieu Desnoyers destptr = (char *)&buffer->c[cpu].array[offset];
1026c960e990SMathieu Desnoyers srcptr = (char *)&item;
1027c960e990SMathieu Desnoyers /* copylen must be <= 4kB. */
1028c960e990SMathieu Desnoyers copylen = sizeof(item);
1029c960e990SMathieu Desnoyers newval_final = offset + 1;
1030c960e990SMathieu Desnoyers targetptr_final = &buffer->c[cpu].offset;
1031ee31fff0SMathieu Desnoyers ret = rseq_cmpeqv_trymemcpy_storev(
1032ee31fff0SMathieu Desnoyers opt_mo, RSEQ_PERCPU,
1033c960e990SMathieu Desnoyers targetptr_final, offset,
1034c960e990SMathieu Desnoyers destptr, srcptr, copylen,
1035c960e990SMathieu Desnoyers newval_final, cpu);
1036c960e990SMathieu Desnoyers if (rseq_likely(!ret)) {
1037c960e990SMathieu Desnoyers result = true;
1038c960e990SMathieu Desnoyers break;
1039c960e990SMathieu Desnoyers }
1040c960e990SMathieu Desnoyers /* Retry if comparison fails or rseq aborts. */
1041c960e990SMathieu Desnoyers }
1042c960e990SMathieu Desnoyers if (_cpu)
1043c960e990SMathieu Desnoyers *_cpu = cpu;
1044c960e990SMathieu Desnoyers return result;
1045c960e990SMathieu Desnoyers }
1046c960e990SMathieu Desnoyers
this_cpu_memcpy_buffer_pop(struct percpu_memcpy_buffer * buffer,struct percpu_memcpy_buffer_node * item,int * _cpu)1047c960e990SMathieu Desnoyers bool this_cpu_memcpy_buffer_pop(struct percpu_memcpy_buffer *buffer,
1048c960e990SMathieu Desnoyers struct percpu_memcpy_buffer_node *item,
1049c960e990SMathieu Desnoyers int *_cpu)
1050c960e990SMathieu Desnoyers {
1051c960e990SMathieu Desnoyers bool result = false;
1052c960e990SMathieu Desnoyers int cpu;
1053c960e990SMathieu Desnoyers
1054c960e990SMathieu Desnoyers for (;;) {
1055c960e990SMathieu Desnoyers intptr_t *targetptr_final, newval_final, offset;
1056c960e990SMathieu Desnoyers char *destptr, *srcptr;
1057c960e990SMathieu Desnoyers size_t copylen;
1058c960e990SMathieu Desnoyers int ret;
1059c960e990SMathieu Desnoyers
1060ee31fff0SMathieu Desnoyers cpu = get_current_cpu_id();
1061c960e990SMathieu Desnoyers /* Load offset with single-copy atomicity. */
1062c960e990SMathieu Desnoyers offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
1063c960e990SMathieu Desnoyers if (offset == 0)
1064c960e990SMathieu Desnoyers break;
1065c960e990SMathieu Desnoyers destptr = (char *)item;
1066c960e990SMathieu Desnoyers srcptr = (char *)&buffer->c[cpu].array[offset - 1];
1067c960e990SMathieu Desnoyers /* copylen must be <= 4kB. */
1068c960e990SMathieu Desnoyers copylen = sizeof(*item);
1069c960e990SMathieu Desnoyers newval_final = offset - 1;
1070c960e990SMathieu Desnoyers targetptr_final = &buffer->c[cpu].offset;
1071ee31fff0SMathieu Desnoyers ret = rseq_cmpeqv_trymemcpy_storev(RSEQ_MO_RELAXED, RSEQ_PERCPU,
1072ee31fff0SMathieu Desnoyers targetptr_final, offset, destptr, srcptr, copylen,
1073c960e990SMathieu Desnoyers newval_final, cpu);
1074c960e990SMathieu Desnoyers if (rseq_likely(!ret)) {
1075c960e990SMathieu Desnoyers result = true;
1076c960e990SMathieu Desnoyers break;
1077c960e990SMathieu Desnoyers }
1078c960e990SMathieu Desnoyers /* Retry if comparison fails or rseq aborts. */
1079c960e990SMathieu Desnoyers }
1080c960e990SMathieu Desnoyers if (_cpu)
1081c960e990SMathieu Desnoyers *_cpu = cpu;
1082c960e990SMathieu Desnoyers return result;
1083c960e990SMathieu Desnoyers }
1084c960e990SMathieu Desnoyers
1085c960e990SMathieu Desnoyers /*
1086c960e990SMathieu Desnoyers * __percpu_memcpy_buffer_pop is not safe against concurrent accesses. Should
1087c960e990SMathieu Desnoyers * only be used on buffers that are not concurrently modified.
1088c960e990SMathieu Desnoyers */
1089c960e990SMathieu Desnoyers bool __percpu_memcpy_buffer_pop(struct percpu_memcpy_buffer *buffer,
1090c960e990SMathieu Desnoyers struct percpu_memcpy_buffer_node *item,
1091c960e990SMathieu Desnoyers int cpu)
1092c960e990SMathieu Desnoyers {
1093c960e990SMathieu Desnoyers intptr_t offset;
1094c960e990SMathieu Desnoyers
1095c960e990SMathieu Desnoyers offset = buffer->c[cpu].offset;
1096c960e990SMathieu Desnoyers if (offset == 0)
1097c960e990SMathieu Desnoyers return false;
1098c960e990SMathieu Desnoyers memcpy(item, &buffer->c[cpu].array[offset - 1], sizeof(*item));
1099c960e990SMathieu Desnoyers buffer->c[cpu].offset = offset - 1;
1100c960e990SMathieu Desnoyers return true;
1101c960e990SMathieu Desnoyers }
1102c960e990SMathieu Desnoyers
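/*
 * Worker body for the memcpy buffer test: pop a node from the current
 * CPU's buffer and push it back, opt_reps times, optionally yielding in
 * between to encourage migration across CPUs.
 */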
1103c960e990SMathieu Desnoyers void *test_percpu_memcpy_buffer_thread(void *arg)
1104c960e990SMathieu Desnoyers {
1105c960e990SMathieu Desnoyers long long i, reps;
1106c960e990SMathieu Desnoyers struct percpu_memcpy_buffer *buffer = (struct percpu_memcpy_buffer *)arg;
1107c960e990SMathieu Desnoyers
1108c960e990SMathieu Desnoyers if (!opt_disable_rseq && rseq_register_current_thread())
1109c960e990SMathieu Desnoyers abort();
1110c960e990SMathieu Desnoyers
1111c960e990SMathieu Desnoyers reps = opt_reps;
1112c960e990SMathieu Desnoyers for (i = 0; i < reps; i++) {
1113c960e990SMathieu Desnoyers struct percpu_memcpy_buffer_node item;
1114c960e990SMathieu Desnoyers bool result;
1115c960e990SMathieu Desnoyers
1116c960e990SMathieu Desnoyers result = this_cpu_memcpy_buffer_pop(buffer, &item, NULL);
1117c960e990SMathieu Desnoyers if (opt_yield)
1118c960e990SMathieu Desnoyers sched_yield(); /* encourage shuffling */
1119c960e990SMathieu Desnoyers if (result) {
1120c960e990SMathieu Desnoyers if (!this_cpu_memcpy_buffer_push(buffer, item, NULL)) {
1121c960e990SMathieu Desnoyers /* Should increase buffer size. */
1122c960e990SMathieu Desnoyers abort();
1123c960e990SMathieu Desnoyers }
1124c960e990SMathieu Desnoyers }
1125c960e990SMathieu Desnoyers }
1126c960e990SMathieu Desnoyers
1127c960e990SMathieu Desnoyers printf_verbose("tid %d: number of rseq aborts: %d, signals delivered: %u\n",
11288df34c56SMathieu Desnoyers (int) rseq_gettid(), nr_abort, signals_delivered);
1129c960e990SMathieu Desnoyers if (!opt_disable_rseq && rseq_unregister_current_thread())
1130c960e990SMathieu Desnoyers abort();
1131c960e990SMathieu Desnoyers
1132c960e990SMathieu Desnoyers return NULL;
1133c960e990SMathieu Desnoyers }
1134c960e990SMathieu Desnoyers
1135c960e990SMathieu Desnoyers /* Simultaneous modification to a per-cpu buffer from many threads. */
1136c960e990SMathieu Desnoyers void test_percpu_memcpy_buffer(void)
1137c960e990SMathieu Desnoyers {
1138c960e990SMathieu Desnoyers const int num_threads = opt_threads;
1139c960e990SMathieu Desnoyers int i, j, ret;
1140c960e990SMathieu Desnoyers uint64_t sum = 0, expected_sum = 0;
1141c960e990SMathieu Desnoyers struct percpu_memcpy_buffer buffer;
1142c960e990SMathieu Desnoyers pthread_t test_threads[num_threads];
1143c960e990SMathieu Desnoyers cpu_set_t allowed_cpus;
1144c960e990SMathieu Desnoyers
1145c960e990SMathieu Desnoyers memset(&buffer, 0, sizeof(buffer));
1146c960e990SMathieu Desnoyers
1147c960e990SMathieu Desnoyers /* Generate list entries for every usable cpu. */
1148c960e990SMathieu Desnoyers sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
1149c960e990SMathieu Desnoyers for (i = 0; i < CPU_SETSIZE; i++) {
1150d53271c0SMathieu Desnoyers if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
1151c960e990SMathieu Desnoyers continue;
1152c960e990SMathieu Desnoyers /* Worst case: every item ends up in the same CPU's buffer. */
1153c960e990SMathieu Desnoyers buffer.c[i].array =
1154c960e990SMathieu Desnoyers malloc(sizeof(*buffer.c[i].array) * CPU_SETSIZE *
1155c960e990SMathieu Desnoyers MEMCPY_BUFFER_ITEM_PER_CPU);
1156c960e990SMathieu Desnoyers assert(buffer.c[i].array);
1157c960e990SMathieu Desnoyers buffer.c[i].buflen = CPU_SETSIZE * MEMCPY_BUFFER_ITEM_PER_CPU;
1158c960e990SMathieu Desnoyers for (j = 1; j <= MEMCPY_BUFFER_ITEM_PER_CPU; j++) {
1159c960e990SMathieu Desnoyers expected_sum += 2 * j + 1;
1160c960e990SMathieu Desnoyers
1161c960e990SMathieu Desnoyers /*
1162c960e990SMathieu Desnoyers * We could theoretically put the word-sized
1163c960e990SMathieu Desnoyers * "data" directly in the buffer. However, we
1164c960e990SMathieu Desnoyers * want to model objects that would not fit
1165c960e990SMathieu Desnoyers * within a single word, so allocate an object
1166c960e990SMathieu Desnoyers * for each node.
1167c960e990SMathieu Desnoyers */
1168c960e990SMathieu Desnoyers buffer.c[i].array[j - 1].data1 = j;
1169c960e990SMathieu Desnoyers buffer.c[i].array[j - 1].data2 = j + 1;
1170c960e990SMathieu Desnoyers buffer.c[i].offset++;
1171c960e990SMathieu Desnoyers }
1172c960e990SMathieu Desnoyers }
1173c960e990SMathieu Desnoyers
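	/* Start the worker threads; each pops and re-pushes nodes opt_reps times. */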
1174c960e990SMathieu Desnoyers for (i = 0; i < num_threads; i++) {
1175c960e990SMathieu Desnoyers ret = pthread_create(&test_threads[i], NULL,
1176c960e990SMathieu Desnoyers test_percpu_memcpy_buffer_thread,
1177c960e990SMathieu Desnoyers &buffer);
1178c960e990SMathieu Desnoyers if (ret) {
1179c960e990SMathieu Desnoyers errno = ret;
1180c960e990SMathieu Desnoyers perror("pthread_create");
1181c960e990SMathieu Desnoyers abort();
1182c960e990SMathieu Desnoyers }
1183c960e990SMathieu Desnoyers }
1184c960e990SMathieu Desnoyers
1185c960e990SMathieu Desnoyers for (i = 0; i < num_threads; i++) {
1186c960e990SMathieu Desnoyers ret = pthread_join(test_threads[i], NULL);
1187c960e990SMathieu Desnoyers if (ret) {
1188c960e990SMathieu Desnoyers errno = ret;
1189c960e990SMathieu Desnoyers perror("pthread_join");
1190c960e990SMathieu Desnoyers abort();
1191c960e990SMathieu Desnoyers }
1192c960e990SMathieu Desnoyers }
1193c960e990SMathieu Desnoyers
1194c960e990SMathieu Desnoyers for (i = 0; i < CPU_SETSIZE; i++) {
1195c960e990SMathieu Desnoyers struct percpu_memcpy_buffer_node item;
1196c960e990SMathieu Desnoyers
1197d53271c0SMathieu Desnoyers if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
1198c960e990SMathieu Desnoyers continue;
1199c960e990SMathieu Desnoyers
1200c960e990SMathieu Desnoyers while (__percpu_memcpy_buffer_pop(&buffer, &item, i)) {
1201c960e990SMathieu Desnoyers sum += item.data1;
1202c960e990SMathieu Desnoyers sum += item.data2;
1203c960e990SMathieu Desnoyers }
1204c960e990SMathieu Desnoyers free(buffer.c[i].array);
1205c960e990SMathieu Desnoyers }
1206c960e990SMathieu Desnoyers
1207c960e990SMathieu Desnoyers /*
1208c960e990SMathieu Desnoyers * All entries should now be accounted for (unless some external
1209c960e990SMathieu Desnoyers * actor is interfering with our allowed affinity while this
1210c960e990SMathieu Desnoyers * test is running).
1211c960e990SMathieu Desnoyers */
1212c960e990SMathieu Desnoyers assert(sum == expected_sum);
1213c960e990SMathieu Desnoyers }
1214c960e990SMathieu Desnoyers
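/*
 * SIGUSR1 handler: count per-thread signal deliveries. Signals are raised
 * when the -k option is enabled, so that rseq abort handling is also
 * exercised from signal context.
 */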
1215c960e990SMathieu Desnoyers static void test_signal_interrupt_handler(int signo)
1216c960e990SMathieu Desnoyers {
1217c960e990SMathieu Desnoyers signals_delivered++;
1218c960e990SMathieu Desnoyers }
1219c960e990SMathieu Desnoyers
1220c960e990SMathieu Desnoyers static int set_signal_handler(void)
1221c960e990SMathieu Desnoyers {
1222c960e990SMathieu Desnoyers int ret = 0;
1223c960e990SMathieu Desnoyers struct sigaction sa;
1224c960e990SMathieu Desnoyers sigset_t sigset;
1225c960e990SMathieu Desnoyers
1226c960e990SMathieu Desnoyers ret = sigemptyset(&sigset);
1227c960e990SMathieu Desnoyers if (ret < 0) {
1228c960e990SMathieu Desnoyers perror("sigemptyset");
1229c960e990SMathieu Desnoyers return ret;
1230c960e990SMathieu Desnoyers }
1231c960e990SMathieu Desnoyers
1232c960e990SMathieu Desnoyers sa.sa_handler = test_signal_interrupt_handler;
1233c960e990SMathieu Desnoyers sa.sa_mask = sigset;
1234c960e990SMathieu Desnoyers sa.sa_flags = 0;
1235c960e990SMathieu Desnoyers ret = sigaction(SIGUSR1, &sa, NULL);
1236c960e990SMathieu Desnoyers if (ret < 0) {
1237c960e990SMathieu Desnoyers perror("sigaction");
1238c960e990SMathieu Desnoyers return ret;
1239c960e990SMathieu Desnoyers }
1240c960e990SMathieu Desnoyers
1241c960e990SMathieu Desnoyers printf_verbose("Signal handler set for SIGUSR1\n");
1242c960e990SMathieu Desnoyers
1243c960e990SMathieu Desnoyers return ret;
1244c960e990SMathieu Desnoyers }
1245c960e990SMathieu Desnoyers
12466f39cecdSXingxing Su /* Test the MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ membarrier command. */
1247ee31fff0SMathieu Desnoyers #ifdef TEST_MEMBARRIER
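/*
 * Test scheme: worker threads keep incrementing the data field of the
 * per-cpu head node of whichever list the manager currently publishes
 * through percpu_list_ptr. The manager alternates between two lists and,
 * after each switch, issues an expedited rseq membarrier so that any
 * worker still inside an rseq critical section dereferencing the old
 * pointer is aborted; it then verifies that the now-inactive list no
 * longer changes.
 */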
1248f166b111SPeter Oskolkov struct test_membarrier_thread_args {
1249f166b111SPeter Oskolkov int stop;
1250f166b111SPeter Oskolkov intptr_t percpu_list_ptr;
1251f166b111SPeter Oskolkov };
1252f166b111SPeter Oskolkov
1253f166b111SPeter Oskolkov /* Worker threads modify data in their "active" percpu lists. */
1254f166b111SPeter Oskolkov void *test_membarrier_worker_thread(void *arg)
1255f166b111SPeter Oskolkov {
1256f166b111SPeter Oskolkov struct test_membarrier_thread_args *args =
1257f166b111SPeter Oskolkov (struct test_membarrier_thread_args *)arg;
1258f166b111SPeter Oskolkov const int iters = opt_reps;
1259f166b111SPeter Oskolkov int i;
1260f166b111SPeter Oskolkov
1261f166b111SPeter Oskolkov if (rseq_register_current_thread()) {
1262f166b111SPeter Oskolkov fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
1263f166b111SPeter Oskolkov errno, strerror(errno));
1264f166b111SPeter Oskolkov abort();
1265f166b111SPeter Oskolkov }
1266f166b111SPeter Oskolkov
1267f166b111SPeter Oskolkov /* Wait for initialization. */
1268078a2eadSJustin Stitt while (!__atomic_load_n(&args->percpu_list_ptr, __ATOMIC_ACQUIRE)) {}
1269f166b111SPeter Oskolkov
1270f166b111SPeter Oskolkov for (i = 0; i < iters; ++i) {
1271f166b111SPeter Oskolkov int ret;
1272f166b111SPeter Oskolkov
1273f166b111SPeter Oskolkov do {
1274ee31fff0SMathieu Desnoyers int cpu = get_current_cpu_id();
1275f166b111SPeter Oskolkov
1276ee31fff0SMathieu Desnoyers ret = rseq_offset_deref_addv(RSEQ_MO_RELAXED, RSEQ_PERCPU,
1277ee31fff0SMathieu Desnoyers &args->percpu_list_ptr,
1278f166b111SPeter Oskolkov sizeof(struct percpu_list_entry) * cpu, 1, cpu);
1279f166b111SPeter Oskolkov } while (rseq_unlikely(ret));
1280f166b111SPeter Oskolkov }
1281f166b111SPeter Oskolkov
1282f166b111SPeter Oskolkov if (rseq_unregister_current_thread()) {
1283f166b111SPeter Oskolkov fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
1284f166b111SPeter Oskolkov errno, strerror(errno));
1285f166b111SPeter Oskolkov abort();
1286f166b111SPeter Oskolkov }
1287f166b111SPeter Oskolkov return NULL;
1288f166b111SPeter Oskolkov }
1289f166b111SPeter Oskolkov
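/* Allocate one zero-initialized node for every possible CPU slot of the list. */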
1290f166b111SPeter Oskolkov void test_membarrier_init_percpu_list(struct percpu_list *list)
1291f166b111SPeter Oskolkov {
1292f166b111SPeter Oskolkov int i;
1293f166b111SPeter Oskolkov
1294f166b111SPeter Oskolkov memset(list, 0, sizeof(*list));
1295f166b111SPeter Oskolkov for (i = 0; i < CPU_SETSIZE; i++) {
1296f166b111SPeter Oskolkov struct percpu_list_node *node;
1297f166b111SPeter Oskolkov
1298f166b111SPeter Oskolkov node = malloc(sizeof(*node));
1299f166b111SPeter Oskolkov assert(node);
1300f166b111SPeter Oskolkov node->data = 0;
1301f166b111SPeter Oskolkov node->next = NULL;
1302f166b111SPeter Oskolkov list->c[i].head = node;
1303f166b111SPeter Oskolkov }
1304f166b111SPeter Oskolkov }
1305f166b111SPeter Oskolkov
1306f166b111SPeter Oskolkov void test_membarrier_free_percpu_list(struct percpu_list *list)
1307f166b111SPeter Oskolkov {
1308f166b111SPeter Oskolkov int i;
1309f166b111SPeter Oskolkov
1310f166b111SPeter Oskolkov for (i = 0; i < CPU_SETSIZE; i++)
1311f166b111SPeter Oskolkov free(list->c[i].head);
1312f166b111SPeter Oskolkov }
1313f166b111SPeter Oskolkov
1314f166b111SPeter Oskolkov /*
1315f166b111SPeter Oskolkov * The manager thread swaps per-cpu lists that worker threads see,
1316f166b111SPeter Oskolkov * and validates that there are no unexpected modifications.
1317f166b111SPeter Oskolkov */
1318f166b111SPeter Oskolkov void *test_membarrier_manager_thread(void *arg)
1319f166b111SPeter Oskolkov {
1320f166b111SPeter Oskolkov struct test_membarrier_thread_args *args =
1321f166b111SPeter Oskolkov (struct test_membarrier_thread_args *)arg;
1322f166b111SPeter Oskolkov struct percpu_list list_a, list_b;
1323f166b111SPeter Oskolkov intptr_t expect_a = 0, expect_b = 0;
1324f166b111SPeter Oskolkov int cpu_a = 0, cpu_b = 0;
1325f166b111SPeter Oskolkov
1326f166b111SPeter Oskolkov if (rseq_register_current_thread()) {
1327f166b111SPeter Oskolkov fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
1328f166b111SPeter Oskolkov errno, strerror(errno));
1329f166b111SPeter Oskolkov abort();
1330f166b111SPeter Oskolkov }
1331f166b111SPeter Oskolkov
1332f166b111SPeter Oskolkov /* Init lists. */
1333f166b111SPeter Oskolkov test_membarrier_init_percpu_list(&list_a);
1334f166b111SPeter Oskolkov test_membarrier_init_percpu_list(&list_b);
1335f166b111SPeter Oskolkov
1336078a2eadSJustin Stitt __atomic_store_n(&args->percpu_list_ptr, (intptr_t)&list_a, __ATOMIC_RELEASE);
1337f166b111SPeter Oskolkov
1338078a2eadSJustin Stitt while (!__atomic_load_n(&args->stop, __ATOMIC_ACQUIRE)) {
1339f166b111SPeter Oskolkov /* list_a is "active". */
1340f166b111SPeter Oskolkov cpu_a = rand() % CPU_SETSIZE;
1341f166b111SPeter Oskolkov /*
1342f166b111SPeter Oskolkov * As list_b is "inactive", we should never see changes
1343f166b111SPeter Oskolkov * to list_b.
1344f166b111SPeter Oskolkov */
1345078a2eadSJustin Stitt if (expect_b != __atomic_load_n(&list_b.c[cpu_b].head->data, __ATOMIC_ACQUIRE)) {
1346f166b111SPeter Oskolkov fprintf(stderr, "Membarrier test failed\n");
1347f166b111SPeter Oskolkov abort();
1348f166b111SPeter Oskolkov }
1349f166b111SPeter Oskolkov
1350f166b111SPeter Oskolkov /* Make list_b "active". */
1351078a2eadSJustin Stitt __atomic_store_n(&args->percpu_list_ptr, (intptr_t)&list_b, __ATOMIC_RELEASE);
1352ee31fff0SMathieu Desnoyers if (rseq_membarrier_expedited(cpu_a) &&
1353f166b111SPeter Oskolkov errno != ENXIO /* missing CPU */) {
1354f166b111SPeter Oskolkov perror("sys_membarrier");
1355f166b111SPeter Oskolkov abort();
1356f166b111SPeter Oskolkov }
1357f166b111SPeter Oskolkov /*
1358f166b111SPeter Oskolkov * CPU A should now only modify list_b, so the values
1359f166b111SPeter Oskolkov * in list_a should be stable.
1360f166b111SPeter Oskolkov */
1361078a2eadSJustin Stitt expect_a = __atomic_load_n(&list_a.c[cpu_a].head->data, __ATOMIC_ACQUIRE);
1362f166b111SPeter Oskolkov
1363f166b111SPeter Oskolkov cpu_b = rand() % CPU_SETSIZE;
1364f166b111SPeter Oskolkov /*
1365f166b111SPeter Oskolkov * As list_a is "inactive", we should never see changes
1366f166b111SPeter Oskolkov * to list_a.
1367f166b111SPeter Oskolkov */
1368078a2eadSJustin Stitt if (expect_a != __atomic_load_n(&list_a.c[cpu_a].head->data, __ATOMIC_ACQUIRE)) {
1369f166b111SPeter Oskolkov fprintf(stderr, "Membarrier test failed\n");
1370f166b111SPeter Oskolkov abort();
1371f166b111SPeter Oskolkov }
1372f166b111SPeter Oskolkov
1373f166b111SPeter Oskolkov /* Make list_a "active". */
1374078a2eadSJustin Stitt __atomic_store_n(&args->percpu_list_ptr, (intptr_t)&list_a, __ATOMIC_RELEASE);
1375ee31fff0SMathieu Desnoyers if (rseq_membarrier_expedited(cpu_b) &&
1376f166b111SPeter Oskolkov errno != ENXIO /* missing CPU */) {
1377f166b111SPeter Oskolkov perror("sys_membarrier");
1378f166b111SPeter Oskolkov abort();
1379f166b111SPeter Oskolkov }
1380f166b111SPeter Oskolkov /* Remember a value from list_b. */
1381078a2eadSJustin Stitt expect_b = __atomic_load_n(&list_b.c[cpu_b].head->data, __ATOMIC_ACQUIRE);
1382f166b111SPeter Oskolkov }
1383f166b111SPeter Oskolkov
1384f166b111SPeter Oskolkov test_membarrier_free_percpu_list(&list_a);
1385f166b111SPeter Oskolkov test_membarrier_free_percpu_list(&list_b);
1386f166b111SPeter Oskolkov
1387f166b111SPeter Oskolkov if (rseq_unregister_current_thread()) {
1388f166b111SPeter Oskolkov fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
1389f166b111SPeter Oskolkov errno, strerror(errno));
1390f166b111SPeter Oskolkov abort();
1391f166b111SPeter Oskolkov }
1392f166b111SPeter Oskolkov return NULL;
1393f166b111SPeter Oskolkov }
1394f166b111SPeter Oskolkov
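/*
 * Top-level membarrier test: register the process for expedited rseq
 * membarriers, start one manager thread plus opt_threads worker threads
 * sharing a single test_membarrier_thread_args, wait for the workers to
 * finish, then ask the manager to stop and join it.
 */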
1395f166b111SPeter Oskolkov void test_membarrier(void)
1396f166b111SPeter Oskolkov {
1397f166b111SPeter Oskolkov const int num_threads = opt_threads;
1398f166b111SPeter Oskolkov struct test_membarrier_thread_args thread_args;
1399f166b111SPeter Oskolkov pthread_t worker_threads[num_threads];
1400f166b111SPeter Oskolkov pthread_t manager_thread;
1401f166b111SPeter Oskolkov int i, ret;
1402f166b111SPeter Oskolkov
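	/*
	 * Registration is required before MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ
	 * can be issued against this process.
	 */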
1403f166b111SPeter Oskolkov if (sys_membarrier(MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ, 0, 0)) {
1404f166b111SPeter Oskolkov perror("sys_membarrier");
1405f166b111SPeter Oskolkov abort();
1406f166b111SPeter Oskolkov }
1407f166b111SPeter Oskolkov
1408f166b111SPeter Oskolkov thread_args.stop = 0;
1409f166b111SPeter Oskolkov thread_args.percpu_list_ptr = 0;
1410f166b111SPeter Oskolkov ret = pthread_create(&manager_thread, NULL,
1411f166b111SPeter Oskolkov test_membarrier_manager_thread, &thread_args);
1412f166b111SPeter Oskolkov if (ret) {
1413f166b111SPeter Oskolkov errno = ret;
1414f166b111SPeter Oskolkov perror("pthread_create");
1415f166b111SPeter Oskolkov abort();
1416f166b111SPeter Oskolkov }
1417f166b111SPeter Oskolkov
1418f166b111SPeter Oskolkov for (i = 0; i < num_threads; i++) {
1419f166b111SPeter Oskolkov ret = pthread_create(&worker_threads[i], NULL,
1420f166b111SPeter Oskolkov test_membarrier_worker_thread, &thread_args);
1421f166b111SPeter Oskolkov if (ret) {
1422f166b111SPeter Oskolkov errno = ret;
1423f166b111SPeter Oskolkov perror("pthread_create");
1424f166b111SPeter Oskolkov abort();
1425f166b111SPeter Oskolkov }
1426f166b111SPeter Oskolkov }
1427f166b111SPeter Oskolkov
1428f166b111SPeter Oskolkov
1429f166b111SPeter Oskolkov for (i = 0; i < num_threads; i++) {
1430f166b111SPeter Oskolkov ret = pthread_join(worker_threads[i], NULL);
1431f166b111SPeter Oskolkov if (ret) {
1432f166b111SPeter Oskolkov errno = ret;
1433f166b111SPeter Oskolkov perror("pthread_join");
1434f166b111SPeter Oskolkov abort();
1435f166b111SPeter Oskolkov }
1436f166b111SPeter Oskolkov }
1437f166b111SPeter Oskolkov
1438078a2eadSJustin Stitt __atomic_store_n(&thread_args.stop, 1, __ATOMIC_RELEASE);
1439f166b111SPeter Oskolkov ret = pthread_join(manager_thread, NULL);
1440f166b111SPeter Oskolkov if (ret) {
1441f166b111SPeter Oskolkov errno = ret;
1442f166b111SPeter Oskolkov perror("pthread_join");
1443f166b111SPeter Oskolkov abort();
1444f166b111SPeter Oskolkov }
1445f166b111SPeter Oskolkov }
1446ee31fff0SMathieu Desnoyers #else /* TEST_MEMBARRIER */
1447f166b111SPeter Oskolkov void test_membarrier(void)
1448f166b111SPeter Oskolkov {
1449f166b111SPeter Oskolkov fprintf(stderr, "rseq_offset_deref_addv is not implemented on this architecture. "
1450f166b111SPeter Oskolkov "Skipping membarrier test.\n");
1451f166b111SPeter Oskolkov }
1452f166b111SPeter Oskolkov #endif
1453f166b111SPeter Oskolkov
1454c960e990SMathieu Desnoyers static void show_usage(int argc, char **argv)
1455c960e990SMathieu Desnoyers {
1456c960e990SMathieu Desnoyers printf("Usage: %s <OPTIONS>\n",
1457c960e990SMathieu Desnoyers argv[0]);
1458c960e990SMathieu Desnoyers printf("OPTIONS:\n");
1459c960e990SMathieu Desnoyers printf(" [-1 loops] Number of loops for delay injection 1\n");
1460c960e990SMathieu Desnoyers printf(" [-2 loops] Number of loops for delay injection 2\n");
1461c960e990SMathieu Desnoyers printf(" [-3 loops] Number of loops for delay injection 3\n");
1462c960e990SMathieu Desnoyers printf(" [-4 loops] Number of loops for delay injection 4\n");
1463c960e990SMathieu Desnoyers printf(" [-5 loops] Number of loops for delay injection 5\n");
1464c960e990SMathieu Desnoyers printf(" [-6 loops] Number of loops for delay injection 6\n");
1465c960e990SMathieu Desnoyers printf(" [-7 loops] Number of loops for delay injection 7 (-1 to enable -m)\n");
1466c960e990SMathieu Desnoyers printf(" [-8 loops] Number of loops for delay injection 8 (-1 to enable -m)\n");
1467c960e990SMathieu Desnoyers printf(" [-9 loops] Number of loops for delay injection 9 (-1 to enable -m)\n");
1468c960e990SMathieu Desnoyers printf(" [-m N] Yield/sleep/kill every modulo N (default 0: disabled) (>= 0)\n");
1469c960e990SMathieu Desnoyers printf(" [-y] Yield\n");
1470c960e990SMathieu Desnoyers printf(" [-k] Kill thread with signal\n");
1471c960e990SMathieu Desnoyers printf(" [-s S] S: =0: disabled (default), >0: sleep time (ms)\n");
1472c960e990SMathieu Desnoyers printf(" [-t N] Number of threads (default 200)\n");
1473c960e990SMathieu Desnoyers printf(" [-r N] Number of repetitions per thread (default 5000)\n");
1474c960e990SMathieu Desnoyers printf(" [-d] Disable rseq system call (no initialization)\n");
1475c960e990SMathieu Desnoyers printf(" [-D M] Disable rseq for each M threads\n");
1476f166b111SPeter Oskolkov printf(" [-T test] Choose test: (s)pinlock, (l)ist, (b)uffer, (m)emcpy, (i)ncrement, membarrie(r)\n");
1477c960e990SMathieu Desnoyers printf(" [-M] Push into buffer and memcpy buffer with memory barriers.\n");
1478c960e990SMathieu Desnoyers printf(" [-v] Verbose output.\n");
1479c960e990SMathieu Desnoyers printf(" [-h] Show this help.\n");
1480c960e990SMathieu Desnoyers printf("\n");
1481c960e990SMathieu Desnoyers }
1482c960e990SMathieu Desnoyers
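/*
 * Example invocation (binary name assumed):
 *
 *   ./param_test -T r -t 16 -r 100000 -v
 *
 * runs the membarrier test with 16 worker threads, 100000 repetitions per
 * worker and verbose output.
 */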
1483c960e990SMathieu Desnoyers int main(int argc, char **argv)
1484c960e990SMathieu Desnoyers {
1485c960e990SMathieu Desnoyers int i;
1486c960e990SMathieu Desnoyers
1487c960e990SMathieu Desnoyers for (i = 1; i < argc; i++) {
1488c960e990SMathieu Desnoyers if (argv[i][0] != '-')
1489c960e990SMathieu Desnoyers continue;
1490c960e990SMathieu Desnoyers switch (argv[i][1]) {
1491c960e990SMathieu Desnoyers case '1':
1492c960e990SMathieu Desnoyers case '2':
1493c960e990SMathieu Desnoyers case '3':
1494c960e990SMathieu Desnoyers case '4':
1495c960e990SMathieu Desnoyers case '5':
1496c960e990SMathieu Desnoyers case '6':
1497c960e990SMathieu Desnoyers case '7':
1498c960e990SMathieu Desnoyers case '8':
1499c960e990SMathieu Desnoyers case '9':
1500c960e990SMathieu Desnoyers if (argc < i + 2) {
1501c960e990SMathieu Desnoyers show_usage(argc, argv);
1502c960e990SMathieu Desnoyers goto error;
1503c960e990SMathieu Desnoyers }
1504c960e990SMathieu Desnoyers loop_cnt[argv[i][1] - '0'] = atol(argv[i + 1]);
1505c960e990SMathieu Desnoyers i++;
1506c960e990SMathieu Desnoyers break;
1507c960e990SMathieu Desnoyers case 'm':
1508c960e990SMathieu Desnoyers if (argc < i + 2) {
1509c960e990SMathieu Desnoyers show_usage(argc, argv);
1510c960e990SMathieu Desnoyers goto error;
1511c960e990SMathieu Desnoyers }
1512c960e990SMathieu Desnoyers opt_modulo = atol(argv[i + 1]);
1513c960e990SMathieu Desnoyers if (opt_modulo < 0) {
1514c960e990SMathieu Desnoyers show_usage(argc, argv);
1515c960e990SMathieu Desnoyers goto error;
1516c960e990SMathieu Desnoyers }
1517c960e990SMathieu Desnoyers i++;
1518c960e990SMathieu Desnoyers break;
1519c960e990SMathieu Desnoyers case 's':
1520c960e990SMathieu Desnoyers if (argc < i + 2) {
1521c960e990SMathieu Desnoyers show_usage(argc, argv);
1522c960e990SMathieu Desnoyers goto error;
1523c960e990SMathieu Desnoyers }
1524c960e990SMathieu Desnoyers opt_sleep = atol(argv[i + 1]);
1525c960e990SMathieu Desnoyers if (opt_sleep < 0) {
1526c960e990SMathieu Desnoyers show_usage(argc, argv);
1527c960e990SMathieu Desnoyers goto error;
1528c960e990SMathieu Desnoyers }
1529c960e990SMathieu Desnoyers i++;
1530c960e990SMathieu Desnoyers break;
1531c960e990SMathieu Desnoyers case 'y':
1532c960e990SMathieu Desnoyers opt_yield = 1;
1533c960e990SMathieu Desnoyers break;
1534c960e990SMathieu Desnoyers case 'k':
1535c960e990SMathieu Desnoyers opt_signal = 1;
1536c960e990SMathieu Desnoyers break;
1537c960e990SMathieu Desnoyers case 'd':
1538c960e990SMathieu Desnoyers opt_disable_rseq = 1;
1539c960e990SMathieu Desnoyers break;
1540c960e990SMathieu Desnoyers case 'D':
1541c960e990SMathieu Desnoyers if (argc < i + 2) {
1542c960e990SMathieu Desnoyers show_usage(argc, argv);
1543c960e990SMathieu Desnoyers goto error;
1544c960e990SMathieu Desnoyers }
1545c960e990SMathieu Desnoyers opt_disable_mod = atol(argv[i + 1]);
1546c960e990SMathieu Desnoyers if (opt_disable_mod < 0) {
1547c960e990SMathieu Desnoyers show_usage(argc, argv);
1548c960e990SMathieu Desnoyers goto error;
1549c960e990SMathieu Desnoyers }
1550c960e990SMathieu Desnoyers i++;
1551c960e990SMathieu Desnoyers break;
1552c960e990SMathieu Desnoyers case 't':
1553c960e990SMathieu Desnoyers if (argc < i + 2) {
1554c960e990SMathieu Desnoyers show_usage(argc, argv);
1555c960e990SMathieu Desnoyers goto error;
1556c960e990SMathieu Desnoyers }
1557c960e990SMathieu Desnoyers opt_threads = atol(argv[i + 1]);
1558c960e990SMathieu Desnoyers if (opt_threads < 0) {
1559c960e990SMathieu Desnoyers show_usage(argc, argv);
1560c960e990SMathieu Desnoyers goto error;
1561c960e990SMathieu Desnoyers }
1562c960e990SMathieu Desnoyers i++;
1563c960e990SMathieu Desnoyers break;
1564c960e990SMathieu Desnoyers case 'r':
1565c960e990SMathieu Desnoyers if (argc < i + 2) {
1566c960e990SMathieu Desnoyers show_usage(argc, argv);
1567c960e990SMathieu Desnoyers goto error;
1568c960e990SMathieu Desnoyers }
1569c960e990SMathieu Desnoyers opt_reps = atoll(argv[i + 1]);
1570c960e990SMathieu Desnoyers if (opt_reps < 0) {
1571c960e990SMathieu Desnoyers show_usage(argc, argv);
1572c960e990SMathieu Desnoyers goto error;
1573c960e990SMathieu Desnoyers }
1574c960e990SMathieu Desnoyers i++;
1575c960e990SMathieu Desnoyers break;
1576c960e990SMathieu Desnoyers case 'h':
1577c960e990SMathieu Desnoyers show_usage(argc, argv);
1578c960e990SMathieu Desnoyers goto end;
1579c960e990SMathieu Desnoyers case 'T':
1580c960e990SMathieu Desnoyers if (argc < i + 2) {
1581c960e990SMathieu Desnoyers show_usage(argc, argv);
1582c960e990SMathieu Desnoyers goto error;
1583c960e990SMathieu Desnoyers }
1584c960e990SMathieu Desnoyers opt_test = *argv[i + 1];
1585c960e990SMathieu Desnoyers switch (opt_test) {
1586c960e990SMathieu Desnoyers case 's':
1587c960e990SMathieu Desnoyers case 'l':
1588c960e990SMathieu Desnoyers case 'i':
1589c960e990SMathieu Desnoyers case 'b':
1590c960e990SMathieu Desnoyers case 'm':
1591f166b111SPeter Oskolkov case 'r':
1592c960e990SMathieu Desnoyers break;
1593c960e990SMathieu Desnoyers default:
1594c960e990SMathieu Desnoyers show_usage(argc, argv);
1595c960e990SMathieu Desnoyers goto error;
1596c960e990SMathieu Desnoyers }
1597c960e990SMathieu Desnoyers i++;
1598c960e990SMathieu Desnoyers break;
1599c960e990SMathieu Desnoyers case 'v':
1600c960e990SMathieu Desnoyers verbose = 1;
1601c960e990SMathieu Desnoyers break;
1602c960e990SMathieu Desnoyers case 'M':
1603ee31fff0SMathieu Desnoyers opt_mo = RSEQ_MO_RELEASE;
1604c960e990SMathieu Desnoyers break;
1605c960e990SMathieu Desnoyers default:
1606c960e990SMathieu Desnoyers show_usage(argc, argv);
1607c960e990SMathieu Desnoyers goto error;
1608c960e990SMathieu Desnoyers }
1609c960e990SMathieu Desnoyers }
1610c960e990SMathieu Desnoyers
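	/*
	 * Mirror the parsed per-level delay-injection loop counts into the
	 * individual globals referenced by name from the injection code.
	 */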
1611c960e990SMathieu Desnoyers loop_cnt_1 = loop_cnt[1];
1612c960e990SMathieu Desnoyers loop_cnt_2 = loop_cnt[2];
1613c960e990SMathieu Desnoyers loop_cnt_3 = loop_cnt[3];
1614c960e990SMathieu Desnoyers loop_cnt_4 = loop_cnt[4];
1615c960e990SMathieu Desnoyers loop_cnt_5 = loop_cnt[5];
1616c960e990SMathieu Desnoyers loop_cnt_6 = loop_cnt[6];
1617c960e990SMathieu Desnoyers
1618c960e990SMathieu Desnoyers if (set_signal_handler())
1619c960e990SMathieu Desnoyers goto error;
1620c960e990SMathieu Desnoyers
1621c960e990SMathieu Desnoyers if (!opt_disable_rseq && rseq_register_current_thread())
1622c960e990SMathieu Desnoyers goto error;
1623ee31fff0SMathieu Desnoyers if (!opt_disable_rseq && !rseq_validate_cpu_id()) {
1624ee31fff0SMathieu Desnoyers fprintf(stderr, "Error: cpu id getter unavailable\n");
1625ee31fff0SMathieu Desnoyers goto error;
1626ee31fff0SMathieu Desnoyers }
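	/* Dispatch to the selected test; each test spawns and joins its own worker threads. */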
1627c960e990SMathieu Desnoyers switch (opt_test) {
1628c960e990SMathieu Desnoyers case 's':
1629c960e990SMathieu Desnoyers printf_verbose("spinlock\n");
1630c960e990SMathieu Desnoyers test_percpu_spinlock();
1631c960e990SMathieu Desnoyers break;
1632c960e990SMathieu Desnoyers case 'l':
1633c960e990SMathieu Desnoyers printf_verbose("linked list\n");
1634c960e990SMathieu Desnoyers test_percpu_list();
1635c960e990SMathieu Desnoyers break;
1636c960e990SMathieu Desnoyers case 'b':
1637c960e990SMathieu Desnoyers printf_verbose("buffer\n");
1638c960e990SMathieu Desnoyers test_percpu_buffer();
1639c960e990SMathieu Desnoyers break;
1640c960e990SMathieu Desnoyers case 'm':
1641c960e990SMathieu Desnoyers printf_verbose("memcpy buffer\n");
1642c960e990SMathieu Desnoyers test_percpu_memcpy_buffer();
1643c960e990SMathieu Desnoyers break;
1644c960e990SMathieu Desnoyers case 'i':
1645c960e990SMathieu Desnoyers printf_verbose("counter increment\n");
1646c960e990SMathieu Desnoyers test_percpu_inc();
1647c960e990SMathieu Desnoyers break;
1648f166b111SPeter Oskolkov case 'r':
1649f166b111SPeter Oskolkov printf_verbose("membarrier\n");
1650f166b111SPeter Oskolkov test_membarrier();
1651f166b111SPeter Oskolkov break;
1652c960e990SMathieu Desnoyers }
1653c960e990SMathieu Desnoyers if (!opt_disable_rseq && rseq_unregister_current_thread())
1654c960e990SMathieu Desnoyers abort();
1655c960e990SMathieu Desnoyers end:
1656c960e990SMathieu Desnoyers return 0;
1657c960e990SMathieu Desnoyers
1658c960e990SMathieu Desnoyers error:
1659c960e990SMathieu Desnoyers return -1;
1660c960e990SMathieu Desnoyers }