// SPDX-License-Identifier: LGPL-2.1
#define _GNU_SOURCE
#include <assert.h>
#include <linux/membarrier.h>
#include <pthread.h>
#include <sched.h>
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <syscall.h>
#include <unistd.h>
#include <poll.h>
#include <sys/types.h>
#include <signal.h>
#include <errno.h>
#include <stddef.h>
#include <stdbool.h>

static inline pid_t rseq_gettid(void)
{
	return syscall(__NR_gettid);
}

#define NR_INJECT	9
static int loop_cnt[NR_INJECT + 1];

static int loop_cnt_1 asm("asm_loop_cnt_1") __attribute__((used));
static int loop_cnt_2 asm("asm_loop_cnt_2") __attribute__((used));
static int loop_cnt_3 asm("asm_loop_cnt_3") __attribute__((used));
static int loop_cnt_4 asm("asm_loop_cnt_4") __attribute__((used));
static int loop_cnt_5 asm("asm_loop_cnt_5") __attribute__((used));
static int loop_cnt_6 asm("asm_loop_cnt_6") __attribute__((used));

static int opt_modulo, verbose;

static int opt_yield, opt_signal, opt_sleep,
		opt_disable_rseq, opt_threads = 200,
		opt_disable_mod = 0, opt_test = 's';

static long long opt_reps = 5000;

static __thread __attribute__((tls_model("initial-exec")))
unsigned int signals_delivered;

#ifndef BENCHMARK

static __thread __attribute__((tls_model("initial-exec"), unused))
unsigned int yield_mod_cnt, nr_abort;

#define printf_verbose(fmt, ...)			\
	do {						\
		if (verbose)				\
			printf(fmt, ## __VA_ARGS__);	\
	} while (0)

#ifdef __i386__

#define INJECT_ASM_REG	"eax"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"mov asm_loop_cnt_" #n ", %%" INJECT_ASM_REG "\n\t" \
	"test %%" INJECT_ASM_REG ",%%" INJECT_ASM_REG "\n\t" \
	"jz 333f\n\t" \
	"222:\n\t" \
	"dec %%" INJECT_ASM_REG "\n\t" \
	"jnz 222b\n\t" \
	"333:\n\t"

#elif defined(__x86_64__)

#define INJECT_ASM_REG_P	"rax"
#define INJECT_ASM_REG		"eax"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG_P \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"lea asm_loop_cnt_" #n "(%%rip), %%" INJECT_ASM_REG_P "\n\t" \
	"mov (%%" INJECT_ASM_REG_P "), %%" INJECT_ASM_REG "\n\t" \
	"test %%" INJECT_ASM_REG ",%%" INJECT_ASM_REG "\n\t" \
	"jz 333f\n\t" \
	"222:\n\t" \
	"dec %%" INJECT_ASM_REG "\n\t" \
	"jnz 222b\n\t" \
	"333:\n\t"

#elif defined(__s390__)

#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1]"m"(loop_cnt[1]) \
	, [loop_cnt_2]"m"(loop_cnt[2]) \
	, [loop_cnt_3]"m"(loop_cnt[3]) \
	, [loop_cnt_4]"m"(loop_cnt[4]) \
	, [loop_cnt_5]"m"(loop_cnt[5]) \
	, [loop_cnt_6]"m"(loop_cnt[6])

#define INJECT_ASM_REG	"r12"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"l %%" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
	"ltr %%" INJECT_ASM_REG ", %%" INJECT_ASM_REG "\n\t" \
	"je 333f\n\t" \
	"222:\n\t" \
	"ahi %%" INJECT_ASM_REG ", -1\n\t" \
	"jnz 222b\n\t" \
	"333:\n\t"

#elif defined(__ARMEL__)

#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1]"m"(loop_cnt[1]) \
	, [loop_cnt_2]"m"(loop_cnt[2]) \
	, [loop_cnt_3]"m"(loop_cnt[3]) \
	, [loop_cnt_4]"m"(loop_cnt[4]) \
	, [loop_cnt_5]"m"(loop_cnt[5]) \
	, [loop_cnt_6]"m"(loop_cnt[6])

#define INJECT_ASM_REG	"r4"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"ldr " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
	"cmp " INJECT_ASM_REG ", #0\n\t" \
	"beq 333f\n\t" \
	"222:\n\t" \
	"subs " INJECT_ASM_REG ", #1\n\t" \
	"bne 222b\n\t" \
	"333:\n\t"

#elif defined(__AARCH64EL__)

#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1] "Qo" (loop_cnt[1]) \
	, [loop_cnt_2] "Qo" (loop_cnt[2]) \
	, [loop_cnt_3] "Qo" (loop_cnt[3]) \
	, [loop_cnt_4] "Qo" (loop_cnt[4]) \
	, [loop_cnt_5] "Qo" (loop_cnt[5]) \
	, [loop_cnt_6] "Qo" (loop_cnt[6])

#define INJECT_ASM_REG	RSEQ_ASM_TMP_REG32

#define RSEQ_INJECT_ASM(n) \
	"	ldr	" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n"	\
	"	cbz	" INJECT_ASM_REG ", 333f\n"			\
	"222:\n"							\
	"	sub	" INJECT_ASM_REG ", " INJECT_ASM_REG ", #1\n"	\
	"	cbnz	" INJECT_ASM_REG ", 222b\n"			\
	"333:\n"

#elif defined(__PPC__)

#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1]"m"(loop_cnt[1]) \
	, [loop_cnt_2]"m"(loop_cnt[2]) \
	, [loop_cnt_3]"m"(loop_cnt[3]) \
	, [loop_cnt_4]"m"(loop_cnt[4]) \
	, [loop_cnt_5]"m"(loop_cnt[5]) \
	, [loop_cnt_6]"m"(loop_cnt[6])

#define INJECT_ASM_REG	"r18"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"lwz %%" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
	"cmpwi %%" INJECT_ASM_REG ", 0\n\t" \
	"beq 333f\n\t" \
	"222:\n\t" \
	"subic. %%" INJECT_ASM_REG ", %%" INJECT_ASM_REG ", 1\n\t" \
	"bne 222b\n\t" \
	"333:\n\t"

#elif defined(__mips__)

#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1]"m"(loop_cnt[1]) \
	, [loop_cnt_2]"m"(loop_cnt[2]) \
	, [loop_cnt_3]"m"(loop_cnt[3]) \
	, [loop_cnt_4]"m"(loop_cnt[4]) \
	, [loop_cnt_5]"m"(loop_cnt[5]) \
	, [loop_cnt_6]"m"(loop_cnt[6])

#define INJECT_ASM_REG	"$5"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"lw " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
	"beqz " INJECT_ASM_REG ", 333f\n\t" \
	"222:\n\t" \
	"addiu " INJECT_ASM_REG ", -1\n\t" \
	"bnez " INJECT_ASM_REG ", 222b\n\t" \
	"333:\n\t"
#elif defined(__riscv)

#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1]"m"(loop_cnt[1]) \
	, [loop_cnt_2]"m"(loop_cnt[2]) \
	, [loop_cnt_3]"m"(loop_cnt[3]) \
	, [loop_cnt_4]"m"(loop_cnt[4]) \
	, [loop_cnt_5]"m"(loop_cnt[5]) \
	, [loop_cnt_6]"m"(loop_cnt[6])

#define INJECT_ASM_REG	"t1"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n)					\
	"lw " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t"		\
	"beqz " INJECT_ASM_REG ", 333f\n\t"			\
	"222:\n\t"						\
	"addi  " INJECT_ASM_REG "," INJECT_ASM_REG ", -1\n\t"	\
	"bnez " INJECT_ASM_REG ", 222b\n\t"			\
	"333:\n\t"
#elif defined(__or1k__)

#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1]"m"(loop_cnt[1]) \
	, [loop_cnt_2]"m"(loop_cnt[2]) \
	, [loop_cnt_3]"m"(loop_cnt[3]) \
	, [loop_cnt_4]"m"(loop_cnt[4]) \
	, [loop_cnt_5]"m"(loop_cnt[5]) \
	, [loop_cnt_6]"m"(loop_cnt[6])

#define INJECT_ASM_REG	"r31"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n)					\
	"l.lwz   " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t"	\
	"l.sfeqi " INJECT_ASM_REG ", 0\n\t"			\
	"l.bf 333f\n\t"						\
	" l.nop\n\t"						\
	"222:\n\t"						\
	"l.addi  " INJECT_ASM_REG "," INJECT_ASM_REG ", -1\n\t"	\
	"l.sfeqi " INJECT_ASM_REG ", 0\n\t"			\
	"l.bnf 222b\n\t"					\
	" l.nop\n\t"						\
	"333:\n\t"
#else
#error unsupported target
#endif

#define RSEQ_INJECT_FAILED \
	nr_abort++;

#define RSEQ_INJECT_C(n) \
{ \
	int loc_i, loc_nr_loops = loop_cnt[n]; \
	\
	for (loc_i = 0; loc_i < loc_nr_loops; loc_i++) { \
		rseq_barrier(); \
	} \
	if (loc_nr_loops == -1 && opt_modulo) { \
		if (yield_mod_cnt == opt_modulo - 1) { \
			if (opt_sleep > 0) \
				poll(NULL, 0, opt_sleep); \
			if (opt_yield) \
				sched_yield(); \
			if (opt_signal) \
				raise(SIGUSR1); \
			yield_mod_cnt = 0; \
		} else { \
			yield_mod_cnt++; \
		} \
	} \
}
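
/*
 * Note: the RSEQ_INJECT_* hooks above are picked up by rseq.h (included
 * below), which expands them at numbered injection points inside its
 * rseq critical sections. RSEQ_INJECT_ASM(n) spins for loop_cnt[n]
 * iterations in assembly, RSEQ_INJECT_C(n) is the C-level equivalent
 * which can additionally sleep, yield or raise SIGUSR1 every opt_modulo
 * iterations, and RSEQ_INJECT_FAILED counts aborts. Spinning at those
 * points widens the window in which preemption, migration or signal
 * delivery can abort the sequence, which is what this parameterized
 * test is exercising.
 */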

#else

#define printf_verbose(fmt, ...)

#endif /* BENCHMARK */

#include "rseq.h"

static enum rseq_mo opt_mo = RSEQ_MO_RELAXED;

#ifdef RSEQ_ARCH_HAS_OFFSET_DEREF_ADDV
#define TEST_MEMBARRIER

static int sys_membarrier(int cmd, int flags, int cpu_id)
{
	return syscall(__NR_membarrier, cmd, flags, cpu_id);
}
#endif

#ifdef BUILDOPT_RSEQ_PERCPU_MM_CID
# define RSEQ_PERCPU	RSEQ_PERCPU_MM_CID
static
int get_current_cpu_id(void)
{
	return rseq_current_mm_cid();
}
static
bool rseq_validate_cpu_id(void)
{
	return rseq_mm_cid_available();
}
static
bool rseq_use_cpu_index(void)
{
	return false;	/* Use mm_cid */
}
# ifdef TEST_MEMBARRIER
/*
 * Membarrier does not currently support targeting a mm_cid, so
 * issue the barrier on all cpus.
 */
static
int rseq_membarrier_expedited(int cpu)
{
	return sys_membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ,
			      0, 0);
}
# endif /* TEST_MEMBARRIER */
#else
# define RSEQ_PERCPU	RSEQ_PERCPU_CPU_ID
static
int get_current_cpu_id(void)
{
	return rseq_cpu_start();
}
static
bool rseq_validate_cpu_id(void)
{
	return rseq_current_cpu_raw() >= 0;
}
static
bool rseq_use_cpu_index(void)
{
	return true;	/* Use cpu_id as index. */
}
# ifdef TEST_MEMBARRIER
static
int rseq_membarrier_expedited(int cpu)
{
	return sys_membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ,
			      MEMBARRIER_CMD_FLAG_CPU, cpu);
}
# endif /* TEST_MEMBARRIER */
#endif
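
/*
 * Summary of the two indexing modes selected above: with
 * BUILDOPT_RSEQ_PERCPU_MM_CID, per-cpu data is indexed by the memory
 * map concurrency id (mm_cid) and rseq_use_cpu_index() returns false,
 * so the tests below populate every slot rather than only the CPUs in
 * the allowed affinity mask. Without it, data is indexed by the plain
 * cpu_id and only slots for CPUs in the affinity mask are used. The
 * expedited rseq membarrier can target a specific cpu_id but not an
 * mm_cid, hence the all-cpus fallback in the mm_cid case.
 */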

struct percpu_lock_entry {
	intptr_t v;
} __attribute__((aligned(128)));

struct percpu_lock {
	struct percpu_lock_entry c[CPU_SETSIZE];
};

struct test_data_entry {
	intptr_t count;
} __attribute__((aligned(128)));

struct spinlock_test_data {
	struct percpu_lock lock;
	struct test_data_entry c[CPU_SETSIZE];
};

struct spinlock_thread_test_data {
	struct spinlock_test_data *data;
	long long reps;
	int reg;
};

struct inc_test_data {
	struct test_data_entry c[CPU_SETSIZE];
};

struct inc_thread_test_data {
	struct inc_test_data *data;
	long long reps;
	int reg;
};

struct percpu_list_node {
	intptr_t data;
	struct percpu_list_node *next;
};

struct percpu_list_entry {
	struct percpu_list_node *head;
} __attribute__((aligned(128)));

struct percpu_list {
	struct percpu_list_entry c[CPU_SETSIZE];
};

#define BUFFER_ITEM_PER_CPU	100

struct percpu_buffer_node {
	intptr_t data;
};

struct percpu_buffer_entry {
	intptr_t offset;
	intptr_t buflen;
	struct percpu_buffer_node **array;
} __attribute__((aligned(128)));

struct percpu_buffer {
	struct percpu_buffer_entry c[CPU_SETSIZE];
};

#define MEMCPY_BUFFER_ITEM_PER_CPU	100

struct percpu_memcpy_buffer_node {
	intptr_t data1;
	uint64_t data2;
};

struct percpu_memcpy_buffer_entry {
	intptr_t offset;
	intptr_t buflen;
	struct percpu_memcpy_buffer_node *array;
} __attribute__((aligned(128)));

struct percpu_memcpy_buffer {
	struct percpu_memcpy_buffer_entry c[CPU_SETSIZE];
};
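
/*
 * The structures above back the tests below: a per-cpu spinlock
 * protecting a per-cpu counter, a lock-free per-cpu counter using
 * rseq_addv(), a per-cpu LIFO linked list, and two per-cpu array-based
 * stacks (one storing node pointers, one copying whole items with
 * memcpy). Each per-cpu slot is aligned to 128 bytes so that slots of
 * different CPUs land on separate cache lines and avoid false sharing.
 */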

/* A simple percpu spinlock. Grabs lock on current cpu. */
static int rseq_this_cpu_lock(struct percpu_lock *lock)
{
	int cpu;

	for (;;) {
		int ret;

		cpu = get_current_cpu_id();
		if (cpu < 0) {
			fprintf(stderr, "pid: %d: tid: %d, cpu: %d: cid: %d\n",
					getpid(), (int) rseq_gettid(), rseq_current_cpu_raw(), cpu);
			abort();
		}
		ret = rseq_cmpeqv_storev(RSEQ_MO_RELAXED, RSEQ_PERCPU,
					 &lock->c[cpu].v,
					 0, 1, cpu);
		if (rseq_likely(!ret))
			break;
		/* Retry if comparison fails or rseq aborts. */
	}
	/*
	 * Acquire semantic when taking lock after control dependency.
	 * Matches rseq_smp_store_release().
	 */
	rseq_smp_acquire__after_ctrl_dep();
	return cpu;
}

static void rseq_percpu_unlock(struct percpu_lock *lock, int cpu)
{
	assert(lock->c[cpu].v == 1);
	/*
	 * Release lock, with release semantic. Matches
	 * rseq_smp_acquire__after_ctrl_dep().
	 */
	rseq_smp_store_release(&lock->c[cpu].v, 0);
}
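
/*
 * Illustrative pairing (this is how the spinlock test thread below uses
 * the lock):
 *
 *	int cpu = rseq_this_cpu_lock(&data->lock);
 *	data->c[cpu].count++;
 *	rseq_percpu_unlock(&data->lock, cpu);
 *
 * The index returned by rseq_this_cpu_lock() is the cpu (or mm_cid)
 * whose lock word was acquired; the same index must be passed back to
 * rseq_percpu_unlock(), even if the thread has since migrated.
 */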

void *test_percpu_spinlock_thread(void *arg)
{
	struct spinlock_thread_test_data *thread_data = arg;
	struct spinlock_test_data *data = thread_data->data;
	long long i, reps;

	if (!opt_disable_rseq && thread_data->reg &&
	    rseq_register_current_thread())
		abort();
	reps = thread_data->reps;
	for (i = 0; i < reps; i++) {
		int cpu = rseq_this_cpu_lock(&data->lock);
		data->c[cpu].count++;
		rseq_percpu_unlock(&data->lock, cpu);
#ifndef BENCHMARK
		if (i != 0 && !(i % (reps / 10)))
			printf_verbose("tid %d: count %lld\n",
				       (int) rseq_gettid(), i);
#endif
	}
	printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
		       (int) rseq_gettid(), nr_abort, signals_delivered);
	if (!opt_disable_rseq && thread_data->reg &&
	    rseq_unregister_current_thread())
		abort();
	return NULL;
}

/*
 * A simple test which implements a sharded counter using a per-cpu
 * lock.  Obviously real applications might prefer to simply use a
 * per-cpu increment; however, this is reasonable for a test and the
 * lock can be extended to synchronize more complicated operations.
 */
void test_percpu_spinlock(void)
{
	const int num_threads = opt_threads;
	int i, ret;
	uint64_t sum;
	pthread_t test_threads[num_threads];
	struct spinlock_test_data data;
	struct spinlock_thread_test_data thread_data[num_threads];

	memset(&data, 0, sizeof(data));
	for (i = 0; i < num_threads; i++) {
		thread_data[i].reps = opt_reps;
		if (opt_disable_mod <= 0 || (i % opt_disable_mod))
			thread_data[i].reg = 1;
		else
			thread_data[i].reg = 0;
		thread_data[i].data = &data;
		ret = pthread_create(&test_threads[i], NULL,
				     test_percpu_spinlock_thread,
				     &thread_data[i]);
		if (ret) {
			errno = ret;
			perror("pthread_create");
			abort();
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_join(test_threads[i], NULL);
		if (ret) {
			errno = ret;
			perror("pthread_join");
			abort();
		}
	}

	sum = 0;
	for (i = 0; i < CPU_SETSIZE; i++)
		sum += data.c[i].count;

	assert(sum == (uint64_t)opt_reps * num_threads);
}

void *test_percpu_inc_thread(void *arg)
{
	struct inc_thread_test_data *thread_data = arg;
	struct inc_test_data *data = thread_data->data;
	long long i, reps;

	if (!opt_disable_rseq && thread_data->reg &&
	    rseq_register_current_thread())
		abort();
	reps = thread_data->reps;
	for (i = 0; i < reps; i++) {
		int ret;

		do {
			int cpu;

			cpu = get_current_cpu_id();
			ret = rseq_addv(RSEQ_MO_RELAXED, RSEQ_PERCPU,
					&data->c[cpu].count, 1, cpu);
		} while (rseq_unlikely(ret));
#ifndef BENCHMARK
		if (i != 0 && !(i % (reps / 10)))
			printf_verbose("tid %d: count %lld\n",
				       (int) rseq_gettid(), i);
#endif
	}
	printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
		       (int) rseq_gettid(), nr_abort, signals_delivered);
	if (!opt_disable_rseq && thread_data->reg &&
	    rseq_unregister_current_thread())
		abort();
	return NULL;
}
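
/*
 * Note: unlike the spinlock variant above, each increment here is a
 * single rseq_addv() commit. The only reason to loop is an rseq abort
 * (preemption, migration or signal delivery inside the critical
 * section); the cpu (or mm_cid) is simply re-read and the add is
 * retried on the new slot.
 */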

void test_percpu_inc(void)
{
	const int num_threads = opt_threads;
	int i, ret;
	uint64_t sum;
	pthread_t test_threads[num_threads];
	struct inc_test_data data;
	struct inc_thread_test_data thread_data[num_threads];

	memset(&data, 0, sizeof(data));
	for (i = 0; i < num_threads; i++) {
		thread_data[i].reps = opt_reps;
		if (opt_disable_mod <= 0 || (i % opt_disable_mod))
			thread_data[i].reg = 1;
		else
			thread_data[i].reg = 0;
		thread_data[i].data = &data;
		ret = pthread_create(&test_threads[i], NULL,
				     test_percpu_inc_thread,
				     &thread_data[i]);
		if (ret) {
			errno = ret;
			perror("pthread_create");
			abort();
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_join(test_threads[i], NULL);
		if (ret) {
			errno = ret;
			perror("pthread_join");
			abort();
		}
	}

	sum = 0;
	for (i = 0; i < CPU_SETSIZE; i++)
		sum += data.c[i].count;

	assert(sum == (uint64_t)opt_reps * num_threads);
}

void this_cpu_list_push(struct percpu_list *list,
			struct percpu_list_node *node,
			int *_cpu)
{
	int cpu;

	for (;;) {
		intptr_t *targetptr, newval, expect;
		int ret;

		cpu = get_current_cpu_id();
		/* Load list->c[cpu].head with single-copy atomicity. */
		expect = (intptr_t)RSEQ_READ_ONCE(list->c[cpu].head);
		newval = (intptr_t)node;
		targetptr = (intptr_t *)&list->c[cpu].head;
		node->next = (struct percpu_list_node *)expect;
		ret = rseq_cmpeqv_storev(RSEQ_MO_RELAXED, RSEQ_PERCPU,
					 targetptr, expect, newval, cpu);
		if (rseq_likely(!ret))
			break;
		/* Retry if comparison fails or rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
}
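
/*
 * Note on the push above: node->next can be written outside the rseq
 * critical section because the node is still private at that point; it
 * only becomes visible once the committing store of the new head
 * succeeds. If the comparison fails (another push or pop won the race)
 * or the sequence aborts, the head is re-read on the current cpu (or
 * mm_cid) and the whole attempt is redone.
 */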

/*
 * Unlike a traditional lock-less linked list, the availability of a
 * rseq primitive allows us to implement pop without concerns over
 * ABA-type races.
 */
struct percpu_list_node *this_cpu_list_pop(struct percpu_list *list,
					   int *_cpu)
{
	struct percpu_list_node *node = NULL;
	int cpu;

	for (;;) {
		struct percpu_list_node *head;
		intptr_t *targetptr, expectnot, *load;
		long offset;
		int ret;

		cpu = get_current_cpu_id();
		targetptr = (intptr_t *)&list->c[cpu].head;
		expectnot = (intptr_t)NULL;
		offset = offsetof(struct percpu_list_node, next);
		load = (intptr_t *)&head;
		ret = rseq_cmpnev_storeoffp_load(RSEQ_MO_RELAXED, RSEQ_PERCPU,
						 targetptr, expectnot,
						 offset, load, cpu);
		if (rseq_likely(!ret)) {
			node = head;
			break;
		}
		if (ret > 0)
			break;
		/* Retry if rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
	return node;
}
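
/*
 * Note on the pop above: rseq_cmpnev_storeoffp_load() checks that the
 * head is not NULL, loads it, dereferences head->next (via the given
 * offset) and stores it back as the new head, all within one rseq
 * critical section on the owning cpu (or mm_cid). Because nothing else
 * can commit on that slot between the load and the update, the head
 * cannot be popped, freed and reused underneath us, which is the usual
 * ABA hazard of compare-and-swap based pops. A positive return value
 * means the list was empty.
 */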

/*
 * __percpu_list_pop is not safe against concurrent accesses. Should
 * only be used on lists that are not concurrently modified.
 */
struct percpu_list_node *__percpu_list_pop(struct percpu_list *list, int cpu)
{
	struct percpu_list_node *node;

	node = list->c[cpu].head;
	if (!node)
		return NULL;
	list->c[cpu].head = node->next;
	return node;
}

void *test_percpu_list_thread(void *arg)
{
	long long i, reps;
	struct percpu_list *list = (struct percpu_list *)arg;

	if (!opt_disable_rseq && rseq_register_current_thread())
		abort();

	reps = opt_reps;
	for (i = 0; i < reps; i++) {
		struct percpu_list_node *node;

		node = this_cpu_list_pop(list, NULL);
		if (opt_yield)
			sched_yield();  /* encourage shuffling */
		if (node)
			this_cpu_list_push(list, node, NULL);
	}

	printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
		       (int) rseq_gettid(), nr_abort, signals_delivered);
	if (!opt_disable_rseq && rseq_unregister_current_thread())
		abort();

	return NULL;
}

/* Simultaneous modification to a per-cpu linked list from many threads.  */
void test_percpu_list(void)
{
	const int num_threads = opt_threads;
	int i, j, ret;
	uint64_t sum = 0, expected_sum = 0;
	struct percpu_list list;
	pthread_t test_threads[num_threads];
	cpu_set_t allowed_cpus;

	memset(&list, 0, sizeof(list));

	/* Generate list entries for every usable cpu. */
	sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
	for (i = 0; i < CPU_SETSIZE; i++) {
		if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
			continue;
		for (j = 1; j <= 100; j++) {
			struct percpu_list_node *node;

			expected_sum += j;

			node = malloc(sizeof(*node));
			assert(node);
			node->data = j;
			node->next = list.c[i].head;
			list.c[i].head = node;
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_create(&test_threads[i], NULL,
				     test_percpu_list_thread, &list);
		if (ret) {
			errno = ret;
			perror("pthread_create");
			abort();
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_join(test_threads[i], NULL);
		if (ret) {
			errno = ret;
			perror("pthread_join");
			abort();
		}
	}

	for (i = 0; i < CPU_SETSIZE; i++) {
		struct percpu_list_node *node;

		if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
			continue;

		while ((node = __percpu_list_pop(&list, i))) {
			sum += node->data;
			free(node);
		}
	}

	/*
	 * All entries should now be accounted for (unless some external
	 * actor is interfering with our allowed affinity while this
	 * test is running).
	 */
	assert(sum == expected_sum);
}

bool this_cpu_buffer_push(struct percpu_buffer *buffer,
			  struct percpu_buffer_node *node,
			  int *_cpu)
{
	bool result = false;
	int cpu;

	for (;;) {
		intptr_t *targetptr_spec, newval_spec;
		intptr_t *targetptr_final, newval_final;
		intptr_t offset;
		int ret;

		cpu = get_current_cpu_id();
		offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
		if (offset == buffer->c[cpu].buflen)
			break;
		newval_spec = (intptr_t)node;
		targetptr_spec = (intptr_t *)&buffer->c[cpu].array[offset];
		newval_final = offset + 1;
		targetptr_final = &buffer->c[cpu].offset;
		ret = rseq_cmpeqv_trystorev_storev(opt_mo, RSEQ_PERCPU,
			targetptr_final, offset, targetptr_spec,
			newval_spec, newval_final, cpu);
		if (rseq_likely(!ret)) {
			result = true;
			break;
		}
		/* Retry if comparison fails or rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
	return result;
}
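
/*
 * Note on the push above: rseq_cmpeqv_trystorev_storev() re-checks that
 * the offset is unchanged, speculatively stores the node pointer into
 * array[offset], then commits by storing offset + 1. If the sequence
 * aborts between the two stores, the speculative store is harmless
 * because the offset was never published. The commit store uses opt_mo
 * rather than a hard-coded ordering, so the harness can request a
 * stronger ordering for it (opt_mo defaults to RSEQ_MO_RELAXED). A
 * false return means the per-cpu slot array was full.
 */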

struct percpu_buffer_node *this_cpu_buffer_pop(struct percpu_buffer *buffer,
					       int *_cpu)
{
	struct percpu_buffer_node *head;
	int cpu;

	for (;;) {
		intptr_t *targetptr, newval;
		intptr_t offset;
		int ret;

		cpu = get_current_cpu_id();
		/* Load offset with single-copy atomicity. */
		offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
		if (offset == 0) {
			head = NULL;
			break;
		}
		head = RSEQ_READ_ONCE(buffer->c[cpu].array[offset - 1]);
		newval = offset - 1;
		targetptr = (intptr_t *)&buffer->c[cpu].offset;
		ret = rseq_cmpeqv_cmpeqv_storev(RSEQ_MO_RELAXED, RSEQ_PERCPU,
			targetptr, offset,
			(intptr_t *)&buffer->c[cpu].array[offset - 1],
			(intptr_t)head, newval, cpu);
		if (rseq_likely(!ret))
			break;
		/* Retry if comparison fails or rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
	return head;
}
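
/*
 * Note on the pop above: rseq_cmpeqv_cmpeqv_storev() performs a double
 * check before committing the new offset: it verifies both that the
 * offset is still the one we read and that array[offset - 1] still
 * holds the pointer we are about to return. This catches the case where
 * the slot was popped and refilled with a different node between our
 * reads and the commit. A NULL return simply means the per-cpu buffer
 * was empty.
 */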

/*
 * __percpu_buffer_pop is not safe against concurrent accesses. Should
 * only be used on buffers that are not concurrently modified.
 */
struct percpu_buffer_node *__percpu_buffer_pop(struct percpu_buffer *buffer,
					       int cpu)
{
	struct percpu_buffer_node *head;
	intptr_t offset;

	offset = buffer->c[cpu].offset;
	if (offset == 0)
		return NULL;
	head = buffer->c[cpu].array[offset - 1];
	buffer->c[cpu].offset = offset - 1;
	return head;
}

void *test_percpu_buffer_thread(void *arg)
{
	long long i, reps;
	struct percpu_buffer *buffer = (struct percpu_buffer *)arg;

	if (!opt_disable_rseq && rseq_register_current_thread())
		abort();

	reps = opt_reps;
	for (i = 0; i < reps; i++) {
		struct percpu_buffer_node *node;

		node = this_cpu_buffer_pop(buffer, NULL);
		if (opt_yield)
			sched_yield();  /* encourage shuffling */
		if (node) {
			if (!this_cpu_buffer_push(buffer, node, NULL)) {
				/* Should increase buffer size. */
				abort();
			}
		}
	}

	printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
		       (int) rseq_gettid(), nr_abort, signals_delivered);
	if (!opt_disable_rseq && rseq_unregister_current_thread())
		abort();

	return NULL;
}

/* Simultaneous modification to a per-cpu buffer from many threads.  */
void test_percpu_buffer(void)
{
	const int num_threads = opt_threads;
	int i, j, ret;
	uint64_t sum = 0, expected_sum = 0;
	struct percpu_buffer buffer;
	pthread_t test_threads[num_threads];
	cpu_set_t allowed_cpus;

	memset(&buffer, 0, sizeof(buffer));

	/* Generate list entries for every usable cpu. */
	sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
	for (i = 0; i < CPU_SETSIZE; i++) {
		if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
			continue;
		/* Worst-case is every item in the same CPU. */
		buffer.c[i].array =
			malloc(sizeof(*buffer.c[i].array) * CPU_SETSIZE *
			       BUFFER_ITEM_PER_CPU);
		assert(buffer.c[i].array);
		buffer.c[i].buflen = CPU_SETSIZE * BUFFER_ITEM_PER_CPU;
		for (j = 1; j <= BUFFER_ITEM_PER_CPU; j++) {
			struct percpu_buffer_node *node;

			expected_sum += j;

			/*
			 * We could theoretically put the word-sized
			 * "data" directly in the buffer. However, we
			 * want to model objects that would not fit
			 * within a single word, so allocate an object
			 * for each node.
			 */
			node = malloc(sizeof(*node));
			assert(node);
			node->data = j;
			buffer.c[i].array[j - 1] = node;
			buffer.c[i].offset++;
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_create(&test_threads[i], NULL,
				     test_percpu_buffer_thread, &buffer);
		if (ret) {
			errno = ret;
			perror("pthread_create");
			abort();
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_join(test_threads[i], NULL);
		if (ret) {
			errno = ret;
			perror("pthread_join");
			abort();
		}
	}

	for (i = 0; i < CPU_SETSIZE; i++) {
		struct percpu_buffer_node *node;

		if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
			continue;

		while ((node = __percpu_buffer_pop(&buffer, i))) {
			sum += node->data;
			free(node);
		}
		free(buffer.c[i].array);
	}

	/*
	 * All entries should now be accounted for (unless some external
	 * actor is interfering with our allowed affinity while this
	 * test is running).
	 */
	assert(sum == expected_sum);
}

bool this_cpu_memcpy_buffer_push(struct percpu_memcpy_buffer *buffer,
				 struct percpu_memcpy_buffer_node item,
				 int *_cpu)
{
	bool result = false;
	int cpu;

	for (;;) {
		intptr_t *targetptr_final, newval_final, offset;
		char *destptr, *srcptr;
		size_t copylen;
		int ret;

		cpu = get_current_cpu_id();
		/* Load offset with single-copy atomicity. */
		offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
		if (offset == buffer->c[cpu].buflen)
			break;
		destptr = (char *)&buffer->c[cpu].array[offset];
		srcptr = (char *)&item;
		/* copylen must be <= 4kB. */
		copylen = sizeof(item);
		newval_final = offset + 1;
		targetptr_final = &buffer->c[cpu].offset;
		ret = rseq_cmpeqv_trymemcpy_storev(
			opt_mo, RSEQ_PERCPU,
			targetptr_final, offset,
			destptr, srcptr, copylen,
			newval_final, cpu);
		if (rseq_likely(!ret)) {
			result = true;
			break;
		}
		/* Retry if comparison fails or rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
	return result;
}
1046c960e990SMathieu Desnoyers 
this_cpu_memcpy_buffer_pop(struct percpu_memcpy_buffer * buffer,struct percpu_memcpy_buffer_node * item,int * _cpu)1047c960e990SMathieu Desnoyers bool this_cpu_memcpy_buffer_pop(struct percpu_memcpy_buffer *buffer,
1048c960e990SMathieu Desnoyers 				struct percpu_memcpy_buffer_node *item,
1049c960e990SMathieu Desnoyers 				int *_cpu)
1050c960e990SMathieu Desnoyers {
1051c960e990SMathieu Desnoyers 	bool result = false;
1052c960e990SMathieu Desnoyers 	int cpu;
1053c960e990SMathieu Desnoyers 
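	/*
	 * Mirror image of the push operation: copy the top slot out of the
	 * current cpu's buffer, then commit by storing the decremented offset.
	 * An offset of 0 means the per-cpu buffer is empty.
	 */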
1054c960e990SMathieu Desnoyers 	for (;;) {
1055c960e990SMathieu Desnoyers 		intptr_t *targetptr_final, newval_final, offset;
1056c960e990SMathieu Desnoyers 		char *destptr, *srcptr;
1057c960e990SMathieu Desnoyers 		size_t copylen;
1058c960e990SMathieu Desnoyers 		int ret;
1059c960e990SMathieu Desnoyers 
1060ee31fff0SMathieu Desnoyers 		cpu = get_current_cpu_id();
1061c960e990SMathieu Desnoyers 		/* Load offset with single-copy atomicity. */
1062c960e990SMathieu Desnoyers 		offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
1063c960e990SMathieu Desnoyers 		if (offset == 0)
1064c960e990SMathieu Desnoyers 			break;
1065c960e990SMathieu Desnoyers 		destptr = (char *)item;
1066c960e990SMathieu Desnoyers 		srcptr = (char *)&buffer->c[cpu].array[offset - 1];
1067c960e990SMathieu Desnoyers 		/* copylen must be <= 4kB. */
1068c960e990SMathieu Desnoyers 		copylen = sizeof(*item);
1069c960e990SMathieu Desnoyers 		newval_final = offset - 1;
1070c960e990SMathieu Desnoyers 		targetptr_final = &buffer->c[cpu].offset;
1071ee31fff0SMathieu Desnoyers 		ret = rseq_cmpeqv_trymemcpy_storev(RSEQ_MO_RELAXED, RSEQ_PERCPU,
1072ee31fff0SMathieu Desnoyers 			targetptr_final, offset, destptr, srcptr, copylen,
1073c960e990SMathieu Desnoyers 			newval_final, cpu);
1074c960e990SMathieu Desnoyers 		if (rseq_likely(!ret)) {
1075c960e990SMathieu Desnoyers 			result = true;
1076c960e990SMathieu Desnoyers 			break;
1077c960e990SMathieu Desnoyers 		}
1078c960e990SMathieu Desnoyers 		/* Retry if comparison fails or rseq aborts. */
1079c960e990SMathieu Desnoyers 	}
1080c960e990SMathieu Desnoyers 	if (_cpu)
1081c960e990SMathieu Desnoyers 		*_cpu = cpu;
1082c960e990SMathieu Desnoyers 	return result;
1083c960e990SMathieu Desnoyers }
1084c960e990SMathieu Desnoyers 
1085c960e990SMathieu Desnoyers /*
1086c960e990SMathieu Desnoyers  * __percpu_memcpy_buffer_pop is not safe against concurrent accesses. Should
1087c960e990SMathieu Desnoyers  * only be used on buffers that are not concurrently modified.
1088c960e990SMathieu Desnoyers  */
1089c960e990SMathieu Desnoyers bool __percpu_memcpy_buffer_pop(struct percpu_memcpy_buffer *buffer,
1090c960e990SMathieu Desnoyers 				struct percpu_memcpy_buffer_node *item,
1091c960e990SMathieu Desnoyers 				int cpu)
1092c960e990SMathieu Desnoyers {
1093c960e990SMathieu Desnoyers 	intptr_t offset;
1094c960e990SMathieu Desnoyers 
1095c960e990SMathieu Desnoyers 	offset = buffer->c[cpu].offset;
1096c960e990SMathieu Desnoyers 	if (offset == 0)
1097c960e990SMathieu Desnoyers 		return false;
1098c960e990SMathieu Desnoyers 	memcpy(item, &buffer->c[cpu].array[offset - 1], sizeof(*item));
1099c960e990SMathieu Desnoyers 	buffer->c[cpu].offset = offset - 1;
1100c960e990SMathieu Desnoyers 	return true;
1101c960e990SMathieu Desnoyers }
1102c960e990SMathieu Desnoyers 
1103c960e990SMathieu Desnoyers void *test_percpu_memcpy_buffer_thread(void *arg)
1104c960e990SMathieu Desnoyers {
1105c960e990SMathieu Desnoyers 	long long i, reps;
1106c960e990SMathieu Desnoyers 	struct percpu_memcpy_buffer *buffer = (struct percpu_memcpy_buffer *)arg;
1107c960e990SMathieu Desnoyers 
1108c960e990SMathieu Desnoyers 	if (!opt_disable_rseq && rseq_register_current_thread())
1109c960e990SMathieu Desnoyers 		abort();
1110c960e990SMathieu Desnoyers 
1111c960e990SMathieu Desnoyers 	reps = opt_reps;
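	/*
	 * Each iteration pops one item from the current cpu's buffer and
	 * pushes it back (possibly onto another cpu if the thread migrated
	 * in between), so the total of all items is preserved for the final
	 * consistency check in test_percpu_memcpy_buffer().
	 */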
1112c960e990SMathieu Desnoyers 	for (i = 0; i < reps; i++) {
1113c960e990SMathieu Desnoyers 		struct percpu_memcpy_buffer_node item;
1114c960e990SMathieu Desnoyers 		bool result;
1115c960e990SMathieu Desnoyers 
1116c960e990SMathieu Desnoyers 		result = this_cpu_memcpy_buffer_pop(buffer, &item, NULL);
1117c960e990SMathieu Desnoyers 		if (opt_yield)
1118c960e990SMathieu Desnoyers 			sched_yield();  /* encourage shuffling */
1119c960e990SMathieu Desnoyers 		if (result) {
1120c960e990SMathieu Desnoyers 			if (!this_cpu_memcpy_buffer_push(buffer, item, NULL)) {
1121c960e990SMathieu Desnoyers 				/* Should increase buffer size. */
1122c960e990SMathieu Desnoyers 				abort();
1123c960e990SMathieu Desnoyers 			}
1124c960e990SMathieu Desnoyers 		}
1125c960e990SMathieu Desnoyers 	}
1126c960e990SMathieu Desnoyers 
1127c960e990SMathieu Desnoyers 	printf_verbose("tid %d: number of rseq aborts: %d, signals delivered: %u\n",
11288df34c56SMathieu Desnoyers 		       (int) rseq_gettid(), nr_abort, signals_delivered);
1129c960e990SMathieu Desnoyers 	if (!opt_disable_rseq && rseq_unregister_current_thread())
1130c960e990SMathieu Desnoyers 		abort();
1131c960e990SMathieu Desnoyers 
1132c960e990SMathieu Desnoyers 	return NULL;
1133c960e990SMathieu Desnoyers }
1134c960e990SMathieu Desnoyers 
1135c960e990SMathieu Desnoyers /* Simultaneous modification to a per-cpu buffer from many threads.  */
1136c960e990SMathieu Desnoyers void test_percpu_memcpy_buffer(void)
1137c960e990SMathieu Desnoyers {
1138c960e990SMathieu Desnoyers 	const int num_threads = opt_threads;
1139c960e990SMathieu Desnoyers 	int i, j, ret;
1140c960e990SMathieu Desnoyers 	uint64_t sum = 0, expected_sum = 0;
1141c960e990SMathieu Desnoyers 	struct percpu_memcpy_buffer buffer;
1142c960e990SMathieu Desnoyers 	pthread_t test_threads[num_threads];
1143c960e990SMathieu Desnoyers 	cpu_set_t allowed_cpus;
1144c960e990SMathieu Desnoyers 
1145c960e990SMathieu Desnoyers 	memset(&buffer, 0, sizeof(buffer));
1146c960e990SMathieu Desnoyers 
1147c960e990SMathieu Desnoyers 	/* Generate list entries for every usable cpu. */
1148c960e990SMathieu Desnoyers 	sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
1149c960e990SMathieu Desnoyers 	for (i = 0; i < CPU_SETSIZE; i++) {
1150d53271c0SMathieu Desnoyers 		if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
1151c960e990SMathieu Desnoyers 			continue;
1152c960e990SMathieu Desnoyers 		/* Worst case is every item in the same CPU. */
1153c960e990SMathieu Desnoyers 		buffer.c[i].array =
1154c960e990SMathieu Desnoyers 			malloc(sizeof(*buffer.c[i].array) * CPU_SETSIZE *
1155c960e990SMathieu Desnoyers 			       MEMCPY_BUFFER_ITEM_PER_CPU);
1156c960e990SMathieu Desnoyers 		assert(buffer.c[i].array);
1157c960e990SMathieu Desnoyers 		buffer.c[i].buflen = CPU_SETSIZE * MEMCPY_BUFFER_ITEM_PER_CPU;
1158c960e990SMathieu Desnoyers 		for (j = 1; j <= MEMCPY_BUFFER_ITEM_PER_CPU; j++) {
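			/* Each node contributes data1 (j) + data2 (j + 1) = 2 * j + 1. */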
1159c960e990SMathieu Desnoyers 			expected_sum += 2 * j + 1;
1160c960e990SMathieu Desnoyers 
1161c960e990SMathieu Desnoyers 			/*
1162c960e990SMathieu Desnoyers 			 * We could theoretically put the word-sized
1163c960e990SMathieu Desnoyers 			 * "data" directly in the buffer. However, we
1164c960e990SMathieu Desnoyers 			 * want to model objects that would not fit
1165c960e990SMathieu Desnoyers 			 * within a single word, so allocate an object
1166c960e990SMathieu Desnoyers 			 * for each node.
1167c960e990SMathieu Desnoyers 			 */
1168c960e990SMathieu Desnoyers 			buffer.c[i].array[j - 1].data1 = j;
1169c960e990SMathieu Desnoyers 			buffer.c[i].array[j - 1].data2 = j + 1;
1170c960e990SMathieu Desnoyers 			buffer.c[i].offset++;
1171c960e990SMathieu Desnoyers 		}
1172c960e990SMathieu Desnoyers 	}
1173c960e990SMathieu Desnoyers 
1174c960e990SMathieu Desnoyers 	for (i = 0; i < num_threads; i++) {
1175c960e990SMathieu Desnoyers 		ret = pthread_create(&test_threads[i], NULL,
1176c960e990SMathieu Desnoyers 				     test_percpu_memcpy_buffer_thread,
1177c960e990SMathieu Desnoyers 				     &buffer);
1178c960e990SMathieu Desnoyers 		if (ret) {
1179c960e990SMathieu Desnoyers 			errno = ret;
1180c960e990SMathieu Desnoyers 			perror("pthread_create");
1181c960e990SMathieu Desnoyers 			abort();
1182c960e990SMathieu Desnoyers 		}
1183c960e990SMathieu Desnoyers 	}
1184c960e990SMathieu Desnoyers 
1185c960e990SMathieu Desnoyers 	for (i = 0; i < num_threads; i++) {
1186c960e990SMathieu Desnoyers 		ret = pthread_join(test_threads[i], NULL);
1187c960e990SMathieu Desnoyers 		if (ret) {
1188c960e990SMathieu Desnoyers 			errno = ret;
1189c960e990SMathieu Desnoyers 			perror("pthread_join");
1190c960e990SMathieu Desnoyers 			abort();
1191c960e990SMathieu Desnoyers 		}
1192c960e990SMathieu Desnoyers 	}
1193c960e990SMathieu Desnoyers 
1194c960e990SMathieu Desnoyers 	for (i = 0; i < CPU_SETSIZE; i++) {
1195c960e990SMathieu Desnoyers 		struct percpu_memcpy_buffer_node item;
1196c960e990SMathieu Desnoyers 
1197d53271c0SMathieu Desnoyers 		if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
1198c960e990SMathieu Desnoyers 			continue;
1199c960e990SMathieu Desnoyers 
1200c960e990SMathieu Desnoyers 		while (__percpu_memcpy_buffer_pop(&buffer, &item, i)) {
1201c960e990SMathieu Desnoyers 			sum += item.data1;
1202c960e990SMathieu Desnoyers 			sum += item.data2;
1203c960e990SMathieu Desnoyers 		}
1204c960e990SMathieu Desnoyers 		free(buffer.c[i].array);
1205c960e990SMathieu Desnoyers 	}
1206c960e990SMathieu Desnoyers 
1207c960e990SMathieu Desnoyers 	/*
1208c960e990SMathieu Desnoyers 	 * All entries should now be accounted for (unless some external
1209c960e990SMathieu Desnoyers 	 * actor is interfering with our allowed affinity while this
1210c960e990SMathieu Desnoyers 	 * test is running).
1211c960e990SMathieu Desnoyers 	 */
1212c960e990SMathieu Desnoyers 	assert(sum == expected_sum);
1213c960e990SMathieu Desnoyers }
1214c960e990SMathieu Desnoyers 
1215c960e990SMathieu Desnoyers static void test_signal_interrupt_handler(int signo)
1216c960e990SMathieu Desnoyers {
1217c960e990SMathieu Desnoyers 	signals_delivered++;
1218c960e990SMathieu Desnoyers }
1219c960e990SMathieu Desnoyers 
1220c960e990SMathieu Desnoyers static int set_signal_handler(void)
1221c960e990SMathieu Desnoyers {
1222c960e990SMathieu Desnoyers 	int ret = 0;
1223c960e990SMathieu Desnoyers 	struct sigaction sa;
1224c960e990SMathieu Desnoyers 	sigset_t sigset;
1225c960e990SMathieu Desnoyers 
1226c960e990SMathieu Desnoyers 	ret = sigemptyset(&sigset);
1227c960e990SMathieu Desnoyers 	if (ret < 0) {
1228c960e990SMathieu Desnoyers 		perror("sigemptyset");
1229c960e990SMathieu Desnoyers 		return ret;
1230c960e990SMathieu Desnoyers 	}
1231c960e990SMathieu Desnoyers 
1232c960e990SMathieu Desnoyers 	sa.sa_handler = test_signal_interrupt_handler;
1233c960e990SMathieu Desnoyers 	sa.sa_mask = sigset;
1234c960e990SMathieu Desnoyers 	sa.sa_flags = 0;
1235c960e990SMathieu Desnoyers 	ret = sigaction(SIGUSR1, &sa, NULL);
1236c960e990SMathieu Desnoyers 	if (ret < 0) {
1237c960e990SMathieu Desnoyers 		perror("sigaction");
1238c960e990SMathieu Desnoyers 		return ret;
1239c960e990SMathieu Desnoyers 	}
1240c960e990SMathieu Desnoyers 
1241c960e990SMathieu Desnoyers 	printf_verbose("Signal handler set for SIGUSR1\n");
1242c960e990SMathieu Desnoyers 
1243c960e990SMathieu Desnoyers 	return ret;
1244c960e990SMathieu Desnoyers }
1245c960e990SMathieu Desnoyers 
12466f39cecdSXingxing Su /* Test the MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ membarrier command. */
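/*
 * TEST_MEMBARRIER is only defined when the architecture provides the
 * rseq_offset_deref_addv() operation this test relies on; otherwise the
 * stub at the end of this block prints a message and skips the test.
 */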
1247ee31fff0SMathieu Desnoyers #ifdef TEST_MEMBARRIER
1248f166b111SPeter Oskolkov struct test_membarrier_thread_args {
1249f166b111SPeter Oskolkov 	int stop;
1250f166b111SPeter Oskolkov 	intptr_t percpu_list_ptr;
1251f166b111SPeter Oskolkov };
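/*
 * percpu_list_ptr points to the list currently considered "active". The
 * manager thread flips it between two lists with release stores, and the
 * worker threads load it with acquire semantics. stop tells the manager
 * thread to exit.
 */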
1252f166b111SPeter Oskolkov 
1253f166b111SPeter Oskolkov /* Worker threads modify data in their "active" percpu lists. */
1254f166b111SPeter Oskolkov void *test_membarrier_worker_thread(void *arg)
1255f166b111SPeter Oskolkov {
1256f166b111SPeter Oskolkov 	struct test_membarrier_thread_args *args =
1257f166b111SPeter Oskolkov 		(struct test_membarrier_thread_args *)arg;
1258f166b111SPeter Oskolkov 	const int iters = opt_reps;
1259f166b111SPeter Oskolkov 	int i;
1260f166b111SPeter Oskolkov 
1261f166b111SPeter Oskolkov 	if (rseq_register_current_thread()) {
1262f166b111SPeter Oskolkov 		fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
1263f166b111SPeter Oskolkov 			errno, strerror(errno));
1264f166b111SPeter Oskolkov 		abort();
1265f166b111SPeter Oskolkov 	}
1266f166b111SPeter Oskolkov 
1267f166b111SPeter Oskolkov 	/* Wait for initialization. */
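	/* The acquire load pairs with the manager's release store of percpu_list_ptr. */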
1268078a2eadSJustin Stitt 	while (!__atomic_load_n(&args->percpu_list_ptr, __ATOMIC_ACQUIRE)) {}
1269f166b111SPeter Oskolkov 
1270f166b111SPeter Oskolkov 	for (i = 0; i < iters; ++i) {
1271f166b111SPeter Oskolkov 		int ret;
1272f166b111SPeter Oskolkov 
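		/*
		 * Dereference the active list pointer and add 1 to the head
		 * node's data for the current cpu, all within a single rseq
		 * critical section. A membarrier issued by the manager
		 * restarts any section still running against the old pointer,
		 * so an increment never lands in a list that has just been
		 * made inactive.
		 */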
1273f166b111SPeter Oskolkov 		do {
1274ee31fff0SMathieu Desnoyers 			int cpu = get_current_cpu_id();
1275f166b111SPeter Oskolkov 
1276ee31fff0SMathieu Desnoyers 			ret = rseq_offset_deref_addv(RSEQ_MO_RELAXED, RSEQ_PERCPU,
1277ee31fff0SMathieu Desnoyers 				&args->percpu_list_ptr,
1278f166b111SPeter Oskolkov 				sizeof(struct percpu_list_entry) * cpu, 1, cpu);
1279f166b111SPeter Oskolkov 		} while (rseq_unlikely(ret));
1280f166b111SPeter Oskolkov 	}
1281f166b111SPeter Oskolkov 
1282f166b111SPeter Oskolkov 	if (rseq_unregister_current_thread()) {
1283f166b111SPeter Oskolkov 		fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
1284f166b111SPeter Oskolkov 			errno, strerror(errno));
1285f166b111SPeter Oskolkov 		abort();
1286f166b111SPeter Oskolkov 	}
1287f166b111SPeter Oskolkov 	return NULL;
1288f166b111SPeter Oskolkov }
1289f166b111SPeter Oskolkov 
1290f166b111SPeter Oskolkov void test_membarrier_init_percpu_list(struct percpu_list *list)
1291f166b111SPeter Oskolkov {
1292f166b111SPeter Oskolkov 	int i;
1293f166b111SPeter Oskolkov 
1294f166b111SPeter Oskolkov 	memset(list, 0, sizeof(*list));
1295f166b111SPeter Oskolkov 	for (i = 0; i < CPU_SETSIZE; i++) {
1296f166b111SPeter Oskolkov 		struct percpu_list_node *node;
1297f166b111SPeter Oskolkov 
1298f166b111SPeter Oskolkov 		node = malloc(sizeof(*node));
1299f166b111SPeter Oskolkov 		assert(node);
1300f166b111SPeter Oskolkov 		node->data = 0;
1301f166b111SPeter Oskolkov 		node->next = NULL;
1302f166b111SPeter Oskolkov 		list->c[i].head = node;
1303f166b111SPeter Oskolkov 	}
1304f166b111SPeter Oskolkov }
1305f166b111SPeter Oskolkov 
1306f166b111SPeter Oskolkov void test_membarrier_free_percpu_list(struct percpu_list *list)
1307f166b111SPeter Oskolkov {
1308f166b111SPeter Oskolkov 	int i;
1309f166b111SPeter Oskolkov 
1310f166b111SPeter Oskolkov 	for (i = 0; i < CPU_SETSIZE; i++)
1311f166b111SPeter Oskolkov 		free(list->c[i].head);
1312f166b111SPeter Oskolkov }
1313f166b111SPeter Oskolkov 
1314f166b111SPeter Oskolkov /*
1315f166b111SPeter Oskolkov  * The manager thread swaps per-cpu lists that worker threads see,
1316f166b111SPeter Oskolkov  * and validates that there are no unexpected modifications.
1317f166b111SPeter Oskolkov  */
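/*
 * Handshake with the workers: after publishing a new active list with a
 * release store, the manager issues an expedited rseq membarrier targeting
 * the cpu it is about to inspect. This restarts any rseq critical section
 * that still holds the old list pointer, after which the now-inactive list
 * can be read and is expected to remain unchanged.
 */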
1318f166b111SPeter Oskolkov void *test_membarrier_manager_thread(void *arg)
1319f166b111SPeter Oskolkov {
1320f166b111SPeter Oskolkov 	struct test_membarrier_thread_args *args =
1321f166b111SPeter Oskolkov 		(struct test_membarrier_thread_args *)arg;
1322f166b111SPeter Oskolkov 	struct percpu_list list_a, list_b;
1323f166b111SPeter Oskolkov 	intptr_t expect_a = 0, expect_b = 0;
1324f166b111SPeter Oskolkov 	int cpu_a = 0, cpu_b = 0;
1325f166b111SPeter Oskolkov 
1326f166b111SPeter Oskolkov 	if (rseq_register_current_thread()) {
1327f166b111SPeter Oskolkov 		fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
1328f166b111SPeter Oskolkov 			errno, strerror(errno));
1329f166b111SPeter Oskolkov 		abort();
1330f166b111SPeter Oskolkov 	}
1331f166b111SPeter Oskolkov 
1332f166b111SPeter Oskolkov 	/* Init lists. */
1333f166b111SPeter Oskolkov 	test_membarrier_init_percpu_list(&list_a);
1334f166b111SPeter Oskolkov 	test_membarrier_init_percpu_list(&list_b);
1335f166b111SPeter Oskolkov 
1336078a2eadSJustin Stitt 	__atomic_store_n(&args->percpu_list_ptr, (intptr_t)&list_a, __ATOMIC_RELEASE);
1337f166b111SPeter Oskolkov 
1338078a2eadSJustin Stitt 	while (!__atomic_load_n(&args->stop, __ATOMIC_ACQUIRE)) {
1339f166b111SPeter Oskolkov 		/* list_a is "active". */
1340f166b111SPeter Oskolkov 		cpu_a = rand() % CPU_SETSIZE;
1341f166b111SPeter Oskolkov 		/*
1342f166b111SPeter Oskolkov 		 * As list_b is "inactive", we should never see changes
1343f166b111SPeter Oskolkov 		 * to list_b.
1344f166b111SPeter Oskolkov 		 */
1345078a2eadSJustin Stitt 		if (expect_b != __atomic_load_n(&list_b.c[cpu_b].head->data, __ATOMIC_ACQUIRE)) {
1346f166b111SPeter Oskolkov 			fprintf(stderr, "Membarrier test failed\n");
1347f166b111SPeter Oskolkov 			abort();
1348f166b111SPeter Oskolkov 		}
1349f166b111SPeter Oskolkov 
1350f166b111SPeter Oskolkov 		/* Make list_b "active". */
1351078a2eadSJustin Stitt 		__atomic_store_n(&args->percpu_list_ptr, (intptr_t)&list_b, __ATOMIC_RELEASE);
1352ee31fff0SMathieu Desnoyers 		if (rseq_membarrier_expedited(cpu_a) &&
1353f166b111SPeter Oskolkov 				errno != ENXIO /* missing CPU */) {
1354f166b111SPeter Oskolkov 			perror("sys_membarrier");
1355f166b111SPeter Oskolkov 			abort();
1356f166b111SPeter Oskolkov 		}
1357f166b111SPeter Oskolkov 		/*
1358f166b111SPeter Oskolkov 		 * Cpu A should now only modify list_b, so the values
1359f166b111SPeter Oskolkov 		 * in list_a should be stable.
1360f166b111SPeter Oskolkov 		 */
1361078a2eadSJustin Stitt 		expect_a = __atomic_load_n(&list_a.c[cpu_a].head->data, __ATOMIC_ACQUIRE);
1362f166b111SPeter Oskolkov 
1363f166b111SPeter Oskolkov 		cpu_b = rand() % CPU_SETSIZE;
1364f166b111SPeter Oskolkov 		/*
1365f166b111SPeter Oskolkov 		 * As list_a is "inactive", we should never see changes
1366f166b111SPeter Oskolkov 		 * to list_a.
1367f166b111SPeter Oskolkov 		 */
1368078a2eadSJustin Stitt 		if (expect_a != __atomic_load_n(&list_a.c[cpu_a].head->data, __ATOMIC_ACQUIRE)) {
1369f166b111SPeter Oskolkov 			fprintf(stderr, "Membarrier test failed\n");
1370f166b111SPeter Oskolkov 			abort();
1371f166b111SPeter Oskolkov 		}
1372f166b111SPeter Oskolkov 
1373f166b111SPeter Oskolkov 		/* Make list_a "active". */
1374078a2eadSJustin Stitt 		__atomic_store_n(&args->percpu_list_ptr, (intptr_t)&list_a, __ATOMIC_RELEASE);
1375ee31fff0SMathieu Desnoyers 		if (rseq_membarrier_expedited(cpu_b) &&
1376f166b111SPeter Oskolkov 				errno != ENXIO /* missing CPU */) {
1377f166b111SPeter Oskolkov 			perror("sys_membarrier");
1378f166b111SPeter Oskolkov 			abort();
1379f166b111SPeter Oskolkov 		}
1380f166b111SPeter Oskolkov 		/* Remember a value from list_b. */
1381078a2eadSJustin Stitt 		expect_b = __atomic_load_n(&list_b.c[cpu_b].head->data, __ATOMIC_ACQUIRE);
1382f166b111SPeter Oskolkov 	}
1383f166b111SPeter Oskolkov 
1384f166b111SPeter Oskolkov 	test_membarrier_free_percpu_list(&list_a);
1385f166b111SPeter Oskolkov 	test_membarrier_free_percpu_list(&list_b);
1386f166b111SPeter Oskolkov 
1387f166b111SPeter Oskolkov 	if (rseq_unregister_current_thread()) {
1388f166b111SPeter Oskolkov 		fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
1389f166b111SPeter Oskolkov 			errno, strerror(errno));
1390f166b111SPeter Oskolkov 		abort();
1391f166b111SPeter Oskolkov 	}
1392f166b111SPeter Oskolkov 	return NULL;
1393f166b111SPeter Oskolkov }
1394f166b111SPeter Oskolkov 
1395f166b111SPeter Oskolkov void test_membarrier(void)
1396f166b111SPeter Oskolkov {
1397f166b111SPeter Oskolkov 	const int num_threads = opt_threads;
1398f166b111SPeter Oskolkov 	struct test_membarrier_thread_args thread_args;
1399f166b111SPeter Oskolkov 	pthread_t worker_threads[num_threads];
1400f166b111SPeter Oskolkov 	pthread_t manager_thread;
1401f166b111SPeter Oskolkov 	int i, ret;
1402f166b111SPeter Oskolkov 
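	/*
	 * Register the process's intent to use the private expedited rseq
	 * membarrier command; the expedited membarriers issued later by the
	 * manager thread would otherwise be rejected by the kernel.
	 */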
1403f166b111SPeter Oskolkov 	if (sys_membarrier(MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ, 0, 0)) {
1404f166b111SPeter Oskolkov 		perror("sys_membarrier");
1405f166b111SPeter Oskolkov 		abort();
1406f166b111SPeter Oskolkov 	}
1407f166b111SPeter Oskolkov 
1408f166b111SPeter Oskolkov 	thread_args.stop = 0;
1409f166b111SPeter Oskolkov 	thread_args.percpu_list_ptr = 0;
1410f166b111SPeter Oskolkov 	ret = pthread_create(&manager_thread, NULL,
1411f166b111SPeter Oskolkov 			test_membarrier_manager_thread, &thread_args);
1412f166b111SPeter Oskolkov 	if (ret) {
1413f166b111SPeter Oskolkov 		errno = ret;
1414f166b111SPeter Oskolkov 		perror("pthread_create");
1415f166b111SPeter Oskolkov 		abort();
1416f166b111SPeter Oskolkov 	}
1417f166b111SPeter Oskolkov 
1418f166b111SPeter Oskolkov 	for (i = 0; i < num_threads; i++) {
1419f166b111SPeter Oskolkov 		ret = pthread_create(&worker_threads[i], NULL,
1420f166b111SPeter Oskolkov 				test_membarrier_worker_thread, &thread_args);
1421f166b111SPeter Oskolkov 		if (ret) {
1422f166b111SPeter Oskolkov 			errno = ret;
1423f166b111SPeter Oskolkov 			perror("pthread_create");
1424f166b111SPeter Oskolkov 			abort();
1425f166b111SPeter Oskolkov 		}
1426f166b111SPeter Oskolkov 	}
1427f166b111SPeter Oskolkov 
1428f166b111SPeter Oskolkov 
1429f166b111SPeter Oskolkov 	for (i = 0; i < num_threads; i++) {
1430f166b111SPeter Oskolkov 		ret = pthread_join(worker_threads[i], NULL);
1431f166b111SPeter Oskolkov 		if (ret) {
1432f166b111SPeter Oskolkov 			errno = ret;
1433f166b111SPeter Oskolkov 			perror("pthread_join");
1434f166b111SPeter Oskolkov 			abort();
1435f166b111SPeter Oskolkov 		}
1436f166b111SPeter Oskolkov 	}
1437f166b111SPeter Oskolkov 
1438078a2eadSJustin Stitt 	__atomic_store_n(&thread_args.stop, 1, __ATOMIC_RELEASE);
1439f166b111SPeter Oskolkov 	ret = pthread_join(manager_thread, NULL);
1440f166b111SPeter Oskolkov 	if (ret) {
1441f166b111SPeter Oskolkov 		errno = ret;
1442f166b111SPeter Oskolkov 		perror("pthread_join");
1443f166b111SPeter Oskolkov 		abort();
1444f166b111SPeter Oskolkov 	}
1445f166b111SPeter Oskolkov }
1446ee31fff0SMathieu Desnoyers #else /* TEST_MEMBARRIER */
1447f166b111SPeter Oskolkov void test_membarrier(void)
1448f166b111SPeter Oskolkov {
1449f166b111SPeter Oskolkov 	fprintf(stderr, "rseq_offset_deref_addv is not implemented on this architecture. "
1450f166b111SPeter Oskolkov 			"Skipping membarrier test.\n");
1451f166b111SPeter Oskolkov }
1452f166b111SPeter Oskolkov #endif
1453f166b111SPeter Oskolkov 
1454c960e990SMathieu Desnoyers static void show_usage(int argc, char **argv)
1455c960e990SMathieu Desnoyers {
1456c960e990SMathieu Desnoyers 	printf("Usage: %s <OPTIONS>\n",
1457c960e990SMathieu Desnoyers 		argv[0]);
1458c960e990SMathieu Desnoyers 	printf("OPTIONS:\n");
1459c960e990SMathieu Desnoyers 	printf("	[-1 loops] Number of loops for delay injection 1\n");
1460c960e990SMathieu Desnoyers 	printf("	[-2 loops] Number of loops for delay injection 2\n");
1461c960e990SMathieu Desnoyers 	printf("	[-3 loops] Number of loops for delay injection 3\n");
1462c960e990SMathieu Desnoyers 	printf("	[-4 loops] Number of loops for delay injection 4\n");
1463c960e990SMathieu Desnoyers 	printf("	[-5 loops] Number of loops for delay injection 5\n");
1464c960e990SMathieu Desnoyers 	printf("	[-6 loops] Number of loops for delay injection 6\n");
1465c960e990SMathieu Desnoyers 	printf("	[-7 loops] Number of loops for delay injection 7 (-1 to enable -m)\n");
1466c960e990SMathieu Desnoyers 	printf("	[-8 loops] Number of loops for delay injection 8 (-1 to enable -m)\n");
1467c960e990SMathieu Desnoyers 	printf("	[-9 loops] Number of loops for delay injection 9 (-1 to enable -m)\n");
1468c960e990SMathieu Desnoyers 	printf("	[-m N] Yield/sleep/kill every modulo N (default 0: disabled) (>= 0)\n");
1469c960e990SMathieu Desnoyers 	printf("	[-y] Yield\n");
1470c960e990SMathieu Desnoyers 	printf("	[-k] Kill thread with signal\n");
1471c960e990SMathieu Desnoyers 	printf("	[-s S] S: =0: disabled (default), >0: sleep time (ms)\n");
1472c960e990SMathieu Desnoyers 	printf("	[-t N] Number of threads (default 200)\n");
1473c960e990SMathieu Desnoyers 	printf("	[-r N] Number of repetitions per thread (default 5000)\n");
1474c960e990SMathieu Desnoyers 	printf("	[-d] Disable rseq system call (no initialization)\n");
1475c960e990SMathieu Desnoyers 	printf("	[-D M] Disable rseq for each M threads\n");
1476f166b111SPeter Oskolkov 	printf("	[-T test] Choose test: (s)pinlock, (l)ist, (b)uffer, (m)emcpy, (i)ncrement, membarrie(r)\n");
1477c960e990SMathieu Desnoyers 	printf("	[-M] Push into buffer and memcpy buffer with memory barriers.\n");
1478c960e990SMathieu Desnoyers 	printf("	[-v] Verbose output.\n");
1479c960e990SMathieu Desnoyers 	printf("	[-h] Show this help.\n");
1480c960e990SMathieu Desnoyers 	printf("\n");
1481c960e990SMathieu Desnoyers }
1482c960e990SMathieu Desnoyers 
1483c960e990SMathieu Desnoyers int main(int argc, char **argv)
1484c960e990SMathieu Desnoyers {
1485c960e990SMathieu Desnoyers 	int i;
1486c960e990SMathieu Desnoyers 
1487c960e990SMathieu Desnoyers 	for (i = 1; i < argc; i++) {
1488c960e990SMathieu Desnoyers 		if (argv[i][0] != '-')
1489c960e990SMathieu Desnoyers 			continue;
1490c960e990SMathieu Desnoyers 		switch (argv[i][1]) {
1491c960e990SMathieu Desnoyers 		case '1':
1492c960e990SMathieu Desnoyers 		case '2':
1493c960e990SMathieu Desnoyers 		case '3':
1494c960e990SMathieu Desnoyers 		case '4':
1495c960e990SMathieu Desnoyers 		case '5':
1496c960e990SMathieu Desnoyers 		case '6':
1497c960e990SMathieu Desnoyers 		case '7':
1498c960e990SMathieu Desnoyers 		case '8':
1499c960e990SMathieu Desnoyers 		case '9':
1500c960e990SMathieu Desnoyers 			if (argc < i + 2) {
1501c960e990SMathieu Desnoyers 				show_usage(argc, argv);
1502c960e990SMathieu Desnoyers 				goto error;
1503c960e990SMathieu Desnoyers 			}
1504c960e990SMathieu Desnoyers 			loop_cnt[argv[i][1] - '0'] = atol(argv[i + 1]);
1505c960e990SMathieu Desnoyers 			i++;
1506c960e990SMathieu Desnoyers 			break;
1507c960e990SMathieu Desnoyers 		case 'm':
1508c960e990SMathieu Desnoyers 			if (argc < i + 2) {
1509c960e990SMathieu Desnoyers 				show_usage(argc, argv);
1510c960e990SMathieu Desnoyers 				goto error;
1511c960e990SMathieu Desnoyers 			}
1512c960e990SMathieu Desnoyers 			opt_modulo = atol(argv[i + 1]);
1513c960e990SMathieu Desnoyers 			if (opt_modulo < 0) {
1514c960e990SMathieu Desnoyers 				show_usage(argc, argv);
1515c960e990SMathieu Desnoyers 				goto error;
1516c960e990SMathieu Desnoyers 			}
1517c960e990SMathieu Desnoyers 			i++;
1518c960e990SMathieu Desnoyers 			break;
1519c960e990SMathieu Desnoyers 		case 's':
1520c960e990SMathieu Desnoyers 			if (argc < i + 2) {
1521c960e990SMathieu Desnoyers 				show_usage(argc, argv);
1522c960e990SMathieu Desnoyers 				goto error;
1523c960e990SMathieu Desnoyers 			}
1524c960e990SMathieu Desnoyers 			opt_sleep = atol(argv[i + 1]);
1525c960e990SMathieu Desnoyers 			if (opt_sleep < 0) {
1526c960e990SMathieu Desnoyers 				show_usage(argc, argv);
1527c960e990SMathieu Desnoyers 				goto error;
1528c960e990SMathieu Desnoyers 			}
1529c960e990SMathieu Desnoyers 			i++;
1530c960e990SMathieu Desnoyers 			break;
1531c960e990SMathieu Desnoyers 		case 'y':
1532c960e990SMathieu Desnoyers 			opt_yield = 1;
1533c960e990SMathieu Desnoyers 			break;
1534c960e990SMathieu Desnoyers 		case 'k':
1535c960e990SMathieu Desnoyers 			opt_signal = 1;
1536c960e990SMathieu Desnoyers 			break;
1537c960e990SMathieu Desnoyers 		case 'd':
1538c960e990SMathieu Desnoyers 			opt_disable_rseq = 1;
1539c960e990SMathieu Desnoyers 			break;
1540c960e990SMathieu Desnoyers 		case 'D':
1541c960e990SMathieu Desnoyers 			if (argc < i + 2) {
1542c960e990SMathieu Desnoyers 				show_usage(argc, argv);
1543c960e990SMathieu Desnoyers 				goto error;
1544c960e990SMathieu Desnoyers 			}
1545c960e990SMathieu Desnoyers 			opt_disable_mod = atol(argv[i + 1]);
1546c960e990SMathieu Desnoyers 			if (opt_disable_mod < 0) {
1547c960e990SMathieu Desnoyers 				show_usage(argc, argv);
1548c960e990SMathieu Desnoyers 				goto error;
1549c960e990SMathieu Desnoyers 			}
1550c960e990SMathieu Desnoyers 			i++;
1551c960e990SMathieu Desnoyers 			break;
1552c960e990SMathieu Desnoyers 		case 't':
1553c960e990SMathieu Desnoyers 			if (argc < i + 2) {
1554c960e990SMathieu Desnoyers 				show_usage(argc, argv);
1555c960e990SMathieu Desnoyers 				goto error;
1556c960e990SMathieu Desnoyers 			}
1557c960e990SMathieu Desnoyers 			opt_threads = atol(argv[i + 1]);
1558c960e990SMathieu Desnoyers 			if (opt_threads < 0) {
1559c960e990SMathieu Desnoyers 				show_usage(argc, argv);
1560c960e990SMathieu Desnoyers 				goto error;
1561c960e990SMathieu Desnoyers 			}
1562c960e990SMathieu Desnoyers 			i++;
1563c960e990SMathieu Desnoyers 			break;
1564c960e990SMathieu Desnoyers 		case 'r':
1565c960e990SMathieu Desnoyers 			if (argc < i + 2) {
1566c960e990SMathieu Desnoyers 				show_usage(argc, argv);
1567c960e990SMathieu Desnoyers 				goto error;
1568c960e990SMathieu Desnoyers 			}
1569c960e990SMathieu Desnoyers 			opt_reps = atoll(argv[i + 1]);
1570c960e990SMathieu Desnoyers 			if (opt_reps < 0) {
1571c960e990SMathieu Desnoyers 				show_usage(argc, argv);
1572c960e990SMathieu Desnoyers 				goto error;
1573c960e990SMathieu Desnoyers 			}
1574c960e990SMathieu Desnoyers 			i++;
1575c960e990SMathieu Desnoyers 			break;
1576c960e990SMathieu Desnoyers 		case 'h':
1577c960e990SMathieu Desnoyers 			show_usage(argc, argv);
1578c960e990SMathieu Desnoyers 			goto end;
1579c960e990SMathieu Desnoyers 		case 'T':
1580c960e990SMathieu Desnoyers 			if (argc < i + 2) {
1581c960e990SMathieu Desnoyers 				show_usage(argc, argv);
1582c960e990SMathieu Desnoyers 				goto error;
1583c960e990SMathieu Desnoyers 			}
1584c960e990SMathieu Desnoyers 			opt_test = *argv[i + 1];
1585c960e990SMathieu Desnoyers 			switch (opt_test) {
1586c960e990SMathieu Desnoyers 			case 's':
1587c960e990SMathieu Desnoyers 			case 'l':
1588c960e990SMathieu Desnoyers 			case 'i':
1589c960e990SMathieu Desnoyers 			case 'b':
1590c960e990SMathieu Desnoyers 			case 'm':
1591f166b111SPeter Oskolkov 			case 'r':
1592c960e990SMathieu Desnoyers 				break;
1593c960e990SMathieu Desnoyers 			default:
1594c960e990SMathieu Desnoyers 				show_usage(argc, argv);
1595c960e990SMathieu Desnoyers 				goto error;
1596c960e990SMathieu Desnoyers 			}
1597c960e990SMathieu Desnoyers 			i++;
1598c960e990SMathieu Desnoyers 			break;
1599c960e990SMathieu Desnoyers 		case 'v':
1600c960e990SMathieu Desnoyers 			verbose = 1;
1601c960e990SMathieu Desnoyers 			break;
1602c960e990SMathieu Desnoyers 		case 'M':
1603ee31fff0SMathieu Desnoyers 			opt_mo = RSEQ_MO_RELEASE;
1604c960e990SMathieu Desnoyers 			break;
1605c960e990SMathieu Desnoyers 		default:
1606c960e990SMathieu Desnoyers 			show_usage(argc, argv);
1607c960e990SMathieu Desnoyers 			goto error;
1608c960e990SMathieu Desnoyers 		}
1609c960e990SMathieu Desnoyers 	}
1610c960e990SMathieu Desnoyers 
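	/* Publish the parsed delay-injection loop counts to the asm-visible counters. */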
1611c960e990SMathieu Desnoyers 	loop_cnt_1 = loop_cnt[1];
1612c960e990SMathieu Desnoyers 	loop_cnt_2 = loop_cnt[2];
1613c960e990SMathieu Desnoyers 	loop_cnt_3 = loop_cnt[3];
1614c960e990SMathieu Desnoyers 	loop_cnt_4 = loop_cnt[4];
1615c960e990SMathieu Desnoyers 	loop_cnt_5 = loop_cnt[5];
1616c960e990SMathieu Desnoyers 	loop_cnt_6 = loop_cnt[6];
1617c960e990SMathieu Desnoyers 
1618c960e990SMathieu Desnoyers 	if (set_signal_handler())
1619c960e990SMathieu Desnoyers 		goto error;
1620c960e990SMathieu Desnoyers 
1621c960e990SMathieu Desnoyers 	if (!opt_disable_rseq && rseq_register_current_thread())
1622c960e990SMathieu Desnoyers 		goto error;
1623ee31fff0SMathieu Desnoyers 	if (!opt_disable_rseq && !rseq_validate_cpu_id()) {
1624ee31fff0SMathieu Desnoyers 		fprintf(stderr, "Error: cpu id getter unavailable\n");
1625ee31fff0SMathieu Desnoyers 		goto error;
1626ee31fff0SMathieu Desnoyers 	}
1627c960e990SMathieu Desnoyers 	switch (opt_test) {
1628c960e990SMathieu Desnoyers 	case 's':
1629c960e990SMathieu Desnoyers 		printf_verbose("spinlock\n");
1630c960e990SMathieu Desnoyers 		test_percpu_spinlock();
1631c960e990SMathieu Desnoyers 		break;
1632c960e990SMathieu Desnoyers 	case 'l':
1633c960e990SMathieu Desnoyers 		printf_verbose("linked list\n");
1634c960e990SMathieu Desnoyers 		test_percpu_list();
1635c960e990SMathieu Desnoyers 		break;
1636c960e990SMathieu Desnoyers 	case 'b':
1637c960e990SMathieu Desnoyers 		printf_verbose("buffer\n");
1638c960e990SMathieu Desnoyers 		test_percpu_buffer();
1639c960e990SMathieu Desnoyers 		break;
1640c960e990SMathieu Desnoyers 	case 'm':
1641c960e990SMathieu Desnoyers 		printf_verbose("memcpy buffer\n");
1642c960e990SMathieu Desnoyers 		test_percpu_memcpy_buffer();
1643c960e990SMathieu Desnoyers 		break;
1644c960e990SMathieu Desnoyers 	case 'i':
1645c960e990SMathieu Desnoyers 		printf_verbose("counter increment\n");
1646c960e990SMathieu Desnoyers 		test_percpu_inc();
1647c960e990SMathieu Desnoyers 		break;
1648f166b111SPeter Oskolkov 	case 'r':
1649f166b111SPeter Oskolkov 		printf_verbose("membarrier\n");
1650f166b111SPeter Oskolkov 		test_membarrier();
1651f166b111SPeter Oskolkov 		break;
1652c960e990SMathieu Desnoyers 	}
1653c960e990SMathieu Desnoyers 	if (!opt_disable_rseq && rseq_unregister_current_thread())
1654c960e990SMathieu Desnoyers 		abort();
1655c960e990SMathieu Desnoyers end:
1656c960e990SMathieu Desnoyers 	return 0;
1657c960e990SMathieu Desnoyers 
1658c960e990SMathieu Desnoyers error:
1659c960e990SMathieu Desnoyers 	return -1;
1660c960e990SMathieu Desnoyers }
1661