// SPDX-License-Identifier: LGPL-2.1
#define _GNU_SOURCE
#include <assert.h>
#include <linux/membarrier.h>
#include <pthread.h>
#include <sched.h>
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <syscall.h>
#include <unistd.h>
#include <poll.h>
#include <sys/types.h>
#include <signal.h>
#include <errno.h>
#include <stddef.h>
#include <stdbool.h>

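/* gettid(2) wrapper: invoke the syscall directly, since a libc wrapper may not be available. */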
static inline pid_t rseq_gettid(void)
{
	return syscall(__NR_gettid);
}

#define NR_INJECT	9
static int loop_cnt[NR_INJECT + 1];

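/*
 * Injection loop counts exported under fixed asm symbol names
 * ("asm_loop_cnt_N") so the x86 RSEQ_INJECT_ASM code below can reference
 * them directly by symbol rather than through an asm operand.
 */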
static int loop_cnt_1 asm("asm_loop_cnt_1") __attribute__((used));
static int loop_cnt_2 asm("asm_loop_cnt_2") __attribute__((used));
static int loop_cnt_3 asm("asm_loop_cnt_3") __attribute__((used));
static int loop_cnt_4 asm("asm_loop_cnt_4") __attribute__((used));
static int loop_cnt_5 asm("asm_loop_cnt_5") __attribute__((used));
static int loop_cnt_6 asm("asm_loop_cnt_6") __attribute__((used));

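/*
 * Test parameters (opt_*). Defaults: 200 threads, 5000 repetitions per
 * thread, spinlock test ('s'), rseq registration enabled.
 */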
static int opt_modulo, verbose;

static int opt_yield, opt_signal, opt_sleep,
		opt_disable_rseq, opt_threads = 200,
		opt_disable_mod = 0, opt_test = 's';

static long long opt_reps = 5000;

static __thread __attribute__((tls_model("initial-exec")))
unsigned int signals_delivered;

#ifndef BENCHMARK

static __thread __attribute__((tls_model("initial-exec"), unused))
unsigned int yield_mod_cnt, nr_abort;

#define printf_verbose(fmt, ...)			\
	do {						\
		if (verbose)				\
			printf(fmt, ## __VA_ARGS__);	\
	} while (0)

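/*
 * Delay-injection hooks consumed by the per-architecture rseq headers
 * included via rseq.h below: RSEQ_INJECT_ASM(n) emits a busy-wait loop of
 * the loop count for injection point n inside each rseq critical section,
 * widening the window in which preemption, migration or signal delivery
 * can force an abort.
 */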
#ifdef __i386__

#define INJECT_ASM_REG	"eax"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"mov asm_loop_cnt_" #n ", %%" INJECT_ASM_REG "\n\t" \
	"test %%" INJECT_ASM_REG ",%%" INJECT_ASM_REG "\n\t" \
	"jz 333f\n\t" \
	"222:\n\t" \
	"dec %%" INJECT_ASM_REG "\n\t" \
	"jnz 222b\n\t" \
	"333:\n\t"

#elif defined(__x86_64__)

#define INJECT_ASM_REG_P	"rax"
#define INJECT_ASM_REG		"eax"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG_P \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"lea asm_loop_cnt_" #n "(%%rip), %%" INJECT_ASM_REG_P "\n\t" \
	"mov (%%" INJECT_ASM_REG_P "), %%" INJECT_ASM_REG "\n\t" \
	"test %%" INJECT_ASM_REG ",%%" INJECT_ASM_REG "\n\t" \
	"jz 333f\n\t" \
	"222:\n\t" \
	"dec %%" INJECT_ASM_REG "\n\t" \
	"jnz 222b\n\t" \
	"333:\n\t"

#elif defined(__s390__)

#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1]"m"(loop_cnt[1]) \
	, [loop_cnt_2]"m"(loop_cnt[2]) \
	, [loop_cnt_3]"m"(loop_cnt[3]) \
	, [loop_cnt_4]"m"(loop_cnt[4]) \
	, [loop_cnt_5]"m"(loop_cnt[5]) \
	, [loop_cnt_6]"m"(loop_cnt[6])

#define INJECT_ASM_REG	"r12"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"l %%" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
	"ltr %%" INJECT_ASM_REG ", %%" INJECT_ASM_REG "\n\t" \
	"je 333f\n\t" \
	"222:\n\t" \
	"ahi %%" INJECT_ASM_REG ", -1\n\t" \
	"jnz 222b\n\t" \
	"333:\n\t"

#elif defined(__ARMEL__)

#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1]"m"(loop_cnt[1]) \
	, [loop_cnt_2]"m"(loop_cnt[2]) \
	, [loop_cnt_3]"m"(loop_cnt[3]) \
	, [loop_cnt_4]"m"(loop_cnt[4]) \
	, [loop_cnt_5]"m"(loop_cnt[5]) \
	, [loop_cnt_6]"m"(loop_cnt[6])

#define INJECT_ASM_REG	"r4"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"ldr " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
	"cmp " INJECT_ASM_REG ", #0\n\t" \
	"beq 333f\n\t" \
	"222:\n\t" \
	"subs " INJECT_ASM_REG ", #1\n\t" \
	"bne 222b\n\t" \
	"333:\n\t"

#elif defined(__AARCH64EL__)

#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1] "Qo" (loop_cnt[1]) \
	, [loop_cnt_2] "Qo" (loop_cnt[2]) \
	, [loop_cnt_3] "Qo" (loop_cnt[3]) \
	, [loop_cnt_4] "Qo" (loop_cnt[4]) \
	, [loop_cnt_5] "Qo" (loop_cnt[5]) \
	, [loop_cnt_6] "Qo" (loop_cnt[6])

#define INJECT_ASM_REG	RSEQ_ASM_TMP_REG32

#define RSEQ_INJECT_ASM(n) \
	"	ldr	" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n"	\
	"	cbz	" INJECT_ASM_REG ", 333f\n"			\
	"222:\n"							\
	"	sub	" INJECT_ASM_REG ", " INJECT_ASM_REG ", #1\n"	\
	"	cbnz	" INJECT_ASM_REG ", 222b\n"			\
	"333:\n"

#elif defined(__PPC__)

#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1]"m"(loop_cnt[1]) \
	, [loop_cnt_2]"m"(loop_cnt[2]) \
	, [loop_cnt_3]"m"(loop_cnt[3]) \
	, [loop_cnt_4]"m"(loop_cnt[4]) \
	, [loop_cnt_5]"m"(loop_cnt[5]) \
	, [loop_cnt_6]"m"(loop_cnt[6])

#define INJECT_ASM_REG	"r18"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"lwz %%" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
	"cmpwi %%" INJECT_ASM_REG ", 0\n\t" \
	"beq 333f\n\t" \
	"222:\n\t" \
	"subic. %%" INJECT_ASM_REG ", %%" INJECT_ASM_REG ", 1\n\t" \
	"bne 222b\n\t" \
	"333:\n\t"

#elif defined(__mips__)

#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1]"m"(loop_cnt[1]) \
	, [loop_cnt_2]"m"(loop_cnt[2]) \
	, [loop_cnt_3]"m"(loop_cnt[3]) \
	, [loop_cnt_4]"m"(loop_cnt[4]) \
	, [loop_cnt_5]"m"(loop_cnt[5]) \
	, [loop_cnt_6]"m"(loop_cnt[6])

#define INJECT_ASM_REG	"$5"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"lw " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
	"beqz " INJECT_ASM_REG ", 333f\n\t" \
	"222:\n\t" \
	"addiu " INJECT_ASM_REG ", -1\n\t" \
	"bnez " INJECT_ASM_REG ", 222b\n\t" \
	"333:\n\t"
#elif defined(__riscv)

#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1]"m"(loop_cnt[1]) \
	, [loop_cnt_2]"m"(loop_cnt[2]) \
	, [loop_cnt_3]"m"(loop_cnt[3]) \
	, [loop_cnt_4]"m"(loop_cnt[4]) \
	, [loop_cnt_5]"m"(loop_cnt[5]) \
	, [loop_cnt_6]"m"(loop_cnt[6])

#define INJECT_ASM_REG	"t1"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n)					\
	"lw " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t"		\
	"beqz " INJECT_ASM_REG ", 333f\n\t"			\
	"222:\n\t"						\
	"addi  " INJECT_ASM_REG "," INJECT_ASM_REG ", -1\n\t"	\
	"bnez " INJECT_ASM_REG ", 222b\n\t"			\
	"333:\n\t"

#else
#error unsupported target
#endif

#define RSEQ_INJECT_FAILED \
	nr_abort++;

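/*
 * C-level injection hook: spin for loop_cnt[n] iterations at injection
 * point n. When the count is -1 and opt_modulo is set, periodically
 * sleep, yield and/or raise SIGUSR1 instead, to perturb the critical
 * section from C code.
 */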
#define RSEQ_INJECT_C(n) \
{ \
	int loc_i, loc_nr_loops = loop_cnt[n]; \
	\
	for (loc_i = 0; loc_i < loc_nr_loops; loc_i++) { \
		rseq_barrier(); \
	} \
	if (loc_nr_loops == -1 && opt_modulo) { \
		if (yield_mod_cnt == opt_modulo - 1) { \
			if (opt_sleep > 0) \
				poll(NULL, 0, opt_sleep); \
			if (opt_yield) \
				sched_yield(); \
			if (opt_signal) \
				raise(SIGUSR1); \
			yield_mod_cnt = 0; \
		} else { \
			yield_mod_cnt++; \
		} \
	} \
}

#else

#define printf_verbose(fmt, ...)

#endif /* BENCHMARK */

#include "rseq.h"

static enum rseq_mo opt_mo = RSEQ_MO_RELAXED;

#ifdef RSEQ_ARCH_HAS_OFFSET_DEREF_ADDV
#define TEST_MEMBARRIER

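/* Thin wrapper for the membarrier(2) system call. */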
static int sys_membarrier(int cmd, int flags, int cpu_id)
{
	return syscall(__NR_membarrier, cmd, flags, cpu_id);
}
#endif

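/*
 * Per-cpu data can be indexed either by the memory-map concurrency id
 * (mm_cid) or by the raw cpu number, selected at build time with
 * BUILDOPT_RSEQ_PERCPU_MM_CID. get_current_cpu_id() and
 * rseq_validate_cpu_id() hide the difference from the test code below.
 */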
#ifdef BUILDOPT_RSEQ_PERCPU_MM_CID
# define RSEQ_PERCPU	RSEQ_PERCPU_MM_CID
static
int get_current_cpu_id(void)
{
	return rseq_current_mm_cid();
}
static
bool rseq_validate_cpu_id(void)
{
	return rseq_mm_cid_available();
}
# ifdef TEST_MEMBARRIER
/*
 * Membarrier does not currently support targeting a mm_cid, so
 * issue the barrier on all cpus.
 */
static
int rseq_membarrier_expedited(int cpu)
{
	return sys_membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ,
			      0, 0);
}
# endif /* TEST_MEMBARRIER */
#else
# define RSEQ_PERCPU	RSEQ_PERCPU_CPU_ID
static
int get_current_cpu_id(void)
{
	return rseq_cpu_start();
}
static
bool rseq_validate_cpu_id(void)
{
	return rseq_current_cpu_raw() >= 0;
}
# ifdef TEST_MEMBARRIER
static
int rseq_membarrier_expedited(int cpu)
{
	return sys_membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ,
			      MEMBARRIER_CMD_FLAG_CPU, cpu);
}
# endif /* TEST_MEMBARRIER */
#endif

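/*
 * Per-cpu data structures. Each per-cpu entry is aligned to 128 bytes,
 * presumably to keep entries belonging to different cpus on separate
 * cache lines.
 */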
struct percpu_lock_entry {
	intptr_t v;
} __attribute__((aligned(128)));

struct percpu_lock {
	struct percpu_lock_entry c[CPU_SETSIZE];
};

struct test_data_entry {
	intptr_t count;
} __attribute__((aligned(128)));

struct spinlock_test_data {
	struct percpu_lock lock;
	struct test_data_entry c[CPU_SETSIZE];
};

struct spinlock_thread_test_data {
	struct spinlock_test_data *data;
	long long reps;
	int reg;
};

struct inc_test_data {
	struct test_data_entry c[CPU_SETSIZE];
};

struct inc_thread_test_data {
	struct inc_test_data *data;
	long long reps;
	int reg;
};

struct percpu_list_node {
	intptr_t data;
	struct percpu_list_node *next;
};

struct percpu_list_entry {
	struct percpu_list_node *head;
} __attribute__((aligned(128)));

struct percpu_list {
	struct percpu_list_entry c[CPU_SETSIZE];
};

#define BUFFER_ITEM_PER_CPU	100

struct percpu_buffer_node {
	intptr_t data;
};

struct percpu_buffer_entry {
	intptr_t offset;
	intptr_t buflen;
	struct percpu_buffer_node **array;
} __attribute__((aligned(128)));

struct percpu_buffer {
	struct percpu_buffer_entry c[CPU_SETSIZE];
};

#define MEMCPY_BUFFER_ITEM_PER_CPU	100

struct percpu_memcpy_buffer_node {
	intptr_t data1;
	uint64_t data2;
};

struct percpu_memcpy_buffer_entry {
	intptr_t offset;
	intptr_t buflen;
	struct percpu_memcpy_buffer_node *array;
} __attribute__((aligned(128)));

struct percpu_memcpy_buffer {
	struct percpu_memcpy_buffer_entry c[CPU_SETSIZE];
};

/* A simple percpu spinlock. Grabs lock on current cpu. */
static int rseq_this_cpu_lock(struct percpu_lock *lock)
{
	int cpu;

	for (;;) {
		int ret;

		cpu = get_current_cpu_id();
		if (cpu < 0) {
			fprintf(stderr, "pid: %d: tid: %d, cpu: %d: cid: %d\n",
					getpid(), (int) rseq_gettid(), rseq_current_cpu_raw(), cpu);
			abort();
		}
		ret = rseq_cmpeqv_storev(RSEQ_MO_RELAXED, RSEQ_PERCPU,
					 &lock->c[cpu].v,
					 0, 1, cpu);
		if (rseq_likely(!ret))
			break;
		/* Retry if comparison fails or rseq aborts. */
	}
	/*
	 * Acquire semantic when taking lock after control dependency.
	 * Matches rseq_smp_store_release().
	 */
	rseq_smp_acquire__after_ctrl_dep();
	return cpu;
}

static void rseq_percpu_unlock(struct percpu_lock *lock, int cpu)
{
	assert(lock->c[cpu].v == 1);
	/*
	 * Release lock, with release semantic. Matches
	 * rseq_smp_acquire__after_ctrl_dep().
	 */
	rseq_smp_store_release(&lock->c[cpu].v, 0);
}

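/*
 * Spinlock test thread: repeatedly take the lock for the current cpu,
 * increment that cpu's counter and release the lock. rseq registration
 * can be skipped per thread (thread_data->reg) or globally
 * (opt_disable_rseq).
 */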
void *test_percpu_spinlock_thread(void *arg)
{
	struct spinlock_thread_test_data *thread_data = arg;
	struct spinlock_test_data *data = thread_data->data;
	long long i, reps;

	if (!opt_disable_rseq && thread_data->reg &&
	    rseq_register_current_thread())
		abort();
	reps = thread_data->reps;
	for (i = 0; i < reps; i++) {
		int cpu = rseq_this_cpu_lock(&data->lock);
		data->c[cpu].count++;
		rseq_percpu_unlock(&data->lock, cpu);
#ifndef BENCHMARK
		if (i != 0 && !(i % (reps / 10)))
			printf_verbose("tid %d: count %lld\n",
				       (int) rseq_gettid(), i);
#endif
	}
	printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
		       (int) rseq_gettid(), nr_abort, signals_delivered);
	if (!opt_disable_rseq && thread_data->reg &&
	    rseq_unregister_current_thread())
		abort();
	return NULL;
}

/*
 * A simple test which implements a sharded counter using a per-cpu
 * lock.  Obviously real applications might prefer to simply use a
 * per-cpu increment; however, this is reasonable for a test and the
 * lock can be extended to synchronize more complicated operations.
 */
void test_percpu_spinlock(void)
{
	const int num_threads = opt_threads;
	int i, ret;
	uint64_t sum;
	pthread_t test_threads[num_threads];
	struct spinlock_test_data data;
	struct spinlock_thread_test_data thread_data[num_threads];

	memset(&data, 0, sizeof(data));
	for (i = 0; i < num_threads; i++) {
		thread_data[i].reps = opt_reps;
		if (opt_disable_mod <= 0 || (i % opt_disable_mod))
			thread_data[i].reg = 1;
		else
			thread_data[i].reg = 0;
		thread_data[i].data = &data;
		ret = pthread_create(&test_threads[i], NULL,
				     test_percpu_spinlock_thread,
				     &thread_data[i]);
		if (ret) {
			errno = ret;
			perror("pthread_create");
			abort();
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_join(test_threads[i], NULL);
		if (ret) {
			errno = ret;
			perror("pthread_join");
			abort();
		}
	}

	sum = 0;
	for (i = 0; i < CPU_SETSIZE; i++)
		sum += data.c[i].count;

	assert(sum == (uint64_t)opt_reps * num_threads);
}

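/*
 * Lock-free per-cpu counter thread: each increment is a single rseq_addv
 * on the current cpu's counter, retried whenever the rseq critical
 * section aborts.
 */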
void *test_percpu_inc_thread(void *arg)
{
	struct inc_thread_test_data *thread_data = arg;
	struct inc_test_data *data = thread_data->data;
	long long i, reps;

	if (!opt_disable_rseq && thread_data->reg &&
	    rseq_register_current_thread())
		abort();
	reps = thread_data->reps;
	for (i = 0; i < reps; i++) {
		int ret;

		do {
			int cpu;

			cpu = get_current_cpu_id();
			ret = rseq_addv(RSEQ_MO_RELAXED, RSEQ_PERCPU,
					&data->c[cpu].count, 1, cpu);
		} while (rseq_unlikely(ret));
#ifndef BENCHMARK
		if (i != 0 && !(i % (reps / 10)))
			printf_verbose("tid %d: count %lld\n",
				       (int) rseq_gettid(), i);
#endif
	}
	printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
		       (int) rseq_gettid(), nr_abort, signals_delivered);
	if (!opt_disable_rseq && thread_data->reg &&
	    rseq_unregister_current_thread())
		abort();
	return NULL;
}

void test_percpu_inc(void)
{
	const int num_threads = opt_threads;
	int i, ret;
	uint64_t sum;
	pthread_t test_threads[num_threads];
	struct inc_test_data data;
	struct inc_thread_test_data thread_data[num_threads];

	memset(&data, 0, sizeof(data));
	for (i = 0; i < num_threads; i++) {
		thread_data[i].reps = opt_reps;
		if (opt_disable_mod <= 0 || (i % opt_disable_mod))
			thread_data[i].reg = 1;
		else
			thread_data[i].reg = 0;
		thread_data[i].data = &data;
		ret = pthread_create(&test_threads[i], NULL,
				     test_percpu_inc_thread,
				     &thread_data[i]);
		if (ret) {
			errno = ret;
			perror("pthread_create");
			abort();
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_join(test_threads[i], NULL);
		if (ret) {
			errno = ret;
			perror("pthread_join");
			abort();
		}
	}

	sum = 0;
	for (i = 0; i < CPU_SETSIZE; i++)
		sum += data.c[i].count;

	assert(sum == (uint64_t)opt_reps * num_threads);
}

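/*
 * Push onto the current cpu's list: read the head, link the new node in
 * front of it, then use rseq_cmpeqv_storev to publish the new head only
 * if the head is unchanged and the thread is still running on the same
 * cpu; otherwise retry.
 */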
void this_cpu_list_push(struct percpu_list *list,
			struct percpu_list_node *node,
			int *_cpu)
{
	int cpu;

	for (;;) {
		intptr_t *targetptr, newval, expect;
		int ret;

		cpu = get_current_cpu_id();
		/* Load list->c[cpu].head with single-copy atomicity. */
		expect = (intptr_t)RSEQ_READ_ONCE(list->c[cpu].head);
		newval = (intptr_t)node;
		targetptr = (intptr_t *)&list->c[cpu].head;
		node->next = (struct percpu_list_node *)expect;
		ret = rseq_cmpeqv_storev(RSEQ_MO_RELAXED, RSEQ_PERCPU,
					 targetptr, expect, newval, cpu);
		if (rseq_likely(!ret))
			break;
		/* Retry if comparison fails or rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
}

/*
 * Unlike a traditional lock-less linked list, the availability of an
 * rseq primitive allows us to implement pop without concerns over
 * ABA-type races.
 */
struct percpu_list_node *this_cpu_list_pop(struct percpu_list *list,
					   int *_cpu)
{
	struct percpu_list_node *node = NULL;
	int cpu;

	for (;;) {
		struct percpu_list_node *head;
		intptr_t *targetptr, expectnot, *load;
		long offset;
		int ret;

		cpu = get_current_cpu_id();
		targetptr = (intptr_t *)&list->c[cpu].head;
		expectnot = (intptr_t)NULL;
		offset = offsetof(struct percpu_list_node, next);
		load = (intptr_t *)&head;
		ret = rseq_cmpnev_storeoffp_load(RSEQ_MO_RELAXED, RSEQ_PERCPU,
						 targetptr, expectnot,
						 offset, load, cpu);
		if (rseq_likely(!ret)) {
			node = head;
			break;
		}
		if (ret > 0)
			break;
		/* Retry if rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
	return node;
}

/*
 * __percpu_list_pop is not safe against concurrent accesses. Should
 * only be used on lists that are not concurrently modified.
 */
struct percpu_list_node *__percpu_list_pop(struct percpu_list *list, int cpu)
{
	struct percpu_list_node *node;

	node = list->c[cpu].head;
	if (!node)
		return NULL;
	list->c[cpu].head = node->next;
	return node;
}

void *test_percpu_list_thread(void *arg)
{
	long long i, reps;
	struct percpu_list *list = (struct percpu_list *)arg;

	if (!opt_disable_rseq && rseq_register_current_thread())
		abort();

	reps = opt_reps;
	for (i = 0; i < reps; i++) {
		struct percpu_list_node *node;

		node = this_cpu_list_pop(list, NULL);
		if (opt_yield)
			sched_yield();  /* encourage shuffling */
		if (node)
			this_cpu_list_push(list, node, NULL);
	}

	printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
		       (int) rseq_gettid(), nr_abort, signals_delivered);
	if (!opt_disable_rseq && rseq_unregister_current_thread())
		abort();

	return NULL;
}

/* Simultaneous modification to a per-cpu linked list from many threads.  */
void test_percpu_list(void)
{
	const int num_threads = opt_threads;
	int i, j, ret;
	uint64_t sum = 0, expected_sum = 0;
	struct percpu_list list;
	pthread_t test_threads[num_threads];
	cpu_set_t allowed_cpus;

	memset(&list, 0, sizeof(list));

	/* Generate list entries for every usable cpu. */
	sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
	for (i = 0; i < CPU_SETSIZE; i++) {
		if (!CPU_ISSET(i, &allowed_cpus))
			continue;
		for (j = 1; j <= 100; j++) {
			struct percpu_list_node *node;

			expected_sum += j;

			node = malloc(sizeof(*node));
			assert(node);
			node->data = j;
			node->next = list.c[i].head;
			list.c[i].head = node;
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_create(&test_threads[i], NULL,
				     test_percpu_list_thread, &list);
		if (ret) {
			errno = ret;
			perror("pthread_create");
			abort();
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_join(test_threads[i], NULL);
		if (ret) {
			errno = ret;
			perror("pthread_join");
			abort();
		}
	}

	for (i = 0; i < CPU_SETSIZE; i++) {
		struct percpu_list_node *node;

		if (!CPU_ISSET(i, &allowed_cpus))
			continue;

		while ((node = __percpu_list_pop(&list, i))) {
			sum += node->data;
			free(node);
		}
	}

	/*
	 * All entries should now be accounted for (unless some external
	 * actor is interfering with our allowed affinity while this
	 * test is running).
	 */
	assert(sum == expected_sum);
}

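/*
 * Push a pointer onto the current cpu's array-based buffer:
 * rseq_cmpeqv_trystorev_storev speculatively stores the node pointer
 * into the next free slot, then commits by storing the incremented
 * offset, all within one restartable sequence. opt_mo selects the
 * memory ordering of the final (commit) store.
 */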
bool this_cpu_buffer_push(struct percpu_buffer *buffer,
			  struct percpu_buffer_node *node,
			  int *_cpu)
{
	bool result = false;
	int cpu;

	for (;;) {
		intptr_t *targetptr_spec, newval_spec;
		intptr_t *targetptr_final, newval_final;
		intptr_t offset;
		int ret;

		cpu = get_current_cpu_id();
		offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
		if (offset == buffer->c[cpu].buflen)
			break;
		newval_spec = (intptr_t)node;
		targetptr_spec = (intptr_t *)&buffer->c[cpu].array[offset];
		newval_final = offset + 1;
		targetptr_final = &buffer->c[cpu].offset;
		ret = rseq_cmpeqv_trystorev_storev(opt_mo, RSEQ_PERCPU,
			targetptr_final, offset, targetptr_spec,
			newval_spec, newval_final, cpu);
		if (rseq_likely(!ret)) {
			result = true;
			break;
		}
		/* Retry if comparison fails or rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
	return result;
}

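/*
 * Pop from the current cpu's buffer: rseq_cmpeqv_cmpeqv_storev re-checks
 * both the offset and the top-of-buffer pointer before storing the
 * decremented offset, so any concurrent change is detected and the
 * operation retried.
 */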
struct percpu_buffer_node *this_cpu_buffer_pop(struct percpu_buffer *buffer,
					       int *_cpu)
{
	struct percpu_buffer_node *head;
	int cpu;

	for (;;) {
		intptr_t *targetptr, newval;
		intptr_t offset;
		int ret;

		cpu = get_current_cpu_id();
		/* Load offset with single-copy atomicity. */
		offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
		if (offset == 0) {
			head = NULL;
			break;
		}
		head = RSEQ_READ_ONCE(buffer->c[cpu].array[offset - 1]);
		newval = offset - 1;
		targetptr = (intptr_t *)&buffer->c[cpu].offset;
		ret = rseq_cmpeqv_cmpeqv_storev(RSEQ_MO_RELAXED, RSEQ_PERCPU,
			targetptr, offset,
			(intptr_t *)&buffer->c[cpu].array[offset - 1],
			(intptr_t)head, newval, cpu);
		if (rseq_likely(!ret))
			break;
		/* Retry if comparison fails or rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
	return head;
}

/*
 * __percpu_buffer_pop is not safe against concurrent accesses. Should
 * only be used on buffers that are not concurrently modified.
 */
struct percpu_buffer_node *__percpu_buffer_pop(struct percpu_buffer *buffer,
					       int cpu)
{
	struct percpu_buffer_node *head;
	intptr_t offset;

	offset = buffer->c[cpu].offset;
	if (offset == 0)
		return NULL;
	head = buffer->c[cpu].array[offset - 1];
	buffer->c[cpu].offset = offset - 1;
	return head;
}

void *test_percpu_buffer_thread(void *arg)
{
	long long i, reps;
	struct percpu_buffer *buffer = (struct percpu_buffer *)arg;

	if (!opt_disable_rseq && rseq_register_current_thread())
		abort();

	reps = opt_reps;
	for (i = 0; i < reps; i++) {
		struct percpu_buffer_node *node;

		node = this_cpu_buffer_pop(buffer, NULL);
		if (opt_yield)
			sched_yield();  /* encourage shuffling */
		if (node) {
			if (!this_cpu_buffer_push(buffer, node, NULL)) {
				/* Should increase buffer size. */
				abort();
			}
		}
	}

	printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
		       (int) rseq_gettid(), nr_abort, signals_delivered);
	if (!opt_disable_rseq && rseq_unregister_current_thread())
		abort();

	return NULL;
}

/* Simultaneous modification to a per-cpu buffer from many threads.  */
void test_percpu_buffer(void)
{
	const int num_threads = opt_threads;
	int i, j, ret;
	uint64_t sum = 0, expected_sum = 0;
	struct percpu_buffer buffer;
	pthread_t test_threads[num_threads];
	cpu_set_t allowed_cpus;

	memset(&buffer, 0, sizeof(buffer));

	/* Generate list entries for every usable cpu. */
	sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
	for (i = 0; i < CPU_SETSIZE; i++) {
		if (!CPU_ISSET(i, &allowed_cpus))
			continue;
		/* Worst case is every item on the same CPU. */
		buffer.c[i].array =
			malloc(sizeof(*buffer.c[i].array) * CPU_SETSIZE *
			       BUFFER_ITEM_PER_CPU);
		assert(buffer.c[i].array);
		buffer.c[i].buflen = CPU_SETSIZE * BUFFER_ITEM_PER_CPU;
		for (j = 1; j <= BUFFER_ITEM_PER_CPU; j++) {
			struct percpu_buffer_node *node;

			expected_sum += j;

			/*
			 * We could theoretically put the word-sized
			 * "data" directly in the buffer. However, we
			 * want to model objects that would not fit
			 * within a single word, so allocate an object
			 * for each node.
			 */
			node = malloc(sizeof(*node));
			assert(node);
			node->data = j;
			buffer.c[i].array[j - 1] = node;
			buffer.c[i].offset++;
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_create(&test_threads[i], NULL,
				     test_percpu_buffer_thread, &buffer);
		if (ret) {
			errno = ret;
			perror("pthread_create");
			abort();
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_join(test_threads[i], NULL);
		if (ret) {
			errno = ret;
			perror("pthread_join");
			abort();
		}
	}

	for (i = 0; i < CPU_SETSIZE; i++) {
		struct percpu_buffer_node *node;

		if (!CPU_ISSET(i, &allowed_cpus))
			continue;

		while ((node = __percpu_buffer_pop(&buffer, i))) {
			sum += node->data;
			free(node);
		}
		free(buffer.c[i].array);
	}

	/*
	 * All entries should now be accounted for (unless some external
	 * actor is interfering with our allowed affinity while this
	 * test is running).
	 */
	assert(sum == expected_sum);
}

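/*
 * Same push/pop scheme as the pointer buffer above, but items are copied
 * by value with rseq_cmpeqv_trymemcpy_storev (copy length limited to 4kB)
 * before the new offset is committed.
 */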
this_cpu_memcpy_buffer_push(struct percpu_memcpy_buffer * buffer,struct percpu_memcpy_buffer_node item,int * _cpu)973c960e990SMathieu Desnoyers bool this_cpu_memcpy_buffer_push(struct percpu_memcpy_buffer *buffer,
974c960e990SMathieu Desnoyers 				 struct percpu_memcpy_buffer_node item,
975c960e990SMathieu Desnoyers 				 int *_cpu)
976c960e990SMathieu Desnoyers {
977c960e990SMathieu Desnoyers 	bool result = false;
978c960e990SMathieu Desnoyers 	int cpu;
979c960e990SMathieu Desnoyers 
980c960e990SMathieu Desnoyers 	for (;;) {
981c960e990SMathieu Desnoyers 		intptr_t *targetptr_final, newval_final, offset;
982c960e990SMathieu Desnoyers 		char *destptr, *srcptr;
983c960e990SMathieu Desnoyers 		size_t copylen;
984c960e990SMathieu Desnoyers 		int ret;
985c960e990SMathieu Desnoyers 
986ee31fff0SMathieu Desnoyers 		cpu = get_current_cpu_id();
987c960e990SMathieu Desnoyers 		/* Load offset with single-copy atomicity. */
988c960e990SMathieu Desnoyers 		offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
989c960e990SMathieu Desnoyers 		if (offset == buffer->c[cpu].buflen)
990c960e990SMathieu Desnoyers 			break;
991c960e990SMathieu Desnoyers 		destptr = (char *)&buffer->c[cpu].array[offset];
992c960e990SMathieu Desnoyers 		srcptr = (char *)&item;
993c960e990SMathieu Desnoyers 		/* copylen must be <= 4kB. */
994c960e990SMathieu Desnoyers 		copylen = sizeof(item);
995c960e990SMathieu Desnoyers 		newval_final = offset + 1;
996c960e990SMathieu Desnoyers 		targetptr_final = &buffer->c[cpu].offset;
997ee31fff0SMathieu Desnoyers 		ret = rseq_cmpeqv_trymemcpy_storev(
998ee31fff0SMathieu Desnoyers 			opt_mo, RSEQ_PERCPU,
999c960e990SMathieu Desnoyers 			targetptr_final, offset,
1000c960e990SMathieu Desnoyers 			destptr, srcptr, copylen,
1001c960e990SMathieu Desnoyers 			newval_final, cpu);
1002c960e990SMathieu Desnoyers 		if (rseq_likely(!ret)) {
1003c960e990SMathieu Desnoyers 			result = true;
1004c960e990SMathieu Desnoyers 			break;
1005c960e990SMathieu Desnoyers 		}
1006c960e990SMathieu Desnoyers 		/* Retry if comparison fails or rseq aborts. */
1007c960e990SMathieu Desnoyers 	}
1008c960e990SMathieu Desnoyers 	if (_cpu)
1009c960e990SMathieu Desnoyers 		*_cpu = cpu;
1010c960e990SMathieu Desnoyers 	return result;
1011c960e990SMathieu Desnoyers }
1012c960e990SMathieu Desnoyers 
this_cpu_memcpy_buffer_pop(struct percpu_memcpy_buffer * buffer,struct percpu_memcpy_buffer_node * item,int * _cpu)1013c960e990SMathieu Desnoyers bool this_cpu_memcpy_buffer_pop(struct percpu_memcpy_buffer *buffer,
1014c960e990SMathieu Desnoyers 				struct percpu_memcpy_buffer_node *item,
1015c960e990SMathieu Desnoyers 				int *_cpu)
1016c960e990SMathieu Desnoyers {
1017c960e990SMathieu Desnoyers 	bool result = false;
1018c960e990SMathieu Desnoyers 	int cpu;
1019c960e990SMathieu Desnoyers 
1020c960e990SMathieu Desnoyers 	for (;;) {
1021c960e990SMathieu Desnoyers 		intptr_t *targetptr_final, newval_final, offset;
1022c960e990SMathieu Desnoyers 		char *destptr, *srcptr;
1023c960e990SMathieu Desnoyers 		size_t copylen;
1024c960e990SMathieu Desnoyers 		int ret;
1025c960e990SMathieu Desnoyers 
1026ee31fff0SMathieu Desnoyers 		cpu = get_current_cpu_id();
1027c960e990SMathieu Desnoyers 		/* Load offset with single-copy atomicity. */
1028c960e990SMathieu Desnoyers 		offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
1029c960e990SMathieu Desnoyers 		if (offset == 0)
1030c960e990SMathieu Desnoyers 			break;
1031c960e990SMathieu Desnoyers 		destptr = (char *)item;
1032c960e990SMathieu Desnoyers 		srcptr = (char *)&buffer->c[cpu].array[offset - 1];
1033c960e990SMathieu Desnoyers 		/* copylen must be <= 4kB. */
1034c960e990SMathieu Desnoyers 		copylen = sizeof(*item);
1035c960e990SMathieu Desnoyers 		newval_final = offset - 1;
1036c960e990SMathieu Desnoyers 		targetptr_final = &buffer->c[cpu].offset;
1037ee31fff0SMathieu Desnoyers 		ret = rseq_cmpeqv_trymemcpy_storev(RSEQ_MO_RELAXED, RSEQ_PERCPU,
1038ee31fff0SMathieu Desnoyers 			targetptr_final, offset, destptr, srcptr, copylen,
1039c960e990SMathieu Desnoyers 			newval_final, cpu);
1040c960e990SMathieu Desnoyers 		if (rseq_likely(!ret)) {
1041c960e990SMathieu Desnoyers 			result = true;
1042c960e990SMathieu Desnoyers 			break;
1043c960e990SMathieu Desnoyers 		}
1044c960e990SMathieu Desnoyers 		/* Retry if comparison fails or rseq aborts. */
1045c960e990SMathieu Desnoyers 	}
1046c960e990SMathieu Desnoyers 	if (_cpu)
1047c960e990SMathieu Desnoyers 		*_cpu = cpu;
1048c960e990SMathieu Desnoyers 	return result;
1049c960e990SMathieu Desnoyers }
1050c960e990SMathieu Desnoyers 
1051c960e990SMathieu Desnoyers /*
1052c960e990SMathieu Desnoyers  * __percpu_memcpy_buffer_pop is not safe against concurrent accesses. Should
1053c960e990SMathieu Desnoyers  * only be used on buffers that are not concurrently modified.
1054c960e990SMathieu Desnoyers  */
1055c960e990SMathieu Desnoyers bool __percpu_memcpy_buffer_pop(struct percpu_memcpy_buffer *buffer,
1056c960e990SMathieu Desnoyers 				struct percpu_memcpy_buffer_node *item,
1057c960e990SMathieu Desnoyers 				int cpu)
1058c960e990SMathieu Desnoyers {
1059c960e990SMathieu Desnoyers 	intptr_t offset;
1060c960e990SMathieu Desnoyers 
1061c960e990SMathieu Desnoyers 	offset = buffer->c[cpu].offset;
1062c960e990SMathieu Desnoyers 	if (offset == 0)
1063c960e990SMathieu Desnoyers 		return false;
1064c960e990SMathieu Desnoyers 	memcpy(item, &buffer->c[cpu].array[offset - 1], sizeof(*item));
1065c960e990SMathieu Desnoyers 	buffer->c[cpu].offset = offset - 1;
1066c960e990SMathieu Desnoyers 	return true;
1067c960e990SMathieu Desnoyers }
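
/*
 * Typical single-threaded use, once all worker threads have been joined
 * (a sketch only; "consume" stands in for whatever the caller does with
 * the popped node):
 *
 *	struct percpu_memcpy_buffer_node item;
 *
 *	while (__percpu_memcpy_buffer_pop(&buffer, &item, cpu))
 *		consume(&item);
 */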
1068c960e990SMathieu Desnoyers 
1069c960e990SMathieu Desnoyers void *test_percpu_memcpy_buffer_thread(void *arg)
1070c960e990SMathieu Desnoyers {
1071c960e990SMathieu Desnoyers 	long long i, reps;
1072c960e990SMathieu Desnoyers 	struct percpu_memcpy_buffer *buffer = (struct percpu_memcpy_buffer *)arg;
1073c960e990SMathieu Desnoyers 
1074c960e990SMathieu Desnoyers 	if (!opt_disable_rseq && rseq_register_current_thread())
1075c960e990SMathieu Desnoyers 		abort();
1076c960e990SMathieu Desnoyers 
1077c960e990SMathieu Desnoyers 	reps = opt_reps;
1078c960e990SMathieu Desnoyers 	for (i = 0; i < reps; i++) {
1079c960e990SMathieu Desnoyers 		struct percpu_memcpy_buffer_node item;
1080c960e990SMathieu Desnoyers 		bool result;
1081c960e990SMathieu Desnoyers 
1082c960e990SMathieu Desnoyers 		result = this_cpu_memcpy_buffer_pop(buffer, &item, NULL);
1083c960e990SMathieu Desnoyers 		if (opt_yield)
1084c960e990SMathieu Desnoyers 			sched_yield();  /* encourage shuffling */
1085c960e990SMathieu Desnoyers 		if (result) {
1086c960e990SMathieu Desnoyers 			if (!this_cpu_memcpy_buffer_push(buffer, item, NULL)) {
1087c960e990SMathieu Desnoyers 				/* Should increase buffer size. */
1088c960e990SMathieu Desnoyers 				abort();
1089c960e990SMathieu Desnoyers 			}
1090c960e990SMathieu Desnoyers 		}
1091c960e990SMathieu Desnoyers 	}
1092c960e990SMathieu Desnoyers 
1093c960e990SMathieu Desnoyers 	printf_verbose("tid %d: number of rseq aborts: %d, signals delivered: %u\n",
10948df34c56SMathieu Desnoyers 		       (int) rseq_gettid(), nr_abort, signals_delivered);
1095c960e990SMathieu Desnoyers 	if (!opt_disable_rseq && rseq_unregister_current_thread())
1096c960e990SMathieu Desnoyers 		abort();
1097c960e990SMathieu Desnoyers 
1098c960e990SMathieu Desnoyers 	return NULL;
1099c960e990SMathieu Desnoyers }
1100c960e990SMathieu Desnoyers 
1101c960e990SMathieu Desnoyers /* Simultaneous modification to a per-cpu buffer from many threads.  */
1102c960e990SMathieu Desnoyers void test_percpu_memcpy_buffer(void)
1103c960e990SMathieu Desnoyers {
1104c960e990SMathieu Desnoyers 	const int num_threads = opt_threads;
1105c960e990SMathieu Desnoyers 	int i, j, ret;
1106c960e990SMathieu Desnoyers 	uint64_t sum = 0, expected_sum = 0;
1107c960e990SMathieu Desnoyers 	struct percpu_memcpy_buffer buffer;
1108c960e990SMathieu Desnoyers 	pthread_t test_threads[num_threads];
1109c960e990SMathieu Desnoyers 	cpu_set_t allowed_cpus;
1110c960e990SMathieu Desnoyers 
1111c960e990SMathieu Desnoyers 	memset(&buffer, 0, sizeof(buffer));
1112c960e990SMathieu Desnoyers 
1113c960e990SMathieu Desnoyers 	/* Generate list entries for every usable cpu. */
1114c960e990SMathieu Desnoyers 	sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
1115c960e990SMathieu Desnoyers 	for (i = 0; i < CPU_SETSIZE; i++) {
1116c960e990SMathieu Desnoyers 		if (!CPU_ISSET(i, &allowed_cpus))
1117c960e990SMathieu Desnoyers 			continue;
1118c960e990SMathieu Desnoyers 		/* Worst case is every item on the same CPU. */
1119c960e990SMathieu Desnoyers 		buffer.c[i].array =
1120c960e990SMathieu Desnoyers 			malloc(sizeof(*buffer.c[i].array) * CPU_SETSIZE *
1121c960e990SMathieu Desnoyers 			       MEMCPY_BUFFER_ITEM_PER_CPU);
1122c960e990SMathieu Desnoyers 		assert(buffer.c[i].array);
1123c960e990SMathieu Desnoyers 		buffer.c[i].buflen = CPU_SETSIZE * MEMCPY_BUFFER_ITEM_PER_CPU;
1124c960e990SMathieu Desnoyers 		for (j = 1; j <= MEMCPY_BUFFER_ITEM_PER_CPU; j++) {
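			/*
			 * Each node below carries data1 = j and data2 = j + 1,
			 * so it contributes j + (j + 1) = 2 * j + 1 to the
			 * sum checked at the end of the test.
			 */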
1125c960e990SMathieu Desnoyers 			expected_sum += 2 * j + 1;
1126c960e990SMathieu Desnoyers 
1127c960e990SMathieu Desnoyers 			/*
1128c960e990SMathieu Desnoyers 			 * We could theoretically put the word-sized
1129c960e990SMathieu Desnoyers 			 * "data" directly in the buffer. However, we
1130c960e990SMathieu Desnoyers 			 * want to model objects that would not fit
1131c960e990SMathieu Desnoyers 			 * within a single word, so allocate an object
1132c960e990SMathieu Desnoyers 			 * for each node.
1133c960e990SMathieu Desnoyers 			 */
1134c960e990SMathieu Desnoyers 			buffer.c[i].array[j - 1].data1 = j;
1135c960e990SMathieu Desnoyers 			buffer.c[i].array[j - 1].data2 = j + 1;
1136c960e990SMathieu Desnoyers 			buffer.c[i].offset++;
1137c960e990SMathieu Desnoyers 		}
1138c960e990SMathieu Desnoyers 	}
1139c960e990SMathieu Desnoyers 
1140c960e990SMathieu Desnoyers 	for (i = 0; i < num_threads; i++) {
1141c960e990SMathieu Desnoyers 		ret = pthread_create(&test_threads[i], NULL,
1142c960e990SMathieu Desnoyers 				     test_percpu_memcpy_buffer_thread,
1143c960e990SMathieu Desnoyers 				     &buffer);
1144c960e990SMathieu Desnoyers 		if (ret) {
1145c960e990SMathieu Desnoyers 			errno = ret;
1146c960e990SMathieu Desnoyers 			perror("pthread_create");
1147c960e990SMathieu Desnoyers 			abort();
1148c960e990SMathieu Desnoyers 		}
1149c960e990SMathieu Desnoyers 	}
1150c960e990SMathieu Desnoyers 
1151c960e990SMathieu Desnoyers 	for (i = 0; i < num_threads; i++) {
1152c960e990SMathieu Desnoyers 		ret = pthread_join(test_threads[i], NULL);
1153c960e990SMathieu Desnoyers 		if (ret) {
1154c960e990SMathieu Desnoyers 			errno = ret;
1155c960e990SMathieu Desnoyers 			perror("pthread_join");
1156c960e990SMathieu Desnoyers 			abort();
1157c960e990SMathieu Desnoyers 		}
1158c960e990SMathieu Desnoyers 	}
1159c960e990SMathieu Desnoyers 
1160c960e990SMathieu Desnoyers 	for (i = 0; i < CPU_SETSIZE; i++) {
1161c960e990SMathieu Desnoyers 		struct percpu_memcpy_buffer_node item;
1162c960e990SMathieu Desnoyers 
1163c960e990SMathieu Desnoyers 		if (!CPU_ISSET(i, &allowed_cpus))
1164c960e990SMathieu Desnoyers 			continue;
1165c960e990SMathieu Desnoyers 
1166c960e990SMathieu Desnoyers 		while (__percpu_memcpy_buffer_pop(&buffer, &item, i)) {
1167c960e990SMathieu Desnoyers 			sum += item.data1;
1168c960e990SMathieu Desnoyers 			sum += item.data2;
1169c960e990SMathieu Desnoyers 		}
1170c960e990SMathieu Desnoyers 		free(buffer.c[i].array);
1171c960e990SMathieu Desnoyers 	}
1172c960e990SMathieu Desnoyers 
1173c960e990SMathieu Desnoyers 	/*
1174c960e990SMathieu Desnoyers 	 * All entries should now be accounted for (unless some external
1175c960e990SMathieu Desnoyers 	 * actor is interfering with our allowed affinity while this
1176c960e990SMathieu Desnoyers 	 * test is running).
1177c960e990SMathieu Desnoyers 	 */
1178c960e990SMathieu Desnoyers 	assert(sum == expected_sum);
1179c960e990SMathieu Desnoyers }
1180c960e990SMathieu Desnoyers 
1181c960e990SMathieu Desnoyers static void test_signal_interrupt_handler(int signo)
1182c960e990SMathieu Desnoyers {
1183c960e990SMathieu Desnoyers 	signals_delivered++;
1184c960e990SMathieu Desnoyers }
1185c960e990SMathieu Desnoyers 
1186c960e990SMathieu Desnoyers static int set_signal_handler(void)
1187c960e990SMathieu Desnoyers {
1188c960e990SMathieu Desnoyers 	int ret = 0;
1189c960e990SMathieu Desnoyers 	struct sigaction sa;
1190c960e990SMathieu Desnoyers 	sigset_t sigset;
1191c960e990SMathieu Desnoyers 
1192c960e990SMathieu Desnoyers 	ret = sigemptyset(&sigset);
1193c960e990SMathieu Desnoyers 	if (ret < 0) {
1194c960e990SMathieu Desnoyers 		perror("sigemptyset");
1195c960e990SMathieu Desnoyers 		return ret;
1196c960e990SMathieu Desnoyers 	}
1197c960e990SMathieu Desnoyers 
1198c960e990SMathieu Desnoyers 	sa.sa_handler = test_signal_interrupt_handler;
1199c960e990SMathieu Desnoyers 	sa.sa_mask = sigset;
1200c960e990SMathieu Desnoyers 	sa.sa_flags = 0;
1201c960e990SMathieu Desnoyers 	ret = sigaction(SIGUSR1, &sa, NULL);
1202c960e990SMathieu Desnoyers 	if (ret < 0) {
1203c960e990SMathieu Desnoyers 		perror("sigaction");
1204c960e990SMathieu Desnoyers 		return ret;
1205c960e990SMathieu Desnoyers 	}
1206c960e990SMathieu Desnoyers 
1207c960e990SMathieu Desnoyers 	printf_verbose("Signal handler set for SIGUSR1\n");
1208c960e990SMathieu Desnoyers 
1209c960e990SMathieu Desnoyers 	return ret;
1210c960e990SMathieu Desnoyers }
1211c960e990SMathieu Desnoyers 
12126f39cecdSXingxing Su /* Test MEMBARRIER_CMD_PRIVATE_RESTART_RSEQ_ON_CPU membarrier command. */
1213ee31fff0SMathieu Desnoyers #ifdef TEST_MEMBARRIER
1214f166b111SPeter Oskolkov struct test_membarrier_thread_args {
1215f166b111SPeter Oskolkov 	int stop;
1216f166b111SPeter Oskolkov 	intptr_t percpu_list_ptr;
1217f166b111SPeter Oskolkov };
1218f166b111SPeter Oskolkov 
1219f166b111SPeter Oskolkov /* Worker threads modify data in their "active" percpu lists. */
1220f166b111SPeter Oskolkov void *test_membarrier_worker_thread(void *arg)
1221f166b111SPeter Oskolkov {
1222f166b111SPeter Oskolkov 	struct test_membarrier_thread_args *args =
1223f166b111SPeter Oskolkov 		(struct test_membarrier_thread_args *)arg;
1224f166b111SPeter Oskolkov 	const int iters = opt_reps;
1225f166b111SPeter Oskolkov 	int i;
1226f166b111SPeter Oskolkov 
1227f166b111SPeter Oskolkov 	if (rseq_register_current_thread()) {
1228f166b111SPeter Oskolkov 		fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
1229f166b111SPeter Oskolkov 			errno, strerror(errno));
1230f166b111SPeter Oskolkov 		abort();
1231f166b111SPeter Oskolkov 	}
1232f166b111SPeter Oskolkov 
1233f166b111SPeter Oskolkov 	/* Wait for initialization. */
1234f166b111SPeter Oskolkov 	while (!atomic_load(&args->percpu_list_ptr)) {}
1235f166b111SPeter Oskolkov 
1236f166b111SPeter Oskolkov 	for (i = 0; i < iters; ++i) {
1237f166b111SPeter Oskolkov 		int ret;
1238f166b111SPeter Oskolkov 
1239f166b111SPeter Oskolkov 		do {
1240ee31fff0SMathieu Desnoyers 			int cpu = get_current_cpu_id();
1241f166b111SPeter Oskolkov 
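			/*
			 * Broadly, rseq_offset_deref_addv() loads the list
			 * pointer currently published in percpu_list_ptr,
			 * offsets it to this cpu's entry, dereferences that
			 * entry's head pointer and adds 1 to the node's
			 * leading "data" field, all inside one rseq critical
			 * section; see the arch rseq header for the exact
			 * semantics.
			 */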
1242ee31fff0SMathieu Desnoyers 			ret = rseq_offset_deref_addv(RSEQ_MO_RELAXED, RSEQ_PERCPU,
1243ee31fff0SMathieu Desnoyers 				&args->percpu_list_ptr,
1244f166b111SPeter Oskolkov 				sizeof(struct percpu_list_entry) * cpu, 1, cpu);
1245f166b111SPeter Oskolkov 		} while (rseq_unlikely(ret));
1246f166b111SPeter Oskolkov 	}
1247f166b111SPeter Oskolkov 
1248f166b111SPeter Oskolkov 	if (rseq_unregister_current_thread()) {
1249f166b111SPeter Oskolkov 		fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
1250f166b111SPeter Oskolkov 			errno, strerror(errno));
1251f166b111SPeter Oskolkov 		abort();
1252f166b111SPeter Oskolkov 	}
1253f166b111SPeter Oskolkov 	return NULL;
1254f166b111SPeter Oskolkov }
1255f166b111SPeter Oskolkov 
1256f166b111SPeter Oskolkov void test_membarrier_init_percpu_list(struct percpu_list *list)
1257f166b111SPeter Oskolkov {
1258f166b111SPeter Oskolkov 	int i;
1259f166b111SPeter Oskolkov 
1260f166b111SPeter Oskolkov 	memset(list, 0, sizeof(*list));
1261f166b111SPeter Oskolkov 	for (i = 0; i < CPU_SETSIZE; i++) {
1262f166b111SPeter Oskolkov 		struct percpu_list_node *node;
1263f166b111SPeter Oskolkov 
1264f166b111SPeter Oskolkov 		node = malloc(sizeof(*node));
1265f166b111SPeter Oskolkov 		assert(node);
1266f166b111SPeter Oskolkov 		node->data = 0;
1267f166b111SPeter Oskolkov 		node->next = NULL;
1268f166b111SPeter Oskolkov 		list->c[i].head = node;
1269f166b111SPeter Oskolkov 	}
1270f166b111SPeter Oskolkov }
1271f166b111SPeter Oskolkov 
1272f166b111SPeter Oskolkov void test_membarrier_free_percpu_list(struct percpu_list *list)
1273f166b111SPeter Oskolkov {
1274f166b111SPeter Oskolkov 	int i;
1275f166b111SPeter Oskolkov 
1276f166b111SPeter Oskolkov 	for (i = 0; i < CPU_SETSIZE; i++)
1277f166b111SPeter Oskolkov 		free(list->c[i].head);
1278f166b111SPeter Oskolkov }
1279f166b111SPeter Oskolkov 
1280f166b111SPeter Oskolkov /*
1281f166b111SPeter Oskolkov  * The manager thread swaps per-cpu lists that worker threads see,
1282f166b111SPeter Oskolkov  * and validates that there are no unexpected modifications.
1283f166b111SPeter Oskolkov  */
1284f166b111SPeter Oskolkov void *test_membarrier_manager_thread(void *arg)
1285f166b111SPeter Oskolkov {
1286f166b111SPeter Oskolkov 	struct test_membarrier_thread_args *args =
1287f166b111SPeter Oskolkov 		(struct test_membarrier_thread_args *)arg;
1288f166b111SPeter Oskolkov 	struct percpu_list list_a, list_b;
1289f166b111SPeter Oskolkov 	intptr_t expect_a = 0, expect_b = 0;
1290f166b111SPeter Oskolkov 	int cpu_a = 0, cpu_b = 0;
1291f166b111SPeter Oskolkov 
1292f166b111SPeter Oskolkov 	if (rseq_register_current_thread()) {
1293f166b111SPeter Oskolkov 		fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
1294f166b111SPeter Oskolkov 			errno, strerror(errno));
1295f166b111SPeter Oskolkov 		abort();
1296f166b111SPeter Oskolkov 	}
1297f166b111SPeter Oskolkov 
1298f166b111SPeter Oskolkov 	/* Init lists. */
1299f166b111SPeter Oskolkov 	test_membarrier_init_percpu_list(&list_a);
1300f166b111SPeter Oskolkov 	test_membarrier_init_percpu_list(&list_b);
1301f166b111SPeter Oskolkov 
1302f166b111SPeter Oskolkov 	atomic_store(&args->percpu_list_ptr, (intptr_t)&list_a);
1303f166b111SPeter Oskolkov 
1304f166b111SPeter Oskolkov 	while (!atomic_load(&args->stop)) {
1305f166b111SPeter Oskolkov 		/* list_a is "active". */
1306f166b111SPeter Oskolkov 		cpu_a = rand() % CPU_SETSIZE;
1307f166b111SPeter Oskolkov 		/*
1308f166b111SPeter Oskolkov 		 * As list_b is "inactive", we should never see changes
1309f166b111SPeter Oskolkov 		 * to list_b.
1310f166b111SPeter Oskolkov 		 */
1311f166b111SPeter Oskolkov 		if (expect_b != atomic_load(&list_b.c[cpu_b].head->data)) {
1312f166b111SPeter Oskolkov 			fprintf(stderr, "Membarrier test failed\n");
1313f166b111SPeter Oskolkov 			abort();
1314f166b111SPeter Oskolkov 		}
1315f166b111SPeter Oskolkov 
1316f166b111SPeter Oskolkov 		/* Make list_b "active". */
1317f166b111SPeter Oskolkov 		atomic_store(&args->percpu_list_ptr, (intptr_t)&list_b);
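		/*
		 * The expedited rseq membarrier below restarts any rseq
		 * critical section currently running on cpu_a, so a worker
		 * that already loaded the old (list_a) pointer inside its
		 * critical section is forced to abort and re-read the
		 * freshly published pointer.
		 */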
1318ee31fff0SMathieu Desnoyers 		if (rseq_membarrier_expedited(cpu_a) &&
1319f166b111SPeter Oskolkov 				errno != ENXIO /* missing CPU */) {
1320f166b111SPeter Oskolkov 			perror("sys_membarrier");
1321f166b111SPeter Oskolkov 			abort();
1322f166b111SPeter Oskolkov 		}
1323f166b111SPeter Oskolkov 		/*
1324f166b111SPeter Oskolkov 		 * Cpu A should now only modify list_b, so the values
1325f166b111SPeter Oskolkov 		 * in list_a should be stable.
1326f166b111SPeter Oskolkov 		 */
1327f166b111SPeter Oskolkov 		expect_a = atomic_load(&list_a.c[cpu_a].head->data);
1328f166b111SPeter Oskolkov 
1329f166b111SPeter Oskolkov 		cpu_b = rand() % CPU_SETSIZE;
1330f166b111SPeter Oskolkov 		/*
1331f166b111SPeter Oskolkov 		 * As list_a is "inactive", we should never see changes
1332f166b111SPeter Oskolkov 		 * to list_a.
1333f166b111SPeter Oskolkov 		 */
1334f166b111SPeter Oskolkov 		if (expect_a != atomic_load(&list_a.c[cpu_a].head->data)) {
1335f166b111SPeter Oskolkov 			fprintf(stderr, "Membarrier test failed\n");
1336f166b111SPeter Oskolkov 			abort();
1337f166b111SPeter Oskolkov 		}
1338f166b111SPeter Oskolkov 
1339f166b111SPeter Oskolkov 		/* Make list_a "active". */
1340f166b111SPeter Oskolkov 		atomic_store(&args->percpu_list_ptr, (intptr_t)&list_a);
1341ee31fff0SMathieu Desnoyers 		if (rseq_membarrier_expedited(cpu_b) &&
1342f166b111SPeter Oskolkov 				errno != ENXIO /* missing CPU */) {
1343f166b111SPeter Oskolkov 			perror("sys_membarrier");
1344f166b111SPeter Oskolkov 			abort();
1345f166b111SPeter Oskolkov 		}
1346f166b111SPeter Oskolkov 		/* Remember a value from list_b. */
1347f166b111SPeter Oskolkov 		expect_b = atomic_load(&list_b.c[cpu_b].head->data);
1348f166b111SPeter Oskolkov 	}
1349f166b111SPeter Oskolkov 
1350f166b111SPeter Oskolkov 	test_membarrier_free_percpu_list(&list_a);
1351f166b111SPeter Oskolkov 	test_membarrier_free_percpu_list(&list_b);
1352f166b111SPeter Oskolkov 
1353f166b111SPeter Oskolkov 	if (rseq_unregister_current_thread()) {
1354f166b111SPeter Oskolkov 		fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
1355f166b111SPeter Oskolkov 			errno, strerror(errno));
1356f166b111SPeter Oskolkov 		abort();
1357f166b111SPeter Oskolkov 	}
1358f166b111SPeter Oskolkov 	return NULL;
1359f166b111SPeter Oskolkov }
1360f166b111SPeter Oskolkov 
1361f166b111SPeter Oskolkov void test_membarrier(void)
1362f166b111SPeter Oskolkov {
1363f166b111SPeter Oskolkov 	const int num_threads = opt_threads;
1364f166b111SPeter Oskolkov 	struct test_membarrier_thread_args thread_args;
1365f166b111SPeter Oskolkov 	pthread_t worker_threads[num_threads];
1366f166b111SPeter Oskolkov 	pthread_t manager_thread;
1367f166b111SPeter Oskolkov 	int i, ret;
1368f166b111SPeter Oskolkov 
1369f166b111SPeter Oskolkov 	if (sys_membarrier(MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ, 0, 0)) {
1370f166b111SPeter Oskolkov 		perror("sys_membarrier");
1371f166b111SPeter Oskolkov 		abort();
1372f166b111SPeter Oskolkov 	}
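	/*
	 * A process has to register its intent with
	 * MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ (checked above)
	 * before it may issue expedited rseq membarriers; see
	 * membarrier(2).
	 */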
1373f166b111SPeter Oskolkov 
1374f166b111SPeter Oskolkov 	thread_args.stop = 0;
1375f166b111SPeter Oskolkov 	thread_args.percpu_list_ptr = 0;
1376f166b111SPeter Oskolkov 	ret = pthread_create(&manager_thread, NULL,
1377f166b111SPeter Oskolkov 			test_membarrier_manager_thread, &thread_args);
1378f166b111SPeter Oskolkov 	if (ret) {
1379f166b111SPeter Oskolkov 		errno = ret;
1380f166b111SPeter Oskolkov 		perror("pthread_create");
1381f166b111SPeter Oskolkov 		abort();
1382f166b111SPeter Oskolkov 	}
1383f166b111SPeter Oskolkov 
1384f166b111SPeter Oskolkov 	for (i = 0; i < num_threads; i++) {
1385f166b111SPeter Oskolkov 		ret = pthread_create(&worker_threads[i], NULL,
1386f166b111SPeter Oskolkov 				test_membarrier_worker_thread, &thread_args);
1387f166b111SPeter Oskolkov 		if (ret) {
1388f166b111SPeter Oskolkov 			errno = ret;
1389f166b111SPeter Oskolkov 			perror("pthread_create");
1390f166b111SPeter Oskolkov 			abort();
1391f166b111SPeter Oskolkov 		}
1392f166b111SPeter Oskolkov 	}
1393f166b111SPeter Oskolkov 
1394f166b111SPeter Oskolkov 
1395f166b111SPeter Oskolkov 	for (i = 0; i < num_threads; i++) {
1396f166b111SPeter Oskolkov 		ret = pthread_join(worker_threads[i], NULL);
1397f166b111SPeter Oskolkov 		if (ret) {
1398f166b111SPeter Oskolkov 			errno = ret;
1399f166b111SPeter Oskolkov 			perror("pthread_join");
1400f166b111SPeter Oskolkov 			abort();
1401f166b111SPeter Oskolkov 		}
1402f166b111SPeter Oskolkov 	}
1403f166b111SPeter Oskolkov 
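	/*
	 * Only stop the manager once every worker has been joined, so that
	 * all opt_reps increments from each worker have landed in one of
	 * the two lists before teardown.
	 */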
1404f166b111SPeter Oskolkov 	atomic_store(&thread_args.stop, 1);
1405f166b111SPeter Oskolkov 	ret = pthread_join(manager_thread, NULL);
1406f166b111SPeter Oskolkov 	if (ret) {
1407f166b111SPeter Oskolkov 		errno = ret;
1408f166b111SPeter Oskolkov 		perror("pthread_join");
1409f166b111SPeter Oskolkov 		abort();
1410f166b111SPeter Oskolkov 	}
1411f166b111SPeter Oskolkov }
1412ee31fff0SMathieu Desnoyers #else /* TEST_MEMBARRIER */
1413f166b111SPeter Oskolkov void test_membarrier(void)
1414f166b111SPeter Oskolkov {
1415f166b111SPeter Oskolkov 	fprintf(stderr, "rseq_offset_deref_addv is not implemented on this architecture. "
1416f166b111SPeter Oskolkov 			"Skipping membarrier test.\n");
1417f166b111SPeter Oskolkov }
1418f166b111SPeter Oskolkov #endif
1419f166b111SPeter Oskolkov 
1420c960e990SMathieu Desnoyers static void show_usage(int argc, char **argv)
1421c960e990SMathieu Desnoyers {
1422c960e990SMathieu Desnoyers 	printf("Usage: %s <OPTIONS>\n",
1423c960e990SMathieu Desnoyers 		argv[0]);
1424c960e990SMathieu Desnoyers 	printf("OPTIONS:\n");
1425c960e990SMathieu Desnoyers 	printf("	[-1 loops] Number of loops for delay injection 1\n");
1426c960e990SMathieu Desnoyers 	printf("	[-2 loops] Number of loops for delay injection 2\n");
1427c960e990SMathieu Desnoyers 	printf("	[-3 loops] Number of loops for delay injection 3\n");
1428c960e990SMathieu Desnoyers 	printf("	[-4 loops] Number of loops for delay injection 4\n");
1429c960e990SMathieu Desnoyers 	printf("	[-5 loops] Number of loops for delay injection 5\n");
1430c960e990SMathieu Desnoyers 	printf("	[-6 loops] Number of loops for delay injection 6\n");
1431c960e990SMathieu Desnoyers 	printf("	[-7 loops] Number of loops for delay injection 7 (-1 to enable -m)\n");
1432c960e990SMathieu Desnoyers 	printf("	[-8 loops] Number of loops for delay injection 8 (-1 to enable -m)\n");
1433c960e990SMathieu Desnoyers 	printf("	[-9 loops] Number of loops for delay injection 9 (-1 to enable -m)\n");
1434c960e990SMathieu Desnoyers 	printf("	[-m N] Yield/sleep/kill every modulo N (default 0: disabled) (>= 0)\n");
1435c960e990SMathieu Desnoyers 	printf("	[-y] Yield\n");
1436c960e990SMathieu Desnoyers 	printf("	[-k] Kill thread with signal\n");
1437c960e990SMathieu Desnoyers 	printf("	[-s S] S: =0: disabled (default), >0: sleep time (ms)\n");
1438c960e990SMathieu Desnoyers 	printf("	[-t N] Number of threads (default 200)\n");
1439c960e990SMathieu Desnoyers 	printf("	[-r N] Number of repetitions per thread (default 5000)\n");
1440c960e990SMathieu Desnoyers 	printf("	[-d] Disable rseq system call (no initialization)\n");
1441c960e990SMathieu Desnoyers 	printf("	[-D M] Disable rseq for each M threads\n");
1442f166b111SPeter Oskolkov 	printf("	[-T test] Choose test: (s)pinlock, (l)ist, (b)uffer, (m)emcpy, (i)ncrement, membarrie(r)\n");
1443c960e990SMathieu Desnoyers 	printf("	[-M] Push into buffer and memcpy buffer with memory barriers.\n");
1444c960e990SMathieu Desnoyers 	printf("	[-v] Verbose output.\n");
1445c960e990SMathieu Desnoyers 	printf("	[-h] Show this help.\n");
1446c960e990SMathieu Desnoyers 	printf("\n");
1447c960e990SMathieu Desnoyers }
1448c960e990SMathieu Desnoyers 
1449c960e990SMathieu Desnoyers int main(int argc, char **argv)
1450c960e990SMathieu Desnoyers {
1451c960e990SMathieu Desnoyers 	int i;
1452c960e990SMathieu Desnoyers 
1453c960e990SMathieu Desnoyers 	for (i = 1; i < argc; i++) {
1454c960e990SMathieu Desnoyers 		if (argv[i][0] != '-')
1455c960e990SMathieu Desnoyers 			continue;
1456c960e990SMathieu Desnoyers 		switch (argv[i][1]) {
1457c960e990SMathieu Desnoyers 		case '1':
1458c960e990SMathieu Desnoyers 		case '2':
1459c960e990SMathieu Desnoyers 		case '3':
1460c960e990SMathieu Desnoyers 		case '4':
1461c960e990SMathieu Desnoyers 		case '5':
1462c960e990SMathieu Desnoyers 		case '6':
1463c960e990SMathieu Desnoyers 		case '7':
1464c960e990SMathieu Desnoyers 		case '8':
1465c960e990SMathieu Desnoyers 		case '9':
1466c960e990SMathieu Desnoyers 			if (argc < i + 2) {
1467c960e990SMathieu Desnoyers 				show_usage(argc, argv);
1468c960e990SMathieu Desnoyers 				goto error;
1469c960e990SMathieu Desnoyers 			}
1470c960e990SMathieu Desnoyers 			loop_cnt[argv[i][1] - '0'] = atol(argv[i + 1]);
1471c960e990SMathieu Desnoyers 			i++;
1472c960e990SMathieu Desnoyers 			break;
1473c960e990SMathieu Desnoyers 		case 'm':
1474c960e990SMathieu Desnoyers 			if (argc < i + 2) {
1475c960e990SMathieu Desnoyers 				show_usage(argc, argv);
1476c960e990SMathieu Desnoyers 				goto error;
1477c960e990SMathieu Desnoyers 			}
1478c960e990SMathieu Desnoyers 			opt_modulo = atol(argv[i + 1]);
1479c960e990SMathieu Desnoyers 			if (opt_modulo < 0) {
1480c960e990SMathieu Desnoyers 				show_usage(argc, argv);
1481c960e990SMathieu Desnoyers 				goto error;
1482c960e990SMathieu Desnoyers 			}
1483c960e990SMathieu Desnoyers 			i++;
1484c960e990SMathieu Desnoyers 			break;
1485c960e990SMathieu Desnoyers 		case 's':
1486c960e990SMathieu Desnoyers 			if (argc < i + 2) {
1487c960e990SMathieu Desnoyers 				show_usage(argc, argv);
1488c960e990SMathieu Desnoyers 				goto error;
1489c960e990SMathieu Desnoyers 			}
1490c960e990SMathieu Desnoyers 			opt_sleep = atol(argv[i + 1]);
1491c960e990SMathieu Desnoyers 			if (opt_sleep < 0) {
1492c960e990SMathieu Desnoyers 				show_usage(argc, argv);
1493c960e990SMathieu Desnoyers 				goto error;
1494c960e990SMathieu Desnoyers 			}
1495c960e990SMathieu Desnoyers 			i++;
1496c960e990SMathieu Desnoyers 			break;
1497c960e990SMathieu Desnoyers 		case 'y':
1498c960e990SMathieu Desnoyers 			opt_yield = 1;
1499c960e990SMathieu Desnoyers 			break;
1500c960e990SMathieu Desnoyers 		case 'k':
1501c960e990SMathieu Desnoyers 			opt_signal = 1;
1502c960e990SMathieu Desnoyers 			break;
1503c960e990SMathieu Desnoyers 		case 'd':
1504c960e990SMathieu Desnoyers 			opt_disable_rseq = 1;
1505c960e990SMathieu Desnoyers 			break;
1506c960e990SMathieu Desnoyers 		case 'D':
1507c960e990SMathieu Desnoyers 			if (argc < i + 2) {
1508c960e990SMathieu Desnoyers 				show_usage(argc, argv);
1509c960e990SMathieu Desnoyers 				goto error;
1510c960e990SMathieu Desnoyers 			}
1511c960e990SMathieu Desnoyers 			opt_disable_mod = atol(argv[i + 1]);
1512c960e990SMathieu Desnoyers 			if (opt_disable_mod < 0) {
1513c960e990SMathieu Desnoyers 				show_usage(argc, argv);
1514c960e990SMathieu Desnoyers 				goto error;
1515c960e990SMathieu Desnoyers 			}
1516c960e990SMathieu Desnoyers 			i++;
1517c960e990SMathieu Desnoyers 			break;
1518c960e990SMathieu Desnoyers 		case 't':
1519c960e990SMathieu Desnoyers 			if (argc < i + 2) {
1520c960e990SMathieu Desnoyers 				show_usage(argc, argv);
1521c960e990SMathieu Desnoyers 				goto error;
1522c960e990SMathieu Desnoyers 			}
1523c960e990SMathieu Desnoyers 			opt_threads = atol(argv[i + 1]);
1524c960e990SMathieu Desnoyers 			if (opt_threads < 0) {
1525c960e990SMathieu Desnoyers 				show_usage(argc, argv);
1526c960e990SMathieu Desnoyers 				goto error;
1527c960e990SMathieu Desnoyers 			}
1528c960e990SMathieu Desnoyers 			i++;
1529c960e990SMathieu Desnoyers 			break;
1530c960e990SMathieu Desnoyers 		case 'r':
1531c960e990SMathieu Desnoyers 			if (argc < i + 2) {
1532c960e990SMathieu Desnoyers 				show_usage(argc, argv);
1533c960e990SMathieu Desnoyers 				goto error;
1534c960e990SMathieu Desnoyers 			}
1535c960e990SMathieu Desnoyers 			opt_reps = atoll(argv[i + 1]);
1536c960e990SMathieu Desnoyers 			if (opt_reps < 0) {
1537c960e990SMathieu Desnoyers 				show_usage(argc, argv);
1538c960e990SMathieu Desnoyers 				goto error;
1539c960e990SMathieu Desnoyers 			}
1540c960e990SMathieu Desnoyers 			i++;
1541c960e990SMathieu Desnoyers 			break;
1542c960e990SMathieu Desnoyers 		case 'h':
1543c960e990SMathieu Desnoyers 			show_usage(argc, argv);
1544c960e990SMathieu Desnoyers 			goto end;
1545c960e990SMathieu Desnoyers 		case 'T':
1546c960e990SMathieu Desnoyers 			if (argc < i + 2) {
1547c960e990SMathieu Desnoyers 				show_usage(argc, argv);
1548c960e990SMathieu Desnoyers 				goto error;
1549c960e990SMathieu Desnoyers 			}
1550c960e990SMathieu Desnoyers 			opt_test = *argv[i + 1];
1551c960e990SMathieu Desnoyers 			switch (opt_test) {
1552c960e990SMathieu Desnoyers 			case 's':
1553c960e990SMathieu Desnoyers 			case 'l':
1554c960e990SMathieu Desnoyers 			case 'i':
1555c960e990SMathieu Desnoyers 			case 'b':
1556c960e990SMathieu Desnoyers 			case 'm':
1557f166b111SPeter Oskolkov 			case 'r':
1558c960e990SMathieu Desnoyers 				break;
1559c960e990SMathieu Desnoyers 			default:
1560c960e990SMathieu Desnoyers 				show_usage(argc, argv);
1561c960e990SMathieu Desnoyers 				goto error;
1562c960e990SMathieu Desnoyers 			}
1563c960e990SMathieu Desnoyers 			i++;
1564c960e990SMathieu Desnoyers 			break;
1565c960e990SMathieu Desnoyers 		case 'v':
1566c960e990SMathieu Desnoyers 			verbose = 1;
1567c960e990SMathieu Desnoyers 			break;
1568c960e990SMathieu Desnoyers 		case 'M':
1569ee31fff0SMathieu Desnoyers 			opt_mo = RSEQ_MO_RELEASE;
1570c960e990SMathieu Desnoyers 			break;
1571c960e990SMathieu Desnoyers 		default:
1572c960e990SMathieu Desnoyers 			show_usage(argc, argv);
1573c960e990SMathieu Desnoyers 			goto error;
1574c960e990SMathieu Desnoyers 		}
1575c960e990SMathieu Desnoyers 	}
1576c960e990SMathieu Desnoyers 
1577c960e990SMathieu Desnoyers 	loop_cnt_1 = loop_cnt[1];
1578c960e990SMathieu Desnoyers 	loop_cnt_2 = loop_cnt[2];
1579c960e990SMathieu Desnoyers 	loop_cnt_3 = loop_cnt[3];
1580c960e990SMathieu Desnoyers 	loop_cnt_4 = loop_cnt[4];
1581c960e990SMathieu Desnoyers 	loop_cnt_5 = loop_cnt[5];
1582c960e990SMathieu Desnoyers 	loop_cnt_6 = loop_cnt[6];
1583c960e990SMathieu Desnoyers 
1584c960e990SMathieu Desnoyers 	if (set_signal_handler())
1585c960e990SMathieu Desnoyers 		goto error;
1586c960e990SMathieu Desnoyers 
1587c960e990SMathieu Desnoyers 	if (!opt_disable_rseq && rseq_register_current_thread())
1588c960e990SMathieu Desnoyers 		goto error;
1589ee31fff0SMathieu Desnoyers 	if (!opt_disable_rseq && !rseq_validate_cpu_id()) {
1590ee31fff0SMathieu Desnoyers 		fprintf(stderr, "Error: cpu id getter unavailable\n");
1591ee31fff0SMathieu Desnoyers 		goto error;
1592ee31fff0SMathieu Desnoyers 	}
1593c960e990SMathieu Desnoyers 	switch (opt_test) {
1594c960e990SMathieu Desnoyers 	case 's':
1595c960e990SMathieu Desnoyers 		printf_verbose("spinlock\n");
1596c960e990SMathieu Desnoyers 		test_percpu_spinlock();
1597c960e990SMathieu Desnoyers 		break;
1598c960e990SMathieu Desnoyers 	case 'l':
1599c960e990SMathieu Desnoyers 		printf_verbose("linked list\n");
1600c960e990SMathieu Desnoyers 		test_percpu_list();
1601c960e990SMathieu Desnoyers 		break;
1602c960e990SMathieu Desnoyers 	case 'b':
1603c960e990SMathieu Desnoyers 		printf_verbose("buffer\n");
1604c960e990SMathieu Desnoyers 		test_percpu_buffer();
1605c960e990SMathieu Desnoyers 		break;
1606c960e990SMathieu Desnoyers 	case 'm':
1607c960e990SMathieu Desnoyers 		printf_verbose("memcpy buffer\n");
1608c960e990SMathieu Desnoyers 		test_percpu_memcpy_buffer();
1609c960e990SMathieu Desnoyers 		break;
1610c960e990SMathieu Desnoyers 	case 'i':
1611c960e990SMathieu Desnoyers 		printf_verbose("counter increment\n");
1612c960e990SMathieu Desnoyers 		test_percpu_inc();
1613c960e990SMathieu Desnoyers 		break;
1614f166b111SPeter Oskolkov 	case 'r':
1615f166b111SPeter Oskolkov 		printf_verbose("membarrier\n");
1616f166b111SPeter Oskolkov 		test_membarrier();
1617f166b111SPeter Oskolkov 		break;
1618c960e990SMathieu Desnoyers 	}
1619c960e990SMathieu Desnoyers 	if (!opt_disable_rseq && rseq_unregister_current_thread())
1620c960e990SMathieu Desnoyers 		abort();
1621c960e990SMathieu Desnoyers end:
1622c960e990SMathieu Desnoyers 	return 0;
1623c960e990SMathieu Desnoyers 
1624c960e990SMathieu Desnoyers error:
1625c960e990SMathieu Desnoyers 	return -1;
1626c960e990SMathieu Desnoyers }
1627