1c960e990SMathieu Desnoyers // SPDX-License-Identifier: LGPL-2.1
2c960e990SMathieu Desnoyers #define _GNU_SOURCE
3c960e990SMathieu Desnoyers #include <assert.h>
4f166b111SPeter Oskolkov #include <linux/membarrier.h>
5c960e990SMathieu Desnoyers #include <pthread.h>
6c960e990SMathieu Desnoyers #include <sched.h>
7f166b111SPeter Oskolkov #include <stdatomic.h>
8c960e990SMathieu Desnoyers #include <stdint.h>
9c960e990SMathieu Desnoyers #include <stdio.h>
10c960e990SMathieu Desnoyers #include <stdlib.h>
11c960e990SMathieu Desnoyers #include <string.h>
12c960e990SMathieu Desnoyers #include <syscall.h>
13c960e990SMathieu Desnoyers #include <unistd.h>
14c960e990SMathieu Desnoyers #include <poll.h>
15c960e990SMathieu Desnoyers #include <sys/types.h>
16c960e990SMathieu Desnoyers #include <signal.h>
17c960e990SMathieu Desnoyers #include <errno.h>
18c960e990SMathieu Desnoyers #include <stddef.h>
19ee31fff0SMathieu Desnoyers #include <stdbool.h>
20c960e990SMathieu Desnoyers
/* Return the kernel thread id of the calling thread (gettid(2)). */
static inline pid_t rseq_gettid(void)
{
	return syscall(__NR_gettid);
}
25c960e990SMathieu Desnoyers
/* Number of delay-injection points available in the rseq helpers. */
#define NR_INJECT	9
/* loop_cnt[n]: busy-loop count injected at point n (see RSEQ_INJECT_ASM/C). */
static int loop_cnt[NR_INJECT + 1];

/*
 * Asm-addressable injection counters, referenced by symbol name from
 * RSEQ_INJECT_ASM on architectures that load them directly (i386/x86-64).
 * Presumably kept in sync with loop_cnt[] by the option parser — that code
 * is outside this chunk.
 */
static int loop_cnt_1 asm("asm_loop_cnt_1") __attribute__((used));
static int loop_cnt_2 asm("asm_loop_cnt_2") __attribute__((used));
static int loop_cnt_3 asm("asm_loop_cnt_3") __attribute__((used));
static int loop_cnt_4 asm("asm_loop_cnt_4") __attribute__((used));
static int loop_cnt_5 asm("asm_loop_cnt_5") __attribute__((used));
static int loop_cnt_6 asm("asm_loop_cnt_6") __attribute__((used));

/* Test tunables; defaults below, presumably overridden by a command-line
 * parser outside this chunk. */
static int opt_modulo, verbose;

static int opt_yield, opt_signal, opt_sleep,
		opt_disable_rseq, opt_threads = 200,
		opt_disable_mod = 0, opt_test = 's';

/* Iterations each test thread performs. */
static long long opt_reps = 5000;

/* Per-thread count of delivered signals (incremented outside this chunk,
 * presumably by the test's signal handler). */
static __thread __attribute__((tls_model("initial-exec")))
unsigned int signals_delivered;
46c960e990SMathieu Desnoyers
47c960e990SMathieu Desnoyers #ifndef BENCHMARK
48c960e990SMathieu Desnoyers
/*
 * Per-thread injection bookkeeping: yield_mod_cnt counts calls for the
 * opt_modulo-based perturbation in RSEQ_INJECT_C; nr_abort counts rseq
 * critical-section aborts (bumped by RSEQ_INJECT_FAILED).
 */
static __thread __attribute__((tls_model("initial-exec"), unused))
unsigned int yield_mod_cnt, nr_abort;

/* printf() gated on the global 'verbose' flag. */
#define printf_verbose(fmt, ...)	\
	do {				\
		if (verbose)		\
			printf(fmt, ## __VA_ARGS__);	\
	} while (0)
57c960e990SMathieu Desnoyers
/*
 * Per-architecture delay injection for the rseq inline-asm critical
 * sections.
 *
 * RSEQ_INJECT_ASM(n) is spliced into the critical section at point <n>:
 * it loads the n-th injection counter (either via the asm_loop_cnt_<n>
 * symbol aliases or via the RSEQ_INJECT_INPUT "m" operands bound to
 * loop_cnt[n]) into a scratch register and busy-loops that many times,
 * widening the window in which preemption, migration or signal delivery
 * can abort the critical section.  RSEQ_INJECT_CLOBBER declares the
 * scratch register(s) to the compiler.
 */
#ifdef __i386__

#define INJECT_ASM_REG	"eax"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"mov asm_loop_cnt_" #n ", %%" INJECT_ASM_REG "\n\t" \
	"test %%" INJECT_ASM_REG ",%%" INJECT_ASM_REG "\n\t" \
	"jz 333f\n\t" \
	"222:\n\t" \
	"dec %%" INJECT_ASM_REG "\n\t" \
	"jnz 222b\n\t" \
	"333:\n\t"

#elif defined(__x86_64__)

/* x86-64 needs a second (pointer-width) scratch for the RIP-relative load. */
#define INJECT_ASM_REG_P	"rax"
#define INJECT_ASM_REG		"eax"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG_P \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"lea asm_loop_cnt_" #n "(%%rip), %%" INJECT_ASM_REG_P "\n\t" \
	"mov (%%" INJECT_ASM_REG_P "), %%" INJECT_ASM_REG "\n\t" \
	"test %%" INJECT_ASM_REG ",%%" INJECT_ASM_REG "\n\t" \
	"jz 333f\n\t" \
	"222:\n\t" \
	"dec %%" INJECT_ASM_REG "\n\t" \
	"jnz 222b\n\t" \
	"333:\n\t"

#elif defined(__s390__)

#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1]"m"(loop_cnt[1]) \
	, [loop_cnt_2]"m"(loop_cnt[2]) \
	, [loop_cnt_3]"m"(loop_cnt[3]) \
	, [loop_cnt_4]"m"(loop_cnt[4]) \
	, [loop_cnt_5]"m"(loop_cnt[5]) \
	, [loop_cnt_6]"m"(loop_cnt[6])

#define INJECT_ASM_REG "r12"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"l %%" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
	"ltr %%" INJECT_ASM_REG ", %%" INJECT_ASM_REG "\n\t" \
	"je 333f\n\t" \
	"222:\n\t" \
	"ahi %%" INJECT_ASM_REG ", -1\n\t" \
	"jnz 222b\n\t" \
	"333:\n\t"

#elif defined(__ARMEL__)

#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1]"m"(loop_cnt[1]) \
	, [loop_cnt_2]"m"(loop_cnt[2]) \
	, [loop_cnt_3]"m"(loop_cnt[3]) \
	, [loop_cnt_4]"m"(loop_cnt[4]) \
	, [loop_cnt_5]"m"(loop_cnt[5]) \
	, [loop_cnt_6]"m"(loop_cnt[6])

#define INJECT_ASM_REG "r4"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"ldr " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
	"cmp " INJECT_ASM_REG ", #0\n\t" \
	"beq 333f\n\t" \
	"222:\n\t" \
	"subs " INJECT_ASM_REG ", #1\n\t" \
	"bne 222b\n\t" \
	"333:\n\t"

#elif defined(__AARCH64EL__)

#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1] "Qo" (loop_cnt[1]) \
	, [loop_cnt_2] "Qo" (loop_cnt[2]) \
	, [loop_cnt_3] "Qo" (loop_cnt[3]) \
	, [loop_cnt_4] "Qo" (loop_cnt[4]) \
	, [loop_cnt_5] "Qo" (loop_cnt[5]) \
	, [loop_cnt_6] "Qo" (loop_cnt[6])

/* Scratch register name is provided by the aarch64 rseq.h (no clobber
 * macro needed here). */
#define INJECT_ASM_REG	RSEQ_ASM_TMP_REG32

#define RSEQ_INJECT_ASM(n) \
	"	ldr	" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n" \
	"	cbz	" INJECT_ASM_REG ", 333f\n" \
	"222:\n" \
	"	sub	" INJECT_ASM_REG ", " INJECT_ASM_REG ", #1\n" \
	"	cbnz	" INJECT_ASM_REG ", 222b\n" \
	"333:\n"

#elif defined(__PPC__)

#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1]"m"(loop_cnt[1]) \
	, [loop_cnt_2]"m"(loop_cnt[2]) \
	, [loop_cnt_3]"m"(loop_cnt[3]) \
	, [loop_cnt_4]"m"(loop_cnt[4]) \
	, [loop_cnt_5]"m"(loop_cnt[5]) \
	, [loop_cnt_6]"m"(loop_cnt[6])

#define INJECT_ASM_REG "r18"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"lwz %%" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
	"cmpwi %%" INJECT_ASM_REG ", 0\n\t" \
	"beq 333f\n\t" \
	"222:\n\t" \
	"subic. %%" INJECT_ASM_REG ", %%" INJECT_ASM_REG ", 1\n\t" \
	"bne 222b\n\t" \
	"333:\n\t"

#elif defined(__mips__)

#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1]"m"(loop_cnt[1]) \
	, [loop_cnt_2]"m"(loop_cnt[2]) \
	, [loop_cnt_3]"m"(loop_cnt[3]) \
	, [loop_cnt_4]"m"(loop_cnt[4]) \
	, [loop_cnt_5]"m"(loop_cnt[5]) \
	, [loop_cnt_6]"m"(loop_cnt[6])

#define INJECT_ASM_REG "$5"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"lw " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
	"beqz " INJECT_ASM_REG ", 333f\n\t" \
	"222:\n\t" \
	"addiu " INJECT_ASM_REG ", -1\n\t" \
	"bnez " INJECT_ASM_REG ", 222b\n\t" \
	"333:\n\t"

#elif defined(__riscv)

#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1]"m"(loop_cnt[1]) \
	, [loop_cnt_2]"m"(loop_cnt[2]) \
	, [loop_cnt_3]"m"(loop_cnt[3]) \
	, [loop_cnt_4]"m"(loop_cnt[4]) \
	, [loop_cnt_5]"m"(loop_cnt[5]) \
	, [loop_cnt_6]"m"(loop_cnt[6])

#define INJECT_ASM_REG "t1"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"lw " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
	"beqz " INJECT_ASM_REG ", 333f\n\t" \
	"222:\n\t" \
	"addi " INJECT_ASM_REG "," INJECT_ASM_REG ", -1\n\t" \
	"bnez " INJECT_ASM_REG ", 222b\n\t" \
	"333:\n\t"

#else
#error unsupported target
#endif
234c960e990SMathieu Desnoyers
/* Hook expected by rseq.h: invoked each time a critical section aborts. */
#define RSEQ_INJECT_FAILED \
	nr_abort++;

/*
 * C-level injection at point <n>: spin loop_cnt[n] iterations on
 * rseq_barrier().  The special value -1 (loop never entered, since the
 * bound is negative) combined with a non-zero opt_modulo instead
 * perturbs every opt_modulo-th call: optionally poll()-sleep, yield
 * the cpu, and/or raise SIGUSR1, per the command-line options.
 */
#define RSEQ_INJECT_C(n) \
{ \
	int loc_i, loc_nr_loops = loop_cnt[n]; \
	\
	for (loc_i = 0; loc_i < loc_nr_loops; loc_i++) { \
		rseq_barrier(); \
	} \
	if (loc_nr_loops == -1 && opt_modulo) { \
		if (yield_mod_cnt == opt_modulo - 1) { \
			if (opt_sleep > 0) \
				poll(NULL, 0, opt_sleep); \
			if (opt_yield) \
				sched_yield(); \
			if (opt_signal) \
				raise(SIGUSR1); \
			yield_mod_cnt = 0; \
		} else { \
			yield_mod_cnt++; \
		} \
	} \
}
259c960e990SMathieu Desnoyers
260c960e990SMathieu Desnoyers #else
261c960e990SMathieu Desnoyers
262c960e990SMathieu Desnoyers #define printf_verbose(fmt, ...)
263c960e990SMathieu Desnoyers
264c960e990SMathieu Desnoyers #endif /* BENCHMARK */
265c960e990SMathieu Desnoyers
266c960e990SMathieu Desnoyers #include "rseq.h"
267c960e990SMathieu Desnoyers
/* Memory ordering passed to the rseq operation helpers. */
static enum rseq_mo opt_mo = RSEQ_MO_RELAXED;
269ee31fff0SMathieu Desnoyers
/*
 * Abstraction over the per-cpu index used by the tests: either the
 * concurrency id (mm_cid) or the raw cpu number, selected at build time.
 * The membarrier-based test is only built where the architecture
 * provides the offset-deref-addv rseq operation.
 */
#ifdef RSEQ_ARCH_HAS_OFFSET_DEREF_ADDV
#define TEST_MEMBARRIER

/* Thin wrapper around the membarrier(2) system call. */
static int sys_membarrier(int cmd, int flags, int cpu_id)
{
	return syscall(__NR_membarrier, cmd, flags, cpu_id);
}
#endif

#ifdef BUILDOPT_RSEQ_PERCPU_MM_CID
# define RSEQ_PERCPU	RSEQ_PERCPU_MM_CID
/* Index per-cpu data by this thread's concurrency id (mm_cid). */
static
int get_current_cpu_id(void)
{
	return rseq_current_mm_cid();
}
/* True when mm_cid is usable on this kernel/thread. */
static
bool rseq_validate_cpu_id(void)
{
	return rseq_mm_cid_available();
}
# ifdef TEST_MEMBARRIER
/*
 * Membarrier does not currently support targeting a mm_cid, so
 * issue the barrier on all cpus.
 */
static
int rseq_membarrier_expedited(int cpu)
{
	return sys_membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ,
			      0, 0);
}
# endif /* TEST_MEMBARRIER */
#else
# define RSEQ_PERCPU	RSEQ_PERCPU_CPU_ID
/* Index per-cpu data by the cpu the thread is running on. */
static
int get_current_cpu_id(void)
{
	return rseq_cpu_start();
}
/* True when a valid cpu number can be read from the rseq area. */
static
bool rseq_validate_cpu_id(void)
{
	return rseq_current_cpu_raw() >= 0;
}
# ifdef TEST_MEMBARRIER
/* Issue an expedited rseq membarrier targeting a single cpu. */
static
int rseq_membarrier_expedited(int cpu)
{
	return sys_membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ,
			      MEMBARRIER_CMD_FLAG_CPU, cpu);
}
# endif /* TEST_MEMBARRIER */
#endif
324ee31fff0SMathieu Desnoyers
/* One lock word per cpu slot; aligned to 128 bytes so adjacent slots
 * do not share cache lines. */
struct percpu_lock_entry {
	intptr_t v;	/* 0 = unlocked, 1 = locked (see rseq_this_cpu_lock()) */
} __attribute__((aligned(128)));

/* Array of per-cpu lock slots, indexed by cpu number or mm_cid. */
struct percpu_lock {
	struct percpu_lock_entry c[CPU_SETSIZE];
};

/* One counter per cpu slot, padded like percpu_lock_entry. */
struct test_data_entry {
	intptr_t count;
} __attribute__((aligned(128)));

/* State shared by the spinlock test: per-cpu counters guarded by a
 * per-cpu lock. */
struct spinlock_test_data {
	struct percpu_lock lock;
	struct test_data_entry c[CPU_SETSIZE];
};

/* Per-thread arguments for test_percpu_spinlock_thread(). */
struct spinlock_thread_test_data {
	struct spinlock_test_data *data;	/* shared lock + counters */
	long long reps;				/* iterations to perform */
	int reg;				/* register thread with rseq? */
};

/* State shared by the lock-free increment test. */
struct inc_test_data {
	struct test_data_entry c[CPU_SETSIZE];
};

/* Per-thread arguments for test_percpu_inc_thread(). */
struct inc_thread_test_data {
	struct inc_test_data *data;	/* shared counters */
	long long reps;			/* iterations to perform */
	int reg;			/* register thread with rseq? */
};

/* Singly-linked node for the per-cpu list test. */
struct percpu_list_node {
	intptr_t data;
	struct percpu_list_node *next;
};

/* Per-cpu list head, padded against false sharing. */
struct percpu_list_entry {
	struct percpu_list_node *head;
} __attribute__((aligned(128)));

struct percpu_list {
	struct percpu_list_entry c[CPU_SETSIZE];
};

#define BUFFER_ITEM_PER_CPU 100

/* Element stored by pointer in the per-cpu buffer test. */
struct percpu_buffer_node {
	intptr_t data;
};

/* Per-cpu pointer-array buffer; offset/buflen look like fill level and
 * capacity of array[] — confirmed by the buffer test code later in the
 * file (outside this chunk). */
struct percpu_buffer_entry {
	intptr_t offset;
	intptr_t buflen;
	struct percpu_buffer_node **array;
} __attribute__((aligned(128)));

struct percpu_buffer {
	struct percpu_buffer_entry c[CPU_SETSIZE];
};

#define MEMCPY_BUFFER_ITEM_PER_CPU 100

/* Element copied by value in the memcpy-buffer test. */
struct percpu_memcpy_buffer_node {
	intptr_t data1;
	uint64_t data2;
};

/* Per-cpu by-value buffer; same offset/buflen convention as
 * percpu_buffer_entry. */
struct percpu_memcpy_buffer_entry {
	intptr_t offset;
	intptr_t buflen;
	struct percpu_memcpy_buffer_node *array;
} __attribute__((aligned(128)));

struct percpu_memcpy_buffer {
	struct percpu_memcpy_buffer_entry c[CPU_SETSIZE];
};
403c960e990SMathieu Desnoyers
/*
 * A simple percpu spinlock. Grabs lock on current cpu.
 *
 * Returns the cpu (or mm_cid) index whose lock slot was acquired; the
 * caller must pass the same index to rseq_percpu_unlock().  The 0 -> 1
 * transition is done with an rseq compare-and-store so the store is
 * guaranteed to target the cpu the thread is actually running on.
 */
static int rseq_this_cpu_lock(struct percpu_lock *lock)
{
	int cpu;

	for (;;) {
		int ret;

		cpu = get_current_cpu_id();
		/* A negative id means the cpu/cid is unusable: report and die. */
		if (cpu < 0) {
			fprintf(stderr, "pid: %d: tid: %d, cpu: %d: cid: %d\n",
					getpid(), (int) rseq_gettid(), rseq_current_cpu_raw(), cpu);
			abort();
		}
		ret = rseq_cmpeqv_storev(RSEQ_MO_RELAXED, RSEQ_PERCPU,
					 &lock->c[cpu].v,
					 0, 1, cpu);
		if (rseq_likely(!ret))
			break;
		/* Retry if comparison fails or rseq aborts. */
	}
	/*
	 * Acquire semantic when taking lock after control dependency.
	 * Matches rseq_smp_store_release().
	 */
	rseq_smp_acquire__after_ctrl_dep();
	return cpu;
}
432c960e990SMathieu Desnoyers
/*
 * Release the per-cpu lock slot taken by rseq_this_cpu_lock().
 * 'cpu' must be the index that rseq_this_cpu_lock() returned.
 */
static void rseq_percpu_unlock(struct percpu_lock *lock, int cpu)
{
	assert(lock->c[cpu].v == 1);
	/*
	 * Release lock, with release semantic. Matches
	 * rseq_smp_acquire__after_ctrl_dep().
	 */
	rseq_smp_store_release(&lock->c[cpu].v, 0);
}
442c960e990SMathieu Desnoyers
test_percpu_spinlock_thread(void * arg)443c960e990SMathieu Desnoyers void *test_percpu_spinlock_thread(void *arg)
444c960e990SMathieu Desnoyers {
445c960e990SMathieu Desnoyers struct spinlock_thread_test_data *thread_data = arg;
446c960e990SMathieu Desnoyers struct spinlock_test_data *data = thread_data->data;
447c960e990SMathieu Desnoyers long long i, reps;
448c960e990SMathieu Desnoyers
449c960e990SMathieu Desnoyers if (!opt_disable_rseq && thread_data->reg &&
450c960e990SMathieu Desnoyers rseq_register_current_thread())
451c960e990SMathieu Desnoyers abort();
452c960e990SMathieu Desnoyers reps = thread_data->reps;
453c960e990SMathieu Desnoyers for (i = 0; i < reps; i++) {
454930378d0SMathieu Desnoyers int cpu = rseq_this_cpu_lock(&data->lock);
455c960e990SMathieu Desnoyers data->c[cpu].count++;
456c960e990SMathieu Desnoyers rseq_percpu_unlock(&data->lock, cpu);
457c960e990SMathieu Desnoyers #ifndef BENCHMARK
458c960e990SMathieu Desnoyers if (i != 0 && !(i % (reps / 10)))
4598df34c56SMathieu Desnoyers printf_verbose("tid %d: count %lld\n",
4608df34c56SMathieu Desnoyers (int) rseq_gettid(), i);
461c960e990SMathieu Desnoyers #endif
462c960e990SMathieu Desnoyers }
463c960e990SMathieu Desnoyers printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
4648df34c56SMathieu Desnoyers (int) rseq_gettid(), nr_abort, signals_delivered);
465c960e990SMathieu Desnoyers if (!opt_disable_rseq && thread_data->reg &&
466c960e990SMathieu Desnoyers rseq_unregister_current_thread())
467c960e990SMathieu Desnoyers abort();
468c960e990SMathieu Desnoyers return NULL;
469c960e990SMathieu Desnoyers }
470c960e990SMathieu Desnoyers
471c960e990SMathieu Desnoyers /*
472c960e990SMathieu Desnoyers * A simple test which implements a sharded counter using a per-cpu
473c960e990SMathieu Desnoyers * lock. Obviously real applications might prefer to simply use a
474c960e990SMathieu Desnoyers * per-cpu increment; however, this is reasonable for a test and the
475c960e990SMathieu Desnoyers * lock can be extended to synchronize more complicated operations.
476c960e990SMathieu Desnoyers */
test_percpu_spinlock(void)477c960e990SMathieu Desnoyers void test_percpu_spinlock(void)
478c960e990SMathieu Desnoyers {
479c960e990SMathieu Desnoyers const int num_threads = opt_threads;
480c960e990SMathieu Desnoyers int i, ret;
481c960e990SMathieu Desnoyers uint64_t sum;
482c960e990SMathieu Desnoyers pthread_t test_threads[num_threads];
483c960e990SMathieu Desnoyers struct spinlock_test_data data;
484c960e990SMathieu Desnoyers struct spinlock_thread_test_data thread_data[num_threads];
485c960e990SMathieu Desnoyers
486c960e990SMathieu Desnoyers memset(&data, 0, sizeof(data));
487c960e990SMathieu Desnoyers for (i = 0; i < num_threads; i++) {
488c960e990SMathieu Desnoyers thread_data[i].reps = opt_reps;
489c960e990SMathieu Desnoyers if (opt_disable_mod <= 0 || (i % opt_disable_mod))
490c960e990SMathieu Desnoyers thread_data[i].reg = 1;
491c960e990SMathieu Desnoyers else
492c960e990SMathieu Desnoyers thread_data[i].reg = 0;
493c960e990SMathieu Desnoyers thread_data[i].data = &data;
494c960e990SMathieu Desnoyers ret = pthread_create(&test_threads[i], NULL,
495c960e990SMathieu Desnoyers test_percpu_spinlock_thread,
496c960e990SMathieu Desnoyers &thread_data[i]);
497c960e990SMathieu Desnoyers if (ret) {
498c960e990SMathieu Desnoyers errno = ret;
499c960e990SMathieu Desnoyers perror("pthread_create");
500c960e990SMathieu Desnoyers abort();
501c960e990SMathieu Desnoyers }
502c960e990SMathieu Desnoyers }
503c960e990SMathieu Desnoyers
504c960e990SMathieu Desnoyers for (i = 0; i < num_threads; i++) {
505c960e990SMathieu Desnoyers ret = pthread_join(test_threads[i], NULL);
506c960e990SMathieu Desnoyers if (ret) {
507c960e990SMathieu Desnoyers errno = ret;
508c960e990SMathieu Desnoyers perror("pthread_join");
509c960e990SMathieu Desnoyers abort();
510c960e990SMathieu Desnoyers }
511c960e990SMathieu Desnoyers }
512c960e990SMathieu Desnoyers
513c960e990SMathieu Desnoyers sum = 0;
514c960e990SMathieu Desnoyers for (i = 0; i < CPU_SETSIZE; i++)
515c960e990SMathieu Desnoyers sum += data.c[i].count;
516c960e990SMathieu Desnoyers
517c960e990SMathieu Desnoyers assert(sum == (uint64_t)opt_reps * num_threads);
518c960e990SMathieu Desnoyers }
519c960e990SMathieu Desnoyers
test_percpu_inc_thread(void * arg)520c960e990SMathieu Desnoyers void *test_percpu_inc_thread(void *arg)
521c960e990SMathieu Desnoyers {
522c960e990SMathieu Desnoyers struct inc_thread_test_data *thread_data = arg;
523c960e990SMathieu Desnoyers struct inc_test_data *data = thread_data->data;
524c960e990SMathieu Desnoyers long long i, reps;
525c960e990SMathieu Desnoyers
526c960e990SMathieu Desnoyers if (!opt_disable_rseq && thread_data->reg &&
527c960e990SMathieu Desnoyers rseq_register_current_thread())
528c960e990SMathieu Desnoyers abort();
529c960e990SMathieu Desnoyers reps = thread_data->reps;
530c960e990SMathieu Desnoyers for (i = 0; i < reps; i++) {
531c960e990SMathieu Desnoyers int ret;
532c960e990SMathieu Desnoyers
533c960e990SMathieu Desnoyers do {
534c960e990SMathieu Desnoyers int cpu;
535c960e990SMathieu Desnoyers
536ee31fff0SMathieu Desnoyers cpu = get_current_cpu_id();
537ee31fff0SMathieu Desnoyers ret = rseq_addv(RSEQ_MO_RELAXED, RSEQ_PERCPU,
538ee31fff0SMathieu Desnoyers &data->c[cpu].count, 1, cpu);
539c960e990SMathieu Desnoyers } while (rseq_unlikely(ret));
540c960e990SMathieu Desnoyers #ifndef BENCHMARK
541c960e990SMathieu Desnoyers if (i != 0 && !(i % (reps / 10)))
5428df34c56SMathieu Desnoyers printf_verbose("tid %d: count %lld\n",
5438df34c56SMathieu Desnoyers (int) rseq_gettid(), i);
544c960e990SMathieu Desnoyers #endif
545c960e990SMathieu Desnoyers }
546c960e990SMathieu Desnoyers printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
5478df34c56SMathieu Desnoyers (int) rseq_gettid(), nr_abort, signals_delivered);
548c960e990SMathieu Desnoyers if (!opt_disable_rseq && thread_data->reg &&
549c960e990SMathieu Desnoyers rseq_unregister_current_thread())
550c960e990SMathieu Desnoyers abort();
551c960e990SMathieu Desnoyers return NULL;
552c960e990SMathieu Desnoyers }
553c960e990SMathieu Desnoyers
test_percpu_inc(void)554c960e990SMathieu Desnoyers void test_percpu_inc(void)
555c960e990SMathieu Desnoyers {
556c960e990SMathieu Desnoyers const int num_threads = opt_threads;
557c960e990SMathieu Desnoyers int i, ret;
558c960e990SMathieu Desnoyers uint64_t sum;
559c960e990SMathieu Desnoyers pthread_t test_threads[num_threads];
560c960e990SMathieu Desnoyers struct inc_test_data data;
561c960e990SMathieu Desnoyers struct inc_thread_test_data thread_data[num_threads];
562c960e990SMathieu Desnoyers
563c960e990SMathieu Desnoyers memset(&data, 0, sizeof(data));
564c960e990SMathieu Desnoyers for (i = 0; i < num_threads; i++) {
565c960e990SMathieu Desnoyers thread_data[i].reps = opt_reps;
566c960e990SMathieu Desnoyers if (opt_disable_mod <= 0 || (i % opt_disable_mod))
567c960e990SMathieu Desnoyers thread_data[i].reg = 1;
568c960e990SMathieu Desnoyers else
569c960e990SMathieu Desnoyers thread_data[i].reg = 0;
570c960e990SMathieu Desnoyers thread_data[i].data = &data;
571c960e990SMathieu Desnoyers ret = pthread_create(&test_threads[i], NULL,
572c960e990SMathieu Desnoyers test_percpu_inc_thread,
573c960e990SMathieu Desnoyers &thread_data[i]);
574c960e990SMathieu Desnoyers if (ret) {
575c960e990SMathieu Desnoyers errno = ret;
576c960e990SMathieu Desnoyers perror("pthread_create");
577c960e990SMathieu Desnoyers abort();
578c960e990SMathieu Desnoyers }
579c960e990SMathieu Desnoyers }
580c960e990SMathieu Desnoyers
581c960e990SMathieu Desnoyers for (i = 0; i < num_threads; i++) {
582c960e990SMathieu Desnoyers ret = pthread_join(test_threads[i], NULL);
583c960e990SMathieu Desnoyers if (ret) {
584c960e990SMathieu Desnoyers errno = ret;
585c960e990SMathieu Desnoyers perror("pthread_join");
586c960e990SMathieu Desnoyers abort();
587c960e990SMathieu Desnoyers }
588c960e990SMathieu Desnoyers }
589c960e990SMathieu Desnoyers
590c960e990SMathieu Desnoyers sum = 0;
591c960e990SMathieu Desnoyers for (i = 0; i < CPU_SETSIZE; i++)
592c960e990SMathieu Desnoyers sum += data.c[i].count;
593c960e990SMathieu Desnoyers
594c960e990SMathieu Desnoyers assert(sum == (uint64_t)opt_reps * num_threads);
595c960e990SMathieu Desnoyers }
596c960e990SMathieu Desnoyers
/*
 * Push 'node' onto the current cpu's list using an rseq
 * compare-and-store, so the head update is atomic with respect to
 * preemption/migration on that cpu.  If _cpu is non-NULL, the cpu index
 * used is stored there.
 */
void this_cpu_list_push(struct percpu_list *list,
			struct percpu_list_node *node,
			int *_cpu)
{
	int cpu;

	for (;;) {
		intptr_t *targetptr, newval, expect;
		int ret;

		cpu = get_current_cpu_id();
		/* Load list->c[cpu].head with single-copy atomicity. */
		expect = (intptr_t)RSEQ_READ_ONCE(list->c[cpu].head);
		newval = (intptr_t)node;
		targetptr = (intptr_t *)&list->c[cpu].head;
		node->next = (struct percpu_list_node *)expect;
		ret = rseq_cmpeqv_storev(RSEQ_MO_RELAXED, RSEQ_PERCPU,
					 targetptr, expect, newval, cpu);
		if (rseq_likely(!ret))
			break;
		/* Retry if comparison fails or rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
}
622c960e990SMathieu Desnoyers
/*
 * Unlike a traditional lock-less linked list, the availability of an
 * rseq primitive allows us to implement pop without concerns over
 * ABA-type races.
 */
/*
 * Pop the head node from the per-cpu list of the CPU the caller runs on.
 *
 * Uses rseq_cmpnev_storeoffp_load(): in one restartable sequence, if the
 * head is not NULL, the old head is loaded into @head and the word at
 * @offset within it (the next pointer) is stored as the new head.
 *
 * Returns the popped node, or NULL if the list was empty. If @_cpu is
 * non-NULL, the CPU on which the pop took place is stored into it.
 */
struct percpu_list_node *this_cpu_list_pop(struct percpu_list *list,
					   int *_cpu)
{
	struct percpu_list_node *node = NULL;
	int cpu;

	for (;;) {
		struct percpu_list_node *head;
		intptr_t *targetptr, expectnot, *load;
		long offset;
		int ret;

		cpu = get_current_cpu_id();
		targetptr = (intptr_t *)&list->c[cpu].head;
		expectnot = (intptr_t)NULL;
		offset = offsetof(struct percpu_list_node, next);
		load = (intptr_t *)&head;
		ret = rseq_cmpnev_storeoffp_load(RSEQ_MO_RELAXED, RSEQ_PERCPU,
						 targetptr, expectnot,
						 offset, load, cpu);
		if (rseq_likely(!ret)) {
			node = head;
			break;
		}
		/* Positive return: head equalled @expectnot (empty list). */
		if (ret > 0)
			break;
		/* Retry if rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
	return node;
}
660c960e990SMathieu Desnoyers
661c960e990SMathieu Desnoyers /*
662c960e990SMathieu Desnoyers * __percpu_list_pop is not safe against concurrent accesses. Should
663c960e990SMathieu Desnoyers * only be used on lists that are not concurrently modified.
664c960e990SMathieu Desnoyers */
__percpu_list_pop(struct percpu_list * list,int cpu)665c960e990SMathieu Desnoyers struct percpu_list_node *__percpu_list_pop(struct percpu_list *list, int cpu)
666c960e990SMathieu Desnoyers {
667c960e990SMathieu Desnoyers struct percpu_list_node *node;
668c960e990SMathieu Desnoyers
669c960e990SMathieu Desnoyers node = list->c[cpu].head;
670c960e990SMathieu Desnoyers if (!node)
671c960e990SMathieu Desnoyers return NULL;
672c960e990SMathieu Desnoyers list->c[cpu].head = node->next;
673c960e990SMathieu Desnoyers return node;
674c960e990SMathieu Desnoyers }
675c960e990SMathieu Desnoyers
test_percpu_list_thread(void * arg)676c960e990SMathieu Desnoyers void *test_percpu_list_thread(void *arg)
677c960e990SMathieu Desnoyers {
678c960e990SMathieu Desnoyers long long i, reps;
679c960e990SMathieu Desnoyers struct percpu_list *list = (struct percpu_list *)arg;
680c960e990SMathieu Desnoyers
681c960e990SMathieu Desnoyers if (!opt_disable_rseq && rseq_register_current_thread())
682c960e990SMathieu Desnoyers abort();
683c960e990SMathieu Desnoyers
684c960e990SMathieu Desnoyers reps = opt_reps;
685c960e990SMathieu Desnoyers for (i = 0; i < reps; i++) {
686c960e990SMathieu Desnoyers struct percpu_list_node *node;
687c960e990SMathieu Desnoyers
688c960e990SMathieu Desnoyers node = this_cpu_list_pop(list, NULL);
689c960e990SMathieu Desnoyers if (opt_yield)
690c960e990SMathieu Desnoyers sched_yield(); /* encourage shuffling */
691c960e990SMathieu Desnoyers if (node)
692c960e990SMathieu Desnoyers this_cpu_list_push(list, node, NULL);
693c960e990SMathieu Desnoyers }
694c960e990SMathieu Desnoyers
695c960e990SMathieu Desnoyers printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
6968df34c56SMathieu Desnoyers (int) rseq_gettid(), nr_abort, signals_delivered);
697c960e990SMathieu Desnoyers if (!opt_disable_rseq && rseq_unregister_current_thread())
698c960e990SMathieu Desnoyers abort();
699c960e990SMathieu Desnoyers
700c960e990SMathieu Desnoyers return NULL;
701c960e990SMathieu Desnoyers }
702c960e990SMathieu Desnoyers
703c960e990SMathieu Desnoyers /* Simultaneous modification to a per-cpu linked list from many threads. */
test_percpu_list(void)704c960e990SMathieu Desnoyers void test_percpu_list(void)
705c960e990SMathieu Desnoyers {
706c960e990SMathieu Desnoyers const int num_threads = opt_threads;
707c960e990SMathieu Desnoyers int i, j, ret;
708c960e990SMathieu Desnoyers uint64_t sum = 0, expected_sum = 0;
709c960e990SMathieu Desnoyers struct percpu_list list;
710c960e990SMathieu Desnoyers pthread_t test_threads[num_threads];
711c960e990SMathieu Desnoyers cpu_set_t allowed_cpus;
712c960e990SMathieu Desnoyers
713c960e990SMathieu Desnoyers memset(&list, 0, sizeof(list));
714c960e990SMathieu Desnoyers
715c960e990SMathieu Desnoyers /* Generate list entries for every usable cpu. */
716c960e990SMathieu Desnoyers sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
717c960e990SMathieu Desnoyers for (i = 0; i < CPU_SETSIZE; i++) {
718c960e990SMathieu Desnoyers if (!CPU_ISSET(i, &allowed_cpus))
719c960e990SMathieu Desnoyers continue;
720c960e990SMathieu Desnoyers for (j = 1; j <= 100; j++) {
721c960e990SMathieu Desnoyers struct percpu_list_node *node;
722c960e990SMathieu Desnoyers
723c960e990SMathieu Desnoyers expected_sum += j;
724c960e990SMathieu Desnoyers
725c960e990SMathieu Desnoyers node = malloc(sizeof(*node));
726c960e990SMathieu Desnoyers assert(node);
727c960e990SMathieu Desnoyers node->data = j;
728c960e990SMathieu Desnoyers node->next = list.c[i].head;
729c960e990SMathieu Desnoyers list.c[i].head = node;
730c960e990SMathieu Desnoyers }
731c960e990SMathieu Desnoyers }
732c960e990SMathieu Desnoyers
733c960e990SMathieu Desnoyers for (i = 0; i < num_threads; i++) {
734c960e990SMathieu Desnoyers ret = pthread_create(&test_threads[i], NULL,
735c960e990SMathieu Desnoyers test_percpu_list_thread, &list);
736c960e990SMathieu Desnoyers if (ret) {
737c960e990SMathieu Desnoyers errno = ret;
738c960e990SMathieu Desnoyers perror("pthread_create");
739c960e990SMathieu Desnoyers abort();
740c960e990SMathieu Desnoyers }
741c960e990SMathieu Desnoyers }
742c960e990SMathieu Desnoyers
743c960e990SMathieu Desnoyers for (i = 0; i < num_threads; i++) {
744c960e990SMathieu Desnoyers ret = pthread_join(test_threads[i], NULL);
745c960e990SMathieu Desnoyers if (ret) {
746c960e990SMathieu Desnoyers errno = ret;
747c960e990SMathieu Desnoyers perror("pthread_join");
748c960e990SMathieu Desnoyers abort();
749c960e990SMathieu Desnoyers }
750c960e990SMathieu Desnoyers }
751c960e990SMathieu Desnoyers
752c960e990SMathieu Desnoyers for (i = 0; i < CPU_SETSIZE; i++) {
753c960e990SMathieu Desnoyers struct percpu_list_node *node;
754c960e990SMathieu Desnoyers
755c960e990SMathieu Desnoyers if (!CPU_ISSET(i, &allowed_cpus))
756c960e990SMathieu Desnoyers continue;
757c960e990SMathieu Desnoyers
758c960e990SMathieu Desnoyers while ((node = __percpu_list_pop(&list, i))) {
759c960e990SMathieu Desnoyers sum += node->data;
760c960e990SMathieu Desnoyers free(node);
761c960e990SMathieu Desnoyers }
762c960e990SMathieu Desnoyers }
763c960e990SMathieu Desnoyers
764c960e990SMathieu Desnoyers /*
765c960e990SMathieu Desnoyers * All entries should now be accounted for (unless some external
766c960e990SMathieu Desnoyers * actor is interfering with our allowed affinity while this
767c960e990SMathieu Desnoyers * test is running).
768c960e990SMathieu Desnoyers */
769c960e990SMathieu Desnoyers assert(sum == expected_sum);
770c960e990SMathieu Desnoyers }
771c960e990SMathieu Desnoyers
/*
 * Push @node onto the ring buffer of the CPU the caller runs on.
 *
 * Uses a speculative-store + commit-store rseq sequence: the node
 * pointer is stored into the array slot, and the offset increment is
 * the final committing store (ordering chosen by opt_mo).
 *
 * Returns true on success, false if the per-cpu buffer is full. If
 * @_cpu is non-NULL, the CPU on which the push took place is stored
 * into it.
 */
bool this_cpu_buffer_push(struct percpu_buffer *buffer,
			  struct percpu_buffer_node *node,
			  int *_cpu)
{
	bool result = false;
	int cpu;

	for (;;) {
		intptr_t *targetptr_spec, newval_spec;
		intptr_t *targetptr_final, newval_final;
		intptr_t offset;
		int ret;

		cpu = get_current_cpu_id();
		offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
		/* Buffer full: fail without retrying. */
		if (offset == buffer->c[cpu].buflen)
			break;
		newval_spec = (intptr_t)node;
		targetptr_spec = (intptr_t *)&buffer->c[cpu].array[offset];
		newval_final = offset + 1;
		targetptr_final = &buffer->c[cpu].offset;
		ret = rseq_cmpeqv_trystorev_storev(opt_mo, RSEQ_PERCPU,
			targetptr_final, offset, targetptr_spec,
			newval_spec, newval_final, cpu);
		if (rseq_likely(!ret)) {
			result = true;
			break;
		}
		/* Retry if comparison fails or rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
	return result;
}
806c960e990SMathieu Desnoyers
/*
 * Pop the top node from the ring buffer of the CPU the caller runs on.
 *
 * Uses a double-compare rseq sequence: both the offset and the array
 * slot content are re-validated before the offset decrement commits,
 * so a concurrent push/pop on the same CPU cannot be mixed up with the
 * value observed here.
 *
 * Returns the popped node, or NULL if the buffer is empty. If @_cpu is
 * non-NULL, the CPU on which the pop took place is stored into it.
 */
struct percpu_buffer_node *this_cpu_buffer_pop(struct percpu_buffer *buffer,
					       int *_cpu)
{
	struct percpu_buffer_node *head;
	int cpu;

	for (;;) {
		intptr_t *targetptr, newval;
		intptr_t offset;
		int ret;

		cpu = get_current_cpu_id();
		/* Load offset with single-copy atomicity. */
		offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
		if (offset == 0) {
			head = NULL;
			break;
		}
		head = RSEQ_READ_ONCE(buffer->c[cpu].array[offset - 1]);
		newval = offset - 1;
		targetptr = (intptr_t *)&buffer->c[cpu].offset;
		ret = rseq_cmpeqv_cmpeqv_storev(RSEQ_MO_RELAXED, RSEQ_PERCPU,
						targetptr, offset,
			(intptr_t *)&buffer->c[cpu].array[offset - 1],
			(intptr_t)head, newval, cpu);
		if (rseq_likely(!ret))
			break;
		/* Retry if comparison fails or rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
	return head;
}
840c960e990SMathieu Desnoyers
841c960e990SMathieu Desnoyers /*
842c960e990SMathieu Desnoyers * __percpu_buffer_pop is not safe against concurrent accesses. Should
843c960e990SMathieu Desnoyers * only be used on buffers that are not concurrently modified.
844c960e990SMathieu Desnoyers */
__percpu_buffer_pop(struct percpu_buffer * buffer,int cpu)845c960e990SMathieu Desnoyers struct percpu_buffer_node *__percpu_buffer_pop(struct percpu_buffer *buffer,
846c960e990SMathieu Desnoyers int cpu)
847c960e990SMathieu Desnoyers {
848c960e990SMathieu Desnoyers struct percpu_buffer_node *head;
849c960e990SMathieu Desnoyers intptr_t offset;
850c960e990SMathieu Desnoyers
851c960e990SMathieu Desnoyers offset = buffer->c[cpu].offset;
852c960e990SMathieu Desnoyers if (offset == 0)
853c960e990SMathieu Desnoyers return NULL;
854c960e990SMathieu Desnoyers head = buffer->c[cpu].array[offset - 1];
855c960e990SMathieu Desnoyers buffer->c[cpu].offset = offset - 1;
856c960e990SMathieu Desnoyers return head;
857c960e990SMathieu Desnoyers }
858c960e990SMathieu Desnoyers
test_percpu_buffer_thread(void * arg)859c960e990SMathieu Desnoyers void *test_percpu_buffer_thread(void *arg)
860c960e990SMathieu Desnoyers {
861c960e990SMathieu Desnoyers long long i, reps;
862c960e990SMathieu Desnoyers struct percpu_buffer *buffer = (struct percpu_buffer *)arg;
863c960e990SMathieu Desnoyers
864c960e990SMathieu Desnoyers if (!opt_disable_rseq && rseq_register_current_thread())
865c960e990SMathieu Desnoyers abort();
866c960e990SMathieu Desnoyers
867c960e990SMathieu Desnoyers reps = opt_reps;
868c960e990SMathieu Desnoyers for (i = 0; i < reps; i++) {
869c960e990SMathieu Desnoyers struct percpu_buffer_node *node;
870c960e990SMathieu Desnoyers
871c960e990SMathieu Desnoyers node = this_cpu_buffer_pop(buffer, NULL);
872c960e990SMathieu Desnoyers if (opt_yield)
873c960e990SMathieu Desnoyers sched_yield(); /* encourage shuffling */
874c960e990SMathieu Desnoyers if (node) {
875c960e990SMathieu Desnoyers if (!this_cpu_buffer_push(buffer, node, NULL)) {
876c960e990SMathieu Desnoyers /* Should increase buffer size. */
877c960e990SMathieu Desnoyers abort();
878c960e990SMathieu Desnoyers }
879c960e990SMathieu Desnoyers }
880c960e990SMathieu Desnoyers }
881c960e990SMathieu Desnoyers
882c960e990SMathieu Desnoyers printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
8838df34c56SMathieu Desnoyers (int) rseq_gettid(), nr_abort, signals_delivered);
884c960e990SMathieu Desnoyers if (!opt_disable_rseq && rseq_unregister_current_thread())
885c960e990SMathieu Desnoyers abort();
886c960e990SMathieu Desnoyers
887c960e990SMathieu Desnoyers return NULL;
888c960e990SMathieu Desnoyers }
889c960e990SMathieu Desnoyers
890c960e990SMathieu Desnoyers /* Simultaneous modification to a per-cpu buffer from many threads. */
test_percpu_buffer(void)891c960e990SMathieu Desnoyers void test_percpu_buffer(void)
892c960e990SMathieu Desnoyers {
893c960e990SMathieu Desnoyers const int num_threads = opt_threads;
894c960e990SMathieu Desnoyers int i, j, ret;
895c960e990SMathieu Desnoyers uint64_t sum = 0, expected_sum = 0;
896c960e990SMathieu Desnoyers struct percpu_buffer buffer;
897c960e990SMathieu Desnoyers pthread_t test_threads[num_threads];
898c960e990SMathieu Desnoyers cpu_set_t allowed_cpus;
899c960e990SMathieu Desnoyers
900c960e990SMathieu Desnoyers memset(&buffer, 0, sizeof(buffer));
901c960e990SMathieu Desnoyers
902c960e990SMathieu Desnoyers /* Generate list entries for every usable cpu. */
903c960e990SMathieu Desnoyers sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
904c960e990SMathieu Desnoyers for (i = 0; i < CPU_SETSIZE; i++) {
905c960e990SMathieu Desnoyers if (!CPU_ISSET(i, &allowed_cpus))
906c960e990SMathieu Desnoyers continue;
907c960e990SMathieu Desnoyers /* Worse-case is every item in same CPU. */
908c960e990SMathieu Desnoyers buffer.c[i].array =
909c960e990SMathieu Desnoyers malloc(sizeof(*buffer.c[i].array) * CPU_SETSIZE *
910c960e990SMathieu Desnoyers BUFFER_ITEM_PER_CPU);
911c960e990SMathieu Desnoyers assert(buffer.c[i].array);
912c960e990SMathieu Desnoyers buffer.c[i].buflen = CPU_SETSIZE * BUFFER_ITEM_PER_CPU;
913c960e990SMathieu Desnoyers for (j = 1; j <= BUFFER_ITEM_PER_CPU; j++) {
914c960e990SMathieu Desnoyers struct percpu_buffer_node *node;
915c960e990SMathieu Desnoyers
916c960e990SMathieu Desnoyers expected_sum += j;
917c960e990SMathieu Desnoyers
918c960e990SMathieu Desnoyers /*
919c960e990SMathieu Desnoyers * We could theoretically put the word-sized
920c960e990SMathieu Desnoyers * "data" directly in the buffer. However, we
921c960e990SMathieu Desnoyers * want to model objects that would not fit
922c960e990SMathieu Desnoyers * within a single word, so allocate an object
923c960e990SMathieu Desnoyers * for each node.
924c960e990SMathieu Desnoyers */
925c960e990SMathieu Desnoyers node = malloc(sizeof(*node));
926c960e990SMathieu Desnoyers assert(node);
927c960e990SMathieu Desnoyers node->data = j;
928c960e990SMathieu Desnoyers buffer.c[i].array[j - 1] = node;
929c960e990SMathieu Desnoyers buffer.c[i].offset++;
930c960e990SMathieu Desnoyers }
931c960e990SMathieu Desnoyers }
932c960e990SMathieu Desnoyers
933c960e990SMathieu Desnoyers for (i = 0; i < num_threads; i++) {
934c960e990SMathieu Desnoyers ret = pthread_create(&test_threads[i], NULL,
935c960e990SMathieu Desnoyers test_percpu_buffer_thread, &buffer);
936c960e990SMathieu Desnoyers if (ret) {
937c960e990SMathieu Desnoyers errno = ret;
938c960e990SMathieu Desnoyers perror("pthread_create");
939c960e990SMathieu Desnoyers abort();
940c960e990SMathieu Desnoyers }
941c960e990SMathieu Desnoyers }
942c960e990SMathieu Desnoyers
943c960e990SMathieu Desnoyers for (i = 0; i < num_threads; i++) {
944c960e990SMathieu Desnoyers ret = pthread_join(test_threads[i], NULL);
945c960e990SMathieu Desnoyers if (ret) {
946c960e990SMathieu Desnoyers errno = ret;
947c960e990SMathieu Desnoyers perror("pthread_join");
948c960e990SMathieu Desnoyers abort();
949c960e990SMathieu Desnoyers }
950c960e990SMathieu Desnoyers }
951c960e990SMathieu Desnoyers
952c960e990SMathieu Desnoyers for (i = 0; i < CPU_SETSIZE; i++) {
953c960e990SMathieu Desnoyers struct percpu_buffer_node *node;
954c960e990SMathieu Desnoyers
955c960e990SMathieu Desnoyers if (!CPU_ISSET(i, &allowed_cpus))
956c960e990SMathieu Desnoyers continue;
957c960e990SMathieu Desnoyers
958c960e990SMathieu Desnoyers while ((node = __percpu_buffer_pop(&buffer, i))) {
959c960e990SMathieu Desnoyers sum += node->data;
960c960e990SMathieu Desnoyers free(node);
961c960e990SMathieu Desnoyers }
962c960e990SMathieu Desnoyers free(buffer.c[i].array);
963c960e990SMathieu Desnoyers }
964c960e990SMathieu Desnoyers
965c960e990SMathieu Desnoyers /*
966c960e990SMathieu Desnoyers * All entries should now be accounted for (unless some external
967c960e990SMathieu Desnoyers * actor is interfering with our allowed affinity while this
968c960e990SMathieu Desnoyers * test is running).
969c960e990SMathieu Desnoyers */
970c960e990SMathieu Desnoyers assert(sum == expected_sum);
971c960e990SMathieu Desnoyers }
972c960e990SMathieu Desnoyers
/*
 * Push @item (by copy) onto the ring buffer of the CPU the caller runs
 * on, using an rseq try-memcpy + commit-store sequence: the item bytes
 * are copied into the array slot, and the offset increment is the final
 * committing store (ordering chosen by opt_mo).
 *
 * Returns true on success, false if the per-cpu buffer is full. If
 * @_cpu is non-NULL, the CPU on which the push took place is stored
 * into it.
 */
bool this_cpu_memcpy_buffer_push(struct percpu_memcpy_buffer *buffer,
				 struct percpu_memcpy_buffer_node item,
				 int *_cpu)
{
	bool result = false;
	int cpu;

	for (;;) {
		intptr_t *targetptr_final, newval_final, offset;
		char *destptr, *srcptr;
		size_t copylen;
		int ret;

		cpu = get_current_cpu_id();
		/* Load offset with single-copy atomicity. */
		offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
		/* Buffer full: fail without retrying. */
		if (offset == buffer->c[cpu].buflen)
			break;
		destptr = (char *)&buffer->c[cpu].array[offset];
		srcptr = (char *)&item;
		/* copylen must be <= 4kB. */
		copylen = sizeof(item);
		newval_final = offset + 1;
		targetptr_final = &buffer->c[cpu].offset;
		ret = rseq_cmpeqv_trymemcpy_storev(
			opt_mo, RSEQ_PERCPU,
			targetptr_final, offset,
			destptr, srcptr, copylen,
			newval_final, cpu);
		if (rseq_likely(!ret)) {
			result = true;
			break;
		}
		/* Retry if comparison fails or rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
	return result;
}
1012c960e990SMathieu Desnoyers
/*
 * Pop the top item (by copy into @item) from the ring buffer of the CPU
 * the caller runs on, using an rseq try-memcpy + commit-store sequence:
 * the slot bytes are copied out, and the offset decrement is the final
 * committing store.
 *
 * Returns true on success, false if the per-cpu buffer is empty. If
 * @_cpu is non-NULL, the CPU on which the pop took place is stored
 * into it.
 */
bool this_cpu_memcpy_buffer_pop(struct percpu_memcpy_buffer *buffer,
				struct percpu_memcpy_buffer_node *item,
				int *_cpu)
{
	bool result = false;
	int cpu;

	for (;;) {
		intptr_t *targetptr_final, newval_final, offset;
		char *destptr, *srcptr;
		size_t copylen;
		int ret;

		cpu = get_current_cpu_id();
		/* Load offset with single-copy atomicity. */
		offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
		/* Buffer empty: fail without retrying. */
		if (offset == 0)
			break;
		destptr = (char *)item;
		srcptr = (char *)&buffer->c[cpu].array[offset - 1];
		/* copylen must be <= 4kB. */
		copylen = sizeof(*item);
		newval_final = offset - 1;
		targetptr_final = &buffer->c[cpu].offset;
		ret = rseq_cmpeqv_trymemcpy_storev(RSEQ_MO_RELAXED, RSEQ_PERCPU,
			targetptr_final, offset, destptr, srcptr, copylen,
			newval_final, cpu);
		if (rseq_likely(!ret)) {
			result = true;
			break;
		}
		/* Retry if comparison fails or rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
	return result;
}
1050c960e990SMathieu Desnoyers
1051c960e990SMathieu Desnoyers /*
1052c960e990SMathieu Desnoyers * __percpu_memcpy_buffer_pop is not safe against concurrent accesses. Should
1053c960e990SMathieu Desnoyers * only be used on buffers that are not concurrently modified.
1054c960e990SMathieu Desnoyers */
__percpu_memcpy_buffer_pop(struct percpu_memcpy_buffer * buffer,struct percpu_memcpy_buffer_node * item,int cpu)1055c960e990SMathieu Desnoyers bool __percpu_memcpy_buffer_pop(struct percpu_memcpy_buffer *buffer,
1056c960e990SMathieu Desnoyers struct percpu_memcpy_buffer_node *item,
1057c960e990SMathieu Desnoyers int cpu)
1058c960e990SMathieu Desnoyers {
1059c960e990SMathieu Desnoyers intptr_t offset;
1060c960e990SMathieu Desnoyers
1061c960e990SMathieu Desnoyers offset = buffer->c[cpu].offset;
1062c960e990SMathieu Desnoyers if (offset == 0)
1063c960e990SMathieu Desnoyers return false;
1064c960e990SMathieu Desnoyers memcpy(item, &buffer->c[cpu].array[offset - 1], sizeof(*item));
1065c960e990SMathieu Desnoyers buffer->c[cpu].offset = offset - 1;
1066c960e990SMathieu Desnoyers return true;
1067c960e990SMathieu Desnoyers }
1068c960e990SMathieu Desnoyers
test_percpu_memcpy_buffer_thread(void * arg)1069c960e990SMathieu Desnoyers void *test_percpu_memcpy_buffer_thread(void *arg)
1070c960e990SMathieu Desnoyers {
1071c960e990SMathieu Desnoyers long long i, reps;
1072c960e990SMathieu Desnoyers struct percpu_memcpy_buffer *buffer = (struct percpu_memcpy_buffer *)arg;
1073c960e990SMathieu Desnoyers
1074c960e990SMathieu Desnoyers if (!opt_disable_rseq && rseq_register_current_thread())
1075c960e990SMathieu Desnoyers abort();
1076c960e990SMathieu Desnoyers
1077c960e990SMathieu Desnoyers reps = opt_reps;
1078c960e990SMathieu Desnoyers for (i = 0; i < reps; i++) {
1079c960e990SMathieu Desnoyers struct percpu_memcpy_buffer_node item;
1080c960e990SMathieu Desnoyers bool result;
1081c960e990SMathieu Desnoyers
1082c960e990SMathieu Desnoyers result = this_cpu_memcpy_buffer_pop(buffer, &item, NULL);
1083c960e990SMathieu Desnoyers if (opt_yield)
1084c960e990SMathieu Desnoyers sched_yield(); /* encourage shuffling */
1085c960e990SMathieu Desnoyers if (result) {
1086c960e990SMathieu Desnoyers if (!this_cpu_memcpy_buffer_push(buffer, item, NULL)) {
1087c960e990SMathieu Desnoyers /* Should increase buffer size. */
1088c960e990SMathieu Desnoyers abort();
1089c960e990SMathieu Desnoyers }
1090c960e990SMathieu Desnoyers }
1091c960e990SMathieu Desnoyers }
1092c960e990SMathieu Desnoyers
1093c960e990SMathieu Desnoyers printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
10948df34c56SMathieu Desnoyers (int) rseq_gettid(), nr_abort, signals_delivered);
1095c960e990SMathieu Desnoyers if (!opt_disable_rseq && rseq_unregister_current_thread())
1096c960e990SMathieu Desnoyers abort();
1097c960e990SMathieu Desnoyers
1098c960e990SMathieu Desnoyers return NULL;
1099c960e990SMathieu Desnoyers }
1100c960e990SMathieu Desnoyers
1101c960e990SMathieu Desnoyers /* Simultaneous modification to a per-cpu buffer from many threads. */
test_percpu_memcpy_buffer(void)1102c960e990SMathieu Desnoyers void test_percpu_memcpy_buffer(void)
1103c960e990SMathieu Desnoyers {
1104c960e990SMathieu Desnoyers const int num_threads = opt_threads;
1105c960e990SMathieu Desnoyers int i, j, ret;
1106c960e990SMathieu Desnoyers uint64_t sum = 0, expected_sum = 0;
1107c960e990SMathieu Desnoyers struct percpu_memcpy_buffer buffer;
1108c960e990SMathieu Desnoyers pthread_t test_threads[num_threads];
1109c960e990SMathieu Desnoyers cpu_set_t allowed_cpus;
1110c960e990SMathieu Desnoyers
1111c960e990SMathieu Desnoyers memset(&buffer, 0, sizeof(buffer));
1112c960e990SMathieu Desnoyers
1113c960e990SMathieu Desnoyers /* Generate list entries for every usable cpu. */
1114c960e990SMathieu Desnoyers sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
1115c960e990SMathieu Desnoyers for (i = 0; i < CPU_SETSIZE; i++) {
1116c960e990SMathieu Desnoyers if (!CPU_ISSET(i, &allowed_cpus))
1117c960e990SMathieu Desnoyers continue;
1118c960e990SMathieu Desnoyers /* Worse-case is every item in same CPU. */
1119c960e990SMathieu Desnoyers buffer.c[i].array =
1120c960e990SMathieu Desnoyers malloc(sizeof(*buffer.c[i].array) * CPU_SETSIZE *
1121c960e990SMathieu Desnoyers MEMCPY_BUFFER_ITEM_PER_CPU);
1122c960e990SMathieu Desnoyers assert(buffer.c[i].array);
1123c960e990SMathieu Desnoyers buffer.c[i].buflen = CPU_SETSIZE * MEMCPY_BUFFER_ITEM_PER_CPU;
1124c960e990SMathieu Desnoyers for (j = 1; j <= MEMCPY_BUFFER_ITEM_PER_CPU; j++) {
1125c960e990SMathieu Desnoyers expected_sum += 2 * j + 1;
1126c960e990SMathieu Desnoyers
1127c960e990SMathieu Desnoyers /*
1128c960e990SMathieu Desnoyers * We could theoretically put the word-sized
1129c960e990SMathieu Desnoyers * "data" directly in the buffer. However, we
1130c960e990SMathieu Desnoyers * want to model objects that would not fit
1131c960e990SMathieu Desnoyers * within a single word, so allocate an object
1132c960e990SMathieu Desnoyers * for each node.
1133c960e990SMathieu Desnoyers */
1134c960e990SMathieu Desnoyers buffer.c[i].array[j - 1].data1 = j;
1135c960e990SMathieu Desnoyers buffer.c[i].array[j - 1].data2 = j + 1;
1136c960e990SMathieu Desnoyers buffer.c[i].offset++;
1137c960e990SMathieu Desnoyers }
1138c960e990SMathieu Desnoyers }
1139c960e990SMathieu Desnoyers
1140c960e990SMathieu Desnoyers for (i = 0; i < num_threads; i++) {
1141c960e990SMathieu Desnoyers ret = pthread_create(&test_threads[i], NULL,
1142c960e990SMathieu Desnoyers test_percpu_memcpy_buffer_thread,
1143c960e990SMathieu Desnoyers &buffer);
1144c960e990SMathieu Desnoyers if (ret) {
1145c960e990SMathieu Desnoyers errno = ret;
1146c960e990SMathieu Desnoyers perror("pthread_create");
1147c960e990SMathieu Desnoyers abort();
1148c960e990SMathieu Desnoyers }
1149c960e990SMathieu Desnoyers }
1150c960e990SMathieu Desnoyers
1151c960e990SMathieu Desnoyers for (i = 0; i < num_threads; i++) {
1152c960e990SMathieu Desnoyers ret = pthread_join(test_threads[i], NULL);
1153c960e990SMathieu Desnoyers if (ret) {
1154c960e990SMathieu Desnoyers errno = ret;
1155c960e990SMathieu Desnoyers perror("pthread_join");
1156c960e990SMathieu Desnoyers abort();
1157c960e990SMathieu Desnoyers }
1158c960e990SMathieu Desnoyers }
1159c960e990SMathieu Desnoyers
1160c960e990SMathieu Desnoyers for (i = 0; i < CPU_SETSIZE; i++) {
1161c960e990SMathieu Desnoyers struct percpu_memcpy_buffer_node item;
1162c960e990SMathieu Desnoyers
1163c960e990SMathieu Desnoyers if (!CPU_ISSET(i, &allowed_cpus))
1164c960e990SMathieu Desnoyers continue;
1165c960e990SMathieu Desnoyers
1166c960e990SMathieu Desnoyers while (__percpu_memcpy_buffer_pop(&buffer, &item, i)) {
1167c960e990SMathieu Desnoyers sum += item.data1;
1168c960e990SMathieu Desnoyers sum += item.data2;
1169c960e990SMathieu Desnoyers }
1170c960e990SMathieu Desnoyers free(buffer.c[i].array);
1171c960e990SMathieu Desnoyers }
1172c960e990SMathieu Desnoyers
1173c960e990SMathieu Desnoyers /*
1174c960e990SMathieu Desnoyers * All entries should now be accounted for (unless some external
1175c960e990SMathieu Desnoyers * actor is interfering with our allowed affinity while this
1176c960e990SMathieu Desnoyers * test is running).
1177c960e990SMathieu Desnoyers */
1178c960e990SMathieu Desnoyers assert(sum == expected_sum);
1179c960e990SMathieu Desnoyers }
1180c960e990SMathieu Desnoyers
test_signal_interrupt_handler(int signo)1181c960e990SMathieu Desnoyers static void test_signal_interrupt_handler(int signo)
1182c960e990SMathieu Desnoyers {
1183c960e990SMathieu Desnoyers signals_delivered++;
1184c960e990SMathieu Desnoyers }
1185c960e990SMathieu Desnoyers
set_signal_handler(void)1186c960e990SMathieu Desnoyers static int set_signal_handler(void)
1187c960e990SMathieu Desnoyers {
1188c960e990SMathieu Desnoyers int ret = 0;
1189c960e990SMathieu Desnoyers struct sigaction sa;
1190c960e990SMathieu Desnoyers sigset_t sigset;
1191c960e990SMathieu Desnoyers
1192c960e990SMathieu Desnoyers ret = sigemptyset(&sigset);
1193c960e990SMathieu Desnoyers if (ret < 0) {
1194c960e990SMathieu Desnoyers perror("sigemptyset");
1195c960e990SMathieu Desnoyers return ret;
1196c960e990SMathieu Desnoyers }
1197c960e990SMathieu Desnoyers
1198c960e990SMathieu Desnoyers sa.sa_handler = test_signal_interrupt_handler;
1199c960e990SMathieu Desnoyers sa.sa_mask = sigset;
1200c960e990SMathieu Desnoyers sa.sa_flags = 0;
1201c960e990SMathieu Desnoyers ret = sigaction(SIGUSR1, &sa, NULL);
1202c960e990SMathieu Desnoyers if (ret < 0) {
1203c960e990SMathieu Desnoyers perror("sigaction");
1204c960e990SMathieu Desnoyers return ret;
1205c960e990SMathieu Desnoyers }
1206c960e990SMathieu Desnoyers
1207c960e990SMathieu Desnoyers printf_verbose("Signal handler set for SIGUSR1\n");
1208c960e990SMathieu Desnoyers
1209c960e990SMathieu Desnoyers return ret;
1210c960e990SMathieu Desnoyers }
1211c960e990SMathieu Desnoyers
12126f39cecdSXingxing Su /* Test MEMBARRIER_CMD_PRIVATE_RESTART_RSEQ_ON_CPU membarrier command. */
1213ee31fff0SMathieu Desnoyers #ifdef TEST_MEMBARRIER
struct test_membarrier_thread_args {
	/* Set to 1 by test_membarrier() to tell the manager thread to exit. */
	int stop;
	/*
	 * Address of the currently "active" struct percpu_list, swapped
	 * back and forth by the manager thread and dereferenced by the
	 * workers inside an rseq critical section.
	 *
	 * NOTE(review): accessed with atomic_load()/atomic_store() although
	 * not declared _Atomic; this relies on the GCC/clang __atomic
	 * builtins accepting plain objects — confirm this is intentional
	 * (declaring it _Atomic would break &args->percpu_list_ptr usage).
	 */
	intptr_t percpu_list_ptr;
};
1218f166b111SPeter Oskolkov
/*
 * Worker threads modify data in their "active" percpu lists: each
 * iteration increments the head node counter of the list currently
 * published through args->percpu_list_ptr, on the CPU the thread is
 * running on.
 */
void *test_membarrier_worker_thread(void *arg)
{
	struct test_membarrier_thread_args *args =
		(struct test_membarrier_thread_args *)arg;
	const int iters = opt_reps;
	int i;

	if (rseq_register_current_thread()) {
		fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
			errno, strerror(errno));
		abort();
	}

	/* Wait for initialization: spin until the manager publishes a list. */
	while (!atomic_load(&args->percpu_list_ptr)) {}

	for (i = 0; i < iters; ++i) {
		int ret;

		/*
		 * Dereference the active list pointer and add 1 to the
		 * current CPU's entry within a single rseq critical
		 * section; retry when the section is aborted (preemption,
		 * signal delivery, or CPU migration).
		 */
		do {
			int cpu = get_current_cpu_id();

			ret = rseq_offset_deref_addv(RSEQ_MO_RELAXED, RSEQ_PERCPU,
				&args->percpu_list_ptr,
				sizeof(struct percpu_list_entry) * cpu, 1, cpu);
		} while (rseq_unlikely(ret));
	}

	if (rseq_unregister_current_thread()) {
		fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
			errno, strerror(errno));
		abort();
	}
	return NULL;
}
1255f166b111SPeter Oskolkov
test_membarrier_init_percpu_list(struct percpu_list * list)1256f166b111SPeter Oskolkov void test_membarrier_init_percpu_list(struct percpu_list *list)
1257f166b111SPeter Oskolkov {
1258f166b111SPeter Oskolkov int i;
1259f166b111SPeter Oskolkov
1260f166b111SPeter Oskolkov memset(list, 0, sizeof(*list));
1261f166b111SPeter Oskolkov for (i = 0; i < CPU_SETSIZE; i++) {
1262f166b111SPeter Oskolkov struct percpu_list_node *node;
1263f166b111SPeter Oskolkov
1264f166b111SPeter Oskolkov node = malloc(sizeof(*node));
1265f166b111SPeter Oskolkov assert(node);
1266f166b111SPeter Oskolkov node->data = 0;
1267f166b111SPeter Oskolkov node->next = NULL;
1268f166b111SPeter Oskolkov list->c[i].head = node;
1269f166b111SPeter Oskolkov }
1270f166b111SPeter Oskolkov }
1271f166b111SPeter Oskolkov
test_membarrier_free_percpu_list(struct percpu_list * list)1272f166b111SPeter Oskolkov void test_membarrier_free_percpu_list(struct percpu_list *list)
1273f166b111SPeter Oskolkov {
1274f166b111SPeter Oskolkov int i;
1275f166b111SPeter Oskolkov
1276f166b111SPeter Oskolkov for (i = 0; i < CPU_SETSIZE; i++)
1277f166b111SPeter Oskolkov free(list->c[i].head);
1278f166b111SPeter Oskolkov }
1279f166b111SPeter Oskolkov
/*
 * The manager thread swaps per-cpu lists that worker threads see,
 * and validates that there are no unexpected modifications: after an
 * expedited rseq membarrier, no worker can still be incrementing the
 * now-inactive list, so its counters must stay frozen.
 */
void *test_membarrier_manager_thread(void *arg)
{
	struct test_membarrier_thread_args *args =
		(struct test_membarrier_thread_args *)arg;
	struct percpu_list list_a, list_b;
	intptr_t expect_a = 0, expect_b = 0;
	int cpu_a = 0, cpu_b = 0;

	if (rseq_register_current_thread()) {
		fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
			errno, strerror(errno));
		abort();
	}

	/* Init lists. */
	test_membarrier_init_percpu_list(&list_a);
	test_membarrier_init_percpu_list(&list_b);

	/* Publish list_a as the initial active list; workers spin on this. */
	atomic_store(&args->percpu_list_ptr, (intptr_t)&list_a);

	while (!atomic_load(&args->stop)) {
		/* list_a is "active". */
		cpu_a = rand() % CPU_SETSIZE;
		/*
		 * As list_b is "inactive", we should never see changes
		 * to list_b.
		 */
		if (expect_b != atomic_load(&list_b.c[cpu_b].head->data)) {
			fprintf(stderr, "Membarrier test failed\n");
			abort();
		}

		/* Make list_b "active". */
		atomic_store(&args->percpu_list_ptr, (intptr_t)&list_b);
		/*
		 * The expedited rseq membarrier restarts any rseq critical
		 * section in flight on cpu_a, so no worker keeps using a
		 * stale (pre-swap) list pointer past this call.  ENXIO just
		 * means cpu_a is not online/allowed — not an error here.
		 */
		if (rseq_membarrier_expedited(cpu_a) &&
				errno != ENXIO /* missing CPU */) {
			perror("sys_membarrier");
			abort();
		}
		/*
		 * Cpu A should now only modify list_b, so the values
		 * in list_a should be stable.
		 */
		expect_a = atomic_load(&list_a.c[cpu_a].head->data);

		cpu_b = rand() % CPU_SETSIZE;
		/*
		 * As list_a is "inactive", we should never see changes
		 * to list_a.
		 */
		if (expect_a != atomic_load(&list_a.c[cpu_a].head->data)) {
			fprintf(stderr, "Membarrier test failed\n");
			abort();
		}

		/* Make list_a "active". */
		atomic_store(&args->percpu_list_ptr, (intptr_t)&list_a);
		if (rseq_membarrier_expedited(cpu_b) &&
				errno != ENXIO /* missing CPU */) {
			perror("sys_membarrier");
			abort();
		}
		/* Remember a value from list_b. */
		expect_b = atomic_load(&list_b.c[cpu_b].head->data);
	}

	test_membarrier_free_percpu_list(&list_a);
	test_membarrier_free_percpu_list(&list_b);

	if (rseq_unregister_current_thread()) {
		fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
			errno, strerror(errno));
		abort();
	}
	return NULL;
}
1360f166b111SPeter Oskolkov
test_membarrier(void)1361f166b111SPeter Oskolkov void test_membarrier(void)
1362f166b111SPeter Oskolkov {
1363f166b111SPeter Oskolkov const int num_threads = opt_threads;
1364f166b111SPeter Oskolkov struct test_membarrier_thread_args thread_args;
1365f166b111SPeter Oskolkov pthread_t worker_threads[num_threads];
1366f166b111SPeter Oskolkov pthread_t manager_thread;
1367f166b111SPeter Oskolkov int i, ret;
1368f166b111SPeter Oskolkov
1369f166b111SPeter Oskolkov if (sys_membarrier(MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ, 0, 0)) {
1370f166b111SPeter Oskolkov perror("sys_membarrier");
1371f166b111SPeter Oskolkov abort();
1372f166b111SPeter Oskolkov }
1373f166b111SPeter Oskolkov
1374f166b111SPeter Oskolkov thread_args.stop = 0;
1375f166b111SPeter Oskolkov thread_args.percpu_list_ptr = 0;
1376f166b111SPeter Oskolkov ret = pthread_create(&manager_thread, NULL,
1377f166b111SPeter Oskolkov test_membarrier_manager_thread, &thread_args);
1378f166b111SPeter Oskolkov if (ret) {
1379f166b111SPeter Oskolkov errno = ret;
1380f166b111SPeter Oskolkov perror("pthread_create");
1381f166b111SPeter Oskolkov abort();
1382f166b111SPeter Oskolkov }
1383f166b111SPeter Oskolkov
1384f166b111SPeter Oskolkov for (i = 0; i < num_threads; i++) {
1385f166b111SPeter Oskolkov ret = pthread_create(&worker_threads[i], NULL,
1386f166b111SPeter Oskolkov test_membarrier_worker_thread, &thread_args);
1387f166b111SPeter Oskolkov if (ret) {
1388f166b111SPeter Oskolkov errno = ret;
1389f166b111SPeter Oskolkov perror("pthread_create");
1390f166b111SPeter Oskolkov abort();
1391f166b111SPeter Oskolkov }
1392f166b111SPeter Oskolkov }
1393f166b111SPeter Oskolkov
1394f166b111SPeter Oskolkov
1395f166b111SPeter Oskolkov for (i = 0; i < num_threads; i++) {
1396f166b111SPeter Oskolkov ret = pthread_join(worker_threads[i], NULL);
1397f166b111SPeter Oskolkov if (ret) {
1398f166b111SPeter Oskolkov errno = ret;
1399f166b111SPeter Oskolkov perror("pthread_join");
1400f166b111SPeter Oskolkov abort();
1401f166b111SPeter Oskolkov }
1402f166b111SPeter Oskolkov }
1403f166b111SPeter Oskolkov
1404f166b111SPeter Oskolkov atomic_store(&thread_args.stop, 1);
1405f166b111SPeter Oskolkov ret = pthread_join(manager_thread, NULL);
1406f166b111SPeter Oskolkov if (ret) {
1407f166b111SPeter Oskolkov errno = ret;
1408f166b111SPeter Oskolkov perror("pthread_join");
1409f166b111SPeter Oskolkov abort();
1410f166b111SPeter Oskolkov }
1411f166b111SPeter Oskolkov }
1412ee31fff0SMathieu Desnoyers #else /* TEST_MEMBARRIER */
/*
 * Stub used when TEST_MEMBARRIER is not defined, i.e. when
 * rseq_offset_deref_addv has no implementation for this architecture:
 * report and skip instead of failing the test run.
 */
void test_membarrier(void)
{
	fprintf(stderr, "rseq_offset_deref_addv is not implemented on this architecture. "
			"Skipping membarrier test.\n");
}
1418f166b111SPeter Oskolkov #endif
1419f166b111SPeter Oskolkov
/* Print the command-line help text to stdout (argc is unused). */
static void show_usage(int argc, char **argv)
{
	printf("Usage : %s <OPTIONS>\n", argv[0]);
	fputs("OPTIONS:\n"
	      " [-1 loops] Number of loops for delay injection 1\n"
	      " [-2 loops] Number of loops for delay injection 2\n"
	      " [-3 loops] Number of loops for delay injection 3\n"
	      " [-4 loops] Number of loops for delay injection 4\n"
	      " [-5 loops] Number of loops for delay injection 5\n"
	      " [-6 loops] Number of loops for delay injection 6\n"
	      " [-7 loops] Number of loops for delay injection 7 (-1 to enable -m)\n"
	      " [-8 loops] Number of loops for delay injection 8 (-1 to enable -m)\n"
	      " [-9 loops] Number of loops for delay injection 9 (-1 to enable -m)\n"
	      " [-m N] Yield/sleep/kill every modulo N (default 0: disabled) (>= 0)\n"
	      " [-y] Yield\n"
	      " [-k] Kill thread with signal\n"
	      " [-s S] S: =0: disabled (default), >0: sleep time (ms)\n"
	      " [-t N] Number of threads (default 200)\n"
	      " [-r N] Number of repetitions per thread (default 5000)\n"
	      " [-d] Disable rseq system call (no initialization)\n"
	      " [-D M] Disable rseq for each M threads\n"
	      " [-T test] Choose test: (s)pinlock, (l)ist, (b)uffer, (m)emcpy, (i)ncrement, membarrie(r)\n"
	      " [-M] Push into buffer and memcpy buffer with memory barriers.\n"
	      " [-v] Verbose output.\n"
	      " [-h] Show this help.\n"
	      "\n", stdout);
}
1448c960e990SMathieu Desnoyers
/*
 * Parse command-line options into the opt_* globals declared earlier in
 * this file, install the SIGUSR1 handler, register the main thread with
 * rseq (unless disabled with -d), then dispatch to the selected test.
 *
 * Returns 0 on success (including -h), -1 on usage or setup error.
 */
int main(int argc, char **argv)
{
	int i;

	for (i = 1; i < argc; i++) {
		if (argv[i][0] != '-')
			continue;
		switch (argv[i][1]) {
		/* -1 .. -9: per-site delay-injection loop counts. */
		case '1':
		case '2':
		case '3':
		case '4':
		case '5':
		case '6':
		case '7':
		case '8':
		case '9':
			if (argc < i + 2) {
				show_usage(argc, argv);
				goto error;
			}
			/*
			 * NOTE(review): atol() silently accepts trailing
			 * garbage and does not report overflow; acceptable
			 * for a selftest.
			 */
			loop_cnt[argv[i][1] - '0'] = atol(argv[i + 1]);
			i++;
			break;
		case 'm':
			if (argc < i + 2) {
				show_usage(argc, argv);
				goto error;
			}
			opt_modulo = atol(argv[i + 1]);
			if (opt_modulo < 0) {
				show_usage(argc, argv);
				goto error;
			}
			i++;
			break;
		case 's':
			if (argc < i + 2) {
				show_usage(argc, argv);
				goto error;
			}
			opt_sleep = atol(argv[i + 1]);
			if (opt_sleep < 0) {
				show_usage(argc, argv);
				goto error;
			}
			i++;
			break;
		case 'y':
			opt_yield = 1;
			break;
		case 'k':
			opt_signal = 1;
			break;
		case 'd':
			opt_disable_rseq = 1;
			break;
		case 'D':
			if (argc < i + 2) {
				show_usage(argc, argv);
				goto error;
			}
			opt_disable_mod = atol(argv[i + 1]);
			if (opt_disable_mod < 0) {
				show_usage(argc, argv);
				goto error;
			}
			i++;
			break;
		case 't':
			if (argc < i + 2) {
				show_usage(argc, argv);
				goto error;
			}
			opt_threads = atol(argv[i + 1]);
			if (opt_threads < 0) {
				show_usage(argc, argv);
				goto error;
			}
			i++;
			break;
		case 'r':
			if (argc < i + 2) {
				show_usage(argc, argv);
				goto error;
			}
			opt_reps = atoll(argv[i + 1]);
			if (opt_reps < 0) {
				show_usage(argc, argv);
				goto error;
			}
			i++;
			break;
		case 'h':
			show_usage(argc, argv);
			goto end;
		case 'T':
			/* Test selector: validated against the known set below. */
			if (argc < i + 2) {
				show_usage(argc, argv);
				goto error;
			}
			opt_test = *argv[i + 1];
			switch (opt_test) {
			case 's':
			case 'l':
			case 'i':
			case 'b':
			case 'm':
			case 'r':
				break;
			default:
				show_usage(argc, argv);
				goto error;
			}
			i++;
			break;
		case 'v':
			verbose = 1;
			break;
		case 'M':
			opt_mo = RSEQ_MO_RELEASE;
			break;
		default:
			show_usage(argc, argv);
			goto error;
		}
	}

	/*
	 * Mirror the parsed loop counts into the asm-visible globals used
	 * by the inline-assembly delay-injection macros.
	 */
	loop_cnt_1 = loop_cnt[1];
	loop_cnt_2 = loop_cnt[2];
	loop_cnt_3 = loop_cnt[3];
	loop_cnt_4 = loop_cnt[4];
	loop_cnt_5 = loop_cnt[5];
	loop_cnt_6 = loop_cnt[6];

	if (set_signal_handler())
		goto error;

	if (!opt_disable_rseq && rseq_register_current_thread())
		goto error;
	/* Make sure a working cpu-id getter backs the registration. */
	if (!opt_disable_rseq && !rseq_validate_cpu_id()) {
		fprintf(stderr, "Error: cpu id getter unavailable\n");
		goto error;
	}
	switch (opt_test) {
	case 's':
		printf_verbose("spinlock\n");
		test_percpu_spinlock();
		break;
	case 'l':
		printf_verbose("linked list\n");
		test_percpu_list();
		break;
	case 'b':
		printf_verbose("buffer\n");
		test_percpu_buffer();
		break;
	case 'm':
		printf_verbose("memcpy buffer\n");
		test_percpu_memcpy_buffer();
		break;
	case 'i':
		printf_verbose("counter increment\n");
		test_percpu_inc();
		break;
	case 'r':
		printf_verbose("membarrier\n");
		test_membarrier();
		break;
	}
	if (!opt_disable_rseq && rseq_unregister_current_thread())
		abort();
end:
	return 0;

error:
	return -1;
}
1627