1 // SPDX-License-Identifier: LGPL-2.1
2 #define _GNU_SOURCE
3 #include <assert.h>
4 #include <linux/membarrier.h>
5 #include <pthread.h>
6 #include <sched.h>
7 #include <stdatomic.h>
8 #include <stdint.h>
9 #include <stdio.h>
10 #include <stdlib.h>
11 #include <string.h>
12 #include <syscall.h>
13 #include <unistd.h>
14 #include <poll.h>
15 #include <sys/types.h>
16 #include <signal.h>
17 #include <errno.h>
18 #include <stddef.h>
19 
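/*
 * Use a raw syscall for gettid() so this test also builds against C
 * libraries that do not expose a gettid() wrapper (glibc only gained
 * one in version 2.30).
 */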
20 static inline pid_t rseq_gettid(void)
21 {
22 	return syscall(__NR_gettid);
23 }
24 
25 #define NR_INJECT	9
26 static int loop_cnt[NR_INJECT + 1];
27 
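/*
 * Copies of loop_cnt[1..6] exported under fixed assembly symbol names so
 * that the i386/x86_64 RSEQ_INJECT_ASM() variants below can reference them
 * directly from inline assembly; main() keeps them in sync with loop_cnt[].
 */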
28 static int loop_cnt_1 asm("asm_loop_cnt_1") __attribute__((used));
29 static int loop_cnt_2 asm("asm_loop_cnt_2") __attribute__((used));
30 static int loop_cnt_3 asm("asm_loop_cnt_3") __attribute__((used));
31 static int loop_cnt_4 asm("asm_loop_cnt_4") __attribute__((used));
32 static int loop_cnt_5 asm("asm_loop_cnt_5") __attribute__((used));
33 static int loop_cnt_6 asm("asm_loop_cnt_6") __attribute__((used));
34 
35 static int opt_modulo, verbose;
36 
37 static int opt_yield, opt_signal, opt_sleep,
38 		opt_disable_rseq, opt_threads = 200,
39 		opt_disable_mod = 0, opt_test = 's', opt_mb = 0;
40 
41 #ifndef RSEQ_SKIP_FASTPATH
42 static long long opt_reps = 5000;
43 #else
44 static long long opt_reps = 100;
45 #endif
46 
47 static __thread __attribute__((tls_model("initial-exec")))
48 unsigned int signals_delivered;
49 
50 #ifndef BENCHMARK
51 
52 static __thread __attribute__((tls_model("initial-exec"), unused))
53 unsigned int yield_mod_cnt, nr_abort;
54 
55 #define printf_verbose(fmt, ...)			\
56 	do {						\
57 		if (verbose)				\
58 			printf(fmt, ## __VA_ARGS__);	\
59 	} while (0)
60 
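/*
 * Each supported architecture provides RSEQ_INJECT_ASM(n): a short busy-wait
 * loop of loop_cnt[n] iterations which the rseq.h inline-asm templates splice
 * into their critical sections at injection point n.  The -1 .. -6 options
 * use this to widen the race windows inside the critical sections.
 */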
61 #ifdef __i386__
62 
63 #define INJECT_ASM_REG	"eax"
64 
65 #define RSEQ_INJECT_CLOBBER \
66 	, INJECT_ASM_REG
67 
68 #define RSEQ_INJECT_ASM(n) \
69 	"mov asm_loop_cnt_" #n ", %%" INJECT_ASM_REG "\n\t" \
70 	"test %%" INJECT_ASM_REG ",%%" INJECT_ASM_REG "\n\t" \
71 	"jz 333f\n\t" \
72 	"222:\n\t" \
73 	"dec %%" INJECT_ASM_REG "\n\t" \
74 	"jnz 222b\n\t" \
75 	"333:\n\t"
76 
77 #elif defined(__x86_64__)
78 
79 #define INJECT_ASM_REG_P	"rax"
80 #define INJECT_ASM_REG		"eax"
81 
82 #define RSEQ_INJECT_CLOBBER \
83 	, INJECT_ASM_REG_P \
84 	, INJECT_ASM_REG
85 
86 #define RSEQ_INJECT_ASM(n) \
87 	"lea asm_loop_cnt_" #n "(%%rip), %%" INJECT_ASM_REG_P "\n\t" \
88 	"mov (%%" INJECT_ASM_REG_P "), %%" INJECT_ASM_REG "\n\t" \
89 	"test %%" INJECT_ASM_REG ",%%" INJECT_ASM_REG "\n\t" \
90 	"jz 333f\n\t" \
91 	"222:\n\t" \
92 	"dec %%" INJECT_ASM_REG "\n\t" \
93 	"jnz 222b\n\t" \
94 	"333:\n\t"
95 
96 #elif defined(__s390__)
97 
98 #define RSEQ_INJECT_INPUT \
99 	, [loop_cnt_1]"m"(loop_cnt[1]) \
100 	, [loop_cnt_2]"m"(loop_cnt[2]) \
101 	, [loop_cnt_3]"m"(loop_cnt[3]) \
102 	, [loop_cnt_4]"m"(loop_cnt[4]) \
103 	, [loop_cnt_5]"m"(loop_cnt[5]) \
104 	, [loop_cnt_6]"m"(loop_cnt[6])
105 
106 #define INJECT_ASM_REG	"r12"
107 
108 #define RSEQ_INJECT_CLOBBER \
109 	, INJECT_ASM_REG
110 
111 #define RSEQ_INJECT_ASM(n) \
112 	"l %%" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
113 	"ltr %%" INJECT_ASM_REG ", %%" INJECT_ASM_REG "\n\t" \
114 	"je 333f\n\t" \
115 	"222:\n\t" \
116 	"ahi %%" INJECT_ASM_REG ", -1\n\t" \
117 	"jnz 222b\n\t" \
118 	"333:\n\t"
119 
120 #elif defined(__ARMEL__)
121 
122 #define RSEQ_INJECT_INPUT \
123 	, [loop_cnt_1]"m"(loop_cnt[1]) \
124 	, [loop_cnt_2]"m"(loop_cnt[2]) \
125 	, [loop_cnt_3]"m"(loop_cnt[3]) \
126 	, [loop_cnt_4]"m"(loop_cnt[4]) \
127 	, [loop_cnt_5]"m"(loop_cnt[5]) \
128 	, [loop_cnt_6]"m"(loop_cnt[6])
129 
130 #define INJECT_ASM_REG	"r4"
131 
132 #define RSEQ_INJECT_CLOBBER \
133 	, INJECT_ASM_REG
134 
135 #define RSEQ_INJECT_ASM(n) \
136 	"ldr " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
137 	"cmp " INJECT_ASM_REG ", #0\n\t" \
138 	"beq 333f\n\t" \
139 	"222:\n\t" \
140 	"subs " INJECT_ASM_REG ", #1\n\t" \
141 	"bne 222b\n\t" \
142 	"333:\n\t"
143 
144 #elif defined(__AARCH64EL__)
145 
146 #define RSEQ_INJECT_INPUT \
147 	, [loop_cnt_1] "Qo" (loop_cnt[1]) \
148 	, [loop_cnt_2] "Qo" (loop_cnt[2]) \
149 	, [loop_cnt_3] "Qo" (loop_cnt[3]) \
150 	, [loop_cnt_4] "Qo" (loop_cnt[4]) \
151 	, [loop_cnt_5] "Qo" (loop_cnt[5]) \
152 	, [loop_cnt_6] "Qo" (loop_cnt[6])
153 
154 #define INJECT_ASM_REG	RSEQ_ASM_TMP_REG32
155 
156 #define RSEQ_INJECT_ASM(n) \
157 	"	ldr	" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n"	\
158 	"	cbz	" INJECT_ASM_REG ", 333f\n"			\
159 	"222:\n"							\
160 	"	sub	" INJECT_ASM_REG ", " INJECT_ASM_REG ", #1\n"	\
161 	"	cbnz	" INJECT_ASM_REG ", 222b\n"			\
162 	"333:\n"
163 
164 #elif defined(__PPC__)
165 
166 #define RSEQ_INJECT_INPUT \
167 	, [loop_cnt_1]"m"(loop_cnt[1]) \
168 	, [loop_cnt_2]"m"(loop_cnt[2]) \
169 	, [loop_cnt_3]"m"(loop_cnt[3]) \
170 	, [loop_cnt_4]"m"(loop_cnt[4]) \
171 	, [loop_cnt_5]"m"(loop_cnt[5]) \
172 	, [loop_cnt_6]"m"(loop_cnt[6])
173 
174 #define INJECT_ASM_REG	"r18"
175 
176 #define RSEQ_INJECT_CLOBBER \
177 	, INJECT_ASM_REG
178 
179 #define RSEQ_INJECT_ASM(n) \
180 	"lwz %%" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
181 	"cmpwi %%" INJECT_ASM_REG ", 0\n\t" \
182 	"beq 333f\n\t" \
183 	"222:\n\t" \
184 	"subic. %%" INJECT_ASM_REG ", %%" INJECT_ASM_REG ", 1\n\t" \
185 	"bne 222b\n\t" \
186 	"333:\n\t"
187 
188 #elif defined(__mips__)
189 
190 #define RSEQ_INJECT_INPUT \
191 	, [loop_cnt_1]"m"(loop_cnt[1]) \
192 	, [loop_cnt_2]"m"(loop_cnt[2]) \
193 	, [loop_cnt_3]"m"(loop_cnt[3]) \
194 	, [loop_cnt_4]"m"(loop_cnt[4]) \
195 	, [loop_cnt_5]"m"(loop_cnt[5]) \
196 	, [loop_cnt_6]"m"(loop_cnt[6])
197 
198 #define INJECT_ASM_REG	"$5"
199 
200 #define RSEQ_INJECT_CLOBBER \
201 	, INJECT_ASM_REG
202 
203 #define RSEQ_INJECT_ASM(n) \
204 	"lw " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
205 	"beqz " INJECT_ASM_REG ", 333f\n\t" \
206 	"222:\n\t" \
207 	"addiu " INJECT_ASM_REG ", -1\n\t" \
208 	"bnez " INJECT_ASM_REG ", 222b\n\t" \
209 	"333:\n\t"
210 #elif defined(__riscv)
211 
212 #define RSEQ_INJECT_INPUT \
213 	, [loop_cnt_1]"m"(loop_cnt[1]) \
214 	, [loop_cnt_2]"m"(loop_cnt[2]) \
215 	, [loop_cnt_3]"m"(loop_cnt[3]) \
216 	, [loop_cnt_4]"m"(loop_cnt[4]) \
217 	, [loop_cnt_5]"m"(loop_cnt[5]) \
218 	, [loop_cnt_6]"m"(loop_cnt[6])
219 
220 #define INJECT_ASM_REG	"t1"
221 
222 #define RSEQ_INJECT_CLOBBER \
223 	, INJECT_ASM_REG
224 
225 #define RSEQ_INJECT_ASM(n)					\
226 	"lw " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t"		\
227 	"beqz " INJECT_ASM_REG ", 333f\n\t"			\
228 	"222:\n\t"						\
229 	"addi  " INJECT_ASM_REG "," INJECT_ASM_REG ", -1\n\t"	\
230 	"bnez " INJECT_ASM_REG ", 222b\n\t"			\
231 	"333:\n\t"
232 
233 
234 #else
235 #error unsupported target
236 #endif
237 
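/*
 * RSEQ_INJECT_FAILED counts critical-section aborts, and RSEQ_INJECT_C(n)
 * injects a C-level delay of loop_cnt[n] compiler barriers.  A loop count of
 * -1 (see the -7/-8/-9 options) instead triggers the yield/sleep/signal
 * behaviour selected by -y/-s/-k once every opt_modulo passes.
 */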
238 #define RSEQ_INJECT_FAILED \
239 	nr_abort++;
240 
241 #define RSEQ_INJECT_C(n) \
242 { \
243 	int loc_i, loc_nr_loops = loop_cnt[n]; \
244 	\
245 	for (loc_i = 0; loc_i < loc_nr_loops; loc_i++) { \
246 		rseq_barrier(); \
247 	} \
248 	if (loc_nr_loops == -1 && opt_modulo) { \
249 		if (yield_mod_cnt == opt_modulo - 1) { \
250 			if (opt_sleep > 0) \
251 				poll(NULL, 0, opt_sleep); \
252 			if (opt_yield) \
253 				sched_yield(); \
254 			if (opt_signal) \
255 				raise(SIGUSR1); \
256 			yield_mod_cnt = 0; \
257 		} else { \
258 			yield_mod_cnt++; \
259 		} \
260 	} \
261 }
262 
263 #else
264 
265 #define printf_verbose(fmt, ...)
266 
267 #endif /* BENCHMARK */
268 
269 #include "rseq.h"
270 
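/*
 * Per-cpu entries below are aligned to 128 bytes so that data belonging to
 * different CPUs lands on distinct cache lines and does not false-share.
 */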
271 struct percpu_lock_entry {
272 	intptr_t v;
273 } __attribute__((aligned(128)));
274 
275 struct percpu_lock {
276 	struct percpu_lock_entry c[CPU_SETSIZE];
277 };
278 
279 struct test_data_entry {
280 	intptr_t count;
281 } __attribute__((aligned(128)));
282 
283 struct spinlock_test_data {
284 	struct percpu_lock lock;
285 	struct test_data_entry c[CPU_SETSIZE];
286 };
287 
288 struct spinlock_thread_test_data {
289 	struct spinlock_test_data *data;
290 	long long reps;
291 	int reg;
292 };
293 
294 struct inc_test_data {
295 	struct test_data_entry c[CPU_SETSIZE];
296 };
297 
298 struct inc_thread_test_data {
299 	struct inc_test_data *data;
300 	long long reps;
301 	int reg;
302 };
303 
304 struct percpu_list_node {
305 	intptr_t data;
306 	struct percpu_list_node *next;
307 };
308 
309 struct percpu_list_entry {
310 	struct percpu_list_node *head;
311 } __attribute__((aligned(128)));
312 
313 struct percpu_list {
314 	struct percpu_list_entry c[CPU_SETSIZE];
315 };
316 
317 #define BUFFER_ITEM_PER_CPU	100
318 
319 struct percpu_buffer_node {
320 	intptr_t data;
321 };
322 
323 struct percpu_buffer_entry {
324 	intptr_t offset;
325 	intptr_t buflen;
326 	struct percpu_buffer_node **array;
327 } __attribute__((aligned(128)));
328 
329 struct percpu_buffer {
330 	struct percpu_buffer_entry c[CPU_SETSIZE];
331 };
332 
333 #define MEMCPY_BUFFER_ITEM_PER_CPU	100
334 
335 struct percpu_memcpy_buffer_node {
336 	intptr_t data1;
337 	uint64_t data2;
338 };
339 
340 struct percpu_memcpy_buffer_entry {
341 	intptr_t offset;
342 	intptr_t buflen;
343 	struct percpu_memcpy_buffer_node *array;
344 } __attribute__((aligned(128)));
345 
346 struct percpu_memcpy_buffer {
347 	struct percpu_memcpy_buffer_entry c[CPU_SETSIZE];
348 };
349 
350 /* A simple percpu spinlock. Grabs lock on current cpu. */
351 static int rseq_this_cpu_lock(struct percpu_lock *lock)
352 {
353 	int cpu;
354 
355 	for (;;) {
356 		int ret;
357 
358 		cpu = rseq_cpu_start();
359 		ret = rseq_cmpeqv_storev(&lock->c[cpu].v,
360 					 0, 1, cpu);
361 		if (rseq_likely(!ret))
362 			break;
363 		/* Retry if comparison fails or rseq aborts. */
364 	}
365 	/*
366 	 * Acquire semantic when taking lock after control dependency.
367 	 * Matches rseq_smp_store_release().
368 	 */
369 	rseq_smp_acquire__after_ctrl_dep();
370 	return cpu;
371 }
372 
373 static void rseq_percpu_unlock(struct percpu_lock *lock, int cpu)
374 {
375 	assert(lock->c[cpu].v == 1);
376 	/*
377 	 * Release lock, with release semantic. Matches
378 	 * rseq_smp_acquire__after_ctrl_dep().
379 	 */
380 	rseq_smp_store_release(&lock->c[cpu].v, 0);
381 }
382 
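/*
 * Thread body for the spinlock test: each iteration takes the current CPU's
 * lock, increments that CPU's counter, and releases the lock.
 * thread_data->reg selects whether this particular thread registers itself
 * with rseq (see the -D option).
 */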
383 void *test_percpu_spinlock_thread(void *arg)
384 {
385 	struct spinlock_thread_test_data *thread_data = arg;
386 	struct spinlock_test_data *data = thread_data->data;
387 	long long i, reps;
388 
389 	if (!opt_disable_rseq && thread_data->reg &&
390 	    rseq_register_current_thread())
391 		abort();
392 	reps = thread_data->reps;
393 	for (i = 0; i < reps; i++) {
394 		int cpu = rseq_this_cpu_lock(&data->lock);
395 		data->c[cpu].count++;
396 		rseq_percpu_unlock(&data->lock, cpu);
397 #ifndef BENCHMARK
398 		if (i != 0 && (reps / 10) && !(i % (reps / 10)))
399 			printf_verbose("tid %d: count %lld\n",
400 				       (int) rseq_gettid(), i);
401 #endif
402 	}
403 	printf_verbose("tid %d: number of rseq aborts: %d, signals delivered: %u\n",
404 		       (int) rseq_gettid(), nr_abort, signals_delivered);
405 	if (!opt_disable_rseq && thread_data->reg &&
406 	    rseq_unregister_current_thread())
407 		abort();
408 	return NULL;
409 }
410 
411 /*
412  * A simple test which implements a sharded counter using a per-cpu
413  * lock.  Real applications would likely prefer a plain per-cpu
414  * increment; however, this is reasonable for a test and the lock can
415  * be extended to synchronize more complicated operations.
416  */
417 void test_percpu_spinlock(void)
418 {
419 	const int num_threads = opt_threads;
420 	int i, ret;
421 	uint64_t sum;
422 	pthread_t test_threads[num_threads];
423 	struct spinlock_test_data data;
424 	struct spinlock_thread_test_data thread_data[num_threads];
425 
426 	memset(&data, 0, sizeof(data));
427 	for (i = 0; i < num_threads; i++) {
428 		thread_data[i].reps = opt_reps;
429 		if (opt_disable_mod <= 0 || (i % opt_disable_mod))
430 			thread_data[i].reg = 1;
431 		else
432 			thread_data[i].reg = 0;
433 		thread_data[i].data = &data;
434 		ret = pthread_create(&test_threads[i], NULL,
435 				     test_percpu_spinlock_thread,
436 				     &thread_data[i]);
437 		if (ret) {
438 			errno = ret;
439 			perror("pthread_create");
440 			abort();
441 		}
442 	}
443 
444 	for (i = 0; i < num_threads; i++) {
445 		ret = pthread_join(test_threads[i], NULL);
446 		if (ret) {
447 			errno = ret;
448 			perror("pthread_join");
449 			abort();
450 		}
451 	}
452 
453 	sum = 0;
454 	for (i = 0; i < CPU_SETSIZE; i++)
455 		sum += data.c[i].count;
456 
457 	assert(sum == (uint64_t)opt_reps * num_threads);
458 }
459 
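/*
 * Thread body for the counter-increment test: like the spinlock test, but
 * the per-cpu counter is incremented directly with rseq_addv(), retrying
 * until the increment succeeds on the CPU the thread is currently on.
 */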
460 void *test_percpu_inc_thread(void *arg)
461 {
462 	struct inc_thread_test_data *thread_data = arg;
463 	struct inc_test_data *data = thread_data->data;
464 	long long i, reps;
465 
466 	if (!opt_disable_rseq && thread_data->reg &&
467 	    rseq_register_current_thread())
468 		abort();
469 	reps = thread_data->reps;
470 	for (i = 0; i < reps; i++) {
471 		int ret;
472 
473 		do {
474 			int cpu;
475 
476 			cpu = rseq_cpu_start();
477 			ret = rseq_addv(&data->c[cpu].count, 1, cpu);
478 		} while (rseq_unlikely(ret));
479 #ifndef BENCHMARK
480 		if (i != 0 && (reps / 10) && !(i % (reps / 10)))
481 			printf_verbose("tid %d: count %lld\n",
482 				       (int) rseq_gettid(), i);
483 #endif
484 	}
485 	printf_verbose("tid %d: number of rseq aborts: %d, signals delivered: %u\n",
486 		       (int) rseq_gettid(), nr_abort, signals_delivered);
487 	if (!opt_disable_rseq && thread_data->reg &&
488 	    rseq_unregister_current_thread())
489 		abort();
490 	return NULL;
491 }
492 
493 void test_percpu_inc(void)
494 {
495 	const int num_threads = opt_threads;
496 	int i, ret;
497 	uint64_t sum;
498 	pthread_t test_threads[num_threads];
499 	struct inc_test_data data;
500 	struct inc_thread_test_data thread_data[num_threads];
501 
502 	memset(&data, 0, sizeof(data));
503 	for (i = 0; i < num_threads; i++) {
504 		thread_data[i].reps = opt_reps;
505 		if (opt_disable_mod <= 0 || (i % opt_disable_mod))
506 			thread_data[i].reg = 1;
507 		else
508 			thread_data[i].reg = 0;
509 		thread_data[i].data = &data;
510 		ret = pthread_create(&test_threads[i], NULL,
511 				     test_percpu_inc_thread,
512 				     &thread_data[i]);
513 		if (ret) {
514 			errno = ret;
515 			perror("pthread_create");
516 			abort();
517 		}
518 	}
519 
520 	for (i = 0; i < num_threads; i++) {
521 		ret = pthread_join(test_threads[i], NULL);
522 		if (ret) {
523 			errno = ret;
524 			perror("pthread_join");
525 			abort();
526 		}
527 	}
528 
529 	sum = 0;
530 	for (i = 0; i < CPU_SETSIZE; i++)
531 		sum += data.c[i].count;
532 
533 	assert(sum == (uint64_t)opt_reps * num_threads);
534 }
535 
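/*
 * Push a node onto the current CPU's list.  The new head is published with
 * rseq_cmpeqv_storev(), which only stores if the head still matches the
 * value read above; a CPU migration or rseq abort simply retries the loop.
 */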
536 void this_cpu_list_push(struct percpu_list *list,
537 			struct percpu_list_node *node,
538 			int *_cpu)
539 {
540 	int cpu;
541 
542 	for (;;) {
543 		intptr_t *targetptr, newval, expect;
544 		int ret;
545 
546 		cpu = rseq_cpu_start();
547 		/* Load list->c[cpu].head with single-copy atomicity. */
548 		expect = (intptr_t)RSEQ_READ_ONCE(list->c[cpu].head);
549 		newval = (intptr_t)node;
550 		targetptr = (intptr_t *)&list->c[cpu].head;
551 		node->next = (struct percpu_list_node *)expect;
552 		ret = rseq_cmpeqv_storev(targetptr, expect, newval, cpu);
553 		if (rseq_likely(!ret))
554 			break;
555 		/* Retry if comparison fails or rseq aborts. */
556 	}
557 	if (_cpu)
558 		*_cpu = cpu;
559 }
560 
561 /*
562  * Unlike a traditional lock-less linked list, the availability of an
563  * rseq primitive allows us to implement pop without concerns over
564  * ABA-type races.
565  */
566 struct percpu_list_node *this_cpu_list_pop(struct percpu_list *list,
567 					   int *_cpu)
568 {
569 	struct percpu_list_node *node = NULL;
570 	int cpu;
571 
572 	for (;;) {
573 		struct percpu_list_node *head;
574 		intptr_t *targetptr, expectnot, *load;
575 		long offset;
576 		int ret;
577 
578 		cpu = rseq_cpu_start();
579 		targetptr = (intptr_t *)&list->c[cpu].head;
580 		expectnot = (intptr_t)NULL;
581 		offset = offsetof(struct percpu_list_node, next);
582 		load = (intptr_t *)&head;
583 		ret = rseq_cmpnev_storeoffp_load(targetptr, expectnot,
584 						   offset, load, cpu);
585 		if (rseq_likely(!ret)) {
586 			node = head;
587 			break;
588 		}
589 		if (ret > 0)
590 			break;
591 		/* Retry if rseq aborts. */
592 	}
593 	if (_cpu)
594 		*_cpu = cpu;
595 	return node;
596 }
597 
598 /*
599  * __percpu_list_pop is not safe against concurrent accesses. Should
600  * only be used on lists that are not concurrently modified.
601  */
602 struct percpu_list_node *__percpu_list_pop(struct percpu_list *list, int cpu)
603 {
604 	struct percpu_list_node *node;
605 
606 	node = list->c[cpu].head;
607 	if (!node)
608 		return NULL;
609 	list->c[cpu].head = node->next;
610 	return node;
611 }
612 
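/*
 * Thread body for the list test: repeatedly pop a node from the current
 * CPU's list and push it back, optionally yielding in between (-y) to
 * encourage migrations between the pop and the push.
 */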
613 void *test_percpu_list_thread(void *arg)
614 {
615 	long long i, reps;
616 	struct percpu_list *list = (struct percpu_list *)arg;
617 
618 	if (!opt_disable_rseq && rseq_register_current_thread())
619 		abort();
620 
621 	reps = opt_reps;
622 	for (i = 0; i < reps; i++) {
623 		struct percpu_list_node *node;
624 
625 		node = this_cpu_list_pop(list, NULL);
626 		if (opt_yield)
627 			sched_yield();  /* encourage shuffling */
628 		if (node)
629 			this_cpu_list_push(list, node, NULL);
630 	}
631 
632 	printf_verbose("tid %d: number of rseq aborts: %d, signals delivered: %u\n",
633 		       (int) rseq_gettid(), nr_abort, signals_delivered);
634 	if (!opt_disable_rseq && rseq_unregister_current_thread())
635 		abort();
636 
637 	return NULL;
638 }
639 
640 /* Simultaneous modification to a per-cpu linked list from many threads.  */
641 void test_percpu_list(void)
642 {
643 	const int num_threads = opt_threads;
644 	int i, j, ret;
645 	uint64_t sum = 0, expected_sum = 0;
646 	struct percpu_list list;
647 	pthread_t test_threads[num_threads];
648 	cpu_set_t allowed_cpus;
649 
650 	memset(&list, 0, sizeof(list));
651 
652 	/* Generate list entries for every usable cpu. */
653 	sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
654 	for (i = 0; i < CPU_SETSIZE; i++) {
655 		if (!CPU_ISSET(i, &allowed_cpus))
656 			continue;
657 		for (j = 1; j <= 100; j++) {
658 			struct percpu_list_node *node;
659 
660 			expected_sum += j;
661 
662 			node = malloc(sizeof(*node));
663 			assert(node);
664 			node->data = j;
665 			node->next = list.c[i].head;
666 			list.c[i].head = node;
667 		}
668 	}
669 
670 	for (i = 0; i < num_threads; i++) {
671 		ret = pthread_create(&test_threads[i], NULL,
672 				     test_percpu_list_thread, &list);
673 		if (ret) {
674 			errno = ret;
675 			perror("pthread_create");
676 			abort();
677 		}
678 	}
679 
680 	for (i = 0; i < num_threads; i++) {
681 		ret = pthread_join(test_threads[i], NULL);
682 		if (ret) {
683 			errno = ret;
684 			perror("pthread_join");
685 			abort();
686 		}
687 	}
688 
689 	for (i = 0; i < CPU_SETSIZE; i++) {
690 		struct percpu_list_node *node;
691 
692 		if (!CPU_ISSET(i, &allowed_cpus))
693 			continue;
694 
695 		while ((node = __percpu_list_pop(&list, i))) {
696 			sum += node->data;
697 			free(node);
698 		}
699 	}
700 
701 	/*
702 	 * All entries should now be accounted for (unless some external
703 	 * actor is interfering with our allowed affinity while this
704 	 * test is running).
705 	 */
706 	assert(sum == expected_sum);
707 }
708 
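/*
 * Push onto the current CPU's pointer array.  The rseq sequence first
 * speculatively stores the node into array[offset] and then commits by
 * storing offset + 1, so an abort leaves the buffer unchanged.  With -M,
 * the release variant orders the node store before the offset commit.
 */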
709 bool this_cpu_buffer_push(struct percpu_buffer *buffer,
710 			  struct percpu_buffer_node *node,
711 			  int *_cpu)
712 {
713 	bool result = false;
714 	int cpu;
715 
716 	for (;;) {
717 		intptr_t *targetptr_spec, newval_spec;
718 		intptr_t *targetptr_final, newval_final;
719 		intptr_t offset;
720 		int ret;
721 
722 		cpu = rseq_cpu_start();
723 		offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
724 		if (offset == buffer->c[cpu].buflen)
725 			break;
726 		newval_spec = (intptr_t)node;
727 		targetptr_spec = (intptr_t *)&buffer->c[cpu].array[offset];
728 		newval_final = offset + 1;
729 		targetptr_final = &buffer->c[cpu].offset;
730 		if (opt_mb)
731 			ret = rseq_cmpeqv_trystorev_storev_release(
732 				targetptr_final, offset, targetptr_spec,
733 				newval_spec, newval_final, cpu);
734 		else
735 			ret = rseq_cmpeqv_trystorev_storev(targetptr_final,
736 				offset, targetptr_spec, newval_spec,
737 				newval_final, cpu);
738 		if (rseq_likely(!ret)) {
739 			result = true;
740 			break;
741 		}
742 		/* Retry if comparison fails or rseq aborts. */
743 	}
744 	if (_cpu)
745 		*_cpu = cpu;
746 	return result;
747 }
748 
749 struct percpu_buffer_node *this_cpu_buffer_pop(struct percpu_buffer *buffer,
750 					       int *_cpu)
751 {
752 	struct percpu_buffer_node *head;
753 	int cpu;
754 
755 	for (;;) {
756 		intptr_t *targetptr, newval;
757 		intptr_t offset;
758 		int ret;
759 
760 		cpu = rseq_cpu_start();
761 		/* Load offset with single-copy atomicity. */
762 		offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
763 		if (offset == 0) {
764 			head = NULL;
765 			break;
766 		}
767 		head = RSEQ_READ_ONCE(buffer->c[cpu].array[offset - 1]);
768 		newval = offset - 1;
769 		targetptr = (intptr_t *)&buffer->c[cpu].offset;
770 		ret = rseq_cmpeqv_cmpeqv_storev(targetptr, offset,
771 			(intptr_t *)&buffer->c[cpu].array[offset - 1],
772 			(intptr_t)head, newval, cpu);
773 		if (rseq_likely(!ret))
774 			break;
775 		/* Retry if comparison fails or rseq aborts. */
776 	}
777 	if (_cpu)
778 		*_cpu = cpu;
779 	return head;
780 }
781 
782 /*
783  * __percpu_buffer_pop is not safe against concurrent accesses. Should
784  * only be used on buffers that are not concurrently modified.
785  */
786 struct percpu_buffer_node *__percpu_buffer_pop(struct percpu_buffer *buffer,
787 					       int cpu)
788 {
789 	struct percpu_buffer_node *head;
790 	intptr_t offset;
791 
792 	offset = buffer->c[cpu].offset;
793 	if (offset == 0)
794 		return NULL;
795 	head = buffer->c[cpu].array[offset - 1];
796 	buffer->c[cpu].offset = offset - 1;
797 	return head;
798 }
799 
800 void *test_percpu_buffer_thread(void *arg)
801 {
802 	long long i, reps;
803 	struct percpu_buffer *buffer = (struct percpu_buffer *)arg;
804 
805 	if (!opt_disable_rseq && rseq_register_current_thread())
806 		abort();
807 
808 	reps = opt_reps;
809 	for (i = 0; i < reps; i++) {
810 		struct percpu_buffer_node *node;
811 
812 		node = this_cpu_buffer_pop(buffer, NULL);
813 		if (opt_yield)
814 			sched_yield();  /* encourage shuffling */
815 		if (node) {
816 			if (!this_cpu_buffer_push(buffer, node, NULL)) {
817 				/* Should increase buffer size. */
818 				abort();
819 			}
820 		}
821 	}
822 
823 	printf_verbose("tid %d: number of rseq aborts: %d, signals delivered: %u\n",
824 		       (int) rseq_gettid(), nr_abort, signals_delivered);
825 	if (!opt_disable_rseq && rseq_unregister_current_thread())
826 		abort();
827 
828 	return NULL;
829 }
830 
831 /* Simultaneous modification to a per-cpu buffer from many threads.  */
832 void test_percpu_buffer(void)
833 {
834 	const int num_threads = opt_threads;
835 	int i, j, ret;
836 	uint64_t sum = 0, expected_sum = 0;
837 	struct percpu_buffer buffer;
838 	pthread_t test_threads[num_threads];
839 	cpu_set_t allowed_cpus;
840 
841 	memset(&buffer, 0, sizeof(buffer));
842 
843 	/* Generate list entries for every usable cpu. */
844 	sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
845 	for (i = 0; i < CPU_SETSIZE; i++) {
846 		if (!CPU_ISSET(i, &allowed_cpus))
847 			continue;
848 		/* Worst case is every item in the same CPU. */
849 		buffer.c[i].array =
850 			malloc(sizeof(*buffer.c[i].array) * CPU_SETSIZE *
851 			       BUFFER_ITEM_PER_CPU);
852 		assert(buffer.c[i].array);
853 		buffer.c[i].buflen = CPU_SETSIZE * BUFFER_ITEM_PER_CPU;
854 		for (j = 1; j <= BUFFER_ITEM_PER_CPU; j++) {
855 			struct percpu_buffer_node *node;
856 
857 			expected_sum += j;
858 
859 			/*
860 			 * We could theoretically put the word-sized
861 			 * "data" directly in the buffer. However, we
862 			 * want to model objects that would not fit
863 			 * within a single word, so allocate an object
864 			 * for each node.
865 			 */
866 			node = malloc(sizeof(*node));
867 			assert(node);
868 			node->data = j;
869 			buffer.c[i].array[j - 1] = node;
870 			buffer.c[i].offset++;
871 		}
872 	}
873 
874 	for (i = 0; i < num_threads; i++) {
875 		ret = pthread_create(&test_threads[i], NULL,
876 				     test_percpu_buffer_thread, &buffer);
877 		if (ret) {
878 			errno = ret;
879 			perror("pthread_create");
880 			abort();
881 		}
882 	}
883 
884 	for (i = 0; i < num_threads; i++) {
885 		ret = pthread_join(test_threads[i], NULL);
886 		if (ret) {
887 			errno = ret;
888 			perror("pthread_join");
889 			abort();
890 		}
891 	}
892 
893 	for (i = 0; i < CPU_SETSIZE; i++) {
894 		struct percpu_buffer_node *node;
895 
896 		if (!CPU_ISSET(i, &allowed_cpus))
897 			continue;
898 
899 		while ((node = __percpu_buffer_pop(&buffer, i))) {
900 			sum += node->data;
901 			free(node);
902 		}
903 		free(buffer.c[i].array);
904 	}
905 
906 	/*
907 	 * All entries should now be accounted for (unless some external
908 	 * actor is interfering with our allowed affinity while this
909 	 * test is running).
910 	 */
911 	assert(sum == expected_sum);
912 }
913 
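/*
 * Same idea as this_cpu_buffer_push(), but the whole node is copied into
 * the ring with rseq_cmpeqv_trymemcpy_storev() before the offset is
 * committed, modelling objects larger than a single word.
 */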
914 bool this_cpu_memcpy_buffer_push(struct percpu_memcpy_buffer *buffer,
915 				 struct percpu_memcpy_buffer_node item,
916 				 int *_cpu)
917 {
918 	bool result = false;
919 	int cpu;
920 
921 	for (;;) {
922 		intptr_t *targetptr_final, newval_final, offset;
923 		char *destptr, *srcptr;
924 		size_t copylen;
925 		int ret;
926 
927 		cpu = rseq_cpu_start();
928 		/* Load offset with single-copy atomicity. */
929 		offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
930 		if (offset == buffer->c[cpu].buflen)
931 			break;
932 		destptr = (char *)&buffer->c[cpu].array[offset];
933 		srcptr = (char *)&item;
934 		/* copylen must be <= 4kB. */
935 		copylen = sizeof(item);
936 		newval_final = offset + 1;
937 		targetptr_final = &buffer->c[cpu].offset;
938 		if (opt_mb)
939 			ret = rseq_cmpeqv_trymemcpy_storev_release(
940 				targetptr_final, offset,
941 				destptr, srcptr, copylen,
942 				newval_final, cpu);
943 		else
944 			ret = rseq_cmpeqv_trymemcpy_storev(targetptr_final,
945 				offset, destptr, srcptr, copylen,
946 				newval_final, cpu);
947 		if (rseq_likely(!ret)) {
948 			result = true;
949 			break;
950 		}
951 		/* Retry if comparison fails or rseq aborts. */
952 	}
953 	if (_cpu)
954 		*_cpu = cpu;
955 	return result;
956 }
957 
958 bool this_cpu_memcpy_buffer_pop(struct percpu_memcpy_buffer *buffer,
959 				struct percpu_memcpy_buffer_node *item,
960 				int *_cpu)
961 {
962 	bool result = false;
963 	int cpu;
964 
965 	for (;;) {
966 		intptr_t *targetptr_final, newval_final, offset;
967 		char *destptr, *srcptr;
968 		size_t copylen;
969 		int ret;
970 
971 		cpu = rseq_cpu_start();
972 		/* Load offset with single-copy atomicity. */
973 		offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
974 		if (offset == 0)
975 			break;
976 		destptr = (char *)item;
977 		srcptr = (char *)&buffer->c[cpu].array[offset - 1];
978 		/* copylen must be <= 4kB. */
979 		copylen = sizeof(*item);
980 		newval_final = offset - 1;
981 		targetptr_final = &buffer->c[cpu].offset;
982 		ret = rseq_cmpeqv_trymemcpy_storev(targetptr_final,
983 			offset, destptr, srcptr, copylen,
984 			newval_final, cpu);
985 		if (rseq_likely(!ret)) {
986 			result = true;
987 			break;
988 		}
989 		/* Retry if comparison fails or rseq aborts. */
990 	}
991 	if (_cpu)
992 		*_cpu = cpu;
993 	return result;
994 }
995 
996 /*
997  * __percpu_memcpy_buffer_pop is not safe against concurrent accesses. Should
998  * only be used on buffers that are not concurrently modified.
999  */
1000 bool __percpu_memcpy_buffer_pop(struct percpu_memcpy_buffer *buffer,
1001 				struct percpu_memcpy_buffer_node *item,
1002 				int cpu)
1003 {
1004 	intptr_t offset;
1005 
1006 	offset = buffer->c[cpu].offset;
1007 	if (offset == 0)
1008 		return false;
1009 	memcpy(item, &buffer->c[cpu].array[offset - 1], sizeof(*item));
1010 	buffer->c[cpu].offset = offset - 1;
1011 	return true;
1012 }
1013 
1014 void *test_percpu_memcpy_buffer_thread(void *arg)
1015 {
1016 	long long i, reps;
1017 	struct percpu_memcpy_buffer *buffer = (struct percpu_memcpy_buffer *)arg;
1018 
1019 	if (!opt_disable_rseq && rseq_register_current_thread())
1020 		abort();
1021 
1022 	reps = opt_reps;
1023 	for (i = 0; i < reps; i++) {
1024 		struct percpu_memcpy_buffer_node item;
1025 		bool result;
1026 
1027 		result = this_cpu_memcpy_buffer_pop(buffer, &item, NULL);
1028 		if (opt_yield)
1029 			sched_yield();  /* encourage shuffling */
1030 		if (result) {
1031 			if (!this_cpu_memcpy_buffer_push(buffer, item, NULL)) {
1032 				/* Should increase buffer size. */
1033 				abort();
1034 			}
1035 		}
1036 	}
1037 
1038 	printf_verbose("tid %d: number of rseq aborts: %d, signals delivered: %u\n",
1039 		       (int) rseq_gettid(), nr_abort, signals_delivered);
1040 	if (!opt_disable_rseq && rseq_unregister_current_thread())
1041 		abort();
1042 
1043 	return NULL;
1044 }
1045 
1046 /* Simultaneous modification to a per-cpu buffer from many threads.  */
1047 void test_percpu_memcpy_buffer(void)
1048 {
1049 	const int num_threads = opt_threads;
1050 	int i, j, ret;
1051 	uint64_t sum = 0, expected_sum = 0;
1052 	struct percpu_memcpy_buffer buffer;
1053 	pthread_t test_threads[num_threads];
1054 	cpu_set_t allowed_cpus;
1055 
1056 	memset(&buffer, 0, sizeof(buffer));
1057 
1058 	/* Generate list entries for every usable cpu. */
1059 	sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
1060 	for (i = 0; i < CPU_SETSIZE; i++) {
1061 		if (!CPU_ISSET(i, &allowed_cpus))
1062 			continue;
1063 		/* Worst case is every item in the same CPU. */
1064 		buffer.c[i].array =
1065 			malloc(sizeof(*buffer.c[i].array) * CPU_SETSIZE *
1066 			       MEMCPY_BUFFER_ITEM_PER_CPU);
1067 		assert(buffer.c[i].array);
1068 		buffer.c[i].buflen = CPU_SETSIZE * MEMCPY_BUFFER_ITEM_PER_CPU;
1069 		for (j = 1; j <= MEMCPY_BUFFER_ITEM_PER_CPU; j++) {
1070 			expected_sum += 2 * j + 1;
1071 
1072 			/*
1073 			 * We could theoretically put the word-sized
1074 			 * "data" directly in the buffer. However, we
1075 			 * want to model objects that would not fit
1076 			 * within a single word, so allocate an object
1077 			 * for each node.
1078 			 */
1079 			buffer.c[i].array[j - 1].data1 = j;
1080 			buffer.c[i].array[j - 1].data2 = j + 1;
1081 			buffer.c[i].offset++;
1082 		}
1083 	}
1084 
1085 	for (i = 0; i < num_threads; i++) {
1086 		ret = pthread_create(&test_threads[i], NULL,
1087 				     test_percpu_memcpy_buffer_thread,
1088 				     &buffer);
1089 		if (ret) {
1090 			errno = ret;
1091 			perror("pthread_create");
1092 			abort();
1093 		}
1094 	}
1095 
1096 	for (i = 0; i < num_threads; i++) {
1097 		ret = pthread_join(test_threads[i], NULL);
1098 		if (ret) {
1099 			errno = ret;
1100 			perror("pthread_join");
1101 			abort();
1102 		}
1103 	}
1104 
1105 	for (i = 0; i < CPU_SETSIZE; i++) {
1106 		struct percpu_memcpy_buffer_node item;
1107 
1108 		if (!CPU_ISSET(i, &allowed_cpus))
1109 			continue;
1110 
1111 		while (__percpu_memcpy_buffer_pop(&buffer, &item, i)) {
1112 			sum += item.data1;
1113 			sum += item.data2;
1114 		}
1115 		free(buffer.c[i].array);
1116 	}
1117 
1118 	/*
1119 	 * All entries should now be accounted for (unless some external
1120 	 * actor is interfering with our allowed affinity while this
1121 	 * test is running).
1122 	 */
1123 	assert(sum == expected_sum);
1124 }
1125 
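/*
 * SIGUSR1 handler used by the -k option: RSEQ_INJECT_C() raises the signal
 * from inside critical sections, and the handler simply counts deliveries.
 */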
1126 static void test_signal_interrupt_handler(int signo)
1127 {
1128 	signals_delivered++;
1129 }
1130 
1131 static int set_signal_handler(void)
1132 {
1133 	int ret = 0;
1134 	struct sigaction sa;
1135 	sigset_t sigset;
1136 
1137 	ret = sigemptyset(&sigset);
1138 	if (ret < 0) {
1139 		perror("sigemptyset");
1140 		return ret;
1141 	}
1142 
1143 	sa.sa_handler = test_signal_interrupt_handler;
1144 	sa.sa_mask = sigset;
1145 	sa.sa_flags = 0;
1146 	ret = sigaction(SIGUSR1, &sa, NULL);
1147 	if (ret < 0) {
1148 		perror("sigaction");
1149 		return ret;
1150 	}
1151 
1152 	printf_verbose("Signal handler set for SIGUSR1\n");
1153 
1154 	return ret;
1155 }
1156 
1157 /* Test the MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ membarrier command. */
1158 #ifdef RSEQ_ARCH_HAS_OFFSET_DEREF_ADDV
1159 struct test_membarrier_thread_args {
1160 	int stop;
1161 	intptr_t percpu_list_ptr;
1162 };
1163 
1164 /* Worker threads modify data in their "active" percpu lists. */
1165 void *test_membarrier_worker_thread(void *arg)
1166 {
1167 	struct test_membarrier_thread_args *args =
1168 		(struct test_membarrier_thread_args *)arg;
1169 	const long long iters = opt_reps;
1170 	long long i;
1171 
1172 	if (rseq_register_current_thread()) {
1173 		fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
1174 			errno, strerror(errno));
1175 		abort();
1176 	}
1177 
1178 	/* Wait for initialization. */
1179 	while (!atomic_load(&args->percpu_list_ptr)) {}
1180 
1181 	for (i = 0; i < iters; ++i) {
1182 		int ret;
1183 
1184 		do {
1185 			int cpu = rseq_cpu_start();
1186 
1187 			ret = rseq_offset_deref_addv(&args->percpu_list_ptr,
1188 				sizeof(struct percpu_list_entry) * cpu, 1, cpu);
1189 		} while (rseq_unlikely(ret));
1190 	}
1191 
1192 	if (rseq_unregister_current_thread()) {
1193 		fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
1194 			errno, strerror(errno));
1195 		abort();
1196 	}
1197 	return NULL;
1198 }
1199 
1200 void test_membarrier_init_percpu_list(struct percpu_list *list)
1201 {
1202 	int i;
1203 
1204 	memset(list, 0, sizeof(*list));
1205 	for (i = 0; i < CPU_SETSIZE; i++) {
1206 		struct percpu_list_node *node;
1207 
1208 		node = malloc(sizeof(*node));
1209 		assert(node);
1210 		node->data = 0;
1211 		node->next = NULL;
1212 		list->c[i].head = node;
1213 	}
1214 }
1215 
1216 void test_membarrier_free_percpu_list(struct percpu_list *list)
1217 {
1218 	int i;
1219 
1220 	for (i = 0; i < CPU_SETSIZE; i++)
1221 		free(list->c[i].head);
1222 }
1223 
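/* Raw syscall wrapper for membarrier(2); the C library provides none. */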
1224 static int sys_membarrier(int cmd, int flags, int cpu_id)
1225 {
1226 	return syscall(__NR_membarrier, cmd, flags, cpu_id);
1227 }
1228 
1229 /*
1230  * The manager thread swaps per-cpu lists that worker threads see,
1231  * and validates that there are no unexpected modifications.
1232  */
1233 void *test_membarrier_manager_thread(void *arg)
1234 {
1235 	struct test_membarrier_thread_args *args =
1236 		(struct test_membarrier_thread_args *)arg;
1237 	struct percpu_list list_a, list_b;
1238 	intptr_t expect_a = 0, expect_b = 0;
1239 	int cpu_a = 0, cpu_b = 0;
1240 
1241 	if (rseq_register_current_thread()) {
1242 		fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
1243 			errno, strerror(errno));
1244 		abort();
1245 	}
1246 
1247 	/* Init lists. */
1248 	test_membarrier_init_percpu_list(&list_a);
1249 	test_membarrier_init_percpu_list(&list_b);
1250 
1251 	atomic_store(&args->percpu_list_ptr, (intptr_t)&list_a);
1252 
1253 	while (!atomic_load(&args->stop)) {
1254 		/* list_a is "active". */
1255 		cpu_a = rand() % CPU_SETSIZE;
1256 		/*
1257 		 * As list_b is "inactive", we should never see changes
1258 		 * to list_b.
1259 		 */
1260 		if (expect_b != atomic_load(&list_b.c[cpu_b].head->data)) {
1261 			fprintf(stderr, "Membarrier test failed\n");
1262 			abort();
1263 		}
1264 
1265 		/* Make list_b "active". */
1266 		atomic_store(&args->percpu_list_ptr, (intptr_t)&list_b);
1267 		if (sys_membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ,
1268 					MEMBARRIER_CMD_FLAG_CPU, cpu_a) &&
1269 				errno != ENXIO /* missing CPU */) {
1270 			perror("sys_membarrier");
1271 			abort();
1272 		}
1273 		/*
1274 		 * Cpu A should now only modify list_b, so the values
1275 		 * in list_a should be stable.
1276 		 */
1277 		expect_a = atomic_load(&list_a.c[cpu_a].head->data);
1278 
1279 		cpu_b = rand() % CPU_SETSIZE;
1280 		/*
1281 		 * As list_a is "inactive", we should never see changes
1282 		 * to list_a.
1283 		 */
1284 		if (expect_a != atomic_load(&list_a.c[cpu_a].head->data)) {
1285 			fprintf(stderr, "Membarrier test failed\n");
1286 			abort();
1287 		}
1288 
1289 		/* Make list_a "active". */
1290 		atomic_store(&args->percpu_list_ptr, (intptr_t)&list_a);
1291 		if (sys_membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ,
1292 					MEMBARRIER_CMD_FLAG_CPU, cpu_b) &&
1293 				errno != ENXIO /* missing CPU */) {
1294 			perror("sys_membarrier");
1295 			abort();
1296 		}
1297 		/* Remember a value from list_b. */
1298 		expect_b = atomic_load(&list_b.c[cpu_b].head->data);
1299 	}
1300 
1301 	test_membarrier_free_percpu_list(&list_a);
1302 	test_membarrier_free_percpu_list(&list_b);
1303 
1304 	if (rseq_unregister_current_thread()) {
1305 		fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
1306 			errno, strerror(errno));
1307 		abort();
1308 	}
1309 	return NULL;
1310 }
1311 
1312 void test_membarrier(void)
1313 {
1314 	const int num_threads = opt_threads;
1315 	struct test_membarrier_thread_args thread_args;
1316 	pthread_t worker_threads[num_threads];
1317 	pthread_t manager_thread;
1318 	int i, ret;
1319 
1320 	if (sys_membarrier(MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ, 0, 0)) {
1321 		perror("sys_membarrier");
1322 		abort();
1323 	}
1324 
1325 	thread_args.stop = 0;
1326 	thread_args.percpu_list_ptr = 0;
1327 	ret = pthread_create(&manager_thread, NULL,
1328 			test_membarrier_manager_thread, &thread_args);
1329 	if (ret) {
1330 		errno = ret;
1331 		perror("pthread_create");
1332 		abort();
1333 	}
1334 
1335 	for (i = 0; i < num_threads; i++) {
1336 		ret = pthread_create(&worker_threads[i], NULL,
1337 				test_membarrier_worker_thread, &thread_args);
1338 		if (ret) {
1339 			errno = ret;
1340 			perror("pthread_create");
1341 			abort();
1342 		}
1343 	}
1344 
1345 
1346 	for (i = 0; i < num_threads; i++) {
1347 		ret = pthread_join(worker_threads[i], NULL);
1348 		if (ret) {
1349 			errno = ret;
1350 			perror("pthread_join");
1351 			abort();
1352 		}
1353 	}
1354 
1355 	atomic_store(&thread_args.stop, 1);
1356 	ret = pthread_join(manager_thread, NULL);
1357 	if (ret) {
1358 		errno = ret;
1359 		perror("pthread_join");
1360 		abort();
1361 	}
1362 }
1363 #else /* RSEQ_ARCH_HAS_OFFSET_DEREF_ADDV */
1364 void test_membarrier(void)
1365 {
1366 	fprintf(stderr, "rseq_offset_deref_addv is not implemented on this architecture. "
1367 			"Skipping membarrier test.\n");
1368 }
1369 #endif
1370 
1371 static void show_usage(int argc, char **argv)
1372 {
1373 	printf("Usage: %s <OPTIONS>\n",
1374 		argv[0]);
1375 	printf("OPTIONS:\n");
1376 	printf("	[-1 loops] Number of loops for delay injection 1\n");
1377 	printf("	[-2 loops] Number of loops for delay injection 2\n");
1378 	printf("	[-3 loops] Number of loops for delay injection 3\n");
1379 	printf("	[-4 loops] Number of loops for delay injection 4\n");
1380 	printf("	[-5 loops] Number of loops for delay injection 5\n");
1381 	printf("	[-6 loops] Number of loops for delay injection 6\n");
1382 	printf("	[-7 loops] Number of loops for delay injection 7 (-1 to enable -m)\n");
1383 	printf("	[-8 loops] Number of loops for delay injection 8 (-1 to enable -m)\n");
1384 	printf("	[-9 loops] Number of loops for delay injection 9 (-1 to enable -m)\n");
1385 	printf("	[-m N] Yield/sleep/kill every modulo N (default 0: disabled) (>= 0)\n");
1386 	printf("	[-y] Yield\n");
1387 	printf("	[-k] Kill thread with signal\n");
1388 	printf("	[-s S] S: =0: disabled (default), >0: sleep time (ms)\n");
1389 	printf("	[-t N] Number of threads (default 200)\n");
1390 	printf("	[-r N] Number of repetitions per thread (default 5000)\n");
1391 	printf("	[-d] Disable rseq system call (no initialization)\n");
1392 	printf("	[-D M] Disable rseq for each M threads\n");
1393 	printf("	[-T test] Choose test: (s)pinlock, (l)ist, (b)uffer, (m)emcpy, (i)ncrement, membarrie(r)\n");
1394 	printf("	[-M] Push into buffer and memcpy buffer with memory barriers.\n");
1395 	printf("	[-v] Verbose output.\n");
1396 	printf("	[-h] Show this help.\n");
1397 	printf("\n");
1398 }
1399 
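/*
 * Example invocations (assuming this file is built as ./param_test; the
 * actual binary name depends on the selftests build):
 *
 *   ./param_test -T s -t 16 -r 100000 -v
 *       per-cpu spinlock test, 16 threads, 100000 reps each, verbose
 *   ./param_test -T l -7 -1 -m 10 -y
 *       per-cpu list test, yielding at injection point 7 every 10th pass
 *   ./param_test -T r
 *       membarrier test (needs rseq_offset_deref_addv support)
 */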
1400 int main(int argc, char **argv)
1401 {
1402 	int i;
1403 
1404 	for (i = 1; i < argc; i++) {
1405 		if (argv[i][0] != '-')
1406 			continue;
1407 		switch (argv[i][1]) {
1408 		case '1':
1409 		case '2':
1410 		case '3':
1411 		case '4':
1412 		case '5':
1413 		case '6':
1414 		case '7':
1415 		case '8':
1416 		case '9':
1417 			if (argc < i + 2) {
1418 				show_usage(argc, argv);
1419 				goto error;
1420 			}
1421 			loop_cnt[argv[i][1] - '0'] = atol(argv[i + 1]);
1422 			i++;
1423 			break;
1424 		case 'm':
1425 			if (argc < i + 2) {
1426 				show_usage(argc, argv);
1427 				goto error;
1428 			}
1429 			opt_modulo = atol(argv[i + 1]);
1430 			if (opt_modulo < 0) {
1431 				show_usage(argc, argv);
1432 				goto error;
1433 			}
1434 			i++;
1435 			break;
1436 		case 's':
1437 			if (argc < i + 2) {
1438 				show_usage(argc, argv);
1439 				goto error;
1440 			}
1441 			opt_sleep = atol(argv[i + 1]);
1442 			if (opt_sleep < 0) {
1443 				show_usage(argc, argv);
1444 				goto error;
1445 			}
1446 			i++;
1447 			break;
1448 		case 'y':
1449 			opt_yield = 1;
1450 			break;
1451 		case 'k':
1452 			opt_signal = 1;
1453 			break;
1454 		case 'd':
1455 			opt_disable_rseq = 1;
1456 			break;
1457 		case 'D':
1458 			if (argc < i + 2) {
1459 				show_usage(argc, argv);
1460 				goto error;
1461 			}
1462 			opt_disable_mod = atol(argv[i + 1]);
1463 			if (opt_disable_mod < 0) {
1464 				show_usage(argc, argv);
1465 				goto error;
1466 			}
1467 			i++;
1468 			break;
1469 		case 't':
1470 			if (argc < i + 2) {
1471 				show_usage(argc, argv);
1472 				goto error;
1473 			}
1474 			opt_threads = atol(argv[i + 1]);
1475 			if (opt_threads < 0) {
1476 				show_usage(argc, argv);
1477 				goto error;
1478 			}
1479 			i++;
1480 			break;
1481 		case 'r':
1482 			if (argc < i + 2) {
1483 				show_usage(argc, argv);
1484 				goto error;
1485 			}
1486 			opt_reps = atoll(argv[i + 1]);
1487 			if (opt_reps < 0) {
1488 				show_usage(argc, argv);
1489 				goto error;
1490 			}
1491 			i++;
1492 			break;
1493 		case 'h':
1494 			show_usage(argc, argv);
1495 			goto end;
1496 		case 'T':
1497 			if (argc < i + 2) {
1498 				show_usage(argc, argv);
1499 				goto error;
1500 			}
1501 			opt_test = *argv[i + 1];
1502 			switch (opt_test) {
1503 			case 's':
1504 			case 'l':
1505 			case 'i':
1506 			case 'b':
1507 			case 'm':
1508 			case 'r':
1509 				break;
1510 			default:
1511 				show_usage(argc, argv);
1512 				goto error;
1513 			}
1514 			i++;
1515 			break;
1516 		case 'v':
1517 			verbose = 1;
1518 			break;
1519 		case 'M':
1520 			opt_mb = 1;
1521 			break;
1522 		default:
1523 			show_usage(argc, argv);
1524 			goto error;
1525 		}
1526 	}
1527 
1528 	loop_cnt_1 = loop_cnt[1];
1529 	loop_cnt_2 = loop_cnt[2];
1530 	loop_cnt_3 = loop_cnt[3];
1531 	loop_cnt_4 = loop_cnt[4];
1532 	loop_cnt_5 = loop_cnt[5];
1533 	loop_cnt_6 = loop_cnt[6];
1534 
1535 	if (set_signal_handler())
1536 		goto error;
1537 
1538 	if (!opt_disable_rseq && rseq_register_current_thread())
1539 		goto error;
1540 	switch (opt_test) {
1541 	case 's':
1542 		printf_verbose("spinlock\n");
1543 		test_percpu_spinlock();
1544 		break;
1545 	case 'l':
1546 		printf_verbose("linked list\n");
1547 		test_percpu_list();
1548 		break;
1549 	case 'b':
1550 		printf_verbose("buffer\n");
1551 		test_percpu_buffer();
1552 		break;
1553 	case 'm':
1554 		printf_verbose("memcpy buffer\n");
1555 		test_percpu_memcpy_buffer();
1556 		break;
1557 	case 'i':
1558 		printf_verbose("counter increment\n");
1559 		test_percpu_inc();
1560 		break;
1561 	case 'r':
1562 		printf_verbose("membarrier\n");
1563 		test_membarrier();
1564 		break;
1565 	}
1566 	if (!opt_disable_rseq && rseq_unregister_current_thread())
1567 		abort();
1568 end:
1569 	return 0;
1570 
1571 error:
1572 	return -1;
1573 }
1574