1 // SPDX-License-Identifier: LGPL-2.1
2 #define _GNU_SOURCE
3 #include <assert.h>
4 #include <pthread.h>
5 #include <sched.h>
6 #include <stdint.h>
7 #include <stdio.h>
8 #include <stdlib.h>
9 #include <string.h>
10 #include <stddef.h>
11 
12 #include "../kselftest.h"
13 #include "rseq.h"
14 
15 struct percpu_lock_entry {
16 	intptr_t v;
17 } __attribute__((aligned(128)));
18 
19 struct percpu_lock {
20 	struct percpu_lock_entry c[CPU_SETSIZE];
21 };
22 
23 struct test_data_entry {
24 	intptr_t count;
25 } __attribute__((aligned(128)));
26 
27 struct spinlock_test_data {
28 	struct percpu_lock lock;
29 	struct test_data_entry c[CPU_SETSIZE];
30 	int reps;
31 };
32 
33 struct percpu_list_node {
34 	intptr_t data;
35 	struct percpu_list_node *next;
36 };
37 
38 struct percpu_list_entry {
39 	struct percpu_list_node *head;
40 } __attribute__((aligned(128)));
41 
42 struct percpu_list {
43 	struct percpu_list_entry c[CPU_SETSIZE];
44 };
45 
46 /* A simple percpu spinlock.  Returns the cpu lock was acquired on. */
47 int rseq_this_cpu_lock(struct percpu_lock *lock)
48 {
49 	int cpu;
50 
51 	for (;;) {
52 		int ret;
53 
54 		cpu = rseq_cpu_start();
55 		ret = rseq_cmpeqv_storev(&lock->c[cpu].v,
56 					 0, 1, cpu);
57 		if (rseq_likely(!ret))
58 			break;
59 		/* Retry if comparison fails or rseq aborts. */
60 	}
61 	/*
62 	 * Acquire semantic when taking lock after control dependency.
63 	 * Matches rseq_smp_store_release().
64 	 */
65 	rseq_smp_acquire__after_ctrl_dep();
66 	return cpu;
67 }
68 
69 void rseq_percpu_unlock(struct percpu_lock *lock, int cpu)
70 {
71 	assert(lock->c[cpu].v == 1);
72 	/*
73 	 * Release lock, with release semantic. Matches
74 	 * rseq_smp_acquire__after_ctrl_dep().
75 	 */
76 	rseq_smp_store_release(&lock->c[cpu].v, 0);
77 }
78 
79 void *test_percpu_spinlock_thread(void *arg)
80 {
81 	struct spinlock_test_data *data = arg;
82 	int i, cpu;
83 
84 	if (rseq_register_current_thread()) {
85 		fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
86 			errno, strerror(errno));
87 		abort();
88 	}
89 	for (i = 0; i < data->reps; i++) {
90 		cpu = rseq_this_cpu_lock(&data->lock);
91 		data->c[cpu].count++;
92 		rseq_percpu_unlock(&data->lock, cpu);
93 	}
94 	if (rseq_unregister_current_thread()) {
95 		fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
96 			errno, strerror(errno));
97 		abort();
98 	}
99 
100 	return NULL;
101 }
102 
103 /*
104  * A simple test which implements a sharded counter using a per-cpu
105  * lock.  Obviously real applications might prefer to simply use a
106  * per-cpu increment; however, this is reasonable for a test and the
107  * lock can be extended to synchronize more complicated operations.
108  */
109 void test_percpu_spinlock(void)
110 {
111 	const int num_threads = 200;
112 	int i;
113 	uint64_t sum;
114 	pthread_t test_threads[num_threads];
115 	struct spinlock_test_data data;
116 
117 	memset(&data, 0, sizeof(data));
118 	data.reps = 5000;
119 
120 	for (i = 0; i < num_threads; i++)
121 		pthread_create(&test_threads[i], NULL,
122 			       test_percpu_spinlock_thread, &data);
123 
124 	for (i = 0; i < num_threads; i++)
125 		pthread_join(test_threads[i], NULL);
126 
127 	sum = 0;
128 	for (i = 0; i < CPU_SETSIZE; i++)
129 		sum += data.c[i].count;
130 
131 	assert(sum == (uint64_t)data.reps * num_threads);
132 }
133 
134 void this_cpu_list_push(struct percpu_list *list,
135 			struct percpu_list_node *node,
136 			int *_cpu)
137 {
138 	int cpu;
139 
140 	for (;;) {
141 		intptr_t *targetptr, newval, expect;
142 		int ret;
143 
144 		cpu = rseq_cpu_start();
145 		/* Load list->c[cpu].head with single-copy atomicity. */
146 		expect = (intptr_t)RSEQ_READ_ONCE(list->c[cpu].head);
147 		newval = (intptr_t)node;
148 		targetptr = (intptr_t *)&list->c[cpu].head;
149 		node->next = (struct percpu_list_node *)expect;
150 		ret = rseq_cmpeqv_storev(targetptr, expect, newval, cpu);
151 		if (rseq_likely(!ret))
152 			break;
153 		/* Retry if comparison fails or rseq aborts. */
154 	}
155 	if (_cpu)
156 		*_cpu = cpu;
157 }
158 
159 /*
160  * Unlike a traditional lock-less linked list; the availability of a
161  * rseq primitive allows us to implement pop without concerns over
162  * ABA-type races.
163  */
164 struct percpu_list_node *this_cpu_list_pop(struct percpu_list *list,
165 					   int *_cpu)
166 {
167 	for (;;) {
168 		struct percpu_list_node *head;
169 		intptr_t *targetptr, expectnot, *load;
170 		long offset;
171 		int ret, cpu;
172 
173 		cpu = rseq_cpu_start();
174 		targetptr = (intptr_t *)&list->c[cpu].head;
175 		expectnot = (intptr_t)NULL;
176 		offset = offsetof(struct percpu_list_node, next);
177 		load = (intptr_t *)&head;
178 		ret = rseq_cmpnev_storeoffp_load(targetptr, expectnot,
179 						 offset, load, cpu);
180 		if (rseq_likely(!ret)) {
181 			if (_cpu)
182 				*_cpu = cpu;
183 			return head;
184 		}
185 		if (ret > 0)
186 			return NULL;
187 		/* Retry if rseq aborts. */
188 	}
189 }
190 
191 /*
192  * __percpu_list_pop is not safe against concurrent accesses. Should
193  * only be used on lists that are not concurrently modified.
194  */
195 struct percpu_list_node *__percpu_list_pop(struct percpu_list *list, int cpu)
196 {
197 	struct percpu_list_node *node;
198 
199 	node = list->c[cpu].head;
200 	if (!node)
201 		return NULL;
202 	list->c[cpu].head = node->next;
203 	return node;
204 }
205 
206 void *test_percpu_list_thread(void *arg)
207 {
208 	int i;
209 	struct percpu_list *list = (struct percpu_list *)arg;
210 
211 	if (rseq_register_current_thread()) {
212 		fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
213 			errno, strerror(errno));
214 		abort();
215 	}
216 
217 	for (i = 0; i < 100000; i++) {
218 		struct percpu_list_node *node;
219 
220 		node = this_cpu_list_pop(list, NULL);
221 		sched_yield();  /* encourage shuffling */
222 		if (node)
223 			this_cpu_list_push(list, node, NULL);
224 	}
225 
226 	if (rseq_unregister_current_thread()) {
227 		fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
228 			errno, strerror(errno));
229 		abort();
230 	}
231 
232 	return NULL;
233 }
234 
235 /* Simultaneous modification to a per-cpu linked list from many threads.  */
236 void test_percpu_list(void)
237 {
238 	int i, j;
239 	uint64_t sum = 0, expected_sum = 0;
240 	struct percpu_list list;
241 	pthread_t test_threads[200];
242 	cpu_set_t allowed_cpus;
243 
244 	memset(&list, 0, sizeof(list));
245 
246 	/* Generate list entries for every usable cpu. */
247 	sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
248 	for (i = 0; i < CPU_SETSIZE; i++) {
249 		if (!CPU_ISSET(i, &allowed_cpus))
250 			continue;
251 		for (j = 1; j <= 100; j++) {
252 			struct percpu_list_node *node;
253 
254 			expected_sum += j;
255 
256 			node = malloc(sizeof(*node));
257 			assert(node);
258 			node->data = j;
259 			node->next = list.c[i].head;
260 			list.c[i].head = node;
261 		}
262 	}
263 
264 	for (i = 0; i < 200; i++)
265 		pthread_create(&test_threads[i], NULL,
266 		       test_percpu_list_thread, &list);
267 
268 	for (i = 0; i < 200; i++)
269 		pthread_join(test_threads[i], NULL);
270 
271 	for (i = 0; i < CPU_SETSIZE; i++) {
272 		struct percpu_list_node *node;
273 
274 		if (!CPU_ISSET(i, &allowed_cpus))
275 			continue;
276 
277 		while ((node = __percpu_list_pop(&list, i))) {
278 			sum += node->data;
279 			free(node);
280 		}
281 	}
282 
283 	/*
284 	 * All entries should now be accounted for (unless some external
285 	 * actor is interfering with our allowed affinity while this
286 	 * test is running).
287 	 */
288 	assert(sum == expected_sum);
289 }
290 
291 int main(int argc, char **argv)
292 {
293 	if (rseq_register_current_thread()) {
294 		fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
295 			errno, strerror(errno));
296 		goto error;
297 	}
298 	printf("spinlock\n");
299 	test_percpu_spinlock();
300 	printf("percpu_list\n");
301 	test_percpu_list();
302 	if (rseq_unregister_current_thread()) {
303 		fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
304 			errno, strerror(errno));
305 		goto error;
306 	}
307 	return 0;
308 
309 error:
310 	return -1;
311 }
312