// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2023 Isovalent */

#include <errno.h>
#include <unistd.h>
#include <pthread.h>

#include <bpf/bpf.h>
#include <bpf/libbpf.h>

#include <bpf_util.h>
#include <test_maps.h>

#include "map_percpu_stats.skel.h"

#define MAX_ENTRIES			16384
#define MAX_ENTRIES_HASH_OF_MAPS	64
#define N_THREADS			8
#define MAX_MAP_KEY_SIZE		4

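/* Read bpf_map_info for @map_fd; any failure aborts the test via CHECK() */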
static void map_info(int map_fd, struct bpf_map_info *info)
{
	__u32 len = sizeof(*info);
	int ret;

	memset(info, 0, sizeof(*info));

	ret = bpf_obj_get_info_by_fd(map_fd, info, &len);
	CHECK(ret < 0, "bpf_obj_get_info_by_fd", "error: %s\n", strerror(errno));
}

static const char *map_type_to_s(__u32 type)
{
	switch (type) {
	case BPF_MAP_TYPE_HASH:
		return "HASH";
	case BPF_MAP_TYPE_PERCPU_HASH:
		return "PERCPU_HASH";
	case BPF_MAP_TYPE_LRU_HASH:
		return "LRU_HASH";
	case BPF_MAP_TYPE_LRU_PERCPU_HASH:
		return "LRU_PERCPU_HASH";
	case BPF_MAP_TYPE_HASH_OF_MAPS:
		return "HASH_OF_MAPS";
	default:
		return "<define-me>";
	}
}

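/* Count the elements currently in the map by walking all keys with
 * bpf_map_get_next_key(), starting from a key that is not expected to exist
 */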
static __u32 map_count_elements(__u32 type, int map_fd)
{
	__u32 key = -1;
	int n = 0;

	while (!bpf_map_get_next_key(map_fd, &key, &key))
		n++;
	return n;
}

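/* For readability at delete_all_elements() call sites */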
#define BATCH	true

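/* Delete all @count elements whose keys are listed in @keys with a single
 * bpf_map_lookup_and_delete_batch() call, and verify that exactly @count
 * elements were removed
 */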
static void delete_and_lookup_batch(int map_fd, void *keys, __u32 count)
{
	static __u8 values[(8 << 10) * MAX_ENTRIES]; /* 8K per value: enough for 1024 CPUs */
	void *in_batch = NULL, *out_batch;
	__u32 save_count = count;
	int ret;

	ret = bpf_map_lookup_and_delete_batch(map_fd,
					      &in_batch, &out_batch,
					      keys, values, &count,
					      NULL);

	/*
	 * Despite what the uapi header says, lookup_and_delete_batch returns
	 * -ENOENT once it has successfully deleted all elements, so check for
	 * that case separately
	 */
	CHECK(ret < 0 && (errno != ENOENT || !count), "bpf_map_lookup_and_delete_batch",
		       "error: %s\n", strerror(errno));

	CHECK(count != save_count,
			"bpf_map_lookup_and_delete_batch",
			"did not delete all elements: removed=%u expected=%u\n",
			count, save_count);
}

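/* Drain the map, either via one lookup-and-delete batch operation or element
 * by element, alternating bpf_map_delete_elem() and
 * bpf_map_lookup_and_delete_elem() calls
 */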
static void delete_all_elements(__u32 type, int map_fd, bool batch)
{
	static __u8 val[8 << 10]; /* enough for 1024 CPUs */
	__u32 key = -1;
	void *keys;
	__u32 i, n;
	int ret;

	keys = calloc(MAX_MAP_KEY_SIZE, MAX_ENTRIES);
	CHECK(!keys, "calloc", "error: %s\n", strerror(errno));

	for (n = 0; !bpf_map_get_next_key(map_fd, &key, &key); n++)
		memcpy(keys + n * MAX_MAP_KEY_SIZE, &key, MAX_MAP_KEY_SIZE);

	if (batch) {
		/* Can't mix delete_batch and delete_and_lookup_batch because
		 * they have different semantics with respect to the keys
		 * argument. However, delete_batch utilizes map_delete_elem,
		 * so we effectively test it in the non-batch scenario.
		 */
		delete_and_lookup_batch(map_fd, keys, n);
	} else {
		/* Intentionally mix delete and lookup_and_delete so that both
		 * paths get tested; HASH_OF_MAPS does not support
		 * lookup_and_delete, so it always takes the plain delete path.
		 */
		for (i = 0; i < n; i++) {
			void *keyp = keys + i * MAX_MAP_KEY_SIZE;

			if (i % 2 || type == BPF_MAP_TYPE_HASH_OF_MAPS) {
				ret = bpf_map_delete_elem(map_fd, keyp);
				CHECK(ret < 0, "bpf_map_delete_elem",
					       "error: key %u: %s\n", i, strerror(errno));
			} else {
				ret = bpf_map_lookup_and_delete_elem(map_fd, keyp, val);
				CHECK(ret < 0, "bpf_map_lookup_and_delete_elem",
					       "error: key %u: %s\n", i, strerror(errno));
			}
		}
	}

	free(keys);
}

static bool is_lru(__u32 map_type)
{
	return map_type == BPF_MAP_TYPE_LRU_HASH ||
	       map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH;
}

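/* Arguments shared by all updater threads spawned by upsert_elements() */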
struct upsert_opts {
	__u32 map_type;
	int map_fd;
	__u32 n;
};

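/* Create a tiny 4-entry hash, used both as the inner-map template and as the
 * values inserted in HASH_OF_MAPS tests
 */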
static int create_small_hash(void)
{
	int map_fd;

	map_fd = bpf_map_create(BPF_MAP_TYPE_HASH, "small", 4, 4, 4, NULL);
	CHECK(map_fd < 0, "bpf_map_create()", "error: %s (name=%s)\n",
			strerror(errno), "small");

	return map_fd;
}

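/* Worker thread: update keys [0, n) with random values (or with freshly
 * created small maps for HASH_OF_MAPS), racing against the other workers
 */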
static void *patch_map_thread(void *arg)
{
	struct upsert_opts *opts = arg;
	int val;
	int ret;
	int i;

	for (i = 0; i < opts->n; i++) {
		if (opts->map_type == BPF_MAP_TYPE_HASH_OF_MAPS)
			val = create_small_hash();
		else
			val = rand();
		ret = bpf_map_update_elem(opts->map_fd, &i, &val, 0);
		CHECK(ret < 0, "bpf_map_update_elem", "key=%d error: %s\n", i, strerror(errno));

		if (opts->map_type == BPF_MAP_TYPE_HASH_OF_MAPS)
			close(val);
	}
	return NULL;
}

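/* Upsert keys [0, opts->n) concurrently from N_THREADS threads; all threads
 * write the same key range, so the map ends up with opts->n elements
 */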
static void upsert_elements(struct upsert_opts *opts)
{
	pthread_t threads[N_THREADS];
	int ret;
	int i;

	for (i = 0; i < ARRAY_SIZE(threads); i++) {
		ret = pthread_create(&threads[i], NULL, patch_map_thread, opts);
		CHECK(ret != 0, "pthread_create", "error: %s\n", strerror(ret));
	}

	for (i = 0; i < ARRAY_SIZE(threads); i++) {
		ret = pthread_join(threads[i], NULL);
		CHECK(ret != 0, "pthread_join", "error: %s\n", strerror(ret));
	}
}

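/* Read the output of the dump_bpf_map iterator program and parse it as a
 * decimal element count
 */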
static __u32 read_cur_elements(int iter_fd)
{
	char buf[64];
	ssize_t n;
	__u32 ret;

	n = read(iter_fd, buf, sizeof(buf) - 1);
	CHECK(n <= 0, "read", "error: %s\n", strerror(errno));
	buf[n] = '\0';

	errno = 0;
	ret = (__u32)strtol(buf, NULL, 10);
	CHECK(errno != 0, "strtol", "error: %s\n", strerror(errno));

	return ret;
}

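/* Count elements via a BPF map iterator: load the map_percpu_stats skeleton,
 * point it at @map_id, attach it to the bpf_map iterator target and read the
 * element count back through the iterator fd
 */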
static __u32 get_cur_elements(int map_id)
{
	struct map_percpu_stats *skel;
	struct bpf_link *link;
	__u32 n_elements;
	int iter_fd;
	int ret;

	skel = map_percpu_stats__open();
	CHECK(skel == NULL, "map_percpu_stats__open", "error: %s\n", strerror(errno));

	skel->bss->target_id = map_id;

	ret = map_percpu_stats__load(skel);
	CHECK(ret != 0, "map_percpu_stats__load", "error: %s\n", strerror(errno));

	link = bpf_program__attach_iter(skel->progs.dump_bpf_map, NULL);
	CHECK(!link, "bpf_program__attach_iter", "error: %s\n", strerror(errno));

	iter_fd = bpf_iter_create(bpf_link__fd(link));
	CHECK(iter_fd < 0, "bpf_iter_create", "error: %s\n", strerror(errno));

	n_elements = read_cur_elements(iter_fd);

	close(iter_fd);
	bpf_link__destroy(link);
	map_percpu_stats__destroy(skel);

	return n_elements;
}

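/* Cross-check three element counts: the number of elements we inserted, the
 * number found by walking the keys, and the number reported by the iterator
 */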
static void check_expected_number_elements(__u32 n_inserted, int map_fd,
					   struct bpf_map_info *info)
{
	__u32 n_real;
	__u32 n_iter;

	/* Count the current number of elements in the map by iterating
	 * through all the map keys via bpf_map_get_next_key
	 */
	n_real = map_count_elements(info->type, map_fd);

	/* The "real" number of elements should be the same as the inserted
	 * number of elements in all cases except LRU maps, where some
	 * elements may have been evicted
	 */
	if (n_inserted == 0 || !is_lru(info->type))
		CHECK(n_inserted != n_real, "map_count_elements",
		      "n_real(%u) != n_inserted(%u)\n", n_real, n_inserted);

	/* Count the current number of elements in the map using an iterator */
	n_iter = get_cur_elements(info->id);

	/* Both counts should be the same, as all updates are over */
	CHECK(n_iter != n_real, "get_cur_elements",
	      "n_iter=%u, expected %u (map_type=%s,map_flags=%08x)\n",
	      n_iter, n_real, map_type_to_s(info->type), info->map_flags);
}

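/* Common test body: populate the map from multiple threads, verify the
 * element counts, then delete everything (element by element first, then
 * again via batch operations) and verify the counts each time
 */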
static void __test(int map_fd)
{
	struct upsert_opts opts = {
		.map_fd = map_fd,
	};
	struct bpf_map_info info;

	map_info(map_fd, &info);
	opts.map_type = info.type;
	opts.n = info.max_entries;

	/* Reduce the number of elements we are updating such that we don't
	 * bump into -E2BIG from non-preallocated hash maps, but still get
	 * some evictions for LRU maps
	 */
	if (opts.map_type != BPF_MAP_TYPE_HASH_OF_MAPS)
		opts.n -= 512;
	else
		opts.n /= 2;

	/*
	 * Upsert keys [0, n) under contention: N_THREADS threads race to
	 * update the same keys with random values. Check the element counts,
	 * then delete all elements and check the counts again.
	 */
	upsert_elements(&opts);
	check_expected_number_elements(opts.n, map_fd, &info);
	delete_all_elements(info.type, map_fd, !BATCH);
	check_expected_number_elements(0, map_fd, &info);

	/* Now do the same, but using batch delete operations */
	upsert_elements(&opts);
	check_expected_number_elements(opts.n, map_fd, &info);
	delete_all_elements(info.type, map_fd, BATCH);
	check_expected_number_elements(0, map_fd, &info);

	close(map_fd);
}

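/* Create a map of the given type, name and element sizes; HASH_OF_MAPS maps
 * are sized smaller than the rest
 */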
static int map_create_opts(__u32 type, const char *name,
			   struct bpf_map_create_opts *map_opts,
			   __u32 key_size, __u32 val_size)
{
	int max_entries;
	int map_fd;

	if (type == BPF_MAP_TYPE_HASH_OF_MAPS)
		max_entries = MAX_ENTRIES_HASH_OF_MAPS;
	else
		max_entries = MAX_ENTRIES;

	map_fd = bpf_map_create(type, name, key_size, val_size, max_entries, map_opts);
	CHECK(map_fd < 0, "bpf_map_create()", "error: %s (name=%s)\n",
			strerror(errno), name);

	return map_fd;
}

static int map_create(__u32 type, const char *name, struct bpf_map_create_opts *map_opts)
{
	return map_create_opts(type, name, map_opts, sizeof(int), sizeof(int));
}

static int create_hash(void)
{
	struct bpf_map_create_opts map_opts = {
		.sz = sizeof(map_opts),
		.map_flags = BPF_F_NO_PREALLOC,
	};

	return map_create(BPF_MAP_TYPE_HASH, "hash", &map_opts);
}

static int create_percpu_hash(void)
{
	struct bpf_map_create_opts map_opts = {
		.sz = sizeof(map_opts),
		.map_flags = BPF_F_NO_PREALLOC,
	};

	return map_create(BPF_MAP_TYPE_PERCPU_HASH, "percpu_hash", &map_opts);
}

static int create_hash_prealloc(void)
{
	return map_create(BPF_MAP_TYPE_HASH, "hash", NULL);
}

static int create_percpu_hash_prealloc(void)
{
	return map_create(BPF_MAP_TYPE_PERCPU_HASH, "percpu_hash_prealloc", NULL);
}

static int create_lru_hash(__u32 type, __u32 map_flags)
{
	struct bpf_map_create_opts map_opts = {
		.sz = sizeof(map_opts),
		.map_flags = map_flags,
	};

	return map_create(type, "lru_hash", &map_opts);
}

static int create_hash_of_maps(void)
{
	struct bpf_map_create_opts map_opts = {
		.sz = sizeof(map_opts),
		.map_flags = BPF_F_NO_PREALLOC,
		.inner_map_fd = create_small_hash(),
	};
	int ret;

	ret = map_create_opts(BPF_MAP_TYPE_HASH_OF_MAPS, "hash_of_maps",
			      &map_opts, sizeof(int), sizeof(int));
	close(map_opts.inner_map_fd);
	return ret;
}

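/* Each subtest below creates one flavour of hash map, runs the common test
 * body on it and reports PASS
 */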
static void map_percpu_stats_hash(void)
{
	__test(create_hash());
	printf("test_%s:PASS\n", __func__);
}

static void map_percpu_stats_percpu_hash(void)
{
	__test(create_percpu_hash());
	printf("test_%s:PASS\n", __func__);
}

static void map_percpu_stats_hash_prealloc(void)
{
	__test(create_hash_prealloc());
	printf("test_%s:PASS\n", __func__);
}

static void map_percpu_stats_percpu_hash_prealloc(void)
{
	__test(create_percpu_hash_prealloc());
	printf("test_%s:PASS\n", __func__);
}

static void map_percpu_stats_lru_hash(void)
{
	__test(create_lru_hash(BPF_MAP_TYPE_LRU_HASH, 0));
	printf("test_%s:PASS\n", __func__);
}

static void map_percpu_stats_lru_hash_no_common(void)
{
	__test(create_lru_hash(BPF_MAP_TYPE_LRU_HASH, BPF_F_NO_COMMON_LRU));
	printf("test_%s:PASS\n", __func__);
}

static void map_percpu_stats_percpu_lru_hash(void)
{
	__test(create_lru_hash(BPF_MAP_TYPE_LRU_PERCPU_HASH, 0));
	printf("test_%s:PASS\n", __func__);
}

static void map_percpu_stats_percpu_lru_hash_no_common(void)
{
	__test(create_lru_hash(BPF_MAP_TYPE_LRU_PERCPU_HASH, BPF_F_NO_COMMON_LRU));
	printf("test_%s:PASS\n", __func__);
}

static void map_percpu_stats_hash_of_maps(void)
{
	__test(create_hash_of_maps());
	printf("test_%s:PASS\n", __func__);
}

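/* Entry point invoked by the test_maps runner: exercise every supported hash
 * map flavour
 */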
void test_map_percpu_stats(void)
{
	map_percpu_stats_hash();
	map_percpu_stats_percpu_hash();
	map_percpu_stats_hash_prealloc();
	map_percpu_stats_percpu_hash_prealloc();
	map_percpu_stats_lru_hash();
	map_percpu_stats_lru_hash_no_common();
	map_percpu_stats_percpu_lru_hash();
	map_percpu_stats_percpu_lru_hash_no_common();
	map_percpu_stats_hash_of_maps();
}