/*
 * Copyright (c) 2016 Facebook
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 */
#define _GNU_SOURCE
#include <stdio.h>
#include <unistd.h>
#include <errno.h>
#include <string.h>
#include <assert.h>
#include <sched.h>
#include <stdlib.h>
#include <time.h>

#include <sys/wait.h>
#include <sys/resource.h>

#include "bpf_sys.h"
#include "bpf_util.h"

#define LOCAL_FREE_TARGET	(128)
#define PERCPU_FREE_TARGET	(16)

static int nr_cpus;

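/* Create a map of the given type and flags, with 8-byte keys and
 * 8-byte values and room for "size" elements.
 */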
static int create_map(int map_type, int map_flags, unsigned int size)
{
	int map_fd;

	map_fd = bpf_map_create(map_type, sizeof(unsigned long long),
				sizeof(unsigned long long), size, map_flags);

	if (map_fd == -1)
		perror("bpf_map_create");

	return map_fd;
}

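/* Return 1 if every key in map1 is also present in map0 with the
 * same first (CPU 0) value, 0 otherwise.
 */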
static int map_subset(int map0, int map1)
{
	unsigned long long next_key = 0;
	unsigned long long value0[nr_cpus], value1[nr_cpus];
	int ret;

	while (!bpf_map_next_key(map1, &next_key, &next_key)) {
		assert(!bpf_map_lookup(map1, &next_key, value1));
		ret = bpf_map_lookup(map0, &next_key, value0);
		if (ret) {
			printf("key:%llu not found from map. %s(%d)\n",
			       next_key, strerror(errno), errno);
			return 0;
		}
		if (value0[0] != value1[0]) {
			printf("key:%llu value0:%llu != value1:%llu\n",
			       next_key, value0[0], value1[0]);
			return 0;
		}
	}
	return 1;
}

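/* Two maps are equal iff each one is a subset of the other. */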
static int map_equal(int lru_map, int expected)
{
	return map_subset(lru_map, expected) && map_subset(expected, lru_map);
}

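/* Pin task "pid" (0 means the calling task) to the next online CPU at
 * or after "next_to_try".  Return the next CPU index to try, or -1
 * when all CPUs have already been used.
 */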
static int sched_next_online(int pid, int next_to_try)
{
	cpu_set_t cpuset;

	if (next_to_try == nr_cpus)
		return -1;

	while (next_to_try < nr_cpus) {
		CPU_ZERO(&cpuset);
		CPU_SET(next_to_try++, &cpuset);
		if (!sched_setaffinity(pid, sizeof(cpuset), &cpuset))
			break;
	}

	return next_to_try;
}

/* Size of the LRU map is 2
 * Add key=1 (+1 key)
 * Add key=2 (+1 key)
 * Lookup Key=1
 * Add Key=3
 *   => Key=2 will be removed by LRU
 * Iterate map.  Only key=1 and key=3 are found
 */
static void test_lru_sanity0(int map_type, int map_flags)
{
	unsigned long long key, value[nr_cpus];
	int lru_map_fd, expected_map_fd;

	printf("%s (map_type:%d map_flags:0x%X): ", __func__, map_type,
	       map_flags);

	assert(sched_next_online(0, 0) != -1);

	if (map_flags & BPF_F_NO_COMMON_LRU)
		lru_map_fd = create_map(map_type, map_flags, 2 * nr_cpus);
	else
		lru_map_fd = create_map(map_type, map_flags, 2);
	assert(lru_map_fd != -1);

	expected_map_fd = create_map(BPF_MAP_TYPE_HASH, 0, 2);
	assert(expected_map_fd != -1);

	value[0] = 1234;

	/* insert key=1 element */

	key = 1;
	assert(!bpf_map_update(lru_map_fd, &key, value, BPF_NOEXIST));
	assert(!bpf_map_update(expected_map_fd, &key, value, BPF_NOEXIST));

	/* BPF_NOEXIST means: add new element if it doesn't exist */
	assert(bpf_map_update(lru_map_fd, &key, value, BPF_NOEXIST) == -1 &&
	       /* key=1 already exists */
	       errno == EEXIST);

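	/* An unrecognized flags value must be rejected with EINVAL */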
	assert(bpf_map_update(lru_map_fd, &key, value, -1) == -1 &&
	       errno == EINVAL);

	/* insert key=2 element */

	/* check that key=2 is not found */
	key = 2;
	assert(bpf_map_lookup(lru_map_fd, &key, value) == -1 &&
	       errno == ENOENT);

	/* BPF_EXIST means: update existing element */
	assert(bpf_map_update(lru_map_fd, &key, value, BPF_EXIST) == -1 &&
	       /* key=2 is not there */
	       errno == ENOENT);

	assert(!bpf_map_update(lru_map_fd, &key, value, BPF_NOEXIST));

	/* insert key=3 element */

	/* check that key=3 is not found */
	key = 3;
	assert(bpf_map_lookup(lru_map_fd, &key, value) == -1 &&
	       errno == ENOENT);

	/* check that key=1 can be found and mark the ref bit to
	 * stop LRU from removing key=1
	 */
	key = 1;
	assert(!bpf_map_lookup(lru_map_fd, &key, value));
	assert(value[0] == 1234);

	key = 3;
	assert(!bpf_map_update(lru_map_fd, &key, value, BPF_NOEXIST));
	assert(!bpf_map_update(expected_map_fd, &key, value, BPF_NOEXIST));

	/* key=2 has been removed from the LRU */
	key = 2;
	assert(bpf_map_lookup(lru_map_fd, &key, value) == -1);

	assert(map_equal(lru_map_fd, expected_map_fd));

	close(expected_map_fd);
	close(lru_map_fd);

	printf("Pass\n");
}

/* Size of the LRU map is 1.5*tgt_free
 * Insert 1 to tgt_free (+tgt_free keys)
 * Lookup 1 to tgt_free/2
 * Insert 1+tgt_free to 2*tgt_free (+tgt_free keys)
 * => 1+tgt_free/2 to tgt_free will be removed by LRU
 */
static void test_lru_sanity1(int map_type, int map_flags, unsigned int tgt_free)
{
	unsigned long long key, end_key, value[nr_cpus];
	int lru_map_fd, expected_map_fd;
	unsigned int batch_size;
	unsigned int map_size;

	if (map_flags & BPF_F_NO_COMMON_LRU)
		/* The per-CPU LRU list (i.e. each CPU has its own LRU
		 * list) does not have a local free list.  Hence, it
		 * only frees old nodes when there are no free nodes
		 * left in the LRU list, so this test does not apply
		 * to BPF_F_NO_COMMON_LRU.
		 */
		return;

	printf("%s (map_type:%d map_flags:0x%X): ", __func__, map_type,
	       map_flags);

	assert(sched_next_online(0, 0) != -1);

	batch_size = tgt_free / 2;
	assert(batch_size * 2 == tgt_free);

	map_size = tgt_free + batch_size;
	lru_map_fd = create_map(map_type, map_flags, map_size);
	assert(lru_map_fd != -1);

	expected_map_fd = create_map(BPF_MAP_TYPE_HASH, 0, map_size);
	assert(expected_map_fd != -1);

	value[0] = 1234;

	/* Insert 1 to tgt_free (+tgt_free keys) */
	end_key = 1 + tgt_free;
	for (key = 1; key < end_key; key++)
		assert(!bpf_map_update(lru_map_fd, &key, value, BPF_NOEXIST));

	/* Lookup 1 to tgt_free/2 */
	end_key = 1 + batch_size;
	for (key = 1; key < end_key; key++) {
		assert(!bpf_map_lookup(lru_map_fd, &key, value));
		assert(!bpf_map_update(expected_map_fd, &key, value,
				       BPF_NOEXIST));
	}

	/* Insert 1+tgt_free to 2*tgt_free
	 * => 1+tgt_free/2 to tgt_free will be
	 * removed by LRU
	 */
	key = 1 + tgt_free;
	end_key = key + tgt_free;
	for (; key < end_key; key++) {
		assert(!bpf_map_update(lru_map_fd, &key, value, BPF_NOEXIST));
		assert(!bpf_map_update(expected_map_fd, &key, value,
				       BPF_NOEXIST));
	}

	assert(map_equal(lru_map_fd, expected_map_fd));

	close(expected_map_fd);
	close(lru_map_fd);

	printf("Pass\n");
}

/* Size of the LRU map is 1.5*tgt_free
 * Insert 1 to tgt_free (+tgt_free keys)
 * Update 1 to tgt_free/2
 *   => The original 1 to tgt_free/2 will be removed due to
 *      the LRU shrink process
 * Re-insert 1 to tgt_free/2 again and do a lookup immediately
 * Insert 1+tgt_free to tgt_free*3/2
 * Insert 1+tgt_free*3/2 to tgt_free*5/2
 *   => Keys 1+tgt_free to tgt_free*3/2
 *      will be removed from the LRU because they have never
 *      been looked up and their ref bit is not set
 */
static void test_lru_sanity2(int map_type, int map_flags, unsigned int tgt_free)
{
	unsigned long long key, value[nr_cpus];
	unsigned long long end_key;
	int lru_map_fd, expected_map_fd;
	unsigned int batch_size;
	unsigned int map_size;

	if (map_flags & BPF_F_NO_COMMON_LRU)
		/* The per-CPU LRU list (i.e. each CPU has its own LRU
		 * list) does not have a local free list.  Hence, it
		 * only frees old nodes when there are no free nodes
		 * left in the LRU list, so this test does not apply
		 * to BPF_F_NO_COMMON_LRU.
		 */
		return;

	printf("%s (map_type:%d map_flags:0x%X): ", __func__, map_type,
	       map_flags);

	assert(sched_next_online(0, 0) != -1);

	batch_size = tgt_free / 2;
	assert(batch_size * 2 == tgt_free);

	map_size = tgt_free + batch_size;
	if (map_flags & BPF_F_NO_COMMON_LRU)
		lru_map_fd = create_map(map_type, map_flags,
					map_size * nr_cpus);
	else
		lru_map_fd = create_map(map_type, map_flags, map_size);
	assert(lru_map_fd != -1);

	expected_map_fd = create_map(BPF_MAP_TYPE_HASH, 0, map_size);
	assert(expected_map_fd != -1);

	value[0] = 1234;

	/* Insert 1 to tgt_free (+tgt_free keys) */
	end_key = 1 + tgt_free;
	for (key = 1; key < end_key; key++)
		assert(!bpf_map_update(lru_map_fd, &key, value, BPF_NOEXIST));

	/* Any bpf_map_update first needs to acquire a free node
	 * from the LRU.
	 *
	 * The local free list has run out of nodes, so the update
	 * goes to the global LRU list, which shrinks the inactive
	 * list to obtain tgt_free free nodes.
	 *
	 * Hence, the oldest keys, 1 to tgt_free/2,
	 * are removed from the LRU list.
	 */
	key = 1;
	if (map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
		assert(!bpf_map_update(lru_map_fd, &key, value, BPF_NOEXIST));
		assert(!bpf_map_delete(lru_map_fd, &key));
	} else {
		assert(bpf_map_update(lru_map_fd, &key, value, BPF_EXIST));
	}

	/* Re-insert 1 to tgt_free/2 again and do a lookup
	 * immediately.
	 */
	end_key = 1 + batch_size;
	value[0] = 4321;
	for (key = 1; key < end_key; key++) {
		assert(bpf_map_lookup(lru_map_fd, &key, value));
		assert(!bpf_map_update(lru_map_fd, &key, value, BPF_NOEXIST));
		assert(!bpf_map_lookup(lru_map_fd, &key, value));
		assert(value[0] == 4321);
		assert(!bpf_map_update(expected_map_fd, &key, value,
				       BPF_NOEXIST));
	}

	value[0] = 1234;

	/* Insert 1+tgt_free to tgt_free*3/2 */
	end_key = 1 + tgt_free + batch_size;
	for (key = 1 + tgt_free; key < end_key; key++)
		/* These newly added but not referenced keys will be
		 * gone during the next LRU shrink.
		 */
		assert(!bpf_map_update(lru_map_fd, &key, value, BPF_NOEXIST));

	/* Insert 1+tgt_free*3/2 to tgt_free*5/2 */
	end_key = key + tgt_free;
	for (; key < end_key; key++) {
		assert(!bpf_map_update(lru_map_fd, &key, value, BPF_NOEXIST));
		assert(!bpf_map_update(expected_map_fd, &key, value,
				       BPF_NOEXIST));
	}

	assert(map_equal(lru_map_fd, expected_map_fd));

	close(expected_map_fd);
	close(lru_map_fd);

	printf("Pass\n");
}

/* Size of the LRU map is 2*tgt_free
 * It tests the active/inactive list rotation
 * Insert 1 to 2*tgt_free (+2*tgt_free keys)
 * Lookup key 1 to tgt_free*3/2
 * Add 1+2*tgt_free to tgt_free*5/2 (+tgt_free/2 keys)
 *  => keys 1+tgt_free*3/2 to 2*tgt_free are removed from the LRU
 */
static void test_lru_sanity3(int map_type, int map_flags, unsigned int tgt_free)
{
	unsigned long long key, end_key, value[nr_cpus];
	int lru_map_fd, expected_map_fd;
	unsigned int batch_size;
	unsigned int map_size;

	printf("%s (map_type:%d map_flags:0x%X): ", __func__, map_type,
	       map_flags);

	assert(sched_next_online(0, 0) != -1);

	batch_size = tgt_free / 2;
	assert(batch_size * 2 == tgt_free);

	map_size = tgt_free * 2;
	if (map_flags & BPF_F_NO_COMMON_LRU)
		lru_map_fd = create_map(map_type, map_flags,
					map_size * nr_cpus);
	else
		lru_map_fd = create_map(map_type, map_flags, map_size);
	assert(lru_map_fd != -1);

	expected_map_fd = create_map(BPF_MAP_TYPE_HASH, 0, map_size);
	assert(expected_map_fd != -1);

	value[0] = 1234;

	/* Insert 1 to 2*tgt_free (+2*tgt_free keys) */
	end_key = 1 + (2 * tgt_free);
	for (key = 1; key < end_key; key++)
		assert(!bpf_map_update(lru_map_fd, &key, value, BPF_NOEXIST));

	/* Lookup key 1 to tgt_free*3/2 */
	end_key = tgt_free + batch_size;
	for (key = 1; key < end_key; key++) {
		assert(!bpf_map_lookup(lru_map_fd, &key, value));
		assert(!bpf_map_update(expected_map_fd, &key, value,
				       BPF_NOEXIST));
	}

	/* Add 1+2*tgt_free to tgt_free*5/2
	 * (+tgt_free/2 keys)
	 */
	key = 2 * tgt_free + 1;
	end_key = key + batch_size;
	for (; key < end_key; key++) {
		assert(!bpf_map_update(lru_map_fd, &key, value, BPF_NOEXIST));
		assert(!bpf_map_update(expected_map_fd, &key, value,
				       BPF_NOEXIST));
	}

	assert(map_equal(lru_map_fd, expected_map_fd));

	close(expected_map_fd);
	close(lru_map_fd);

	printf("Pass\n");
}

/* Test deletion */
static void test_lru_sanity4(int map_type, int map_flags, unsigned int tgt_free)
{
	int lru_map_fd, expected_map_fd;
	unsigned long long key, value[nr_cpus];
	unsigned long long end_key;

	printf("%s (map_type:%d map_flags:0x%X): ", __func__, map_type,
	       map_flags);

	assert(sched_next_online(0, 0) != -1);

	if (map_flags & BPF_F_NO_COMMON_LRU)
		lru_map_fd = create_map(map_type, map_flags,
					3 * tgt_free * nr_cpus);
	else
		lru_map_fd = create_map(map_type, map_flags, 3 * tgt_free);
	assert(lru_map_fd != -1);

	expected_map_fd = create_map(BPF_MAP_TYPE_HASH, 0,
				     3 * tgt_free);
	assert(expected_map_fd != -1);

	value[0] = 1234;

	for (key = 1; key <= 2 * tgt_free; key++)
		assert(!bpf_map_update(lru_map_fd, &key, value, BPF_NOEXIST));

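	/* key=1 was just inserted above, so BPF_NOEXIST must fail */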
	key = 1;
	assert(bpf_map_update(lru_map_fd, &key, value, BPF_NOEXIST));

	for (key = 1; key <= tgt_free; key++) {
		assert(!bpf_map_lookup(lru_map_fd, &key, value));
		assert(!bpf_map_update(expected_map_fd, &key, value,
				       BPF_NOEXIST));
	}

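	/* Delete keys tgt_free+1 to 2*tgt_free; a second delete of the
	 * same key must fail.
	 */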
	for (; key <= 2 * tgt_free; key++) {
		assert(!bpf_map_delete(lru_map_fd, &key));
		assert(bpf_map_delete(lru_map_fd, &key));
	}

	end_key = key + 2 * tgt_free;
	for (; key < end_key; key++) {
		assert(!bpf_map_update(lru_map_fd, &key, value, BPF_NOEXIST));
		assert(!bpf_map_update(expected_map_fd, &key, value,
				       BPF_NOEXIST));
	}

	assert(map_equal(lru_map_fd, expected_map_fd));

	close(expected_map_fd);
	close(lru_map_fd);

	printf("Pass\n");
}

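/* Run in a child process pinned to one CPU: check that the key
 * inserted by the previous CPU is still there, insert the next key
 * into the one-element map, and verify the previous key is then
 * evicted by the LRU.
 */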
static void do_test_lru_sanity5(unsigned long long last_key, int map_fd)
{
	unsigned long long key, value[nr_cpus];

	/* Ensure the last key inserted by the previous CPU can be found */
	assert(!bpf_map_lookup(map_fd, &last_key, value));

	value[0] = 1234;

	key = last_key + 1;
	assert(!bpf_map_update(map_fd, &key, value, BPF_NOEXIST));
	assert(!bpf_map_lookup(map_fd, &key, value));

	/* Cannot find the last key because it was removed by LRU */
	assert(bpf_map_lookup(map_fd, &last_key, value));
}

/* Test map with only one element */
static void test_lru_sanity5(int map_type, int map_flags)
{
	unsigned long long key, value[nr_cpus];
	int next_sched_cpu = 0;
	int map_fd;
	int i;

	if (map_flags & BPF_F_NO_COMMON_LRU)
		return;

	printf("%s (map_type:%d map_flags:0x%X): ", __func__, map_type,
	       map_flags);

	map_fd = create_map(map_type, map_flags, 1);
	assert(map_fd != -1);

	value[0] = 1234;
	key = 0;
	assert(!bpf_map_update(map_fd, &key, value, BPF_NOEXIST));

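	/* Fork one child per possible CPU; each child is pinned to the
	 * next online CPU and exercises the shared one-element LRU map.
	 */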
	for (i = 0; i < nr_cpus; i++) {
		pid_t pid;

		pid = fork();
		if (pid == 0) {
			next_sched_cpu = sched_next_online(0, next_sched_cpu);
			if (next_sched_cpu != -1)
				do_test_lru_sanity5(key, map_fd);
			exit(0);
		} else if (pid == -1) {
			printf("couldn't spawn #%d process\n", i);
			exit(1);
		} else {
			int status;

			/* This call is mostly redundant; it just lets
			 * the parent keep next_sched_cpu in sync for
			 * the next child process.
			 */
			next_sched_cpu = sched_next_online(pid, next_sched_cpu);

			assert(waitpid(pid, &status, 0) == pid);
			assert(status == 0);
			key++;
		}
	}

	close(map_fd);

	printf("Pass\n");
}

int main(int argc, char **argv)
{
	struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
	int map_types[] = {BPF_MAP_TYPE_LRU_HASH,
			     BPF_MAP_TYPE_LRU_PERCPU_HASH};
	int map_flags[] = {0, BPF_F_NO_COMMON_LRU};
	int t, f;

	setbuf(stdout, NULL);

	assert(!setrlimit(RLIMIT_MEMLOCK, &r));

	nr_cpus = bpf_num_possible_cpus();
	assert(nr_cpus != -1);
	printf("nr_cpus:%d\n\n", nr_cpus);

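	/* Run every sanity test for each (map_flags, map_type) combination */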
	for (f = 0; f < sizeof(map_flags) / sizeof(*map_flags); f++) {
		unsigned int tgt_free = (map_flags[f] & BPF_F_NO_COMMON_LRU) ?
			PERCPU_FREE_TARGET : LOCAL_FREE_TARGET;

		for (t = 0; t < sizeof(map_types) / sizeof(*map_types); t++) {
			test_lru_sanity0(map_types[t], map_flags[f]);
			test_lru_sanity1(map_types[t], map_flags[f], tgt_free);
			test_lru_sanity2(map_types[t], map_flags[f], tgt_free);
			test_lru_sanity3(map_types[t], map_flags[f], tgt_free);
			test_lru_sanity4(map_types[t], map_flags[f], tgt_free);
			test_lru_sanity5(map_types[t], map_flags[f]);

			printf("\n");
		}
	}

	return 0;
}