// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2016 Facebook
 */
#define _GNU_SOURCE
#include <linux/types.h>
#include <stdio.h>
#include <unistd.h>
#include <linux/bpf.h>
#include <errno.h>
#include <string.h>
#include <assert.h>
#include <sched.h>
#include <sys/wait.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <stdlib.h>
#include <time.h>

#include <bpf/bpf.h>
#include "bpf_util.h"

#define min(a, b) ((a) < (b) ? (a) : (b))
#ifndef offsetof
# define offsetof(TYPE, MEMBER)	((size_t)&((TYPE *)0)->MEMBER)
#endif
#define container_of(ptr, type, member) ({			\
	const typeof( ((type *)0)->member ) *__mptr = (ptr);	\
	(type *)( (char *)__mptr - offsetof(type, member) );})

static int nr_cpus;
static unsigned long long *dist_keys;
static unsigned int dist_key_counts;

struct list_head {
	struct list_head *next, *prev;
};

static inline void INIT_LIST_HEAD(struct list_head *list)
{
	list->next = list;
	list->prev = list;
}

static inline void __list_add(struct list_head *new,
			      struct list_head *prev,
			      struct list_head *next)
{
	next->prev = new;
	new->next = next;
	new->prev = prev;
	prev->next = new;
}

static inline void list_add(struct list_head *new, struct list_head *head)
{
	__list_add(new, head, head->next);
}

static inline void __list_del(struct list_head *prev, struct list_head *next)
{
	next->prev = prev;
	prev->next = next;
}

static inline void __list_del_entry(struct list_head *entry)
{
	__list_del(entry->prev, entry->next);
}

static inline void list_move(struct list_head *list, struct list_head *head)
{
	__list_del_entry(list);
	list_add(list, head);
}

#define list_entry(ptr, type, member) \
	container_of(ptr, type, member)

#define list_last_entry(ptr, type, member) \
	list_entry((ptr)->prev, type, member)

struct pfect_lru_node {
	struct list_head list;
	unsigned long long key;
};

struct pfect_lru {
	struct list_head list;
	struct pfect_lru_node *free_nodes;
	unsigned int cur_size;
	unsigned int lru_size;
	unsigned int nr_unique;
	unsigned int nr_misses;
	unsigned int total;
	int map_fd;
};

static void pfect_lru_init(struct pfect_lru *lru, unsigned int lru_size,
			   unsigned int nr_possible_elems)
{
	lru->map_fd = bpf_map_create(BPF_MAP_TYPE_HASH, NULL,
				     sizeof(unsigned long long),
				     sizeof(struct pfect_lru_node *),
				     nr_possible_elems, NULL);
	assert(lru->map_fd != -1);

	lru->free_nodes = malloc(lru_size * sizeof(struct pfect_lru_node));
	assert(lru->free_nodes);

	INIT_LIST_HEAD(&lru->list);
	lru->cur_size = 0;
	lru->lru_size = lru_size;
	lru->nr_unique = lru->nr_misses = lru->total = 0;
}

static void pfect_lru_destroy(struct pfect_lru *lru)
{
	close(lru->map_fd);
	free(lru->free_nodes);
}
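/*
 * Userspace "perfect LRU" used as a reference: recency is tracked with a
 * doubly linked list (most recently used at the head) and a plain
 * BPF_MAP_TYPE_HASH maps each key to its list node.  A hit moves the node
 * to the head; once the LRU is full, the tail (least recently used) node
 * is recycled for the new key.
 */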
static int pfect_lru_lookup_or_insert(struct pfect_lru *lru,
				      unsigned long long key)
{
	struct pfect_lru_node *node = NULL;
	int seen = 0;

	lru->total++;
	if (!bpf_map_lookup_elem(lru->map_fd, &key, &node)) {
		if (node) {
			list_move(&node->list, &lru->list);
			return 1;
		}
		seen = 1;
	}

	if (lru->cur_size < lru->lru_size) {
		node = &lru->free_nodes[lru->cur_size++];
		INIT_LIST_HEAD(&node->list);
	} else {
		struct pfect_lru_node *null_node = NULL;

		node = list_last_entry(&lru->list,
				       struct pfect_lru_node,
				       list);
		bpf_map_update_elem(lru->map_fd, &node->key, &null_node, BPF_EXIST);
	}

	node->key = key;
	list_move(&node->list, &lru->list);

	lru->nr_misses++;
	if (seen) {
		assert(!bpf_map_update_elem(lru->map_fd, &key, &node, BPF_EXIST));
	} else {
		lru->nr_unique++;
		assert(!bpf_map_update_elem(lru->map_fd, &key, &node, BPF_NOEXIST));
	}

	return seen;
}

static unsigned int read_keys(const char *dist_file,
			      unsigned long long **keys)
{
	struct stat fst;
	unsigned long long *retkeys;
	unsigned int counts = 0;
	int dist_fd;
	char *b, *l;
	int i;

	dist_fd = open(dist_file, O_RDONLY);
	assert(dist_fd != -1);

	assert(fstat(dist_fd, &fst) == 0);
	b = malloc(fst.st_size + 1);
	assert(b);

	assert(read(dist_fd, b, fst.st_size) == fst.st_size);
	close(dist_fd);
	b[fst.st_size] = '\0'; /* terminate the buffer for strtok() */
	for (i = 0; i < fst.st_size; i++) {
		if (b[i] == '\n')
			counts++;
	}
	counts++; /* in case the last line has no \n */

	retkeys = malloc(counts * sizeof(unsigned long long));
	assert(retkeys);

	counts = 0;
	for (l = strtok(b, "\n"); l; l = strtok(NULL, "\n"))
		retkeys[counts++] = strtoull(l, NULL, 10);
	free(b);

	*keys = retkeys;

	return counts;
}

static int create_map(int map_type, int map_flags, unsigned int size)
{
	LIBBPF_OPTS(bpf_map_create_opts, opts,
		.map_flags = map_flags,
	);
	int map_fd;

	map_fd = bpf_map_create(map_type, NULL, sizeof(unsigned long long),
				sizeof(unsigned long long), size, &opts);

	if (map_fd == -1)
		perror("bpf_map_create");

	return map_fd;
}

static int sched_next_online(int pid, int next_to_try)
{
	cpu_set_t cpuset;

	if (next_to_try == nr_cpus)
		return -1;

	while (next_to_try < nr_cpus) {
		CPU_ZERO(&cpuset);
		CPU_SET(next_to_try++, &cpuset);
		if (!sched_setaffinity(pid, sizeof(cpuset), &cpuset))
			break;
	}

	return next_to_try;
}

static void run_parallel(unsigned int tasks, void (*fn)(int i, void *data),
			 void *data)
{
	int next_sched_cpu = 0;
	pid_t pid[tasks];
	int i;

	for (i = 0; i < tasks; i++) {
		pid[i] = fork();
		if (pid[i] == 0) {
			next_sched_cpu = sched_next_online(0, next_sched_cpu);
			fn(i, data);
			exit(0);
		} else if (pid[i] == -1) {
			printf("couldn't spawn #%d process\n", i);
			exit(1);
		}
		/* It is mostly redundant and just allows the parent
		 * process to update next_sched_cpu for the next child
		 * process.
		 */
		next_sched_cpu = sched_next_online(pid[i], next_sched_cpu);
	}
	for (i = 0; i < tasks; i++) {
		int status;

		assert(waitpid(pid[i], &status, 0) == pid[i]);
		assert(status == 0);
	}
}
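/*
 * One pass over the key distribution: every key is fed to both the kernel
 * LRU map and the userspace perfect LRU above.  Keys are offset per task
 * (task * dist_key_counts) to keep the tasks' keys apart, and a kernel-map
 * miss is repaired with BPF_NOEXIST, so the two miss counts printed at the
 * end can be compared directly.
 */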
static void do_test_lru_dist(int task, void *data)
{
	unsigned int nr_misses = 0;
	struct pfect_lru pfect_lru;
	unsigned long long key, value = 1234;
	unsigned int i;

	unsigned int lru_map_fd = ((unsigned int *)data)[0];
	unsigned int lru_size = ((unsigned int *)data)[1];
	unsigned long long key_offset = task * dist_key_counts;

	pfect_lru_init(&pfect_lru, lru_size, dist_key_counts);

	for (i = 0; i < dist_key_counts; i++) {
		key = dist_keys[i] + key_offset;

		pfect_lru_lookup_or_insert(&pfect_lru, key);

		if (!bpf_map_lookup_elem(lru_map_fd, &key, &value))
			continue;

		if (bpf_map_update_elem(lru_map_fd, &key, &value, BPF_NOEXIST)) {
			printf("bpf_map_update_elem(lru_map_fd, %llu): errno:%d\n",
			       key, errno);
			assert(0);
		}

		nr_misses++;
	}

	printf(" task:%d BPF LRU: nr_unique:%u(/%u) nr_misses:%u(/%u)\n",
	       task, pfect_lru.nr_unique, dist_key_counts, nr_misses,
	       dist_key_counts);
	printf(" task:%d Perfect LRU: nr_unique:%u(/%u) nr_misses:%u(/%u)\n",
	       task, pfect_lru.nr_unique, pfect_lru.total,
	       pfect_lru.nr_misses, pfect_lru.total);

	pfect_lru_destroy(&pfect_lru);
	close(lru_map_fd);
}

static void test_parallel_lru_dist(int map_type, int map_flags,
				   int nr_tasks, unsigned int lru_size)
{
	int child_data[2];
	int lru_map_fd;

	printf("%s (map_type:%d map_flags:0x%X):\n", __func__, map_type,
	       map_flags);

	if (map_flags & BPF_F_NO_COMMON_LRU)
		lru_map_fd = create_map(map_type, map_flags,
					nr_cpus * lru_size);
	else
		lru_map_fd = create_map(map_type, map_flags,
					nr_tasks * lru_size);
	assert(lru_map_fd != -1);

	child_data[0] = lru_map_fd;
	child_data[1] = lru_size;

	run_parallel(nr_tasks, do_test_lru_dist, child_data);

	close(lru_map_fd);
}

static void test_lru_loss0(int map_type, int map_flags)
{
	unsigned long long key, value[nr_cpus];
	unsigned int old_unused_losses = 0;
	unsigned int new_unused_losses = 0;
	unsigned int used_losses = 0;
	int map_fd;

	printf("%s (map_type:%d map_flags:0x%X): ", __func__, map_type,
	       map_flags);

	assert(sched_next_online(0, 0) != -1);

	if (map_flags & BPF_F_NO_COMMON_LRU)
		map_fd = create_map(map_type, map_flags, 900 * nr_cpus);
	else
		map_fd = create_map(map_type, map_flags, 900);

	assert(map_fd != -1);

	value[0] = 1234;

	for (key = 1; key <= 1000; key++) {
		/* Lookup keys must match the map's 8-byte key size. */
		unsigned long long start_key, end_key;

		assert(bpf_map_update_elem(map_fd, &key, value, BPF_NOEXIST) == 0);

		start_key = 101;
		end_key = min(key, 900);

		while (start_key <= end_key) {
			bpf_map_lookup_elem(map_fd, &start_key, value);
			start_key++;
		}
	}

	for (key = 1; key <= 1000; key++) {
		if (bpf_map_lookup_elem(map_fd, &key, value)) {
			if (key <= 100)
				old_unused_losses++;
			else if (key <= 900)
				used_losses++;
			else
				new_unused_losses++;
		}
	}

	close(map_fd);

	printf("older-elem-losses:%d(/100) active-elem-losses:%d(/800) "
	       "newer-elem-losses:%d(/100)\n",
	       old_unused_losses, used_losses, new_unused_losses);
}

static void test_lru_loss1(int map_type, int map_flags)
{
	unsigned long long key, value[nr_cpus];
	int map_fd;
	unsigned int nr_losses = 0;

	printf("%s (map_type:%d map_flags:0x%X): ", __func__, map_type,
	       map_flags);

	assert(sched_next_online(0, 0) != -1);

	if (map_flags & BPF_F_NO_COMMON_LRU)
		map_fd = create_map(map_type, map_flags, 1000 * nr_cpus);
	else
		map_fd = create_map(map_type, map_flags, 1000);

	assert(map_fd != -1);

	value[0] = 1234;

	for (key = 1; key <= 1000; key++)
		assert(!bpf_map_update_elem(map_fd, &key, value, BPF_NOEXIST));

	for (key = 1; key <= 1000; key++) {
		if (bpf_map_lookup_elem(map_fd, &key, value))
			nr_losses++;
	}

	close(map_fd);

	printf("nr_losses:%d(/1000)\n", nr_losses);
}

static void do_test_parallel_lru_loss(int task, void *data)
{
	const unsigned int nr_stable_elems = 1000;
	const unsigned int nr_repeats = 100000;

	int map_fd = *(int *)data;
	unsigned long long stable_base;
	unsigned long long key, value[nr_cpus];
	unsigned long long next_ins_key;
	unsigned int nr_losses = 0;
	unsigned int i;

	stable_base = task * nr_repeats * 2 + 1;
	next_ins_key = stable_base;
	value[0] = 1234;
	for (i = 0; i < nr_stable_elems; i++) {
		assert(bpf_map_update_elem(map_fd, &next_ins_key, value,
					   BPF_NOEXIST) == 0);
		next_ins_key++;
	}

	for (i = 0; i < nr_repeats; i++) {
		int rn;

		rn = rand();

		if (rn % 10) {
			key = rn % nr_stable_elems + stable_base;
			bpf_map_lookup_elem(map_fd, &key, value);
		} else {
			bpf_map_update_elem(map_fd, &next_ins_key, value,
					    BPF_NOEXIST);
			next_ins_key++;
		}
	}

	key = stable_base;
	for (i = 0; i < nr_stable_elems; i++) {
		if (bpf_map_lookup_elem(map_fd, &key, value))
			nr_losses++;
		key++;
	}

	printf(" task:%d nr_losses:%u\n", task, nr_losses);
}

static void test_parallel_lru_loss(int map_type, int map_flags, int nr_tasks)
{
	int map_fd;

	printf("%s (map_type:%d map_flags:0x%X):\n", __func__, map_type,
	       map_flags);

	/* Give 20% more than the active working set */
	if (map_flags & BPF_F_NO_COMMON_LRU)
		map_fd = create_map(map_type, map_flags,
				    nr_cpus * (1000 + 200));
	else
		map_fd = create_map(map_type, map_flags,
				    nr_tasks * (1000 + 200));

	assert(map_fd != -1);

	run_parallel(nr_tasks, do_test_parallel_lru_loss, &map_fd);

	close(map_fd);
}
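/*
 * The dist file is plain text with one decimal key per line (see
 * read_keys()).  A hypothetical invocation, assuming a key list generated
 * by some means, e.g. "seq 1 10000 | shuf > keys.txt":
 *
 *	./test_lru_dist keys.txt 1000 4
 *
 * i.e. <dist-file> <lru-size> <nr-tasks>; nr-tasks is capped at the number
 * of possible CPUs.
 */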
int main(int argc, char **argv)
{
	int map_flags[] = {0, BPF_F_NO_COMMON_LRU};
	const char *dist_file;
	int nr_tasks = 1;
	int lru_size;
	int f;

	if (argc < 4) {
		printf("Usage: %s <dist-file> <lru-size> <nr-tasks>\n",
		       argv[0]);
		return -1;
	}

	dist_file = argv[1];
	lru_size = atoi(argv[2]);
	nr_tasks = atoi(argv[3]);

	setbuf(stdout, NULL);

	srand(time(NULL));

	nr_cpus = bpf_num_possible_cpus();
	assert(nr_cpus != -1);
	printf("nr_cpus:%d\n\n", nr_cpus);

	nr_tasks = min(nr_tasks, nr_cpus);

	dist_key_counts = read_keys(dist_file, &dist_keys);
	if (!dist_key_counts) {
		printf("%s has no key\n", dist_file);
		return -1;
	}

	for (f = 0; f < ARRAY_SIZE(map_flags); f++) {
		test_lru_loss0(BPF_MAP_TYPE_LRU_HASH, map_flags[f]);
		test_lru_loss1(BPF_MAP_TYPE_LRU_HASH, map_flags[f]);
		test_parallel_lru_loss(BPF_MAP_TYPE_LRU_HASH, map_flags[f],
				       nr_tasks);
		test_parallel_lru_dist(BPF_MAP_TYPE_LRU_HASH, map_flags[f],
				       nr_tasks, lru_size);
		printf("\n");
	}

	free(dist_keys);

	return 0;
}