1 // SPDX-License-Identifier: GPL-2.0-only 2 /* Copyright (c) 2016 Facebook 3 */ 4 #define _GNU_SOURCE 5 #include <sched.h> 6 #include <stdio.h> 7 #include <sys/types.h> 8 #include <asm/unistd.h> 9 #include <unistd.h> 10 #include <assert.h> 11 #include <sys/wait.h> 12 #include <stdlib.h> 13 #include <signal.h> 14 #include <linux/bpf.h> 15 #include <string.h> 16 #include <time.h> 17 #include <sys/resource.h> 18 #include <arpa/inet.h> 19 #include <errno.h> 20 21 #include <bpf/bpf.h> 22 #include "bpf_load.h" 23 24 #define TEST_BIT(t) (1U << (t)) 25 #define MAX_NR_CPUS 1024 26 27 static __u64 time_get_ns(void) 28 { 29 struct timespec ts; 30 31 clock_gettime(CLOCK_MONOTONIC, &ts); 32 return ts.tv_sec * 1000000000ull + ts.tv_nsec; 33 } 34 35 enum test_type { 36 HASH_PREALLOC, 37 PERCPU_HASH_PREALLOC, 38 HASH_KMALLOC, 39 PERCPU_HASH_KMALLOC, 40 LRU_HASH_PREALLOC, 41 NOCOMMON_LRU_HASH_PREALLOC, 42 LPM_KMALLOC, 43 HASH_LOOKUP, 44 ARRAY_LOOKUP, 45 INNER_LRU_HASH_PREALLOC, 46 LRU_HASH_LOOKUP, 47 NR_TESTS, 48 }; 49 50 const char *test_map_names[NR_TESTS] = { 51 [HASH_PREALLOC] = "hash_map", 52 [PERCPU_HASH_PREALLOC] = "percpu_hash_map", 53 [HASH_KMALLOC] = "hash_map_alloc", 54 [PERCPU_HASH_KMALLOC] = "percpu_hash_map_alloc", 55 [LRU_HASH_PREALLOC] = "lru_hash_map", 56 [NOCOMMON_LRU_HASH_PREALLOC] = "nocommon_lru_hash_map", 57 [LPM_KMALLOC] = "lpm_trie_map_alloc", 58 [HASH_LOOKUP] = "hash_map", 59 [ARRAY_LOOKUP] = "array_map", 60 [INNER_LRU_HASH_PREALLOC] = "inner_lru_hash_map", 61 [LRU_HASH_LOOKUP] = "lru_hash_lookup_map", 62 }; 63 64 static int test_flags = ~0; 65 static uint32_t num_map_entries; 66 static uint32_t inner_lru_hash_size; 67 static int inner_lru_hash_idx = -1; 68 static int array_of_lru_hashs_idx = -1; 69 static int lru_hash_lookup_idx = -1; 70 static int lru_hash_lookup_test_entries = 32; 71 static uint32_t max_cnt = 1000000; 72 73 static int check_test_flags(enum test_type t) 74 { 75 return test_flags & TEST_BIT(t); 76 } 77 78 static void test_hash_prealloc(int cpu) 79 { 80 __u64 start_time; 81 int i; 82 83 start_time = time_get_ns(); 84 for (i = 0; i < max_cnt; i++) 85 syscall(__NR_getuid); 86 printf("%d:hash_map_perf pre-alloc %lld events per sec\n", 87 cpu, max_cnt * 1000000000ll / (time_get_ns() - start_time)); 88 } 89 90 static int pre_test_lru_hash_lookup(int tasks) 91 { 92 int fd = map_fd[lru_hash_lookup_idx]; 93 uint32_t key; 94 long val = 1; 95 int ret; 96 97 if (num_map_entries > lru_hash_lookup_test_entries) 98 lru_hash_lookup_test_entries = num_map_entries; 99 100 /* Populate the lru_hash_map for LRU_HASH_LOOKUP perf test. 101 * 102 * It is fine that the user requests for a map with 103 * num_map_entries < 32 and some of the later lru hash lookup 104 * may return not found. For LRU map, we are not interested 105 * in such small map performance. 106 */ 107 for (key = 0; key < lru_hash_lookup_test_entries; key++) { 108 ret = bpf_map_update_elem(fd, &key, &val, BPF_NOEXIST); 109 if (ret) 110 return ret; 111 } 112 113 return 0; 114 } 115 116 static void do_test_lru(enum test_type test, int cpu) 117 { 118 static int inner_lru_map_fds[MAX_NR_CPUS]; 119 120 struct sockaddr_in6 in6 = { .sin6_family = AF_INET6 }; 121 const char *test_name; 122 __u64 start_time; 123 int i, ret; 124 125 if (test == INNER_LRU_HASH_PREALLOC) { 126 int outer_fd = map_fd[array_of_lru_hashs_idx]; 127 unsigned int mycpu, mynode; 128 129 assert(cpu < MAX_NR_CPUS); 130 131 if (cpu) { 132 ret = syscall(__NR_getcpu, &mycpu, &mynode, NULL); 133 assert(!ret); 134 135 inner_lru_map_fds[cpu] = 136 bpf_create_map_node(BPF_MAP_TYPE_LRU_HASH, 137 test_map_names[INNER_LRU_HASH_PREALLOC], 138 sizeof(uint32_t), 139 sizeof(long), 140 inner_lru_hash_size, 0, 141 mynode); 142 if (inner_lru_map_fds[cpu] == -1) { 143 printf("cannot create BPF_MAP_TYPE_LRU_HASH %s(%d)\n", 144 strerror(errno), errno); 145 exit(1); 146 } 147 } else { 148 inner_lru_map_fds[cpu] = map_fd[inner_lru_hash_idx]; 149 } 150 151 ret = bpf_map_update_elem(outer_fd, &cpu, 152 &inner_lru_map_fds[cpu], 153 BPF_ANY); 154 if (ret) { 155 printf("cannot update ARRAY_OF_LRU_HASHS with key:%u. %s(%d)\n", 156 cpu, strerror(errno), errno); 157 exit(1); 158 } 159 } 160 161 in6.sin6_addr.s6_addr16[0] = 0xdead; 162 in6.sin6_addr.s6_addr16[1] = 0xbeef; 163 164 if (test == LRU_HASH_PREALLOC) { 165 test_name = "lru_hash_map_perf"; 166 in6.sin6_addr.s6_addr16[2] = 0; 167 } else if (test == NOCOMMON_LRU_HASH_PREALLOC) { 168 test_name = "nocommon_lru_hash_map_perf"; 169 in6.sin6_addr.s6_addr16[2] = 1; 170 } else if (test == INNER_LRU_HASH_PREALLOC) { 171 test_name = "inner_lru_hash_map_perf"; 172 in6.sin6_addr.s6_addr16[2] = 2; 173 } else if (test == LRU_HASH_LOOKUP) { 174 test_name = "lru_hash_lookup_perf"; 175 in6.sin6_addr.s6_addr16[2] = 3; 176 in6.sin6_addr.s6_addr32[3] = 0; 177 } else { 178 assert(0); 179 } 180 181 start_time = time_get_ns(); 182 for (i = 0; i < max_cnt; i++) { 183 ret = connect(-1, (const struct sockaddr *)&in6, sizeof(in6)); 184 assert(ret == -1 && errno == EBADF); 185 if (in6.sin6_addr.s6_addr32[3] < 186 lru_hash_lookup_test_entries - 32) 187 in6.sin6_addr.s6_addr32[3] += 32; 188 else 189 in6.sin6_addr.s6_addr32[3] = 0; 190 } 191 printf("%d:%s pre-alloc %lld events per sec\n", 192 cpu, test_name, 193 max_cnt * 1000000000ll / (time_get_ns() - start_time)); 194 } 195 196 static void test_lru_hash_prealloc(int cpu) 197 { 198 do_test_lru(LRU_HASH_PREALLOC, cpu); 199 } 200 201 static void test_nocommon_lru_hash_prealloc(int cpu) 202 { 203 do_test_lru(NOCOMMON_LRU_HASH_PREALLOC, cpu); 204 } 205 206 static void test_inner_lru_hash_prealloc(int cpu) 207 { 208 do_test_lru(INNER_LRU_HASH_PREALLOC, cpu); 209 } 210 211 static void test_lru_hash_lookup(int cpu) 212 { 213 do_test_lru(LRU_HASH_LOOKUP, cpu); 214 } 215 216 static void test_percpu_hash_prealloc(int cpu) 217 { 218 __u64 start_time; 219 int i; 220 221 start_time = time_get_ns(); 222 for (i = 0; i < max_cnt; i++) 223 syscall(__NR_geteuid); 224 printf("%d:percpu_hash_map_perf pre-alloc %lld events per sec\n", 225 cpu, max_cnt * 1000000000ll / (time_get_ns() - start_time)); 226 } 227 228 static void test_hash_kmalloc(int cpu) 229 { 230 __u64 start_time; 231 int i; 232 233 start_time = time_get_ns(); 234 for (i = 0; i < max_cnt; i++) 235 syscall(__NR_getgid); 236 printf("%d:hash_map_perf kmalloc %lld events per sec\n", 237 cpu, max_cnt * 1000000000ll / (time_get_ns() - start_time)); 238 } 239 240 static void test_percpu_hash_kmalloc(int cpu) 241 { 242 __u64 start_time; 243 int i; 244 245 start_time = time_get_ns(); 246 for (i = 0; i < max_cnt; i++) 247 syscall(__NR_getegid); 248 printf("%d:percpu_hash_map_perf kmalloc %lld events per sec\n", 249 cpu, max_cnt * 1000000000ll / (time_get_ns() - start_time)); 250 } 251 252 static void test_lpm_kmalloc(int cpu) 253 { 254 __u64 start_time; 255 int i; 256 257 start_time = time_get_ns(); 258 for (i = 0; i < max_cnt; i++) 259 syscall(__NR_gettid); 260 printf("%d:lpm_perf kmalloc %lld events per sec\n", 261 cpu, max_cnt * 1000000000ll / (time_get_ns() - start_time)); 262 } 263 264 static void test_hash_lookup(int cpu) 265 { 266 __u64 start_time; 267 int i; 268 269 start_time = time_get_ns(); 270 for (i = 0; i < max_cnt; i++) 271 syscall(__NR_getpgid, 0); 272 printf("%d:hash_lookup %lld lookups per sec\n", 273 cpu, max_cnt * 1000000000ll * 64 / (time_get_ns() - start_time)); 274 } 275 276 static void test_array_lookup(int cpu) 277 { 278 __u64 start_time; 279 int i; 280 281 start_time = time_get_ns(); 282 for (i = 0; i < max_cnt; i++) 283 syscall(__NR_getppid, 0); 284 printf("%d:array_lookup %lld lookups per sec\n", 285 cpu, max_cnt * 1000000000ll * 64 / (time_get_ns() - start_time)); 286 } 287 288 typedef int (*pre_test_func)(int tasks); 289 const pre_test_func pre_test_funcs[] = { 290 [LRU_HASH_LOOKUP] = pre_test_lru_hash_lookup, 291 }; 292 293 typedef void (*test_func)(int cpu); 294 const test_func test_funcs[] = { 295 [HASH_PREALLOC] = test_hash_prealloc, 296 [PERCPU_HASH_PREALLOC] = test_percpu_hash_prealloc, 297 [HASH_KMALLOC] = test_hash_kmalloc, 298 [PERCPU_HASH_KMALLOC] = test_percpu_hash_kmalloc, 299 [LRU_HASH_PREALLOC] = test_lru_hash_prealloc, 300 [NOCOMMON_LRU_HASH_PREALLOC] = test_nocommon_lru_hash_prealloc, 301 [LPM_KMALLOC] = test_lpm_kmalloc, 302 [HASH_LOOKUP] = test_hash_lookup, 303 [ARRAY_LOOKUP] = test_array_lookup, 304 [INNER_LRU_HASH_PREALLOC] = test_inner_lru_hash_prealloc, 305 [LRU_HASH_LOOKUP] = test_lru_hash_lookup, 306 }; 307 308 static int pre_test(int tasks) 309 { 310 int i; 311 312 for (i = 0; i < NR_TESTS; i++) { 313 if (pre_test_funcs[i] && check_test_flags(i)) { 314 int ret = pre_test_funcs[i](tasks); 315 316 if (ret) 317 return ret; 318 } 319 } 320 321 return 0; 322 } 323 324 static void loop(int cpu) 325 { 326 cpu_set_t cpuset; 327 int i; 328 329 CPU_ZERO(&cpuset); 330 CPU_SET(cpu, &cpuset); 331 sched_setaffinity(0, sizeof(cpuset), &cpuset); 332 333 for (i = 0; i < NR_TESTS; i++) { 334 if (check_test_flags(i)) 335 test_funcs[i](cpu); 336 } 337 } 338 339 static void run_perf_test(int tasks) 340 { 341 pid_t pid[tasks]; 342 int i; 343 344 assert(!pre_test(tasks)); 345 346 for (i = 0; i < tasks; i++) { 347 pid[i] = fork(); 348 if (pid[i] == 0) { 349 loop(i); 350 exit(0); 351 } else if (pid[i] == -1) { 352 printf("couldn't spawn #%d process\n", i); 353 exit(1); 354 } 355 } 356 for (i = 0; i < tasks; i++) { 357 int status; 358 359 assert(waitpid(pid[i], &status, 0) == pid[i]); 360 assert(status == 0); 361 } 362 } 363 364 static void fill_lpm_trie(void) 365 { 366 struct bpf_lpm_trie_key *key; 367 unsigned long value = 0; 368 unsigned int i; 369 int r; 370 371 key = alloca(sizeof(*key) + 4); 372 key->prefixlen = 32; 373 374 for (i = 0; i < 512; ++i) { 375 key->prefixlen = rand() % 33; 376 key->data[0] = rand() & 0xff; 377 key->data[1] = rand() & 0xff; 378 key->data[2] = rand() & 0xff; 379 key->data[3] = rand() & 0xff; 380 r = bpf_map_update_elem(map_fd[6], key, &value, 0); 381 assert(!r); 382 } 383 384 key->prefixlen = 32; 385 key->data[0] = 192; 386 key->data[1] = 168; 387 key->data[2] = 0; 388 key->data[3] = 1; 389 value = 128; 390 391 r = bpf_map_update_elem(map_fd[6], key, &value, 0); 392 assert(!r); 393 } 394 395 static void fixup_map(struct bpf_map_data *map, int idx) 396 { 397 int i; 398 399 if (!strcmp("inner_lru_hash_map", map->name)) { 400 inner_lru_hash_idx = idx; 401 inner_lru_hash_size = map->def.max_entries; 402 } 403 404 if (!strcmp("array_of_lru_hashs", map->name)) { 405 if (inner_lru_hash_idx == -1) { 406 printf("inner_lru_hash_map must be defined before array_of_lru_hashs\n"); 407 exit(1); 408 } 409 map->def.inner_map_idx = inner_lru_hash_idx; 410 array_of_lru_hashs_idx = idx; 411 } 412 413 if (!strcmp("lru_hash_lookup_map", map->name)) 414 lru_hash_lookup_idx = idx; 415 416 if (num_map_entries <= 0) 417 return; 418 419 inner_lru_hash_size = num_map_entries; 420 421 /* Only change the max_entries for the enabled test(s) */ 422 for (i = 0; i < NR_TESTS; i++) { 423 if (!strcmp(test_map_names[i], map->name) && 424 (check_test_flags(i))) { 425 map->def.max_entries = num_map_entries; 426 } 427 } 428 } 429 430 int main(int argc, char **argv) 431 { 432 struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; 433 char filename[256]; 434 int num_cpu = 8; 435 436 snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); 437 setrlimit(RLIMIT_MEMLOCK, &r); 438 439 if (argc > 1) 440 test_flags = atoi(argv[1]) ? : test_flags; 441 442 if (argc > 2) 443 num_cpu = atoi(argv[2]) ? : num_cpu; 444 445 if (argc > 3) 446 num_map_entries = atoi(argv[3]); 447 448 if (argc > 4) 449 max_cnt = atoi(argv[4]); 450 451 if (load_bpf_file_fixup_map(filename, fixup_map)) { 452 printf("%s", bpf_log_buf); 453 return 1; 454 } 455 456 fill_lpm_trie(); 457 458 run_perf_test(num_cpu); 459 460 return 0; 461 } 462