1 // SPDX-License-Identifier: GPL-2.0-only 2 /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com 3 * Copyright (c) 2016,2017 Facebook 4 */ 5 #include <linux/bpf.h> 6 #include <linux/btf.h> 7 #include <linux/err.h> 8 #include <linux/slab.h> 9 #include <linux/mm.h> 10 #include <linux/filter.h> 11 #include <linux/perf_event.h> 12 #include <uapi/linux/btf.h> 13 14 #include "map_in_map.h" 15 16 #define ARRAY_CREATE_FLAG_MASK \ 17 (BPF_F_NUMA_NODE | BPF_F_ACCESS_MASK) 18 19 static void bpf_array_free_percpu(struct bpf_array *array) 20 { 21 int i; 22 23 for (i = 0; i < array->map.max_entries; i++) { 24 free_percpu(array->pptrs[i]); 25 cond_resched(); 26 } 27 } 28 29 static int bpf_array_alloc_percpu(struct bpf_array *array) 30 { 31 void __percpu *ptr; 32 int i; 33 34 for (i = 0; i < array->map.max_entries; i++) { 35 ptr = __alloc_percpu_gfp(array->elem_size, 8, 36 GFP_USER | __GFP_NOWARN); 37 if (!ptr) { 38 bpf_array_free_percpu(array); 39 return -ENOMEM; 40 } 41 array->pptrs[i] = ptr; 42 cond_resched(); 43 } 44 45 return 0; 46 } 47 48 /* Called from syscall */ 49 int array_map_alloc_check(union bpf_attr *attr) 50 { 51 bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY; 52 int numa_node = bpf_map_attr_numa_node(attr); 53 54 /* check sanity of attributes */ 55 if (attr->max_entries == 0 || attr->key_size != 4 || 56 attr->value_size == 0 || 57 attr->map_flags & ~ARRAY_CREATE_FLAG_MASK || 58 !bpf_map_flags_access_ok(attr->map_flags) || 59 (percpu && numa_node != NUMA_NO_NODE)) 60 return -EINVAL; 61 62 if (attr->value_size > KMALLOC_MAX_SIZE) 63 /* if value_size is bigger, the user space won't be able to 64 * access the elements. 65 */ 66 return -E2BIG; 67 68 return 0; 69 } 70 71 static struct bpf_map *array_map_alloc(union bpf_attr *attr) 72 { 73 bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY; 74 int ret, numa_node = bpf_map_attr_numa_node(attr); 75 u32 elem_size, index_mask, max_entries; 76 bool unpriv = !capable(CAP_SYS_ADMIN); 77 u64 cost, array_size, mask64; 78 struct bpf_array *array; 79 80 elem_size = round_up(attr->value_size, 8); 81 82 max_entries = attr->max_entries; 83 84 /* On 32 bit archs roundup_pow_of_two() with max_entries that has 85 * upper most bit set in u32 space is undefined behavior due to 86 * resulting 1U << 32, so do it manually here in u64 space. 87 */ 88 mask64 = fls_long(max_entries - 1); 89 mask64 = 1ULL << mask64; 90 mask64 -= 1; 91 92 index_mask = mask64; 93 if (unpriv) { 94 /* round up array size to nearest power of 2, 95 * since cpu will speculate within index_mask limits 96 */ 97 max_entries = index_mask + 1; 98 /* Check for overflows. */ 99 if (max_entries < attr->max_entries) 100 return ERR_PTR(-E2BIG); 101 } 102 103 array_size = sizeof(*array); 104 if (percpu) 105 array_size += (u64) max_entries * sizeof(void *); 106 else 107 array_size += (u64) max_entries * elem_size; 108 109 /* make sure there is no u32 overflow later in round_up() */ 110 cost = array_size; 111 if (cost >= U32_MAX - PAGE_SIZE) 112 return ERR_PTR(-ENOMEM); 113 if (percpu) { 114 cost += (u64)attr->max_entries * elem_size * num_possible_cpus(); 115 if (cost >= U32_MAX - PAGE_SIZE) 116 return ERR_PTR(-ENOMEM); 117 } 118 cost = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT; 119 120 ret = bpf_map_precharge_memlock(cost); 121 if (ret < 0) 122 return ERR_PTR(ret); 123 124 /* allocate all map elements and zero-initialize them */ 125 array = bpf_map_area_alloc(array_size, numa_node); 126 if (!array) 127 return ERR_PTR(-ENOMEM); 128 array->index_mask = index_mask; 129 array->map.unpriv_array = unpriv; 130 131 /* copy mandatory map attributes */ 132 bpf_map_init_from_attr(&array->map, attr); 133 array->map.pages = cost; 134 array->elem_size = elem_size; 135 136 if (percpu && bpf_array_alloc_percpu(array)) { 137 bpf_map_area_free(array); 138 return ERR_PTR(-ENOMEM); 139 } 140 141 return &array->map; 142 } 143 144 /* Called from syscall or from eBPF program */ 145 static void *array_map_lookup_elem(struct bpf_map *map, void *key) 146 { 147 struct bpf_array *array = container_of(map, struct bpf_array, map); 148 u32 index = *(u32 *)key; 149 150 if (unlikely(index >= array->map.max_entries)) 151 return NULL; 152 153 return array->value + array->elem_size * (index & array->index_mask); 154 } 155 156 static int array_map_direct_value_addr(const struct bpf_map *map, u64 *imm, 157 u32 off) 158 { 159 struct bpf_array *array = container_of(map, struct bpf_array, map); 160 161 if (map->max_entries != 1) 162 return -ENOTSUPP; 163 if (off >= map->value_size) 164 return -EINVAL; 165 166 *imm = (unsigned long)array->value; 167 return 0; 168 } 169 170 static int array_map_direct_value_meta(const struct bpf_map *map, u64 imm, 171 u32 *off) 172 { 173 struct bpf_array *array = container_of(map, struct bpf_array, map); 174 u64 base = (unsigned long)array->value; 175 u64 range = array->elem_size; 176 177 if (map->max_entries != 1) 178 return -ENOTSUPP; 179 if (imm < base || imm >= base + range) 180 return -ENOENT; 181 182 *off = imm - base; 183 return 0; 184 } 185 186 /* emit BPF instructions equivalent to C code of array_map_lookup_elem() */ 187 static u32 array_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf) 188 { 189 struct bpf_array *array = container_of(map, struct bpf_array, map); 190 struct bpf_insn *insn = insn_buf; 191 u32 elem_size = round_up(map->value_size, 8); 192 const int ret = BPF_REG_0; 193 const int map_ptr = BPF_REG_1; 194 const int index = BPF_REG_2; 195 196 *insn++ = BPF_ALU64_IMM(BPF_ADD, map_ptr, offsetof(struct bpf_array, value)); 197 *insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0); 198 if (map->unpriv_array) { 199 *insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 4); 200 *insn++ = BPF_ALU32_IMM(BPF_AND, ret, array->index_mask); 201 } else { 202 *insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 3); 203 } 204 205 if (is_power_of_2(elem_size)) { 206 *insn++ = BPF_ALU64_IMM(BPF_LSH, ret, ilog2(elem_size)); 207 } else { 208 *insn++ = BPF_ALU64_IMM(BPF_MUL, ret, elem_size); 209 } 210 *insn++ = BPF_ALU64_REG(BPF_ADD, ret, map_ptr); 211 *insn++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1); 212 *insn++ = BPF_MOV64_IMM(ret, 0); 213 return insn - insn_buf; 214 } 215 216 /* Called from eBPF program */ 217 static void *percpu_array_map_lookup_elem(struct bpf_map *map, void *key) 218 { 219 struct bpf_array *array = container_of(map, struct bpf_array, map); 220 u32 index = *(u32 *)key; 221 222 if (unlikely(index >= array->map.max_entries)) 223 return NULL; 224 225 return this_cpu_ptr(array->pptrs[index & array->index_mask]); 226 } 227 228 int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value) 229 { 230 struct bpf_array *array = container_of(map, struct bpf_array, map); 231 u32 index = *(u32 *)key; 232 void __percpu *pptr; 233 int cpu, off = 0; 234 u32 size; 235 236 if (unlikely(index >= array->map.max_entries)) 237 return -ENOENT; 238 239 /* per_cpu areas are zero-filled and bpf programs can only 240 * access 'value_size' of them, so copying rounded areas 241 * will not leak any kernel data 242 */ 243 size = round_up(map->value_size, 8); 244 rcu_read_lock(); 245 pptr = array->pptrs[index & array->index_mask]; 246 for_each_possible_cpu(cpu) { 247 bpf_long_memcpy(value + off, per_cpu_ptr(pptr, cpu), size); 248 off += size; 249 } 250 rcu_read_unlock(); 251 return 0; 252 } 253 254 /* Called from syscall */ 255 static int array_map_get_next_key(struct bpf_map *map, void *key, void *next_key) 256 { 257 struct bpf_array *array = container_of(map, struct bpf_array, map); 258 u32 index = key ? *(u32 *)key : U32_MAX; 259 u32 *next = (u32 *)next_key; 260 261 if (index >= array->map.max_entries) { 262 *next = 0; 263 return 0; 264 } 265 266 if (index == array->map.max_entries - 1) 267 return -ENOENT; 268 269 *next = index + 1; 270 return 0; 271 } 272 273 /* Called from syscall or from eBPF program */ 274 static int array_map_update_elem(struct bpf_map *map, void *key, void *value, 275 u64 map_flags) 276 { 277 struct bpf_array *array = container_of(map, struct bpf_array, map); 278 u32 index = *(u32 *)key; 279 char *val; 280 281 if (unlikely((map_flags & ~BPF_F_LOCK) > BPF_EXIST)) 282 /* unknown flags */ 283 return -EINVAL; 284 285 if (unlikely(index >= array->map.max_entries)) 286 /* all elements were pre-allocated, cannot insert a new one */ 287 return -E2BIG; 288 289 if (unlikely(map_flags & BPF_NOEXIST)) 290 /* all elements already exist */ 291 return -EEXIST; 292 293 if (unlikely((map_flags & BPF_F_LOCK) && 294 !map_value_has_spin_lock(map))) 295 return -EINVAL; 296 297 if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY) { 298 memcpy(this_cpu_ptr(array->pptrs[index & array->index_mask]), 299 value, map->value_size); 300 } else { 301 val = array->value + 302 array->elem_size * (index & array->index_mask); 303 if (map_flags & BPF_F_LOCK) 304 copy_map_value_locked(map, val, value, false); 305 else 306 copy_map_value(map, val, value); 307 } 308 return 0; 309 } 310 311 int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value, 312 u64 map_flags) 313 { 314 struct bpf_array *array = container_of(map, struct bpf_array, map); 315 u32 index = *(u32 *)key; 316 void __percpu *pptr; 317 int cpu, off = 0; 318 u32 size; 319 320 if (unlikely(map_flags > BPF_EXIST)) 321 /* unknown flags */ 322 return -EINVAL; 323 324 if (unlikely(index >= array->map.max_entries)) 325 /* all elements were pre-allocated, cannot insert a new one */ 326 return -E2BIG; 327 328 if (unlikely(map_flags == BPF_NOEXIST)) 329 /* all elements already exist */ 330 return -EEXIST; 331 332 /* the user space will provide round_up(value_size, 8) bytes that 333 * will be copied into per-cpu area. bpf programs can only access 334 * value_size of it. During lookup the same extra bytes will be 335 * returned or zeros which were zero-filled by percpu_alloc, 336 * so no kernel data leaks possible 337 */ 338 size = round_up(map->value_size, 8); 339 rcu_read_lock(); 340 pptr = array->pptrs[index & array->index_mask]; 341 for_each_possible_cpu(cpu) { 342 bpf_long_memcpy(per_cpu_ptr(pptr, cpu), value + off, size); 343 off += size; 344 } 345 rcu_read_unlock(); 346 return 0; 347 } 348 349 /* Called from syscall or from eBPF program */ 350 static int array_map_delete_elem(struct bpf_map *map, void *key) 351 { 352 return -EINVAL; 353 } 354 355 /* Called when map->refcnt goes to zero, either from workqueue or from syscall */ 356 static void array_map_free(struct bpf_map *map) 357 { 358 struct bpf_array *array = container_of(map, struct bpf_array, map); 359 360 /* at this point bpf_prog->aux->refcnt == 0 and this map->refcnt == 0, 361 * so the programs (can be more than one that used this map) were 362 * disconnected from events. Wait for outstanding programs to complete 363 * and free the array 364 */ 365 synchronize_rcu(); 366 367 if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY) 368 bpf_array_free_percpu(array); 369 370 bpf_map_area_free(array); 371 } 372 373 static void array_map_seq_show_elem(struct bpf_map *map, void *key, 374 struct seq_file *m) 375 { 376 void *value; 377 378 rcu_read_lock(); 379 380 value = array_map_lookup_elem(map, key); 381 if (!value) { 382 rcu_read_unlock(); 383 return; 384 } 385 386 if (map->btf_key_type_id) 387 seq_printf(m, "%u: ", *(u32 *)key); 388 btf_type_seq_show(map->btf, map->btf_value_type_id, value, m); 389 seq_puts(m, "\n"); 390 391 rcu_read_unlock(); 392 } 393 394 static void percpu_array_map_seq_show_elem(struct bpf_map *map, void *key, 395 struct seq_file *m) 396 { 397 struct bpf_array *array = container_of(map, struct bpf_array, map); 398 u32 index = *(u32 *)key; 399 void __percpu *pptr; 400 int cpu; 401 402 rcu_read_lock(); 403 404 seq_printf(m, "%u: {\n", *(u32 *)key); 405 pptr = array->pptrs[index & array->index_mask]; 406 for_each_possible_cpu(cpu) { 407 seq_printf(m, "\tcpu%d: ", cpu); 408 btf_type_seq_show(map->btf, map->btf_value_type_id, 409 per_cpu_ptr(pptr, cpu), m); 410 seq_puts(m, "\n"); 411 } 412 seq_puts(m, "}\n"); 413 414 rcu_read_unlock(); 415 } 416 417 static int array_map_check_btf(const struct bpf_map *map, 418 const struct btf *btf, 419 const struct btf_type *key_type, 420 const struct btf_type *value_type) 421 { 422 u32 int_data; 423 424 /* One exception for keyless BTF: .bss/.data/.rodata map */ 425 if (btf_type_is_void(key_type)) { 426 if (map->map_type != BPF_MAP_TYPE_ARRAY || 427 map->max_entries != 1) 428 return -EINVAL; 429 430 if (BTF_INFO_KIND(value_type->info) != BTF_KIND_DATASEC) 431 return -EINVAL; 432 433 return 0; 434 } 435 436 if (BTF_INFO_KIND(key_type->info) != BTF_KIND_INT) 437 return -EINVAL; 438 439 int_data = *(u32 *)(key_type + 1); 440 /* bpf array can only take a u32 key. This check makes sure 441 * that the btf matches the attr used during map_create. 442 */ 443 if (BTF_INT_BITS(int_data) != 32 || BTF_INT_OFFSET(int_data)) 444 return -EINVAL; 445 446 return 0; 447 } 448 449 const struct bpf_map_ops array_map_ops = { 450 .map_alloc_check = array_map_alloc_check, 451 .map_alloc = array_map_alloc, 452 .map_free = array_map_free, 453 .map_get_next_key = array_map_get_next_key, 454 .map_lookup_elem = array_map_lookup_elem, 455 .map_update_elem = array_map_update_elem, 456 .map_delete_elem = array_map_delete_elem, 457 .map_gen_lookup = array_map_gen_lookup, 458 .map_direct_value_addr = array_map_direct_value_addr, 459 .map_direct_value_meta = array_map_direct_value_meta, 460 .map_seq_show_elem = array_map_seq_show_elem, 461 .map_check_btf = array_map_check_btf, 462 }; 463 464 const struct bpf_map_ops percpu_array_map_ops = { 465 .map_alloc_check = array_map_alloc_check, 466 .map_alloc = array_map_alloc, 467 .map_free = array_map_free, 468 .map_get_next_key = array_map_get_next_key, 469 .map_lookup_elem = percpu_array_map_lookup_elem, 470 .map_update_elem = array_map_update_elem, 471 .map_delete_elem = array_map_delete_elem, 472 .map_seq_show_elem = percpu_array_map_seq_show_elem, 473 .map_check_btf = array_map_check_btf, 474 }; 475 476 static int fd_array_map_alloc_check(union bpf_attr *attr) 477 { 478 /* only file descriptors can be stored in this type of map */ 479 if (attr->value_size != sizeof(u32)) 480 return -EINVAL; 481 /* Program read-only/write-only not supported for special maps yet. */ 482 if (attr->map_flags & (BPF_F_RDONLY_PROG | BPF_F_WRONLY_PROG)) 483 return -EINVAL; 484 return array_map_alloc_check(attr); 485 } 486 487 static void fd_array_map_free(struct bpf_map *map) 488 { 489 struct bpf_array *array = container_of(map, struct bpf_array, map); 490 int i; 491 492 synchronize_rcu(); 493 494 /* make sure it's empty */ 495 for (i = 0; i < array->map.max_entries; i++) 496 BUG_ON(array->ptrs[i] != NULL); 497 498 bpf_map_area_free(array); 499 } 500 501 static void *fd_array_map_lookup_elem(struct bpf_map *map, void *key) 502 { 503 return ERR_PTR(-EOPNOTSUPP); 504 } 505 506 /* only called from syscall */ 507 int bpf_fd_array_map_lookup_elem(struct bpf_map *map, void *key, u32 *value) 508 { 509 void **elem, *ptr; 510 int ret = 0; 511 512 if (!map->ops->map_fd_sys_lookup_elem) 513 return -ENOTSUPP; 514 515 rcu_read_lock(); 516 elem = array_map_lookup_elem(map, key); 517 if (elem && (ptr = READ_ONCE(*elem))) 518 *value = map->ops->map_fd_sys_lookup_elem(ptr); 519 else 520 ret = -ENOENT; 521 rcu_read_unlock(); 522 523 return ret; 524 } 525 526 /* only called from syscall */ 527 int bpf_fd_array_map_update_elem(struct bpf_map *map, struct file *map_file, 528 void *key, void *value, u64 map_flags) 529 { 530 struct bpf_array *array = container_of(map, struct bpf_array, map); 531 void *new_ptr, *old_ptr; 532 u32 index = *(u32 *)key, ufd; 533 534 if (map_flags != BPF_ANY) 535 return -EINVAL; 536 537 if (index >= array->map.max_entries) 538 return -E2BIG; 539 540 ufd = *(u32 *)value; 541 new_ptr = map->ops->map_fd_get_ptr(map, map_file, ufd); 542 if (IS_ERR(new_ptr)) 543 return PTR_ERR(new_ptr); 544 545 old_ptr = xchg(array->ptrs + index, new_ptr); 546 if (old_ptr) 547 map->ops->map_fd_put_ptr(old_ptr); 548 549 return 0; 550 } 551 552 static int fd_array_map_delete_elem(struct bpf_map *map, void *key) 553 { 554 struct bpf_array *array = container_of(map, struct bpf_array, map); 555 void *old_ptr; 556 u32 index = *(u32 *)key; 557 558 if (index >= array->map.max_entries) 559 return -E2BIG; 560 561 old_ptr = xchg(array->ptrs + index, NULL); 562 if (old_ptr) { 563 map->ops->map_fd_put_ptr(old_ptr); 564 return 0; 565 } else { 566 return -ENOENT; 567 } 568 } 569 570 static void *prog_fd_array_get_ptr(struct bpf_map *map, 571 struct file *map_file, int fd) 572 { 573 struct bpf_array *array = container_of(map, struct bpf_array, map); 574 struct bpf_prog *prog = bpf_prog_get(fd); 575 576 if (IS_ERR(prog)) 577 return prog; 578 579 if (!bpf_prog_array_compatible(array, prog)) { 580 bpf_prog_put(prog); 581 return ERR_PTR(-EINVAL); 582 } 583 584 return prog; 585 } 586 587 static void prog_fd_array_put_ptr(void *ptr) 588 { 589 bpf_prog_put(ptr); 590 } 591 592 static u32 prog_fd_array_sys_lookup_elem(void *ptr) 593 { 594 return ((struct bpf_prog *)ptr)->aux->id; 595 } 596 597 /* decrement refcnt of all bpf_progs that are stored in this map */ 598 static void bpf_fd_array_map_clear(struct bpf_map *map) 599 { 600 struct bpf_array *array = container_of(map, struct bpf_array, map); 601 int i; 602 603 for (i = 0; i < array->map.max_entries; i++) 604 fd_array_map_delete_elem(map, &i); 605 } 606 607 static void prog_array_map_seq_show_elem(struct bpf_map *map, void *key, 608 struct seq_file *m) 609 { 610 void **elem, *ptr; 611 u32 prog_id; 612 613 rcu_read_lock(); 614 615 elem = array_map_lookup_elem(map, key); 616 if (elem) { 617 ptr = READ_ONCE(*elem); 618 if (ptr) { 619 seq_printf(m, "%u: ", *(u32 *)key); 620 prog_id = prog_fd_array_sys_lookup_elem(ptr); 621 btf_type_seq_show(map->btf, map->btf_value_type_id, 622 &prog_id, m); 623 seq_puts(m, "\n"); 624 } 625 } 626 627 rcu_read_unlock(); 628 } 629 630 const struct bpf_map_ops prog_array_map_ops = { 631 .map_alloc_check = fd_array_map_alloc_check, 632 .map_alloc = array_map_alloc, 633 .map_free = fd_array_map_free, 634 .map_get_next_key = array_map_get_next_key, 635 .map_lookup_elem = fd_array_map_lookup_elem, 636 .map_delete_elem = fd_array_map_delete_elem, 637 .map_fd_get_ptr = prog_fd_array_get_ptr, 638 .map_fd_put_ptr = prog_fd_array_put_ptr, 639 .map_fd_sys_lookup_elem = prog_fd_array_sys_lookup_elem, 640 .map_release_uref = bpf_fd_array_map_clear, 641 .map_seq_show_elem = prog_array_map_seq_show_elem, 642 }; 643 644 static struct bpf_event_entry *bpf_event_entry_gen(struct file *perf_file, 645 struct file *map_file) 646 { 647 struct bpf_event_entry *ee; 648 649 ee = kzalloc(sizeof(*ee), GFP_ATOMIC); 650 if (ee) { 651 ee->event = perf_file->private_data; 652 ee->perf_file = perf_file; 653 ee->map_file = map_file; 654 } 655 656 return ee; 657 } 658 659 static void __bpf_event_entry_free(struct rcu_head *rcu) 660 { 661 struct bpf_event_entry *ee; 662 663 ee = container_of(rcu, struct bpf_event_entry, rcu); 664 fput(ee->perf_file); 665 kfree(ee); 666 } 667 668 static void bpf_event_entry_free_rcu(struct bpf_event_entry *ee) 669 { 670 call_rcu(&ee->rcu, __bpf_event_entry_free); 671 } 672 673 static void *perf_event_fd_array_get_ptr(struct bpf_map *map, 674 struct file *map_file, int fd) 675 { 676 struct bpf_event_entry *ee; 677 struct perf_event *event; 678 struct file *perf_file; 679 u64 value; 680 681 perf_file = perf_event_get(fd); 682 if (IS_ERR(perf_file)) 683 return perf_file; 684 685 ee = ERR_PTR(-EOPNOTSUPP); 686 event = perf_file->private_data; 687 if (perf_event_read_local(event, &value, NULL, NULL) == -EOPNOTSUPP) 688 goto err_out; 689 690 ee = bpf_event_entry_gen(perf_file, map_file); 691 if (ee) 692 return ee; 693 ee = ERR_PTR(-ENOMEM); 694 err_out: 695 fput(perf_file); 696 return ee; 697 } 698 699 static void perf_event_fd_array_put_ptr(void *ptr) 700 { 701 bpf_event_entry_free_rcu(ptr); 702 } 703 704 static void perf_event_fd_array_release(struct bpf_map *map, 705 struct file *map_file) 706 { 707 struct bpf_array *array = container_of(map, struct bpf_array, map); 708 struct bpf_event_entry *ee; 709 int i; 710 711 rcu_read_lock(); 712 for (i = 0; i < array->map.max_entries; i++) { 713 ee = READ_ONCE(array->ptrs[i]); 714 if (ee && ee->map_file == map_file) 715 fd_array_map_delete_elem(map, &i); 716 } 717 rcu_read_unlock(); 718 } 719 720 const struct bpf_map_ops perf_event_array_map_ops = { 721 .map_alloc_check = fd_array_map_alloc_check, 722 .map_alloc = array_map_alloc, 723 .map_free = fd_array_map_free, 724 .map_get_next_key = array_map_get_next_key, 725 .map_lookup_elem = fd_array_map_lookup_elem, 726 .map_delete_elem = fd_array_map_delete_elem, 727 .map_fd_get_ptr = perf_event_fd_array_get_ptr, 728 .map_fd_put_ptr = perf_event_fd_array_put_ptr, 729 .map_release = perf_event_fd_array_release, 730 .map_check_btf = map_check_no_btf, 731 }; 732 733 #ifdef CONFIG_CGROUPS 734 static void *cgroup_fd_array_get_ptr(struct bpf_map *map, 735 struct file *map_file /* not used */, 736 int fd) 737 { 738 return cgroup_get_from_fd(fd); 739 } 740 741 static void cgroup_fd_array_put_ptr(void *ptr) 742 { 743 /* cgroup_put free cgrp after a rcu grace period */ 744 cgroup_put(ptr); 745 } 746 747 static void cgroup_fd_array_free(struct bpf_map *map) 748 { 749 bpf_fd_array_map_clear(map); 750 fd_array_map_free(map); 751 } 752 753 const struct bpf_map_ops cgroup_array_map_ops = { 754 .map_alloc_check = fd_array_map_alloc_check, 755 .map_alloc = array_map_alloc, 756 .map_free = cgroup_fd_array_free, 757 .map_get_next_key = array_map_get_next_key, 758 .map_lookup_elem = fd_array_map_lookup_elem, 759 .map_delete_elem = fd_array_map_delete_elem, 760 .map_fd_get_ptr = cgroup_fd_array_get_ptr, 761 .map_fd_put_ptr = cgroup_fd_array_put_ptr, 762 .map_check_btf = map_check_no_btf, 763 }; 764 #endif 765 766 static struct bpf_map *array_of_map_alloc(union bpf_attr *attr) 767 { 768 struct bpf_map *map, *inner_map_meta; 769 770 inner_map_meta = bpf_map_meta_alloc(attr->inner_map_fd); 771 if (IS_ERR(inner_map_meta)) 772 return inner_map_meta; 773 774 map = array_map_alloc(attr); 775 if (IS_ERR(map)) { 776 bpf_map_meta_free(inner_map_meta); 777 return map; 778 } 779 780 map->inner_map_meta = inner_map_meta; 781 782 return map; 783 } 784 785 static void array_of_map_free(struct bpf_map *map) 786 { 787 /* map->inner_map_meta is only accessed by syscall which 788 * is protected by fdget/fdput. 789 */ 790 bpf_map_meta_free(map->inner_map_meta); 791 bpf_fd_array_map_clear(map); 792 fd_array_map_free(map); 793 } 794 795 static void *array_of_map_lookup_elem(struct bpf_map *map, void *key) 796 { 797 struct bpf_map **inner_map = array_map_lookup_elem(map, key); 798 799 if (!inner_map) 800 return NULL; 801 802 return READ_ONCE(*inner_map); 803 } 804 805 static u32 array_of_map_gen_lookup(struct bpf_map *map, 806 struct bpf_insn *insn_buf) 807 { 808 struct bpf_array *array = container_of(map, struct bpf_array, map); 809 u32 elem_size = round_up(map->value_size, 8); 810 struct bpf_insn *insn = insn_buf; 811 const int ret = BPF_REG_0; 812 const int map_ptr = BPF_REG_1; 813 const int index = BPF_REG_2; 814 815 *insn++ = BPF_ALU64_IMM(BPF_ADD, map_ptr, offsetof(struct bpf_array, value)); 816 *insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0); 817 if (map->unpriv_array) { 818 *insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 6); 819 *insn++ = BPF_ALU32_IMM(BPF_AND, ret, array->index_mask); 820 } else { 821 *insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 5); 822 } 823 if (is_power_of_2(elem_size)) 824 *insn++ = BPF_ALU64_IMM(BPF_LSH, ret, ilog2(elem_size)); 825 else 826 *insn++ = BPF_ALU64_IMM(BPF_MUL, ret, elem_size); 827 *insn++ = BPF_ALU64_REG(BPF_ADD, ret, map_ptr); 828 *insn++ = BPF_LDX_MEM(BPF_DW, ret, ret, 0); 829 *insn++ = BPF_JMP_IMM(BPF_JEQ, ret, 0, 1); 830 *insn++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1); 831 *insn++ = BPF_MOV64_IMM(ret, 0); 832 833 return insn - insn_buf; 834 } 835 836 const struct bpf_map_ops array_of_maps_map_ops = { 837 .map_alloc_check = fd_array_map_alloc_check, 838 .map_alloc = array_of_map_alloc, 839 .map_free = array_of_map_free, 840 .map_get_next_key = array_map_get_next_key, 841 .map_lookup_elem = array_of_map_lookup_elem, 842 .map_delete_elem = fd_array_map_delete_elem, 843 .map_fd_get_ptr = bpf_map_fd_get_ptr, 844 .map_fd_put_ptr = bpf_map_fd_put_ptr, 845 .map_fd_sys_lookup_elem = bpf_map_fd_sys_lookup_elem, 846 .map_gen_lookup = array_of_map_gen_lookup, 847 .map_check_btf = map_check_no_btf, 848 }; 849