// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
 * Copyright (c) 2016,2017 Facebook
 */
#include <linux/bpf.h>
#include <linux/btf.h>
#include <linux/err.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/filter.h>
#include <linux/perf_event.h>
#include <uapi/linux/btf.h>

#include "map_in_map.h"

#define ARRAY_CREATE_FLAG_MASK \
	(BPF_F_NUMA_NODE | BPF_F_ACCESS_MASK)

static void bpf_array_free_percpu(struct bpf_array *array)
{
	int i;

	for (i = 0; i < array->map.max_entries; i++) {
		free_percpu(array->pptrs[i]);
		cond_resched();
	}
}

static int bpf_array_alloc_percpu(struct bpf_array *array)
{
	void __percpu *ptr;
	int i;

	for (i = 0; i < array->map.max_entries; i++) {
		ptr = __alloc_percpu_gfp(array->elem_size, 8,
					 GFP_USER | __GFP_NOWARN);
		if (!ptr) {
			bpf_array_free_percpu(array);
			return -ENOMEM;
		}
		array->pptrs[i] = ptr;
		cond_resched();
	}

	return 0;
}

/* Called from syscall */
int array_map_alloc_check(union bpf_attr *attr)
{
	bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY;
	int numa_node = bpf_map_attr_numa_node(attr);

	/* check sanity of attributes */
	if (attr->max_entries == 0 || attr->key_size != 4 ||
	    attr->value_size == 0 ||
	    attr->map_flags & ~ARRAY_CREATE_FLAG_MASK ||
	    !bpf_map_flags_access_ok(attr->map_flags) ||
	    (percpu && numa_node != NUMA_NO_NODE))
		return -EINVAL;

	if (attr->value_size > KMALLOC_MAX_SIZE)
		/* if value_size is bigger, the user space won't be able to
		 * access the elements.
		 */
		return -E2BIG;

	return 0;
}
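
/* Illustrative sketch, not part of this file: the checks above mean an array
 * map must be created with a 4-byte key, a non-zero value size no larger than
 * KMALLOC_MAX_SIZE, and at least one entry. Assuming a user-space program
 * that includes <unistd.h>, <sys/syscall.h> and the uapi <linux/bpf.h>, a
 * minimal creation request via the raw bpf(2) syscall could look like:
 *
 *	union bpf_attr attr = {
 *		.map_type    = BPF_MAP_TYPE_ARRAY,
 *		.key_size    = sizeof(__u32),	// must be 4, see the check above
 *		.value_size  = sizeof(__u64),
 *		.max_entries = 256,
 *	};
 *	int map_fd = syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr));
 */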

static struct bpf_map *array_map_alloc(union bpf_attr *attr)
{
	bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY;
	int ret, numa_node = bpf_map_attr_numa_node(attr);
	u32 elem_size, index_mask, max_entries;
	bool unpriv = !capable(CAP_SYS_ADMIN);
	u64 cost, array_size, mask64;
	struct bpf_map_memory mem;
	struct bpf_array *array;

	elem_size = round_up(attr->value_size, 8);

	max_entries = attr->max_entries;

	/* On 32 bit archs roundup_pow_of_two() with max_entries that has
	 * the uppermost bit set in u32 space is undefined behavior due to
	 * the resulting 1U << 32, so do it manually here in u64 space.
	 */
	mask64 = fls_long(max_entries - 1);
	mask64 = 1ULL << mask64;
	mask64 -= 1;

	index_mask = mask64;
	if (unpriv) {
		/* round up array size to nearest power of 2,
		 * since cpu will speculate within index_mask limits
		 */
		max_entries = index_mask + 1;
		/* Check for overflows. */
		if (max_entries < attr->max_entries)
			return ERR_PTR(-E2BIG);
	}

	array_size = sizeof(*array);
	if (percpu)
		array_size += (u64) max_entries * sizeof(void *);
	else
		array_size += (u64) max_entries * elem_size;

	/* make sure there is no u32 overflow later in round_up() */
	cost = array_size;
	if (percpu)
		cost += (u64)attr->max_entries * elem_size * num_possible_cpus();

	ret = bpf_map_charge_init(&mem, cost);
	if (ret < 0)
		return ERR_PTR(ret);

	/* allocate all map elements and zero-initialize them */
	array = bpf_map_area_alloc(array_size, numa_node);
	if (!array) {
		bpf_map_charge_finish(&mem);
		return ERR_PTR(-ENOMEM);
	}
	array->index_mask = index_mask;
	array->map.unpriv_array = unpriv;

	/* copy mandatory map attributes */
	bpf_map_init_from_attr(&array->map, attr);
	bpf_map_charge_move(&array->map.memory, &mem);
	array->elem_size = elem_size;

	if (percpu && bpf_array_alloc_percpu(array)) {
		bpf_map_charge_finish(&array->map.memory);
		bpf_map_area_free(array);
		return ERR_PTR(-ENOMEM);
	}

	return &array->map;
}

/* Called from syscall or from eBPF program */
static void *array_map_lookup_elem(struct bpf_map *map, void *key)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 index = *(u32 *)key;

	if (unlikely(index >= array->map.max_entries))
		return NULL;

	return array->value + array->elem_size * (index & array->index_mask);
}

static int array_map_direct_value_addr(const struct bpf_map *map, u64 *imm,
				       u32 off)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);

	if (map->max_entries != 1)
		return -ENOTSUPP;
	if (off >= map->value_size)
		return -EINVAL;

	*imm = (unsigned long)array->value;
	return 0;
}

static int array_map_direct_value_meta(const struct bpf_map *map, u64 imm,
				       u32 *off)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u64 base = (unsigned long)array->value;
	u64 range = array->elem_size;

	if (map->max_entries != 1)
		return -ENOTSUPP;
	if (imm < base || imm >= base + range)
		return -ENOENT;

	*off = imm - base;
	return 0;
}

/* emit BPF instructions equivalent to C code of array_map_lookup_elem() */
static u32 array_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	struct bpf_insn *insn = insn_buf;
	u32 elem_size = round_up(map->value_size, 8);
	const int ret = BPF_REG_0;
	const int map_ptr = BPF_REG_1;
	const int index = BPF_REG_2;

	*insn++ = BPF_ALU64_IMM(BPF_ADD, map_ptr, offsetof(struct bpf_array, value));
	*insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0);
	if (map->unpriv_array) {
		*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 4);
		*insn++ = BPF_ALU32_IMM(BPF_AND, ret, array->index_mask);
	} else {
		*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 3);
	}

	if (is_power_of_2(elem_size)) {
		*insn++ = BPF_ALU64_IMM(BPF_LSH, ret, ilog2(elem_size));
	} else {
		*insn++ = BPF_ALU64_IMM(BPF_MUL, ret, elem_size);
	}
	*insn++ = BPF_ALU64_REG(BPF_ADD, ret, map_ptr);
	*insn++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1);
	*insn++ = BPF_MOV64_IMM(ret, 0);
	return insn - insn_buf;
}
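
/* For reference (derived from the instructions emitted above, not from any
 * separate documentation): the generated sequence is roughly equivalent to
 * the following C, with the AND against index_mask only emitted for maps
 * created by unprivileged users, as a Spectre v1 mitigation:
 *
 *	u32 index = *(u32 *)key;
 *	if (index >= max_entries)
 *		return NULL;
 *	index &= array->index_mask;	// unpriv_array only
 *	return (void *)&array->value + index * round_up(value_size, 8);
 */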

/* Called from eBPF program */
static void *percpu_array_map_lookup_elem(struct bpf_map *map, void *key)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 index = *(u32 *)key;

	if (unlikely(index >= array->map.max_entries))
		return NULL;

	return this_cpu_ptr(array->pptrs[index & array->index_mask]);
}

int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 index = *(u32 *)key;
	void __percpu *pptr;
	int cpu, off = 0;
	u32 size;

	if (unlikely(index >= array->map.max_entries))
		return -ENOENT;

	/* per_cpu areas are zero-filled and bpf programs can only
	 * access 'value_size' of them, so copying rounded areas
	 * will not leak any kernel data
	 */
	size = round_up(map->value_size, 8);
	rcu_read_lock();
	pptr = array->pptrs[index & array->index_mask];
	for_each_possible_cpu(cpu) {
		bpf_long_memcpy(value + off, per_cpu_ptr(pptr, cpu), size);
		off += size;
	}
	rcu_read_unlock();
	return 0;
}

/* Called from syscall */
static int array_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 index = key ? *(u32 *)key : U32_MAX;
	u32 *next = (u32 *)next_key;

	if (index >= array->map.max_entries) {
		*next = 0;
		return 0;
	}

	if (index == array->map.max_entries - 1)
		return -ENOENT;

	*next = index + 1;
	return 0;
}
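
/* Illustrative sketch, not part of this file: because a NULL or out-of-range
 * key restarts at index 0 and the last index returns -ENOENT, user space can
 * walk every slot with the usual get_next_key loop. Assuming a hypothetical
 * map_fd for an 8-byte-value array map and the libbpf bpf_map_get_next_key()
 * and bpf_map_lookup_elem() wrappers:
 *
 *	__u32 key, next_key;
 *	__u64 value;
 *
 *	int err = bpf_map_get_next_key(map_fd, NULL, &next_key);
 *	while (!err) {
 *		key = next_key;
 *		bpf_map_lookup_elem(map_fd, &key, &value);
 *		err = bpf_map_get_next_key(map_fd, &key, &next_key);
 *	}
 */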

/* Called from syscall or from eBPF program */
static int array_map_update_elem(struct bpf_map *map, void *key, void *value,
				 u64 map_flags)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 index = *(u32 *)key;
	char *val;

	if (unlikely((map_flags & ~BPF_F_LOCK) > BPF_EXIST))
		/* unknown flags */
		return -EINVAL;

	if (unlikely(index >= array->map.max_entries))
		/* all elements were pre-allocated, cannot insert a new one */
		return -E2BIG;

	if (unlikely(map_flags & BPF_NOEXIST))
		/* all elements already exist */
		return -EEXIST;

	if (unlikely((map_flags & BPF_F_LOCK) &&
		     !map_value_has_spin_lock(map)))
		return -EINVAL;

	if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
		memcpy(this_cpu_ptr(array->pptrs[index & array->index_mask]),
		       value, map->value_size);
	} else {
		val = array->value +
			array->elem_size * (index & array->index_mask);
		if (map_flags & BPF_F_LOCK)
			copy_map_value_locked(map, val, value, false);
		else
			copy_map_value(map, val, value);
	}
	return 0;
}

int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value,
			    u64 map_flags)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 index = *(u32 *)key;
	void __percpu *pptr;
	int cpu, off = 0;
	u32 size;

	if (unlikely(map_flags > BPF_EXIST))
		/* unknown flags */
		return -EINVAL;

	if (unlikely(index >= array->map.max_entries))
		/* all elements were pre-allocated, cannot insert a new one */
		return -E2BIG;

	if (unlikely(map_flags == BPF_NOEXIST))
		/* all elements already exist */
		return -EEXIST;

	/* the user space will provide round_up(value_size, 8) bytes that
	 * will be copied into the per-cpu area. bpf programs can only access
	 * value_size of it. During lookup the same extra bytes will be
	 * returned, or zeros that percpu_alloc zero-filled, so no kernel
	 * data can leak.
	 */
	size = round_up(map->value_size, 8);
	rcu_read_lock();
	pptr = array->pptrs[index & array->index_mask];
	for_each_possible_cpu(cpu) {
		bpf_long_memcpy(per_cpu_ptr(pptr, cpu), value + off, size);
		off += size;
	}
	rcu_read_unlock();
	return 0;
}
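
/* Illustrative sketch, not part of this file: from user space, per-cpu array
 * lookups and updates operate on one contiguous buffer holding
 * round_up(value_size, 8) bytes per possible CPU, in CPU order. Assuming a
 * hypothetical percpu_fd with an 8-byte value size, and ncpus taken from the
 * possible-CPU count (e.g. /sys/devices/system/cpu/possible):
 *
 *	__u32 key = 0;
 *	__u64 *values = calloc(ncpus, sizeof(__u64));
 *
 *	bpf_map_lookup_elem(percpu_fd, &key, values);	// one slot per CPU
 *	for (int cpu = 0; cpu < ncpus; cpu++)
 *		values[cpu] = 0;
 *	bpf_map_update_elem(percpu_fd, &key, values, BPF_ANY);
 */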

/* Called from syscall or from eBPF program */
static int array_map_delete_elem(struct bpf_map *map, void *key)
{
	return -EINVAL;
}

/* Called when map->refcnt goes to zero, either from workqueue or from syscall */
static void array_map_free(struct bpf_map *map)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);

	/* at this point bpf_prog->aux->refcnt == 0 and this map->refcnt == 0,
	 * so the programs (can be more than one that used this map) were
	 * disconnected from events. Wait for outstanding programs to complete
	 * and free the array
	 */
	synchronize_rcu();

	if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
		bpf_array_free_percpu(array);

	bpf_map_area_free(array);
}

static void array_map_seq_show_elem(struct bpf_map *map, void *key,
				    struct seq_file *m)
{
	void *value;

	rcu_read_lock();

	value = array_map_lookup_elem(map, key);
	if (!value) {
		rcu_read_unlock();
		return;
	}

	if (map->btf_key_type_id)
		seq_printf(m, "%u: ", *(u32 *)key);
	btf_type_seq_show(map->btf, map->btf_value_type_id, value, m);
	seq_puts(m, "\n");

	rcu_read_unlock();
}

static void percpu_array_map_seq_show_elem(struct bpf_map *map, void *key,
					   struct seq_file *m)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 index = *(u32 *)key;
	void __percpu *pptr;
	int cpu;

	rcu_read_lock();

	seq_printf(m, "%u: {\n", *(u32 *)key);
	pptr = array->pptrs[index & array->index_mask];
	for_each_possible_cpu(cpu) {
		seq_printf(m, "\tcpu%d: ", cpu);
		btf_type_seq_show(map->btf, map->btf_value_type_id,
				  per_cpu_ptr(pptr, cpu), m);
		seq_puts(m, "\n");
	}
	seq_puts(m, "}\n");

	rcu_read_unlock();
}

static int array_map_check_btf(const struct bpf_map *map,
			       const struct btf *btf,
			       const struct btf_type *key_type,
			       const struct btf_type *value_type)
{
	u32 int_data;

	/* One exception for keyless BTF: .bss/.data/.rodata map */
	if (btf_type_is_void(key_type)) {
		if (map->map_type != BPF_MAP_TYPE_ARRAY ||
		    map->max_entries != 1)
			return -EINVAL;

		if (BTF_INFO_KIND(value_type->info) != BTF_KIND_DATASEC)
			return -EINVAL;

		return 0;
	}

	if (BTF_INFO_KIND(key_type->info) != BTF_KIND_INT)
		return -EINVAL;

	int_data = *(u32 *)(key_type + 1);
	/* bpf array can only take a u32 key. This check makes sure
	 * that the btf matches the attr used during map_create.
	 */
	if (BTF_INT_BITS(int_data) != 32 || BTF_INT_OFFSET(int_data))
		return -EINVAL;

	return 0;
}

const struct bpf_map_ops array_map_ops = {
	.map_alloc_check = array_map_alloc_check,
	.map_alloc = array_map_alloc,
	.map_free = array_map_free,
	.map_get_next_key = array_map_get_next_key,
	.map_lookup_elem = array_map_lookup_elem,
	.map_update_elem = array_map_update_elem,
	.map_delete_elem = array_map_delete_elem,
	.map_gen_lookup = array_map_gen_lookup,
	.map_direct_value_addr = array_map_direct_value_addr,
	.map_direct_value_meta = array_map_direct_value_meta,
	.map_seq_show_elem = array_map_seq_show_elem,
	.map_check_btf = array_map_check_btf,
};

const struct bpf_map_ops percpu_array_map_ops = {
	.map_alloc_check = array_map_alloc_check,
	.map_alloc = array_map_alloc,
	.map_free = array_map_free,
	.map_get_next_key = array_map_get_next_key,
	.map_lookup_elem = percpu_array_map_lookup_elem,
	.map_update_elem = array_map_update_elem,
	.map_delete_elem = array_map_delete_elem,
	.map_seq_show_elem = percpu_array_map_seq_show_elem,
	.map_check_btf = array_map_check_btf,
};

static int fd_array_map_alloc_check(union bpf_attr *attr)
{
	/* only file descriptors can be stored in this type of map */
	if (attr->value_size != sizeof(u32))
		return -EINVAL;
	/* Program read-only/write-only not supported for special maps yet. */
	if (attr->map_flags & (BPF_F_RDONLY_PROG | BPF_F_WRONLY_PROG))
		return -EINVAL;
	return array_map_alloc_check(attr);
}

static void fd_array_map_free(struct bpf_map *map)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	int i;

	synchronize_rcu();

	/* make sure it's empty */
	for (i = 0; i < array->map.max_entries; i++)
		BUG_ON(array->ptrs[i] != NULL);

	bpf_map_area_free(array);
}

static void *fd_array_map_lookup_elem(struct bpf_map *map, void *key)
{
	return ERR_PTR(-EOPNOTSUPP);
}

/* only called from syscall */
int bpf_fd_array_map_lookup_elem(struct bpf_map *map, void *key, u32 *value)
{
	void **elem, *ptr;
	int ret = 0;

	if (!map->ops->map_fd_sys_lookup_elem)
		return -ENOTSUPP;

	rcu_read_lock();
	elem = array_map_lookup_elem(map, key);
	if (elem && (ptr = READ_ONCE(*elem)))
		*value = map->ops->map_fd_sys_lookup_elem(ptr);
	else
		ret = -ENOENT;
	rcu_read_unlock();

	return ret;
}

/* only called from syscall */
int bpf_fd_array_map_update_elem(struct bpf_map *map, struct file *map_file,
				 void *key, void *value, u64 map_flags)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	void *new_ptr, *old_ptr;
	u32 index = *(u32 *)key, ufd;

	if (map_flags != BPF_ANY)
		return -EINVAL;

	if (index >= array->map.max_entries)
		return -E2BIG;

	ufd = *(u32 *)value;
	new_ptr = map->ops->map_fd_get_ptr(map, map_file, ufd);
	if (IS_ERR(new_ptr))
		return PTR_ERR(new_ptr);

	old_ptr = xchg(array->ptrs + index, new_ptr);
	if (old_ptr)
		map->ops->map_fd_put_ptr(old_ptr);

	return 0;
}
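
/* Illustrative sketch, not part of this file: fd-array maps are updated with
 * a file descriptor as the value, and only BPF_ANY is accepted. Assuming
 * hypothetical prog_array_fd (a BPF_MAP_TYPE_PROG_ARRAY) and prog_fd
 * variables, plus libbpf's bpf_map_update_elem() wrapper:
 *
 *	__u32 slot = 2;
 *	bpf_map_update_elem(prog_array_fd, &slot, &prog_fd, BPF_ANY);
 *
 * A later user-space lookup on the same slot goes through
 * bpf_fd_array_map_lookup_elem() above and returns the program id, not
 * the fd.
 */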

static int fd_array_map_delete_elem(struct bpf_map *map, void *key)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	void *old_ptr;
	u32 index = *(u32 *)key;

	if (index >= array->map.max_entries)
		return -E2BIG;

	old_ptr = xchg(array->ptrs + index, NULL);
	if (old_ptr) {
		map->ops->map_fd_put_ptr(old_ptr);
		return 0;
	} else {
		return -ENOENT;
	}
}

static void *prog_fd_array_get_ptr(struct bpf_map *map,
				   struct file *map_file, int fd)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	struct bpf_prog *prog = bpf_prog_get(fd);

	if (IS_ERR(prog))
		return prog;

	if (!bpf_prog_array_compatible(array, prog)) {
		bpf_prog_put(prog);
		return ERR_PTR(-EINVAL);
	}

	return prog;
}

static void prog_fd_array_put_ptr(void *ptr)
{
	bpf_prog_put(ptr);
}

static u32 prog_fd_array_sys_lookup_elem(void *ptr)
{
	return ((struct bpf_prog *)ptr)->aux->id;
}

/* decrement refcnt of all bpf_progs that are stored in this map */
static void bpf_fd_array_map_clear(struct bpf_map *map)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	int i;

	for (i = 0; i < array->map.max_entries; i++)
		fd_array_map_delete_elem(map, &i);
}

static void prog_array_map_seq_show_elem(struct bpf_map *map, void *key,
					 struct seq_file *m)
{
	void **elem, *ptr;
	u32 prog_id;

	rcu_read_lock();

	elem = array_map_lookup_elem(map, key);
	if (elem) {
		ptr = READ_ONCE(*elem);
		if (ptr) {
			seq_printf(m, "%u: ", *(u32 *)key);
			prog_id = prog_fd_array_sys_lookup_elem(ptr);
			btf_type_seq_show(map->btf, map->btf_value_type_id,
					  &prog_id, m);
			seq_puts(m, "\n");
		}
	}

	rcu_read_unlock();
}

const struct bpf_map_ops prog_array_map_ops = {
	.map_alloc_check = fd_array_map_alloc_check,
	.map_alloc = array_map_alloc,
	.map_free = fd_array_map_free,
	.map_get_next_key = array_map_get_next_key,
	.map_lookup_elem = fd_array_map_lookup_elem,
	.map_delete_elem = fd_array_map_delete_elem,
	.map_fd_get_ptr = prog_fd_array_get_ptr,
	.map_fd_put_ptr = prog_fd_array_put_ptr,
	.map_fd_sys_lookup_elem = prog_fd_array_sys_lookup_elem,
	.map_release_uref = bpf_fd_array_map_clear,
	.map_seq_show_elem = prog_array_map_seq_show_elem,
};
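
/* Illustrative sketch, not part of this file: inside a BPF program a prog
 * array is consumed through the bpf_tail_call() helper. Assuming the classic
 * (pre-BTF) map definition style from bpf_helpers.h of this era, with a
 * hypothetical jmp_table map and dispatch() program:
 *
 *	struct bpf_map_def SEC("maps") jmp_table = {
 *		.type        = BPF_MAP_TYPE_PROG_ARRAY,
 *		.key_size    = sizeof(__u32),
 *		.value_size  = sizeof(__u32),
 *		.max_entries = 8,
 *	};
 *
 *	SEC("socket")
 *	int dispatch(struct __sk_buff *skb)
 *	{
 *		bpf_tail_call(skb, &jmp_table, 2);
 *		// reached only if slot 2 is empty or the tail call fails
 *		return 0;
 *	}
 */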

static struct bpf_event_entry *bpf_event_entry_gen(struct file *perf_file,
						   struct file *map_file)
{
	struct bpf_event_entry *ee;

	ee = kzalloc(sizeof(*ee), GFP_ATOMIC);
	if (ee) {
		ee->event = perf_file->private_data;
		ee->perf_file = perf_file;
		ee->map_file = map_file;
	}

	return ee;
}

static void __bpf_event_entry_free(struct rcu_head *rcu)
{
	struct bpf_event_entry *ee;

	ee = container_of(rcu, struct bpf_event_entry, rcu);
	fput(ee->perf_file);
	kfree(ee);
}

static void bpf_event_entry_free_rcu(struct bpf_event_entry *ee)
{
	call_rcu(&ee->rcu, __bpf_event_entry_free);
}

static void *perf_event_fd_array_get_ptr(struct bpf_map *map,
					 struct file *map_file, int fd)
{
	struct bpf_event_entry *ee;
	struct perf_event *event;
	struct file *perf_file;
	u64 value;

	perf_file = perf_event_get(fd);
	if (IS_ERR(perf_file))
		return perf_file;

	ee = ERR_PTR(-EOPNOTSUPP);
	event = perf_file->private_data;
	if (perf_event_read_local(event, &value, NULL, NULL) == -EOPNOTSUPP)
		goto err_out;

	ee = bpf_event_entry_gen(perf_file, map_file);
	if (ee)
		return ee;
	ee = ERR_PTR(-ENOMEM);
err_out:
	fput(perf_file);
	return ee;
}

static void perf_event_fd_array_put_ptr(void *ptr)
{
	bpf_event_entry_free_rcu(ptr);
}

static void perf_event_fd_array_release(struct bpf_map *map,
					struct file *map_file)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	struct bpf_event_entry *ee;
	int i;

	rcu_read_lock();
	for (i = 0; i < array->map.max_entries; i++) {
		ee = READ_ONCE(array->ptrs[i]);
		if (ee && ee->map_file == map_file)
			fd_array_map_delete_elem(map, &i);
	}
	rcu_read_unlock();
}

const struct bpf_map_ops perf_event_array_map_ops = {
	.map_alloc_check = fd_array_map_alloc_check,
	.map_alloc = array_map_alloc,
	.map_free = fd_array_map_free,
	.map_get_next_key = array_map_get_next_key,
	.map_lookup_elem = fd_array_map_lookup_elem,
	.map_delete_elem = fd_array_map_delete_elem,
	.map_fd_get_ptr = perf_event_fd_array_get_ptr,
	.map_fd_put_ptr = perf_event_fd_array_put_ptr,
	.map_release = perf_event_fd_array_release,
	.map_check_btf = map_check_no_btf,
};
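
/* Illustrative sketch, not part of this file: a perf event array is typically
 * populated from user space with one perf event fd per CPU and then used from
 * a BPF program via the bpf_perf_event_output() helper. With a hypothetical
 * "events" map of type BPF_MAP_TYPE_PERF_EVENT_ARRAY and a sample struct:
 *
 *	struct event { __u32 pid; __u64 ts; } e = {};
 *
 *	bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU,
 *			      &e, sizeof(e));
 */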

#ifdef CONFIG_CGROUPS
static void *cgroup_fd_array_get_ptr(struct bpf_map *map,
				     struct file *map_file /* not used */,
				     int fd)
{
	return cgroup_get_from_fd(fd);
}

static void cgroup_fd_array_put_ptr(void *ptr)
{
	/* cgroup_put() frees cgrp after an RCU grace period */
	cgroup_put(ptr);
}

static void cgroup_fd_array_free(struct bpf_map *map)
{
	bpf_fd_array_map_clear(map);
	fd_array_map_free(map);
}

const struct bpf_map_ops cgroup_array_map_ops = {
	.map_alloc_check = fd_array_map_alloc_check,
	.map_alloc = array_map_alloc,
	.map_free = cgroup_fd_array_free,
	.map_get_next_key = array_map_get_next_key,
	.map_lookup_elem = fd_array_map_lookup_elem,
	.map_delete_elem = fd_array_map_delete_elem,
	.map_fd_get_ptr = cgroup_fd_array_get_ptr,
	.map_fd_put_ptr = cgroup_fd_array_put_ptr,
	.map_check_btf = map_check_no_btf,
};
#endif

static struct bpf_map *array_of_map_alloc(union bpf_attr *attr)
{
	struct bpf_map *map, *inner_map_meta;

	inner_map_meta = bpf_map_meta_alloc(attr->inner_map_fd);
	if (IS_ERR(inner_map_meta))
		return inner_map_meta;

	map = array_map_alloc(attr);
	if (IS_ERR(map)) {
		bpf_map_meta_free(inner_map_meta);
		return map;
	}

	map->inner_map_meta = inner_map_meta;

	return map;
}

static void array_of_map_free(struct bpf_map *map)
{
	/* map->inner_map_meta is only accessed by syscall which
	 * is protected by fdget/fdput.
	 */
	bpf_map_meta_free(map->inner_map_meta);
	bpf_fd_array_map_clear(map);
	fd_array_map_free(map);
}

static void *array_of_map_lookup_elem(struct bpf_map *map, void *key)
{
	struct bpf_map **inner_map = array_map_lookup_elem(map, key);

	if (!inner_map)
		return NULL;

	return READ_ONCE(*inner_map);
}

static u32 array_of_map_gen_lookup(struct bpf_map *map,
				   struct bpf_insn *insn_buf)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 elem_size = round_up(map->value_size, 8);
	struct bpf_insn *insn = insn_buf;
	const int ret = BPF_REG_0;
	const int map_ptr = BPF_REG_1;
	const int index = BPF_REG_2;

	*insn++ = BPF_ALU64_IMM(BPF_ADD, map_ptr, offsetof(struct bpf_array, value));
	*insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0);
	if (map->unpriv_array) {
		*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 6);
		*insn++ = BPF_ALU32_IMM(BPF_AND, ret, array->index_mask);
	} else {
		*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 5);
	}
	if (is_power_of_2(elem_size))
		*insn++ = BPF_ALU64_IMM(BPF_LSH, ret, ilog2(elem_size));
	else
		*insn++ = BPF_ALU64_IMM(BPF_MUL, ret, elem_size);
	*insn++ = BPF_ALU64_REG(BPF_ADD, ret, map_ptr);
	*insn++ = BPF_LDX_MEM(BPF_DW, ret, ret, 0);
	*insn++ = BPF_JMP_IMM(BPF_JEQ, ret, 0, 1);
	*insn++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1);
	*insn++ = BPF_MOV64_IMM(ret, 0);

	return insn - insn_buf;
}

const struct bpf_map_ops array_of_maps_map_ops = {
	.map_alloc_check = fd_array_map_alloc_check,
	.map_alloc = array_of_map_alloc,
	.map_free = array_of_map_free,
	.map_get_next_key = array_map_get_next_key,
	.map_lookup_elem = array_of_map_lookup_elem,
	.map_delete_elem = fd_array_map_delete_elem,
	.map_fd_get_ptr = bpf_map_fd_get_ptr,
	.map_fd_put_ptr = bpf_map_fd_put_ptr,
	.map_fd_sys_lookup_elem = bpf_map_fd_sys_lookup_elem,
	.map_gen_lookup = array_of_map_gen_lookup,
	.map_check_btf = map_check_no_btf,
};
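
/* Illustrative sketch, not part of this file: with an array of maps, user
 * space stores an inner map fd in the outer array (via the fd-array update
 * path above), and a BPF program then does two lookups, one into the outer
 * array and one into the inner map it returns. Assuming hypothetical outer
 * and inner maps and keys:
 *
 *	void *inner = bpf_map_lookup_elem(&outer, &outer_key);
 *	__u64 *val = NULL;
 *
 *	if (inner)
 *		val = bpf_map_lookup_elem(inner, &inner_key);
 */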