/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
 * Copyright (c) 2016,2017 Facebook
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 */
#include <linux/bpf.h>
#include <linux/btf.h>
#include <linux/err.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/filter.h>
#include <linux/perf_event.h>
#include <uapi/linux/btf.h>

#include "map_in_map.h"

#define ARRAY_CREATE_FLAG_MASK \
	(BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY)

static void bpf_array_free_percpu(struct bpf_array *array)
{
	int i;

	for (i = 0; i < array->map.max_entries; i++) {
		free_percpu(array->pptrs[i]);
		cond_resched();
	}
}

static int bpf_array_alloc_percpu(struct bpf_array *array)
{
	void __percpu *ptr;
	int i;

	for (i = 0; i < array->map.max_entries; i++) {
		ptr = __alloc_percpu_gfp(array->elem_size, 8,
					 GFP_USER | __GFP_NOWARN);
		if (!ptr) {
			bpf_array_free_percpu(array);
			return -ENOMEM;
		}
		array->pptrs[i] = ptr;
		cond_resched();
	}

	return 0;
}

/* Called from syscall */
int array_map_alloc_check(union bpf_attr *attr)
{
	bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY;
	int numa_node = bpf_map_attr_numa_node(attr);

	/* check sanity of attributes */
	if (attr->max_entries == 0 || attr->key_size != 4 ||
	    attr->value_size == 0 ||
	    attr->map_flags & ~ARRAY_CREATE_FLAG_MASK ||
	    (percpu && numa_node != NUMA_NO_NODE))
		return -EINVAL;

	if (attr->value_size > KMALLOC_MAX_SIZE)
		/* if value_size is bigger, the user space won't be able to
		 * access the elements.
		 */
		return -E2BIG;

	return 0;
}

static struct bpf_map *array_map_alloc(union bpf_attr *attr)
{
	bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY;
	int ret, numa_node = bpf_map_attr_numa_node(attr);
	u32 elem_size, index_mask, max_entries;
	bool unpriv = !capable(CAP_SYS_ADMIN);
	u64 cost, array_size, mask64;
	struct bpf_array *array;

	elem_size = round_up(attr->value_size, 8);

	max_entries = attr->max_entries;

	/* On 32 bit archs roundup_pow_of_two() with max_entries that has
	 * upper most bit set in u32 space is undefined behavior due to
	 * resulting 1U << 32, so do it manually here in u64 space.
	 */
	mask64 = fls_long(max_entries - 1);
	mask64 = 1ULL << mask64;
	mask64 -= 1;

	index_mask = mask64;
	if (unpriv) {
		/* round up array size to nearest power of 2,
		 * since cpu will speculate within index_mask limits
		 */
		max_entries = index_mask + 1;
		/* Check for overflows. */
		if (max_entries < attr->max_entries)
			return ERR_PTR(-E2BIG);
	}

	array_size = sizeof(*array);
	if (percpu)
		array_size += (u64) max_entries * sizeof(void *);
	else
		array_size += (u64) max_entries * elem_size;

	/* make sure there is no u32 overflow later in round_up() */
	cost = array_size;
	if (cost >= U32_MAX - PAGE_SIZE)
		return ERR_PTR(-ENOMEM);
	if (percpu) {
		cost += (u64)attr->max_entries * elem_size * num_possible_cpus();
		if (cost >= U32_MAX - PAGE_SIZE)
			return ERR_PTR(-ENOMEM);
	}
	cost = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;

	ret = bpf_map_precharge_memlock(cost);
	if (ret < 0)
		return ERR_PTR(ret);

	/* allocate all map elements and zero-initialize them */
	array = bpf_map_area_alloc(array_size, numa_node);
	if (!array)
		return ERR_PTR(-ENOMEM);
	array->index_mask = index_mask;
	array->map.unpriv_array = unpriv;

	/* copy mandatory map attributes */
	bpf_map_init_from_attr(&array->map, attr);
	array->map.pages = cost;
	array->elem_size = elem_size;

	if (percpu && bpf_array_alloc_percpu(array)) {
		bpf_map_area_free(array);
		return ERR_PTR(-ENOMEM);
	}

	return &array->map;
}
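
/* Illustration of the masking above: for max_entries == 200, fls_long(199)
 * is 8, so index_mask == 255 and an unprivileged map is padded to 256
 * elements. The lookup/update paths below always apply
 * "index & array->index_mask", so even a speculatively executed
 * out-of-bounds index stays inside the allocation.
 */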

/* Called from syscall or from eBPF program */
static void *array_map_lookup_elem(struct bpf_map *map, void *key)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 index = *(u32 *)key;

	if (unlikely(index >= array->map.max_entries))
		return NULL;

	return array->value + array->elem_size * (index & array->index_mask);
}

/* emit BPF instructions equivalent to C code of array_map_lookup_elem() */
static u32 array_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	struct bpf_insn *insn = insn_buf;
	u32 elem_size = round_up(map->value_size, 8);
	const int ret = BPF_REG_0;
	const int map_ptr = BPF_REG_1;
	const int index = BPF_REG_2;

	*insn++ = BPF_ALU64_IMM(BPF_ADD, map_ptr, offsetof(struct bpf_array, value));
	*insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0);
	if (map->unpriv_array) {
		*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 4);
		*insn++ = BPF_ALU32_IMM(BPF_AND, ret, array->index_mask);
	} else {
		*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 3);
	}

	if (is_power_of_2(elem_size)) {
		*insn++ = BPF_ALU64_IMM(BPF_LSH, ret, ilog2(elem_size));
	} else {
		*insn++ = BPF_ALU64_IMM(BPF_MUL, ret, elem_size);
	}
	*insn++ = BPF_ALU64_REG(BPF_ADD, ret, map_ptr);
	*insn++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1);
	*insn++ = BPF_MOV64_IMM(ret, 0);
	return insn - insn_buf;
}
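
/* The sequence emitted above corresponds roughly to:
 *
 *	r1 += offsetof(struct bpf_array, value)
 *	r0 = *(u32 *)(r2 + 0)		// index
 *	if r0 >= max_entries goto miss
 *	r0 &= index_mask		// only for unpriv_array
 *	r0 <<= ilog2(elem_size)		// or r0 *= elem_size
 *	r0 += r1			// &array->value[index * elem_size]
 *	goto out
 * miss:	r0 = 0			// NULL
 * out:
 */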

/* Called from eBPF program */
static void *percpu_array_map_lookup_elem(struct bpf_map *map, void *key)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 index = *(u32 *)key;

	if (unlikely(index >= array->map.max_entries))
		return NULL;

	return this_cpu_ptr(array->pptrs[index & array->index_mask]);
}

int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 index = *(u32 *)key;
	void __percpu *pptr;
	int cpu, off = 0;
	u32 size;

	if (unlikely(index >= array->map.max_entries))
		return -ENOENT;

	/* per_cpu areas are zero-filled and bpf programs can only
	 * access 'value_size' of them, so copying rounded areas
	 * will not leak any kernel data
	 */
	size = round_up(map->value_size, 8);
	rcu_read_lock();
	pptr = array->pptrs[index & array->index_mask];
	for_each_possible_cpu(cpu) {
		bpf_long_memcpy(value + off, per_cpu_ptr(pptr, cpu), size);
		off += size;
	}
	rcu_read_unlock();
	return 0;
}
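
/* Seen from the syscall side, a lookup on a BPF_MAP_TYPE_PERCPU_ARRAY thus
 * returns one round_up(value_size, 8) sized slot per possible CPU, back to
 * back, and the caller is expected to size its buffer accordingly,
 * e.g. (user-space sketch using libbpf, with an 8-byte value):
 *
 *	__u64 vals[nr_possible_cpus];
 *	bpf_map_lookup_elem(map_fd, &key, vals);
 */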

/* Called from syscall */
static int array_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 index = key ? *(u32 *)key : U32_MAX;
	u32 *next = (u32 *)next_key;

	if (index >= array->map.max_entries) {
		*next = 0;
		return 0;
	}

	if (index == array->map.max_entries - 1)
		return -ENOENT;

	*next = index + 1;
	return 0;
}

/* Called from syscall or from eBPF program */
static int array_map_update_elem(struct bpf_map *map, void *key, void *value,
				 u64 map_flags)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 index = *(u32 *)key;
	char *val;

	if (unlikely((map_flags & ~BPF_F_LOCK) > BPF_EXIST))
		/* unknown flags */
		return -EINVAL;

	if (unlikely(index >= array->map.max_entries))
		/* all elements were pre-allocated, cannot insert a new one */
		return -E2BIG;

	if (unlikely(map_flags & BPF_NOEXIST))
		/* all elements already exist */
		return -EEXIST;

	if (unlikely((map_flags & BPF_F_LOCK) &&
		     !map_value_has_spin_lock(map)))
		return -EINVAL;

	if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
		memcpy(this_cpu_ptr(array->pptrs[index & array->index_mask]),
		       value, map->value_size);
	} else {
		val = array->value +
			array->elem_size * (index & array->index_mask);
		if (map_flags & BPF_F_LOCK)
			copy_map_value_locked(map, val, value, false);
		else
			copy_map_value(map, val, value);
	}
	return 0;
}

int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value,
			    u64 map_flags)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 index = *(u32 *)key;
	void __percpu *pptr;
	int cpu, off = 0;
	u32 size;

	if (unlikely(map_flags > BPF_EXIST))
		/* unknown flags */
		return -EINVAL;

	if (unlikely(index >= array->map.max_entries))
		/* all elements were pre-allocated, cannot insert a new one */
		return -E2BIG;

	if (unlikely(map_flags == BPF_NOEXIST))
		/* all elements already exist */
		return -EEXIST;

	/* user space will provide round_up(value_size, 8) bytes that will be
	 * copied into the per-cpu area. bpf programs can only access
	 * value_size of it. During lookup the same extra bytes will be
	 * returned, or zeros that were zero-filled by percpu_alloc, so no
	 * kernel data can leak.
	 */
	size = round_up(map->value_size, 8);
	rcu_read_lock();
	pptr = array->pptrs[index & array->index_mask];
	for_each_possible_cpu(cpu) {
		bpf_long_memcpy(per_cpu_ptr(pptr, cpu), value + off, size);
		off += size;
	}
	rcu_read_unlock();
	return 0;
}

/* Called from syscall or from eBPF program */
static int array_map_delete_elem(struct bpf_map *map, void *key)
{
	return -EINVAL;
}

/* Called when map->refcnt goes to zero, either from workqueue or from syscall */
static void array_map_free(struct bpf_map *map)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);

	/* at this point bpf_prog->aux->refcnt == 0 and this map->refcnt == 0,
	 * so the programs (there can be more than one that used this map)
	 * were disconnected from events. Wait for outstanding programs to
	 * complete and free the array
	 */
	synchronize_rcu();

	if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
		bpf_array_free_percpu(array);

	bpf_map_area_free(array);
}

static void array_map_seq_show_elem(struct bpf_map *map, void *key,
				    struct seq_file *m)
{
	void *value;

	rcu_read_lock();

	value = array_map_lookup_elem(map, key);
	if (!value) {
		rcu_read_unlock();
		return;
	}

	seq_printf(m, "%u: ", *(u32 *)key);
	btf_type_seq_show(map->btf, map->btf_value_type_id, value, m);
	seq_puts(m, "\n");

	rcu_read_unlock();
}

static void percpu_array_map_seq_show_elem(struct bpf_map *map, void *key,
					   struct seq_file *m)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 index = *(u32 *)key;
	void __percpu *pptr;
	int cpu;

	rcu_read_lock();

	seq_printf(m, "%u: {\n", *(u32 *)key);
	pptr = array->pptrs[index & array->index_mask];
	for_each_possible_cpu(cpu) {
		seq_printf(m, "\tcpu%d: ", cpu);
		btf_type_seq_show(map->btf, map->btf_value_type_id,
				  per_cpu_ptr(pptr, cpu), m);
		seq_puts(m, "\n");
	}
	seq_puts(m, "}\n");

	rcu_read_unlock();
}

static int array_map_check_btf(const struct bpf_map *map,
			       const struct btf *btf,
			       const struct btf_type *key_type,
			       const struct btf_type *value_type)
{
	u32 int_data;

	if (BTF_INFO_KIND(key_type->info) != BTF_KIND_INT)
		return -EINVAL;

	int_data = *(u32 *)(key_type + 1);
	/* bpf array can only take a u32 key. This check makes sure
	 * that the btf matches the attr used during map_create.
	 */
	if (BTF_INT_BITS(int_data) != 32 || BTF_INT_OFFSET(int_data))
		return -EINVAL;

	return 0;
}

const struct bpf_map_ops array_map_ops = {
	.map_alloc_check = array_map_alloc_check,
	.map_alloc = array_map_alloc,
	.map_free = array_map_free,
	.map_get_next_key = array_map_get_next_key,
	.map_lookup_elem = array_map_lookup_elem,
	.map_update_elem = array_map_update_elem,
	.map_delete_elem = array_map_delete_elem,
	.map_gen_lookup = array_map_gen_lookup,
	.map_seq_show_elem = array_map_seq_show_elem,
	.map_check_btf = array_map_check_btf,
};

const struct bpf_map_ops percpu_array_map_ops = {
	.map_alloc_check = array_map_alloc_check,
	.map_alloc = array_map_alloc,
	.map_free = array_map_free,
	.map_get_next_key = array_map_get_next_key,
	.map_lookup_elem = percpu_array_map_lookup_elem,
	.map_update_elem = array_map_update_elem,
	.map_delete_elem = array_map_delete_elem,
	.map_seq_show_elem = percpu_array_map_seq_show_elem,
	.map_check_btf = array_map_check_btf,
};

static int fd_array_map_alloc_check(union bpf_attr *attr)
{
	/* only file descriptors can be stored in this type of map */
	if (attr->value_size != sizeof(u32))
		return -EINVAL;
	return array_map_alloc_check(attr);
}

static void fd_array_map_free(struct bpf_map *map)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	int i;

	synchronize_rcu();

	/* make sure it's empty */
	for (i = 0; i < array->map.max_entries; i++)
		BUG_ON(array->ptrs[i] != NULL);

	bpf_map_area_free(array);
}

static void *fd_array_map_lookup_elem(struct bpf_map *map, void *key)
{
	return ERR_PTR(-EOPNOTSUPP);
}

/* only called from syscall */
int bpf_fd_array_map_lookup_elem(struct bpf_map *map, void *key, u32 *value)
{
	void **elem, *ptr;
	int ret = 0;

	if (!map->ops->map_fd_sys_lookup_elem)
		return -ENOTSUPP;

	rcu_read_lock();
	elem = array_map_lookup_elem(map, key);
	if (elem && (ptr = READ_ONCE(*elem)))
		*value = map->ops->map_fd_sys_lookup_elem(ptr);
	else
		ret = -ENOENT;
	rcu_read_unlock();

	return ret;
}

/* only called from syscall */
int bpf_fd_array_map_update_elem(struct bpf_map *map, struct file *map_file,
				 void *key, void *value, u64 map_flags)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	void *new_ptr, *old_ptr;
	u32 index = *(u32 *)key, ufd;

	if (map_flags != BPF_ANY)
		return -EINVAL;

	if (index >= array->map.max_entries)
		return -E2BIG;

	ufd = *(u32 *)value;
	new_ptr = map->ops->map_fd_get_ptr(map, map_file, ufd);
	if (IS_ERR(new_ptr))
		return PTR_ERR(new_ptr);

	old_ptr = xchg(array->ptrs + index, new_ptr);
	if (old_ptr)
		map->ops->map_fd_put_ptr(old_ptr);

	return 0;
}

static int fd_array_map_delete_elem(struct bpf_map *map, void *key)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	void *old_ptr;
	u32 index = *(u32 *)key;

	if (index >= array->map.max_entries)
		return -E2BIG;

	old_ptr = xchg(array->ptrs + index, NULL);
	if (old_ptr) {
		map->ops->map_fd_put_ptr(old_ptr);
		return 0;
	} else {
		return -ENOENT;
	}
}
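
/* The fd-based array maps below store kernel object pointers rather than
 * plain values: on update the u32 value is taken as a file descriptor and
 * translated via ->map_fd_get_ptr(), a syscall-side lookup reports the
 * object's id via ->map_fd_sys_lookup_elem(), and programs cannot read the
 * slots directly (fd_array_map_lookup_elem() returns -EOPNOTSUPP).
 */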

static void *prog_fd_array_get_ptr(struct bpf_map *map,
				   struct file *map_file, int fd)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	struct bpf_prog *prog = bpf_prog_get(fd);

	if (IS_ERR(prog))
		return prog;

	if (!bpf_prog_array_compatible(array, prog)) {
		bpf_prog_put(prog);
		return ERR_PTR(-EINVAL);
	}

	return prog;
}

static void prog_fd_array_put_ptr(void *ptr)
{
	bpf_prog_put(ptr);
}

static u32 prog_fd_array_sys_lookup_elem(void *ptr)
{
	return ((struct bpf_prog *)ptr)->aux->id;
}

/* decrement refcnt of all bpf_progs that are stored in this map */
static void bpf_fd_array_map_clear(struct bpf_map *map)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	int i;

	for (i = 0; i < array->map.max_entries; i++)
		fd_array_map_delete_elem(map, &i);
}

static void prog_array_map_seq_show_elem(struct bpf_map *map, void *key,
					 struct seq_file *m)
{
	void **elem, *ptr;
	u32 prog_id;

	rcu_read_lock();

	elem = array_map_lookup_elem(map, key);
	if (elem) {
		ptr = READ_ONCE(*elem);
		if (ptr) {
			seq_printf(m, "%u: ", *(u32 *)key);
			prog_id = prog_fd_array_sys_lookup_elem(ptr);
			btf_type_seq_show(map->btf, map->btf_value_type_id,
					  &prog_id, m);
			seq_puts(m, "\n");
		}
	}

	rcu_read_unlock();
}

const struct bpf_map_ops prog_array_map_ops = {
	.map_alloc_check = fd_array_map_alloc_check,
	.map_alloc = array_map_alloc,
	.map_free = fd_array_map_free,
	.map_get_next_key = array_map_get_next_key,
	.map_lookup_elem = fd_array_map_lookup_elem,
	.map_delete_elem = fd_array_map_delete_elem,
	.map_fd_get_ptr = prog_fd_array_get_ptr,
	.map_fd_put_ptr = prog_fd_array_put_ptr,
	.map_fd_sys_lookup_elem = prog_fd_array_sys_lookup_elem,
	.map_release_uref = bpf_fd_array_map_clear,
	.map_seq_show_elem = prog_array_map_seq_show_elem,
};
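
/* A prog array is the backing store for the bpf_tail_call() helper: user
 * space installs a program fd into a slot and a running program jumps
 * through it, e.g. (sketch, names are placeholders):
 *
 *	user space:	bpf_map_update_elem(map_fd, &idx, &prog_fd, BPF_ANY);
 *	bpf program:	bpf_tail_call(ctx, &jmp_table, idx);
 */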

static struct bpf_event_entry *bpf_event_entry_gen(struct file *perf_file,
						   struct file *map_file)
{
	struct bpf_event_entry *ee;

	ee = kzalloc(sizeof(*ee), GFP_ATOMIC);
	if (ee) {
		ee->event = perf_file->private_data;
		ee->perf_file = perf_file;
		ee->map_file = map_file;
	}

	return ee;
}

static void __bpf_event_entry_free(struct rcu_head *rcu)
{
	struct bpf_event_entry *ee;

	ee = container_of(rcu, struct bpf_event_entry, rcu);
	fput(ee->perf_file);
	kfree(ee);
}

static void bpf_event_entry_free_rcu(struct bpf_event_entry *ee)
{
	call_rcu(&ee->rcu, __bpf_event_entry_free);
}

static void *perf_event_fd_array_get_ptr(struct bpf_map *map,
					 struct file *map_file, int fd)
{
	struct bpf_event_entry *ee;
	struct perf_event *event;
	struct file *perf_file;
	u64 value;

	perf_file = perf_event_get(fd);
	if (IS_ERR(perf_file))
		return perf_file;

	ee = ERR_PTR(-EOPNOTSUPP);
	event = perf_file->private_data;
	if (perf_event_read_local(event, &value, NULL, NULL) == -EOPNOTSUPP)
		goto err_out;

	ee = bpf_event_entry_gen(perf_file, map_file);
	if (ee)
		return ee;
	ee = ERR_PTR(-ENOMEM);
err_out:
	fput(perf_file);
	return ee;
}

static void perf_event_fd_array_put_ptr(void *ptr)
{
	bpf_event_entry_free_rcu(ptr);
}

static void perf_event_fd_array_release(struct bpf_map *map,
					struct file *map_file)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	struct bpf_event_entry *ee;
	int i;

	rcu_read_lock();
	for (i = 0; i < array->map.max_entries; i++) {
		ee = READ_ONCE(array->ptrs[i]);
		if (ee && ee->map_file == map_file)
			fd_array_map_delete_elem(map, &i);
	}
	rcu_read_unlock();
}

const struct bpf_map_ops perf_event_array_map_ops = {
	.map_alloc_check = fd_array_map_alloc_check,
	.map_alloc = array_map_alloc,
	.map_free = fd_array_map_free,
	.map_get_next_key = array_map_get_next_key,
	.map_lookup_elem = fd_array_map_lookup_elem,
	.map_delete_elem = fd_array_map_delete_elem,
	.map_fd_get_ptr = perf_event_fd_array_get_ptr,
	.map_fd_put_ptr = perf_event_fd_array_put_ptr,
	.map_release = perf_event_fd_array_release,
	.map_check_btf = map_check_no_btf,
};

#ifdef CONFIG_CGROUPS
static void *cgroup_fd_array_get_ptr(struct bpf_map *map,
				     struct file *map_file /* not used */,
				     int fd)
{
	return cgroup_get_from_fd(fd);
}

static void cgroup_fd_array_put_ptr(void *ptr)
{
	/* cgroup_put() frees the cgrp after an RCU grace period */
	cgroup_put(ptr);
}

static void cgroup_fd_array_free(struct bpf_map *map)
{
	bpf_fd_array_map_clear(map);
	fd_array_map_free(map);
}

const struct bpf_map_ops cgroup_array_map_ops = {
	.map_alloc_check = fd_array_map_alloc_check,
	.map_alloc = array_map_alloc,
	.map_free = cgroup_fd_array_free,
	.map_get_next_key = array_map_get_next_key,
	.map_lookup_elem = fd_array_map_lookup_elem,
	.map_delete_elem = fd_array_map_delete_elem,
	.map_fd_get_ptr = cgroup_fd_array_get_ptr,
	.map_fd_put_ptr = cgroup_fd_array_put_ptr,
	.map_check_btf = map_check_no_btf,
};
#endif

static struct bpf_map *array_of_map_alloc(union bpf_attr *attr)
{
	struct bpf_map *map, *inner_map_meta;

	inner_map_meta = bpf_map_meta_alloc(attr->inner_map_fd);
	if (IS_ERR(inner_map_meta))
		return inner_map_meta;

	map = array_map_alloc(attr);
	if (IS_ERR(map)) {
		bpf_map_meta_free(inner_map_meta);
		return map;
	}

	map->inner_map_meta = inner_map_meta;

	return map;
}
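
/* attr->inner_map_fd only supplies a template here: bpf_map_meta_alloc()
 * records the inner map's attributes in inner_map_meta so that maps
 * inserted later (as map fds, via the fd-array update path above) can be
 * checked for compatibility; the template fd itself is not stored.
 */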

static void array_of_map_free(struct bpf_map *map)
{
	/* map->inner_map_meta is only accessed from the syscall path,
	 * which is protected by fdget/fdput.
	 */
	bpf_map_meta_free(map->inner_map_meta);
	bpf_fd_array_map_clear(map);
	fd_array_map_free(map);
}

static void *array_of_map_lookup_elem(struct bpf_map *map, void *key)
{
	struct bpf_map **inner_map = array_map_lookup_elem(map, key);

	if (!inner_map)
		return NULL;

	return READ_ONCE(*inner_map);
}

static u32 array_of_map_gen_lookup(struct bpf_map *map,
				   struct bpf_insn *insn_buf)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 elem_size = round_up(map->value_size, 8);
	struct bpf_insn *insn = insn_buf;
	const int ret = BPF_REG_0;
	const int map_ptr = BPF_REG_1;
	const int index = BPF_REG_2;

	*insn++ = BPF_ALU64_IMM(BPF_ADD, map_ptr, offsetof(struct bpf_array, value));
	*insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0);
	if (map->unpriv_array) {
		*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 6);
		*insn++ = BPF_ALU32_IMM(BPF_AND, ret, array->index_mask);
	} else {
		*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 5);
	}
	if (is_power_of_2(elem_size))
		*insn++ = BPF_ALU64_IMM(BPF_LSH, ret, ilog2(elem_size));
	else
		*insn++ = BPF_ALU64_IMM(BPF_MUL, ret, elem_size);
	*insn++ = BPF_ALU64_REG(BPF_ADD, ret, map_ptr);
	*insn++ = BPF_LDX_MEM(BPF_DW, ret, ret, 0);
	*insn++ = BPF_JMP_IMM(BPF_JEQ, ret, 0, 1);
	*insn++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1);
	*insn++ = BPF_MOV64_IMM(ret, 0);

	return insn - insn_buf;
}

const struct bpf_map_ops array_of_maps_map_ops = {
	.map_alloc_check = fd_array_map_alloc_check,
	.map_alloc = array_of_map_alloc,
	.map_free = array_of_map_free,
	.map_get_next_key = array_map_get_next_key,
	.map_lookup_elem = array_of_map_lookup_elem,
	.map_delete_elem = fd_array_map_delete_elem,
	.map_fd_get_ptr = bpf_map_fd_get_ptr,
	.map_fd_put_ptr = bpf_map_fd_put_ptr,
	.map_fd_sys_lookup_elem = bpf_map_fd_sys_lookup_elem,
	.map_gen_lookup = array_of_map_gen_lookup,
	.map_check_btf = map_check_no_btf,
};