/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 */
#include <linux/bpf.h>
#include <linux/bpf_trace.h>
#include <linux/syscalls.h>
#include <linux/slab.h>
#include <linux/sched/signal.h>
#include <linux/vmalloc.h>
#include <linux/mmzone.h>
#include <linux/anon_inodes.h>
#include <linux/file.h>
#include <linux/license.h>
#include <linux/filter.h>
#include <linux/version.h>
#include <linux/kernel.h>
#include <linux/idr.h>
#include <linux/cred.h>
#include <linux/timekeeping.h>
#include <linux/ctype.h>

#define IS_FD_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PROG_ARRAY || \
			  (map)->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY || \
			  (map)->map_type == BPF_MAP_TYPE_CGROUP_ARRAY || \
			  (map)->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS)
#define IS_FD_HASH(map) ((map)->map_type == BPF_MAP_TYPE_HASH_OF_MAPS)
#define IS_FD_MAP(map) (IS_FD_ARRAY(map) || IS_FD_HASH(map))

#define BPF_OBJ_FLAG_MASK	(BPF_F_RDONLY | BPF_F_WRONLY)

DEFINE_PER_CPU(int, bpf_prog_active);
static DEFINE_IDR(prog_idr);
static DEFINE_SPINLOCK(prog_idr_lock);
static DEFINE_IDR(map_idr);
static DEFINE_SPINLOCK(map_idr_lock);

int sysctl_unprivileged_bpf_disabled __read_mostly;

static const struct bpf_map_ops * const bpf_map_types[] = {
#define BPF_PROG_TYPE(_id, _ops)
#define BPF_MAP_TYPE(_id, _ops) \
	[_id] = &_ops,
#include <linux/bpf_types.h>
#undef BPF_PROG_TYPE
#undef BPF_MAP_TYPE
};

/*
 * If we're handed a bigger struct than we know of, ensure all the unknown bits
 * are 0 - i.e. new user-space does not rely on any kernel feature extensions
 * we don't know about yet.
 *
 * There is a ToCToU between this function call and the following
 * copy_from_user() call. However, this is not a concern since this function is
 * meant to be a future-proofing of bits.
 */
static int check_uarg_tail_zero(void __user *uaddr,
				size_t expected_size,
				size_t actual_size)
{
	unsigned char __user *addr;
	unsigned char __user *end;
	unsigned char val;
	int err;

	if (unlikely(actual_size > PAGE_SIZE))	/* silly large */
		return -E2BIG;

	if (unlikely(!access_ok(VERIFY_READ, uaddr, actual_size)))
		return -EFAULT;

	if (actual_size <= expected_size)
		return 0;

	addr = uaddr + expected_size;
	end = uaddr + actual_size;

	for (; addr < end; addr++) {
		err = get_user(val, addr);
		if (err)
			return err;
		if (val)
			return -E2BIG;
	}

	return 0;
}

const struct bpf_map_ops bpf_map_offload_ops = {
	.map_alloc = bpf_map_offload_map_alloc,
	.map_free = bpf_map_offload_map_free,
};

static struct bpf_map *find_and_alloc_map(union bpf_attr *attr)
{
	const struct bpf_map_ops *ops;
	struct bpf_map *map;
	int err;

	if (attr->map_type >= ARRAY_SIZE(bpf_map_types))
		return ERR_PTR(-EINVAL);
	ops = bpf_map_types[attr->map_type];
	if (!ops)
		return ERR_PTR(-EINVAL);

	if (ops->map_alloc_check) {
		err = ops->map_alloc_check(attr);
		if (err)
			return ERR_PTR(err);
	}
	if (attr->map_ifindex)
		ops = &bpf_map_offload_ops;
	map = ops->map_alloc(attr);
	if (IS_ERR(map))
		return map;
	map->ops = ops;
	map->map_type = attr->map_type;
	return map;
}

void *bpf_map_area_alloc(size_t size, int numa_node)
{
	/* We definitely need __GFP_NORETRY, so OOM killer doesn't
	 * trigger under memory pressure as we really just want to
	 * fail instead.
	 */
	const gfp_t flags = __GFP_NOWARN | __GFP_NORETRY | __GFP_ZERO;
	void *area;

	if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) {
		area = kmalloc_node(size, GFP_USER | flags, numa_node);
		if (area != NULL)
			return area;
	}

	return __vmalloc_node_flags_caller(size, numa_node, GFP_KERNEL | flags,
					   __builtin_return_address(0));
}

void bpf_map_area_free(void *area)
{
	kvfree(area);
}

void bpf_map_init_from_attr(struct bpf_map *map, union bpf_attr *attr)
{
	map->map_type = attr->map_type;
	map->key_size = attr->key_size;
	map->value_size = attr->value_size;
	map->max_entries = attr->max_entries;
	map->map_flags = attr->map_flags;
	map->numa_node = bpf_map_attr_numa_node(attr);
}

int bpf_map_precharge_memlock(u32 pages)
{
	struct user_struct *user = get_current_user();
	unsigned long memlock_limit, cur;

	memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
	cur = atomic_long_read(&user->locked_vm);
	free_uid(user);
	if (cur + pages > memlock_limit)
		return -EPERM;
	return 0;
}

static int bpf_map_charge_memlock(struct bpf_map *map)
{
	struct user_struct *user = get_current_user();
	unsigned long memlock_limit;

	memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;

	atomic_long_add(map->pages, &user->locked_vm);

	if (atomic_long_read(&user->locked_vm) > memlock_limit) {
		atomic_long_sub(map->pages, &user->locked_vm);
		free_uid(user);
		return -EPERM;
	}
	map->user = user;
	return 0;
}

static void bpf_map_uncharge_memlock(struct bpf_map *map)
{
	struct user_struct *user = map->user;

	atomic_long_sub(map->pages, &user->locked_vm);
	free_uid(user);
}

static int bpf_map_alloc_id(struct bpf_map *map)
{
	int id;

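	/* Cyclically allocate a map id in [1, INT_MAX) under the BH-safe
	 * spinlock; GFP_ATOMIC is needed because we cannot sleep here.
	 */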
	spin_lock_bh(&map_idr_lock);
	id = idr_alloc_cyclic(&map_idr, map, 1, INT_MAX, GFP_ATOMIC);
	if (id > 0)
		map->id = id;
	spin_unlock_bh(&map_idr_lock);

	if (WARN_ON_ONCE(!id))
		return -ENOSPC;

	return id > 0 ? 0 : id;
}

void bpf_map_free_id(struct bpf_map *map, bool do_idr_lock)
{
	unsigned long flags;

	/* Offloaded maps are removed from the IDR store when their device
	 * disappears - even if someone holds an fd to them they are unusable,
	 * the memory is gone, all ops will fail; they are simply waiting for
	 * refcnt to drop to be freed.
	 */
	if (!map->id)
		return;

	if (do_idr_lock)
		spin_lock_irqsave(&map_idr_lock, flags);
	else
		__acquire(&map_idr_lock);

	idr_remove(&map_idr, map->id);
	map->id = 0;

	if (do_idr_lock)
		spin_unlock_irqrestore(&map_idr_lock, flags);
	else
		__release(&map_idr_lock);
}

/* called from workqueue */
static void bpf_map_free_deferred(struct work_struct *work)
{
	struct bpf_map *map = container_of(work, struct bpf_map, work);

	bpf_map_uncharge_memlock(map);
	security_bpf_map_free(map);
	/* implementation dependent freeing */
	map->ops->map_free(map);
}

static void bpf_map_put_uref(struct bpf_map *map)
{
	if (atomic_dec_and_test(&map->usercnt)) {
		if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY)
			bpf_fd_array_map_clear(map);
	}
}

/* decrement map refcnt and schedule it for freeing via workqueue
 * (underlying map implementation ops->map_free() might sleep)
 */
static void __bpf_map_put(struct bpf_map *map, bool do_idr_lock)
{
	if (atomic_dec_and_test(&map->refcnt)) {
		/* bpf_map_free_id() must be called first */
		bpf_map_free_id(map, do_idr_lock);
		INIT_WORK(&map->work, bpf_map_free_deferred);
		schedule_work(&map->work);
	}
}

void bpf_map_put(struct bpf_map *map)
{
	__bpf_map_put(map, true);
}

void bpf_map_put_with_uref(struct bpf_map *map)
{
	bpf_map_put_uref(map);
	bpf_map_put(map);
}

static int bpf_map_release(struct inode *inode, struct file *filp)
{
	struct bpf_map *map = filp->private_data;

	if (map->ops->map_release)
		map->ops->map_release(map, filp);

	bpf_map_put_with_uref(map);
	return 0;
}

#ifdef CONFIG_PROC_FS
static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp)
{
	const struct bpf_map *map = filp->private_data;
	const struct bpf_array *array;
	u32 owner_prog_type = 0;
	u32 owner_jited = 0;

	if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY) {
		array = container_of(map, struct bpf_array, map);
		owner_prog_type = array->owner_prog_type;
		owner_jited = array->owner_jited;
	}

	seq_printf(m,
		   "map_type:\t%u\n"
		   "key_size:\t%u\n"
		   "value_size:\t%u\n"
		   "max_entries:\t%u\n"
		   "map_flags:\t%#x\n"
		   "memlock:\t%llu\n",
		   map->map_type,
		   map->key_size,
		   map->value_size,
		   map->max_entries,
		   map->map_flags,
		   map->pages * 1ULL << PAGE_SHIFT);

	if (owner_prog_type) {
		seq_printf(m, "owner_prog_type:\t%u\n",
			   owner_prog_type);
		seq_printf(m, "owner_jited:\t%u\n",
			   owner_jited);
	}
}
#endif

static ssize_t bpf_dummy_read(struct file *filp, char __user *buf, size_t siz,
			      loff_t *ppos)
{
	/* We need this handler such that alloc_file() enables
	 * f_mode with FMODE_CAN_READ.
	 */
	return -EINVAL;
}

static ssize_t bpf_dummy_write(struct file *filp, const char __user *buf,
			       size_t siz, loff_t *ppos)
{
	/* We need this handler such that alloc_file() enables
	 * f_mode with FMODE_CAN_WRITE.
	 */
	return -EINVAL;
}

const struct file_operations bpf_map_fops = {
#ifdef CONFIG_PROC_FS
	.show_fdinfo	= bpf_map_show_fdinfo,
#endif
	.release	= bpf_map_release,
	.read		= bpf_dummy_read,
	.write		= bpf_dummy_write,
};

int bpf_map_new_fd(struct bpf_map *map, int flags)
{
	int ret;

	ret = security_bpf_map(map, OPEN_FMODE(flags));
	if (ret < 0)
		return ret;

	return anon_inode_getfd("bpf-map", &bpf_map_fops, map,
				flags | O_CLOEXEC);
}

int bpf_get_file_flag(int flags)
{
	if ((flags & BPF_F_RDONLY) && (flags & BPF_F_WRONLY))
		return -EINVAL;
	if (flags & BPF_F_RDONLY)
		return O_RDONLY;
	if (flags & BPF_F_WRONLY)
		return O_WRONLY;
	return O_RDWR;
}

/* helper macro to check that unused fields 'union bpf_attr' are zero */
#define CHECK_ATTR(CMD) \
	memchr_inv((void *) &attr->CMD##_LAST_FIELD + \
		   sizeof(attr->CMD##_LAST_FIELD), 0, \
		   sizeof(*attr) - \
		   offsetof(union bpf_attr, CMD##_LAST_FIELD) - \
		   sizeof(attr->CMD##_LAST_FIELD)) != NULL

/* dst and src must have at least BPF_OBJ_NAME_LEN number of bytes.
 * Return 0 on success and < 0 on error.
 */
static int bpf_obj_name_cpy(char *dst, const char *src)
{
	const char *end = src + BPF_OBJ_NAME_LEN;

	memset(dst, 0, BPF_OBJ_NAME_LEN);

	/* Copy all isalnum() and '_' char */
	while (src < end && *src) {
		if (!isalnum(*src) && *src != '_')
			return -EINVAL;
		*dst++ = *src++;
	}

	/* No '\0' found in BPF_OBJ_NAME_LEN number of bytes */
	if (src == end)
		return -EINVAL;

	return 0;
}

#define BPF_MAP_CREATE_LAST_FIELD map_ifindex
/* called via syscall */
static int map_create(union bpf_attr *attr)
{
	int numa_node = bpf_map_attr_numa_node(attr);
	struct bpf_map *map;
	int f_flags;
	int err;

	err = CHECK_ATTR(BPF_MAP_CREATE);
	if (err)
		return -EINVAL;

	f_flags = bpf_get_file_flag(attr->map_flags);
	if (f_flags < 0)
		return f_flags;

	if (numa_node != NUMA_NO_NODE &&
	    ((unsigned int)numa_node >= nr_node_ids ||
	     !node_online(numa_node)))
		return -EINVAL;

	/* find map type and init map: hashtable vs rbtree vs bloom vs ... */
	map = find_and_alloc_map(attr);
	if (IS_ERR(map))
		return PTR_ERR(map);

	err = bpf_obj_name_cpy(map->name, attr->map_name);
	if (err)
		goto free_map_nouncharge;

	atomic_set(&map->refcnt, 1);
	atomic_set(&map->usercnt, 1);

	err = security_bpf_map_alloc(map);
	if (err)
		goto free_map_nouncharge;

	err = bpf_map_charge_memlock(map);
	if (err)
		goto free_map_sec;

	err = bpf_map_alloc_id(map);
	if (err)
		goto free_map;

	err = bpf_map_new_fd(map, f_flags);
	if (err < 0) {
		/* failed to allocate fd.
		 * bpf_map_put() is needed because the above
		 * bpf_map_alloc_id() has published the map
		 * to the userspace and the userspace may
		 * have refcnt-ed it through BPF_MAP_GET_FD_BY_ID.
		 */
		bpf_map_put(map);
		return err;
	}

	trace_bpf_map_create(map, err);
	return err;

free_map:
	bpf_map_uncharge_memlock(map);
free_map_sec:
	security_bpf_map_free(map);
free_map_nouncharge:
	map->ops->map_free(map);
	return err;
}

/* if error is returned, fd is released.
 * On success caller should complete fd access with matching fdput()
 */
struct bpf_map *__bpf_map_get(struct fd f)
{
	if (!f.file)
		return ERR_PTR(-EBADF);
	if (f.file->f_op != &bpf_map_fops) {
		fdput(f);
		return ERR_PTR(-EINVAL);
	}

	return f.file->private_data;
}

/* prog's and map's refcnt limit */
#define BPF_MAX_REFCNT 32768

struct bpf_map *bpf_map_inc(struct bpf_map *map, bool uref)
{
	if (atomic_inc_return(&map->refcnt) > BPF_MAX_REFCNT) {
		atomic_dec(&map->refcnt);
		return ERR_PTR(-EBUSY);
	}
	if (uref)
		atomic_inc(&map->usercnt);
	return map;
}

struct bpf_map *bpf_map_get_with_uref(u32 ufd)
{
	struct fd f = fdget(ufd);
	struct bpf_map *map;

	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return map;

	map = bpf_map_inc(map, true);
	fdput(f);

	return map;
}

/* map_idr_lock should have been held */
static struct bpf_map *bpf_map_inc_not_zero(struct bpf_map *map,
					    bool uref)
{
	int refold;

	refold = __atomic_add_unless(&map->refcnt, 1, 0);

	if (refold >= BPF_MAX_REFCNT) {
		__bpf_map_put(map, false);
		return ERR_PTR(-EBUSY);
	}

	if (!refold)
		return ERR_PTR(-ENOENT);

	if (uref)
		atomic_inc(&map->usercnt);

	return map;
}

int __weak bpf_stackmap_copy(struct bpf_map *map, void *key, void *value)
{
	return -ENOTSUPP;
}

/* last field in 'union bpf_attr' used by this command */
#define BPF_MAP_LOOKUP_ELEM_LAST_FIELD value

static int map_lookup_elem(union bpf_attr *attr)
{
	void __user *ukey = u64_to_user_ptr(attr->key);
	void __user *uvalue = u64_to_user_ptr(attr->value);
	int ufd = attr->map_fd;
	struct bpf_map *map;
	void *key, *value, *ptr;
	u32 value_size;
	struct fd f;
	int err;

	if (CHECK_ATTR(BPF_MAP_LOOKUP_ELEM))
		return -EINVAL;

	f = fdget(ufd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);

	if (!(f.file->f_mode & FMODE_CAN_READ)) {
		err = -EPERM;
		goto err_put;
	}

	key = memdup_user(ukey, map->key_size);
	if (IS_ERR(key)) {
		err = PTR_ERR(key);
		goto err_put;
	}

	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
		value_size = round_up(map->value_size, 8) * num_possible_cpus();
	else if (IS_FD_MAP(map))
		value_size = sizeof(u32);
	else
		value_size = map->value_size;

	err = -ENOMEM;
	value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
	if (!value)
		goto free_key;

	if (bpf_map_is_dev_bound(map)) {
		err = bpf_map_offload_lookup_elem(map, key, value);
	} else if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
		   map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
		err = bpf_percpu_hash_copy(map, key, value);
	} else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
		err = bpf_percpu_array_copy(map, key, value);
	} else if (map->map_type == BPF_MAP_TYPE_STACK_TRACE) {
		err = bpf_stackmap_copy(map, key, value);
	} else if (IS_FD_ARRAY(map)) {
		err = bpf_fd_array_map_lookup_elem(map, key, value);
	} else if (IS_FD_HASH(map)) {
		err = bpf_fd_htab_map_lookup_elem(map, key, value);
	} else {
		rcu_read_lock();
		ptr = map->ops->map_lookup_elem(map, key);
		if (ptr)
			memcpy(value, ptr, value_size);
		rcu_read_unlock();
		err = ptr ? 0 : -ENOENT;
	}

	if (err)
		goto free_value;

	err = -EFAULT;
	if (copy_to_user(uvalue, value, value_size) != 0)
		goto free_value;

	trace_bpf_map_lookup_elem(map, ufd, key, value);
	err = 0;

free_value:
	kfree(value);
free_key:
	kfree(key);
err_put:
	fdput(f);
	return err;
}

#define BPF_MAP_UPDATE_ELEM_LAST_FIELD flags

static int map_update_elem(union bpf_attr *attr)
{
	void __user *ukey = u64_to_user_ptr(attr->key);
	void __user *uvalue = u64_to_user_ptr(attr->value);
	int ufd = attr->map_fd;
	struct bpf_map *map;
	void *key, *value;
	u32 value_size;
	struct fd f;
	int err;

	if (CHECK_ATTR(BPF_MAP_UPDATE_ELEM))
		return -EINVAL;

	f = fdget(ufd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);

	if (!(f.file->f_mode & FMODE_CAN_WRITE)) {
		err = -EPERM;
		goto err_put;
	}

	key = memdup_user(ukey, map->key_size);
	if (IS_ERR(key)) {
		err = PTR_ERR(key);
		goto err_put;
	}

	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
		value_size = round_up(map->value_size, 8) * num_possible_cpus();
	else
		value_size = map->value_size;

	err = -ENOMEM;
	value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
	if (!value)
		goto free_key;

	err = -EFAULT;
	if (copy_from_user(value, uvalue, value_size) != 0)
		goto free_value;

	/* Need to create a kthread, thus must support schedule */
	if (bpf_map_is_dev_bound(map)) {
		err = bpf_map_offload_update_elem(map, key, value, attr->flags);
		goto out;
	} else if (map->map_type == BPF_MAP_TYPE_CPUMAP) {
		err = map->ops->map_update_elem(map, key, value, attr->flags);
		goto out;
	}

	/* must increment bpf_prog_active to avoid kprobe+bpf triggering from
	 * inside bpf map update or delete otherwise deadlocks are possible
	 */
	preempt_disable();
	__this_cpu_inc(bpf_prog_active);
	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
		err = bpf_percpu_hash_update(map, key, value, attr->flags);
	} else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
		err = bpf_percpu_array_update(map, key, value, attr->flags);
	} else if (IS_FD_ARRAY(map)) {
		rcu_read_lock();
		err = bpf_fd_array_map_update_elem(map, f.file, key, value,
						   attr->flags);
		rcu_read_unlock();
	} else if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) {
		rcu_read_lock();
		err = bpf_fd_htab_map_update_elem(map, f.file, key, value,
						  attr->flags);
		rcu_read_unlock();
	} else {
		rcu_read_lock();
		err = map->ops->map_update_elem(map, key, value, attr->flags);
		rcu_read_unlock();
	}
	__this_cpu_dec(bpf_prog_active);
	preempt_enable();
out:
	if (!err)
		trace_bpf_map_update_elem(map, ufd, key, value);
free_value:
	kfree(value);
free_key:
	kfree(key);
err_put:
	fdput(f);
	return err;
}

#define BPF_MAP_DELETE_ELEM_LAST_FIELD key

static int map_delete_elem(union bpf_attr *attr)
{
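	/* Deletion only copies in a key; like update, it requires write
	 * access on the map fd and bumps bpf_prog_active so kprobe-attached
	 * programs cannot recurse into the map and deadlock.
	 */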
	void __user *ukey = u64_to_user_ptr(attr->key);
	int ufd = attr->map_fd;
	struct bpf_map *map;
	struct fd f;
	void *key;
	int err;

	if (CHECK_ATTR(BPF_MAP_DELETE_ELEM))
		return -EINVAL;

	f = fdget(ufd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);

	if (!(f.file->f_mode & FMODE_CAN_WRITE)) {
		err = -EPERM;
		goto err_put;
	}

	key = memdup_user(ukey, map->key_size);
	if (IS_ERR(key)) {
		err = PTR_ERR(key);
		goto err_put;
	}

	if (bpf_map_is_dev_bound(map)) {
		err = bpf_map_offload_delete_elem(map, key);
		goto out;
	}

	preempt_disable();
	__this_cpu_inc(bpf_prog_active);
	rcu_read_lock();
	err = map->ops->map_delete_elem(map, key);
	rcu_read_unlock();
	__this_cpu_dec(bpf_prog_active);
	preempt_enable();
out:
	if (!err)
		trace_bpf_map_delete_elem(map, ufd, key);
	kfree(key);
err_put:
	fdput(f);
	return err;
}

/* last field in 'union bpf_attr' used by this command */
#define BPF_MAP_GET_NEXT_KEY_LAST_FIELD next_key

static int map_get_next_key(union bpf_attr *attr)
{
	void __user *ukey = u64_to_user_ptr(attr->key);
	void __user *unext_key = u64_to_user_ptr(attr->next_key);
	int ufd = attr->map_fd;
	struct bpf_map *map;
	void *key, *next_key;
	struct fd f;
	int err;

	if (CHECK_ATTR(BPF_MAP_GET_NEXT_KEY))
		return -EINVAL;

	f = fdget(ufd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);

	if (!(f.file->f_mode & FMODE_CAN_READ)) {
		err = -EPERM;
		goto err_put;
	}

	if (ukey) {
		key = memdup_user(ukey, map->key_size);
		if (IS_ERR(key)) {
			err = PTR_ERR(key);
			goto err_put;
		}
	} else {
		key = NULL;
	}

	err = -ENOMEM;
	next_key = kmalloc(map->key_size, GFP_USER);
	if (!next_key)
		goto free_key;

	if (bpf_map_is_dev_bound(map)) {
		err = bpf_map_offload_get_next_key(map, key, next_key);
		goto out;
	}

	rcu_read_lock();
	err = map->ops->map_get_next_key(map, key, next_key);
	rcu_read_unlock();
out:
	if (err)
		goto free_next_key;

	err = -EFAULT;
	if (copy_to_user(unext_key, next_key, map->key_size) != 0)
		goto free_next_key;

	trace_bpf_map_next_key(map, ufd, key, next_key);
	err = 0;

free_next_key:
	kfree(next_key);
free_key:
	kfree(key);
err_put:
	fdput(f);
	return err;
}

static const struct bpf_prog_ops * const bpf_prog_types[] = {
#define BPF_PROG_TYPE(_id, _name) \
	[_id] = & _name ## _prog_ops,
#define BPF_MAP_TYPE(_id, _ops)
#include <linux/bpf_types.h>
#undef BPF_PROG_TYPE
#undef BPF_MAP_TYPE
};

static int find_prog_type(enum bpf_prog_type type, struct bpf_prog *prog)
{
	if (type >= ARRAY_SIZE(bpf_prog_types) || !bpf_prog_types[type])
		return -EINVAL;

	if (!bpf_prog_is_dev_bound(prog->aux))
		prog->aux->ops = bpf_prog_types[type];
	else
		prog->aux->ops = &bpf_offload_prog_ops;
	prog->type = type;
	return 0;
}

/* drop refcnt on maps used by eBPF program and free auxiliary data */
static void free_used_maps(struct bpf_prog_aux *aux)
{
	int i;

	for (i = 0; i < aux->used_map_cnt; i++)
		bpf_map_put(aux->used_maps[i]);

	kfree(aux->used_maps);
}

int __bpf_prog_charge(struct user_struct *user, u32 pages)
{
	unsigned long memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
	unsigned long user_bufs;

	if (user) {
		user_bufs = atomic_long_add_return(pages, &user->locked_vm);
		if (user_bufs > memlock_limit) {
			atomic_long_sub(pages, &user->locked_vm);
			return -EPERM;
		}
	}

	return 0;
}

void __bpf_prog_uncharge(struct user_struct *user, u32 pages)
{
	if (user)
		atomic_long_sub(pages, &user->locked_vm);
}

static int bpf_prog_charge_memlock(struct bpf_prog *prog)
{
	struct user_struct *user = get_current_user();
	int ret;

	ret = __bpf_prog_charge(user, prog->pages);
	if (ret) {
		free_uid(user);
		return ret;
	}

	prog->aux->user = user;
	return 0;
}

static void bpf_prog_uncharge_memlock(struct bpf_prog *prog)
{
	struct user_struct *user = prog->aux->user;

	__bpf_prog_uncharge(user, prog->pages);
	free_uid(user);
}

static int bpf_prog_alloc_id(struct bpf_prog *prog)
{
	int id;

	spin_lock_bh(&prog_idr_lock);
	id = idr_alloc_cyclic(&prog_idr, prog, 1, INT_MAX, GFP_ATOMIC);
	if (id > 0)
		prog->aux->id = id;
	spin_unlock_bh(&prog_idr_lock);

	/* id is in [1, INT_MAX) */
	if (WARN_ON_ONCE(!id))
		return -ENOSPC;

	return id > 0 ? 0 : id;
}

void bpf_prog_free_id(struct bpf_prog *prog, bool do_idr_lock)
{
	/* cBPF to eBPF migrations are currently not in the idr store.
	 * Offloaded programs are removed from the store when their device
	 * disappears - even if someone grabs an fd to them they are unusable,
	 * simply waiting for refcnt to drop to be freed.
	 */
	if (!prog->aux->id)
		return;

	if (do_idr_lock)
		spin_lock_bh(&prog_idr_lock);
	else
		__acquire(&prog_idr_lock);

	idr_remove(&prog_idr, prog->aux->id);
	prog->aux->id = 0;

	if (do_idr_lock)
		spin_unlock_bh(&prog_idr_lock);
	else
		__release(&prog_idr_lock);
}

static void __bpf_prog_put_rcu(struct rcu_head *rcu)
{
	struct bpf_prog_aux *aux = container_of(rcu, struct bpf_prog_aux, rcu);

	free_used_maps(aux);
	bpf_prog_uncharge_memlock(aux->prog);
	security_bpf_prog_free(aux);
	bpf_prog_free(aux->prog);
}

static void __bpf_prog_put(struct bpf_prog *prog, bool do_idr_lock)
{
	if (atomic_dec_and_test(&prog->aux->refcnt)) {
		int i;

		trace_bpf_prog_put_rcu(prog);
		/* bpf_prog_free_id() must be called first */
		bpf_prog_free_id(prog, do_idr_lock);

		for (i = 0; i < prog->aux->func_cnt; i++)
			bpf_prog_kallsyms_del(prog->aux->func[i]);
		bpf_prog_kallsyms_del(prog);

		call_rcu(&prog->aux->rcu, __bpf_prog_put_rcu);
	}
}

void bpf_prog_put(struct bpf_prog *prog)
{
	__bpf_prog_put(prog, true);
}
EXPORT_SYMBOL_GPL(bpf_prog_put);

static int bpf_prog_release(struct inode *inode, struct file *filp)
{
	struct bpf_prog *prog = filp->private_data;

	bpf_prog_put(prog);
	return 0;
}

#ifdef CONFIG_PROC_FS
static void bpf_prog_show_fdinfo(struct seq_file *m, struct file *filp)
{
	const struct bpf_prog *prog = filp->private_data;
	char prog_tag[sizeof(prog->tag) * 2 + 1] = { };

	bin2hex(prog_tag, prog->tag, sizeof(prog->tag));
	seq_printf(m,
		   "prog_type:\t%u\n"
		   "prog_jited:\t%u\n"
		   "prog_tag:\t%s\n"
		   "memlock:\t%llu\n",
		   prog->type,
		   prog->jited,
		   prog_tag,
		   prog->pages * 1ULL << PAGE_SHIFT);
}
#endif

const struct file_operations bpf_prog_fops = {
#ifdef CONFIG_PROC_FS
	.show_fdinfo	= bpf_prog_show_fdinfo,
#endif
	.release	= bpf_prog_release,
	.read		= bpf_dummy_read,
	.write		= bpf_dummy_write,
};

int bpf_prog_new_fd(struct bpf_prog *prog)
{
	int ret;

	ret = security_bpf_prog(prog);
	if (ret < 0)
		return ret;

	return anon_inode_getfd("bpf-prog", &bpf_prog_fops, prog,
				O_RDWR | O_CLOEXEC);
}

static struct bpf_prog *____bpf_prog_get(struct fd f)
{
	if (!f.file)
		return ERR_PTR(-EBADF);
	if (f.file->f_op != &bpf_prog_fops) {
		fdput(f);
		return ERR_PTR(-EINVAL);
	}

	return f.file->private_data;
}

struct bpf_prog *bpf_prog_add(struct bpf_prog *prog, int i)
{
	if (atomic_add_return(i, &prog->aux->refcnt) > BPF_MAX_REFCNT) {
		atomic_sub(i, &prog->aux->refcnt);
		return ERR_PTR(-EBUSY);
	}
	return prog;
}
EXPORT_SYMBOL_GPL(bpf_prog_add);

void bpf_prog_sub(struct bpf_prog *prog, int i)
{
	/* Only to be used for undoing previous bpf_prog_add() in some
	 * error path. We still know that another entity in our call
	 * path holds a reference to the program, thus atomic_sub() can
	 * be safely used in such cases!
	 */
	WARN_ON(atomic_sub_return(i, &prog->aux->refcnt) == 0);
}
EXPORT_SYMBOL_GPL(bpf_prog_sub);

struct bpf_prog *bpf_prog_inc(struct bpf_prog *prog)
{
	return bpf_prog_add(prog, 1);
}
EXPORT_SYMBOL_GPL(bpf_prog_inc);

/* prog_idr_lock should have been held */
struct bpf_prog *bpf_prog_inc_not_zero(struct bpf_prog *prog)
{
	int refold;

	refold = __atomic_add_unless(&prog->aux->refcnt, 1, 0);

	if (refold >= BPF_MAX_REFCNT) {
		__bpf_prog_put(prog, false);
		return ERR_PTR(-EBUSY);
	}

	if (!refold)
		return ERR_PTR(-ENOENT);

	return prog;
}
EXPORT_SYMBOL_GPL(bpf_prog_inc_not_zero);

bool bpf_prog_get_ok(struct bpf_prog *prog,
		     enum bpf_prog_type *attach_type, bool attach_drv)
{
	/* not an attachment, just a refcount inc, always allow */
	if (!attach_type)
		return true;

	if (prog->type != *attach_type)
		return false;
	if (bpf_prog_is_dev_bound(prog->aux) && !attach_drv)
		return false;

	return true;
}

static struct bpf_prog *__bpf_prog_get(u32 ufd, enum bpf_prog_type *attach_type,
				       bool attach_drv)
{
	struct fd f = fdget(ufd);
	struct bpf_prog *prog;

	prog = ____bpf_prog_get(f);
	if (IS_ERR(prog))
		return prog;
	if (!bpf_prog_get_ok(prog, attach_type, attach_drv)) {
		prog = ERR_PTR(-EINVAL);
		goto out;
	}

	prog = bpf_prog_inc(prog);
out:
	fdput(f);
	return prog;
}

struct bpf_prog *bpf_prog_get(u32 ufd)
{
	return __bpf_prog_get(ufd, NULL, false);
}

struct bpf_prog *bpf_prog_get_type_dev(u32 ufd, enum bpf_prog_type type,
				       bool attach_drv)
{
	struct bpf_prog *prog = __bpf_prog_get(ufd, &type, attach_drv);

	if (!IS_ERR(prog))
		trace_bpf_prog_get_type(prog);
	return prog;
}
EXPORT_SYMBOL_GPL(bpf_prog_get_type_dev);

/* last field in 'union bpf_attr' used by this command */
#define BPF_PROG_LOAD_LAST_FIELD prog_ifindex

static int bpf_prog_load(union bpf_attr *attr)
{
	enum bpf_prog_type type = attr->prog_type;
	struct bpf_prog *prog;
	int err;
	char license[128];
	bool is_gpl;

	if (CHECK_ATTR(BPF_PROG_LOAD))
		return -EINVAL;

	if (attr->prog_flags & ~BPF_F_STRICT_ALIGNMENT)
		return -EINVAL;

	/* copy eBPF program license from user space */
	if (strncpy_from_user(license, u64_to_user_ptr(attr->license),
			      sizeof(license) - 1) < 0)
		return -EFAULT;
	license[sizeof(license) - 1] = 0;

	/* eBPF programs must be GPL compatible to use GPL-ed functions */
	is_gpl = license_is_gpl_compatible(license);

	if (attr->insn_cnt == 0 || attr->insn_cnt > BPF_MAXINSNS)
		return -E2BIG;

	if (type == BPF_PROG_TYPE_KPROBE &&
	    attr->kern_version != LINUX_VERSION_CODE)
		return -EINVAL;

	if (type != BPF_PROG_TYPE_SOCKET_FILTER &&
	    type != BPF_PROG_TYPE_CGROUP_SKB &&
	    !capable(CAP_SYS_ADMIN))
		return -EPERM;

	/* plain bpf_prog allocation */
	prog = bpf_prog_alloc(bpf_prog_size(attr->insn_cnt), GFP_USER);
	if (!prog)
		return -ENOMEM;

	prog->aux->offload_requested = !!attr->prog_ifindex;

	err = security_bpf_prog_alloc(prog->aux);
	if (err)
		goto free_prog_nouncharge;

	err = bpf_prog_charge_memlock(prog);
	if (err)
		goto free_prog_sec;

	prog->len = attr->insn_cnt;

	err = -EFAULT;
	if (copy_from_user(prog->insns, u64_to_user_ptr(attr->insns),
			   bpf_prog_insn_size(prog)) != 0)
		goto free_prog;

	prog->orig_prog = NULL;
	prog->jited = 0;

	atomic_set(&prog->aux->refcnt, 1);
	prog->gpl_compatible = is_gpl ? 1 : 0;

	if (bpf_prog_is_dev_bound(prog->aux)) {
		err = bpf_prog_offload_init(prog, attr);
		if (err)
			goto free_prog;
	}

	/* find program type: socket_filter vs tracing_filter */
	err = find_prog_type(type, prog);
	if (err < 0)
		goto free_prog;

	prog->aux->load_time = ktime_get_boot_ns();
	err = bpf_obj_name_cpy(prog->aux->name, attr->prog_name);
	if (err)
		goto free_prog;

	/* run eBPF verifier */
	err = bpf_check(&prog, attr);
	if (err < 0)
		goto free_used_maps;

	/* eBPF program is ready to be JITed */
	if (!prog->bpf_func)
		prog = bpf_prog_select_runtime(prog, &err);
	if (err < 0)
		goto free_used_maps;

	err = bpf_prog_alloc_id(prog);
	if (err)
		goto free_used_maps;

	err = bpf_prog_new_fd(prog);
	if (err < 0) {
		/* failed to allocate fd.
		 * bpf_prog_put() is needed because the above
		 * bpf_prog_alloc_id() has published the prog
		 * to the userspace and the userspace may
		 * have refcnt-ed it through BPF_PROG_GET_FD_BY_ID.
		 */
		bpf_prog_put(prog);
		return err;
	}

	bpf_prog_kallsyms_add(prog);
	trace_bpf_prog_load(prog, err);
	return err;

free_used_maps:
	free_used_maps(prog->aux);
free_prog:
	bpf_prog_uncharge_memlock(prog);
free_prog_sec:
	security_bpf_prog_free(prog->aux);
free_prog_nouncharge:
	bpf_prog_free(prog);
	return err;
}

#define BPF_OBJ_LAST_FIELD file_flags

static int bpf_obj_pin(const union bpf_attr *attr)
{
	if (CHECK_ATTR(BPF_OBJ) || attr->file_flags != 0)
		return -EINVAL;

	return bpf_obj_pin_user(attr->bpf_fd, u64_to_user_ptr(attr->pathname));
}

static int bpf_obj_get(const union bpf_attr *attr)
{
	if (CHECK_ATTR(BPF_OBJ) || attr->bpf_fd != 0 ||
	    attr->file_flags & ~BPF_OBJ_FLAG_MASK)
		return -EINVAL;

	return bpf_obj_get_user(u64_to_user_ptr(attr->pathname),
				attr->file_flags);
}

#ifdef CONFIG_CGROUP_BPF

#define BPF_PROG_ATTACH_LAST_FIELD attach_flags

static int sockmap_get_from_fd(const union bpf_attr *attr, bool attach)
{
	struct bpf_prog *prog = NULL;
	int ufd = attr->target_fd;
	struct bpf_map *map;
	struct fd f;
	int err;

	f = fdget(ufd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);

	if (attach) {
		prog = bpf_prog_get_type(attr->attach_bpf_fd,
					 BPF_PROG_TYPE_SK_SKB);
		if (IS_ERR(prog)) {
			fdput(f);
			return PTR_ERR(prog);
		}
	}

	err = sock_map_prog(map, prog, attr->attach_type);
	if (err) {
		fdput(f);
		if (prog)
			bpf_prog_put(prog);
		return err;
	}

	fdput(f);
	return 0;
}

#define BPF_F_ATTACH_MASK \
	(BPF_F_ALLOW_OVERRIDE | BPF_F_ALLOW_MULTI)

static int bpf_prog_attach(const union bpf_attr *attr)
{
	enum bpf_prog_type ptype;
	struct bpf_prog *prog;
	struct cgroup *cgrp;
	int ret;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	if (CHECK_ATTR(BPF_PROG_ATTACH))
		return -EINVAL;

	if (attr->attach_flags & ~BPF_F_ATTACH_MASK)
		return -EINVAL;

	switch (attr->attach_type) {
	case BPF_CGROUP_INET_INGRESS:
	case BPF_CGROUP_INET_EGRESS:
		ptype = BPF_PROG_TYPE_CGROUP_SKB;
		break;
	case BPF_CGROUP_INET_SOCK_CREATE:
		ptype = BPF_PROG_TYPE_CGROUP_SOCK;
		break;
	case BPF_CGROUP_SOCK_OPS:
		ptype = BPF_PROG_TYPE_SOCK_OPS;
		break;
	case BPF_CGROUP_DEVICE:
		ptype = BPF_PROG_TYPE_CGROUP_DEVICE;
		break;
	case BPF_SK_SKB_STREAM_PARSER:
	case BPF_SK_SKB_STREAM_VERDICT:
		return sockmap_get_from_fd(attr, true);
	default:
		return -EINVAL;
	}

	prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype);
	if (IS_ERR(prog))
		return PTR_ERR(prog);

	cgrp = cgroup_get_from_fd(attr->target_fd);
	if (IS_ERR(cgrp)) {
		bpf_prog_put(prog);
		return PTR_ERR(cgrp);
	}

	ret = cgroup_bpf_attach(cgrp, prog, attr->attach_type,
				attr->attach_flags);
	if (ret)
		bpf_prog_put(prog);
	cgroup_put(cgrp);

	return ret;
}

#define BPF_PROG_DETACH_LAST_FIELD attach_type

static int bpf_prog_detach(const union bpf_attr *attr)
{
	enum bpf_prog_type ptype;
	struct bpf_prog *prog;
	struct cgroup *cgrp;
	int ret;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	if (CHECK_ATTR(BPF_PROG_DETACH))
		return -EINVAL;

	switch (attr->attach_type) {
	case BPF_CGROUP_INET_INGRESS:
	case BPF_CGROUP_INET_EGRESS:
		ptype = BPF_PROG_TYPE_CGROUP_SKB;
		break;
	case BPF_CGROUP_INET_SOCK_CREATE:
		ptype = BPF_PROG_TYPE_CGROUP_SOCK;
		break;
	case BPF_CGROUP_SOCK_OPS:
		ptype = BPF_PROG_TYPE_SOCK_OPS;
		break;
	case BPF_CGROUP_DEVICE:
		ptype = BPF_PROG_TYPE_CGROUP_DEVICE;
		break;
	case BPF_SK_SKB_STREAM_PARSER:
	case BPF_SK_SKB_STREAM_VERDICT:
		return sockmap_get_from_fd(attr, false);
	default:
		return -EINVAL;
	}

	cgrp = cgroup_get_from_fd(attr->target_fd);
	if (IS_ERR(cgrp))
		return PTR_ERR(cgrp);

	prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype);
	if (IS_ERR(prog))
		prog = NULL;

	ret = cgroup_bpf_detach(cgrp, prog, attr->attach_type, 0);
	if (prog)
		bpf_prog_put(prog);
	cgroup_put(cgrp);
	return ret;
}

#define BPF_PROG_QUERY_LAST_FIELD query.prog_cnt

static int bpf_prog_query(const union bpf_attr *attr,
			  union bpf_attr __user *uattr)
{
	struct cgroup *cgrp;
	int ret;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;
	if (CHECK_ATTR(BPF_PROG_QUERY))
		return -EINVAL;
	if (attr->query.query_flags & ~BPF_F_QUERY_EFFECTIVE)
		return -EINVAL;

	switch (attr->query.attach_type) {
	case BPF_CGROUP_INET_INGRESS:
	case BPF_CGROUP_INET_EGRESS:
	case BPF_CGROUP_INET_SOCK_CREATE:
	case BPF_CGROUP_SOCK_OPS:
	case BPF_CGROUP_DEVICE:
		break;
	default:
		return -EINVAL;
	}
	cgrp = cgroup_get_from_fd(attr->query.target_fd);
	if (IS_ERR(cgrp))
		return PTR_ERR(cgrp);
	ret = cgroup_bpf_query(cgrp, attr, uattr);
	cgroup_put(cgrp);
	return ret;
}
#endif /* CONFIG_CGROUP_BPF */

#define BPF_PROG_TEST_RUN_LAST_FIELD test.duration

static int bpf_prog_test_run(const union bpf_attr *attr,
			     union bpf_attr __user *uattr)
{
	struct bpf_prog *prog;
	int ret = -ENOTSUPP;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;
	if (CHECK_ATTR(BPF_PROG_TEST_RUN))
		return -EINVAL;

	prog = bpf_prog_get(attr->test.prog_fd);
	if (IS_ERR(prog))
		return PTR_ERR(prog);

	if (prog->aux->ops->test_run)
		ret = prog->aux->ops->test_run(prog, attr, uattr);

	bpf_prog_put(prog);
	return ret;
}

#define BPF_OBJ_GET_NEXT_ID_LAST_FIELD next_id

static int bpf_obj_get_next_id(const union bpf_attr *attr,
			       union bpf_attr __user *uattr,
			       struct idr *idr,
			       spinlock_t *lock)
{
	u32 next_id = attr->start_id;
	int err = 0;

	if (CHECK_ATTR(BPF_OBJ_GET_NEXT_ID) || next_id >= INT_MAX)
		return -EINVAL;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	next_id++;
	spin_lock_bh(lock);
	if (!idr_get_next(idr, &next_id))
		err = -ENOENT;
	spin_unlock_bh(lock);

	if (!err)
		err = put_user(next_id, &uattr->next_id);

	return err;
}

#define BPF_PROG_GET_FD_BY_ID_LAST_FIELD prog_id

static int bpf_prog_get_fd_by_id(const union bpf_attr *attr)
{
	struct bpf_prog *prog;
	u32 id = attr->prog_id;
	int fd;

	if (CHECK_ATTR(BPF_PROG_GET_FD_BY_ID))
		return -EINVAL;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	spin_lock_bh(&prog_idr_lock);
	prog = idr_find(&prog_idr, id);
	if (prog)
		prog = bpf_prog_inc_not_zero(prog);
	else
		prog = ERR_PTR(-ENOENT);
	spin_unlock_bh(&prog_idr_lock);

	if (IS_ERR(prog))
		return PTR_ERR(prog);

	fd = bpf_prog_new_fd(prog);
	if (fd < 0)
		bpf_prog_put(prog);

	return fd;
}

#define BPF_MAP_GET_FD_BY_ID_LAST_FIELD open_flags

static int bpf_map_get_fd_by_id(const union bpf_attr *attr)
{
	struct bpf_map *map;
	u32 id = attr->map_id;
	int f_flags;
	int fd;

	if (CHECK_ATTR(BPF_MAP_GET_FD_BY_ID) ||
	    attr->open_flags & ~BPF_OBJ_FLAG_MASK)
		return -EINVAL;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	f_flags = bpf_get_file_flag(attr->open_flags);
	if (f_flags < 0)
		return f_flags;

	spin_lock_bh(&map_idr_lock);
	map = idr_find(&map_idr, id);
	if (map)
		map = bpf_map_inc_not_zero(map, true);
	else
		map = ERR_PTR(-ENOENT);
	spin_unlock_bh(&map_idr_lock);

	if (IS_ERR(map))
		return PTR_ERR(map);

	fd = bpf_map_new_fd(map, f_flags);
	if (fd < 0)
		bpf_map_put(map);

	return fd;
}

static const struct bpf_map *bpf_map_from_imm(const struct bpf_prog *prog,
					      unsigned long addr)
{
	int i;

	for (i = 0; i < prog->aux->used_map_cnt; i++)
		if (prog->aux->used_maps[i] == (void *)addr)
			return prog->aux->used_maps[i];
	return NULL;
}

static struct bpf_insn *bpf_insn_prepare_dump(const struct bpf_prog *prog)
{
	const struct bpf_map *map;
	struct bpf_insn *insns;
	u64 imm;
	int i;

	insns = kmemdup(prog->insnsi, bpf_prog_insn_size(prog),
			GFP_USER);
	if (!insns)
		return insns;

	for (i = 0; i < prog->len; i++) {
		if (insns[i].code == (BPF_JMP | BPF_TAIL_CALL)) {
			insns[i].code = BPF_JMP | BPF_CALL;
			insns[i].imm = BPF_FUNC_tail_call;
			/* fall-through */
		}
		if (insns[i].code == (BPF_JMP | BPF_CALL) ||
		    insns[i].code == (BPF_JMP | BPF_CALL_ARGS)) {
			if (insns[i].code == (BPF_JMP | BPF_CALL_ARGS))
				insns[i].code = BPF_JMP | BPF_CALL;
			if (!bpf_dump_raw_ok())
				insns[i].imm = 0;
			continue;
		}

		if (insns[i].code != (BPF_LD | BPF_IMM | BPF_DW))
			continue;

		imm = ((u64)insns[i + 1].imm << 32) | (u32)insns[i].imm;
		map = bpf_map_from_imm(prog, imm);
		if (map) {
			insns[i].src_reg = BPF_PSEUDO_MAP_FD;
			insns[i].imm = map->id;
			insns[i + 1].imm = 0;
			continue;
		}

		if (!bpf_dump_raw_ok() &&
		    imm == (unsigned long)prog->aux) {
			insns[i].imm = 0;
			insns[i + 1].imm = 0;
			continue;
		}
	}

	return insns;
}

static int bpf_prog_get_info_by_fd(struct bpf_prog *prog,
				   const union bpf_attr *attr,
				   union bpf_attr __user *uattr)
{
	struct bpf_prog_info __user *uinfo = u64_to_user_ptr(attr->info.info);
	struct bpf_prog_info info = {};
	u32 info_len = attr->info.info_len;
	char __user *uinsns;
	u32 ulen;
	int err;

	err = check_uarg_tail_zero(uinfo, sizeof(info), info_len);
	if (err)
		return err;
	info_len = min_t(u32, sizeof(info), info_len);

	if (copy_from_user(&info, uinfo, info_len))
		return -EFAULT;

	info.type = prog->type;
	info.id = prog->aux->id;
	info.load_time = prog->aux->load_time;
	info.created_by_uid = from_kuid_munged(current_user_ns(),
					       prog->aux->user->uid);

	memcpy(info.tag, prog->tag, sizeof(prog->tag));
	memcpy(info.name, prog->aux->name, sizeof(prog->aux->name));

	ulen = info.nr_map_ids;
	info.nr_map_ids = prog->aux->used_map_cnt;
	ulen = min_t(u32, info.nr_map_ids, ulen);
	if (ulen) {
		u32 __user *user_map_ids = u64_to_user_ptr(info.map_ids);
		u32 i;

		for (i = 0; i < ulen; i++)
			if (put_user(prog->aux->used_maps[i]->id,
				     &user_map_ids[i]))
				return -EFAULT;
	}

	if (!capable(CAP_SYS_ADMIN)) {
		info.jited_prog_len = 0;
		info.xlated_prog_len = 0;
		goto done;
	}

	ulen = info.xlated_prog_len;
	info.xlated_prog_len = bpf_prog_insn_size(prog);
	if (info.xlated_prog_len && ulen) {
		struct bpf_insn *insns_sanitized;
		bool fault;

		if (prog->blinded && !bpf_dump_raw_ok()) {
			info.xlated_prog_insns = 0;
			goto done;
		}
		insns_sanitized = bpf_insn_prepare_dump(prog);
		if (!insns_sanitized)
			return -ENOMEM;
		uinsns = u64_to_user_ptr(info.xlated_prog_insns);
		ulen = min_t(u32, info.xlated_prog_len, ulen);
		fault = copy_to_user(uinsns, insns_sanitized, ulen);
		kfree(insns_sanitized);
		if (fault)
			return -EFAULT;
	}

	if (bpf_prog_is_dev_bound(prog->aux)) {
		err = bpf_prog_offload_info_fill(&info, prog);
		if (err)
			return err;
		goto done;
	}

	/* NOTE: the following code is supposed to be skipped for offload.
	 * bpf_prog_offload_info_fill() is the place to fill similar fields
	 * for offload.
	 */
	ulen = info.jited_prog_len;
	info.jited_prog_len = prog->jited_len;
	if (info.jited_prog_len && ulen) {
		if (bpf_dump_raw_ok()) {
			uinsns = u64_to_user_ptr(info.jited_prog_insns);
			ulen = min_t(u32, info.jited_prog_len, ulen);
			if (copy_to_user(uinsns, prog->bpf_func, ulen))
				return -EFAULT;
		} else {
			info.jited_prog_insns = 0;
		}
	}

done:
	if (copy_to_user(uinfo, &info, info_len) ||
	    put_user(info_len, &uattr->info.info_len))
		return -EFAULT;

	return 0;
}

static int bpf_map_get_info_by_fd(struct bpf_map *map,
				  const union bpf_attr *attr,
				  union bpf_attr __user *uattr)
{
	struct bpf_map_info __user *uinfo = u64_to_user_ptr(attr->info.info);
	struct bpf_map_info info = {};
	u32 info_len = attr->info.info_len;
	int err;

	err = check_uarg_tail_zero(uinfo, sizeof(info), info_len);
	if (err)
		return err;
	info_len = min_t(u32, sizeof(info), info_len);

	info.type = map->map_type;
	info.id = map->id;
	info.key_size = map->key_size;
	info.value_size = map->value_size;
	info.max_entries = map->max_entries;
	info.map_flags = map->map_flags;
	memcpy(info.name, map->name, sizeof(map->name));

	if (bpf_map_is_dev_bound(map)) {
		err = bpf_map_offload_info_fill(&info, map);
		if (err)
			return err;
	}

	if (copy_to_user(uinfo, &info, info_len) ||
	    put_user(info_len, &uattr->info.info_len))
		return -EFAULT;

	return 0;
}

#define BPF_OBJ_GET_INFO_BY_FD_LAST_FIELD info.info

static int bpf_obj_get_info_by_fd(const union bpf_attr *attr,
				  union bpf_attr __user *uattr)
{
	int ufd = attr->info.bpf_fd;
	struct fd f;
	int err;

	if (CHECK_ATTR(BPF_OBJ_GET_INFO_BY_FD))
		return -EINVAL;

	f = fdget(ufd);
	if (!f.file)
		return -EBADFD;

	if (f.file->f_op == &bpf_prog_fops)
		err = bpf_prog_get_info_by_fd(f.file->private_data, attr,
					      uattr);
	else if (f.file->f_op == &bpf_map_fops)
		err = bpf_map_get_info_by_fd(f.file->private_data, attr,
					     uattr);
	else
		err = -EINVAL;

	fdput(f);
	return err;
}

SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
{
	union bpf_attr attr = {};
	int err;

	if (!capable(CAP_SYS_ADMIN) && sysctl_unprivileged_bpf_disabled)
		return -EPERM;

	err = check_uarg_tail_zero(uattr, sizeof(attr), size);
	if (err)
		return err;
	size = min_t(u32, size, sizeof(attr));

	/* copy attributes from user space, may be less than sizeof(bpf_attr) */
	if (copy_from_user(&attr, uattr, size) != 0)
		return -EFAULT;

	err = security_bpf(cmd, &attr, size);
	if (err < 0)
		return err;

	switch (cmd) {
	case BPF_MAP_CREATE:
		err = map_create(&attr);
		break;
	case BPF_MAP_LOOKUP_ELEM:
		err = map_lookup_elem(&attr);
		break;
	case BPF_MAP_UPDATE_ELEM:
		err = map_update_elem(&attr);
		break;
	case BPF_MAP_DELETE_ELEM:
		err = map_delete_elem(&attr);
		break;
	case BPF_MAP_GET_NEXT_KEY:
		err = map_get_next_key(&attr);
		break;
	case BPF_PROG_LOAD:
		err = bpf_prog_load(&attr);
		break;
	case BPF_OBJ_PIN:
		err = bpf_obj_pin(&attr);
		break;
	case BPF_OBJ_GET:
		err = bpf_obj_get(&attr);
		break;
#ifdef CONFIG_CGROUP_BPF
	case BPF_PROG_ATTACH:
		err = bpf_prog_attach(&attr);
		break;
	case BPF_PROG_DETACH:
		err = bpf_prog_detach(&attr);
		break;
	case BPF_PROG_QUERY:
		err = bpf_prog_query(&attr, uattr);
		break;
#endif
	case BPF_PROG_TEST_RUN:
		err = bpf_prog_test_run(&attr, uattr);
		break;
	case BPF_PROG_GET_NEXT_ID:
		err = bpf_obj_get_next_id(&attr, uattr,
					  &prog_idr, &prog_idr_lock);
		break;
	case BPF_MAP_GET_NEXT_ID:
		err = bpf_obj_get_next_id(&attr, uattr,
					  &map_idr, &map_idr_lock);
		break;
	case BPF_PROG_GET_FD_BY_ID:
		err = bpf_prog_get_fd_by_id(&attr);
		break;
	case BPF_MAP_GET_FD_BY_ID:
		err = bpf_map_get_fd_by_id(&attr);
		break;
	case BPF_OBJ_GET_INFO_BY_FD:
		err = bpf_obj_get_info_by_fd(&attr, uattr);
		break;
	default:
		err = -EINVAL;
		break;
	}

	return err;
}
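
/* Illustrative sketch, kept inside a comment so it does not affect the build:
 * a minimal user-space caller of the syscall defined above, assuming a Linux
 * host that exposes __NR_bpf and the uapi <linux/bpf.h> header. It creates a
 * small hash map via BPF_MAP_CREATE and writes one element via
 * BPF_MAP_UPDATE_ELEM, mirroring the map_create() and map_update_elem() paths
 * handled in this file. The helper names (sys_bpf, example) are hypothetical;
 * only the commands and bpf_attr fields are taken from the kernel ABI.
 *
 *	#include <linux/bpf.h>
 *	#include <string.h>
 *	#include <sys/syscall.h>
 *	#include <unistd.h>
 *
 *	static int sys_bpf(int cmd, union bpf_attr *attr, unsigned int size)
 *	{
 *		return syscall(__NR_bpf, cmd, attr, size);
 *	}
 *
 *	int example(void)
 *	{
 *		union bpf_attr attr;
 *		int map_fd;
 *		int key = 1, value = 42;
 *
 *		memset(&attr, 0, sizeof(attr));
 *		attr.map_type = BPF_MAP_TYPE_HASH;
 *		attr.key_size = sizeof(key);
 *		attr.value_size = sizeof(value);
 *		attr.max_entries = 16;
 *		map_fd = sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
 *		if (map_fd < 0)
 *			return -1;
 *
 *		memset(&attr, 0, sizeof(attr));
 *		attr.map_fd = map_fd;
 *		attr.key = (__u64)(unsigned long)&key;
 *		attr.value = (__u64)(unsigned long)&value;
 *		attr.flags = BPF_ANY;
 *		return sys_bpf(BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr));
 *	}
 */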