/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 */
#include <linux/bpf.h>
#include <linux/bpf_trace.h>
#include <linux/syscalls.h>
#include <linux/slab.h>
#include <linux/sched/signal.h>
#include <linux/vmalloc.h>
#include <linux/mmzone.h>
#include <linux/anon_inodes.h>
#include <linux/file.h>
#include <linux/license.h>
#include <linux/filter.h>
#include <linux/version.h>
#include <linux/kernel.h>
#include <linux/idr.h>
#include <linux/cred.h>
#include <linux/timekeeping.h>
#include <linux/ctype.h>

#define IS_FD_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PROG_ARRAY || \
			  (map)->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY || \
			  (map)->map_type == BPF_MAP_TYPE_CGROUP_ARRAY || \
			  (map)->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS)
#define IS_FD_HASH(map) ((map)->map_type == BPF_MAP_TYPE_HASH_OF_MAPS)
#define IS_FD_MAP(map) (IS_FD_ARRAY(map) || IS_FD_HASH(map))

#define BPF_OBJ_FLAG_MASK	(BPF_F_RDONLY | BPF_F_WRONLY)

DEFINE_PER_CPU(int, bpf_prog_active);
static DEFINE_IDR(prog_idr);
static DEFINE_SPINLOCK(prog_idr_lock);
static DEFINE_IDR(map_idr);
static DEFINE_SPINLOCK(map_idr_lock);

int sysctl_unprivileged_bpf_disabled __read_mostly;

static const struct bpf_map_ops * const bpf_map_types[] = {
#define BPF_PROG_TYPE(_id, _ops)
#define BPF_MAP_TYPE(_id, _ops) \
	[_id] = &_ops,
#include <linux/bpf_types.h>
#undef BPF_PROG_TYPE
#undef BPF_MAP_TYPE
};

/*
 * If we're handed a bigger struct than we know of, ensure all the unknown bits
 * are 0 - i.e. new user-space does not rely on any kernel feature extensions
 * we don't know about yet.
 *
 * There is a ToCToU between this function call and the following
 * copy_from_user() call. However, this is not a concern since this function is
 * meant to be a future-proofing of bits.
 */
static int check_uarg_tail_zero(void __user *uaddr,
				size_t expected_size,
				size_t actual_size)
{
	unsigned char __user *addr;
	unsigned char __user *end;
	unsigned char val;
	int err;

	if (unlikely(actual_size > PAGE_SIZE))	/* silly large */
		return -E2BIG;

	if (unlikely(!access_ok(VERIFY_READ, uaddr, actual_size)))
		return -EFAULT;

	if (actual_size <= expected_size)
		return 0;

	addr = uaddr + expected_size;
	end = uaddr + actual_size;

	for (; addr < end; addr++) {
		err = get_user(val, addr);
		if (err)
			return err;
		if (val)
			return -E2BIG;
	}

	return 0;
}

static struct bpf_map *find_and_alloc_map(union bpf_attr *attr)
{
	struct bpf_map *map;

	if (attr->map_type >= ARRAY_SIZE(bpf_map_types) ||
	    !bpf_map_types[attr->map_type])
		return ERR_PTR(-EINVAL);

	map = bpf_map_types[attr->map_type]->map_alloc(attr);
	if (IS_ERR(map))
		return map;
	map->ops = bpf_map_types[attr->map_type];
	map->map_type = attr->map_type;
	return map;
}

void *bpf_map_area_alloc(size_t size, int numa_node)
{
	/* We definitely need __GFP_NORETRY, so OOM killer doesn't
	 * trigger under memory pressure as we really just want to
	 * fail instead.
	 */
	const gfp_t flags = __GFP_NOWARN | __GFP_NORETRY | __GFP_ZERO;
	void *area;

	if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) {
		area = kmalloc_node(size, GFP_USER | flags, numa_node);
		if (area != NULL)
			return area;
	}

	return __vmalloc_node_flags_caller(size, numa_node, GFP_KERNEL | flags,
					   __builtin_return_address(0));
}

void bpf_map_area_free(void *area)
{
	kvfree(area);
}

int bpf_map_precharge_memlock(u32 pages)
{
	struct user_struct *user = get_current_user();
	unsigned long memlock_limit, cur;

	memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
	cur = atomic_long_read(&user->locked_vm);
	free_uid(user);
	if (cur + pages > memlock_limit)
		return -EPERM;
	return 0;
}

static int bpf_map_charge_memlock(struct bpf_map *map)
{
	struct user_struct *user = get_current_user();
	unsigned long memlock_limit;

	memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;

	atomic_long_add(map->pages, &user->locked_vm);

	if (atomic_long_read(&user->locked_vm) > memlock_limit) {
		atomic_long_sub(map->pages, &user->locked_vm);
		free_uid(user);
		return -EPERM;
	}
	map->user = user;
	return 0;
}

static void bpf_map_uncharge_memlock(struct bpf_map *map)
{
	struct user_struct *user = map->user;

	atomic_long_sub(map->pages, &user->locked_vm);
	free_uid(user);
}

static int bpf_map_alloc_id(struct bpf_map *map)
{
	int id;

	spin_lock_bh(&map_idr_lock);
	id = idr_alloc_cyclic(&map_idr, map, 1, INT_MAX, GFP_ATOMIC);
	if (id > 0)
		map->id = id;
	spin_unlock_bh(&map_idr_lock);

	if (WARN_ON_ONCE(!id))
		return -ENOSPC;

	return id > 0 ? 0 : id;
}

static void bpf_map_free_id(struct bpf_map *map, bool do_idr_lock)
{
	unsigned long flags;

	if (do_idr_lock)
		spin_lock_irqsave(&map_idr_lock, flags);
	else
		__acquire(&map_idr_lock);

	idr_remove(&map_idr, map->id);

	if (do_idr_lock)
		spin_unlock_irqrestore(&map_idr_lock, flags);
	else
		__release(&map_idr_lock);
}

/* called from workqueue */
static void bpf_map_free_deferred(struct work_struct *work)
{
	struct bpf_map *map = container_of(work, struct bpf_map, work);

	bpf_map_uncharge_memlock(map);
	security_bpf_map_free(map);
	/* implementation dependent freeing */
	map->ops->map_free(map);
}

static void bpf_map_put_uref(struct bpf_map *map)
{
	if (atomic_dec_and_test(&map->usercnt)) {
		if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY)
			bpf_fd_array_map_clear(map);
	}
}

/* decrement map refcnt and schedule it for freeing via workqueue
 * (underlying map implementation ops->map_free() might sleep)
 */
static void __bpf_map_put(struct bpf_map *map, bool do_idr_lock)
{
	if (atomic_dec_and_test(&map->refcnt)) {
		/* bpf_map_free_id() must be called first */
		bpf_map_free_id(map, do_idr_lock);
		INIT_WORK(&map->work, bpf_map_free_deferred);
		schedule_work(&map->work);
	}
}

void bpf_map_put(struct bpf_map *map)
{
	__bpf_map_put(map, true);
}

void bpf_map_put_with_uref(struct bpf_map *map)
{
	bpf_map_put_uref(map);
	bpf_map_put(map);
}

static int bpf_map_release(struct inode *inode, struct file *filp)
{
	struct bpf_map *map = filp->private_data;

	if (map->ops->map_release)
		map->ops->map_release(map, filp);

	bpf_map_put_with_uref(map);
	return 0;
}

#ifdef CONFIG_PROC_FS
static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp)
{
	const struct bpf_map *map = filp->private_data;
	const struct bpf_array *array;
	u32 owner_prog_type = 0;
	u32 owner_jited = 0;

	if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY) {
		array = container_of(map, struct bpf_array, map);
		owner_prog_type = array->owner_prog_type;
		owner_jited = array->owner_jited;
	}

	seq_printf(m,
		   "map_type:\t%u\n"
		   "key_size:\t%u\n"
		   "value_size:\t%u\n"
		   "max_entries:\t%u\n"
		   "map_flags:\t%#x\n"
		   "memlock:\t%llu\n",
		   map->map_type,
		   map->key_size,
		   map->value_size,
		   map->max_entries,
		   map->map_flags,
		   map->pages * 1ULL << PAGE_SHIFT);

	if (owner_prog_type) {
		seq_printf(m, "owner_prog_type:\t%u\n",
			   owner_prog_type);
		seq_printf(m, "owner_jited:\t%u\n",
			   owner_jited);
	}
}
#endif

static ssize_t bpf_dummy_read(struct file *filp, char __user *buf, size_t siz,
			      loff_t *ppos)
{
	/* We need this handler such that alloc_file() enables
	 * f_mode with FMODE_CAN_READ.
	 */
	return -EINVAL;
}

static ssize_t bpf_dummy_write(struct file *filp, const char __user *buf,
			       size_t siz, loff_t *ppos)
{
	/* We need this handler such that alloc_file() enables
	 * f_mode with FMODE_CAN_WRITE.
	 */
	return -EINVAL;
}

const struct file_operations bpf_map_fops = {
#ifdef CONFIG_PROC_FS
	.show_fdinfo	= bpf_map_show_fdinfo,
#endif
	.release	= bpf_map_release,
	.read		= bpf_dummy_read,
	.write		= bpf_dummy_write,
};

int bpf_map_new_fd(struct bpf_map *map, int flags)
{
	int ret;

	ret = security_bpf_map(map, OPEN_FMODE(flags));
	if (ret < 0)
		return ret;

	return anon_inode_getfd("bpf-map", &bpf_map_fops, map,
				flags | O_CLOEXEC);
}

int bpf_get_file_flag(int flags)
{
	if ((flags & BPF_F_RDONLY) && (flags & BPF_F_WRONLY))
		return -EINVAL;
	if (flags & BPF_F_RDONLY)
		return O_RDONLY;
	if (flags & BPF_F_WRONLY)
		return O_WRONLY;
	return O_RDWR;
}

/* helper macro to check that unused fields of 'union bpf_attr' are zero */
#define CHECK_ATTR(CMD) \
	memchr_inv((void *) &attr->CMD##_LAST_FIELD + \
		   sizeof(attr->CMD##_LAST_FIELD), 0, \
		   sizeof(*attr) - \
		   offsetof(union bpf_attr, CMD##_LAST_FIELD) - \
		   sizeof(attr->CMD##_LAST_FIELD)) != NULL

/* dst and src must have at least BPF_OBJ_NAME_LEN number of bytes.
 * Return 0 on success and < 0 on error.
 */
static int bpf_obj_name_cpy(char *dst, const char *src)
{
	const char *end = src + BPF_OBJ_NAME_LEN;

	memset(dst, 0, BPF_OBJ_NAME_LEN);

	/* Copy all isalnum() and '_' chars */
	while (src < end && *src) {
		if (!isalnum(*src) && *src != '_')
			return -EINVAL;
		*dst++ = *src++;
	}

	/* No '\0' found in BPF_OBJ_NAME_LEN number of bytes */
	if (src == end)
		return -EINVAL;

	return 0;
}

#define BPF_MAP_CREATE_LAST_FIELD map_name
/* called via syscall */
static int map_create(union bpf_attr *attr)
{
	int numa_node = bpf_map_attr_numa_node(attr);
	struct bpf_map *map;
	int f_flags;
	int err;

	err = CHECK_ATTR(BPF_MAP_CREATE);
	if (err)
		return -EINVAL;

	f_flags = bpf_get_file_flag(attr->map_flags);
	if (f_flags < 0)
		return f_flags;

	if (numa_node != NUMA_NO_NODE &&
	    ((unsigned int)numa_node >= nr_node_ids ||
	     !node_online(numa_node)))
		return -EINVAL;

	/* find map type and init map: hashtable vs rbtree vs bloom vs ... */
	map = find_and_alloc_map(attr);
	if (IS_ERR(map))
		return PTR_ERR(map);

	err = bpf_obj_name_cpy(map->name, attr->map_name);
	if (err)
		goto free_map_nouncharge;

	atomic_set(&map->refcnt, 1);
	atomic_set(&map->usercnt, 1);

	err = security_bpf_map_alloc(map);
	if (err)
		goto free_map_nouncharge;

	err = bpf_map_charge_memlock(map);
	if (err)
		goto free_map_sec;

	err = bpf_map_alloc_id(map);
	if (err)
		goto free_map;

	err = bpf_map_new_fd(map, f_flags);
	if (err < 0) {
		/* failed to allocate fd.
		 * bpf_map_put() is needed because the above
		 * bpf_map_alloc_id() has published the map
		 * to the userspace and the userspace may
		 * have refcnt-ed it through BPF_MAP_GET_FD_BY_ID.
		 */
		bpf_map_put(map);
		return err;
	}

	trace_bpf_map_create(map, err);
	return err;

free_map:
	bpf_map_uncharge_memlock(map);
free_map_sec:
	security_bpf_map_free(map);
free_map_nouncharge:
	map->ops->map_free(map);
	return err;
}

/* if error is returned, fd is released.
 * On success caller should complete fd access with matching fdput()
 */
struct bpf_map *__bpf_map_get(struct fd f)
{
	if (!f.file)
		return ERR_PTR(-EBADF);
	if (f.file->f_op != &bpf_map_fops) {
		fdput(f);
		return ERR_PTR(-EINVAL);
	}

	return f.file->private_data;
}

/* prog's and map's refcnt limit */
#define BPF_MAX_REFCNT 32768

struct bpf_map *bpf_map_inc(struct bpf_map *map, bool uref)
{
	if (atomic_inc_return(&map->refcnt) > BPF_MAX_REFCNT) {
		atomic_dec(&map->refcnt);
		return ERR_PTR(-EBUSY);
	}
	if (uref)
		atomic_inc(&map->usercnt);
	return map;
}

struct bpf_map *bpf_map_get_with_uref(u32 ufd)
{
	struct fd f = fdget(ufd);
	struct bpf_map *map;

	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return map;

	map = bpf_map_inc(map, true);
	fdput(f);

	return map;
}

/* map_idr_lock should have been held */
static struct bpf_map *bpf_map_inc_not_zero(struct bpf_map *map,
					    bool uref)
{
	int refold;

	refold = __atomic_add_unless(&map->refcnt, 1, 0);

	if (refold >= BPF_MAX_REFCNT) {
		__bpf_map_put(map, false);
		return ERR_PTR(-EBUSY);
	}

	if (!refold)
		return ERR_PTR(-ENOENT);

	if (uref)
		atomic_inc(&map->usercnt);

	return map;
}

int __weak bpf_stackmap_copy(struct bpf_map *map, void *key, void *value)
{
	return -ENOTSUPP;
}

/* last field in 'union bpf_attr' used by this command */
#define BPF_MAP_LOOKUP_ELEM_LAST_FIELD value

static int map_lookup_elem(union bpf_attr *attr)
{
	void __user *ukey = u64_to_user_ptr(attr->key);
	void __user *uvalue = u64_to_user_ptr(attr->value);
	int ufd = attr->map_fd;
	struct bpf_map *map;
	void *key, *value, *ptr;
	u32 value_size;
	struct fd f;
	int err;

	if (CHECK_ATTR(BPF_MAP_LOOKUP_ELEM))
		return -EINVAL;

	f = fdget(ufd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);

	if (!(f.file->f_mode & FMODE_CAN_READ)) {
		err = -EPERM;
		goto err_put;
	}

	key = memdup_user(ukey, map->key_size);
	if (IS_ERR(key)) {
		err = PTR_ERR(key);
		goto err_put;
	}

	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
		value_size = round_up(map->value_size, 8) * num_possible_cpus();
	else if (IS_FD_MAP(map))
		value_size = sizeof(u32);
	else
		value_size = map->value_size;

	err = -ENOMEM;
	value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
	if (!value)
		goto free_key;

	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
		err = bpf_percpu_hash_copy(map, key, value);
	} else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
		err = bpf_percpu_array_copy(map, key, value);
	} else if (map->map_type == BPF_MAP_TYPE_STACK_TRACE) {
		err = bpf_stackmap_copy(map, key, value);
	} else if (IS_FD_ARRAY(map)) {
		err = bpf_fd_array_map_lookup_elem(map, key, value);
	} else if (IS_FD_HASH(map)) {
		err = bpf_fd_htab_map_lookup_elem(map, key, value);
	} else {
		rcu_read_lock();
		ptr = map->ops->map_lookup_elem(map, key);
		if (ptr)
			memcpy(value, ptr, value_size);
		rcu_read_unlock();
		err = ptr ? 0 : -ENOENT;
	}

	if (err)
		goto free_value;

	err = -EFAULT;
	if (copy_to_user(uvalue, value, value_size) != 0)
		goto free_value;

	trace_bpf_map_lookup_elem(map, ufd, key, value);
	err = 0;

free_value:
	kfree(value);
free_key:
	kfree(key);
err_put:
	fdput(f);
	return err;
}

#define BPF_MAP_UPDATE_ELEM_LAST_FIELD flags

static int map_update_elem(union bpf_attr *attr)
{
	void __user *ukey = u64_to_user_ptr(attr->key);
	void __user *uvalue = u64_to_user_ptr(attr->value);
	int ufd = attr->map_fd;
	struct bpf_map *map;
	void *key, *value;
	u32 value_size;
	struct fd f;
	int err;

	if (CHECK_ATTR(BPF_MAP_UPDATE_ELEM))
		return -EINVAL;

	f = fdget(ufd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);

	if (!(f.file->f_mode & FMODE_CAN_WRITE)) {
		err = -EPERM;
		goto err_put;
	}

	key = memdup_user(ukey, map->key_size);
	if (IS_ERR(key)) {
		err = PTR_ERR(key);
		goto err_put;
	}

	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
		value_size = round_up(map->value_size, 8) * num_possible_cpus();
	else
		value_size = map->value_size;

	err = -ENOMEM;
	value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
	if (!value)
		goto free_key;

	err = -EFAULT;
	if (copy_from_user(value, uvalue, value_size) != 0)
		goto free_value;

	/* Need to create a kthread, thus must support schedule */
	if (map->map_type == BPF_MAP_TYPE_CPUMAP) {
		err = map->ops->map_update_elem(map, key, value, attr->flags);
		goto out;
	}

	/* must increment bpf_prog_active to avoid kprobe+bpf triggering from
	 * inside a bpf map update or delete, otherwise deadlocks are possible
	 */
	preempt_disable();
	__this_cpu_inc(bpf_prog_active);
	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
		err = bpf_percpu_hash_update(map, key, value, attr->flags);
	} else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
		err = bpf_percpu_array_update(map, key, value, attr->flags);
	} else if (map->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY ||
		   map->map_type == BPF_MAP_TYPE_PROG_ARRAY ||
		   map->map_type == BPF_MAP_TYPE_CGROUP_ARRAY ||
		   map->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS) {
		rcu_read_lock();
		err = bpf_fd_array_map_update_elem(map, f.file, key, value,
						   attr->flags);
		rcu_read_unlock();
	} else if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) {
		rcu_read_lock();
		err = bpf_fd_htab_map_update_elem(map, f.file, key, value,
						  attr->flags);
		rcu_read_unlock();
	} else {
		rcu_read_lock();
		err = map->ops->map_update_elem(map, key, value, attr->flags);
		rcu_read_unlock();
	}
	__this_cpu_dec(bpf_prog_active);
	preempt_enable();
out:
	if (!err)
		trace_bpf_map_update_elem(map, ufd, key, value);
free_value:
	kfree(value);
free_key:
	kfree(key);
err_put:
	fdput(f);
	return err;
}

#define BPF_MAP_DELETE_ELEM_LAST_FIELD key

static int map_delete_elem(union bpf_attr *attr)
{
	void __user *ukey = u64_to_user_ptr(attr->key);
	int ufd = attr->map_fd;
	struct bpf_map *map;
	struct fd f;
	void *key;
	int err;

	if (CHECK_ATTR(BPF_MAP_DELETE_ELEM))
		return -EINVAL;

	f = fdget(ufd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);

	if (!(f.file->f_mode & FMODE_CAN_WRITE)) {
		err = -EPERM;
		goto err_put;
	}

	key = memdup_user(ukey, map->key_size);
	if (IS_ERR(key)) {
		err = PTR_ERR(key);
		goto err_put;
	}

	preempt_disable();
	__this_cpu_inc(bpf_prog_active);
	rcu_read_lock();
	err = map->ops->map_delete_elem(map, key);
	rcu_read_unlock();
	__this_cpu_dec(bpf_prog_active);
	preempt_enable();

	if (!err)
		trace_bpf_map_delete_elem(map, ufd, key);
	kfree(key);
err_put:
	fdput(f);
	return err;
}

/* last field in 'union bpf_attr' used by this command */
#define BPF_MAP_GET_NEXT_KEY_LAST_FIELD next_key

static int map_get_next_key(union bpf_attr *attr)
{
	void __user *ukey = u64_to_user_ptr(attr->key);
	void __user *unext_key = u64_to_user_ptr(attr->next_key);
	int ufd = attr->map_fd;
	struct bpf_map *map;
	void *key, *next_key;
	struct fd f;
	int err;

	if (CHECK_ATTR(BPF_MAP_GET_NEXT_KEY))
		return -EINVAL;

	f = fdget(ufd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);

	if (!(f.file->f_mode & FMODE_CAN_READ)) {
		err = -EPERM;
		goto err_put;
	}

	if (ukey) {
		key = memdup_user(ukey, map->key_size);
		if (IS_ERR(key)) {
			err = PTR_ERR(key);
			goto err_put;
		}
	} else {
		key = NULL;
	}

	err = -ENOMEM;
	next_key = kmalloc(map->key_size, GFP_USER);
	if (!next_key)
		goto free_key;

	rcu_read_lock();
	err = map->ops->map_get_next_key(map, key, next_key);
	rcu_read_unlock();
	if (err)
		goto free_next_key;

	err = -EFAULT;
	if (copy_to_user(unext_key, next_key, map->key_size) != 0)
		goto free_next_key;

	trace_bpf_map_next_key(map, ufd, key, next_key);
	err = 0;

free_next_key:
	kfree(next_key);
free_key:
	kfree(key);
err_put:
	fdput(f);
	return err;
}

static const struct bpf_prog_ops * const bpf_prog_types[] = {
#define BPF_PROG_TYPE(_id, _name) \
	[_id] = & _name ## _prog_ops,
#define BPF_MAP_TYPE(_id, _ops)
#include <linux/bpf_types.h>
#undef BPF_PROG_TYPE
#undef BPF_MAP_TYPE
};

static int find_prog_type(enum bpf_prog_type type, struct bpf_prog *prog)
{
	if (type >= ARRAY_SIZE(bpf_prog_types) || !bpf_prog_types[type])
		return -EINVAL;

	if (!bpf_prog_is_dev_bound(prog->aux))
		prog->aux->ops = bpf_prog_types[type];
	else
		prog->aux->ops = &bpf_offload_prog_ops;
	prog->type = type;
	return 0;
}

/* drop refcnt on maps used by eBPF program and free auxiliary data */
static void free_used_maps(struct bpf_prog_aux *aux)
{
	int i;

	for (i = 0; i < aux->used_map_cnt; i++)
		bpf_map_put(aux->used_maps[i]);

	kfree(aux->used_maps);
}

int __bpf_prog_charge(struct user_struct *user, u32 pages)
{
	unsigned long memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
	unsigned long user_bufs;

	if (user) {
		user_bufs = atomic_long_add_return(pages, &user->locked_vm);
		if (user_bufs > memlock_limit) {
			atomic_long_sub(pages, &user->locked_vm);
			return -EPERM;
		}
	}

	return 0;
}

void __bpf_prog_uncharge(struct user_struct *user, u32 pages)
{
	if (user)
		atomic_long_sub(pages, &user->locked_vm);
}

static int bpf_prog_charge_memlock(struct bpf_prog *prog)
{
	struct user_struct *user = get_current_user();
	int ret;

	ret = __bpf_prog_charge(user, prog->pages);
	if (ret) {
		free_uid(user);
		return ret;
	}

	prog->aux->user = user;
	return 0;
}

static void bpf_prog_uncharge_memlock(struct bpf_prog *prog)
{
	struct user_struct *user = prog->aux->user;

	__bpf_prog_uncharge(user, prog->pages);
	free_uid(user);
}

static int bpf_prog_alloc_id(struct bpf_prog *prog)
{
	int id;

	spin_lock_bh(&prog_idr_lock);
	id = idr_alloc_cyclic(&prog_idr, prog, 1, INT_MAX, GFP_ATOMIC);
	if (id > 0)
		prog->aux->id = id;
	spin_unlock_bh(&prog_idr_lock);

	/* id is in [1, INT_MAX) */
	if (WARN_ON_ONCE(!id))
		return -ENOSPC;

	return id > 0 ? 0 : id;
}

void bpf_prog_free_id(struct bpf_prog *prog, bool do_idr_lock)
{
	/* cBPF to eBPF migrations are currently not in the idr store.
	 * Offloaded programs are removed from the store when their device
	 * disappears - even if someone grabs an fd to them they are unusable,
	 * simply waiting for refcnt to drop to be freed.
	 */
	if (!prog->aux->id)
		return;

	if (do_idr_lock)
		spin_lock_bh(&prog_idr_lock);
	else
		__acquire(&prog_idr_lock);

	idr_remove(&prog_idr, prog->aux->id);
	prog->aux->id = 0;

	if (do_idr_lock)
		spin_unlock_bh(&prog_idr_lock);
	else
		__release(&prog_idr_lock);
}

static void __bpf_prog_put_rcu(struct rcu_head *rcu)
{
	struct bpf_prog_aux *aux = container_of(rcu, struct bpf_prog_aux, rcu);

	free_used_maps(aux);
	bpf_prog_uncharge_memlock(aux->prog);
	security_bpf_prog_free(aux);
	bpf_prog_free(aux->prog);
}

static void __bpf_prog_put(struct bpf_prog *prog, bool do_idr_lock)
{
	if (atomic_dec_and_test(&prog->aux->refcnt)) {
		int i;

		trace_bpf_prog_put_rcu(prog);
		/* bpf_prog_free_id() must be called first */
		bpf_prog_free_id(prog, do_idr_lock);

		for (i = 0; i < prog->aux->func_cnt; i++)
			bpf_prog_kallsyms_del(prog->aux->func[i]);
		bpf_prog_kallsyms_del(prog);

		call_rcu(&prog->aux->rcu, __bpf_prog_put_rcu);
	}
}

void bpf_prog_put(struct bpf_prog *prog)
{
	__bpf_prog_put(prog, true);
}
EXPORT_SYMBOL_GPL(bpf_prog_put);

static int bpf_prog_release(struct inode *inode, struct file *filp)
{
	struct bpf_prog *prog = filp->private_data;

	bpf_prog_put(prog);
	return 0;
}

#ifdef CONFIG_PROC_FS
static void bpf_prog_show_fdinfo(struct seq_file *m, struct file *filp)
{
	const struct bpf_prog *prog = filp->private_data;
	char prog_tag[sizeof(prog->tag) * 2 + 1] = { };

	bin2hex(prog_tag, prog->tag, sizeof(prog->tag));
	seq_printf(m,
		   "prog_type:\t%u\n"
		   "prog_jited:\t%u\n"
		   "prog_tag:\t%s\n"
		   "memlock:\t%llu\n",
		   prog->type,
		   prog->jited,
		   prog_tag,
		   prog->pages * 1ULL << PAGE_SHIFT);
}
#endif

const struct file_operations bpf_prog_fops = {
#ifdef CONFIG_PROC_FS
	.show_fdinfo	= bpf_prog_show_fdinfo,
#endif
	.release	= bpf_prog_release,
	.read		= bpf_dummy_read,
	.write		= bpf_dummy_write,
};

int bpf_prog_new_fd(struct bpf_prog *prog)
{
	int ret;

	ret = security_bpf_prog(prog);
	if (ret < 0)
		return ret;

	return anon_inode_getfd("bpf-prog", &bpf_prog_fops, prog,
				O_RDWR | O_CLOEXEC);
}

static struct bpf_prog *____bpf_prog_get(struct fd f)
{
	if (!f.file)
		return ERR_PTR(-EBADF);
	if (f.file->f_op != &bpf_prog_fops) {
		fdput(f);
		return ERR_PTR(-EINVAL);
	}

	return f.file->private_data;
}

struct bpf_prog *bpf_prog_add(struct bpf_prog *prog, int i)
{
	if (atomic_add_return(i, &prog->aux->refcnt) > BPF_MAX_REFCNT) {
		atomic_sub(i, &prog->aux->refcnt);
		return ERR_PTR(-EBUSY);
	}
	return prog;
}
EXPORT_SYMBOL_GPL(bpf_prog_add);

void bpf_prog_sub(struct bpf_prog *prog, int i)
{
	/* Only to be used for undoing previous bpf_prog_add() in some
	 * error path. We still know that another entity in our call
	 * path holds a reference to the program, thus atomic_sub() can
	 * be safely used in such cases!
	 */
	WARN_ON(atomic_sub_return(i, &prog->aux->refcnt) == 0);
}
EXPORT_SYMBOL_GPL(bpf_prog_sub);

struct bpf_prog *bpf_prog_inc(struct bpf_prog *prog)
{
	return bpf_prog_add(prog, 1);
}
EXPORT_SYMBOL_GPL(bpf_prog_inc);

/* prog_idr_lock should have been held */
struct bpf_prog *bpf_prog_inc_not_zero(struct bpf_prog *prog)
{
	int refold;

	refold = __atomic_add_unless(&prog->aux->refcnt, 1, 0);

	if (refold >= BPF_MAX_REFCNT) {
		__bpf_prog_put(prog, false);
		return ERR_PTR(-EBUSY);
	}

	if (!refold)
		return ERR_PTR(-ENOENT);

	return prog;
}
EXPORT_SYMBOL_GPL(bpf_prog_inc_not_zero);

static bool bpf_prog_get_ok(struct bpf_prog *prog,
			    enum bpf_prog_type *attach_type, bool attach_drv)
{
	/* not an attachment, just a refcount inc, always allow */
	if (!attach_type)
		return true;

	if (prog->type != *attach_type)
		return false;
	if (bpf_prog_is_dev_bound(prog->aux) && !attach_drv)
		return false;

	return true;
}

static struct bpf_prog *__bpf_prog_get(u32 ufd, enum bpf_prog_type *attach_type,
				       bool attach_drv)
{
	struct fd f = fdget(ufd);
	struct bpf_prog *prog;

	prog = ____bpf_prog_get(f);
	if (IS_ERR(prog))
		return prog;
	if (!bpf_prog_get_ok(prog, attach_type, attach_drv)) {
		prog = ERR_PTR(-EINVAL);
		goto out;
	}

	prog = bpf_prog_inc(prog);
out:
	fdput(f);
	return prog;
}

struct bpf_prog *bpf_prog_get(u32 ufd)
{
	return __bpf_prog_get(ufd, NULL, false);
}

struct bpf_prog *bpf_prog_get_type_dev(u32 ufd, enum bpf_prog_type type,
				       bool attach_drv)
{
	struct bpf_prog *prog = __bpf_prog_get(ufd, &type, attach_drv);

	if (!IS_ERR(prog))
		trace_bpf_prog_get_type(prog);
	return prog;
}
EXPORT_SYMBOL_GPL(bpf_prog_get_type_dev);

/* last field in 'union bpf_attr' used by this command */
#define BPF_PROG_LOAD_LAST_FIELD prog_ifindex

static int bpf_prog_load(union bpf_attr *attr)
{
	enum bpf_prog_type type = attr->prog_type;
	struct bpf_prog *prog;
	int err;
	char license[128];
	bool is_gpl;

	if (CHECK_ATTR(BPF_PROG_LOAD))
		return -EINVAL;

	if (attr->prog_flags & ~BPF_F_STRICT_ALIGNMENT)
		return -EINVAL;

	/* copy eBPF program license from user space */
	if (strncpy_from_user(license, u64_to_user_ptr(attr->license),
			      sizeof(license) - 1) < 0)
		return -EFAULT;
	license[sizeof(license) - 1] = 0;

	/* eBPF programs must be GPL compatible to use GPL-ed functions */
	is_gpl = license_is_gpl_compatible(license);
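	/* The result is recorded in prog->gpl_compatible below; the verifier
	 * relies on it to reject calls to GPL-only helpers from non-GPL
	 * programs.
	 */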

	if (attr->insn_cnt == 0 || attr->insn_cnt > BPF_MAXINSNS)
		return -E2BIG;

	if (type == BPF_PROG_TYPE_KPROBE &&
	    attr->kern_version != LINUX_VERSION_CODE)
		return -EINVAL;

	if (type != BPF_PROG_TYPE_SOCKET_FILTER &&
	    type != BPF_PROG_TYPE_CGROUP_SKB &&
	    !capable(CAP_SYS_ADMIN))
		return -EPERM;

	/* plain bpf_prog allocation */
	prog = bpf_prog_alloc(bpf_prog_size(attr->insn_cnt), GFP_USER);
	if (!prog)
		return -ENOMEM;

	prog->aux->offload_requested = !!attr->prog_ifindex;

	err = security_bpf_prog_alloc(prog->aux);
	if (err)
		goto free_prog_nouncharge;

	err = bpf_prog_charge_memlock(prog);
	if (err)
		goto free_prog_sec;

	prog->len = attr->insn_cnt;

	err = -EFAULT;
	if (copy_from_user(prog->insns, u64_to_user_ptr(attr->insns),
			   bpf_prog_insn_size(prog)) != 0)
		goto free_prog;

	prog->orig_prog = NULL;
	prog->jited = 0;

	atomic_set(&prog->aux->refcnt, 1);
	prog->gpl_compatible = is_gpl ? 1 : 0;

	if (bpf_prog_is_dev_bound(prog->aux)) {
		err = bpf_prog_offload_init(prog, attr);
		if (err)
			goto free_prog;
	}

	/* find program type: socket_filter vs tracing_filter */
	err = find_prog_type(type, prog);
	if (err < 0)
		goto free_prog;

	prog->aux->load_time = ktime_get_boot_ns();
	err = bpf_obj_name_cpy(prog->aux->name, attr->prog_name);
	if (err)
		goto free_prog;

	/* run eBPF verifier */
	err = bpf_check(&prog, attr);
	if (err < 0)
		goto free_used_maps;

	/* eBPF program is ready to be JITed */
	if (!prog->bpf_func)
		prog = bpf_prog_select_runtime(prog, &err);
	if (err < 0)
		goto free_used_maps;

	err = bpf_prog_alloc_id(prog);
	if (err)
		goto free_used_maps;

	err = bpf_prog_new_fd(prog);
	if (err < 0) {
		/* failed to allocate fd.
		 * bpf_prog_put() is needed because the above
		 * bpf_prog_alloc_id() has published the prog
		 * to the userspace and the userspace may
		 * have refcnt-ed it through BPF_PROG_GET_FD_BY_ID.
		 */
		bpf_prog_put(prog);
		return err;
	}

	bpf_prog_kallsyms_add(prog);
	trace_bpf_prog_load(prog, err);
	return err;

free_used_maps:
	free_used_maps(prog->aux);
free_prog:
	bpf_prog_uncharge_memlock(prog);
free_prog_sec:
	security_bpf_prog_free(prog->aux);
free_prog_nouncharge:
	bpf_prog_free(prog);
	return err;
}

#define BPF_OBJ_LAST_FIELD file_flags

static int bpf_obj_pin(const union bpf_attr *attr)
{
	if (CHECK_ATTR(BPF_OBJ) || attr->file_flags != 0)
		return -EINVAL;

	return bpf_obj_pin_user(attr->bpf_fd, u64_to_user_ptr(attr->pathname));
}

static int bpf_obj_get(const union bpf_attr *attr)
{
	if (CHECK_ATTR(BPF_OBJ) || attr->bpf_fd != 0 ||
	    attr->file_flags & ~BPF_OBJ_FLAG_MASK)
		return -EINVAL;

	return bpf_obj_get_user(u64_to_user_ptr(attr->pathname),
				attr->file_flags);
}

#ifdef CONFIG_CGROUP_BPF

#define BPF_PROG_ATTACH_LAST_FIELD attach_flags

static int sockmap_get_from_fd(const union bpf_attr *attr, bool attach)
{
	struct bpf_prog *prog = NULL;
	int ufd = attr->target_fd;
	struct bpf_map *map;
	struct fd f;
	int err;

	f = fdget(ufd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);

	if (attach) {
		prog = bpf_prog_get_type(attr->attach_bpf_fd,
					 BPF_PROG_TYPE_SK_SKB);
		if (IS_ERR(prog)) {
			fdput(f);
			return PTR_ERR(prog);
		}
	}

	err = sock_map_prog(map, prog, attr->attach_type);
	if (err) {
		fdput(f);
		if (prog)
			bpf_prog_put(prog);
		return err;
	}

	fdput(f);
	return 0;
}

#define BPF_F_ATTACH_MASK \
	(BPF_F_ALLOW_OVERRIDE | BPF_F_ALLOW_MULTI)

static int bpf_prog_attach(const union bpf_attr *attr)
{
	enum bpf_prog_type ptype;
	struct bpf_prog *prog;
	struct cgroup *cgrp;
	int ret;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	if (CHECK_ATTR(BPF_PROG_ATTACH))
		return -EINVAL;

	if (attr->attach_flags & ~BPF_F_ATTACH_MASK)
		return -EINVAL;

	switch (attr->attach_type) {
	case BPF_CGROUP_INET_INGRESS:
	case BPF_CGROUP_INET_EGRESS:
		ptype = BPF_PROG_TYPE_CGROUP_SKB;
		break;
	case BPF_CGROUP_INET_SOCK_CREATE:
		ptype = BPF_PROG_TYPE_CGROUP_SOCK;
		break;
	case BPF_CGROUP_SOCK_OPS:
		ptype = BPF_PROG_TYPE_SOCK_OPS;
		break;
	case BPF_CGROUP_DEVICE:
		ptype = BPF_PROG_TYPE_CGROUP_DEVICE;
		break;
	case BPF_SK_SKB_STREAM_PARSER:
	case BPF_SK_SKB_STREAM_VERDICT:
		return sockmap_get_from_fd(attr, true);
	default:
		return -EINVAL;
	}

	prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype);
	if (IS_ERR(prog))
		return PTR_ERR(prog);

	cgrp = cgroup_get_from_fd(attr->target_fd);
	if (IS_ERR(cgrp)) {
		bpf_prog_put(prog);
		return PTR_ERR(cgrp);
	}

	ret = cgroup_bpf_attach(cgrp, prog, attr->attach_type,
				attr->attach_flags);
	if (ret)
		bpf_prog_put(prog);
	cgroup_put(cgrp);

	return ret;
}

#define BPF_PROG_DETACH_LAST_FIELD attach_type

static int bpf_prog_detach(const union bpf_attr *attr)
{
	enum bpf_prog_type ptype;
	struct bpf_prog *prog;
	struct cgroup *cgrp;
	int ret;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	if (CHECK_ATTR(BPF_PROG_DETACH))
		return -EINVAL;

	switch (attr->attach_type) {
	case BPF_CGROUP_INET_INGRESS:
	case BPF_CGROUP_INET_EGRESS:
		ptype = BPF_PROG_TYPE_CGROUP_SKB;
		break;
	case BPF_CGROUP_INET_SOCK_CREATE:
		ptype = BPF_PROG_TYPE_CGROUP_SOCK;
		break;
	case BPF_CGROUP_SOCK_OPS:
		ptype = BPF_PROG_TYPE_SOCK_OPS;
		break;
	case BPF_CGROUP_DEVICE:
		ptype = BPF_PROG_TYPE_CGROUP_DEVICE;
		break;
	case BPF_SK_SKB_STREAM_PARSER:
	case BPF_SK_SKB_STREAM_VERDICT:
		return sockmap_get_from_fd(attr, false);
	default:
		return -EINVAL;
	}

	cgrp = cgroup_get_from_fd(attr->target_fd);
	if (IS_ERR(cgrp))
		return PTR_ERR(cgrp);

	prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype);
	if (IS_ERR(prog))
		prog = NULL;

	ret = cgroup_bpf_detach(cgrp, prog, attr->attach_type, 0);
	if (prog)
		bpf_prog_put(prog);
	cgroup_put(cgrp);
	return ret;
}

#define BPF_PROG_QUERY_LAST_FIELD query.prog_cnt

static int bpf_prog_query(const union bpf_attr *attr,
			  union bpf_attr __user *uattr)
{
	struct cgroup *cgrp;
	int ret;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;
	if (CHECK_ATTR(BPF_PROG_QUERY))
		return -EINVAL;
	if (attr->query.query_flags & ~BPF_F_QUERY_EFFECTIVE)
		return -EINVAL;

	switch (attr->query.attach_type) {
	case BPF_CGROUP_INET_INGRESS:
	case BPF_CGROUP_INET_EGRESS:
	case BPF_CGROUP_INET_SOCK_CREATE:
	case BPF_CGROUP_SOCK_OPS:
	case BPF_CGROUP_DEVICE:
		break;
	default:
		return -EINVAL;
	}
	cgrp = cgroup_get_from_fd(attr->query.target_fd);
	if (IS_ERR(cgrp))
		return PTR_ERR(cgrp);
	ret = cgroup_bpf_query(cgrp, attr, uattr);
	cgroup_put(cgrp);
	return ret;
}
#endif /* CONFIG_CGROUP_BPF */

#define BPF_PROG_TEST_RUN_LAST_FIELD test.duration

static int bpf_prog_test_run(const union bpf_attr *attr,
			     union bpf_attr __user *uattr)
{
	struct bpf_prog *prog;
	int ret = -ENOTSUPP;

	if (CHECK_ATTR(BPF_PROG_TEST_RUN))
		return -EINVAL;

	prog = bpf_prog_get(attr->test.prog_fd);
	if (IS_ERR(prog))
		return PTR_ERR(prog);

	if (prog->aux->ops->test_run)
		ret = prog->aux->ops->test_run(prog, attr, uattr);

	bpf_prog_put(prog);
	return ret;
}

#define BPF_OBJ_GET_NEXT_ID_LAST_FIELD next_id

static int bpf_obj_get_next_id(const union bpf_attr *attr,
			       union bpf_attr __user *uattr,
			       struct idr *idr,
			       spinlock_t *lock)
{
	u32 next_id = attr->start_id;
	int err = 0;

	if (CHECK_ATTR(BPF_OBJ_GET_NEXT_ID) || next_id >= INT_MAX)
		return -EINVAL;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	next_id++;
	spin_lock_bh(lock);
	if (!idr_get_next(idr, &next_id))
		err = -ENOENT;
	spin_unlock_bh(lock);

	if (!err)
		err = put_user(next_id, &uattr->next_id);

	return err;
}

#define BPF_PROG_GET_FD_BY_ID_LAST_FIELD prog_id

static int bpf_prog_get_fd_by_id(const union bpf_attr *attr)
{
	struct bpf_prog *prog;
	u32 id = attr->prog_id;
	int fd;

	if (CHECK_ATTR(BPF_PROG_GET_FD_BY_ID))
		return -EINVAL;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	spin_lock_bh(&prog_idr_lock);
	prog = idr_find(&prog_idr, id);
	if (prog)
		prog = bpf_prog_inc_not_zero(prog);
	else
		prog = ERR_PTR(-ENOENT);
	spin_unlock_bh(&prog_idr_lock);

	if (IS_ERR(prog))
		return PTR_ERR(prog);

	fd = bpf_prog_new_fd(prog);
	if (fd < 0)
		bpf_prog_put(prog);

	return fd;
}

#define BPF_MAP_GET_FD_BY_ID_LAST_FIELD open_flags

static int bpf_map_get_fd_by_id(const union bpf_attr *attr)
{
	struct bpf_map *map;
	u32 id = attr->map_id;
	int f_flags;
	int fd;

	if (CHECK_ATTR(BPF_MAP_GET_FD_BY_ID) ||
	    attr->open_flags & ~BPF_OBJ_FLAG_MASK)
		return -EINVAL;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	f_flags = bpf_get_file_flag(attr->open_flags);
	if (f_flags < 0)
		return f_flags;

	spin_lock_bh(&map_idr_lock);
	map = idr_find(&map_idr, id);
	if (map)
		map = bpf_map_inc_not_zero(map, true);
	else
		map = ERR_PTR(-ENOENT);
	spin_unlock_bh(&map_idr_lock);

	if (IS_ERR(map))
		return PTR_ERR(map);

	fd = bpf_map_new_fd(map, f_flags);
	if (fd < 0)
		bpf_map_put(map);

	return fd;
}

static const struct bpf_map *bpf_map_from_imm(const struct bpf_prog *prog,
					      unsigned long addr)
{
	int i;

	for (i = 0; i < prog->aux->used_map_cnt; i++)
		if (prog->aux->used_maps[i] == (void *)addr)
			return prog->aux->used_maps[i];
	return NULL;
}

static struct bpf_insn *bpf_insn_prepare_dump(const struct bpf_prog *prog)
{
	const struct bpf_map *map;
	struct bpf_insn *insns;
	u64 imm;
	int i;

	insns = kmemdup(prog->insnsi, bpf_prog_insn_size(prog),
			GFP_USER);
	if (!insns)
		return insns;

	for (i = 0; i < prog->len; i++) {
		if (insns[i].code == (BPF_JMP | BPF_TAIL_CALL)) {
			insns[i].code = BPF_JMP | BPF_CALL;
			insns[i].imm = BPF_FUNC_tail_call;
			/* fall-through */
		}
		if (insns[i].code == (BPF_JMP | BPF_CALL) ||
		    insns[i].code == (BPF_JMP | BPF_CALL_ARGS)) {
			if (insns[i].code == (BPF_JMP | BPF_CALL_ARGS))
				insns[i].code = BPF_JMP | BPF_CALL;
			if (!bpf_dump_raw_ok())
				insns[i].imm = 0;
			continue;
		}

		if (insns[i].code != (BPF_LD | BPF_IMM | BPF_DW))
			continue;

		imm = ((u64)insns[i + 1].imm << 32) | (u32)insns[i].imm;
		map = bpf_map_from_imm(prog, imm);
		if (map) {
			insns[i].src_reg = BPF_PSEUDO_MAP_FD;
			insns[i].imm = map->id;
			insns[i + 1].imm = 0;
			continue;
		}

		if (!bpf_dump_raw_ok() &&
		    imm == (unsigned long)prog->aux) {
			insns[i].imm = 0;
			insns[i + 1].imm = 0;
			continue;
		}
	}

	return insns;
}

static int bpf_prog_get_info_by_fd(struct bpf_prog *prog,
				   const union bpf_attr *attr,
				   union bpf_attr __user *uattr)
{
	struct bpf_prog_info __user *uinfo = u64_to_user_ptr(attr->info.info);
	struct bpf_prog_info info = {};
	u32 info_len = attr->info.info_len;
	char __user *uinsns;
	u32 ulen;
	int err;

	err = check_uarg_tail_zero(uinfo, sizeof(info), info_len);
	if (err)
		return err;
	info_len = min_t(u32, sizeof(info), info_len);

	if (copy_from_user(&info, uinfo, info_len))
		return -EFAULT;

	info.type = prog->type;
	info.id = prog->aux->id;
	info.load_time = prog->aux->load_time;
	info.created_by_uid = from_kuid_munged(current_user_ns(),
					       prog->aux->user->uid);

	memcpy(info.tag, prog->tag, sizeof(prog->tag));
	memcpy(info.name, prog->aux->name, sizeof(prog->aux->name));
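
	/* nr_map_ids is in/out: user space passes the capacity of its
	 * map_ids array, the kernel reports the actual count and copies
	 * at most the smaller of the two.
	 */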
	ulen = info.nr_map_ids;
	info.nr_map_ids = prog->aux->used_map_cnt;
	ulen = min_t(u32, info.nr_map_ids, ulen);
	if (ulen) {
		u32 __user *user_map_ids = u64_to_user_ptr(info.map_ids);
		u32 i;

		for (i = 0; i < ulen; i++)
			if (put_user(prog->aux->used_maps[i]->id,
				     &user_map_ids[i]))
				return -EFAULT;
	}

	if (!capable(CAP_SYS_ADMIN)) {
		info.jited_prog_len = 0;
		info.xlated_prog_len = 0;
		goto done;
	}

	ulen = info.jited_prog_len;
	info.jited_prog_len = prog->jited_len;
	if (info.jited_prog_len && ulen) {
		if (bpf_dump_raw_ok()) {
			uinsns = u64_to_user_ptr(info.jited_prog_insns);
			ulen = min_t(u32, info.jited_prog_len, ulen);
			if (copy_to_user(uinsns, prog->bpf_func, ulen))
				return -EFAULT;
		} else {
			info.jited_prog_insns = 0;
		}
	}

	ulen = info.xlated_prog_len;
	info.xlated_prog_len = bpf_prog_insn_size(prog);
	if (info.xlated_prog_len && ulen) {
		struct bpf_insn *insns_sanitized;
		bool fault;

		if (prog->blinded && !bpf_dump_raw_ok()) {
			info.xlated_prog_insns = 0;
			goto done;
		}
		insns_sanitized = bpf_insn_prepare_dump(prog);
		if (!insns_sanitized)
			return -ENOMEM;
		uinsns = u64_to_user_ptr(info.xlated_prog_insns);
		ulen = min_t(u32, info.xlated_prog_len, ulen);
		fault = copy_to_user(uinsns, insns_sanitized, ulen);
		kfree(insns_sanitized);
		if (fault)
			return -EFAULT;
	}

	if (bpf_prog_is_dev_bound(prog->aux)) {
		err = bpf_prog_offload_info_fill(&info, prog);
		if (err)
			return err;
	}

done:
	if (copy_to_user(uinfo, &info, info_len) ||
	    put_user(info_len, &uattr->info.info_len))
		return -EFAULT;

	return 0;
}

static int bpf_map_get_info_by_fd(struct bpf_map *map,
				  const union bpf_attr *attr,
				  union bpf_attr __user *uattr)
{
	struct bpf_map_info __user *uinfo = u64_to_user_ptr(attr->info.info);
	struct bpf_map_info info = {};
	u32 info_len = attr->info.info_len;
	int err;

	err = check_uarg_tail_zero(uinfo, sizeof(info), info_len);
	if (err)
		return err;
	info_len = min_t(u32, sizeof(info), info_len);

	info.type = map->map_type;
	info.id = map->id;
	info.key_size = map->key_size;
	info.value_size = map->value_size;
	info.max_entries = map->max_entries;
	info.map_flags = map->map_flags;
	memcpy(info.name, map->name, sizeof(map->name));

	if (copy_to_user(uinfo, &info, info_len) ||
	    put_user(info_len, &uattr->info.info_len))
		return -EFAULT;

	return 0;
}

#define BPF_OBJ_GET_INFO_BY_FD_LAST_FIELD info.info

static int bpf_obj_get_info_by_fd(const union bpf_attr *attr,
				  union bpf_attr __user *uattr)
{
	int ufd = attr->info.bpf_fd;
	struct fd f;
	int err;

	if (CHECK_ATTR(BPF_OBJ_GET_INFO_BY_FD))
		return -EINVAL;

	f = fdget(ufd);
	if (!f.file)
		return -EBADFD;

	if (f.file->f_op == &bpf_prog_fops)
		err = bpf_prog_get_info_by_fd(f.file->private_data, attr,
					      uattr);
	else if (f.file->f_op == &bpf_map_fops)
		err = bpf_map_get_info_by_fd(f.file->private_data, attr,
					     uattr);
	else
		err = -EINVAL;

	fdput(f);
	return err;
}

SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
{
	union bpf_attr attr = {};
	int err;

	if (!capable(CAP_SYS_ADMIN) && sysctl_unprivileged_bpf_disabled)
		return -EPERM;

	err = check_uarg_tail_zero(uattr, sizeof(attr), size);
	if (err)
		return err;
	size = min_t(u32, size, sizeof(attr));

	/* copy attributes from user space, may be less than sizeof(bpf_attr) */
	if (copy_from_user(&attr, uattr, size) != 0)
		return -EFAULT;

	err = security_bpf(cmd, &attr, size);
	if (err < 0)
		return err;

	switch (cmd) {
	case BPF_MAP_CREATE:
		err = map_create(&attr);
		break;
	case BPF_MAP_LOOKUP_ELEM:
		err = map_lookup_elem(&attr);
		break;
	case BPF_MAP_UPDATE_ELEM:
		err = map_update_elem(&attr);
		break;
	case BPF_MAP_DELETE_ELEM:
		err = map_delete_elem(&attr);
		break;
	case BPF_MAP_GET_NEXT_KEY:
		err = map_get_next_key(&attr);
		break;
	case BPF_PROG_LOAD:
		err = bpf_prog_load(&attr);
		break;
	case BPF_OBJ_PIN:
		err = bpf_obj_pin(&attr);
		break;
	case BPF_OBJ_GET:
		err = bpf_obj_get(&attr);
		break;
#ifdef CONFIG_CGROUP_BPF
	case BPF_PROG_ATTACH:
		err = bpf_prog_attach(&attr);
		break;
	case BPF_PROG_DETACH:
		err = bpf_prog_detach(&attr);
		break;
	case BPF_PROG_QUERY:
		err = bpf_prog_query(&attr, uattr);
		break;
#endif
	case BPF_PROG_TEST_RUN:
		err = bpf_prog_test_run(&attr, uattr);
		break;
	case BPF_PROG_GET_NEXT_ID:
		err = bpf_obj_get_next_id(&attr, uattr,
					  &prog_idr, &prog_idr_lock);
		break;
	case BPF_MAP_GET_NEXT_ID:
		err = bpf_obj_get_next_id(&attr, uattr,
					  &map_idr, &map_idr_lock);
		break;
	case BPF_PROG_GET_FD_BY_ID:
		err = bpf_prog_get_fd_by_id(&attr);
		break;
	case BPF_MAP_GET_FD_BY_ID:
		err = bpf_map_get_fd_by_id(&attr);
		break;
	case BPF_OBJ_GET_INFO_BY_FD:
		err = bpf_obj_get_info_by_fd(&attr, uattr);
		break;
	default:
		err = -EINVAL;
		break;
	}

	return err;
}