/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 */
#include <linux/bpf.h>
#include <linux/bpf_trace.h>
#include <linux/syscalls.h>
#include <linux/slab.h>
#include <linux/sched/signal.h>
#include <linux/vmalloc.h>
#include <linux/mmzone.h>
#include <linux/anon_inodes.h>
#include <linux/file.h>
#include <linux/license.h>
#include <linux/filter.h>
#include <linux/version.h>
#include <linux/kernel.h>
#include <linux/idr.h>
#include <linux/cred.h>
#include <linux/timekeeping.h>
#include <linux/ctype.h>

#define IS_FD_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PROG_ARRAY || \
			  (map)->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY || \
			  (map)->map_type == BPF_MAP_TYPE_CGROUP_ARRAY || \
			  (map)->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS)
#define IS_FD_HASH(map) ((map)->map_type == BPF_MAP_TYPE_HASH_OF_MAPS)
#define IS_FD_MAP(map) (IS_FD_ARRAY(map) || IS_FD_HASH(map))

#define BPF_OBJ_FLAG_MASK	(BPF_F_RDONLY | BPF_F_WRONLY)

DEFINE_PER_CPU(int, bpf_prog_active);
static DEFINE_IDR(prog_idr);
static DEFINE_SPINLOCK(prog_idr_lock);
static DEFINE_IDR(map_idr);
static DEFINE_SPINLOCK(map_idr_lock);

int sysctl_unprivileged_bpf_disabled __read_mostly;

static const struct bpf_map_ops * const bpf_map_types[] = {
#define BPF_PROG_TYPE(_id, _ops)
#define BPF_MAP_TYPE(_id, _ops) \
	[_id] = &_ops,
#include <linux/bpf_types.h>
#undef BPF_PROG_TYPE
#undef BPF_MAP_TYPE
};
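
/* For orientation (an illustrative note, based on how linux/bpf_types.h is
 * structured): that header is expected to carry entries such as
 *
 *	BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY, array_map_ops)
 *	BPF_MAP_TYPE(BPF_MAP_TYPE_HASH, htab_map_ops)
 *
 * so the table above expands to [BPF_MAP_TYPE_ARRAY] = &array_map_ops and so
 * on, while the BPF_PROG_TYPE() entries expand to nothing here.
 */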

/*
 * If we're handed a bigger struct than we know of, ensure all the unknown bits
 * are 0 - i.e. new user-space does not rely on any kernel feature extensions
 * we don't know about yet.
 *
 * There is a ToCToU between this function call and the following
 * copy_from_user() call. However, this is not a concern since this function is
 * meant to be a future-proofing of bits.
 */
static int check_uarg_tail_zero(void __user *uaddr,
				size_t expected_size,
				size_t actual_size)
{
	unsigned char __user *addr;
	unsigned char __user *end;
	unsigned char val;
	int err;

	if (unlikely(actual_size > PAGE_SIZE))	/* silly large */
		return -E2BIG;

	if (unlikely(!access_ok(VERIFY_READ, uaddr, actual_size)))
		return -EFAULT;

	if (actual_size <= expected_size)
		return 0;

	addr = uaddr + expected_size;
	end = uaddr + actual_size;

	for (; addr < end; addr++) {
		err = get_user(val, addr);
		if (err)
			return err;
		if (val)
			return -E2BIG;
	}

	return 0;
}

static struct bpf_map *find_and_alloc_map(union bpf_attr *attr)
{
	struct bpf_map *map;

	if (attr->map_type >= ARRAY_SIZE(bpf_map_types) ||
	    !bpf_map_types[attr->map_type])
		return ERR_PTR(-EINVAL);

	map = bpf_map_types[attr->map_type]->map_alloc(attr);
	if (IS_ERR(map))
		return map;
	map->ops = bpf_map_types[attr->map_type];
	map->map_type = attr->map_type;
	return map;
}

void *bpf_map_area_alloc(size_t size, int numa_node)
{
	/* We definitely need __GFP_NORETRY, so OOM killer doesn't
	 * trigger under memory pressure as we really just want to
	 * fail instead.
	 */
	const gfp_t flags = __GFP_NOWARN | __GFP_NORETRY | __GFP_ZERO;
	void *area;

	if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) {
		area = kmalloc_node(size, GFP_USER | flags, numa_node);
		if (area != NULL)
			return area;
	}

	return __vmalloc_node_flags_caller(size, numa_node, GFP_KERNEL | flags,
					   __builtin_return_address(0));
}

void bpf_map_area_free(void *area)
{
	kvfree(area);
}

int bpf_map_precharge_memlock(u32 pages)
{
	struct user_struct *user = get_current_user();
	unsigned long memlock_limit, cur;

	memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
	cur = atomic_long_read(&user->locked_vm);
	free_uid(user);
	if (cur + pages > memlock_limit)
		return -EPERM;
	return 0;
}

static int bpf_map_charge_memlock(struct bpf_map *map)
{
	struct user_struct *user = get_current_user();
	unsigned long memlock_limit;

	memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;

	atomic_long_add(map->pages, &user->locked_vm);

	if (atomic_long_read(&user->locked_vm) > memlock_limit) {
		atomic_long_sub(map->pages, &user->locked_vm);
		free_uid(user);
		return -EPERM;
	}
	map->user = user;
	return 0;
}

static void bpf_map_uncharge_memlock(struct bpf_map *map)
{
	struct user_struct *user = map->user;

	atomic_long_sub(map->pages, &user->locked_vm);
	free_uid(user);
}

static int bpf_map_alloc_id(struct bpf_map *map)
{
	int id;

	spin_lock_bh(&map_idr_lock);
	id = idr_alloc_cyclic(&map_idr, map, 1, INT_MAX, GFP_ATOMIC);
	if (id > 0)
		map->id = id;
	spin_unlock_bh(&map_idr_lock);

	if (WARN_ON_ONCE(!id))
		return -ENOSPC;

	return id > 0 ? 0 : id;
}
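
/* Usage note (illustrative, user-space side, not part of this file): since
 * map pages are charged against RLIMIT_MEMLOCK in bpf_map_charge_memlock()
 * above, loaders commonly raise that limit before creating maps, e.g.:
 *
 *	struct rlimit r = { RLIM_INFINITY, RLIM_INFINITY };
 *
 *	if (setrlimit(RLIMIT_MEMLOCK, &r))
 *		perror("setrlimit(RLIMIT_MEMLOCK)");
 */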

static void bpf_map_free_id(struct bpf_map *map, bool do_idr_lock)
{
	unsigned long flags;

	if (do_idr_lock)
		spin_lock_irqsave(&map_idr_lock, flags);
	else
		__acquire(&map_idr_lock);

	idr_remove(&map_idr, map->id);

	if (do_idr_lock)
		spin_unlock_irqrestore(&map_idr_lock, flags);
	else
		__release(&map_idr_lock);
}

/* called from workqueue */
static void bpf_map_free_deferred(struct work_struct *work)
{
	struct bpf_map *map = container_of(work, struct bpf_map, work);

	bpf_map_uncharge_memlock(map);
	security_bpf_map_free(map);
	/* implementation dependent freeing */
	map->ops->map_free(map);
}

static void bpf_map_put_uref(struct bpf_map *map)
{
	if (atomic_dec_and_test(&map->usercnt)) {
		if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY)
			bpf_fd_array_map_clear(map);
	}
}

/* decrement map refcnt and schedule it for freeing via workqueue
 * (underlying map implementation ops->map_free() might sleep)
 */
static void __bpf_map_put(struct bpf_map *map, bool do_idr_lock)
{
	if (atomic_dec_and_test(&map->refcnt)) {
		/* bpf_map_free_id() must be called first */
		bpf_map_free_id(map, do_idr_lock);
		INIT_WORK(&map->work, bpf_map_free_deferred);
		schedule_work(&map->work);
	}
}

void bpf_map_put(struct bpf_map *map)
{
	__bpf_map_put(map, true);
}

void bpf_map_put_with_uref(struct bpf_map *map)
{
	bpf_map_put_uref(map);
	bpf_map_put(map);
}

static int bpf_map_release(struct inode *inode, struct file *filp)
{
	struct bpf_map *map = filp->private_data;

	if (map->ops->map_release)
		map->ops->map_release(map, filp);

	bpf_map_put_with_uref(map);
	return 0;
}

#ifdef CONFIG_PROC_FS
static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp)
{
	const struct bpf_map *map = filp->private_data;
	const struct bpf_array *array;
	u32 owner_prog_type = 0;
	u32 owner_jited = 0;

	if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY) {
		array = container_of(map, struct bpf_array, map);
		owner_prog_type = array->owner_prog_type;
		owner_jited = array->owner_jited;
	}

	seq_printf(m,
		   "map_type:\t%u\n"
		   "key_size:\t%u\n"
		   "value_size:\t%u\n"
		   "max_entries:\t%u\n"
		   "map_flags:\t%#x\n"
		   "memlock:\t%llu\n",
		   map->map_type,
		   map->key_size,
		   map->value_size,
		   map->max_entries,
		   map->map_flags,
		   map->pages * 1ULL << PAGE_SHIFT);

	if (owner_prog_type) {
		seq_printf(m, "owner_prog_type:\t%u\n",
			   owner_prog_type);
		seq_printf(m, "owner_jited:\t%u\n",
			   owner_jited);
	}
}
#endif

static ssize_t bpf_dummy_read(struct file *filp, char __user *buf, size_t siz,
			      loff_t *ppos)
{
	/* We need this handler such that alloc_file() enables
	 * f_mode with FMODE_CAN_READ.
	 */
	return -EINVAL;
}

static ssize_t bpf_dummy_write(struct file *filp, const char __user *buf,
			       size_t siz, loff_t *ppos)
{
	/* We need this handler such that alloc_file() enables
	 * f_mode with FMODE_CAN_WRITE.
	 */
	return -EINVAL;
}
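
/* Example (illustrative values only): with CONFIG_PROC_FS,
 * bpf_map_show_fdinfo() above makes /proc/<pid>/fdinfo/<map-fd> read
 * roughly like:
 *
 *	map_type:	1
 *	key_size:	4
 *	value_size:	8
 *	max_entries:	256
 *	map_flags:	0x0
 *	memlock:	4096
 */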

const struct file_operations bpf_map_fops = {
#ifdef CONFIG_PROC_FS
	.show_fdinfo	= bpf_map_show_fdinfo,
#endif
	.release	= bpf_map_release,
	.read		= bpf_dummy_read,
	.write		= bpf_dummy_write,
};

int bpf_map_new_fd(struct bpf_map *map, int flags)
{
	int ret;

	ret = security_bpf_map(map, OPEN_FMODE(flags));
	if (ret < 0)
		return ret;

	return anon_inode_getfd("bpf-map", &bpf_map_fops, map,
				flags | O_CLOEXEC);
}

int bpf_get_file_flag(int flags)
{
	if ((flags & BPF_F_RDONLY) && (flags & BPF_F_WRONLY))
		return -EINVAL;
	if (flags & BPF_F_RDONLY)
		return O_RDONLY;
	if (flags & BPF_F_WRONLY)
		return O_WRONLY;
	return O_RDWR;
}

/* helper macro to check that unused fields of 'union bpf_attr' are zero */
#define CHECK_ATTR(CMD) \
	memchr_inv((void *) &attr->CMD##_LAST_FIELD + \
		   sizeof(attr->CMD##_LAST_FIELD), 0, \
		   sizeof(*attr) - \
		   offsetof(union bpf_attr, CMD##_LAST_FIELD) - \
		   sizeof(attr->CMD##_LAST_FIELD)) != NULL

/* dst and src must have at least BPF_OBJ_NAME_LEN number of bytes.
 * Return 0 on success and < 0 on error.
 */
static int bpf_obj_name_cpy(char *dst, const char *src)
{
	const char *end = src + BPF_OBJ_NAME_LEN;

	memset(dst, 0, BPF_OBJ_NAME_LEN);

	/* Copy all isalnum() and '_' chars */
	while (src < end && *src) {
		if (!isalnum(*src) && *src != '_')
			return -EINVAL;
		*dst++ = *src++;
	}

	/* No '\0' found in BPF_OBJ_NAME_LEN number of bytes */
	if (src == end)
		return -EINVAL;

	return 0;
}

#define BPF_MAP_CREATE_LAST_FIELD map_name
/* called via syscall */
static int map_create(union bpf_attr *attr)
{
	int numa_node = bpf_map_attr_numa_node(attr);
	struct bpf_map *map;
	int f_flags;
	int err;

	err = CHECK_ATTR(BPF_MAP_CREATE);
	if (err)
		return -EINVAL;

	f_flags = bpf_get_file_flag(attr->map_flags);
	if (f_flags < 0)
		return f_flags;

	if (numa_node != NUMA_NO_NODE &&
	    ((unsigned int)numa_node >= nr_node_ids ||
	     !node_online(numa_node)))
		return -EINVAL;

	/* find map type and init map: hashtable vs rbtree vs bloom vs ... */
	map = find_and_alloc_map(attr);
	if (IS_ERR(map))
		return PTR_ERR(map);

	err = bpf_obj_name_cpy(map->name, attr->map_name);
	if (err)
		goto free_map_nouncharge;

	atomic_set(&map->refcnt, 1);
	atomic_set(&map->usercnt, 1);

	err = security_bpf_map_alloc(map);
	if (err)
		goto free_map_nouncharge;

	err = bpf_map_charge_memlock(map);
	if (err)
		goto free_map_sec;

	err = bpf_map_alloc_id(map);
	if (err)
		goto free_map;

	err = bpf_map_new_fd(map, f_flags);
	if (err < 0) {
		/* failed to allocate fd.
		 * bpf_map_put() is needed because the above
		 * bpf_map_alloc_id() has published the map
		 * to the userspace and the userspace may
		 * have refcnt-ed it through BPF_MAP_GET_FD_BY_ID.
		 */
		bpf_map_put(map);
		return err;
	}

	trace_bpf_map_create(map, err);
	return err;

free_map:
	bpf_map_uncharge_memlock(map);
free_map_sec:
	security_bpf_map_free(map);
free_map_nouncharge:
	map->ops->map_free(map);
	return err;
}
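
/* Illustrative user-space counterpart (a sketch, not part of this file): a
 * hash map could be created through the command handled above roughly as:
 *
 *	union bpf_attr attr = {};
 *
 *	attr.map_type    = BPF_MAP_TYPE_HASH;
 *	attr.key_size    = sizeof(__u32);
 *	attr.value_size  = sizeof(__u64);
 *	attr.max_entries = 1024;
 *	map_fd = syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr));
 */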

/* if error is returned, fd is released.
 * On success caller should complete fd access with matching fdput()
 */
struct bpf_map *__bpf_map_get(struct fd f)
{
	if (!f.file)
		return ERR_PTR(-EBADF);
	if (f.file->f_op != &bpf_map_fops) {
		fdput(f);
		return ERR_PTR(-EINVAL);
	}

	return f.file->private_data;
}

/* prog's and map's refcnt limit */
#define BPF_MAX_REFCNT 32768

struct bpf_map *bpf_map_inc(struct bpf_map *map, bool uref)
{
	if (atomic_inc_return(&map->refcnt) > BPF_MAX_REFCNT) {
		atomic_dec(&map->refcnt);
		return ERR_PTR(-EBUSY);
	}
	if (uref)
		atomic_inc(&map->usercnt);
	return map;
}

struct bpf_map *bpf_map_get_with_uref(u32 ufd)
{
	struct fd f = fdget(ufd);
	struct bpf_map *map;

	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return map;

	map = bpf_map_inc(map, true);
	fdput(f);

	return map;
}

/* map_idr_lock should have been held */
static struct bpf_map *bpf_map_inc_not_zero(struct bpf_map *map,
					    bool uref)
{
	int refold;

	refold = __atomic_add_unless(&map->refcnt, 1, 0);

	if (refold >= BPF_MAX_REFCNT) {
		__bpf_map_put(map, false);
		return ERR_PTR(-EBUSY);
	}

	if (!refold)
		return ERR_PTR(-ENOENT);

	if (uref)
		atomic_inc(&map->usercnt);

	return map;
}

int __weak bpf_stackmap_copy(struct bpf_map *map, void *key, void *value)
{
	return -ENOTSUPP;
}

/* last field in 'union bpf_attr' used by this command */
#define BPF_MAP_LOOKUP_ELEM_LAST_FIELD value

static int map_lookup_elem(union bpf_attr *attr)
{
	void __user *ukey = u64_to_user_ptr(attr->key);
	void __user *uvalue = u64_to_user_ptr(attr->value);
	int ufd = attr->map_fd;
	struct bpf_map *map;
	void *key, *value, *ptr;
	u32 value_size;
	struct fd f;
	int err;

	if (CHECK_ATTR(BPF_MAP_LOOKUP_ELEM))
		return -EINVAL;

	f = fdget(ufd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);

	if (!(f.file->f_mode & FMODE_CAN_READ)) {
		err = -EPERM;
		goto err_put;
	}

	key = memdup_user(ukey, map->key_size);
	if (IS_ERR(key)) {
		err = PTR_ERR(key);
		goto err_put;
	}

	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
		value_size = round_up(map->value_size, 8) * num_possible_cpus();
	else if (IS_FD_MAP(map))
		value_size = sizeof(u32);
	else
		value_size = map->value_size;

	err = -ENOMEM;
	value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
	if (!value)
		goto free_key;

	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
		err = bpf_percpu_hash_copy(map, key, value);
	} else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
		err = bpf_percpu_array_copy(map, key, value);
	} else if (map->map_type == BPF_MAP_TYPE_STACK_TRACE) {
		err = bpf_stackmap_copy(map, key, value);
	} else if (IS_FD_ARRAY(map)) {
		err = bpf_fd_array_map_lookup_elem(map, key, value);
	} else if (IS_FD_HASH(map)) {
		err = bpf_fd_htab_map_lookup_elem(map, key, value);
	} else {
		rcu_read_lock();
		ptr = map->ops->map_lookup_elem(map, key);
		if (ptr)
			memcpy(value, ptr, value_size);
		rcu_read_unlock();
		err = ptr ? 0 : -ENOENT;
	}

	if (err)
		goto free_value;

	err = -EFAULT;
	if (copy_to_user(uvalue, value, value_size) != 0)
		goto free_value;

	trace_bpf_map_lookup_elem(map, ufd, key, value);
	err = 0;

free_value:
	kfree(value);
free_key:
	kfree(key);
err_put:
	fdput(f);
	return err;
}
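
/* Illustrative user-space counterpart of the lookup path above (a sketch,
 * not part of this file):
 *
 *	union bpf_attr attr = {};
 *
 *	attr.map_fd = map_fd;
 *	attr.key    = (__u64)(unsigned long)&key;
 *	attr.value  = (__u64)(unsigned long)&value;
 *	err = syscall(__NR_bpf, BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr));
 *	// err < 0 with errno == ENOENT when the key is not present
 */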

#define BPF_MAP_UPDATE_ELEM_LAST_FIELD flags

static int map_update_elem(union bpf_attr *attr)
{
	void __user *ukey = u64_to_user_ptr(attr->key);
	void __user *uvalue = u64_to_user_ptr(attr->value);
	int ufd = attr->map_fd;
	struct bpf_map *map;
	void *key, *value;
	u32 value_size;
	struct fd f;
	int err;

	if (CHECK_ATTR(BPF_MAP_UPDATE_ELEM))
		return -EINVAL;

	f = fdget(ufd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);

	if (!(f.file->f_mode & FMODE_CAN_WRITE)) {
		err = -EPERM;
		goto err_put;
	}

	key = memdup_user(ukey, map->key_size);
	if (IS_ERR(key)) {
		err = PTR_ERR(key);
		goto err_put;
	}

	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
		value_size = round_up(map->value_size, 8) * num_possible_cpus();
	else
		value_size = map->value_size;

	err = -ENOMEM;
	value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
	if (!value)
		goto free_key;

	err = -EFAULT;
	if (copy_from_user(value, uvalue, value_size) != 0)
		goto free_value;

	/* Need to create a kthread, thus must support schedule */
	if (map->map_type == BPF_MAP_TYPE_CPUMAP) {
		err = map->ops->map_update_elem(map, key, value, attr->flags);
		goto out;
	}

	/* must increment bpf_prog_active to avoid kprobe+bpf triggering from
	 * inside bpf map update or delete otherwise deadlocks are possible
	 */
	preempt_disable();
	__this_cpu_inc(bpf_prog_active);
	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
		err = bpf_percpu_hash_update(map, key, value, attr->flags);
	} else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
		err = bpf_percpu_array_update(map, key, value, attr->flags);
	} else if (map->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY ||
		   map->map_type == BPF_MAP_TYPE_PROG_ARRAY ||
		   map->map_type == BPF_MAP_TYPE_CGROUP_ARRAY ||
		   map->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS) {
		rcu_read_lock();
		err = bpf_fd_array_map_update_elem(map, f.file, key, value,
						   attr->flags);
		rcu_read_unlock();
	} else if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) {
		rcu_read_lock();
		err = bpf_fd_htab_map_update_elem(map, f.file, key, value,
						  attr->flags);
		rcu_read_unlock();
	} else {
		rcu_read_lock();
		err = map->ops->map_update_elem(map, key, value, attr->flags);
		rcu_read_unlock();
	}
	__this_cpu_dec(bpf_prog_active);
	preempt_enable();
out:
	if (!err)
		trace_bpf_map_update_elem(map, ufd, key, value);
free_value:
	kfree(value);
free_key:
	kfree(key);
err_put:
	fdput(f);
	return err;
}

#define BPF_MAP_DELETE_ELEM_LAST_FIELD key

static int map_delete_elem(union bpf_attr *attr)
{
	void __user *ukey = u64_to_user_ptr(attr->key);
	int ufd = attr->map_fd;
	struct bpf_map *map;
	struct fd f;
	void *key;
	int err;

	if (CHECK_ATTR(BPF_MAP_DELETE_ELEM))
		return -EINVAL;

	f = fdget(ufd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);

	if (!(f.file->f_mode & FMODE_CAN_WRITE)) {
		err = -EPERM;
		goto err_put;
	}

	key = memdup_user(ukey, map->key_size);
	if (IS_ERR(key)) {
		err = PTR_ERR(key);
		goto err_put;
	}

	preempt_disable();
	__this_cpu_inc(bpf_prog_active);
	rcu_read_lock();
	err = map->ops->map_delete_elem(map, key);
	rcu_read_unlock();
	__this_cpu_dec(bpf_prog_active);
	preempt_enable();

	if (!err)
		trace_bpf_map_delete_elem(map, ufd, key);
	kfree(key);
err_put:
	fdput(f);
	return err;
}
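
/* Illustrative user-space counterpart (a sketch, not part of this file): the
 * BPF_MAP_GET_NEXT_KEY command implemented below is the usual way to walk all
 * keys of a map:
 *
 *	union bpf_attr attr = {};
 *
 *	attr.map_fd   = map_fd;
 *	attr.key      = 0;	// NULL key: start from the first element
 *	attr.next_key = (__u64)(unsigned long)&next_key;
 *
 *	while (!syscall(__NR_bpf, BPF_MAP_GET_NEXT_KEY, &attr, sizeof(attr))) {
 *		// process next_key ...
 *		key = next_key;
 *		attr.key = (__u64)(unsigned long)&key;
 *	}
 */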

/* last field in 'union bpf_attr' used by this command */
#define BPF_MAP_GET_NEXT_KEY_LAST_FIELD next_key

static int map_get_next_key(union bpf_attr *attr)
{
	void __user *ukey = u64_to_user_ptr(attr->key);
	void __user *unext_key = u64_to_user_ptr(attr->next_key);
	int ufd = attr->map_fd;
	struct bpf_map *map;
	void *key, *next_key;
	struct fd f;
	int err;

	if (CHECK_ATTR(BPF_MAP_GET_NEXT_KEY))
		return -EINVAL;

	f = fdget(ufd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);

	if (!(f.file->f_mode & FMODE_CAN_READ)) {
		err = -EPERM;
		goto err_put;
	}

	if (ukey) {
		key = memdup_user(ukey, map->key_size);
		if (IS_ERR(key)) {
			err = PTR_ERR(key);
			goto err_put;
		}
	} else {
		key = NULL;
	}

	err = -ENOMEM;
	next_key = kmalloc(map->key_size, GFP_USER);
	if (!next_key)
		goto free_key;

	rcu_read_lock();
	err = map->ops->map_get_next_key(map, key, next_key);
	rcu_read_unlock();
	if (err)
		goto free_next_key;

	err = -EFAULT;
	if (copy_to_user(unext_key, next_key, map->key_size) != 0)
		goto free_next_key;

	trace_bpf_map_next_key(map, ufd, key, next_key);
	err = 0;

free_next_key:
	kfree(next_key);
free_key:
	kfree(key);
err_put:
	fdput(f);
	return err;
}

static const struct bpf_prog_ops * const bpf_prog_types[] = {
#define BPF_PROG_TYPE(_id, _name) \
	[_id] = & _name ## _prog_ops,
#define BPF_MAP_TYPE(_id, _ops)
#include <linux/bpf_types.h>
#undef BPF_PROG_TYPE
#undef BPF_MAP_TYPE
};

static int find_prog_type(enum bpf_prog_type type, struct bpf_prog *prog)
{
	if (type >= ARRAY_SIZE(bpf_prog_types) || !bpf_prog_types[type])
		return -EINVAL;

	if (!bpf_prog_is_dev_bound(prog->aux))
		prog->aux->ops = bpf_prog_types[type];
	else
		prog->aux->ops = &bpf_offload_prog_ops;
	prog->type = type;
	return 0;
}

/* drop refcnt on maps used by eBPF program and free auxiliary data */
static void free_used_maps(struct bpf_prog_aux *aux)
{
	int i;

	for (i = 0; i < aux->used_map_cnt; i++)
		bpf_map_put(aux->used_maps[i]);

	kfree(aux->used_maps);
}

int __bpf_prog_charge(struct user_struct *user, u32 pages)
{
	unsigned long memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
	unsigned long user_bufs;

	if (user) {
		user_bufs = atomic_long_add_return(pages, &user->locked_vm);
		if (user_bufs > memlock_limit) {
			atomic_long_sub(pages, &user->locked_vm);
			return -EPERM;
		}
	}

	return 0;
}

void __bpf_prog_uncharge(struct user_struct *user, u32 pages)
{
	if (user)
		atomic_long_sub(pages, &user->locked_vm);
}

static int bpf_prog_charge_memlock(struct bpf_prog *prog)
{
	struct user_struct *user = get_current_user();
	int ret;

	ret = __bpf_prog_charge(user, prog->pages);
	if (ret) {
		free_uid(user);
		return ret;
	}

	prog->aux->user = user;
	return 0;
}

static void bpf_prog_uncharge_memlock(struct bpf_prog *prog)
{
	struct user_struct *user = prog->aux->user;

	__bpf_prog_uncharge(user, prog->pages);
	free_uid(user);
}

static int bpf_prog_alloc_id(struct bpf_prog *prog)
{
	int id;

	spin_lock_bh(&prog_idr_lock);
	id = idr_alloc_cyclic(&prog_idr, prog, 1, INT_MAX, GFP_ATOMIC);
	if (id > 0)
		prog->aux->id = id;
	spin_unlock_bh(&prog_idr_lock);

	/* id is in [1, INT_MAX) */
	if (WARN_ON_ONCE(!id))
		return -ENOSPC;

	return id > 0 ? 0 : id;
}

static void bpf_prog_free_id(struct bpf_prog *prog, bool do_idr_lock)
{
	/* cBPF to eBPF migrations are currently not in the idr store. */
	if (!prog->aux->id)
		return;

	if (do_idr_lock)
		spin_lock_bh(&prog_idr_lock);
	else
		__acquire(&prog_idr_lock);

	idr_remove(&prog_idr, prog->aux->id);

	if (do_idr_lock)
		spin_unlock_bh(&prog_idr_lock);
	else
		__release(&prog_idr_lock);
}

static void __bpf_prog_put_rcu(struct rcu_head *rcu)
{
	struct bpf_prog_aux *aux = container_of(rcu, struct bpf_prog_aux, rcu);

	free_used_maps(aux);
	bpf_prog_uncharge_memlock(aux->prog);
	security_bpf_prog_free(aux);
	bpf_prog_free(aux->prog);
}

static void __bpf_prog_put(struct bpf_prog *prog, bool do_idr_lock)
{
	if (atomic_dec_and_test(&prog->aux->refcnt)) {
		trace_bpf_prog_put_rcu(prog);
		/* bpf_prog_free_id() must be called first */
		bpf_prog_free_id(prog, do_idr_lock);
		bpf_prog_kallsyms_del(prog);
		call_rcu(&prog->aux->rcu, __bpf_prog_put_rcu);
	}
}

void bpf_prog_put(struct bpf_prog *prog)
{
	__bpf_prog_put(prog, true);
}
EXPORT_SYMBOL_GPL(bpf_prog_put);

static int bpf_prog_release(struct inode *inode, struct file *filp)
{
	struct bpf_prog *prog = filp->private_data;

	bpf_prog_put(prog);
	return 0;
}

#ifdef CONFIG_PROC_FS
static void bpf_prog_show_fdinfo(struct seq_file *m, struct file *filp)
{
	const struct bpf_prog *prog = filp->private_data;
	char prog_tag[sizeof(prog->tag) * 2 + 1] = { };

	bin2hex(prog_tag, prog->tag, sizeof(prog->tag));
	seq_printf(m,
		   "prog_type:\t%u\n"
		   "prog_jited:\t%u\n"
		   "prog_tag:\t%s\n"
		   "memlock:\t%llu\n",
		   prog->type,
		   prog->jited,
		   prog_tag,
		   prog->pages * 1ULL << PAGE_SHIFT);
}
#endif

const struct file_operations bpf_prog_fops = {
#ifdef CONFIG_PROC_FS
	.show_fdinfo	= bpf_prog_show_fdinfo,
#endif
	.release	= bpf_prog_release,
	.read		= bpf_dummy_read,
	.write		= bpf_dummy_write,
};

int bpf_prog_new_fd(struct bpf_prog *prog)
{
	int ret;

	ret = security_bpf_prog(prog);
	if (ret < 0)
		return ret;

	return anon_inode_getfd("bpf-prog", &bpf_prog_fops, prog,
				O_RDWR | O_CLOEXEC);
}

static struct bpf_prog *____bpf_prog_get(struct fd f)
{
	if (!f.file)
		return ERR_PTR(-EBADF);
	if (f.file->f_op != &bpf_prog_fops) {
		fdput(f);
		return ERR_PTR(-EINVAL);
	}

	return f.file->private_data;
}

struct bpf_prog *bpf_prog_add(struct bpf_prog *prog, int i)
{
	if (atomic_add_return(i, &prog->aux->refcnt) > BPF_MAX_REFCNT) {
		atomic_sub(i, &prog->aux->refcnt);
		return ERR_PTR(-EBUSY);
	}
	return prog;
}
EXPORT_SYMBOL_GPL(bpf_prog_add);
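
/* Illustrative pattern (not part of this file; nr_queues is a placeholder):
 * a caller that installs one program in several places may take the
 * references in bulk and undo them only on an error path via bpf_prog_sub()
 * below:
 *
 *	prog = bpf_prog_add(prog, nr_queues);
 *	if (IS_ERR(prog))
 *		return PTR_ERR(prog);
 *	...
 *	bpf_prog_sub(prog, nr_queues);	// error path only
 */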

void bpf_prog_sub(struct bpf_prog *prog, int i)
{
	/* Only to be used for undoing previous bpf_prog_add() in some
	 * error path. We still know that another entity in our call
	 * path holds a reference to the program, thus atomic_sub() can
	 * be safely used in such cases!
	 */
	WARN_ON(atomic_sub_return(i, &prog->aux->refcnt) == 0);
}
EXPORT_SYMBOL_GPL(bpf_prog_sub);

struct bpf_prog *bpf_prog_inc(struct bpf_prog *prog)
{
	return bpf_prog_add(prog, 1);
}
EXPORT_SYMBOL_GPL(bpf_prog_inc);

/* prog_idr_lock should have been held */
struct bpf_prog *bpf_prog_inc_not_zero(struct bpf_prog *prog)
{
	int refold;

	refold = __atomic_add_unless(&prog->aux->refcnt, 1, 0);

	if (refold >= BPF_MAX_REFCNT) {
		__bpf_prog_put(prog, false);
		return ERR_PTR(-EBUSY);
	}

	if (!refold)
		return ERR_PTR(-ENOENT);

	return prog;
}
EXPORT_SYMBOL_GPL(bpf_prog_inc_not_zero);

static bool bpf_prog_can_attach(struct bpf_prog *prog,
				enum bpf_prog_type *attach_type,
				struct net_device *netdev)
{
	struct bpf_dev_offload *offload = prog->aux->offload;

	if (prog->type != *attach_type)
		return false;
	if (offload && offload->netdev != netdev)
		return false;

	return true;
}

static struct bpf_prog *__bpf_prog_get(u32 ufd, enum bpf_prog_type *attach_type,
				       struct net_device *netdev)
{
	struct fd f = fdget(ufd);
	struct bpf_prog *prog;

	prog = ____bpf_prog_get(f);
	if (IS_ERR(prog))
		return prog;
	if (attach_type && !bpf_prog_can_attach(prog, attach_type, netdev)) {
		prog = ERR_PTR(-EINVAL);
		goto out;
	}

	prog = bpf_prog_inc(prog);
out:
	fdput(f);
	return prog;
}

struct bpf_prog *bpf_prog_get(u32 ufd)
{
	return __bpf_prog_get(ufd, NULL, NULL);
}

struct bpf_prog *bpf_prog_get_type(u32 ufd, enum bpf_prog_type type)
{
	struct bpf_prog *prog = __bpf_prog_get(ufd, &type, NULL);

	if (!IS_ERR(prog))
		trace_bpf_prog_get_type(prog);
	return prog;
}
EXPORT_SYMBOL_GPL(bpf_prog_get_type);

struct bpf_prog *bpf_prog_get_type_dev(u32 ufd, enum bpf_prog_type type,
				       struct net_device *netdev)
{
	struct bpf_prog *prog = __bpf_prog_get(ufd, &type, netdev);

	if (!IS_ERR(prog))
		trace_bpf_prog_get_type(prog);
	return prog;
}
EXPORT_SYMBOL_GPL(bpf_prog_get_type_dev);

/* last field in 'union bpf_attr' used by this command */
#define BPF_PROG_LOAD_LAST_FIELD prog_target_ifindex

static int bpf_prog_load(union bpf_attr *attr)
{
	enum bpf_prog_type type = attr->prog_type;
	struct bpf_prog *prog;
	int err;
	char license[128];
	bool is_gpl;

	if (CHECK_ATTR(BPF_PROG_LOAD))
		return -EINVAL;

	if (attr->prog_flags & ~BPF_F_STRICT_ALIGNMENT)
		return -EINVAL;

	/* copy eBPF program license from user space */
	if (strncpy_from_user(license, u64_to_user_ptr(attr->license),
			      sizeof(license) - 1) < 0)
		return -EFAULT;
	license[sizeof(license) - 1] = 0;

	/* eBPF programs must be GPL compatible to use GPL-ed functions */
	is_gpl = license_is_gpl_compatible(license);

	if (attr->insn_cnt == 0 || attr->insn_cnt > BPF_MAXINSNS)
		return -E2BIG;

	if (type == BPF_PROG_TYPE_KPROBE &&
	    attr->kern_version != LINUX_VERSION_CODE)
		return -EINVAL;

	if (type != BPF_PROG_TYPE_SOCKET_FILTER &&
	    type != BPF_PROG_TYPE_CGROUP_SKB &&
	    !capable(CAP_SYS_ADMIN))
		return -EPERM;

	/* plain bpf_prog allocation */
	prog = bpf_prog_alloc(bpf_prog_size(attr->insn_cnt), GFP_USER);
	if (!prog)
		return -ENOMEM;

	err = security_bpf_prog_alloc(prog->aux);
	if (err)
		goto free_prog_nouncharge;

	err = bpf_prog_charge_memlock(prog);
	if (err)
		goto free_prog_sec;

	prog->len = attr->insn_cnt;

	err = -EFAULT;
	if (copy_from_user(prog->insns, u64_to_user_ptr(attr->insns),
			   bpf_prog_insn_size(prog)) != 0)
		goto free_prog;

	prog->orig_prog = NULL;
	prog->jited = 0;

	atomic_set(&prog->aux->refcnt, 1);
	prog->gpl_compatible = is_gpl ? 1 : 0;

	if (attr->prog_target_ifindex) {
		err = bpf_prog_offload_init(prog, attr);
		if (err)
			goto free_prog;
	}

	/* find program type: socket_filter vs tracing_filter */
	err = find_prog_type(type, prog);
	if (err < 0)
		goto free_prog;

	prog->aux->load_time = ktime_get_boot_ns();
	err = bpf_obj_name_cpy(prog->aux->name, attr->prog_name);
	if (err)
		goto free_prog;

	/* run eBPF verifier */
	err = bpf_check(&prog, attr);
	if (err < 0)
		goto free_used_maps;

	/* eBPF program is ready to be JITed */
	prog = bpf_prog_select_runtime(prog, &err);
	if (err < 0)
		goto free_used_maps;

	err = bpf_prog_alloc_id(prog);
	if (err)
		goto free_used_maps;

	err = bpf_prog_new_fd(prog);
	if (err < 0) {
		/* failed to allocate fd.
		 * bpf_prog_put() is needed because the above
		 * bpf_prog_alloc_id() has published the prog
		 * to the userspace and the userspace may
		 * have refcnt-ed it through BPF_PROG_GET_FD_BY_ID.
		 */
		bpf_prog_put(prog);
		return err;
	}

	bpf_prog_kallsyms_add(prog);
	trace_bpf_prog_load(prog, err);
	return err;

free_used_maps:
	free_used_maps(prog->aux);
free_prog:
	bpf_prog_uncharge_memlock(prog);
free_prog_sec:
	security_bpf_prog_free(prog->aux);
free_prog_nouncharge:
	bpf_prog_free(prog);
	return err;
}

#define BPF_OBJ_LAST_FIELD file_flags

static int bpf_obj_pin(const union bpf_attr *attr)
{
	if (CHECK_ATTR(BPF_OBJ) || attr->file_flags != 0)
		return -EINVAL;

	return bpf_obj_pin_user(attr->bpf_fd, u64_to_user_ptr(attr->pathname));
}

static int bpf_obj_get(const union bpf_attr *attr)
{
	if (CHECK_ATTR(BPF_OBJ) || attr->bpf_fd != 0 ||
	    attr->file_flags & ~BPF_OBJ_FLAG_MASK)
		return -EINVAL;

	return bpf_obj_get_user(u64_to_user_ptr(attr->pathname),
				attr->file_flags);
}

#ifdef CONFIG_CGROUP_BPF

#define BPF_PROG_ATTACH_LAST_FIELD attach_flags

static int sockmap_get_from_fd(const union bpf_attr *attr, bool attach)
{
	struct bpf_prog *prog = NULL;
	int ufd = attr->target_fd;
	struct bpf_map *map;
	struct fd f;
	int err;

	f = fdget(ufd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);

	if (attach) {
		prog = bpf_prog_get_type(attr->attach_bpf_fd,
					 BPF_PROG_TYPE_SK_SKB);
		if (IS_ERR(prog)) {
			fdput(f);
			return PTR_ERR(prog);
		}
	}

	err = sock_map_prog(map, prog, attr->attach_type);
	if (err) {
		fdput(f);
		if (prog)
			bpf_prog_put(prog);
		return err;
	}

	fdput(f);
	return 0;
}

#define BPF_F_ATTACH_MASK \
	(BPF_F_ALLOW_OVERRIDE | BPF_F_ALLOW_MULTI)

static int bpf_prog_attach(const union bpf_attr *attr)
{
	enum bpf_prog_type ptype;
	struct bpf_prog *prog;
	struct cgroup *cgrp;
	int ret;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	if (CHECK_ATTR(BPF_PROG_ATTACH))
		return -EINVAL;

	if (attr->attach_flags & ~BPF_F_ATTACH_MASK)
		return -EINVAL;

	switch (attr->attach_type) {
	case BPF_CGROUP_INET_INGRESS:
	case BPF_CGROUP_INET_EGRESS:
		ptype = BPF_PROG_TYPE_CGROUP_SKB;
		break;
	case BPF_CGROUP_INET_SOCK_CREATE:
		ptype = BPF_PROG_TYPE_CGROUP_SOCK;
		break;
	case BPF_CGROUP_SOCK_OPS:
		ptype = BPF_PROG_TYPE_SOCK_OPS;
		break;
	case BPF_CGROUP_DEVICE:
		ptype = BPF_PROG_TYPE_CGROUP_DEVICE;
		break;
	case BPF_SK_SKB_STREAM_PARSER:
	case BPF_SK_SKB_STREAM_VERDICT:
		return sockmap_get_from_fd(attr, true);
	default:
		return -EINVAL;
	}

	prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype);
	if (IS_ERR(prog))
		return PTR_ERR(prog);

	cgrp = cgroup_get_from_fd(attr->target_fd);
	if (IS_ERR(cgrp)) {
		bpf_prog_put(prog);
		return PTR_ERR(cgrp);
	}

	ret = cgroup_bpf_attach(cgrp, prog, attr->attach_type,
				attr->attach_flags);
	if (ret)
		bpf_prog_put(prog);
	cgroup_put(cgrp);

	return ret;
}

#define BPF_PROG_DETACH_LAST_FIELD attach_type

static int bpf_prog_detach(const union bpf_attr *attr)
{
	enum bpf_prog_type ptype;
	struct bpf_prog *prog;
	struct cgroup *cgrp;
	int ret;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	if (CHECK_ATTR(BPF_PROG_DETACH))
		return -EINVAL;

	switch (attr->attach_type) {
	case BPF_CGROUP_INET_INGRESS:
	case BPF_CGROUP_INET_EGRESS:
		ptype = BPF_PROG_TYPE_CGROUP_SKB;
		break;
	case BPF_CGROUP_INET_SOCK_CREATE:
		ptype = BPF_PROG_TYPE_CGROUP_SOCK;
		break;
	case BPF_CGROUP_SOCK_OPS:
		ptype = BPF_PROG_TYPE_SOCK_OPS;
		break;
	case BPF_CGROUP_DEVICE:
		ptype = BPF_PROG_TYPE_CGROUP_DEVICE;
		break;
	case BPF_SK_SKB_STREAM_PARSER:
	case BPF_SK_SKB_STREAM_VERDICT:
		return sockmap_get_from_fd(attr, false);
	default:
		return -EINVAL;
	}

	cgrp = cgroup_get_from_fd(attr->target_fd);
	if (IS_ERR(cgrp))
		return PTR_ERR(cgrp);

	prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype);
	if (IS_ERR(prog))
		prog = NULL;

	ret = cgroup_bpf_detach(cgrp, prog, attr->attach_type, 0);
	if (prog)
		bpf_prog_put(prog);
	cgroup_put(cgrp);
	return ret;
}

#define BPF_PROG_QUERY_LAST_FIELD query.prog_cnt

static int bpf_prog_query(const union bpf_attr *attr,
			  union bpf_attr __user *uattr)
{
	struct cgroup *cgrp;
	int ret;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;
	if (CHECK_ATTR(BPF_PROG_QUERY))
		return -EINVAL;
	if (attr->query.query_flags & ~BPF_F_QUERY_EFFECTIVE)
		return -EINVAL;

	switch (attr->query.attach_type) {
	case BPF_CGROUP_INET_INGRESS:
	case BPF_CGROUP_INET_EGRESS:
	case BPF_CGROUP_INET_SOCK_CREATE:
	case BPF_CGROUP_SOCK_OPS:
	case BPF_CGROUP_DEVICE:
		break;
	default:
		return -EINVAL;
	}
	cgrp = cgroup_get_from_fd(attr->query.target_fd);
	if (IS_ERR(cgrp))
		return PTR_ERR(cgrp);
	ret = cgroup_bpf_query(cgrp, attr, uattr);
	cgroup_put(cgrp);
	return ret;
}
#endif /* CONFIG_CGROUP_BPF */

#define BPF_PROG_TEST_RUN_LAST_FIELD test.duration

static int bpf_prog_test_run(const union bpf_attr *attr,
			     union bpf_attr __user *uattr)
{
	struct bpf_prog *prog;
	int ret = -ENOTSUPP;

	if (CHECK_ATTR(BPF_PROG_TEST_RUN))
		return -EINVAL;

	prog = bpf_prog_get(attr->test.prog_fd);
	if (IS_ERR(prog))
		return PTR_ERR(prog);

	if (prog->aux->ops->test_run)
		ret = prog->aux->ops->test_run(prog, attr, uattr);

	bpf_prog_put(prog);
	return ret;
}

#define BPF_OBJ_GET_NEXT_ID_LAST_FIELD next_id

static int bpf_obj_get_next_id(const union bpf_attr *attr,
			       union bpf_attr __user *uattr,
			       struct idr *idr,
			       spinlock_t *lock)
{
	u32 next_id = attr->start_id;
	int err = 0;

	if (CHECK_ATTR(BPF_OBJ_GET_NEXT_ID) || next_id >= INT_MAX)
		return -EINVAL;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	next_id++;
	spin_lock_bh(lock);
	if (!idr_get_next(idr, &next_id))
		err = -ENOENT;
	spin_unlock_bh(lock);

	if (!err)
		err = put_user(next_id, &uattr->next_id);

	return err;
}

#define BPF_PROG_GET_FD_BY_ID_LAST_FIELD prog_id

static int bpf_prog_get_fd_by_id(const union bpf_attr *attr)
{
	struct bpf_prog *prog;
	u32 id = attr->prog_id;
	int fd;

	if (CHECK_ATTR(BPF_PROG_GET_FD_BY_ID))
		return -EINVAL;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	spin_lock_bh(&prog_idr_lock);
	prog = idr_find(&prog_idr, id);
	if (prog)
		prog = bpf_prog_inc_not_zero(prog);
	else
		prog = ERR_PTR(-ENOENT);
	spin_unlock_bh(&prog_idr_lock);

	if (IS_ERR(prog))
		return PTR_ERR(prog);

	fd = bpf_prog_new_fd(prog);
	if (fd < 0)
		bpf_prog_put(prog);

	return fd;
}
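
/* Illustrative user-space counterpart (a sketch, not part of this file):
 * BPF_PROG_GET_NEXT_ID and BPF_PROG_GET_FD_BY_ID are typically combined to
 * walk all loaded programs (CAP_SYS_ADMIN is required for both):
 *
 *	__u32 id = 0;
 *	union bpf_attr attr = {};
 *
 *	for (;;) {
 *		attr.start_id = id;
 *		if (syscall(__NR_bpf, BPF_PROG_GET_NEXT_ID, &attr, sizeof(attr)))
 *			break;		// errno == ENOENT: no more IDs
 *		id = attr.next_id;
 *		// a fresh attr with .prog_id = id can now be passed to
 *		// BPF_PROG_GET_FD_BY_ID to obtain a new fd for that program
 *	}
 */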

#define BPF_MAP_GET_FD_BY_ID_LAST_FIELD open_flags

static int bpf_map_get_fd_by_id(const union bpf_attr *attr)
{
	struct bpf_map *map;
	u32 id = attr->map_id;
	int f_flags;
	int fd;

	if (CHECK_ATTR(BPF_MAP_GET_FD_BY_ID) ||
	    attr->open_flags & ~BPF_OBJ_FLAG_MASK)
		return -EINVAL;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	f_flags = bpf_get_file_flag(attr->open_flags);
	if (f_flags < 0)
		return f_flags;

	spin_lock_bh(&map_idr_lock);
	map = idr_find(&map_idr, id);
	if (map)
		map = bpf_map_inc_not_zero(map, true);
	else
		map = ERR_PTR(-ENOENT);
	spin_unlock_bh(&map_idr_lock);

	if (IS_ERR(map))
		return PTR_ERR(map);

	fd = bpf_map_new_fd(map, f_flags);
	if (fd < 0)
		bpf_map_put(map);

	return fd;
}

static int bpf_prog_get_info_by_fd(struct bpf_prog *prog,
				   const union bpf_attr *attr,
				   union bpf_attr __user *uattr)
{
	struct bpf_prog_info __user *uinfo = u64_to_user_ptr(attr->info.info);
	struct bpf_prog_info info = {};
	u32 info_len = attr->info.info_len;
	char __user *uinsns;
	u32 ulen;
	int err;

	err = check_uarg_tail_zero(uinfo, sizeof(info), info_len);
	if (err)
		return err;
	info_len = min_t(u32, sizeof(info), info_len);

	if (copy_from_user(&info, uinfo, info_len))
		return -EFAULT;

	info.type = prog->type;
	info.id = prog->aux->id;
	info.load_time = prog->aux->load_time;
	info.created_by_uid = from_kuid_munged(current_user_ns(),
					       prog->aux->user->uid);

	memcpy(info.tag, prog->tag, sizeof(prog->tag));
	memcpy(info.name, prog->aux->name, sizeof(prog->aux->name));

	ulen = info.nr_map_ids;
	info.nr_map_ids = prog->aux->used_map_cnt;
	ulen = min_t(u32, info.nr_map_ids, ulen);
	if (ulen) {
		u32 __user *user_map_ids = u64_to_user_ptr(info.map_ids);
		u32 i;

		for (i = 0; i < ulen; i++)
			if (put_user(prog->aux->used_maps[i]->id,
				     &user_map_ids[i]))
				return -EFAULT;
	}

	if (!capable(CAP_SYS_ADMIN)) {
		info.jited_prog_len = 0;
		info.xlated_prog_len = 0;
		goto done;
	}

	ulen = info.jited_prog_len;
	info.jited_prog_len = prog->jited_len;
	if (info.jited_prog_len && ulen) {
		uinsns = u64_to_user_ptr(info.jited_prog_insns);
		ulen = min_t(u32, info.jited_prog_len, ulen);
		if (copy_to_user(uinsns, prog->bpf_func, ulen))
			return -EFAULT;
	}

	ulen = info.xlated_prog_len;
	info.xlated_prog_len = bpf_prog_insn_size(prog);
	if (info.xlated_prog_len && ulen) {
		uinsns = u64_to_user_ptr(info.xlated_prog_insns);
		ulen = min_t(u32, info.xlated_prog_len, ulen);
		if (copy_to_user(uinsns, prog->insnsi, ulen))
			return -EFAULT;
	}

	if (bpf_prog_is_dev_bound(prog->aux)) {
		info.status |= BPF_PROG_STATUS_DEV_BOUND;
		info.ifindex = bpf_prog_offload_ifindex(prog);
	}

done:
	if (copy_to_user(uinfo, &info, info_len) ||
	    put_user(info_len, &uattr->info.info_len))
		return -EFAULT;

	return 0;
}

static int bpf_map_get_info_by_fd(struct bpf_map *map,
				  const union bpf_attr *attr,
				  union bpf_attr __user *uattr)
{
	struct bpf_map_info __user *uinfo = u64_to_user_ptr(attr->info.info);
	struct bpf_map_info info = {};
	u32 info_len = attr->info.info_len;
	int err;

	err = check_uarg_tail_zero(uinfo, sizeof(info), info_len);
	if (err)
		return err;
	info_len = min_t(u32, sizeof(info), info_len);

	info.type = map->map_type;
	info.id = map->id;
	info.key_size = map->key_size;
	info.value_size = map->value_size;
	info.max_entries = map->max_entries;
	info.map_flags = map->map_flags;
	memcpy(info.name, map->name, sizeof(map->name));

	if (copy_to_user(uinfo, &info, info_len) ||
	    put_user(info_len, &uattr->info.info_len))
		return -EFAULT;

	return 0;
}
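
/* Illustrative user-space counterpart (a sketch, not part of this file):
 *
 *	struct bpf_map_info info = {};
 *	union bpf_attr attr = {};
 *
 *	attr.info.bpf_fd   = map_fd;
 *	attr.info.info_len = sizeof(info);
 *	attr.info.info     = (__u64)(unsigned long)&info;
 *
 *	if (syscall(__NR_bpf, BPF_OBJ_GET_INFO_BY_FD, &attr, sizeof(attr)))
 *		return -1;
 *	// info.id, info.key_size, info.max_entries, ... are now filled in
 */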

#define BPF_OBJ_GET_INFO_BY_FD_LAST_FIELD info.info

static int bpf_obj_get_info_by_fd(const union bpf_attr *attr,
				  union bpf_attr __user *uattr)
{
	int ufd = attr->info.bpf_fd;
	struct fd f;
	int err;

	if (CHECK_ATTR(BPF_OBJ_GET_INFO_BY_FD))
		return -EINVAL;

	f = fdget(ufd);
	if (!f.file)
		return -EBADFD;

	if (f.file->f_op == &bpf_prog_fops)
		err = bpf_prog_get_info_by_fd(f.file->private_data, attr,
					      uattr);
	else if (f.file->f_op == &bpf_map_fops)
		err = bpf_map_get_info_by_fd(f.file->private_data, attr,
					     uattr);
	else
		err = -EINVAL;

	fdput(f);
	return err;
}

SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
{
	union bpf_attr attr = {};
	int err;

	if (!capable(CAP_SYS_ADMIN) && sysctl_unprivileged_bpf_disabled)
		return -EPERM;

	err = check_uarg_tail_zero(uattr, sizeof(attr), size);
	if (err)
		return err;
	size = min_t(u32, size, sizeof(attr));

	/* copy attributes from user space, may be less than sizeof(bpf_attr) */
	if (copy_from_user(&attr, uattr, size) != 0)
		return -EFAULT;

	err = security_bpf(cmd, &attr, size);
	if (err < 0)
		return err;

	switch (cmd) {
	case BPF_MAP_CREATE:
		err = map_create(&attr);
		break;
	case BPF_MAP_LOOKUP_ELEM:
		err = map_lookup_elem(&attr);
		break;
	case BPF_MAP_UPDATE_ELEM:
		err = map_update_elem(&attr);
		break;
	case BPF_MAP_DELETE_ELEM:
		err = map_delete_elem(&attr);
		break;
	case BPF_MAP_GET_NEXT_KEY:
		err = map_get_next_key(&attr);
		break;
	case BPF_PROG_LOAD:
		err = bpf_prog_load(&attr);
		break;
	case BPF_OBJ_PIN:
		err = bpf_obj_pin(&attr);
		break;
	case BPF_OBJ_GET:
		err = bpf_obj_get(&attr);
		break;
#ifdef CONFIG_CGROUP_BPF
	case BPF_PROG_ATTACH:
		err = bpf_prog_attach(&attr);
		break;
	case BPF_PROG_DETACH:
		err = bpf_prog_detach(&attr);
		break;
	case BPF_PROG_QUERY:
		err = bpf_prog_query(&attr, uattr);
		break;
#endif
	case BPF_PROG_TEST_RUN:
		err = bpf_prog_test_run(&attr, uattr);
		break;
	case BPF_PROG_GET_NEXT_ID:
		err = bpf_obj_get_next_id(&attr, uattr,
					  &prog_idr, &prog_idr_lock);
		break;
	case BPF_MAP_GET_NEXT_ID:
		err = bpf_obj_get_next_id(&attr, uattr,
					  &map_idr, &map_idr_lock);
		break;
	case BPF_PROG_GET_FD_BY_ID:
		err = bpf_prog_get_fd_by_id(&attr);
		break;
	case BPF_MAP_GET_FD_BY_ID:
		err = bpf_map_get_fd_by_id(&attr);
		break;
	case BPF_OBJ_GET_INFO_BY_FD:
		err = bpf_obj_get_info_by_fd(&attr, uattr);
		break;
	default:
		err = -EINVAL;
		break;
	}

	return err;
}
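
/* Usage note (illustrative, user-space side, not part of this file): glibc
 * provides no wrapper for bpf(2), so callers of the command multiplexer above
 * normally go through syscall(2) directly:
 *
 *	#include <linux/bpf.h>
 *	#include <sys/syscall.h>
 *	#include <unistd.h>
 *
 *	static int sys_bpf(enum bpf_cmd cmd, union bpf_attr *attr,
 *			   unsigned int size)
 *	{
 *		return syscall(__NR_bpf, cmd, attr, size);
 *	}
 */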