/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 */
#include <linux/bpf.h>
#include <linux/bpf_trace.h>
#include <linux/syscalls.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/mmzone.h>
#include <linux/anon_inodes.h>
#include <linux/file.h>
#include <linux/license.h>
#include <linux/filter.h>
#include <linux/version.h>
#include <linux/kernel.h>

DEFINE_PER_CPU(int, bpf_prog_active);

int sysctl_unprivileged_bpf_disabled __read_mostly;

static LIST_HEAD(bpf_map_types);

static struct bpf_map *find_and_alloc_map(union bpf_attr *attr)
{
	struct bpf_map_type_list *tl;
	struct bpf_map *map;

	list_for_each_entry(tl, &bpf_map_types, list_node) {
		if (tl->type == attr->map_type) {
			map = tl->ops->map_alloc(attr);
			if (IS_ERR(map))
				return map;
			map->ops = tl->ops;
			map->map_type = attr->map_type;
			return map;
		}
	}
	return ERR_PTR(-EINVAL);
}

/* boot time registration of different map implementations */
void bpf_register_map_type(struct bpf_map_type_list *tl)
{
	list_add(&tl->list_node, &bpf_map_types);
}

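/* Map implementations register themselves at boot via
 * bpf_register_map_type(). As an illustrative sketch only (modeled on how
 * the map implementations such as kernel/bpf/arraymap.c do it in this
 * kernel version; the names below are examples, not part of this file),
 * an implementation supplies a bpf_map_ops table plus a
 * bpf_map_type_list entry and registers it from an initcall:
 *
 *	static struct bpf_map_type_list example_type = {
 *		.ops	= &example_map_ops,
 *		.type	= BPF_MAP_TYPE_ARRAY,
 *	};
 *
 *	static int __init register_example_map(void)
 *	{
 *		bpf_register_map_type(&example_type);
 *		return 0;
 *	}
 *	late_initcall(register_example_map);
 *
 * find_and_alloc_map() above then matches attr->map_type against the
 * registered entries.
 */
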
void *bpf_map_area_alloc(size_t size)
{
	/* We definitely need __GFP_NORETRY, so OOM killer doesn't
	 * trigger under memory pressure as we really just want to
	 * fail instead.
	 */
	const gfp_t flags = __GFP_NOWARN | __GFP_NORETRY | __GFP_ZERO;
	void *area;

	if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) {
		area = kmalloc(size, GFP_USER | flags);
		if (area != NULL)
			return area;
	}

	return __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM | flags,
			 PAGE_KERNEL);
}

void bpf_map_area_free(void *area)
{
	kvfree(area);
}

int bpf_map_precharge_memlock(u32 pages)
{
	struct user_struct *user = get_current_user();
	unsigned long memlock_limit, cur;

	memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
	cur = atomic_long_read(&user->locked_vm);
	free_uid(user);
	if (cur + pages > memlock_limit)
		return -EPERM;
	return 0;
}

static int bpf_map_charge_memlock(struct bpf_map *map)
{
	struct user_struct *user = get_current_user();
	unsigned long memlock_limit;

	memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;

	atomic_long_add(map->pages, &user->locked_vm);

	if (atomic_long_read(&user->locked_vm) > memlock_limit) {
		atomic_long_sub(map->pages, &user->locked_vm);
		free_uid(user);
		return -EPERM;
	}
	map->user = user;
	return 0;
}

static void bpf_map_uncharge_memlock(struct bpf_map *map)
{
	struct user_struct *user = map->user;

	atomic_long_sub(map->pages, &user->locked_vm);
	free_uid(user);
}

/* called from workqueue */
static void bpf_map_free_deferred(struct work_struct *work)
{
	struct bpf_map *map = container_of(work, struct bpf_map, work);

	bpf_map_uncharge_memlock(map);
	/* implementation dependent freeing */
	map->ops->map_free(map);
}

static void bpf_map_put_uref(struct bpf_map *map)
{
	if (atomic_dec_and_test(&map->usercnt)) {
		if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY)
			bpf_fd_array_map_clear(map);
	}
}

/* decrement map refcnt and schedule it for freeing via workqueue
 * (the underlying map implementation's ops->map_free() might sleep)
 */
void bpf_map_put(struct bpf_map *map)
{
	if (atomic_dec_and_test(&map->refcnt)) {
		INIT_WORK(&map->work, bpf_map_free_deferred);
		schedule_work(&map->work);
	}
}

void bpf_map_put_with_uref(struct bpf_map *map)
{
	bpf_map_put_uref(map);
	bpf_map_put(map);
}

static int bpf_map_release(struct inode *inode, struct file *filp)
{
	struct bpf_map *map = filp->private_data;

	if (map->ops->map_release)
		map->ops->map_release(map, filp);

	bpf_map_put_with_uref(map);
	return 0;
}

#ifdef CONFIG_PROC_FS
static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp)
{
	const struct bpf_map *map = filp->private_data;
	const struct bpf_array *array;
	u32 owner_prog_type = 0;

	if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY) {
		array = container_of(map, struct bpf_array, map);
		owner_prog_type = array->owner_prog_type;
	}

	seq_printf(m,
		   "map_type:\t%u\n"
		   "key_size:\t%u\n"
		   "value_size:\t%u\n"
		   "max_entries:\t%u\n"
		   "map_flags:\t%#x\n"
		   "memlock:\t%llu\n",
		   map->map_type,
		   map->key_size,
		   map->value_size,
		   map->max_entries,
		   map->map_flags,
		   map->pages * 1ULL << PAGE_SHIFT);

	if (owner_prog_type)
		seq_printf(m, "owner_prog_type:\t%u\n",
			   owner_prog_type);
}
#endif

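/* With CONFIG_PROC_FS, the fdinfo emitted above is visible via
 * /proc/<pid>/fdinfo/<map-fd>. Purely illustrative output (the numbers
 * depend on how the map was created):
 *
 *	map_type:	1
 *	key_size:	4
 *	value_size:	8
 *	max_entries:	256
 *	map_flags:	0x0
 *	memlock:	4096
 */
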
static const struct file_operations bpf_map_fops = {
#ifdef CONFIG_PROC_FS
	.show_fdinfo	= bpf_map_show_fdinfo,
#endif
	.release	= bpf_map_release,
};

int bpf_map_new_fd(struct bpf_map *map)
{
	return anon_inode_getfd("bpf-map", &bpf_map_fops, map,
				O_RDWR | O_CLOEXEC);
}

/* helper macro to check that unused fields of 'union bpf_attr' are zero */
#define CHECK_ATTR(CMD) \
	memchr_inv((void *) &attr->CMD##_LAST_FIELD + \
		   sizeof(attr->CMD##_LAST_FIELD), 0, \
		   sizeof(*attr) - \
		   offsetof(union bpf_attr, CMD##_LAST_FIELD) - \
		   sizeof(attr->CMD##_LAST_FIELD)) != NULL

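/* Worked example of the macro above: each command declares the last
 * 'union bpf_attr' field it consumes, e.g. BPF_MAP_CREATE_LAST_FIELD is
 * map_flags below. CHECK_ATTR(BPF_MAP_CREATE) therefore scans every byte
 * of the union from just past attr->map_flags up to sizeof(union bpf_attr)
 * and evaluates to true if any of them is non-zero, so a command can
 * reject attributes it does not understand.
 */
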
#define BPF_MAP_CREATE_LAST_FIELD map_flags
/* called via syscall */
static int map_create(union bpf_attr *attr)
{
	struct bpf_map *map;
	int err;

	err = CHECK_ATTR(BPF_MAP_CREATE);
	if (err)
		return -EINVAL;

	/* find map type and init map: hashtable vs rbtree vs bloom vs ... */
	map = find_and_alloc_map(attr);
	if (IS_ERR(map))
		return PTR_ERR(map);

	atomic_set(&map->refcnt, 1);
	atomic_set(&map->usercnt, 1);

	err = bpf_map_charge_memlock(map);
	if (err)
		goto free_map_nouncharge;

	err = bpf_map_new_fd(map);
	if (err < 0)
		/* failed to allocate fd */
		goto free_map;

	trace_bpf_map_create(map, err);
	return err;

free_map:
	bpf_map_uncharge_memlock(map);
free_map_nouncharge:
	map->ops->map_free(map);
	return err;
}

/* if error is returned, fd is released.
 * On success caller should complete fd access with matching fdput()
 */
struct bpf_map *__bpf_map_get(struct fd f)
{
	if (!f.file)
		return ERR_PTR(-EBADF);
	if (f.file->f_op != &bpf_map_fops) {
		fdput(f);
		return ERR_PTR(-EINVAL);
	}

	return f.file->private_data;
}

/* prog's and map's refcnt limit */
#define BPF_MAX_REFCNT 32768

struct bpf_map *bpf_map_inc(struct bpf_map *map, bool uref)
{
	if (atomic_inc_return(&map->refcnt) > BPF_MAX_REFCNT) {
		atomic_dec(&map->refcnt);
		return ERR_PTR(-EBUSY);
	}
	if (uref)
		atomic_inc(&map->usercnt);
	return map;
}

struct bpf_map *bpf_map_get_with_uref(u32 ufd)
{
	struct fd f = fdget(ufd);
	struct bpf_map *map;

	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return map;

	map = bpf_map_inc(map, true);
	fdput(f);

	return map;
}

int __weak bpf_stackmap_copy(struct bpf_map *map, void *key, void *value)
{
	return -ENOTSUPP;
}

/* last field in 'union bpf_attr' used by this command */
#define BPF_MAP_LOOKUP_ELEM_LAST_FIELD value

static int map_lookup_elem(union bpf_attr *attr)
{
	void __user *ukey = u64_to_user_ptr(attr->key);
	void __user *uvalue = u64_to_user_ptr(attr->value);
	int ufd = attr->map_fd;
	struct bpf_map *map;
	void *key, *value, *ptr;
	u32 value_size;
	struct fd f;
	int err;

	if (CHECK_ATTR(BPF_MAP_LOOKUP_ELEM))
		return -EINVAL;

	f = fdget(ufd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);

	err = -ENOMEM;
	key = kmalloc(map->key_size, GFP_USER);
	if (!key)
		goto err_put;

	err = -EFAULT;
	if (copy_from_user(key, ukey, map->key_size) != 0)
		goto free_key;

	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
		value_size = round_up(map->value_size, 8) * num_possible_cpus();
	else
		value_size = map->value_size;

	err = -ENOMEM;
	value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
	if (!value)
		goto free_key;

	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
		err = bpf_percpu_hash_copy(map, key, value);
	} else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
		err = bpf_percpu_array_copy(map, key, value);
	} else if (map->map_type == BPF_MAP_TYPE_STACK_TRACE) {
		err = bpf_stackmap_copy(map, key, value);
	} else {
		rcu_read_lock();
		ptr = map->ops->map_lookup_elem(map, key);
		if (ptr)
			memcpy(value, ptr, value_size);
		rcu_read_unlock();
		err = ptr ? 0 : -ENOENT;
	}

	if (err)
		goto free_value;

	err = -EFAULT;
	if (copy_to_user(uvalue, value, value_size) != 0)
		goto free_value;

	trace_bpf_map_lookup_elem(map, ufd, key, value);
	err = 0;

free_value:
	kfree(value);
free_key:
	kfree(key);
err_put:
	fdput(f);
	return err;
}

#define BPF_MAP_UPDATE_ELEM_LAST_FIELD flags

static int map_update_elem(union bpf_attr *attr)
{
	void __user *ukey = u64_to_user_ptr(attr->key);
	void __user *uvalue = u64_to_user_ptr(attr->value);
	int ufd = attr->map_fd;
	struct bpf_map *map;
	void *key, *value;
	u32 value_size;
	struct fd f;
	int err;

	if (CHECK_ATTR(BPF_MAP_UPDATE_ELEM))
		return -EINVAL;

	f = fdget(ufd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);

	err = -ENOMEM;
	key = kmalloc(map->key_size, GFP_USER);
	if (!key)
		goto err_put;

	err = -EFAULT;
	if (copy_from_user(key, ukey, map->key_size) != 0)
		goto free_key;

	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
		value_size = round_up(map->value_size, 8) * num_possible_cpus();
	else
		value_size = map->value_size;

	err = -ENOMEM;
	value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
	if (!value)
		goto free_key;

	err = -EFAULT;
	if (copy_from_user(value, uvalue, value_size) != 0)
		goto free_value;

	/* must increment bpf_prog_active to avoid kprobe+bpf triggering from
	 * inside bpf map update or delete; otherwise deadlocks are possible
	 */
	preempt_disable();
	__this_cpu_inc(bpf_prog_active);
	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
		err = bpf_percpu_hash_update(map, key, value, attr->flags);
	} else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
		err = bpf_percpu_array_update(map, key, value, attr->flags);
	} else if (map->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY ||
		   map->map_type == BPF_MAP_TYPE_PROG_ARRAY ||
		   map->map_type == BPF_MAP_TYPE_CGROUP_ARRAY) {
		rcu_read_lock();
		err = bpf_fd_array_map_update_elem(map, f.file, key, value,
						   attr->flags);
		rcu_read_unlock();
	} else {
		rcu_read_lock();
		err = map->ops->map_update_elem(map, key, value, attr->flags);
		rcu_read_unlock();
	}
	__this_cpu_dec(bpf_prog_active);
	preempt_enable();

	if (!err)
		trace_bpf_map_update_elem(map, ufd, key, value);
free_value:
	kfree(value);
free_key:
	kfree(key);
err_put:
	fdput(f);
	return err;
}

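/* Note on the per-cpu value sizing used by map_lookup_elem() and
 * map_update_elem() above: for PERCPU_HASH, LRU_PERCPU_HASH and
 * PERCPU_ARRAY maps, user space exchanges one value per possible CPU,
 * each rounded up to 8 bytes. Illustrative arithmetic only: with
 * value_size == 4 and num_possible_cpus() == 8, the user buffer must be
 * round_up(4, 8) * 8 == 64 bytes.
 */
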
#define BPF_MAP_DELETE_ELEM_LAST_FIELD key

static int map_delete_elem(union bpf_attr *attr)
{
	void __user *ukey = u64_to_user_ptr(attr->key);
	int ufd = attr->map_fd;
	struct bpf_map *map;
	struct fd f;
	void *key;
	int err;

	if (CHECK_ATTR(BPF_MAP_DELETE_ELEM))
		return -EINVAL;

	f = fdget(ufd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);

	err = -ENOMEM;
	key = kmalloc(map->key_size, GFP_USER);
	if (!key)
		goto err_put;

	err = -EFAULT;
	if (copy_from_user(key, ukey, map->key_size) != 0)
		goto free_key;

	preempt_disable();
	__this_cpu_inc(bpf_prog_active);
	rcu_read_lock();
	err = map->ops->map_delete_elem(map, key);
	rcu_read_unlock();
	__this_cpu_dec(bpf_prog_active);
	preempt_enable();

	if (!err)
		trace_bpf_map_delete_elem(map, ufd, key);
free_key:
	kfree(key);
err_put:
	fdput(f);
	return err;
}

/* last field in 'union bpf_attr' used by this command */
#define BPF_MAP_GET_NEXT_KEY_LAST_FIELD next_key

static int map_get_next_key(union bpf_attr *attr)
{
	void __user *ukey = u64_to_user_ptr(attr->key);
	void __user *unext_key = u64_to_user_ptr(attr->next_key);
	int ufd = attr->map_fd;
	struct bpf_map *map;
	void *key, *next_key;
	struct fd f;
	int err;

	if (CHECK_ATTR(BPF_MAP_GET_NEXT_KEY))
		return -EINVAL;

	f = fdget(ufd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);

	err = -ENOMEM;
	key = kmalloc(map->key_size, GFP_USER);
	if (!key)
		goto err_put;

	err = -EFAULT;
	if (copy_from_user(key, ukey, map->key_size) != 0)
		goto free_key;

	err = -ENOMEM;
	next_key = kmalloc(map->key_size, GFP_USER);
	if (!next_key)
		goto free_key;

	rcu_read_lock();
	err = map->ops->map_get_next_key(map, key, next_key);
	rcu_read_unlock();
	if (err)
		goto free_next_key;

	err = -EFAULT;
	if (copy_to_user(unext_key, next_key, map->key_size) != 0)
		goto free_next_key;

	trace_bpf_map_next_key(map, ufd, key, next_key);
	err = 0;

free_next_key:
	kfree(next_key);
free_key:
	kfree(key);
err_put:
	fdput(f);
	return err;
}

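/* Illustrative user-space iteration over a map using the command above;
 * a minimal sketch, assuming thin bpf() syscall wrappers that fill in
 * attr.map_fd, attr.key, attr.value and attr.next_key (the wrapper names
 * and error handling are not part of this file):
 *
 *	char key[KEY_SIZE], next_key[KEY_SIZE];
 *
 *	// start from any key; for hash-type maps a key that is not
 *	// present yields the first element
 *	while (bpf_map_get_next_key(map_fd, key, next_key) == 0) {
 *		bpf_map_lookup_elem(map_fd, next_key, value);
 *		memcpy(key, next_key, sizeof(key));
 *	}
 *	// the loop ends once map_get_next_key() returns -ENOENT
 */
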
static LIST_HEAD(bpf_prog_types);

static int find_prog_type(enum bpf_prog_type type, struct bpf_prog *prog)
{
	struct bpf_prog_type_list *tl;

	list_for_each_entry(tl, &bpf_prog_types, list_node) {
		if (tl->type == type) {
			prog->aux->ops = tl->ops;
			prog->type = type;
			return 0;
		}
	}

	return -EINVAL;
}

void bpf_register_prog_type(struct bpf_prog_type_list *tl)
{
	list_add(&tl->list_node, &bpf_prog_types);
}

/* fixup insn->imm field of bpf_call instructions:
 * if (insn->imm == BPF_FUNC_map_lookup_elem)
 *      insn->imm = bpf_map_lookup_elem - __bpf_call_base;
 * else if (insn->imm == BPF_FUNC_map_update_elem)
 *      insn->imm = bpf_map_update_elem - __bpf_call_base;
 * else ...
 *
 * this function is called after the eBPF program passed verification
 */
static void fixup_bpf_calls(struct bpf_prog *prog)
{
	const struct bpf_func_proto *fn;
	int i;

	for (i = 0; i < prog->len; i++) {
		struct bpf_insn *insn = &prog->insnsi[i];

		if (insn->code == (BPF_JMP | BPF_CALL)) {
			/* we only reach here when the program has bpf_call
			 * instructions and it passed bpf_check(), which means
			 * that ops->get_func_proto must have been supplied;
			 * check it
			 */
			BUG_ON(!prog->aux->ops->get_func_proto);

			if (insn->imm == BPF_FUNC_get_route_realm)
				prog->dst_needed = 1;
			if (insn->imm == BPF_FUNC_get_prandom_u32)
				bpf_user_rnd_init_once();
			if (insn->imm == BPF_FUNC_xdp_adjust_head)
				prog->xdp_adjust_head = 1;
			if (insn->imm == BPF_FUNC_tail_call) {
				/* mark bpf_tail_call as different opcode
				 * to avoid conditional branch in
				 * interpreter for every normal call
				 * and to prevent accidental JITing by
				 * JIT compiler that doesn't support
				 * bpf_tail_call yet
				 */
				insn->imm = 0;
				insn->code |= BPF_X;
				continue;
			}

			fn = prog->aux->ops->get_func_proto(insn->imm);
			/* every function that has a prototype and that the
			 * verifier allowed programs to call must be a real
			 * in-kernel function
			 */
			BUG_ON(!fn->func);
			insn->imm = fn->func - __bpf_call_base;
		}
	}
}

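/* After the rewrite above, a helper call no longer carries a BPF_FUNC_*
 * id in insn->imm but an offset relative to __bpf_call_base, so the
 * interpreter (and the JITs) can resolve the target without a lookup
 * table. Conceptually (a sketch, not a quote of the interpreter code):
 *
 *	helper = (void *)__bpf_call_base + insn->imm;
 *	BPF_R0 = helper(BPF_R1, BPF_R2, BPF_R3, BPF_R4, BPF_R5);
 */
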
/* drop refcnt on maps used by eBPF program and free auxiliary data */
static void free_used_maps(struct bpf_prog_aux *aux)
{
	int i;

	for (i = 0; i < aux->used_map_cnt; i++)
		bpf_map_put(aux->used_maps[i]);

	kfree(aux->used_maps);
}

int __bpf_prog_charge(struct user_struct *user, u32 pages)
{
	unsigned long memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
	unsigned long user_bufs;

	if (user) {
		user_bufs = atomic_long_add_return(pages, &user->locked_vm);
		if (user_bufs > memlock_limit) {
			atomic_long_sub(pages, &user->locked_vm);
			return -EPERM;
		}
	}

	return 0;
}

void __bpf_prog_uncharge(struct user_struct *user, u32 pages)
{
	if (user)
		atomic_long_sub(pages, &user->locked_vm);
}

static int bpf_prog_charge_memlock(struct bpf_prog *prog)
{
	struct user_struct *user = get_current_user();
	int ret;

	ret = __bpf_prog_charge(user, prog->pages);
	if (ret) {
		free_uid(user);
		return ret;
	}

	prog->aux->user = user;
	return 0;
}

static void bpf_prog_uncharge_memlock(struct bpf_prog *prog)
{
	struct user_struct *user = prog->aux->user;

	__bpf_prog_uncharge(user, prog->pages);
	free_uid(user);
}

static void __bpf_prog_put_rcu(struct rcu_head *rcu)
{
	struct bpf_prog_aux *aux = container_of(rcu, struct bpf_prog_aux, rcu);

	free_used_maps(aux);
	bpf_prog_uncharge_memlock(aux->prog);
	bpf_prog_free(aux->prog);
}

void bpf_prog_put(struct bpf_prog *prog)
{
	if (atomic_dec_and_test(&prog->aux->refcnt)) {
		trace_bpf_prog_put_rcu(prog);
		bpf_prog_kallsyms_del(prog);
		call_rcu(&prog->aux->rcu, __bpf_prog_put_rcu);
	}
}
EXPORT_SYMBOL_GPL(bpf_prog_put);

static int bpf_prog_release(struct inode *inode, struct file *filp)
{
	struct bpf_prog *prog = filp->private_data;

	bpf_prog_put(prog);
	return 0;
}

#ifdef CONFIG_PROC_FS
static void bpf_prog_show_fdinfo(struct seq_file *m, struct file *filp)
{
	const struct bpf_prog *prog = filp->private_data;
	char prog_tag[sizeof(prog->tag) * 2 + 1] = { };

	bin2hex(prog_tag, prog->tag, sizeof(prog->tag));
	seq_printf(m,
		   "prog_type:\t%u\n"
		   "prog_jited:\t%u\n"
		   "prog_tag:\t%s\n"
		   "memlock:\t%llu\n",
		   prog->type,
		   prog->jited,
		   prog_tag,
		   prog->pages * 1ULL << PAGE_SHIFT);
}
#endif

static const struct file_operations bpf_prog_fops = {
#ifdef CONFIG_PROC_FS
	.show_fdinfo	= bpf_prog_show_fdinfo,
#endif
	.release	= bpf_prog_release,
};

int bpf_prog_new_fd(struct bpf_prog *prog)
{
	return anon_inode_getfd("bpf-prog", &bpf_prog_fops, prog,
				O_RDWR | O_CLOEXEC);
}

static struct bpf_prog *____bpf_prog_get(struct fd f)
{
	if (!f.file)
		return ERR_PTR(-EBADF);
	if (f.file->f_op != &bpf_prog_fops) {
		fdput(f);
		return ERR_PTR(-EINVAL);
	}

	return f.file->private_data;
}

struct bpf_prog *bpf_prog_add(struct bpf_prog *prog, int i)
{
	if (atomic_add_return(i, &prog->aux->refcnt) > BPF_MAX_REFCNT) {
		atomic_sub(i, &prog->aux->refcnt);
		return ERR_PTR(-EBUSY);
	}
	return prog;
}
EXPORT_SYMBOL_GPL(bpf_prog_add);

void bpf_prog_sub(struct bpf_prog *prog, int i)
{
	/* Only to be used for undoing previous bpf_prog_add() in some
	 * error path. We still know that another entity in our call
	 * path holds a reference to the program, thus atomic_sub() can
	 * be safely used in such cases!
	 */
	WARN_ON(atomic_sub_return(i, &prog->aux->refcnt) == 0);
}
EXPORT_SYMBOL_GPL(bpf_prog_sub);

struct bpf_prog *bpf_prog_inc(struct bpf_prog *prog)
{
	return bpf_prog_add(prog, 1);
}
EXPORT_SYMBOL_GPL(bpf_prog_inc);

static struct bpf_prog *__bpf_prog_get(u32 ufd, enum bpf_prog_type *type)
{
	struct fd f = fdget(ufd);
	struct bpf_prog *prog;

	prog = ____bpf_prog_get(f);
	if (IS_ERR(prog))
		return prog;
	if (type && prog->type != *type) {
		prog = ERR_PTR(-EINVAL);
		goto out;
	}

	prog = bpf_prog_inc(prog);
out:
	fdput(f);
	return prog;
}

struct bpf_prog *bpf_prog_get(u32 ufd)
{
	return __bpf_prog_get(ufd, NULL);
}

struct bpf_prog *bpf_prog_get_type(u32 ufd, enum bpf_prog_type type)
{
	struct bpf_prog *prog = __bpf_prog_get(ufd, &type);

	if (!IS_ERR(prog))
		trace_bpf_prog_get_type(prog);
	return prog;
}
EXPORT_SYMBOL_GPL(bpf_prog_get_type);

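/* Illustrative user-space use of the command implemented below; a
 * minimal sketch only (ptr_to_u64() and the trivial "return 0" program
 * are assumptions for the example, not part of this file):
 *
 *	struct bpf_insn insns[] = {
 *		BPF_MOV64_IMM(BPF_REG_0, 0),	// r0 = 0
 *		BPF_EXIT_INSN(),		// return r0
 *	};
 *	union bpf_attr attr = { };
 *
 *	attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
 *	attr.insns     = ptr_to_u64(insns);
 *	attr.insn_cnt  = sizeof(insns) / sizeof(insns[0]);
 *	attr.license   = ptr_to_u64("GPL");
 *
 *	int prog_fd = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
 */
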
/* last field in 'union bpf_attr' used by this command */
#define BPF_PROG_LOAD_LAST_FIELD kern_version

static int bpf_prog_load(union bpf_attr *attr)
{
	enum bpf_prog_type type = attr->prog_type;
	struct bpf_prog *prog;
	int err;
	char license[128];
	bool is_gpl;

	if (CHECK_ATTR(BPF_PROG_LOAD))
		return -EINVAL;

	/* copy eBPF program license from user space */
	if (strncpy_from_user(license, u64_to_user_ptr(attr->license),
			      sizeof(license) - 1) < 0)
		return -EFAULT;
	license[sizeof(license) - 1] = 0;

	/* eBPF programs must be GPL compatible to use GPL-ed functions */
	is_gpl = license_is_gpl_compatible(license);

	if (attr->insn_cnt == 0 || attr->insn_cnt > BPF_MAXINSNS)
		return -E2BIG;

	if (type == BPF_PROG_TYPE_KPROBE &&
	    attr->kern_version != LINUX_VERSION_CODE)
		return -EINVAL;

	if (type != BPF_PROG_TYPE_SOCKET_FILTER && !capable(CAP_SYS_ADMIN))
		return -EPERM;

	/* plain bpf_prog allocation */
	prog = bpf_prog_alloc(bpf_prog_size(attr->insn_cnt), GFP_USER);
	if (!prog)
		return -ENOMEM;

	err = bpf_prog_charge_memlock(prog);
	if (err)
		goto free_prog_nouncharge;

	prog->len = attr->insn_cnt;

	err = -EFAULT;
	if (copy_from_user(prog->insns, u64_to_user_ptr(attr->insns),
			   bpf_prog_insn_size(prog)) != 0)
		goto free_prog;

	prog->orig_prog = NULL;
	prog->jited = 0;

	atomic_set(&prog->aux->refcnt, 1);
	prog->gpl_compatible = is_gpl ? 1 : 0;

	/* find program type: socket_filter vs tracing_filter */
	err = find_prog_type(type, prog);
	if (err < 0)
		goto free_prog;

	/* run eBPF verifier */
	err = bpf_check(&prog, attr);
	if (err < 0)
		goto free_used_maps;

	/* fixup BPF_CALL->imm field */
	fixup_bpf_calls(prog);

	/* eBPF program is ready to be JITed */
	prog = bpf_prog_select_runtime(prog, &err);
	if (err < 0)
		goto free_used_maps;

	err = bpf_prog_new_fd(prog);
	if (err < 0)
		/* failed to allocate fd */
		goto free_used_maps;

	bpf_prog_kallsyms_add(prog);
	trace_bpf_prog_load(prog, err);
	return err;

free_used_maps:
	free_used_maps(prog->aux);
free_prog:
	bpf_prog_uncharge_memlock(prog);
free_prog_nouncharge:
	bpf_prog_free(prog);
	return err;
}

#define BPF_OBJ_LAST_FIELD bpf_fd

static int bpf_obj_pin(const union bpf_attr *attr)
{
	if (CHECK_ATTR(BPF_OBJ))
		return -EINVAL;

	return bpf_obj_pin_user(attr->bpf_fd, u64_to_user_ptr(attr->pathname));
}

static int bpf_obj_get(const union bpf_attr *attr)
{
	if (CHECK_ATTR(BPF_OBJ) || attr->bpf_fd != 0)
		return -EINVAL;

	return bpf_obj_get_user(u64_to_user_ptr(attr->pathname));
}

#ifdef CONFIG_CGROUP_BPF

#define BPF_PROG_ATTACH_LAST_FIELD attach_flags

static int bpf_prog_attach(const union bpf_attr *attr)
{
	enum bpf_prog_type ptype;
	struct bpf_prog *prog;
	struct cgroup *cgrp;
	int ret;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	if (CHECK_ATTR(BPF_PROG_ATTACH))
		return -EINVAL;

	if (attr->attach_flags & ~BPF_F_ALLOW_OVERRIDE)
		return -EINVAL;

	switch (attr->attach_type) {
	case BPF_CGROUP_INET_INGRESS:
	case BPF_CGROUP_INET_EGRESS:
		ptype = BPF_PROG_TYPE_CGROUP_SKB;
		break;
	case BPF_CGROUP_INET_SOCK_CREATE:
		ptype = BPF_PROG_TYPE_CGROUP_SOCK;
		break;
	default:
		return -EINVAL;
	}

	prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype);
	if (IS_ERR(prog))
		return PTR_ERR(prog);

	cgrp = cgroup_get_from_fd(attr->target_fd);
	if (IS_ERR(cgrp)) {
		bpf_prog_put(prog);
		return PTR_ERR(cgrp);
	}

	ret = cgroup_bpf_update(cgrp, prog, attr->attach_type,
				attr->attach_flags & BPF_F_ALLOW_OVERRIDE);
	if (ret)
		bpf_prog_put(prog);
	cgroup_put(cgrp);

	return ret;
}

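/* Illustrative user-space attach of a loaded cgroup program; a minimal
 * sketch only (cgroup_fd and prog_fd are assumed to come from open() on
 * a cgroup v2 directory and from BPF_PROG_LOAD respectively):
 *
 *	union bpf_attr attr = { };
 *
 *	attr.target_fd	   = cgroup_fd;
 *	attr.attach_bpf_fd = prog_fd;
 *	attr.attach_type   = BPF_CGROUP_INET_INGRESS;
 *
 *	syscall(__NR_bpf, BPF_PROG_ATTACH, &attr, sizeof(attr));
 */
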
#define BPF_PROG_DETACH_LAST_FIELD attach_type

static int bpf_prog_detach(const union bpf_attr *attr)
{
	struct cgroup *cgrp;
	int ret;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	if (CHECK_ATTR(BPF_PROG_DETACH))
		return -EINVAL;

	switch (attr->attach_type) {
	case BPF_CGROUP_INET_INGRESS:
	case BPF_CGROUP_INET_EGRESS:
	case BPF_CGROUP_INET_SOCK_CREATE:
		cgrp = cgroup_get_from_fd(attr->target_fd);
		if (IS_ERR(cgrp))
			return PTR_ERR(cgrp);

		ret = cgroup_bpf_update(cgrp, NULL, attr->attach_type, false);
		cgroup_put(cgrp);
		break;

	default:
		return -EINVAL;
	}

	return ret;
}
#endif /* CONFIG_CGROUP_BPF */

SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
{
	union bpf_attr attr = {};
	int err;

	if (!capable(CAP_SYS_ADMIN) && sysctl_unprivileged_bpf_disabled)
		return -EPERM;

	if (!access_ok(VERIFY_READ, uattr, 1))
		return -EFAULT;

	if (size > PAGE_SIZE)	/* silly large */
		return -E2BIG;

	/* If we're handed a bigger struct than we know of,
	 * ensure all the unknown bits are 0 - i.e. new
	 * user-space does not rely on any kernel feature
	 * extensions we don't know about yet.
	 */
	if (size > sizeof(attr)) {
		unsigned char __user *addr;
		unsigned char __user *end;
		unsigned char val;

		addr = (void __user *)uattr + sizeof(attr);
		end  = (void __user *)uattr + size;

		for (; addr < end; addr++) {
			err = get_user(val, addr);
			if (err)
				return err;
			if (val)
				return -E2BIG;
		}
		size = sizeof(attr);
	}

	/* copy attributes from user space, may be less than sizeof(bpf_attr) */
	if (copy_from_user(&attr, uattr, size) != 0)
		return -EFAULT;

	switch (cmd) {
	case BPF_MAP_CREATE:
		err = map_create(&attr);
		break;
	case BPF_MAP_LOOKUP_ELEM:
		err = map_lookup_elem(&attr);
		break;
	case BPF_MAP_UPDATE_ELEM:
		err = map_update_elem(&attr);
		break;
	case BPF_MAP_DELETE_ELEM:
		err = map_delete_elem(&attr);
		break;
	case BPF_MAP_GET_NEXT_KEY:
		err = map_get_next_key(&attr);
		break;
	case BPF_PROG_LOAD:
		err = bpf_prog_load(&attr);
		break;
	case BPF_OBJ_PIN:
		err = bpf_obj_pin(&attr);
		break;
	case BPF_OBJ_GET:
		err = bpf_obj_get(&attr);
		break;

#ifdef CONFIG_CGROUP_BPF
	case BPF_PROG_ATTACH:
		err = bpf_prog_attach(&attr);
		break;
	case BPF_PROG_DETACH:
		err = bpf_prog_detach(&attr);
		break;
#endif

	default:
		err = -EINVAL;
		break;
	}

	return err;
}