/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 */
#include <linux/bpf.h>
#include <linux/syscalls.h>
#include <linux/slab.h>
#include <linux/anon_inodes.h>
#include <linux/file.h>
#include <linux/license.h>
#include <linux/filter.h>
#include <linux/version.h>

int sysctl_unprivileged_bpf_disabled __read_mostly;

static LIST_HEAD(bpf_map_types);

static struct bpf_map *find_and_alloc_map(union bpf_attr *attr)
{
	struct bpf_map_type_list *tl;
	struct bpf_map *map;

	list_for_each_entry(tl, &bpf_map_types, list_node) {
		if (tl->type == attr->map_type) {
			map = tl->ops->map_alloc(attr);
			if (IS_ERR(map))
				return map;
			map->ops = tl->ops;
			map->map_type = attr->map_type;
			return map;
		}
	}
	return ERR_PTR(-EINVAL);
}

/* boot time registration of different map implementations */
void bpf_register_map_type(struct bpf_map_type_list *tl)
{
	list_add(&tl->list_node, &bpf_map_types);
}

static int bpf_map_charge_memlock(struct bpf_map *map)
{
	struct user_struct *user = get_current_user();
	unsigned long memlock_limit;

	memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;

	atomic_long_add(map->pages, &user->locked_vm);

	if (atomic_long_read(&user->locked_vm) > memlock_limit) {
		atomic_long_sub(map->pages, &user->locked_vm);
		free_uid(user);
		return -EPERM;
	}
	map->user = user;
	return 0;
}

static void bpf_map_uncharge_memlock(struct bpf_map *map)
{
	struct user_struct *user = map->user;

	atomic_long_sub(map->pages, &user->locked_vm);
	free_uid(user);
}

/* called from workqueue */
static void bpf_map_free_deferred(struct work_struct *work)
{
	struct bpf_map *map = container_of(work, struct bpf_map, work);

	bpf_map_uncharge_memlock(map);
	/* implementation dependent freeing */
	map->ops->map_free(map);
}

/* decrement map refcnt and schedule it for freeing via workqueue
 * (the underlying map implementation's ops->map_free() might sleep)
 */
void bpf_map_put(struct bpf_map *map)
{
	if (atomic_dec_and_test(&map->refcnt)) {
		INIT_WORK(&map->work, bpf_map_free_deferred);
		schedule_work(&map->work);
	}
}

static int bpf_map_release(struct inode *inode, struct file *filp)
{
	struct bpf_map *map = filp->private_data;

	if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY)
		/* prog_array stores refcnt-ed bpf_prog pointers;
		 * release them all when user space closes prog_array_fd
		 */
		bpf_fd_array_map_clear(map);

	bpf_map_put(map);
	return 0;
}

static const struct file_operations bpf_map_fops = {
	.release = bpf_map_release,
};

int bpf_map_new_fd(struct bpf_map *map)
{
	return anon_inode_getfd("bpf-map", &bpf_map_fops, map,
				O_RDWR | O_CLOEXEC);
}
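/* Example (not part of this file): a minimal user-space sketch showing how
 * a map fd produced by bpf_map_new_fd() comes into existence via the
 * BPF_MAP_CREATE command handled below. Only the uapi <linux/bpf.h>
 * definitions are assumed; the helper name create_hash_map() is purely
 * illustrative.
 *
 *	#include <linux/bpf.h>
 *	#include <string.h>
 *	#include <unistd.h>
 *	#include <sys/syscall.h>
 *
 *	static int create_hash_map(__u32 key_size, __u32 value_size,
 *				   __u32 max_entries)
 *	{
 *		union bpf_attr attr;
 *
 *		// unused fields must be zero, see CHECK_ATTR() below
 *		memset(&attr, 0, sizeof(attr));
 *		attr.map_type = BPF_MAP_TYPE_HASH;
 *		attr.key_size = key_size;
 *		attr.value_size = value_size;
 *		attr.max_entries = max_entries;
 *
 *		// on success the return value is a new map fd
 *		return syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr));
 *	}
 */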
/* helper macro to check that unused fields of 'union bpf_attr' are zero */
#define CHECK_ATTR(CMD) \
	memchr_inv((void *) &attr->CMD##_LAST_FIELD + \
		   sizeof(attr->CMD##_LAST_FIELD), 0, \
		   sizeof(*attr) - \
		   offsetof(union bpf_attr, CMD##_LAST_FIELD) - \
		   sizeof(attr->CMD##_LAST_FIELD)) != NULL

#define BPF_MAP_CREATE_LAST_FIELD max_entries
/* called via syscall */
static int map_create(union bpf_attr *attr)
{
	struct bpf_map *map;
	int err;

	err = CHECK_ATTR(BPF_MAP_CREATE);
	if (err)
		return -EINVAL;

	/* find map type and init map: hashtable vs rbtree vs bloom vs ... */
	map = find_and_alloc_map(attr);
	if (IS_ERR(map))
		return PTR_ERR(map);

	atomic_set(&map->refcnt, 1);

	err = bpf_map_charge_memlock(map);
	if (err)
		goto free_map;

	err = bpf_map_new_fd(map);
	if (err < 0)
		/* failed to allocate fd */
		goto free_map;

	return err;

free_map:
	map->ops->map_free(map);
	return err;
}

/* if an error is returned, the fd is released.
 * On success the caller should complete fd access with a matching fdput()
 */
struct bpf_map *__bpf_map_get(struct fd f)
{
	if (!f.file)
		return ERR_PTR(-EBADF);
	if (f.file->f_op != &bpf_map_fops) {
		fdput(f);
		return ERR_PTR(-EINVAL);
	}

	return f.file->private_data;
}

struct bpf_map *bpf_map_get(u32 ufd)
{
	struct fd f = fdget(ufd);
	struct bpf_map *map;

	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return map;

	atomic_inc(&map->refcnt);
	fdput(f);

	return map;
}

/* helper to convert user pointers passed inside __aligned_u64 fields */
static void __user *u64_to_ptr(__u64 val)
{
	return (void __user *) (unsigned long) val;
}

/* last field in 'union bpf_attr' used by this command */
#define BPF_MAP_LOOKUP_ELEM_LAST_FIELD value

static int map_lookup_elem(union bpf_attr *attr)
{
	void __user *ukey = u64_to_ptr(attr->key);
	void __user *uvalue = u64_to_ptr(attr->value);
	int ufd = attr->map_fd;
	struct bpf_map *map;
	void *key, *value, *ptr;
	struct fd f;
	int err;

	if (CHECK_ATTR(BPF_MAP_LOOKUP_ELEM))
		return -EINVAL;

	f = fdget(ufd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);

	err = -ENOMEM;
	key = kmalloc(map->key_size, GFP_USER);
	if (!key)
		goto err_put;

	err = -EFAULT;
	if (copy_from_user(key, ukey, map->key_size) != 0)
		goto free_key;

	err = -ENOMEM;
	value = kmalloc(map->value_size, GFP_USER);
	if (!value)
		goto free_key;

	rcu_read_lock();
	ptr = map->ops->map_lookup_elem(map, key);
	if (ptr)
		memcpy(value, ptr, map->value_size);
	rcu_read_unlock();

	err = -ENOENT;
	if (!ptr)
		goto free_value;

	err = -EFAULT;
	if (copy_to_user(uvalue, value, map->value_size) != 0)
		goto free_value;

	err = 0;

free_value:
	kfree(value);
free_key:
	kfree(key);
err_put:
	fdput(f);
	return err;
}
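/* Example (not part of this file): the user-space side of the lookup path
 * above, as a hedged sketch. Key and value pointers travel as
 * __aligned_u64 values and are converted back with u64_to_ptr(); the
 * helper name lookup_elem() is illustrative.
 *
 *	static int lookup_elem(int map_fd, const void *key, void *value)
 *	{
 *		union bpf_attr attr;
 *
 *		memset(&attr, 0, sizeof(attr));
 *		attr.map_fd = map_fd;
 *		attr.key = (__u64)(unsigned long)key;
 *		attr.value = (__u64)(unsigned long)value;
 *
 *		// 0 on success; fails with ENOENT if the key is absent
 *		return syscall(__NR_bpf, BPF_MAP_LOOKUP_ELEM, &attr,
 *			       sizeof(attr));
 *	}
 */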
#define BPF_MAP_UPDATE_ELEM_LAST_FIELD flags

static int map_update_elem(union bpf_attr *attr)
{
	void __user *ukey = u64_to_ptr(attr->key);
	void __user *uvalue = u64_to_ptr(attr->value);
	int ufd = attr->map_fd;
	struct bpf_map *map;
	void *key, *value;
	struct fd f;
	int err;

	if (CHECK_ATTR(BPF_MAP_UPDATE_ELEM))
		return -EINVAL;

	f = fdget(ufd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);

	err = -ENOMEM;
	key = kmalloc(map->key_size, GFP_USER);
	if (!key)
		goto err_put;

	err = -EFAULT;
	if (copy_from_user(key, ukey, map->key_size) != 0)
		goto free_key;

	err = -ENOMEM;
	value = kmalloc(map->value_size, GFP_USER);
	if (!value)
		goto free_key;

	err = -EFAULT;
	if (copy_from_user(value, uvalue, map->value_size) != 0)
		goto free_value;

	/* eBPF programs that use maps run under rcu_read_lock();
	 * all map accessors rely on this fact, so do the same here
	 */
	rcu_read_lock();
	err = map->ops->map_update_elem(map, key, value, attr->flags);
	rcu_read_unlock();

free_value:
	kfree(value);
free_key:
	kfree(key);
err_put:
	fdput(f);
	return err;
}

#define BPF_MAP_DELETE_ELEM_LAST_FIELD key

static int map_delete_elem(union bpf_attr *attr)
{
	void __user *ukey = u64_to_ptr(attr->key);
	int ufd = attr->map_fd;
	struct bpf_map *map;
	struct fd f;
	void *key;
	int err;

	if (CHECK_ATTR(BPF_MAP_DELETE_ELEM))
		return -EINVAL;

	f = fdget(ufd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);

	err = -ENOMEM;
	key = kmalloc(map->key_size, GFP_USER);
	if (!key)
		goto err_put;

	err = -EFAULT;
	if (copy_from_user(key, ukey, map->key_size) != 0)
		goto free_key;

	rcu_read_lock();
	err = map->ops->map_delete_elem(map, key);
	rcu_read_unlock();

free_key:
	kfree(key);
err_put:
	fdput(f);
	return err;
}

/* last field in 'union bpf_attr' used by this command */
#define BPF_MAP_GET_NEXT_KEY_LAST_FIELD next_key

static int map_get_next_key(union bpf_attr *attr)
{
	void __user *ukey = u64_to_ptr(attr->key);
	void __user *unext_key = u64_to_ptr(attr->next_key);
	int ufd = attr->map_fd;
	struct bpf_map *map;
	void *key, *next_key;
	struct fd f;
	int err;

	if (CHECK_ATTR(BPF_MAP_GET_NEXT_KEY))
		return -EINVAL;

	f = fdget(ufd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);

	err = -ENOMEM;
	key = kmalloc(map->key_size, GFP_USER);
	if (!key)
		goto err_put;

	err = -EFAULT;
	if (copy_from_user(key, ukey, map->key_size) != 0)
		goto free_key;

	err = -ENOMEM;
	next_key = kmalloc(map->key_size, GFP_USER);
	if (!next_key)
		goto free_key;

	rcu_read_lock();
	err = map->ops->map_get_next_key(map, key, next_key);
	rcu_read_unlock();
	if (err)
		goto free_next_key;

	err = -EFAULT;
	if (copy_to_user(unext_key, next_key, map->key_size) != 0)
		goto free_next_key;

	err = 0;

free_next_key:
	kfree(next_key);
free_key:
	kfree(key);
err_put:
	fdput(f);
	return err;
}

static LIST_HEAD(bpf_prog_types);

static int find_prog_type(enum bpf_prog_type type, struct bpf_prog *prog)
{
	struct bpf_prog_type_list *tl;

	list_for_each_entry(tl, &bpf_prog_types, list_node) {
		if (tl->type == type) {
			prog->aux->ops = tl->ops;
			prog->type = type;
			return 0;
		}
	}

	return -EINVAL;
}

void bpf_register_prog_type(struct bpf_prog_type_list *tl)
{
	list_add(&tl->list_node, &bpf_prog_types);
}
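/* Example (not part of this file): how a subsystem registers its program
 * type at boot, in the style of the socket filter code; the names
 * sk_filter_ops and sk_filter_tl are illustrative stand-ins for whatever
 * the registering subsystem defines.
 *
 *	static struct bpf_prog_type_list sk_filter_tl = {
 *		.ops	= &sk_filter_ops,
 *		.type	= BPF_PROG_TYPE_SOCKET_FILTER,
 *	};
 *
 *	static int __init register_sk_filter_ops(void)
 *	{
 *		bpf_register_prog_type(&sk_filter_tl);
 *		return 0;
 *	}
 *	late_initcall(register_sk_filter_ops);
 */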
/* fixup insn->imm field of bpf_call instructions:
 * if (insn->imm == BPF_FUNC_map_lookup_elem)
 *      insn->imm = bpf_map_lookup_elem - __bpf_call_base;
 * else if (insn->imm == BPF_FUNC_map_update_elem)
 *      insn->imm = bpf_map_update_elem - __bpf_call_base;
 * else ...
 *
 * this function is called after the eBPF program passed verification
 */
static void fixup_bpf_calls(struct bpf_prog *prog)
{
	const struct bpf_func_proto *fn;
	int i;

	for (i = 0; i < prog->len; i++) {
		struct bpf_insn *insn = &prog->insnsi[i];

		if (insn->code == (BPF_JMP | BPF_CALL)) {
			/* we reach here when the program has bpf_call
			 * instructions and it passed bpf_check(), which means
			 * ops->get_func_proto must have been supplied; check it
			 */
			BUG_ON(!prog->aux->ops->get_func_proto);

			if (insn->imm == BPF_FUNC_get_route_realm)
				prog->dst_needed = 1;
			if (insn->imm == BPF_FUNC_get_prandom_u32)
				bpf_user_rnd_init_once();
			if (insn->imm == BPF_FUNC_tail_call) {
				/* mark bpf_tail_call as a different opcode
				 * to avoid a conditional branch in the
				 * interpreter for every normal call
				 * and to prevent accidental JITing by
				 * a JIT compiler that doesn't support
				 * bpf_tail_call yet
				 */
				insn->imm = 0;
				insn->code |= BPF_X;
				continue;
			}

			fn = prog->aux->ops->get_func_proto(insn->imm);
			/* every function that has a prototype and that the
			 * verifier allowed programs to call must be a real
			 * in-kernel function
			 */
			BUG_ON(!fn->func);
			insn->imm = fn->func - __bpf_call_base;
		}
	}
}

/* drop refcnt on maps used by eBPF program and free auxiliary data */
static void free_used_maps(struct bpf_prog_aux *aux)
{
	int i;

	for (i = 0; i < aux->used_map_cnt; i++)
		bpf_map_put(aux->used_maps[i]);

	kfree(aux->used_maps);
}

static int bpf_prog_charge_memlock(struct bpf_prog *prog)
{
	struct user_struct *user = get_current_user();
	unsigned long memlock_limit;

	memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;

	atomic_long_add(prog->pages, &user->locked_vm);
	if (atomic_long_read(&user->locked_vm) > memlock_limit) {
		atomic_long_sub(prog->pages, &user->locked_vm);
		free_uid(user);
		return -EPERM;
	}
	prog->aux->user = user;
	return 0;
}

static void bpf_prog_uncharge_memlock(struct bpf_prog *prog)
{
	struct user_struct *user = prog->aux->user;

	atomic_long_sub(prog->pages, &user->locked_vm);
	free_uid(user);
}

static void __prog_put_common(struct rcu_head *rcu)
{
	struct bpf_prog_aux *aux = container_of(rcu, struct bpf_prog_aux, rcu);

	free_used_maps(aux);
	bpf_prog_uncharge_memlock(aux->prog);
	bpf_prog_free(aux->prog);
}

/* version of bpf_prog_put() that is called after a grace period */
void bpf_prog_put_rcu(struct bpf_prog *prog)
{
	if (atomic_dec_and_test(&prog->aux->refcnt))
		call_rcu(&prog->aux->rcu, __prog_put_common);
}

void bpf_prog_put(struct bpf_prog *prog)
{
	if (atomic_dec_and_test(&prog->aux->refcnt))
		__prog_put_common(&prog->aux->rcu);
}
EXPORT_SYMBOL_GPL(bpf_prog_put);

static int bpf_prog_release(struct inode *inode, struct file *filp)
{
	struct bpf_prog *prog = filp->private_data;

	bpf_prog_put_rcu(prog);
	return 0;
}

static const struct file_operations bpf_prog_fops = {
	.release = bpf_prog_release,
};

int bpf_prog_new_fd(struct bpf_prog *prog)
{
	return anon_inode_getfd("bpf-prog", &bpf_prog_fops, prog,
				O_RDWR | O_CLOEXEC);
}
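/* Example (not part of this file): sketch of how a kernel-side consumer
 * pairs bpf_prog_get() with bpf_prog_put(), per the comment on
 * bpf_prog_get() below; attach_prog_to_event() is a hypothetical
 * placeholder for the consumer-specific attach step.
 *
 *	struct bpf_prog *prog = bpf_prog_get(ufd);
 *
 *	if (IS_ERR(prog))
 *		return PTR_ERR(prog);
 *	err = attach_prog_to_event(event, prog);
 *	if (err) {
 *		bpf_prog_put(prog);	// drop the ref taken by bpf_prog_get()
 *		return err;
 *	}
 */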
static struct bpf_prog *__bpf_prog_get(struct fd f)
{
	if (!f.file)
		return ERR_PTR(-EBADF);
	if (f.file->f_op != &bpf_prog_fops) {
		fdput(f);
		return ERR_PTR(-EINVAL);
	}

	return f.file->private_data;
}

/* called by sockets/tracing/seccomp before attaching program to an event
 * pairs with bpf_prog_put()
 */
struct bpf_prog *bpf_prog_get(u32 ufd)
{
	struct fd f = fdget(ufd);
	struct bpf_prog *prog;

	prog = __bpf_prog_get(f);
	if (IS_ERR(prog))
		return prog;

	atomic_inc(&prog->aux->refcnt);
	fdput(f);

	return prog;
}
EXPORT_SYMBOL_GPL(bpf_prog_get);
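/* Example (not part of this file): a minimal user-space BPF_PROG_LOAD
 * sketch matching the handler below: two raw instructions (r0 = 1; exit)
 * forming a trivial socket filter, built only from uapi <linux/bpf.h>
 * definitions; load_trivial_prog() is an illustrative name.
 *
 *	static int load_trivial_prog(void)
 *	{
 *		struct bpf_insn insns[] = {
 *			{ .code = BPF_ALU64 | BPF_MOV | BPF_K,
 *			  .dst_reg = BPF_REG_0, .imm = 1 },	// r0 = 1
 *			{ .code = BPF_JMP | BPF_EXIT },		// return r0
 *		};
 *		union bpf_attr attr;
 *
 *		memset(&attr, 0, sizeof(attr));
 *		attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
 *		attr.insns = (__u64)(unsigned long)insns;
 *		attr.insn_cnt = sizeof(insns) / sizeof(insns[0]);
 *		attr.license = (__u64)(unsigned long)"GPL";
 *
 *		// on success the return value is a new prog fd
 *		return syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
 *	}
 */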
/* last field in 'union bpf_attr' used by this command */
#define BPF_PROG_LOAD_LAST_FIELD kern_version

static int bpf_prog_load(union bpf_attr *attr)
{
	enum bpf_prog_type type = attr->prog_type;
	struct bpf_prog *prog;
	int err;
	char license[128];
	bool is_gpl;

	if (CHECK_ATTR(BPF_PROG_LOAD))
		return -EINVAL;

	/* copy eBPF program license from user space */
	if (strncpy_from_user(license, u64_to_ptr(attr->license),
			      sizeof(license) - 1) < 0)
		return -EFAULT;
	license[sizeof(license) - 1] = 0;

	/* eBPF programs must be GPL compatible to use GPL-ed functions */
	is_gpl = license_is_gpl_compatible(license);

	if (attr->insn_cnt >= BPF_MAXINSNS)
		return -EINVAL;

	if (type == BPF_PROG_TYPE_KPROBE &&
	    attr->kern_version != LINUX_VERSION_CODE)
		return -EINVAL;

	if (type != BPF_PROG_TYPE_SOCKET_FILTER && !capable(CAP_SYS_ADMIN))
		return -EPERM;

	/* plain bpf_prog allocation */
	prog = bpf_prog_alloc(bpf_prog_size(attr->insn_cnt), GFP_USER);
	if (!prog)
		return -ENOMEM;

	err = bpf_prog_charge_memlock(prog);
	if (err)
		goto free_prog_nouncharge;

	prog->len = attr->insn_cnt;

	err = -EFAULT;
	if (copy_from_user(prog->insns, u64_to_ptr(attr->insns),
			   prog->len * sizeof(struct bpf_insn)) != 0)
		goto free_prog;

	prog->orig_prog = NULL;
	prog->jited = 0;

	atomic_set(&prog->aux->refcnt, 1);
	prog->gpl_compatible = is_gpl ? 1 : 0;

	/* find program type: socket_filter vs tracing_filter */
	err = find_prog_type(type, prog);
	if (err < 0)
		goto free_prog;

	/* run eBPF verifier */
	err = bpf_check(&prog, attr);
	if (err < 0)
		goto free_used_maps;

	/* fixup BPF_CALL->imm field */
	fixup_bpf_calls(prog);

	/* eBPF program is ready to be JITed */
	err = bpf_prog_select_runtime(prog);
	if (err < 0)
		goto free_used_maps;

	err = bpf_prog_new_fd(prog);
	if (err < 0)
		/* failed to allocate fd */
		goto free_used_maps;

	return err;

free_used_maps:
	free_used_maps(prog->aux);
free_prog:
	bpf_prog_uncharge_memlock(prog);
free_prog_nouncharge:
	bpf_prog_free(prog);
	return err;
}

#define BPF_OBJ_LAST_FIELD bpf_fd

static int bpf_obj_pin(const union bpf_attr *attr)
{
	if (CHECK_ATTR(BPF_OBJ))
		return -EINVAL;

	return bpf_obj_pin_user(attr->bpf_fd, u64_to_ptr(attr->pathname));
}

static int bpf_obj_get(const union bpf_attr *attr)
{
	if (CHECK_ATTR(BPF_OBJ) || attr->bpf_fd != 0)
		return -EINVAL;

	return bpf_obj_get_user(u64_to_ptr(attr->pathname));
}

SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
{
	union bpf_attr attr = {};
	int err;

	if (!capable(CAP_SYS_ADMIN) && sysctl_unprivileged_bpf_disabled)
		return -EPERM;

	if (!access_ok(VERIFY_READ, uattr, 1))
		return -EFAULT;

	if (size > PAGE_SIZE)	/* silly large */
		return -E2BIG;

	/* If we're handed a bigger struct than we know of,
	 * ensure all the unknown bits are 0 - i.e. new
	 * user-space does not rely on any kernel feature
	 * extensions we don't know about yet.
	 */
	if (size > sizeof(attr)) {
		unsigned char __user *addr;
		unsigned char __user *end;
		unsigned char val;

		addr = (void __user *)uattr + sizeof(attr);
		end  = (void __user *)uattr + size;

		for (; addr < end; addr++) {
			err = get_user(val, addr);
			if (err)
				return err;
			if (val)
				return -E2BIG;
		}
		size = sizeof(attr);
	}

	/* copy attributes from user space, may be less than sizeof(bpf_attr) */
	if (copy_from_user(&attr, uattr, size) != 0)
		return -EFAULT;

	switch (cmd) {
	case BPF_MAP_CREATE:
		err = map_create(&attr);
		break;
	case BPF_MAP_LOOKUP_ELEM:
		err = map_lookup_elem(&attr);
		break;
	case BPF_MAP_UPDATE_ELEM:
		err = map_update_elem(&attr);
		break;
	case BPF_MAP_DELETE_ELEM:
		err = map_delete_elem(&attr);
		break;
	case BPF_MAP_GET_NEXT_KEY:
		err = map_get_next_key(&attr);
		break;
	case BPF_PROG_LOAD:
		err = bpf_prog_load(&attr);
		break;
	case BPF_OBJ_PIN:
		err = bpf_obj_pin(&attr);
		break;
	case BPF_OBJ_GET:
		err = bpf_obj_get(&attr);
		break;
	default:
		err = -EINVAL;
		break;
	}

	return err;
}
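/* Example (not part of this file): libc typically provides no wrapper for
 * this syscall, so user space invokes it directly; a tiny helper along
 * these lines (as used by samples/bpf) covers all of the commands
 * dispatched above:
 *
 *	#include <unistd.h>
 *	#include <sys/syscall.h>
 *	#include <linux/bpf.h>
 *
 *	static inline int sys_bpf(enum bpf_cmd cmd, union bpf_attr *attr,
 *				  unsigned int size)
 *	{
 *		return syscall(__NR_bpf, cmd, attr, size);
 *	}
 */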