/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 */
#include <linux/bpf.h>
#include <linux/syscalls.h>
#include <linux/slab.h>
#include <linux/anon_inodes.h>
#include <linux/file.h>
#include <linux/license.h>
#include <linux/filter.h>
#include <linux/version.h>

DEFINE_PER_CPU(int, bpf_prog_active);

int sysctl_unprivileged_bpf_disabled __read_mostly;

static LIST_HEAD(bpf_map_types);

static struct bpf_map *find_and_alloc_map(union bpf_attr *attr)
{
        struct bpf_map_type_list *tl;
        struct bpf_map *map;

        list_for_each_entry(tl, &bpf_map_types, list_node) {
                if (tl->type == attr->map_type) {
                        map = tl->ops->map_alloc(attr);
                        if (IS_ERR(map))
                                return map;
                        map->ops = tl->ops;
                        map->map_type = attr->map_type;
                        return map;
                }
        }
        return ERR_PTR(-EINVAL);
}

/* boot time registration of different map implementations */
void bpf_register_map_type(struct bpf_map_type_list *tl)
{
        list_add(&tl->list_node, &bpf_map_types);
}
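
/* Illustrative sketch (not part of this file): a map implementation
 * typically registers itself at boot time through bpf_register_map_type().
 * The "example" names and the initcall below are hypothetical placeholders;
 * the real registrations of this era live in kernel/bpf/arraymap.c and
 * kernel/bpf/hashtab.c.
 *
 *      static const struct bpf_map_ops example_map_ops = {
 *              .map_alloc        = example_map_alloc,
 *              .map_free         = example_map_free,
 *              .map_get_next_key = example_map_get_next_key,
 *              .map_lookup_elem  = example_map_lookup_elem,
 *              .map_update_elem  = example_map_update_elem,
 *              .map_delete_elem  = example_map_delete_elem,
 *      };
 *
 *      static struct bpf_map_type_list example_map_type __read_mostly = {
 *              .ops  = &example_map_ops,
 *              .type = BPF_MAP_TYPE_ARRAY,
 *      };
 *
 *      static int __init register_example_map(void)
 *      {
 *              bpf_register_map_type(&example_map_type);
 *              return 0;
 *      }
 *      late_initcall(register_example_map);
 */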

int bpf_map_precharge_memlock(u32 pages)
{
        struct user_struct *user = get_current_user();
        unsigned long memlock_limit, cur;

        memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
        cur = atomic_long_read(&user->locked_vm);
        free_uid(user);
        if (cur + pages > memlock_limit)
                return -EPERM;
        return 0;
}

static int bpf_map_charge_memlock(struct bpf_map *map)
{
        struct user_struct *user = get_current_user();
        unsigned long memlock_limit;

        memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;

        atomic_long_add(map->pages, &user->locked_vm);

        if (atomic_long_read(&user->locked_vm) > memlock_limit) {
                atomic_long_sub(map->pages, &user->locked_vm);
                free_uid(user);
                return -EPERM;
        }
        map->user = user;
        return 0;
}

static void bpf_map_uncharge_memlock(struct bpf_map *map)
{
        struct user_struct *user = map->user;

        atomic_long_sub(map->pages, &user->locked_vm);
        free_uid(user);
}

/* called from workqueue */
static void bpf_map_free_deferred(struct work_struct *work)
{
        struct bpf_map *map = container_of(work, struct bpf_map, work);

        bpf_map_uncharge_memlock(map);
        /* implementation dependent freeing */
        map->ops->map_free(map);
}

static void bpf_map_put_uref(struct bpf_map *map)
{
        if (atomic_dec_and_test(&map->usercnt)) {
                if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY)
                        bpf_fd_array_map_clear(map);
        }
}

/* decrement map refcnt and schedule it for freeing via workqueue
 * (underlying map implementation ops->map_free() might sleep)
 */
void bpf_map_put(struct bpf_map *map)
{
        if (atomic_dec_and_test(&map->refcnt)) {
                INIT_WORK(&map->work, bpf_map_free_deferred);
                schedule_work(&map->work);
        }
}

void bpf_map_put_with_uref(struct bpf_map *map)
{
        bpf_map_put_uref(map);
        bpf_map_put(map);
}

static int bpf_map_release(struct inode *inode, struct file *filp)
{
        struct bpf_map *map = filp->private_data;

        if (map->ops->map_release)
                map->ops->map_release(map, filp);

        bpf_map_put_with_uref(map);
        return 0;
}

#ifdef CONFIG_PROC_FS
static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp)
{
        const struct bpf_map *map = filp->private_data;

        seq_printf(m,
                   "map_type:\t%u\n"
                   "key_size:\t%u\n"
                   "value_size:\t%u\n"
                   "max_entries:\t%u\n"
                   "map_flags:\t%#x\n",
                   map->map_type,
                   map->key_size,
                   map->value_size,
                   map->max_entries,
                   map->map_flags);
}
#endif

static const struct file_operations bpf_map_fops = {
#ifdef CONFIG_PROC_FS
        .show_fdinfo = bpf_map_show_fdinfo,
#endif
        .release = bpf_map_release,
};

int bpf_map_new_fd(struct bpf_map *map)
{
        return anon_inode_getfd("bpf-map", &bpf_map_fops, map,
                                O_RDWR | O_CLOEXEC);
}

/* helper macro to check that unused fields of 'union bpf_attr' are zero */
#define CHECK_ATTR(CMD) \
        memchr_inv((void *) &attr->CMD##_LAST_FIELD + \
                   sizeof(attr->CMD##_LAST_FIELD), 0, \
                   sizeof(*attr) - \
                   offsetof(union bpf_attr, CMD##_LAST_FIELD) - \
                   sizeof(attr->CMD##_LAST_FIELD)) != NULL

#define BPF_MAP_CREATE_LAST_FIELD map_flags
/* called via syscall */
static int map_create(union bpf_attr *attr)
{
        struct bpf_map *map;
        int err;

        err = CHECK_ATTR(BPF_MAP_CREATE);
        if (err)
                return -EINVAL;

        /* find map type and init map: hashtable vs rbtree vs bloom vs ... */
        map = find_and_alloc_map(attr);
        if (IS_ERR(map))
                return PTR_ERR(map);

        atomic_set(&map->refcnt, 1);
        atomic_set(&map->usercnt, 1);

        err = bpf_map_charge_memlock(map);
        if (err)
                goto free_map_nouncharge;

        err = bpf_map_new_fd(map);
        if (err < 0)
                /* failed to allocate fd */
                goto free_map;

        return err;

free_map:
        bpf_map_uncharge_memlock(map);
free_map_nouncharge:
        map->ops->map_free(map);
        return err;
}
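
/* Illustrative sketch (assumed userspace code, not part of this file):
 * creating a hash map through the bpf(2) syscall.  With
 * BPF_MAP_CREATE_LAST_FIELD == map_flags, CHECK_ATTR() rejects the call if
 * any byte of 'union bpf_attr' past 'map_flags' is non-zero, so fields added
 * to the union later stay backward compatible.
 *
 *      union bpf_attr attr = {
 *              .map_type    = BPF_MAP_TYPE_HASH,
 *              .key_size    = sizeof(__u32),
 *              .value_size  = sizeof(__u64),
 *              .max_entries = 1024,
 *              .map_flags   = 0,
 *      };
 *      int map_fd = syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr));
 *      // map_fd < 0 on error, otherwise an O_CLOEXEC anon-inode fd
 */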

/* if error is returned, fd is released.
 * On success caller should complete fd access with matching fdput()
 */
struct bpf_map *__bpf_map_get(struct fd f)
{
        if (!f.file)
                return ERR_PTR(-EBADF);
        if (f.file->f_op != &bpf_map_fops) {
                fdput(f);
                return ERR_PTR(-EINVAL);
        }

        return f.file->private_data;
}

/* prog's and map's refcnt limit */
#define BPF_MAX_REFCNT 32768

struct bpf_map *bpf_map_inc(struct bpf_map *map, bool uref)
{
        if (atomic_inc_return(&map->refcnt) > BPF_MAX_REFCNT) {
                atomic_dec(&map->refcnt);
                return ERR_PTR(-EBUSY);
        }
        if (uref)
                atomic_inc(&map->usercnt);
        return map;
}

struct bpf_map *bpf_map_get_with_uref(u32 ufd)
{
        struct fd f = fdget(ufd);
        struct bpf_map *map;

        map = __bpf_map_get(f);
        if (IS_ERR(map))
                return map;

        map = bpf_map_inc(map, true);
        fdput(f);

        return map;
}

/* helper to convert user pointers passed inside __aligned_u64 fields */
static void __user *u64_to_ptr(__u64 val)
{
        return (void __user *) (unsigned long) val;
}

int __weak bpf_stackmap_copy(struct bpf_map *map, void *key, void *value)
{
        return -ENOTSUPP;
}

/* last field in 'union bpf_attr' used by this command */
#define BPF_MAP_LOOKUP_ELEM_LAST_FIELD value

static int map_lookup_elem(union bpf_attr *attr)
{
        void __user *ukey = u64_to_ptr(attr->key);
        void __user *uvalue = u64_to_ptr(attr->value);
        int ufd = attr->map_fd;
        struct bpf_map *map;
        void *key, *value, *ptr;
        u32 value_size;
        struct fd f;
        int err;

        if (CHECK_ATTR(BPF_MAP_LOOKUP_ELEM))
                return -EINVAL;

        f = fdget(ufd);
        map = __bpf_map_get(f);
        if (IS_ERR(map))
                return PTR_ERR(map);

        err = -ENOMEM;
        key = kmalloc(map->key_size, GFP_USER);
        if (!key)
                goto err_put;

        err = -EFAULT;
        if (copy_from_user(key, ukey, map->key_size) != 0)
                goto free_key;

        if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
            map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
                value_size = round_up(map->value_size, 8) * num_possible_cpus();
        else
                value_size = map->value_size;

        err = -ENOMEM;
        value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
        if (!value)
                goto free_key;

        if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH) {
                err = bpf_percpu_hash_copy(map, key, value);
        } else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
                err = bpf_percpu_array_copy(map, key, value);
        } else if (map->map_type == BPF_MAP_TYPE_STACK_TRACE) {
                err = bpf_stackmap_copy(map, key, value);
        } else {
                rcu_read_lock();
                ptr = map->ops->map_lookup_elem(map, key);
                if (ptr)
                        memcpy(value, ptr, value_size);
                rcu_read_unlock();
                err = ptr ? 0 : -ENOENT;
        }

        if (err)
                goto free_value;

        err = -EFAULT;
        if (copy_to_user(uvalue, value, value_size) != 0)
                goto free_value;

        err = 0;

free_value:
        kfree(value);
free_key:
        kfree(key);
err_put:
        fdput(f);
        return err;
}
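
/* Illustrative sketch (assumed userspace code): for per-cpu maps the lookup
 * above copies one value per possible CPU, with each slot rounded up to
 * 8 bytes, so the user buffer must be sized accordingly.  Deriving the CPU
 * count from sysconf() is an assumption of this sketch; it may differ from
 * the kernel's num_possible_cpus(), so treat it as a lower bound only.
 *
 *      long ncpus = sysconf(_SC_NPROCESSORS_CONF);
 *      size_t slot = (value_size + 7) & ~7UL;
 *      char *buf = malloc(slot * ncpus);
 *
 *      union bpf_attr attr = {
 *              .map_fd = map_fd,
 *              .key    = (__u64)(unsigned long)&key,
 *              .value  = (__u64)(unsigned long)buf,
 *      };
 *      syscall(__NR_bpf, BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr));
 *      // buf + i * slot now holds CPU i's copy of the value
 */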

#define BPF_MAP_UPDATE_ELEM_LAST_FIELD flags

static int map_update_elem(union bpf_attr *attr)
{
        void __user *ukey = u64_to_ptr(attr->key);
        void __user *uvalue = u64_to_ptr(attr->value);
        int ufd = attr->map_fd;
        struct bpf_map *map;
        void *key, *value;
        u32 value_size;
        struct fd f;
        int err;

        if (CHECK_ATTR(BPF_MAP_UPDATE_ELEM))
                return -EINVAL;

        f = fdget(ufd);
        map = __bpf_map_get(f);
        if (IS_ERR(map))
                return PTR_ERR(map);

        err = -ENOMEM;
        key = kmalloc(map->key_size, GFP_USER);
        if (!key)
                goto err_put;

        err = -EFAULT;
        if (copy_from_user(key, ukey, map->key_size) != 0)
                goto free_key;

        if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
            map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
                value_size = round_up(map->value_size, 8) * num_possible_cpus();
        else
                value_size = map->value_size;

        err = -ENOMEM;
        value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
        if (!value)
                goto free_key;

        err = -EFAULT;
        if (copy_from_user(value, uvalue, value_size) != 0)
                goto free_value;

        /* must increment bpf_prog_active to avoid kprobe+bpf triggering from
         * inside bpf map update or delete; otherwise deadlocks are possible
         */
        preempt_disable();
        __this_cpu_inc(bpf_prog_active);
        if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH) {
                err = bpf_percpu_hash_update(map, key, value, attr->flags);
        } else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
                err = bpf_percpu_array_update(map, key, value, attr->flags);
        } else if (map->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY ||
                   map->map_type == BPF_MAP_TYPE_PROG_ARRAY ||
                   map->map_type == BPF_MAP_TYPE_CGROUP_ARRAY) {
                rcu_read_lock();
                err = bpf_fd_array_map_update_elem(map, f.file, key, value,
                                                   attr->flags);
                rcu_read_unlock();
        } else {
                rcu_read_lock();
                err = map->ops->map_update_elem(map, key, value, attr->flags);
                rcu_read_unlock();
        }
        __this_cpu_dec(bpf_prog_active);
        preempt_enable();

free_value:
        kfree(value);
free_key:
        kfree(key);
err_put:
        fdput(f);
        return err;
}
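
/* Illustrative sketch (assumed userspace code): attr->flags selects the
 * update semantics that the map implementations above enforce --
 * BPF_ANY creates or replaces, BPF_NOEXIST only creates (else -EEXIST),
 * BPF_EXIST only replaces (else -ENOENT).
 *
 *      union bpf_attr attr = {
 *              .map_fd = map_fd,
 *              .key    = (__u64)(unsigned long)&key,
 *              .value  = (__u64)(unsigned long)&value,
 *              .flags  = BPF_NOEXIST,  // fail instead of overwriting
 *      };
 *      if (syscall(__NR_bpf, BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr)) &&
 *          errno == EEXIST)
 *              ;  // element was already present
 */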

#define BPF_MAP_DELETE_ELEM_LAST_FIELD key

static int map_delete_elem(union bpf_attr *attr)
{
        void __user *ukey = u64_to_ptr(attr->key);
        int ufd = attr->map_fd;
        struct bpf_map *map;
        struct fd f;
        void *key;
        int err;

        if (CHECK_ATTR(BPF_MAP_DELETE_ELEM))
                return -EINVAL;

        f = fdget(ufd);
        map = __bpf_map_get(f);
        if (IS_ERR(map))
                return PTR_ERR(map);

        err = -ENOMEM;
        key = kmalloc(map->key_size, GFP_USER);
        if (!key)
                goto err_put;

        err = -EFAULT;
        if (copy_from_user(key, ukey, map->key_size) != 0)
                goto free_key;

        preempt_disable();
        __this_cpu_inc(bpf_prog_active);
        rcu_read_lock();
        err = map->ops->map_delete_elem(map, key);
        rcu_read_unlock();
        __this_cpu_dec(bpf_prog_active);
        preempt_enable();

free_key:
        kfree(key);
err_put:
        fdput(f);
        return err;
}

/* last field in 'union bpf_attr' used by this command */
#define BPF_MAP_GET_NEXT_KEY_LAST_FIELD next_key

static int map_get_next_key(union bpf_attr *attr)
{
        void __user *ukey = u64_to_ptr(attr->key);
        void __user *unext_key = u64_to_ptr(attr->next_key);
        int ufd = attr->map_fd;
        struct bpf_map *map;
        void *key, *next_key;
        struct fd f;
        int err;

        if (CHECK_ATTR(BPF_MAP_GET_NEXT_KEY))
                return -EINVAL;

        f = fdget(ufd);
        map = __bpf_map_get(f);
        if (IS_ERR(map))
                return PTR_ERR(map);

        err = -ENOMEM;
        key = kmalloc(map->key_size, GFP_USER);
        if (!key)
                goto err_put;

        err = -EFAULT;
        if (copy_from_user(key, ukey, map->key_size) != 0)
                goto free_key;

        err = -ENOMEM;
        next_key = kmalloc(map->key_size, GFP_USER);
        if (!next_key)
                goto free_key;

        rcu_read_lock();
        err = map->ops->map_get_next_key(map, key, next_key);
        rcu_read_unlock();
        if (err)
                goto free_next_key;

        err = -EFAULT;
        if (copy_to_user(unext_key, next_key, map->key_size) != 0)
                goto free_next_key;

        err = 0;

free_next_key:
        kfree(next_key);
free_key:
        kfree(key);
err_put:
        fdput(f);
        return err;
}
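
/* Illustrative sketch (assumed userspace code): BPF_MAP_GET_NEXT_KEY is the
 * building block for iterating a map.  For the map types of this era a key
 * that is not present generally yields the first key (the all-zero dummy
 * below is assumed to be unused), and -ENOENT marks the end of the map.
 *
 *      memset(&key, 0, sizeof(key));
 *      for (;;) {
 *              union bpf_attr attr = {
 *                      .map_fd   = map_fd,
 *                      .key      = (__u64)(unsigned long)&key,
 *                      .next_key = (__u64)(unsigned long)&next_key,
 *              };
 *              if (syscall(__NR_bpf, BPF_MAP_GET_NEXT_KEY, &attr,
 *                          sizeof(attr)))
 *                      break;          // -ENOENT: no more keys
 *              // ... look up / process next_key here ...
 *              key = next_key;
 *      }
 */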

static LIST_HEAD(bpf_prog_types);

static int find_prog_type(enum bpf_prog_type type, struct bpf_prog *prog)
{
        struct bpf_prog_type_list *tl;

        list_for_each_entry(tl, &bpf_prog_types, list_node) {
                if (tl->type == type) {
                        prog->aux->ops = tl->ops;
                        prog->type = type;
                        return 0;
                }
        }

        return -EINVAL;
}

void bpf_register_prog_type(struct bpf_prog_type_list *tl)
{
        list_add(&tl->list_node, &bpf_prog_types);
}

/* fixup insn->imm field of bpf_call instructions:
 * if (insn->imm == BPF_FUNC_map_lookup_elem)
 *      insn->imm = bpf_map_lookup_elem - __bpf_call_base;
 * else if (insn->imm == BPF_FUNC_map_update_elem)
 *      insn->imm = bpf_map_update_elem - __bpf_call_base;
 * else ...
 *
 * this function is called after eBPF program passed verification
 */
static void fixup_bpf_calls(struct bpf_prog *prog)
{
        const struct bpf_func_proto *fn;
        int i;

        for (i = 0; i < prog->len; i++) {
                struct bpf_insn *insn = &prog->insnsi[i];

                if (insn->code == (BPF_JMP | BPF_CALL)) {
                        /* we reach here when the program has bpf_call
                         * instructions and it passed bpf_check(), which means
                         * ops->get_func_proto must have been supplied; check it
                         */
                        BUG_ON(!prog->aux->ops->get_func_proto);

                        if (insn->imm == BPF_FUNC_get_route_realm)
                                prog->dst_needed = 1;
                        if (insn->imm == BPF_FUNC_get_prandom_u32)
                                bpf_user_rnd_init_once();
                        if (insn->imm == BPF_FUNC_tail_call) {
                                /* mark bpf_tail_call as a different opcode
                                 * to avoid a conditional branch in the
                                 * interpreter for every normal call
                                 * and to prevent accidental JITing by a
                                 * JIT compiler that doesn't support
                                 * bpf_tail_call yet
                                 */
                                insn->imm = 0;
                                insn->code |= BPF_X;
                                continue;
                        }

                        fn = prog->aux->ops->get_func_proto(insn->imm);
                        /* all functions that have a prototype and that the
                         * verifier allowed programs to call must be real
                         * in-kernel functions
                         */
                        BUG_ON(!fn->func);
                        insn->imm = fn->func - __bpf_call_base;
                }
        }
}
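
/* Illustrative sketch: after the rewrite above, a helper call carries an
 * offset from __bpf_call_base instead of a BPF_FUNC_* id, so the interpreter
 * can dispatch it with a single addition.  Roughly, and under the assumptions
 * of this file's era:
 *
 *      // verifier-approved program, before fixup:
 *      //   insn->imm == BPF_FUNC_map_lookup_elem
 *      // after fixup:
 *      //   insn->imm == (int)(bpf_map_lookup_elem - __bpf_call_base)
 *      // the interpreter (see kernel/bpf/core.c) then does approximately:
 *      //   BPF_R0 = (__bpf_call_base + insn->imm)(BPF_R1, BPF_R2, BPF_R3,
 *      //                                          BPF_R4, BPF_R5);
 */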

/* drop refcnt on maps used by eBPF program and free auxiliary data */
static void free_used_maps(struct bpf_prog_aux *aux)
{
        int i;

        for (i = 0; i < aux->used_map_cnt; i++)
                bpf_map_put(aux->used_maps[i]);

        kfree(aux->used_maps);
}

static int bpf_prog_charge_memlock(struct bpf_prog *prog)
{
        struct user_struct *user = get_current_user();
        unsigned long memlock_limit;

        memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;

        atomic_long_add(prog->pages, &user->locked_vm);
        if (atomic_long_read(&user->locked_vm) > memlock_limit) {
                atomic_long_sub(prog->pages, &user->locked_vm);
                free_uid(user);
                return -EPERM;
        }
        prog->aux->user = user;
        return 0;
}

static void bpf_prog_uncharge_memlock(struct bpf_prog *prog)
{
        struct user_struct *user = prog->aux->user;

        atomic_long_sub(prog->pages, &user->locked_vm);
        free_uid(user);
}

static void __bpf_prog_put_rcu(struct rcu_head *rcu)
{
        struct bpf_prog_aux *aux = container_of(rcu, struct bpf_prog_aux, rcu);

        free_used_maps(aux);
        bpf_prog_uncharge_memlock(aux->prog);
        bpf_prog_free(aux->prog);
}

void bpf_prog_put(struct bpf_prog *prog)
{
        if (atomic_dec_and_test(&prog->aux->refcnt))
                call_rcu(&prog->aux->rcu, __bpf_prog_put_rcu);
}
EXPORT_SYMBOL_GPL(bpf_prog_put);

static int bpf_prog_release(struct inode *inode, struct file *filp)
{
        struct bpf_prog *prog = filp->private_data;

        bpf_prog_put(prog);
        return 0;
}

static const struct file_operations bpf_prog_fops = {
        .release = bpf_prog_release,
};

int bpf_prog_new_fd(struct bpf_prog *prog)
{
        return anon_inode_getfd("bpf-prog", &bpf_prog_fops, prog,
                                O_RDWR | O_CLOEXEC);
}

static struct bpf_prog *____bpf_prog_get(struct fd f)
{
        if (!f.file)
                return ERR_PTR(-EBADF);
        if (f.file->f_op != &bpf_prog_fops) {
                fdput(f);
                return ERR_PTR(-EINVAL);
        }

        return f.file->private_data;
}

struct bpf_prog *bpf_prog_add(struct bpf_prog *prog, int i)
{
        if (atomic_add_return(i, &prog->aux->refcnt) > BPF_MAX_REFCNT) {
                atomic_sub(i, &prog->aux->refcnt);
                return ERR_PTR(-EBUSY);
        }
        return prog;
}
EXPORT_SYMBOL_GPL(bpf_prog_add);

struct bpf_prog *bpf_prog_inc(struct bpf_prog *prog)
{
        return bpf_prog_add(prog, 1);
}

static struct bpf_prog *__bpf_prog_get(u32 ufd, enum bpf_prog_type *type)
{
        struct fd f = fdget(ufd);
        struct bpf_prog *prog;

        prog = ____bpf_prog_get(f);
        if (IS_ERR(prog))
                return prog;
        if (type && prog->type != *type) {
                prog = ERR_PTR(-EINVAL);
                goto out;
        }

        prog = bpf_prog_inc(prog);
out:
        fdput(f);
        return prog;
}

struct bpf_prog *bpf_prog_get(u32 ufd)
{
        return __bpf_prog_get(ufd, NULL);
}

struct bpf_prog *bpf_prog_get_type(u32 ufd, enum bpf_prog_type type)
{
        return __bpf_prog_get(ufd, &type);
}
EXPORT_SYMBOL_GPL(bpf_prog_get_type);
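
/* Illustrative sketch (hypothetical caller, not part of this file): a
 * subsystem attaching a program typically takes a reference by fd while
 * insisting on the expected type, and drops the reference on detach.
 *
 *      struct bpf_prog *prog;
 *
 *      prog = bpf_prog_get_type(ufd, BPF_PROG_TYPE_SCHED_CLS);
 *      if (IS_ERR(prog))
 *              return PTR_ERR(prog);   // wrong type yields -EINVAL
 *      // ... attach prog ...
 *      bpf_prog_put(prog);             // on detach / error unwind
 */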

/* last field in 'union bpf_attr' used by this command */
#define BPF_PROG_LOAD_LAST_FIELD kern_version

static int bpf_prog_load(union bpf_attr *attr)
{
        enum bpf_prog_type type = attr->prog_type;
        struct bpf_prog *prog;
        int err;
        char license[128];
        bool is_gpl;

        if (CHECK_ATTR(BPF_PROG_LOAD))
                return -EINVAL;

        /* copy eBPF program license from user space */
        if (strncpy_from_user(license, u64_to_ptr(attr->license),
                              sizeof(license) - 1) < 0)
                return -EFAULT;
        license[sizeof(license) - 1] = 0;

        /* eBPF programs must be GPL compatible to use GPL-ed functions */
        is_gpl = license_is_gpl_compatible(license);

        if (attr->insn_cnt >= BPF_MAXINSNS)
                return -EINVAL;

        if (type == BPF_PROG_TYPE_KPROBE &&
            attr->kern_version != LINUX_VERSION_CODE)
                return -EINVAL;

        if (type != BPF_PROG_TYPE_SOCKET_FILTER && !capable(CAP_SYS_ADMIN))
                return -EPERM;

        /* plain bpf_prog allocation */
        prog = bpf_prog_alloc(bpf_prog_size(attr->insn_cnt), GFP_USER);
        if (!prog)
                return -ENOMEM;

        err = bpf_prog_charge_memlock(prog);
        if (err)
                goto free_prog_nouncharge;

        prog->len = attr->insn_cnt;

        err = -EFAULT;
        if (copy_from_user(prog->insns, u64_to_ptr(attr->insns),
                           prog->len * sizeof(struct bpf_insn)) != 0)
                goto free_prog;

        prog->orig_prog = NULL;
        prog->jited = 0;

        atomic_set(&prog->aux->refcnt, 1);
        prog->gpl_compatible = is_gpl ? 1 : 0;

        /* find program type: socket_filter vs tracing_filter */
        err = find_prog_type(type, prog);
        if (err < 0)
                goto free_prog;

        /* run eBPF verifier */
        err = bpf_check(&prog, attr);
        if (err < 0)
                goto free_used_maps;

        /* fixup BPF_CALL->imm field */
        fixup_bpf_calls(prog);

        /* eBPF program is ready to be JITed */
        prog = bpf_prog_select_runtime(prog, &err);
        if (err < 0)
                goto free_used_maps;

        err = bpf_prog_new_fd(prog);
        if (err < 0)
                /* failed to allocate fd */
                goto free_used_maps;

        return err;

free_used_maps:
        free_used_maps(prog->aux);
free_prog:
        bpf_prog_uncharge_memlock(prog);
free_prog_nouncharge:
        bpf_prog_free(prog);
        return err;
}

#define BPF_OBJ_LAST_FIELD bpf_fd

static int bpf_obj_pin(const union bpf_attr *attr)
{
        if (CHECK_ATTR(BPF_OBJ))
                return -EINVAL;

        return bpf_obj_pin_user(attr->bpf_fd, u64_to_ptr(attr->pathname));
}

static int bpf_obj_get(const union bpf_attr *attr)
{
        if (CHECK_ATTR(BPF_OBJ) || attr->bpf_fd != 0)
                return -EINVAL;

        return bpf_obj_get_user(u64_to_ptr(attr->pathname));
}

SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
{
        union bpf_attr attr = {};
        int err;

        if (!capable(CAP_SYS_ADMIN) && sysctl_unprivileged_bpf_disabled)
                return -EPERM;

        if (!access_ok(VERIFY_READ, uattr, 1))
                return -EFAULT;

        if (size > PAGE_SIZE)   /* silly large */
                return -E2BIG;

        /* If we're handed a bigger struct than we know of,
         * ensure all the unknown bits are 0 - i.e. new
         * user-space does not rely on any kernel feature
         * extensions we don't know about yet.
         */
        if (size > sizeof(attr)) {
                unsigned char __user *addr;
                unsigned char __user *end;
                unsigned char val;

                addr = (void __user *)uattr + sizeof(attr);
                end = (void __user *)uattr + size;

                for (; addr < end; addr++) {
                        err = get_user(val, addr);
                        if (err)
                                return err;
                        if (val)
                                return -E2BIG;
                }
                size = sizeof(attr);
        }

        /* copy attributes from user space, may be less than sizeof(bpf_attr) */
        if (copy_from_user(&attr, uattr, size) != 0)
                return -EFAULT;

        switch (cmd) {
        case BPF_MAP_CREATE:
                err = map_create(&attr);
                break;
        case BPF_MAP_LOOKUP_ELEM:
                err = map_lookup_elem(&attr);
                break;
        case BPF_MAP_UPDATE_ELEM:
                err = map_update_elem(&attr);
                break;
        case BPF_MAP_DELETE_ELEM:
                err = map_delete_elem(&attr);
                break;
        case BPF_MAP_GET_NEXT_KEY:
                err = map_get_next_key(&attr);
                break;
        case BPF_PROG_LOAD:
                err = bpf_prog_load(&attr);
                break;
        case BPF_OBJ_PIN:
                err = bpf_obj_pin(&attr);
                break;
        case BPF_OBJ_GET:
                err = bpf_obj_get(&attr);
                break;
        default:
                err = -EINVAL;
                break;
        }

        return err;
}
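
/* Illustrative sketch (assumed userspace code): there is no libc wrapper for
 * bpf(2) in this era, so callers invoke it via syscall(2).  The trailing
 * 'size' argument lets older kernels and newer userspace coexist, as the
 * zero-check loop above enforces.
 *
 *      #include <unistd.h>
 *      #include <sys/syscall.h>
 *      #include <linux/bpf.h>
 *
 *      static int sys_bpf(int cmd, union bpf_attr *attr, unsigned int size)
 *      {
 *              return syscall(__NR_bpf, cmd, attr, size);
 *      }
 */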