/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 */
#include <linux/bpf.h>
#include <linux/syscalls.h>
#include <linux/slab.h>
#include <linux/anon_inodes.h>
#include <linux/file.h>
#include <linux/license.h>
#include <linux/filter.h>
#include <linux/version.h>

DEFINE_PER_CPU(int, bpf_prog_active);

int sysctl_unprivileged_bpf_disabled __read_mostly;

static LIST_HEAD(bpf_map_types);

static struct bpf_map *find_and_alloc_map(union bpf_attr *attr)
{
	struct bpf_map_type_list *tl;
	struct bpf_map *map;

	list_for_each_entry(tl, &bpf_map_types, list_node) {
		if (tl->type == attr->map_type) {
			map = tl->ops->map_alloc(attr);
			if (IS_ERR(map))
				return map;
			map->ops = tl->ops;
			map->map_type = attr->map_type;
			return map;
		}
	}
	return ERR_PTR(-EINVAL);
}

/* boot time registration of different map implementations */
void bpf_register_map_type(struct bpf_map_type_list *tl)
{
	list_add(&tl->list_node, &bpf_map_types);
}
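/* Illustrative sketch (not part of this file): a map implementation makes
 * itself visible to find_and_alloc_map() above by registering a
 * bpf_map_type_list at boot time. The ops table fields below are the ones
 * this file actually calls; the example_* names and the initcall are
 * hypothetical placeholders (real implementations live in files such as
 * arraymap.c and hashtab.c):
 *
 *	static const struct bpf_map_ops example_map_ops = {
 *		.map_alloc		= example_map_alloc,
 *		.map_free		= example_map_free,
 *		.map_get_next_key	= example_map_get_next_key,
 *		.map_lookup_elem	= example_map_lookup_elem,
 *		.map_update_elem	= example_map_update_elem,
 *		.map_delete_elem	= example_map_delete_elem,
 *	};
 *
 *	static struct bpf_map_type_list example_map_type __read_mostly = {
 *		.ops	= &example_map_ops,
 *		.type	= BPF_MAP_TYPE_ARRAY,
 *	};
 *
 *	static int __init register_example_map(void)
 *	{
 *		bpf_register_map_type(&example_map_type);
 *		return 0;
 *	}
 *	late_initcall(register_example_map);
 */
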
int bpf_map_precharge_memlock(u32 pages)
{
	struct user_struct *user = get_current_user();
	unsigned long memlock_limit, cur;

	memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
	cur = atomic_long_read(&user->locked_vm);
	free_uid(user);
	if (cur + pages > memlock_limit)
		return -EPERM;
	return 0;
}

static int bpf_map_charge_memlock(struct bpf_map *map)
{
	struct user_struct *user = get_current_user();
	unsigned long memlock_limit;

	memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;

	atomic_long_add(map->pages, &user->locked_vm);

	if (atomic_long_read(&user->locked_vm) > memlock_limit) {
		atomic_long_sub(map->pages, &user->locked_vm);
		free_uid(user);
		return -EPERM;
	}
	map->user = user;
	return 0;
}

static void bpf_map_uncharge_memlock(struct bpf_map *map)
{
	struct user_struct *user = map->user;

	atomic_long_sub(map->pages, &user->locked_vm);
	free_uid(user);
}

/* called from workqueue */
static void bpf_map_free_deferred(struct work_struct *work)
{
	struct bpf_map *map = container_of(work, struct bpf_map, work);

	bpf_map_uncharge_memlock(map);
	/* implementation dependent freeing */
	map->ops->map_free(map);
}

static void bpf_map_put_uref(struct bpf_map *map)
{
	if (atomic_dec_and_test(&map->usercnt)) {
		if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY)
			bpf_fd_array_map_clear(map);
	}
}

/* decrement map refcnt and schedule it for freeing via workqueue
 * (underlying map implementation ops->map_free() might sleep)
 */
void bpf_map_put(struct bpf_map *map)
{
	if (atomic_dec_and_test(&map->refcnt)) {
		INIT_WORK(&map->work, bpf_map_free_deferred);
		schedule_work(&map->work);
	}
}

void bpf_map_put_with_uref(struct bpf_map *map)
{
	bpf_map_put_uref(map);
	bpf_map_put(map);
}

static int bpf_map_release(struct inode *inode, struct file *filp)
{
	struct bpf_map *map = filp->private_data;

	if (map->ops->map_release)
		map->ops->map_release(map, filp);

	bpf_map_put_with_uref(map);
	return 0;
}

#ifdef CONFIG_PROC_FS
static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp)
{
	const struct bpf_map *map = filp->private_data;

	seq_printf(m,
		   "map_type:\t%u\n"
		   "key_size:\t%u\n"
		   "value_size:\t%u\n"
		   "max_entries:\t%u\n"
		   "map_flags:\t%#x\n",
		   map->map_type,
		   map->key_size,
		   map->value_size,
		   map->max_entries,
		   map->map_flags);
}
#endif

static const struct file_operations bpf_map_fops = {
#ifdef CONFIG_PROC_FS
	.show_fdinfo	= bpf_map_show_fdinfo,
#endif
	.release	= bpf_map_release,
};

int bpf_map_new_fd(struct bpf_map *map)
{
	return anon_inode_getfd("bpf-map", &bpf_map_fops, map,
				O_RDWR | O_CLOEXEC);
}

/* helper macro to check that unused fields of 'union bpf_attr' are zero */
#define CHECK_ATTR(CMD) \
	memchr_inv((void *) &attr->CMD##_LAST_FIELD + \
		   sizeof(attr->CMD##_LAST_FIELD), 0, \
		   sizeof(*attr) - \
		   offsetof(union bpf_attr, CMD##_LAST_FIELD) - \
		   sizeof(attr->CMD##_LAST_FIELD)) != NULL

#define BPF_MAP_CREATE_LAST_FIELD map_flags
/* called via syscall */
static int map_create(union bpf_attr *attr)
{
	struct bpf_map *map;
	int err;

	err = CHECK_ATTR(BPF_MAP_CREATE);
	if (err)
		return -EINVAL;

	/* find map type and init map: hashtable vs rbtree vs bloom vs ... */
	map = find_and_alloc_map(attr);
	if (IS_ERR(map))
		return PTR_ERR(map);

	atomic_set(&map->refcnt, 1);
	atomic_set(&map->usercnt, 1);

	err = bpf_map_charge_memlock(map);
	if (err)
		goto free_map_nouncharge;

	err = bpf_map_new_fd(map);
	if (err < 0)
		/* failed to allocate fd */
		goto free_map;

	return err;

free_map:
	bpf_map_uncharge_memlock(map);
free_map_nouncharge:
	map->ops->map_free(map);
	return err;
}
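/* Illustrative user-space sketch (not part of this file): map_create() is
 * reached through the bpf(2) syscall with cmd == BPF_MAP_CREATE. CHECK_ATTR()
 * rejects the call unless every byte past the command's last field
 * (map_flags here) is zero, so callers normally zero the whole attr first.
 * A minimal, hypothetical caller (assuming the usual <linux/bpf.h>,
 * <sys/syscall.h>, <unistd.h> and <string.h> includes) might look like:
 *
 *	union bpf_attr attr;
 *
 *	memset(&attr, 0, sizeof(attr));
 *	attr.map_type    = BPF_MAP_TYPE_HASH;
 *	attr.key_size    = sizeof(__u32);
 *	attr.value_size  = sizeof(__u64);
 *	attr.max_entries = 1024;
 *
 *	int map_fd = syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr));
 *	// map_fd < 0 on error; otherwise an O_RDWR | O_CLOEXEC anon fd
 *	// backed by bpf_map_fops, released via bpf_map_release() on close
 */
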
/* if error is returned, fd is released.
 * On success caller should complete fd access with matching fdput()
 */
struct bpf_map *__bpf_map_get(struct fd f)
{
	if (!f.file)
		return ERR_PTR(-EBADF);
	if (f.file->f_op != &bpf_map_fops) {
		fdput(f);
		return ERR_PTR(-EINVAL);
	}

	return f.file->private_data;
}

/* prog's and map's refcnt limit */
#define BPF_MAX_REFCNT 32768

struct bpf_map *bpf_map_inc(struct bpf_map *map, bool uref)
{
	if (atomic_inc_return(&map->refcnt) > BPF_MAX_REFCNT) {
		atomic_dec(&map->refcnt);
		return ERR_PTR(-EBUSY);
	}
	if (uref)
		atomic_inc(&map->usercnt);
	return map;
}

struct bpf_map *bpf_map_get_with_uref(u32 ufd)
{
	struct fd f = fdget(ufd);
	struct bpf_map *map;

	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return map;

	map = bpf_map_inc(map, true);
	fdput(f);

	return map;
}

/* helper to convert user pointers passed inside __aligned_u64 fields */
static void __user *u64_to_ptr(__u64 val)
{
	return (void __user *) (unsigned long) val;
}

int __weak bpf_stackmap_copy(struct bpf_map *map, void *key, void *value)
{
	return -ENOTSUPP;
}

/* last field in 'union bpf_attr' used by this command */
#define BPF_MAP_LOOKUP_ELEM_LAST_FIELD value

static int map_lookup_elem(union bpf_attr *attr)
{
	void __user *ukey = u64_to_ptr(attr->key);
	void __user *uvalue = u64_to_ptr(attr->value);
	int ufd = attr->map_fd;
	struct bpf_map *map;
	void *key, *value, *ptr;
	u32 value_size;
	struct fd f;
	int err;

	if (CHECK_ATTR(BPF_MAP_LOOKUP_ELEM))
		return -EINVAL;

	f = fdget(ufd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);

	err = -ENOMEM;
	key = kmalloc(map->key_size, GFP_USER);
	if (!key)
		goto err_put;

	err = -EFAULT;
	if (copy_from_user(key, ukey, map->key_size) != 0)
		goto free_key;

	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
		value_size = round_up(map->value_size, 8) * num_possible_cpus();
	else
		value_size = map->value_size;

	err = -ENOMEM;
	value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
	if (!value)
		goto free_key;

	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH) {
		err = bpf_percpu_hash_copy(map, key, value);
	} else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
		err = bpf_percpu_array_copy(map, key, value);
	} else if (map->map_type == BPF_MAP_TYPE_STACK_TRACE) {
		err = bpf_stackmap_copy(map, key, value);
	} else {
		rcu_read_lock();
		ptr = map->ops->map_lookup_elem(map, key);
		if (ptr)
			memcpy(value, ptr, value_size);
		rcu_read_unlock();
		err = ptr ? 0 : -ENOENT;
	}

	if (err)
		goto free_value;

	err = -EFAULT;
	if (copy_to_user(uvalue, value, value_size) != 0)
		goto free_value;

	err = 0;

free_value:
	kfree(value);
free_key:
	kfree(key);
err_put:
	fdput(f);
	return err;
}
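/* Illustrative note (not part of this file): for BPF_MAP_TYPE_PERCPU_HASH and
 * BPF_MAP_TYPE_PERCPU_ARRAY, map_lookup_elem() above copies one value per
 * possible CPU into a single flat buffer, each slot rounded up to 8 bytes.
 * A hypothetical user-space reader has to size and index its buffer the same
 * way; map_fd, value_size, get_possible_cpus() and use_value() below are
 * placeholders:
 *
 *	union bpf_attr attr;
 *	__u32 key = 0;
 *	// must cover all *possible* CPUs (e.g. parsed from
 *	// /sys/devices/system/cpu/possible), not just the online ones
 *	unsigned int ncpus = get_possible_cpus();
 *	size_t slot = (value_size + 7) & ~(size_t)7;	// round_up(value_size, 8)
 *	void *values = malloc(slot * ncpus);
 *
 *	memset(&attr, 0, sizeof(attr));
 *	attr.map_fd = map_fd;
 *	attr.key    = (__u64)(unsigned long)&key;
 *	attr.value  = (__u64)(unsigned long)values;
 *	if (syscall(__NR_bpf, BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr)) == 0)
 *		for (unsigned int cpu = 0; cpu < ncpus; cpu++)
 *			use_value((char *)values + cpu * slot);
 */
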
#define BPF_MAP_UPDATE_ELEM_LAST_FIELD flags

static int map_update_elem(union bpf_attr *attr)
{
	void __user *ukey = u64_to_ptr(attr->key);
	void __user *uvalue = u64_to_ptr(attr->value);
	int ufd = attr->map_fd;
	struct bpf_map *map;
	void *key, *value;
	u32 value_size;
	struct fd f;
	int err;

	if (CHECK_ATTR(BPF_MAP_UPDATE_ELEM))
		return -EINVAL;

	f = fdget(ufd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);

	err = -ENOMEM;
	key = kmalloc(map->key_size, GFP_USER);
	if (!key)
		goto err_put;

	err = -EFAULT;
	if (copy_from_user(key, ukey, map->key_size) != 0)
		goto free_key;

	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
		value_size = round_up(map->value_size, 8) * num_possible_cpus();
	else
		value_size = map->value_size;

	err = -ENOMEM;
	value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
	if (!value)
		goto free_key;

	err = -EFAULT;
	if (copy_from_user(value, uvalue, value_size) != 0)
		goto free_value;

	/* must increment bpf_prog_active to avoid kprobe+bpf triggering from
	 * inside bpf map update or delete, otherwise deadlocks are possible
	 */
	preempt_disable();
	__this_cpu_inc(bpf_prog_active);
	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH) {
		err = bpf_percpu_hash_update(map, key, value, attr->flags);
	} else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
		err = bpf_percpu_array_update(map, key, value, attr->flags);
	} else if (map->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY ||
		   map->map_type == BPF_MAP_TYPE_PROG_ARRAY ||
		   map->map_type == BPF_MAP_TYPE_CGROUP_ARRAY) {
		rcu_read_lock();
		err = bpf_fd_array_map_update_elem(map, f.file, key, value,
						   attr->flags);
		rcu_read_unlock();
	} else {
		rcu_read_lock();
		err = map->ops->map_update_elem(map, key, value, attr->flags);
		rcu_read_unlock();
	}
	__this_cpu_dec(bpf_prog_active);
	preempt_enable();

free_value:
	kfree(value);
free_key:
	kfree(key);
err_put:
	fdput(f);
	return err;
}

#define BPF_MAP_DELETE_ELEM_LAST_FIELD key

static int map_delete_elem(union bpf_attr *attr)
{
	void __user *ukey = u64_to_ptr(attr->key);
	int ufd = attr->map_fd;
	struct bpf_map *map;
	struct fd f;
	void *key;
	int err;

	if (CHECK_ATTR(BPF_MAP_DELETE_ELEM))
		return -EINVAL;

	f = fdget(ufd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);

	err = -ENOMEM;
	key = kmalloc(map->key_size, GFP_USER);
	if (!key)
		goto err_put;

	err = -EFAULT;
	if (copy_from_user(key, ukey, map->key_size) != 0)
		goto free_key;

	preempt_disable();
	__this_cpu_inc(bpf_prog_active);
	rcu_read_lock();
	err = map->ops->map_delete_elem(map, key);
	rcu_read_unlock();
	__this_cpu_dec(bpf_prog_active);
	preempt_enable();

free_key:
	kfree(key);
err_put:
	fdput(f);
	return err;
}

/* last field in 'union bpf_attr' used by this command */
#define BPF_MAP_GET_NEXT_KEY_LAST_FIELD next_key

static int map_get_next_key(union bpf_attr *attr)
{
	void __user *ukey = u64_to_ptr(attr->key);
	void __user *unext_key = u64_to_ptr(attr->next_key);
	int ufd = attr->map_fd;
	struct bpf_map *map;
	void *key, *next_key;
	struct fd f;
	int err;

	if (CHECK_ATTR(BPF_MAP_GET_NEXT_KEY))
		return -EINVAL;

	f = fdget(ufd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);

	err = -ENOMEM;
	key = kmalloc(map->key_size, GFP_USER);
	if (!key)
		goto err_put;

	err = -EFAULT;
	if (copy_from_user(key, ukey, map->key_size) != 0)
		goto free_key;

	err = -ENOMEM;
	next_key = kmalloc(map->key_size, GFP_USER);
	if (!next_key)
		goto free_key;

	rcu_read_lock();
	err = map->ops->map_get_next_key(map, key, next_key);
	rcu_read_unlock();
	if (err)
		goto free_next_key;

	err = -EFAULT;
	if (copy_to_user(unext_key, next_key, map->key_size) != 0)
		goto free_next_key;

	err = 0;

free_next_key:
	kfree(next_key);
free_key:
	kfree(key);
err_put:
	fdput(f);
	return err;
}
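/* Illustrative user-space sketch (not part of this file): map_get_next_key()
 * is the building block for iterating a map. The in-tree hash and array
 * implementations of this era return the first key when the passed key is
 * not found, which is how a walk is commonly started; -ENOENT ends it.
 * map_fd is a placeholder and the key type is assumed to be __u32:
 *
 *	__u32 key = (__u32)-1, next_key;	// key assumed absent, yields first key
 *	union bpf_attr attr;
 *
 *	memset(&attr, 0, sizeof(attr));
 *	attr.map_fd   = map_fd;
 *	attr.key      = (__u64)(unsigned long)&key;
 *	attr.next_key = (__u64)(unsigned long)&next_key;
 *
 *	while (syscall(__NR_bpf, BPF_MAP_GET_NEXT_KEY, &attr, sizeof(attr)) == 0) {
 *		// look up or delete next_key here, then advance
 *		key = next_key;
 *	}
 */
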
static LIST_HEAD(bpf_prog_types);

static int find_prog_type(enum bpf_prog_type type, struct bpf_prog *prog)
{
	struct bpf_prog_type_list *tl;

	list_for_each_entry(tl, &bpf_prog_types, list_node) {
		if (tl->type == type) {
			prog->aux->ops = tl->ops;
			prog->type = type;
			return 0;
		}
	}

	return -EINVAL;
}

void bpf_register_prog_type(struct bpf_prog_type_list *tl)
{
	list_add(&tl->list_node, &bpf_prog_types);
}

/* fixup insn->imm field of bpf_call instructions:
 * if (insn->imm == BPF_FUNC_map_lookup_elem)
 *      insn->imm = bpf_map_lookup_elem - __bpf_call_base;
 * else if (insn->imm == BPF_FUNC_map_update_elem)
 *      insn->imm = bpf_map_update_elem - __bpf_call_base;
 * else ...
 *
 * this function is called after eBPF program passed verification
 */
static void fixup_bpf_calls(struct bpf_prog *prog)
{
	const struct bpf_func_proto *fn;
	int i;

	for (i = 0; i < prog->len; i++) {
		struct bpf_insn *insn = &prog->insnsi[i];

		if (insn->code == (BPF_JMP | BPF_CALL)) {
			/* we reach here when the program has bpf_call
			 * instructions and it passed bpf_check(), which means
			 * ops->get_func_proto must have been supplied; check it
			 */
			BUG_ON(!prog->aux->ops->get_func_proto);

			if (insn->imm == BPF_FUNC_get_route_realm)
				prog->dst_needed = 1;
			if (insn->imm == BPF_FUNC_get_prandom_u32)
				bpf_user_rnd_init_once();
			if (insn->imm == BPF_FUNC_tail_call) {
				/* mark bpf_tail_call as different opcode
				 * to avoid conditional branch in
				 * interpreter for every normal call
				 * and to prevent accidental JITing by
				 * JIT compiler that doesn't support
				 * bpf_tail_call yet
				 */
				insn->imm = 0;
				insn->code |= BPF_X;
				continue;
			}

			fn = prog->aux->ops->get_func_proto(insn->imm);
			/* all functions that have a prototype and that the
			 * verifier allowed programs to call must be real
			 * in-kernel functions
			 */
			BUG_ON(!fn->func);
			insn->imm = fn->func - __bpf_call_base;
		}
	}
}
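/* Illustrative note (not part of this file): after fixup_bpf_calls(), a
 * BPF_JMP|BPF_CALL instruction no longer carries a BPF_FUNC_* id but a
 * 32-bit offset relative to __bpf_call_base. The interpreter in core.c
 * undoes that arithmetic at run time, roughly:
 *
 *	// inside the BPF_JMP|BPF_CALL handler of the interpreter (sketch)
 *	BPF_R0 = (__bpf_call_base + insn->imm)(BPF_R1, BPF_R2, BPF_R3,
 *					       BPF_R4, BPF_R5);
 *
 * which is one reason fn->func above must be a real in-kernel function:
 * its distance from __bpf_call_base has to fit the signed 32-bit imm field.
 */
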
/* drop refcnt on maps used by eBPF program and free auxiliary data */
static void free_used_maps(struct bpf_prog_aux *aux)
{
	int i;

	for (i = 0; i < aux->used_map_cnt; i++)
		bpf_map_put(aux->used_maps[i]);

	kfree(aux->used_maps);
}

static int bpf_prog_charge_memlock(struct bpf_prog *prog)
{
	struct user_struct *user = get_current_user();
	unsigned long memlock_limit;

	memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;

	atomic_long_add(prog->pages, &user->locked_vm);
	if (atomic_long_read(&user->locked_vm) > memlock_limit) {
		atomic_long_sub(prog->pages, &user->locked_vm);
		free_uid(user);
		return -EPERM;
	}
	prog->aux->user = user;
	return 0;
}

static void bpf_prog_uncharge_memlock(struct bpf_prog *prog)
{
	struct user_struct *user = prog->aux->user;

	atomic_long_sub(prog->pages, &user->locked_vm);
	free_uid(user);
}

static void __bpf_prog_put_rcu(struct rcu_head *rcu)
{
	struct bpf_prog_aux *aux = container_of(rcu, struct bpf_prog_aux, rcu);

	free_used_maps(aux);
	bpf_prog_uncharge_memlock(aux->prog);
	bpf_prog_free(aux->prog);
}

void bpf_prog_put(struct bpf_prog *prog)
{
	if (atomic_dec_and_test(&prog->aux->refcnt))
		call_rcu(&prog->aux->rcu, __bpf_prog_put_rcu);
}
EXPORT_SYMBOL_GPL(bpf_prog_put);

static int bpf_prog_release(struct inode *inode, struct file *filp)
{
	struct bpf_prog *prog = filp->private_data;

	bpf_prog_put(prog);
	return 0;
}

static const struct file_operations bpf_prog_fops = {
	.release = bpf_prog_release,
};

int bpf_prog_new_fd(struct bpf_prog *prog)
{
	return anon_inode_getfd("bpf-prog", &bpf_prog_fops, prog,
				O_RDWR | O_CLOEXEC);
}

static struct bpf_prog *____bpf_prog_get(struct fd f)
{
	if (!f.file)
		return ERR_PTR(-EBADF);
	if (f.file->f_op != &bpf_prog_fops) {
		fdput(f);
		return ERR_PTR(-EINVAL);
	}

	return f.file->private_data;
}

struct bpf_prog *bpf_prog_add(struct bpf_prog *prog, int i)
{
	if (atomic_add_return(i, &prog->aux->refcnt) > BPF_MAX_REFCNT) {
		atomic_sub(i, &prog->aux->refcnt);
		return ERR_PTR(-EBUSY);
	}
	return prog;
}
EXPORT_SYMBOL_GPL(bpf_prog_add);

struct bpf_prog *bpf_prog_inc(struct bpf_prog *prog)
{
	return bpf_prog_add(prog, 1);
}

static struct bpf_prog *__bpf_prog_get(u32 ufd, enum bpf_prog_type *type)
{
	struct fd f = fdget(ufd);
	struct bpf_prog *prog;

	prog = ____bpf_prog_get(f);
	if (IS_ERR(prog))
		return prog;
	if (type && prog->type != *type) {
		prog = ERR_PTR(-EINVAL);
		goto out;
	}

	prog = bpf_prog_inc(prog);
out:
	fdput(f);
	return prog;
}

struct bpf_prog *bpf_prog_get(u32 ufd)
{
	return __bpf_prog_get(ufd, NULL);
}

struct bpf_prog *bpf_prog_get_type(u32 ufd, enum bpf_prog_type type)
{
	return __bpf_prog_get(ufd, &type);
}
EXPORT_SYMBOL_GPL(bpf_prog_get_type);
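/* Illustrative sketch (not part of this file): kernel subsystems that attach
 * programs take a reference through the exported helpers above and drop it
 * with bpf_prog_put() when detaching. A hypothetical attach path
 * (example_attach(), struct example_dev and dev->prog are placeholders;
 * real users include the socket filter and tc classifier code) might do:
 *
 *	static int example_attach(int prog_fd, struct example_dev *dev)
 *	{
 *		struct bpf_prog *prog;
 *
 *		prog = bpf_prog_get_type(prog_fd, BPF_PROG_TYPE_SCHED_CLS);
 *		if (IS_ERR(prog))
 *			return PTR_ERR(prog);	// bad fd or wrong program type
 *
 *		rcu_assign_pointer(dev->prog, prog);	// publish for readers
 *		return 0;
 *	}
 *
 * and the matching detach would unpublish the pointer, wait for readers,
 * and then call bpf_prog_put(prog).
 */
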
/* last field in 'union bpf_attr' used by this command */
#define BPF_PROG_LOAD_LAST_FIELD kern_version

static int bpf_prog_load(union bpf_attr *attr)
{
	enum bpf_prog_type type = attr->prog_type;
	struct bpf_prog *prog;
	int err;
	char license[128];
	bool is_gpl;

	if (CHECK_ATTR(BPF_PROG_LOAD))
		return -EINVAL;

	/* copy eBPF program license from user space */
	if (strncpy_from_user(license, u64_to_ptr(attr->license),
			      sizeof(license) - 1) < 0)
		return -EFAULT;
	license[sizeof(license) - 1] = 0;

	/* eBPF programs must be GPL compatible to use GPL-ed functions */
	is_gpl = license_is_gpl_compatible(license);

	if (attr->insn_cnt >= BPF_MAXINSNS)
		return -EINVAL;

	if (type == BPF_PROG_TYPE_KPROBE &&
	    attr->kern_version != LINUX_VERSION_CODE)
		return -EINVAL;

	if (type != BPF_PROG_TYPE_SOCKET_FILTER && !capable(CAP_SYS_ADMIN))
		return -EPERM;

	/* plain bpf_prog allocation */
	prog = bpf_prog_alloc(bpf_prog_size(attr->insn_cnt), GFP_USER);
	if (!prog)
		return -ENOMEM;

	err = bpf_prog_charge_memlock(prog);
	if (err)
		goto free_prog_nouncharge;

	prog->len = attr->insn_cnt;

	err = -EFAULT;
	if (copy_from_user(prog->insns, u64_to_ptr(attr->insns),
			   prog->len * sizeof(struct bpf_insn)) != 0)
		goto free_prog;

	prog->orig_prog = NULL;
	prog->jited = 0;

	atomic_set(&prog->aux->refcnt, 1);
	prog->gpl_compatible = is_gpl ? 1 : 0;

	/* find program type: socket_filter vs tracing_filter */
	err = find_prog_type(type, prog);
	if (err < 0)
		goto free_prog;

	/* run eBPF verifier */
	err = bpf_check(&prog, attr);
	if (err < 0)
		goto free_used_maps;

	/* fixup BPF_CALL->imm field */
	fixup_bpf_calls(prog);

	/* eBPF program is ready to be JITed */
	prog = bpf_prog_select_runtime(prog, &err);
	if (err < 0)
		goto free_used_maps;

	err = bpf_prog_new_fd(prog);
	if (err < 0)
		/* failed to allocate fd */
		goto free_used_maps;

	return err;

free_used_maps:
	free_used_maps(prog->aux);
free_prog:
	bpf_prog_uncharge_memlock(prog);
free_prog_nouncharge:
	bpf_prog_free(prog);
	return err;
}
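/* Illustrative user-space sketch (not part of this file): bpf_prog_load()
 * expects the instruction array and the license string to be passed as
 * pointers stuffed into __aligned_u64 fields; kprobe programs must also set
 * kern_version to LINUX_VERSION_CODE. A minimal, hypothetical loader for a
 * trivial "mov r0, 0; exit" socket filter (assuming the usual <linux/bpf.h>,
 * <sys/syscall.h>, <unistd.h> and <string.h> includes):
 *
 *	struct bpf_insn insns[] = {
 *		{ .code = BPF_ALU64 | BPF_MOV | BPF_K,
 *		  .dst_reg = BPF_REG_0 },		// r0 = 0
 *		{ .code = BPF_JMP | BPF_EXIT },		// return r0
 *	};
 *	union bpf_attr attr;
 *
 *	memset(&attr, 0, sizeof(attr));
 *	attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
 *	attr.insns     = (__u64)(unsigned long)insns;
 *	attr.insn_cnt  = sizeof(insns) / sizeof(insns[0]);
 *	attr.license   = (__u64)(unsigned long)"GPL";
 *
 *	int prog_fd = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
 */
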
#define BPF_OBJ_LAST_FIELD bpf_fd

static int bpf_obj_pin(const union bpf_attr *attr)
{
	if (CHECK_ATTR(BPF_OBJ))
		return -EINVAL;

	return bpf_obj_pin_user(attr->bpf_fd, u64_to_ptr(attr->pathname));
}

static int bpf_obj_get(const union bpf_attr *attr)
{
	if (CHECK_ATTR(BPF_OBJ) || attr->bpf_fd != 0)
		return -EINVAL;

	return bpf_obj_get_user(u64_to_ptr(attr->pathname));
}

SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
{
	union bpf_attr attr = {};
	int err;

	if (!capable(CAP_SYS_ADMIN) && sysctl_unprivileged_bpf_disabled)
		return -EPERM;

	if (!access_ok(VERIFY_READ, uattr, 1))
		return -EFAULT;

	if (size > PAGE_SIZE)	/* silly large */
		return -E2BIG;

	/* If we're handed a bigger struct than we know of,
	 * ensure all the unknown bits are 0 - i.e. new
	 * user-space does not rely on any kernel feature
	 * extensions we don't know about yet.
	 */
	if (size > sizeof(attr)) {
		unsigned char __user *addr;
		unsigned char __user *end;
		unsigned char val;

		addr = (void __user *)uattr + sizeof(attr);
		end  = (void __user *)uattr + size;

		for (; addr < end; addr++) {
			err = get_user(val, addr);
			if (err)
				return err;
			if (val)
				return -E2BIG;
		}
		size = sizeof(attr);
	}

	/* copy attributes from user space, may be less than sizeof(bpf_attr) */
	if (copy_from_user(&attr, uattr, size) != 0)
		return -EFAULT;

	switch (cmd) {
	case BPF_MAP_CREATE:
		err = map_create(&attr);
		break;
	case BPF_MAP_LOOKUP_ELEM:
		err = map_lookup_elem(&attr);
		break;
	case BPF_MAP_UPDATE_ELEM:
		err = map_update_elem(&attr);
		break;
	case BPF_MAP_DELETE_ELEM:
		err = map_delete_elem(&attr);
		break;
	case BPF_MAP_GET_NEXT_KEY:
		err = map_get_next_key(&attr);
		break;
	case BPF_PROG_LOAD:
		err = bpf_prog_load(&attr);
		break;
	case BPF_OBJ_PIN:
		err = bpf_obj_pin(&attr);
		break;
	case BPF_OBJ_GET:
		err = bpf_obj_get(&attr);
		break;
	default:
		err = -EINVAL;
		break;
	}

	return err;
}
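/* Illustrative note (not part of this file): the size handling above is what
 * keeps 'union bpf_attr' extensible in both directions. A hypothetical user
 * space built against a differently sized bpf_attr still works on this
 * kernel as long as every byte the kernel does not know about is zero:
 *
 *	// older user space, smaller attr: the uncopied tail of the kernel's
 *	// zero-initialized attr stays zero, so the call is accepted
 *	syscall(__NR_bpf, cmd, &attr, older_smaller_size);
 *
 *	// newer user space, bigger attr: accepted only if the extra trailing
 *	// bytes are all zero, otherwise the syscall fails with -E2BIG
 *	syscall(__NR_bpf, cmd, &attr, newer_bigger_size);
 *
 * together with CHECK_ATTR() in the per-command handlers, this prevents the
 * kernel from silently ignoring attributes a newer user space relies on.
 */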