/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 */
#include <linux/bpf.h>
#include <linux/bpf_trace.h>
#include <linux/syscalls.h>
#include <linux/slab.h>
#include <linux/sched/signal.h>
#include <linux/vmalloc.h>
#include <linux/mmzone.h>
#include <linux/anon_inodes.h>
#include <linux/file.h>
#include <linux/license.h>
#include <linux/filter.h>
#include <linux/version.h>
#include <linux/kernel.h>

DEFINE_PER_CPU(int, bpf_prog_active);

int sysctl_unprivileged_bpf_disabled __read_mostly;

static const struct bpf_map_ops * const bpf_map_types[] = {
#define BPF_PROG_TYPE(_id, _ops)
#define BPF_MAP_TYPE(_id, _ops) \
	[_id] = &_ops,
#include <linux/bpf_types.h>
#undef BPF_PROG_TYPE
#undef BPF_MAP_TYPE
};

static struct bpf_map *find_and_alloc_map(union bpf_attr *attr)
{
	struct bpf_map *map;

	if (attr->map_type >= ARRAY_SIZE(bpf_map_types) ||
	    !bpf_map_types[attr->map_type])
		return ERR_PTR(-EINVAL);

	map = bpf_map_types[attr->map_type]->map_alloc(attr);
	if (IS_ERR(map))
		return map;
	map->ops = bpf_map_types[attr->map_type];
	map->map_type = attr->map_type;
	return map;
}

void *bpf_map_area_alloc(size_t size)
{
	/* We definitely need __GFP_NORETRY, so OOM killer doesn't
	 * trigger under memory pressure as we really just want to
	 * fail instead.
	 */
	const gfp_t flags = __GFP_NOWARN | __GFP_NORETRY | __GFP_ZERO;
	void *area;

	if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) {
		area = kmalloc(size, GFP_USER | flags);
		if (area != NULL)
			return area;
	}

	return __vmalloc(size, GFP_KERNEL | flags, PAGE_KERNEL);
}

void bpf_map_area_free(void *area)
{
	kvfree(area);
}

int bpf_map_precharge_memlock(u32 pages)
{
	struct user_struct *user = get_current_user();
	unsigned long memlock_limit, cur;

	memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
	cur = atomic_long_read(&user->locked_vm);
	free_uid(user);
	if (cur + pages > memlock_limit)
		return -EPERM;
	return 0;
}

static int bpf_map_charge_memlock(struct bpf_map *map)
{
	struct user_struct *user = get_current_user();
	unsigned long memlock_limit;

	memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;

	atomic_long_add(map->pages, &user->locked_vm);

	if (atomic_long_read(&user->locked_vm) > memlock_limit) {
		atomic_long_sub(map->pages, &user->locked_vm);
		free_uid(user);
		return -EPERM;
	}
	map->user = user;
	return 0;
}

static void bpf_map_uncharge_memlock(struct bpf_map *map)
{
	struct user_struct *user = map->user;

	atomic_long_sub(map->pages, &user->locked_vm);
	free_uid(user);
}

/* called from workqueue */
static void bpf_map_free_deferred(struct work_struct *work)
{
	struct bpf_map *map = container_of(work, struct bpf_map, work);

	bpf_map_uncharge_memlock(map);
	/* implementation dependent freeing */
	map->ops->map_free(map);
}

static void bpf_map_put_uref(struct bpf_map *map)
{
	if (atomic_dec_and_test(&map->usercnt)) {
		if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY)
			bpf_fd_array_map_clear(map);
	}
}

/* decrement map refcnt and schedule it for freeing via workqueue
 * (underlying map implementation ops->map_free() might sleep)
 */
void bpf_map_put(struct bpf_map *map)
{
	if (atomic_dec_and_test(&map->refcnt)) {
		INIT_WORK(&map->work, bpf_map_free_deferred);
		schedule_work(&map->work);
	}
}

void bpf_map_put_with_uref(struct bpf_map *map)
{
	bpf_map_put_uref(map);
	bpf_map_put(map);
}

static int bpf_map_release(struct inode *inode, struct file *filp)
{
	struct bpf_map *map = filp->private_data;

	if (map->ops->map_release)
		map->ops->map_release(map, filp);

	bpf_map_put_with_uref(map);
	return 0;
}

#ifdef CONFIG_PROC_FS
static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp)
{
	const struct bpf_map *map = filp->private_data;
	const struct bpf_array *array;
	u32 owner_prog_type = 0;

	if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY) {
		array = container_of(map, struct bpf_array, map);
		owner_prog_type = array->owner_prog_type;
	}

	seq_printf(m,
		   "map_type:\t%u\n"
		   "key_size:\t%u\n"
		   "value_size:\t%u\n"
		   "max_entries:\t%u\n"
		   "map_flags:\t%#x\n"
		   "memlock:\t%llu\n",
		   map->map_type,
		   map->key_size,
		   map->value_size,
		   map->max_entries,
		   map->map_flags,
		   map->pages * 1ULL << PAGE_SHIFT);

	if (owner_prog_type)
		seq_printf(m, "owner_prog_type:\t%u\n",
			   owner_prog_type);
}
#endif

static const struct file_operations bpf_map_fops = {
#ifdef CONFIG_PROC_FS
	.show_fdinfo = bpf_map_show_fdinfo,
#endif
	.release = bpf_map_release,
};

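/* Install a new anon-inode backed fd for @map. The caller must already
 * hold a map reference and a user reference; both are handed over to the
 * fd and dropped in bpf_map_release() once the fd is closed.
 */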
int bpf_map_new_fd(struct bpf_map *map)
{
	return anon_inode_getfd("bpf-map", &bpf_map_fops, map,
				O_RDWR | O_CLOEXEC);
}

/* helper macro to check that unused fields of 'union bpf_attr' are zero */
#define CHECK_ATTR(CMD) \
	memchr_inv((void *) &attr->CMD##_LAST_FIELD + \
		   sizeof(attr->CMD##_LAST_FIELD), 0, \
		   sizeof(*attr) - \
		   offsetof(union bpf_attr, CMD##_LAST_FIELD) - \
		   sizeof(attr->CMD##_LAST_FIELD)) != NULL

#define BPF_MAP_CREATE_LAST_FIELD inner_map_fd
/* called via syscall */
static int map_create(union bpf_attr *attr)
{
	struct bpf_map *map;
	int err;

	err = CHECK_ATTR(BPF_MAP_CREATE);
	if (err)
		return -EINVAL;

	/* find map type and init map: hashtable vs rbtree vs bloom vs ... */
	map = find_and_alloc_map(attr);
	if (IS_ERR(map))
		return PTR_ERR(map);

	atomic_set(&map->refcnt, 1);
	atomic_set(&map->usercnt, 1);

	err = bpf_map_charge_memlock(map);
	if (err)
		goto free_map_nouncharge;

	err = bpf_map_new_fd(map);
	if (err < 0)
		/* failed to allocate fd */
		goto free_map;

	trace_bpf_map_create(map, err);
	return err;

free_map:
	bpf_map_uncharge_memlock(map);
free_map_nouncharge:
	map->ops->map_free(map);
	return err;
}

/* if error is returned, fd is released.
 * On success caller should complete fd access with matching fdput()
 */
struct bpf_map *__bpf_map_get(struct fd f)
{
	if (!f.file)
		return ERR_PTR(-EBADF);
	if (f.file->f_op != &bpf_map_fops) {
		fdput(f);
		return ERR_PTR(-EINVAL);
	}

	return f.file->private_data;
}

/* prog's and map's refcnt limit */
#define BPF_MAX_REFCNT 32768

struct bpf_map *bpf_map_inc(struct bpf_map *map, bool uref)
{
	if (atomic_inc_return(&map->refcnt) > BPF_MAX_REFCNT) {
		atomic_dec(&map->refcnt);
		return ERR_PTR(-EBUSY);
	}
	if (uref)
		atomic_inc(&map->usercnt);
	return map;
}

struct bpf_map *bpf_map_get_with_uref(u32 ufd)
{
	struct fd f = fdget(ufd);
	struct bpf_map *map;

	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return map;

	map = bpf_map_inc(map, true);
	fdput(f);

	return map;
}

int __weak bpf_stackmap_copy(struct bpf_map *map, void *key, void *value)
{
	return -ENOTSUPP;
}

/* last field in 'union bpf_attr' used by this command */
#define BPF_MAP_LOOKUP_ELEM_LAST_FIELD value

static int map_lookup_elem(union bpf_attr *attr)
{
	void __user *ukey = u64_to_user_ptr(attr->key);
	void __user *uvalue = u64_to_user_ptr(attr->value);
	int ufd = attr->map_fd;
	struct bpf_map *map;
	void *key, *value, *ptr;
	u32 value_size;
	struct fd f;
	int err;

	if (CHECK_ATTR(BPF_MAP_LOOKUP_ELEM))
		return -EINVAL;

	f = fdget(ufd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);

	err = -ENOMEM;
	key = kmalloc(map->key_size, GFP_USER);
	if (!key)
		goto err_put;

	err = -EFAULT;
	if (copy_from_user(key, ukey, map->key_size) != 0)
		goto free_key;

	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
		value_size = round_up(map->value_size, 8) * num_possible_cpus();
	else
		value_size = map->value_size;

	err = -ENOMEM;
	value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
	if (!value)
		goto free_key;

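	/* Per-CPU maps and the stack-trace map need their dedicated copy
	 * helpers, map-in-map values cannot be read from user space, and
	 * everything else goes through the generic ops->map_lookup_elem()
	 * under RCU.
	 */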
	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
		err = bpf_percpu_hash_copy(map, key, value);
	} else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
		err = bpf_percpu_array_copy(map, key, value);
	} else if (map->map_type == BPF_MAP_TYPE_STACK_TRACE) {
		err = bpf_stackmap_copy(map, key, value);
	} else if (map->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS ||
		   map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) {
		err = -ENOTSUPP;
	} else {
		rcu_read_lock();
		ptr = map->ops->map_lookup_elem(map, key);
		if (ptr)
			memcpy(value, ptr, value_size);
		rcu_read_unlock();
		err = ptr ? 0 : -ENOENT;
	}

	if (err)
		goto free_value;

	err = -EFAULT;
	if (copy_to_user(uvalue, value, value_size) != 0)
		goto free_value;

	trace_bpf_map_lookup_elem(map, ufd, key, value);
	err = 0;

free_value:
	kfree(value);
free_key:
	kfree(key);
err_put:
	fdput(f);
	return err;
}

#define BPF_MAP_UPDATE_ELEM_LAST_FIELD flags

static int map_update_elem(union bpf_attr *attr)
{
	void __user *ukey = u64_to_user_ptr(attr->key);
	void __user *uvalue = u64_to_user_ptr(attr->value);
	int ufd = attr->map_fd;
	struct bpf_map *map;
	void *key, *value;
	u32 value_size;
	struct fd f;
	int err;

	if (CHECK_ATTR(BPF_MAP_UPDATE_ELEM))
		return -EINVAL;

	f = fdget(ufd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);

	err = -ENOMEM;
	key = kmalloc(map->key_size, GFP_USER);
	if (!key)
		goto err_put;

	err = -EFAULT;
	if (copy_from_user(key, ukey, map->key_size) != 0)
		goto free_key;

	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
		value_size = round_up(map->value_size, 8) * num_possible_cpus();
	else
		value_size = map->value_size;

	err = -ENOMEM;
	value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
	if (!value)
		goto free_key;

	err = -EFAULT;
	if (copy_from_user(value, uvalue, value_size) != 0)
		goto free_value;

	/* must increment bpf_prog_active to avoid kprobe+bpf triggering from
	 * inside bpf map update or delete, otherwise deadlocks are possible
	 */
	preempt_disable();
	__this_cpu_inc(bpf_prog_active);
	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
		err = bpf_percpu_hash_update(map, key, value, attr->flags);
	} else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
		err = bpf_percpu_array_update(map, key, value, attr->flags);
	} else if (map->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY ||
		   map->map_type == BPF_MAP_TYPE_PROG_ARRAY ||
		   map->map_type == BPF_MAP_TYPE_CGROUP_ARRAY ||
		   map->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS) {
		rcu_read_lock();
		err = bpf_fd_array_map_update_elem(map, f.file, key, value,
						   attr->flags);
		rcu_read_unlock();
	} else if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) {
		rcu_read_lock();
		err = bpf_fd_htab_map_update_elem(map, f.file, key, value,
						  attr->flags);
		rcu_read_unlock();
	} else {
		rcu_read_lock();
		err = map->ops->map_update_elem(map, key, value, attr->flags);
		rcu_read_unlock();
	}
	__this_cpu_dec(bpf_prog_active);
	preempt_enable();

	if (!err)
		trace_bpf_map_update_elem(map, ufd, key, value);
free_value:
	kfree(value);
free_key:
	kfree(key);
err_put:
	fdput(f);
	return err;
}

#define BPF_MAP_DELETE_ELEM_LAST_FIELD key

static int map_delete_elem(union bpf_attr *attr)
{
	void __user *ukey = u64_to_user_ptr(attr->key);
	int ufd = attr->map_fd;
	struct bpf_map *map;
	struct fd f;
	void *key;
	int err;

	if (CHECK_ATTR(BPF_MAP_DELETE_ELEM))
		return -EINVAL;

	f = fdget(ufd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);

	err = -ENOMEM;
	key = kmalloc(map->key_size, GFP_USER);
	if (!key)
		goto err_put;

	err = -EFAULT;
	if (copy_from_user(key, ukey, map->key_size) != 0)
		goto free_key;

	preempt_disable();
	__this_cpu_inc(bpf_prog_active);
	rcu_read_lock();
	err = map->ops->map_delete_elem(map, key);
	rcu_read_unlock();
	__this_cpu_dec(bpf_prog_active);
	preempt_enable();

	if (!err)
		trace_bpf_map_delete_elem(map, ufd, key);
free_key:
	kfree(key);
err_put:
	fdput(f);
	return err;
}

/* last field in 'union bpf_attr' used by this command */
#define BPF_MAP_GET_NEXT_KEY_LAST_FIELD next_key

static int map_get_next_key(union bpf_attr *attr)
{
	void __user *ukey = u64_to_user_ptr(attr->key);
	void __user *unext_key = u64_to_user_ptr(attr->next_key);
	int ufd = attr->map_fd;
	struct bpf_map *map;
	void *key, *next_key;
	struct fd f;
	int err;

	if (CHECK_ATTR(BPF_MAP_GET_NEXT_KEY))
		return -EINVAL;

	f = fdget(ufd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);

	if (ukey) {
		err = -ENOMEM;
		key = kmalloc(map->key_size, GFP_USER);
		if (!key)
			goto err_put;

		err = -EFAULT;
		if (copy_from_user(key, ukey, map->key_size) != 0)
			goto free_key;
	} else {
		key = NULL;
	}

	err = -ENOMEM;
	next_key = kmalloc(map->key_size, GFP_USER);
	if (!next_key)
		goto free_key;

	rcu_read_lock();
	err = map->ops->map_get_next_key(map, key, next_key);
	rcu_read_unlock();
	if (err)
		goto free_next_key;

	err = -EFAULT;
	if (copy_to_user(unext_key, next_key, map->key_size) != 0)
		goto free_next_key;

	trace_bpf_map_next_key(map, ufd, key, next_key);
	err = 0;

free_next_key:
	kfree(next_key);
free_key:
	kfree(key);
err_put:
	fdput(f);
	return err;
}

static const struct bpf_verifier_ops * const bpf_prog_types[] = {
#define BPF_PROG_TYPE(_id, _ops) \
	[_id] = &_ops,
#define BPF_MAP_TYPE(_id, _ops)
#include <linux/bpf_types.h>
#undef BPF_PROG_TYPE
#undef BPF_MAP_TYPE
};

static int find_prog_type(enum bpf_prog_type type, struct bpf_prog *prog)
{
	if (type >= ARRAY_SIZE(bpf_prog_types) || !bpf_prog_types[type])
		return -EINVAL;

	prog->aux->ops = bpf_prog_types[type];
	prog->type = type;
	return 0;
}

/* drop refcnt on maps used by eBPF program and free auxiliary data */
static void free_used_maps(struct bpf_prog_aux *aux)
{
	int i;

	for (i = 0; i < aux->used_map_cnt; i++)
		bpf_map_put(aux->used_maps[i]);

	kfree(aux->used_maps);
}

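/* Account @pages of program memory against @user's RLIMIT_MEMLOCK.
 * The charge is skipped when no user is supplied; it is undone with
 * __bpf_prog_uncharge().
 */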
int __bpf_prog_charge(struct user_struct *user, u32 pages)
{
	unsigned long memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
	unsigned long user_bufs;

	if (user) {
		user_bufs = atomic_long_add_return(pages, &user->locked_vm);
		if (user_bufs > memlock_limit) {
			atomic_long_sub(pages, &user->locked_vm);
			return -EPERM;
		}
	}

	return 0;
}

void __bpf_prog_uncharge(struct user_struct *user, u32 pages)
{
	if (user)
		atomic_long_sub(pages, &user->locked_vm);
}

static int bpf_prog_charge_memlock(struct bpf_prog *prog)
{
	struct user_struct *user = get_current_user();
	int ret;

	ret = __bpf_prog_charge(user, prog->pages);
	if (ret) {
		free_uid(user);
		return ret;
	}

	prog->aux->user = user;
	return 0;
}

static void bpf_prog_uncharge_memlock(struct bpf_prog *prog)
{
	struct user_struct *user = prog->aux->user;

	__bpf_prog_uncharge(user, prog->pages);
	free_uid(user);
}

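/* Deferred from bpf_prog_put() via call_rcu(): once a grace period has
 * elapsed no RCU reader can still be executing the program, so its maps,
 * memlock charge and image can be released.
 */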
static void __bpf_prog_put_rcu(struct rcu_head *rcu)
{
	struct bpf_prog_aux *aux = container_of(rcu, struct bpf_prog_aux, rcu);

	free_used_maps(aux);
	bpf_prog_uncharge_memlock(aux->prog);
	bpf_prog_free(aux->prog);
}

void bpf_prog_put(struct bpf_prog *prog)
{
	if (atomic_dec_and_test(&prog->aux->refcnt)) {
		trace_bpf_prog_put_rcu(prog);
		bpf_prog_kallsyms_del(prog);
		call_rcu(&prog->aux->rcu, __bpf_prog_put_rcu);
	}
}
EXPORT_SYMBOL_GPL(bpf_prog_put);

static int bpf_prog_release(struct inode *inode, struct file *filp)
{
	struct bpf_prog *prog = filp->private_data;

	bpf_prog_put(prog);
	return 0;
}

#ifdef CONFIG_PROC_FS
static void bpf_prog_show_fdinfo(struct seq_file *m, struct file *filp)
{
	const struct bpf_prog *prog = filp->private_data;
	char prog_tag[sizeof(prog->tag) * 2 + 1] = { };

	bin2hex(prog_tag, prog->tag, sizeof(prog->tag));
	seq_printf(m,
		   "prog_type:\t%u\n"
		   "prog_jited:\t%u\n"
		   "prog_tag:\t%s\n"
		   "memlock:\t%llu\n",
		   prog->type,
		   prog->jited,
		   prog_tag,
		   prog->pages * 1ULL << PAGE_SHIFT);
}
#endif

static const struct file_operations bpf_prog_fops = {
#ifdef CONFIG_PROC_FS
	.show_fdinfo = bpf_prog_show_fdinfo,
#endif
	.release = bpf_prog_release,
};

int bpf_prog_new_fd(struct bpf_prog *prog)
{
	return anon_inode_getfd("bpf-prog", &bpf_prog_fops, prog,
				O_RDWR | O_CLOEXEC);
}

static struct bpf_prog *____bpf_prog_get(struct fd f)
{
	if (!f.file)
		return ERR_PTR(-EBADF);
	if (f.file->f_op != &bpf_prog_fops) {
		fdput(f);
		return ERR_PTR(-EINVAL);
	}

	return f.file->private_data;
}

struct bpf_prog *bpf_prog_add(struct bpf_prog *prog, int i)
{
	if (atomic_add_return(i, &prog->aux->refcnt) > BPF_MAX_REFCNT) {
		atomic_sub(i, &prog->aux->refcnt);
		return ERR_PTR(-EBUSY);
	}
	return prog;
}
EXPORT_SYMBOL_GPL(bpf_prog_add);

void bpf_prog_sub(struct bpf_prog *prog, int i)
{
	/* Only to be used for undoing previous bpf_prog_add() in some
	 * error path. We still know that another entity in our call
	 * path holds a reference to the program, thus atomic_sub() can
	 * be safely used in such cases!
	 */
	WARN_ON(atomic_sub_return(i, &prog->aux->refcnt) == 0);
}
EXPORT_SYMBOL_GPL(bpf_prog_sub);

struct bpf_prog *bpf_prog_inc(struct bpf_prog *prog)
{
	return bpf_prog_add(prog, 1);
}
EXPORT_SYMBOL_GPL(bpf_prog_inc);

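/* Translate a program fd into its struct bpf_prog, optionally checking
 * that the program is of the expected type, and take a reference on
 * success.
 */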
static struct bpf_prog *__bpf_prog_get(u32 ufd, enum bpf_prog_type *type)
{
	struct fd f = fdget(ufd);
	struct bpf_prog *prog;

	prog = ____bpf_prog_get(f);
	if (IS_ERR(prog))
		return prog;
	if (type && prog->type != *type) {
		prog = ERR_PTR(-EINVAL);
		goto out;
	}

	prog = bpf_prog_inc(prog);
out:
	fdput(f);
	return prog;
}

struct bpf_prog *bpf_prog_get(u32 ufd)
{
	return __bpf_prog_get(ufd, NULL);
}

struct bpf_prog *bpf_prog_get_type(u32 ufd, enum bpf_prog_type type)
{
	struct bpf_prog *prog = __bpf_prog_get(ufd, &type);

	if (!IS_ERR(prog))
		trace_bpf_prog_get_type(prog);
	return prog;
}
EXPORT_SYMBOL_GPL(bpf_prog_get_type);

/* last field in 'union bpf_attr' used by this command */
#define BPF_PROG_LOAD_LAST_FIELD kern_version

static int bpf_prog_load(union bpf_attr *attr)
{
	enum bpf_prog_type type = attr->prog_type;
	struct bpf_prog *prog;
	int err;
	char license[128];
	bool is_gpl;

	if (CHECK_ATTR(BPF_PROG_LOAD))
		return -EINVAL;

	/* copy eBPF program license from user space */
	if (strncpy_from_user(license, u64_to_user_ptr(attr->license),
			      sizeof(license) - 1) < 0)
		return -EFAULT;
	license[sizeof(license) - 1] = 0;

	/* eBPF programs must be GPL compatible to use GPL-ed functions */
	is_gpl = license_is_gpl_compatible(license);

	if (attr->insn_cnt == 0 || attr->insn_cnt > BPF_MAXINSNS)
		return -E2BIG;

	if (type == BPF_PROG_TYPE_KPROBE &&
	    attr->kern_version != LINUX_VERSION_CODE)
		return -EINVAL;

	if (type != BPF_PROG_TYPE_SOCKET_FILTER && !capable(CAP_SYS_ADMIN))
		return -EPERM;

	/* plain bpf_prog allocation */
	prog = bpf_prog_alloc(bpf_prog_size(attr->insn_cnt), GFP_USER);
	if (!prog)
		return -ENOMEM;

	err = bpf_prog_charge_memlock(prog);
	if (err)
		goto free_prog_nouncharge;

	prog->len = attr->insn_cnt;

	err = -EFAULT;
	if (copy_from_user(prog->insns, u64_to_user_ptr(attr->insns),
			   bpf_prog_insn_size(prog)) != 0)
		goto free_prog;

	prog->orig_prog = NULL;
	prog->jited = 0;

	atomic_set(&prog->aux->refcnt, 1);
	prog->gpl_compatible = is_gpl ? 1 : 0;

	/* find program type: socket_filter vs tracing_filter */
	err = find_prog_type(type, prog);
	if (err < 0)
		goto free_prog;

	/* run eBPF verifier */
	err = bpf_check(&prog, attr);
	if (err < 0)
		goto free_used_maps;

	/* eBPF program is ready to be JITed */
	prog = bpf_prog_select_runtime(prog, &err);
	if (err < 0)
		goto free_used_maps;

	err = bpf_prog_new_fd(prog);
	if (err < 0)
		/* failed to allocate fd */
		goto free_used_maps;

	bpf_prog_kallsyms_add(prog);
	trace_bpf_prog_load(prog, err);
	return err;

free_used_maps:
	free_used_maps(prog->aux);
free_prog:
	bpf_prog_uncharge_memlock(prog);
free_prog_nouncharge:
	bpf_prog_free(prog);
	return err;
}

#define BPF_OBJ_LAST_FIELD bpf_fd

static int bpf_obj_pin(const union bpf_attr *attr)
{
	if (CHECK_ATTR(BPF_OBJ))
		return -EINVAL;

	return bpf_obj_pin_user(attr->bpf_fd, u64_to_user_ptr(attr->pathname));
}

static int bpf_obj_get(const union bpf_attr *attr)
{
	if (CHECK_ATTR(BPF_OBJ) || attr->bpf_fd != 0)
		return -EINVAL;

	return bpf_obj_get_user(u64_to_user_ptr(attr->pathname));
}

#ifdef CONFIG_CGROUP_BPF

#define BPF_PROG_ATTACH_LAST_FIELD attach_flags

static int bpf_prog_attach(const union bpf_attr *attr)
{
	enum bpf_prog_type ptype;
	struct bpf_prog *prog;
	struct cgroup *cgrp;
	int ret;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	if (CHECK_ATTR(BPF_PROG_ATTACH))
		return -EINVAL;

	if (attr->attach_flags & ~BPF_F_ALLOW_OVERRIDE)
		return -EINVAL;

	switch (attr->attach_type) {
	case BPF_CGROUP_INET_INGRESS:
	case BPF_CGROUP_INET_EGRESS:
		ptype = BPF_PROG_TYPE_CGROUP_SKB;
		break;
	case BPF_CGROUP_INET_SOCK_CREATE:
		ptype = BPF_PROG_TYPE_CGROUP_SOCK;
		break;
	default:
		return -EINVAL;
	}

	prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype);
	if (IS_ERR(prog))
		return PTR_ERR(prog);

	cgrp = cgroup_get_from_fd(attr->target_fd);
	if (IS_ERR(cgrp)) {
		bpf_prog_put(prog);
		return PTR_ERR(cgrp);
	}

	ret = cgroup_bpf_update(cgrp, prog, attr->attach_type,
				attr->attach_flags & BPF_F_ALLOW_OVERRIDE);
	if (ret)
		bpf_prog_put(prog);
	cgroup_put(cgrp);

	return ret;
}

#define BPF_PROG_DETACH_LAST_FIELD attach_type

static int bpf_prog_detach(const union bpf_attr *attr)
{
	struct cgroup *cgrp;
	int ret;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	if (CHECK_ATTR(BPF_PROG_DETACH))
		return -EINVAL;

	switch (attr->attach_type) {
	case BPF_CGROUP_INET_INGRESS:
	case BPF_CGROUP_INET_EGRESS:
	case BPF_CGROUP_INET_SOCK_CREATE:
		cgrp = cgroup_get_from_fd(attr->target_fd);
		if (IS_ERR(cgrp))
			return PTR_ERR(cgrp);

		ret = cgroup_bpf_update(cgrp, NULL, attr->attach_type, false);
		cgroup_put(cgrp);
		break;

	default:
		return -EINVAL;
	}

	return ret;
}
#endif /* CONFIG_CGROUP_BPF */

#define BPF_PROG_TEST_RUN_LAST_FIELD test.duration

static int bpf_prog_test_run(const union bpf_attr *attr,
			     union bpf_attr __user *uattr)
{
	struct bpf_prog *prog;
	int ret = -ENOTSUPP;

	if (CHECK_ATTR(BPF_PROG_TEST_RUN))
		return -EINVAL;

	prog = bpf_prog_get(attr->test.prog_fd);
	if (IS_ERR(prog))
		return PTR_ERR(prog);

	if (prog->aux->ops->test_run)
		ret = prog->aux->ops->test_run(prog, attr, uattr);

	bpf_prog_put(prog);
	return ret;
}

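/* bpf(2) syscall dispatcher. A minimal user-space invocation (illustrative
 * sketch only, no error handling), here creating a hash map via
 * <linux/bpf.h> and syscall(2), would look like:
 *
 *	union bpf_attr attr = {
 *		.map_type    = BPF_MAP_TYPE_HASH,
 *		.key_size    = sizeof(__u32),
 *		.value_size  = sizeof(__u64),
 *		.max_entries = 1024,
 *	};
 *	int fd = syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr));
 */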
SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
{
	union bpf_attr attr = {};
	int err;

	if (!capable(CAP_SYS_ADMIN) && sysctl_unprivileged_bpf_disabled)
		return -EPERM;

	if (!access_ok(VERIFY_READ, uattr, 1))
		return -EFAULT;

	if (size > PAGE_SIZE)	/* silly large */
		return -E2BIG;

	/* If we're handed a bigger struct than we know of,
	 * ensure all the unknown bits are 0 - i.e. new
	 * user-space does not rely on any kernel feature
	 * extensions we don't know about yet.
	 */
	if (size > sizeof(attr)) {
		unsigned char __user *addr;
		unsigned char __user *end;
		unsigned char val;

		addr = (void __user *)uattr + sizeof(attr);
		end = (void __user *)uattr + size;

		for (; addr < end; addr++) {
			err = get_user(val, addr);
			if (err)
				return err;
			if (val)
				return -E2BIG;
		}
		size = sizeof(attr);
	}

	/* copy attributes from user space, may be less than sizeof(bpf_attr) */
	if (copy_from_user(&attr, uattr, size) != 0)
		return -EFAULT;

	switch (cmd) {
	case BPF_MAP_CREATE:
		err = map_create(&attr);
		break;
	case BPF_MAP_LOOKUP_ELEM:
		err = map_lookup_elem(&attr);
		break;
	case BPF_MAP_UPDATE_ELEM:
		err = map_update_elem(&attr);
		break;
	case BPF_MAP_DELETE_ELEM:
		err = map_delete_elem(&attr);
		break;
	case BPF_MAP_GET_NEXT_KEY:
		err = map_get_next_key(&attr);
		break;
	case BPF_PROG_LOAD:
		err = bpf_prog_load(&attr);
		break;
	case BPF_OBJ_PIN:
		err = bpf_obj_pin(&attr);
		break;
	case BPF_OBJ_GET:
		err = bpf_obj_get(&attr);
		break;
#ifdef CONFIG_CGROUP_BPF
	case BPF_PROG_ATTACH:
		err = bpf_prog_attach(&attr);
		break;
	case BPF_PROG_DETACH:
		err = bpf_prog_detach(&attr);
		break;
#endif
	case BPF_PROG_TEST_RUN:
		err = bpf_prog_test_run(&attr, uattr);
		break;
	default:
		err = -EINVAL;
		break;
	}

	return err;
}