/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 */
#include <linux/bpf.h>
#include <linux/bpf_trace.h>
#include <linux/syscalls.h>
#include <linux/slab.h>
#include <linux/sched/signal.h>
#include <linux/vmalloc.h>
#include <linux/mmzone.h>
#include <linux/anon_inodes.h>
#include <linux/file.h>
#include <linux/license.h>
#include <linux/filter.h>
#include <linux/version.h>
#include <linux/kernel.h>

DEFINE_PER_CPU(int, bpf_prog_active);

int sysctl_unprivileged_bpf_disabled __read_mostly;

static const struct bpf_map_ops * const bpf_map_types[] = {
#define BPF_PROG_TYPE(_id, _ops)
#define BPF_MAP_TYPE(_id, _ops) \
	[_id] = &_ops,
#include <linux/bpf_types.h>
#undef BPF_PROG_TYPE
#undef BPF_MAP_TYPE
};

static struct bpf_map *find_and_alloc_map(union bpf_attr *attr)
{
	struct bpf_map *map;

	if (attr->map_type >= ARRAY_SIZE(bpf_map_types) ||
	    !bpf_map_types[attr->map_type])
		return ERR_PTR(-EINVAL);

	map = bpf_map_types[attr->map_type]->map_alloc(attr);
	if (IS_ERR(map))
		return map;
	map->ops = bpf_map_types[attr->map_type];
	map->map_type = attr->map_type;
	return map;
}

void *bpf_map_area_alloc(size_t size)
{
	/* We definitely need __GFP_NORETRY, so the OOM killer doesn't
	 * trigger under memory pressure as we really just want to
	 * fail instead.
	 */
	const gfp_t flags = __GFP_NOWARN | __GFP_NORETRY | __GFP_ZERO;
	void *area;

	if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) {
		area = kmalloc(size, GFP_USER | flags);
		if (area != NULL)
			return area;
	}

	return __vmalloc(size, GFP_KERNEL | flags, PAGE_KERNEL);
}

void bpf_map_area_free(void *area)
{
	kvfree(area);
}

int bpf_map_precharge_memlock(u32 pages)
{
	struct user_struct *user = get_current_user();
	unsigned long memlock_limit, cur;

	memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
	cur = atomic_long_read(&user->locked_vm);
	free_uid(user);
	if (cur + pages > memlock_limit)
		return -EPERM;
	return 0;
}

static int bpf_map_charge_memlock(struct bpf_map *map)
{
	struct user_struct *user = get_current_user();
	unsigned long memlock_limit;

	memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;

	atomic_long_add(map->pages, &user->locked_vm);

	if (atomic_long_read(&user->locked_vm) > memlock_limit) {
		atomic_long_sub(map->pages, &user->locked_vm);
		free_uid(user);
		return -EPERM;
	}
	map->user = user;
	return 0;
}

static void bpf_map_uncharge_memlock(struct bpf_map *map)
{
	struct user_struct *user = map->user;

	atomic_long_sub(map->pages, &user->locked_vm);
	free_uid(user);
}

/* called from workqueue */
static void bpf_map_free_deferred(struct work_struct *work)
{
	struct bpf_map *map = container_of(work, struct bpf_map, work);

	bpf_map_uncharge_memlock(map);
	/* implementation dependent freeing */
	map->ops->map_free(map);
}

static void bpf_map_put_uref(struct bpf_map *map)
{
	if (atomic_dec_and_test(&map->usercnt)) {
		if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY)
			bpf_fd_array_map_clear(map);
	}
}

/* decrement map refcnt and schedule it for freeing via workqueue
 * (underlying map implementation ops->map_free() might sleep)
 */
void bpf_map_put(struct bpf_map *map)
{
	if (atomic_dec_and_test(&map->refcnt)) {
		INIT_WORK(&map->work, bpf_map_free_deferred);
		schedule_work(&map->work);
	}
}

void bpf_map_put_with_uref(struct bpf_map *map)
{
	bpf_map_put_uref(map);
	bpf_map_put(map);
}

static int bpf_map_release(struct inode *inode, struct file *filp)
{
	struct bpf_map *map = filp->private_data;

	if (map->ops->map_release)
		map->ops->map_release(map, filp);

	bpf_map_put_with_uref(map);
	return 0;
}

#ifdef CONFIG_PROC_FS
static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp)
{
	const struct bpf_map *map = filp->private_data;
	const struct bpf_array *array;
	u32 owner_prog_type = 0;

	if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY) {
		array = container_of(map, struct bpf_array, map);
		owner_prog_type = array->owner_prog_type;
	}

	seq_printf(m,
		   "map_type:\t%u\n"
		   "key_size:\t%u\n"
		   "value_size:\t%u\n"
		   "max_entries:\t%u\n"
		   "map_flags:\t%#x\n"
		   "memlock:\t%llu\n",
		   map->map_type,
		   map->key_size,
		   map->value_size,
		   map->max_entries,
		   map->map_flags,
		   map->pages * 1ULL << PAGE_SHIFT);

	if (owner_prog_type)
		seq_printf(m, "owner_prog_type:\t%u\n",
			   owner_prog_type);
}
#endif

static const struct file_operations bpf_map_fops = {
#ifdef CONFIG_PROC_FS
	.show_fdinfo = bpf_map_show_fdinfo,
#endif
	.release = bpf_map_release,
};

int bpf_map_new_fd(struct bpf_map *map)
{
	return anon_inode_getfd("bpf-map", &bpf_map_fops, map,
				O_RDWR | O_CLOEXEC);
}

/* helper macro to check that unused fields of 'union bpf_attr' are zero */
#define CHECK_ATTR(CMD) \
	memchr_inv((void *) &attr->CMD##_LAST_FIELD + \
		   sizeof(attr->CMD##_LAST_FIELD), 0, \
		   sizeof(*attr) - \
		   offsetof(union bpf_attr, CMD##_LAST_FIELD) - \
		   sizeof(attr->CMD##_LAST_FIELD)) != NULL

#define BPF_MAP_CREATE_LAST_FIELD inner_map_fd
/* called via syscall */
static int map_create(union bpf_attr *attr)
{
	struct bpf_map *map;
	int err;

	err = CHECK_ATTR(BPF_MAP_CREATE);
	if (err)
		return -EINVAL;

	/* find map type and init map: hashtable vs rbtree vs bloom vs ... */
	map = find_and_alloc_map(attr);
	if (IS_ERR(map))
		return PTR_ERR(map);

	atomic_set(&map->refcnt, 1);
	atomic_set(&map->usercnt, 1);

	err = bpf_map_charge_memlock(map);
	if (err)
		goto free_map_nouncharge;

	err = bpf_map_new_fd(map);
	if (err < 0)
		/* failed to allocate fd */
		goto free_map;

	trace_bpf_map_create(map, err);
	return err;

free_map:
	bpf_map_uncharge_memlock(map);
free_map_nouncharge:
	map->ops->map_free(map);
	return err;
}

/* if error is returned, fd is released.
 * On success caller should complete fd access with matching fdput()
 */
struct bpf_map *__bpf_map_get(struct fd f)
{
	if (!f.file)
		return ERR_PTR(-EBADF);
	if (f.file->f_op != &bpf_map_fops) {
		fdput(f);
		return ERR_PTR(-EINVAL);
	}

	return f.file->private_data;
}

/* prog's and map's refcnt limit */
#define BPF_MAX_REFCNT 32768

struct bpf_map *bpf_map_inc(struct bpf_map *map, bool uref)
{
	if (atomic_inc_return(&map->refcnt) > BPF_MAX_REFCNT) {
		atomic_dec(&map->refcnt);
		return ERR_PTR(-EBUSY);
	}
	if (uref)
		atomic_inc(&map->usercnt);
	return map;
}

struct bpf_map *bpf_map_get_with_uref(u32 ufd)
{
	struct fd f = fdget(ufd);
	struct bpf_map *map;

	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return map;

	map = bpf_map_inc(map, true);
	fdput(f);

	return map;
}

int __weak bpf_stackmap_copy(struct bpf_map *map, void *key, void *value)
{
	return -ENOTSUPP;
}

/* last field in 'union bpf_attr' used by this command */
#define BPF_MAP_LOOKUP_ELEM_LAST_FIELD value

static int map_lookup_elem(union bpf_attr *attr)
{
	void __user *ukey = u64_to_user_ptr(attr->key);
	void __user *uvalue = u64_to_user_ptr(attr->value);
	int ufd = attr->map_fd;
	struct bpf_map *map;
	void *key, *value, *ptr;
	u32 value_size;
	struct fd f;
	int err;

	if (CHECK_ATTR(BPF_MAP_LOOKUP_ELEM))
		return -EINVAL;

	f = fdget(ufd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);

	err = -ENOMEM;
	key = kmalloc(map->key_size, GFP_USER);
	if (!key)
		goto err_put;

	err = -EFAULT;
	if (copy_from_user(key, ukey, map->key_size) != 0)
		goto free_key;

	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
		value_size = round_up(map->value_size, 8) * num_possible_cpus();
	else
		value_size = map->value_size;

	err = -ENOMEM;
	value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
	if (!value)
		goto free_key;

	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
		err = bpf_percpu_hash_copy(map, key, value);
	} else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
		err = bpf_percpu_array_copy(map, key, value);
	} else if (map->map_type == BPF_MAP_TYPE_STACK_TRACE) {
		err = bpf_stackmap_copy(map, key, value);
	} else if (map->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS ||
		   map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) {
		err = -ENOTSUPP;
	} else {
		rcu_read_lock();
		ptr = map->ops->map_lookup_elem(map, key);
		if (ptr)
			memcpy(value, ptr, value_size);
		rcu_read_unlock();
		err = ptr ? 0 : -ENOENT;
	}

	if (err)
		goto free_value;

	err = -EFAULT;
	if (copy_to_user(uvalue, value, value_size) != 0)
		goto free_value;

	trace_bpf_map_lookup_elem(map, ufd, key, value);
	err = 0;

free_value:
	kfree(value);
free_key:
	kfree(key);
err_put:
	fdput(f);
	return err;
}

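/* Illustrative user-space sketch (not part of the kernel build): a minimal
 * invocation of the two map commands handled above, assuming a sys_bpf()
 * wrapper around syscall(__NR_bpf, cmd, attr, size).  Field names come from
 * union bpf_attr; unused fields must stay zeroed, which is exactly what
 * CHECK_ATTR() verifies on the kernel side.
 *
 *	union bpf_attr create = {
 *		.map_type    = BPF_MAP_TYPE_HASH,
 *		.key_size    = sizeof(__u32),
 *		.value_size  = sizeof(__u64),
 *		.max_entries = 1024,
 *	};
 *	int map_fd = sys_bpf(BPF_MAP_CREATE, &create, sizeof(create));
 *
 *	__u32 key = 1;
 *	__u64 value;
 *	union bpf_attr lookup = {
 *		.map_fd = map_fd,
 *		.key    = (__u64)(unsigned long)&key,
 *		.value  = (__u64)(unsigned long)&value,
 *	};
 *	int err = sys_bpf(BPF_MAP_LOOKUP_ELEM, &lookup, sizeof(lookup));
 */
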
#define BPF_MAP_UPDATE_ELEM_LAST_FIELD flags

static int map_update_elem(union bpf_attr *attr)
{
	void __user *ukey = u64_to_user_ptr(attr->key);
	void __user *uvalue = u64_to_user_ptr(attr->value);
	int ufd = attr->map_fd;
	struct bpf_map *map;
	void *key, *value;
	u32 value_size;
	struct fd f;
	int err;

	if (CHECK_ATTR(BPF_MAP_UPDATE_ELEM))
		return -EINVAL;

	f = fdget(ufd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);

	err = -ENOMEM;
	key = kmalloc(map->key_size, GFP_USER);
	if (!key)
		goto err_put;

	err = -EFAULT;
	if (copy_from_user(key, ukey, map->key_size) != 0)
		goto free_key;

	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
		value_size = round_up(map->value_size, 8) * num_possible_cpus();
	else
		value_size = map->value_size;

	err = -ENOMEM;
	value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
	if (!value)
		goto free_key;

	err = -EFAULT;
	if (copy_from_user(value, uvalue, value_size) != 0)
		goto free_value;

	/* must increment bpf_prog_active to avoid kprobe+bpf triggering from
	 * inside bpf map update or delete; otherwise deadlocks are possible
	 */
	preempt_disable();
	__this_cpu_inc(bpf_prog_active);
	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
		err = bpf_percpu_hash_update(map, key, value, attr->flags);
	} else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
		err = bpf_percpu_array_update(map, key, value, attr->flags);
	} else if (map->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY ||
		   map->map_type == BPF_MAP_TYPE_PROG_ARRAY ||
		   map->map_type == BPF_MAP_TYPE_CGROUP_ARRAY ||
		   map->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS) {
		rcu_read_lock();
		err = bpf_fd_array_map_update_elem(map, f.file, key, value,
						   attr->flags);
		rcu_read_unlock();
	} else if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) {
		rcu_read_lock();
		err = bpf_fd_htab_map_update_elem(map, f.file, key, value,
						  attr->flags);
		rcu_read_unlock();
	} else {
		rcu_read_lock();
		err = map->ops->map_update_elem(map, key, value, attr->flags);
		rcu_read_unlock();
	}
	__this_cpu_dec(bpf_prog_active);
	preempt_enable();

	if (!err)
		trace_bpf_map_update_elem(map, ufd, key, value);
free_value:
	kfree(value);
free_key:
	kfree(key);
err_put:
	fdput(f);
	return err;
}

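/* Illustrative user-space sketch (not part of the kernel build) for the
 * update path above, again assuming the sys_bpf() wrapper from the earlier
 * sketch: .flags selects the update semantics (BPF_ANY, BPF_NOEXIST or
 * BPF_EXIST from the UAPI header) and is passed through as attr->flags.
 *
 *	__u32 key = 1;
 *	__u64 value = 42;
 *	union bpf_attr update = {
 *		.map_fd = map_fd,
 *		.key    = (__u64)(unsigned long)&key,
 *		.value  = (__u64)(unsigned long)&value,
 *		.flags  = BPF_ANY,
 *	};
 *	int err = sys_bpf(BPF_MAP_UPDATE_ELEM, &update, sizeof(update));
 */
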
#define BPF_MAP_DELETE_ELEM_LAST_FIELD key

static int map_delete_elem(union bpf_attr *attr)
{
	void __user *ukey = u64_to_user_ptr(attr->key);
	int ufd = attr->map_fd;
	struct bpf_map *map;
	struct fd f;
	void *key;
	int err;

	if (CHECK_ATTR(BPF_MAP_DELETE_ELEM))
		return -EINVAL;

	f = fdget(ufd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);

	err = -ENOMEM;
	key = kmalloc(map->key_size, GFP_USER);
	if (!key)
		goto err_put;

	err = -EFAULT;
	if (copy_from_user(key, ukey, map->key_size) != 0)
		goto free_key;

	preempt_disable();
	__this_cpu_inc(bpf_prog_active);
	rcu_read_lock();
	err = map->ops->map_delete_elem(map, key);
	rcu_read_unlock();
	__this_cpu_dec(bpf_prog_active);
	preempt_enable();

	if (!err)
		trace_bpf_map_delete_elem(map, ufd, key);
free_key:
	kfree(key);
err_put:
	fdput(f);
	return err;
}

/* last field in 'union bpf_attr' used by this command */
#define BPF_MAP_GET_NEXT_KEY_LAST_FIELD next_key

static int map_get_next_key(union bpf_attr *attr)
{
	void __user *ukey = u64_to_user_ptr(attr->key);
	void __user *unext_key = u64_to_user_ptr(attr->next_key);
	int ufd = attr->map_fd;
	struct bpf_map *map;
	void *key, *next_key;
	struct fd f;
	int err;

	if (CHECK_ATTR(BPF_MAP_GET_NEXT_KEY))
		return -EINVAL;

	f = fdget(ufd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);

	if (ukey) {
		err = -ENOMEM;
		key = kmalloc(map->key_size, GFP_USER);
		if (!key)
			goto err_put;

		err = -EFAULT;
		if (copy_from_user(key, ukey, map->key_size) != 0)
			goto free_key;
	} else {
		key = NULL;
	}

	err = -ENOMEM;
	next_key = kmalloc(map->key_size, GFP_USER);
	if (!next_key)
		goto free_key;

	rcu_read_lock();
	err = map->ops->map_get_next_key(map, key, next_key);
	rcu_read_unlock();
	if (err)
		goto free_next_key;

	err = -EFAULT;
	if (copy_to_user(unext_key, next_key, map->key_size) != 0)
		goto free_next_key;

	trace_bpf_map_next_key(map, ufd, key, next_key);
	err = 0;

free_next_key:
	kfree(next_key);
free_key:
	kfree(key);
err_put:
	fdput(f);
	return err;
}

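/* Illustrative user-space sketch (not part of the kernel build): iterating a
 * map with the command above, assuming the sys_bpf() wrapper from the earlier
 * sketches.  Setting .key to 0 (a NULL user pointer) takes the ukey == NULL
 * branch above and returns the first key; the loop ends once the call fails
 * with ENOENT.
 *
 *	__u32 key, next_key;
 *	union bpf_attr it = {
 *		.map_fd   = map_fd,
 *		.key      = 0,
 *		.next_key = (__u64)(unsigned long)&next_key,
 *	};
 *	while (sys_bpf(BPF_MAP_GET_NEXT_KEY, &it, sizeof(it)) == 0) {
 *		key = next_key;
 *		it.key = (__u64)(unsigned long)&key;
 *	}
 */
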
static const struct bpf_verifier_ops * const bpf_prog_types[] = {
#define BPF_PROG_TYPE(_id, _ops) \
	[_id] = &_ops,
#define BPF_MAP_TYPE(_id, _ops)
#include <linux/bpf_types.h>
#undef BPF_PROG_TYPE
#undef BPF_MAP_TYPE
};

static int find_prog_type(enum bpf_prog_type type, struct bpf_prog *prog)
{
	if (type >= ARRAY_SIZE(bpf_prog_types) || !bpf_prog_types[type])
		return -EINVAL;

	prog->aux->ops = bpf_prog_types[type];
	prog->type = type;
	return 0;
}

/* drop refcnt on maps used by eBPF program and free auxiliary data */
static void free_used_maps(struct bpf_prog_aux *aux)
{
	int i;

	for (i = 0; i < aux->used_map_cnt; i++)
		bpf_map_put(aux->used_maps[i]);

	kfree(aux->used_maps);
}

int __bpf_prog_charge(struct user_struct *user, u32 pages)
{
	unsigned long memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
	unsigned long user_bufs;

	if (user) {
		user_bufs = atomic_long_add_return(pages, &user->locked_vm);
		if (user_bufs > memlock_limit) {
			atomic_long_sub(pages, &user->locked_vm);
			return -EPERM;
		}
	}

	return 0;
}

void __bpf_prog_uncharge(struct user_struct *user, u32 pages)
{
	if (user)
		atomic_long_sub(pages, &user->locked_vm);
}

static int bpf_prog_charge_memlock(struct bpf_prog *prog)
{
	struct user_struct *user = get_current_user();
	int ret;

	ret = __bpf_prog_charge(user, prog->pages);
	if (ret) {
		free_uid(user);
		return ret;
	}

	prog->aux->user = user;
	return 0;
}

static void bpf_prog_uncharge_memlock(struct bpf_prog *prog)
{
	struct user_struct *user = prog->aux->user;

	__bpf_prog_uncharge(user, prog->pages);
	free_uid(user);
}

static void __bpf_prog_put_rcu(struct rcu_head *rcu)
{
	struct bpf_prog_aux *aux = container_of(rcu, struct bpf_prog_aux, rcu);

	free_used_maps(aux);
	bpf_prog_uncharge_memlock(aux->prog);
	bpf_prog_free(aux->prog);
}

void bpf_prog_put(struct bpf_prog *prog)
{
	if (atomic_dec_and_test(&prog->aux->refcnt)) {
		trace_bpf_prog_put_rcu(prog);
		bpf_prog_kallsyms_del(prog);
		call_rcu(&prog->aux->rcu, __bpf_prog_put_rcu);
	}
}
EXPORT_SYMBOL_GPL(bpf_prog_put);

static int bpf_prog_release(struct inode *inode, struct file *filp)
{
	struct bpf_prog *prog = filp->private_data;

	bpf_prog_put(prog);
	return 0;
}

#ifdef CONFIG_PROC_FS
static void bpf_prog_show_fdinfo(struct seq_file *m, struct file *filp)
{
	const struct bpf_prog *prog = filp->private_data;
	char prog_tag[sizeof(prog->tag) * 2 + 1] = { };

	bin2hex(prog_tag, prog->tag, sizeof(prog->tag));
	seq_printf(m,
		   "prog_type:\t%u\n"
		   "prog_jited:\t%u\n"
		   "prog_tag:\t%s\n"
		   "memlock:\t%llu\n",
		   prog->type,
		   prog->jited,
		   prog_tag,
		   prog->pages * 1ULL << PAGE_SHIFT);
}
#endif

static const struct file_operations bpf_prog_fops = {
#ifdef CONFIG_PROC_FS
	.show_fdinfo = bpf_prog_show_fdinfo,
#endif
	.release = bpf_prog_release,
};

int bpf_prog_new_fd(struct bpf_prog *prog)
{
	return anon_inode_getfd("bpf-prog", &bpf_prog_fops, prog,
				O_RDWR | O_CLOEXEC);
}

static struct bpf_prog *____bpf_prog_get(struct fd f)
{
	if (!f.file)
		return ERR_PTR(-EBADF);
	if (f.file->f_op != &bpf_prog_fops) {
		fdput(f);
		return ERR_PTR(-EINVAL);
	}

	return f.file->private_data;
}

struct bpf_prog *bpf_prog_add(struct bpf_prog *prog, int i)
{
	if (atomic_add_return(i, &prog->aux->refcnt) > BPF_MAX_REFCNT) {
		atomic_sub(i, &prog->aux->refcnt);
		return ERR_PTR(-EBUSY);
	}
	return prog;
}
EXPORT_SYMBOL_GPL(bpf_prog_add);

void bpf_prog_sub(struct bpf_prog *prog, int i)
{
	/* Only to be used for undoing previous bpf_prog_add() in some
	 * error path. We still know that another entity in our call
	 * path holds a reference to the program, thus atomic_sub() can
	 * be safely used in such cases!
	 */
	WARN_ON(atomic_sub_return(i, &prog->aux->refcnt) == 0);
}
EXPORT_SYMBOL_GPL(bpf_prog_sub);

struct bpf_prog *bpf_prog_inc(struct bpf_prog *prog)
{
	return bpf_prog_add(prog, 1);
}
EXPORT_SYMBOL_GPL(bpf_prog_inc);

static struct bpf_prog *__bpf_prog_get(u32 ufd, enum bpf_prog_type *type)
{
	struct fd f = fdget(ufd);
	struct bpf_prog *prog;

	prog = ____bpf_prog_get(f);
	if (IS_ERR(prog))
		return prog;
	if (type && prog->type != *type) {
		prog = ERR_PTR(-EINVAL);
		goto out;
	}

	prog = bpf_prog_inc(prog);
out:
	fdput(f);
	return prog;
}

struct bpf_prog *bpf_prog_get(u32 ufd)
{
	return __bpf_prog_get(ufd, NULL);
}

struct bpf_prog *bpf_prog_get_type(u32 ufd, enum bpf_prog_type type)
{
	struct bpf_prog *prog = __bpf_prog_get(ufd, &type);

	if (!IS_ERR(prog))
		trace_bpf_prog_get_type(prog);
	return prog;
}
EXPORT_SYMBOL_GPL(bpf_prog_get_type);

/* last field in 'union bpf_attr' used by this command */
#define BPF_PROG_LOAD_LAST_FIELD prog_flags

static int bpf_prog_load(union bpf_attr *attr)
{
	enum bpf_prog_type type = attr->prog_type;
	struct bpf_prog *prog;
	int err;
	char license[128];
	bool is_gpl;

	if (CHECK_ATTR(BPF_PROG_LOAD))
		return -EINVAL;

	if (attr->prog_flags & ~BPF_F_STRICT_ALIGNMENT)
		return -EINVAL;

	/* copy eBPF program license from user space */
	if (strncpy_from_user(license, u64_to_user_ptr(attr->license),
			      sizeof(license) - 1) < 0)
		return -EFAULT;
	license[sizeof(license) - 1] = 0;

	/* eBPF programs must be GPL compatible to use GPL-ed functions */
	is_gpl = license_is_gpl_compatible(license);

	if (attr->insn_cnt == 0 || attr->insn_cnt > BPF_MAXINSNS)
		return -E2BIG;

	if (type == BPF_PROG_TYPE_KPROBE &&
	    attr->kern_version != LINUX_VERSION_CODE)
		return -EINVAL;

	if (type != BPF_PROG_TYPE_SOCKET_FILTER && !capable(CAP_SYS_ADMIN))
		return -EPERM;

	/* plain bpf_prog allocation */
	prog = bpf_prog_alloc(bpf_prog_size(attr->insn_cnt), GFP_USER);
	if (!prog)
		return -ENOMEM;

	err = bpf_prog_charge_memlock(prog);
	if (err)
		goto free_prog_nouncharge;

	prog->len = attr->insn_cnt;

	err = -EFAULT;
	if (copy_from_user(prog->insns, u64_to_user_ptr(attr->insns),
			   bpf_prog_insn_size(prog)) != 0)
		goto free_prog;

	prog->orig_prog = NULL;
	prog->jited = 0;

	atomic_set(&prog->aux->refcnt, 1);
	prog->gpl_compatible = is_gpl ? 1 : 0;

	/* find program type: socket_filter vs tracing_filter */
	err = find_prog_type(type, prog);
	if (err < 0)
		goto free_prog;

	/* run eBPF verifier */
	err = bpf_check(&prog, attr);
	if (err < 0)
		goto free_used_maps;

	/* eBPF program is ready to be JITed */
	prog = bpf_prog_select_runtime(prog, &err);
	if (err < 0)
		goto free_used_maps;

	err = bpf_prog_new_fd(prog);
	if (err < 0)
		/* failed to allocate fd */
		goto free_used_maps;

	bpf_prog_kallsyms_add(prog);
	trace_bpf_prog_load(prog, err);
	return err;

free_used_maps:
	free_used_maps(prog->aux);
free_prog:
	bpf_prog_uncharge_memlock(prog);
free_prog_nouncharge:
	bpf_prog_free(prog);
	return err;
}

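/* Illustrative user-space sketch (not part of the kernel build) for
 * bpf_prog_load() above: load a trivial "return 0" socket filter.  It assumes
 * the sys_bpf() wrapper from the earlier sketches and the BPF_* instruction
 * macros used by samples/bpf; prog_type, insns, insn_cnt and license are the
 * attr fields consumed above.
 *
 *	struct bpf_insn insns[] = {
 *		BPF_MOV64_IMM(BPF_REG_0, 0),
 *		BPF_EXIT_INSN(),
 *	};
 *	union bpf_attr load = {
 *		.prog_type = BPF_PROG_TYPE_SOCKET_FILTER,
 *		.insns     = (__u64)(unsigned long)insns,
 *		.insn_cnt  = 2,
 *		.license   = (__u64)(unsigned long)"GPL",
 *	};
 *	int prog_fd = sys_bpf(BPF_PROG_LOAD, &load, sizeof(load));
 */
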
#define BPF_OBJ_LAST_FIELD bpf_fd

static int bpf_obj_pin(const union bpf_attr *attr)
{
	if (CHECK_ATTR(BPF_OBJ))
		return -EINVAL;

	return bpf_obj_pin_user(attr->bpf_fd, u64_to_user_ptr(attr->pathname));
}

static int bpf_obj_get(const union bpf_attr *attr)
{
	if (CHECK_ATTR(BPF_OBJ) || attr->bpf_fd != 0)
		return -EINVAL;

	return bpf_obj_get_user(u64_to_user_ptr(attr->pathname));
}

#ifdef CONFIG_CGROUP_BPF

#define BPF_PROG_ATTACH_LAST_FIELD attach_flags

static int bpf_prog_attach(const union bpf_attr *attr)
{
	enum bpf_prog_type ptype;
	struct bpf_prog *prog;
	struct cgroup *cgrp;
	int ret;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	if (CHECK_ATTR(BPF_PROG_ATTACH))
		return -EINVAL;

	if (attr->attach_flags & ~BPF_F_ALLOW_OVERRIDE)
		return -EINVAL;

	switch (attr->attach_type) {
	case BPF_CGROUP_INET_INGRESS:
	case BPF_CGROUP_INET_EGRESS:
		ptype = BPF_PROG_TYPE_CGROUP_SKB;
		break;
	case BPF_CGROUP_INET_SOCK_CREATE:
		ptype = BPF_PROG_TYPE_CGROUP_SOCK;
		break;
	default:
		return -EINVAL;
	}

	prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype);
	if (IS_ERR(prog))
		return PTR_ERR(prog);

	cgrp = cgroup_get_from_fd(attr->target_fd);
	if (IS_ERR(cgrp)) {
		bpf_prog_put(prog);
		return PTR_ERR(cgrp);
	}

	ret = cgroup_bpf_update(cgrp, prog, attr->attach_type,
				attr->attach_flags & BPF_F_ALLOW_OVERRIDE);
	if (ret)
		bpf_prog_put(prog);
	cgroup_put(cgrp);

	return ret;
}

#define BPF_PROG_DETACH_LAST_FIELD attach_type

static int bpf_prog_detach(const union bpf_attr *attr)
{
	struct cgroup *cgrp;
	int ret;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	if (CHECK_ATTR(BPF_PROG_DETACH))
		return -EINVAL;

	switch (attr->attach_type) {
	case BPF_CGROUP_INET_INGRESS:
	case BPF_CGROUP_INET_EGRESS:
	case BPF_CGROUP_INET_SOCK_CREATE:
		cgrp = cgroup_get_from_fd(attr->target_fd);
		if (IS_ERR(cgrp))
			return PTR_ERR(cgrp);

		ret = cgroup_bpf_update(cgrp, NULL, attr->attach_type, false);
		cgroup_put(cgrp);
		break;

	default:
		return -EINVAL;
	}

	return ret;
}
#endif /* CONFIG_CGROUP_BPF */

#define BPF_PROG_TEST_RUN_LAST_FIELD test.duration

static int bpf_prog_test_run(const union bpf_attr *attr,
			     union bpf_attr __user *uattr)
{
	struct bpf_prog *prog;
	int ret = -ENOTSUPP;

	if (CHECK_ATTR(BPF_PROG_TEST_RUN))
		return -EINVAL;

	prog = bpf_prog_get(attr->test.prog_fd);
	if (IS_ERR(prog))
		return PTR_ERR(prog);

	if (prog->aux->ops->test_run)
		ret = prog->aux->ops->test_run(prog, attr, uattr);

	bpf_prog_put(prog);
	return ret;
}

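/* Illustrative user-space sketch (not part of the kernel build) for
 * bpf_prog_attach() above, assuming the sys_bpf() wrapper from the earlier
 * sketches and a prog_fd of type BPF_PROG_TYPE_CGROUP_SKB: target_fd is an
 * open cgroup directory fd (the path below is only an example), and
 * BPF_F_ALLOW_OVERRIDE is the only attach flag accepted here.
 *
 *	int cg_fd = open("/sys/fs/cgroup/unified/mygroup", O_RDONLY);
 *	union bpf_attr attach = {
 *		.target_fd     = cg_fd,
 *		.attach_bpf_fd = prog_fd,
 *		.attach_type   = BPF_CGROUP_INET_INGRESS,
 *		.attach_flags  = BPF_F_ALLOW_OVERRIDE,
 *	};
 *	int err = sys_bpf(BPF_PROG_ATTACH, &attach, sizeof(attach));
 */
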
SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
{
	union bpf_attr attr = {};
	int err;

	if (!capable(CAP_SYS_ADMIN) && sysctl_unprivileged_bpf_disabled)
		return -EPERM;

	if (!access_ok(VERIFY_READ, uattr, 1))
		return -EFAULT;

	if (size > PAGE_SIZE)	/* silly large */
		return -E2BIG;

	/* If we're handed a bigger struct than we know of,
	 * ensure all the unknown bits are 0 - i.e. new
	 * user-space does not rely on any kernel feature
	 * extensions we don't know about yet.
	 */
	if (size > sizeof(attr)) {
		unsigned char __user *addr;
		unsigned char __user *end;
		unsigned char val;

		addr = (void __user *)uattr + sizeof(attr);
		end = (void __user *)uattr + size;

		for (; addr < end; addr++) {
			err = get_user(val, addr);
			if (err)
				return err;
			if (val)
				return -E2BIG;
		}
		size = sizeof(attr);
	}

	/* copy attributes from user space, may be less than sizeof(bpf_attr) */
	if (copy_from_user(&attr, uattr, size) != 0)
		return -EFAULT;

	switch (cmd) {
	case BPF_MAP_CREATE:
		err = map_create(&attr);
		break;
	case BPF_MAP_LOOKUP_ELEM:
		err = map_lookup_elem(&attr);
		break;
	case BPF_MAP_UPDATE_ELEM:
		err = map_update_elem(&attr);
		break;
	case BPF_MAP_DELETE_ELEM:
		err = map_delete_elem(&attr);
		break;
	case BPF_MAP_GET_NEXT_KEY:
		err = map_get_next_key(&attr);
		break;
	case BPF_PROG_LOAD:
		err = bpf_prog_load(&attr);
		break;
	case BPF_OBJ_PIN:
		err = bpf_obj_pin(&attr);
		break;
	case BPF_OBJ_GET:
		err = bpf_obj_get(&attr);
		break;
#ifdef CONFIG_CGROUP_BPF
	case BPF_PROG_ATTACH:
		err = bpf_prog_attach(&attr);
		break;
	case BPF_PROG_DETACH:
		err = bpf_prog_detach(&attr);
		break;
#endif
	case BPF_PROG_TEST_RUN:
		err = bpf_prog_test_run(&attr, uattr);
		break;
	default:
		err = -EINVAL;
		break;
	}

	return err;
}