1 /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com 2 * 3 * This program is free software; you can redistribute it and/or 4 * modify it under the terms of version 2 of the GNU General Public 5 * License as published by the Free Software Foundation. 6 * 7 * This program is distributed in the hope that it will be useful, but 8 * WITHOUT ANY WARRANTY; without even the implied warranty of 9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 10 * General Public License for more details. 11 */ 12 #include <linux/bpf.h> 13 #include <linux/syscalls.h> 14 #include <linux/slab.h> 15 #include <linux/anon_inodes.h> 16 #include <linux/file.h> 17 #include <linux/license.h> 18 #include <linux/filter.h> 19 #include <linux/version.h> 20 21 static LIST_HEAD(bpf_map_types); 22 23 static struct bpf_map *find_and_alloc_map(union bpf_attr *attr) 24 { 25 struct bpf_map_type_list *tl; 26 struct bpf_map *map; 27 28 list_for_each_entry(tl, &bpf_map_types, list_node) { 29 if (tl->type == attr->map_type) { 30 map = tl->ops->map_alloc(attr); 31 if (IS_ERR(map)) 32 return map; 33 map->ops = tl->ops; 34 map->map_type = attr->map_type; 35 return map; 36 } 37 } 38 return ERR_PTR(-EINVAL); 39 } 40 41 /* boot time registration of different map implementations */ 42 void bpf_register_map_type(struct bpf_map_type_list *tl) 43 { 44 list_add(&tl->list_node, &bpf_map_types); 45 } 46 47 /* called from workqueue */ 48 static void bpf_map_free_deferred(struct work_struct *work) 49 { 50 struct bpf_map *map = container_of(work, struct bpf_map, work); 51 52 /* implementation dependent freeing */ 53 map->ops->map_free(map); 54 } 55 56 /* decrement map refcnt and schedule it for freeing via workqueue 57 * (unrelying map implementation ops->map_free() might sleep) 58 */ 59 void bpf_map_put(struct bpf_map *map) 60 { 61 if (atomic_dec_and_test(&map->refcnt)) { 62 INIT_WORK(&map->work, bpf_map_free_deferred); 63 schedule_work(&map->work); 64 } 65 } 66 67 static int bpf_map_release(struct inode *inode, struct file *filp) 68 { 69 struct bpf_map *map = filp->private_data; 70 71 if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY) 72 /* prog_array stores refcnt-ed bpf_prog pointers 73 * release them all when user space closes prog_array_fd 74 */ 75 bpf_prog_array_map_clear(map); 76 77 bpf_map_put(map); 78 return 0; 79 } 80 81 static const struct file_operations bpf_map_fops = { 82 .release = bpf_map_release, 83 }; 84 85 /* helper macro to check that unused fields 'union bpf_attr' are zero */ 86 #define CHECK_ATTR(CMD) \ 87 memchr_inv((void *) &attr->CMD##_LAST_FIELD + \ 88 sizeof(attr->CMD##_LAST_FIELD), 0, \ 89 sizeof(*attr) - \ 90 offsetof(union bpf_attr, CMD##_LAST_FIELD) - \ 91 sizeof(attr->CMD##_LAST_FIELD)) != NULL 92 93 #define BPF_MAP_CREATE_LAST_FIELD max_entries 94 /* called via syscall */ 95 static int map_create(union bpf_attr *attr) 96 { 97 struct bpf_map *map; 98 int err; 99 100 err = CHECK_ATTR(BPF_MAP_CREATE); 101 if (err) 102 return -EINVAL; 103 104 /* find map type and init map: hashtable vs rbtree vs bloom vs ... */ 105 map = find_and_alloc_map(attr); 106 if (IS_ERR(map)) 107 return PTR_ERR(map); 108 109 atomic_set(&map->refcnt, 1); 110 111 err = anon_inode_getfd("bpf-map", &bpf_map_fops, map, O_RDWR | O_CLOEXEC); 112 113 if (err < 0) 114 /* failed to allocate fd */ 115 goto free_map; 116 117 return err; 118 119 free_map: 120 map->ops->map_free(map); 121 return err; 122 } 123 124 /* if error is returned, fd is released. 125 * On success caller should complete fd access with matching fdput() 126 */ 127 struct bpf_map *bpf_map_get(struct fd f) 128 { 129 struct bpf_map *map; 130 131 if (!f.file) 132 return ERR_PTR(-EBADF); 133 134 if (f.file->f_op != &bpf_map_fops) { 135 fdput(f); 136 return ERR_PTR(-EINVAL); 137 } 138 139 map = f.file->private_data; 140 141 return map; 142 } 143 144 /* helper to convert user pointers passed inside __aligned_u64 fields */ 145 static void __user *u64_to_ptr(__u64 val) 146 { 147 return (void __user *) (unsigned long) val; 148 } 149 150 /* last field in 'union bpf_attr' used by this command */ 151 #define BPF_MAP_LOOKUP_ELEM_LAST_FIELD value 152 153 static int map_lookup_elem(union bpf_attr *attr) 154 { 155 void __user *ukey = u64_to_ptr(attr->key); 156 void __user *uvalue = u64_to_ptr(attr->value); 157 int ufd = attr->map_fd; 158 struct fd f = fdget(ufd); 159 struct bpf_map *map; 160 void *key, *value, *ptr; 161 int err; 162 163 if (CHECK_ATTR(BPF_MAP_LOOKUP_ELEM)) 164 return -EINVAL; 165 166 map = bpf_map_get(f); 167 if (IS_ERR(map)) 168 return PTR_ERR(map); 169 170 err = -ENOMEM; 171 key = kmalloc(map->key_size, GFP_USER); 172 if (!key) 173 goto err_put; 174 175 err = -EFAULT; 176 if (copy_from_user(key, ukey, map->key_size) != 0) 177 goto free_key; 178 179 err = -ENOMEM; 180 value = kmalloc(map->value_size, GFP_USER); 181 if (!value) 182 goto free_key; 183 184 rcu_read_lock(); 185 ptr = map->ops->map_lookup_elem(map, key); 186 if (ptr) 187 memcpy(value, ptr, map->value_size); 188 rcu_read_unlock(); 189 190 err = -ENOENT; 191 if (!ptr) 192 goto free_value; 193 194 err = -EFAULT; 195 if (copy_to_user(uvalue, value, map->value_size) != 0) 196 goto free_value; 197 198 err = 0; 199 200 free_value: 201 kfree(value); 202 free_key: 203 kfree(key); 204 err_put: 205 fdput(f); 206 return err; 207 } 208 209 #define BPF_MAP_UPDATE_ELEM_LAST_FIELD flags 210 211 static int map_update_elem(union bpf_attr *attr) 212 { 213 void __user *ukey = u64_to_ptr(attr->key); 214 void __user *uvalue = u64_to_ptr(attr->value); 215 int ufd = attr->map_fd; 216 struct fd f = fdget(ufd); 217 struct bpf_map *map; 218 void *key, *value; 219 int err; 220 221 if (CHECK_ATTR(BPF_MAP_UPDATE_ELEM)) 222 return -EINVAL; 223 224 map = bpf_map_get(f); 225 if (IS_ERR(map)) 226 return PTR_ERR(map); 227 228 err = -ENOMEM; 229 key = kmalloc(map->key_size, GFP_USER); 230 if (!key) 231 goto err_put; 232 233 err = -EFAULT; 234 if (copy_from_user(key, ukey, map->key_size) != 0) 235 goto free_key; 236 237 err = -ENOMEM; 238 value = kmalloc(map->value_size, GFP_USER); 239 if (!value) 240 goto free_key; 241 242 err = -EFAULT; 243 if (copy_from_user(value, uvalue, map->value_size) != 0) 244 goto free_value; 245 246 /* eBPF program that use maps are running under rcu_read_lock(), 247 * therefore all map accessors rely on this fact, so do the same here 248 */ 249 rcu_read_lock(); 250 err = map->ops->map_update_elem(map, key, value, attr->flags); 251 rcu_read_unlock(); 252 253 free_value: 254 kfree(value); 255 free_key: 256 kfree(key); 257 err_put: 258 fdput(f); 259 return err; 260 } 261 262 #define BPF_MAP_DELETE_ELEM_LAST_FIELD key 263 264 static int map_delete_elem(union bpf_attr *attr) 265 { 266 void __user *ukey = u64_to_ptr(attr->key); 267 int ufd = attr->map_fd; 268 struct fd f = fdget(ufd); 269 struct bpf_map *map; 270 void *key; 271 int err; 272 273 if (CHECK_ATTR(BPF_MAP_DELETE_ELEM)) 274 return -EINVAL; 275 276 map = bpf_map_get(f); 277 if (IS_ERR(map)) 278 return PTR_ERR(map); 279 280 err = -ENOMEM; 281 key = kmalloc(map->key_size, GFP_USER); 282 if (!key) 283 goto err_put; 284 285 err = -EFAULT; 286 if (copy_from_user(key, ukey, map->key_size) != 0) 287 goto free_key; 288 289 rcu_read_lock(); 290 err = map->ops->map_delete_elem(map, key); 291 rcu_read_unlock(); 292 293 free_key: 294 kfree(key); 295 err_put: 296 fdput(f); 297 return err; 298 } 299 300 /* last field in 'union bpf_attr' used by this command */ 301 #define BPF_MAP_GET_NEXT_KEY_LAST_FIELD next_key 302 303 static int map_get_next_key(union bpf_attr *attr) 304 { 305 void __user *ukey = u64_to_ptr(attr->key); 306 void __user *unext_key = u64_to_ptr(attr->next_key); 307 int ufd = attr->map_fd; 308 struct fd f = fdget(ufd); 309 struct bpf_map *map; 310 void *key, *next_key; 311 int err; 312 313 if (CHECK_ATTR(BPF_MAP_GET_NEXT_KEY)) 314 return -EINVAL; 315 316 map = bpf_map_get(f); 317 if (IS_ERR(map)) 318 return PTR_ERR(map); 319 320 err = -ENOMEM; 321 key = kmalloc(map->key_size, GFP_USER); 322 if (!key) 323 goto err_put; 324 325 err = -EFAULT; 326 if (copy_from_user(key, ukey, map->key_size) != 0) 327 goto free_key; 328 329 err = -ENOMEM; 330 next_key = kmalloc(map->key_size, GFP_USER); 331 if (!next_key) 332 goto free_key; 333 334 rcu_read_lock(); 335 err = map->ops->map_get_next_key(map, key, next_key); 336 rcu_read_unlock(); 337 if (err) 338 goto free_next_key; 339 340 err = -EFAULT; 341 if (copy_to_user(unext_key, next_key, map->key_size) != 0) 342 goto free_next_key; 343 344 err = 0; 345 346 free_next_key: 347 kfree(next_key); 348 free_key: 349 kfree(key); 350 err_put: 351 fdput(f); 352 return err; 353 } 354 355 static LIST_HEAD(bpf_prog_types); 356 357 static int find_prog_type(enum bpf_prog_type type, struct bpf_prog *prog) 358 { 359 struct bpf_prog_type_list *tl; 360 361 list_for_each_entry(tl, &bpf_prog_types, list_node) { 362 if (tl->type == type) { 363 prog->aux->ops = tl->ops; 364 prog->type = type; 365 return 0; 366 } 367 } 368 369 return -EINVAL; 370 } 371 372 void bpf_register_prog_type(struct bpf_prog_type_list *tl) 373 { 374 list_add(&tl->list_node, &bpf_prog_types); 375 } 376 377 /* fixup insn->imm field of bpf_call instructions: 378 * if (insn->imm == BPF_FUNC_map_lookup_elem) 379 * insn->imm = bpf_map_lookup_elem - __bpf_call_base; 380 * else if (insn->imm == BPF_FUNC_map_update_elem) 381 * insn->imm = bpf_map_update_elem - __bpf_call_base; 382 * else ... 383 * 384 * this function is called after eBPF program passed verification 385 */ 386 static void fixup_bpf_calls(struct bpf_prog *prog) 387 { 388 const struct bpf_func_proto *fn; 389 int i; 390 391 for (i = 0; i < prog->len; i++) { 392 struct bpf_insn *insn = &prog->insnsi[i]; 393 394 if (insn->code == (BPF_JMP | BPF_CALL)) { 395 /* we reach here when program has bpf_call instructions 396 * and it passed bpf_check(), means that 397 * ops->get_func_proto must have been supplied, check it 398 */ 399 BUG_ON(!prog->aux->ops->get_func_proto); 400 401 if (insn->imm == BPF_FUNC_tail_call) { 402 /* mark bpf_tail_call as different opcode 403 * to avoid conditional branch in 404 * interpeter for every normal call 405 * and to prevent accidental JITing by 406 * JIT compiler that doesn't support 407 * bpf_tail_call yet 408 */ 409 insn->imm = 0; 410 insn->code |= BPF_X; 411 continue; 412 } 413 414 fn = prog->aux->ops->get_func_proto(insn->imm); 415 /* all functions that have prototype and verifier allowed 416 * programs to call them, must be real in-kernel functions 417 */ 418 BUG_ON(!fn->func); 419 insn->imm = fn->func - __bpf_call_base; 420 } 421 } 422 } 423 424 /* drop refcnt on maps used by eBPF program and free auxilary data */ 425 static void free_used_maps(struct bpf_prog_aux *aux) 426 { 427 int i; 428 429 for (i = 0; i < aux->used_map_cnt; i++) 430 bpf_map_put(aux->used_maps[i]); 431 432 kfree(aux->used_maps); 433 } 434 435 static void __prog_put_rcu(struct rcu_head *rcu) 436 { 437 struct bpf_prog_aux *aux = container_of(rcu, struct bpf_prog_aux, rcu); 438 439 free_used_maps(aux); 440 bpf_prog_free(aux->prog); 441 } 442 443 /* version of bpf_prog_put() that is called after a grace period */ 444 void bpf_prog_put_rcu(struct bpf_prog *prog) 445 { 446 if (atomic_dec_and_test(&prog->aux->refcnt)) { 447 prog->aux->prog = prog; 448 call_rcu(&prog->aux->rcu, __prog_put_rcu); 449 } 450 } 451 452 void bpf_prog_put(struct bpf_prog *prog) 453 { 454 if (atomic_dec_and_test(&prog->aux->refcnt)) { 455 free_used_maps(prog->aux); 456 bpf_prog_free(prog); 457 } 458 } 459 EXPORT_SYMBOL_GPL(bpf_prog_put); 460 461 static int bpf_prog_release(struct inode *inode, struct file *filp) 462 { 463 struct bpf_prog *prog = filp->private_data; 464 465 bpf_prog_put_rcu(prog); 466 return 0; 467 } 468 469 static const struct file_operations bpf_prog_fops = { 470 .release = bpf_prog_release, 471 }; 472 473 static struct bpf_prog *get_prog(struct fd f) 474 { 475 struct bpf_prog *prog; 476 477 if (!f.file) 478 return ERR_PTR(-EBADF); 479 480 if (f.file->f_op != &bpf_prog_fops) { 481 fdput(f); 482 return ERR_PTR(-EINVAL); 483 } 484 485 prog = f.file->private_data; 486 487 return prog; 488 } 489 490 /* called by sockets/tracing/seccomp before attaching program to an event 491 * pairs with bpf_prog_put() 492 */ 493 struct bpf_prog *bpf_prog_get(u32 ufd) 494 { 495 struct fd f = fdget(ufd); 496 struct bpf_prog *prog; 497 498 prog = get_prog(f); 499 500 if (IS_ERR(prog)) 501 return prog; 502 503 atomic_inc(&prog->aux->refcnt); 504 fdput(f); 505 return prog; 506 } 507 EXPORT_SYMBOL_GPL(bpf_prog_get); 508 509 /* last field in 'union bpf_attr' used by this command */ 510 #define BPF_PROG_LOAD_LAST_FIELD kern_version 511 512 static int bpf_prog_load(union bpf_attr *attr) 513 { 514 enum bpf_prog_type type = attr->prog_type; 515 struct bpf_prog *prog; 516 int err; 517 char license[128]; 518 bool is_gpl; 519 520 if (CHECK_ATTR(BPF_PROG_LOAD)) 521 return -EINVAL; 522 523 /* copy eBPF program license from user space */ 524 if (strncpy_from_user(license, u64_to_ptr(attr->license), 525 sizeof(license) - 1) < 0) 526 return -EFAULT; 527 license[sizeof(license) - 1] = 0; 528 529 /* eBPF programs must be GPL compatible to use GPL-ed functions */ 530 is_gpl = license_is_gpl_compatible(license); 531 532 if (attr->insn_cnt >= BPF_MAXINSNS) 533 return -EINVAL; 534 535 if (type == BPF_PROG_TYPE_KPROBE && 536 attr->kern_version != LINUX_VERSION_CODE) 537 return -EINVAL; 538 539 /* plain bpf_prog allocation */ 540 prog = bpf_prog_alloc(bpf_prog_size(attr->insn_cnt), GFP_USER); 541 if (!prog) 542 return -ENOMEM; 543 544 prog->len = attr->insn_cnt; 545 546 err = -EFAULT; 547 if (copy_from_user(prog->insns, u64_to_ptr(attr->insns), 548 prog->len * sizeof(struct bpf_insn)) != 0) 549 goto free_prog; 550 551 prog->orig_prog = NULL; 552 prog->jited = false; 553 554 atomic_set(&prog->aux->refcnt, 1); 555 prog->gpl_compatible = is_gpl; 556 557 /* find program type: socket_filter vs tracing_filter */ 558 err = find_prog_type(type, prog); 559 if (err < 0) 560 goto free_prog; 561 562 /* run eBPF verifier */ 563 err = bpf_check(&prog, attr); 564 if (err < 0) 565 goto free_used_maps; 566 567 /* fixup BPF_CALL->imm field */ 568 fixup_bpf_calls(prog); 569 570 /* eBPF program is ready to be JITed */ 571 err = bpf_prog_select_runtime(prog); 572 if (err < 0) 573 goto free_used_maps; 574 575 err = anon_inode_getfd("bpf-prog", &bpf_prog_fops, prog, O_RDWR | O_CLOEXEC); 576 if (err < 0) 577 /* failed to allocate fd */ 578 goto free_used_maps; 579 580 return err; 581 582 free_used_maps: 583 free_used_maps(prog->aux); 584 free_prog: 585 bpf_prog_free(prog); 586 return err; 587 } 588 589 SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size) 590 { 591 union bpf_attr attr = {}; 592 int err; 593 594 /* the syscall is limited to root temporarily. This restriction will be 595 * lifted when security audit is clean. Note that eBPF+tracing must have 596 * this restriction, since it may pass kernel data to user space 597 */ 598 if (!capable(CAP_SYS_ADMIN)) 599 return -EPERM; 600 601 if (!access_ok(VERIFY_READ, uattr, 1)) 602 return -EFAULT; 603 604 if (size > PAGE_SIZE) /* silly large */ 605 return -E2BIG; 606 607 /* If we're handed a bigger struct than we know of, 608 * ensure all the unknown bits are 0 - i.e. new 609 * user-space does not rely on any kernel feature 610 * extensions we dont know about yet. 611 */ 612 if (size > sizeof(attr)) { 613 unsigned char __user *addr; 614 unsigned char __user *end; 615 unsigned char val; 616 617 addr = (void __user *)uattr + sizeof(attr); 618 end = (void __user *)uattr + size; 619 620 for (; addr < end; addr++) { 621 err = get_user(val, addr); 622 if (err) 623 return err; 624 if (val) 625 return -E2BIG; 626 } 627 size = sizeof(attr); 628 } 629 630 /* copy attributes from user space, may be less than sizeof(bpf_attr) */ 631 if (copy_from_user(&attr, uattr, size) != 0) 632 return -EFAULT; 633 634 switch (cmd) { 635 case BPF_MAP_CREATE: 636 err = map_create(&attr); 637 break; 638 case BPF_MAP_LOOKUP_ELEM: 639 err = map_lookup_elem(&attr); 640 break; 641 case BPF_MAP_UPDATE_ELEM: 642 err = map_update_elem(&attr); 643 break; 644 case BPF_MAP_DELETE_ELEM: 645 err = map_delete_elem(&attr); 646 break; 647 case BPF_MAP_GET_NEXT_KEY: 648 err = map_get_next_key(&attr); 649 break; 650 case BPF_PROG_LOAD: 651 err = bpf_prog_load(&attr); 652 break; 653 default: 654 err = -EINVAL; 655 break; 656 } 657 658 return err; 659 } 660