1 /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com 2 * 3 * This program is free software; you can redistribute it and/or 4 * modify it under the terms of version 2 of the GNU General Public 5 * License as published by the Free Software Foundation. 6 * 7 * This program is distributed in the hope that it will be useful, but 8 * WITHOUT ANY WARRANTY; without even the implied warranty of 9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 10 * General Public License for more details. 11 */ 12 #include <linux/bpf.h> 13 #include <linux/syscalls.h> 14 #include <linux/slab.h> 15 #include <linux/anon_inodes.h> 16 #include <linux/file.h> 17 #include <linux/license.h> 18 #include <linux/filter.h> 19 #include <linux/version.h> 20 21 static LIST_HEAD(bpf_map_types); 22 23 static struct bpf_map *find_and_alloc_map(union bpf_attr *attr) 24 { 25 struct bpf_map_type_list *tl; 26 struct bpf_map *map; 27 28 list_for_each_entry(tl, &bpf_map_types, list_node) { 29 if (tl->type == attr->map_type) { 30 map = tl->ops->map_alloc(attr); 31 if (IS_ERR(map)) 32 return map; 33 map->ops = tl->ops; 34 map->map_type = attr->map_type; 35 return map; 36 } 37 } 38 return ERR_PTR(-EINVAL); 39 } 40 41 /* boot time registration of different map implementations */ 42 void bpf_register_map_type(struct bpf_map_type_list *tl) 43 { 44 list_add(&tl->list_node, &bpf_map_types); 45 } 46 47 /* called from workqueue */ 48 static void bpf_map_free_deferred(struct work_struct *work) 49 { 50 struct bpf_map *map = container_of(work, struct bpf_map, work); 51 52 /* implementation dependent freeing */ 53 map->ops->map_free(map); 54 } 55 56 /* decrement map refcnt and schedule it for freeing via workqueue 57 * (unrelying map implementation ops->map_free() might sleep) 58 */ 59 void bpf_map_put(struct bpf_map *map) 60 { 61 if (atomic_dec_and_test(&map->refcnt)) { 62 INIT_WORK(&map->work, bpf_map_free_deferred); 63 schedule_work(&map->work); 64 } 65 } 66 67 static int bpf_map_release(struct inode *inode, struct file *filp) 68 { 69 struct bpf_map *map = filp->private_data; 70 71 bpf_map_put(map); 72 return 0; 73 } 74 75 static const struct file_operations bpf_map_fops = { 76 .release = bpf_map_release, 77 }; 78 79 /* helper macro to check that unused fields 'union bpf_attr' are zero */ 80 #define CHECK_ATTR(CMD) \ 81 memchr_inv((void *) &attr->CMD##_LAST_FIELD + \ 82 sizeof(attr->CMD##_LAST_FIELD), 0, \ 83 sizeof(*attr) - \ 84 offsetof(union bpf_attr, CMD##_LAST_FIELD) - \ 85 sizeof(attr->CMD##_LAST_FIELD)) != NULL 86 87 #define BPF_MAP_CREATE_LAST_FIELD max_entries 88 /* called via syscall */ 89 static int map_create(union bpf_attr *attr) 90 { 91 struct bpf_map *map; 92 int err; 93 94 err = CHECK_ATTR(BPF_MAP_CREATE); 95 if (err) 96 return -EINVAL; 97 98 /* find map type and init map: hashtable vs rbtree vs bloom vs ... */ 99 map = find_and_alloc_map(attr); 100 if (IS_ERR(map)) 101 return PTR_ERR(map); 102 103 atomic_set(&map->refcnt, 1); 104 105 err = anon_inode_getfd("bpf-map", &bpf_map_fops, map, O_RDWR | O_CLOEXEC); 106 107 if (err < 0) 108 /* failed to allocate fd */ 109 goto free_map; 110 111 return err; 112 113 free_map: 114 map->ops->map_free(map); 115 return err; 116 } 117 118 /* if error is returned, fd is released. 119 * On success caller should complete fd access with matching fdput() 120 */ 121 struct bpf_map *bpf_map_get(struct fd f) 122 { 123 struct bpf_map *map; 124 125 if (!f.file) 126 return ERR_PTR(-EBADF); 127 128 if (f.file->f_op != &bpf_map_fops) { 129 fdput(f); 130 return ERR_PTR(-EINVAL); 131 } 132 133 map = f.file->private_data; 134 135 return map; 136 } 137 138 /* helper to convert user pointers passed inside __aligned_u64 fields */ 139 static void __user *u64_to_ptr(__u64 val) 140 { 141 return (void __user *) (unsigned long) val; 142 } 143 144 /* last field in 'union bpf_attr' used by this command */ 145 #define BPF_MAP_LOOKUP_ELEM_LAST_FIELD value 146 147 static int map_lookup_elem(union bpf_attr *attr) 148 { 149 void __user *ukey = u64_to_ptr(attr->key); 150 void __user *uvalue = u64_to_ptr(attr->value); 151 int ufd = attr->map_fd; 152 struct fd f = fdget(ufd); 153 struct bpf_map *map; 154 void *key, *value, *ptr; 155 int err; 156 157 if (CHECK_ATTR(BPF_MAP_LOOKUP_ELEM)) 158 return -EINVAL; 159 160 map = bpf_map_get(f); 161 if (IS_ERR(map)) 162 return PTR_ERR(map); 163 164 err = -ENOMEM; 165 key = kmalloc(map->key_size, GFP_USER); 166 if (!key) 167 goto err_put; 168 169 err = -EFAULT; 170 if (copy_from_user(key, ukey, map->key_size) != 0) 171 goto free_key; 172 173 err = -ENOMEM; 174 value = kmalloc(map->value_size, GFP_USER); 175 if (!value) 176 goto free_key; 177 178 rcu_read_lock(); 179 ptr = map->ops->map_lookup_elem(map, key); 180 if (ptr) 181 memcpy(value, ptr, map->value_size); 182 rcu_read_unlock(); 183 184 err = -ENOENT; 185 if (!ptr) 186 goto free_value; 187 188 err = -EFAULT; 189 if (copy_to_user(uvalue, value, map->value_size) != 0) 190 goto free_value; 191 192 err = 0; 193 194 free_value: 195 kfree(value); 196 free_key: 197 kfree(key); 198 err_put: 199 fdput(f); 200 return err; 201 } 202 203 #define BPF_MAP_UPDATE_ELEM_LAST_FIELD flags 204 205 static int map_update_elem(union bpf_attr *attr) 206 { 207 void __user *ukey = u64_to_ptr(attr->key); 208 void __user *uvalue = u64_to_ptr(attr->value); 209 int ufd = attr->map_fd; 210 struct fd f = fdget(ufd); 211 struct bpf_map *map; 212 void *key, *value; 213 int err; 214 215 if (CHECK_ATTR(BPF_MAP_UPDATE_ELEM)) 216 return -EINVAL; 217 218 map = bpf_map_get(f); 219 if (IS_ERR(map)) 220 return PTR_ERR(map); 221 222 err = -ENOMEM; 223 key = kmalloc(map->key_size, GFP_USER); 224 if (!key) 225 goto err_put; 226 227 err = -EFAULT; 228 if (copy_from_user(key, ukey, map->key_size) != 0) 229 goto free_key; 230 231 err = -ENOMEM; 232 value = kmalloc(map->value_size, GFP_USER); 233 if (!value) 234 goto free_key; 235 236 err = -EFAULT; 237 if (copy_from_user(value, uvalue, map->value_size) != 0) 238 goto free_value; 239 240 /* eBPF program that use maps are running under rcu_read_lock(), 241 * therefore all map accessors rely on this fact, so do the same here 242 */ 243 rcu_read_lock(); 244 err = map->ops->map_update_elem(map, key, value, attr->flags); 245 rcu_read_unlock(); 246 247 free_value: 248 kfree(value); 249 free_key: 250 kfree(key); 251 err_put: 252 fdput(f); 253 return err; 254 } 255 256 #define BPF_MAP_DELETE_ELEM_LAST_FIELD key 257 258 static int map_delete_elem(union bpf_attr *attr) 259 { 260 void __user *ukey = u64_to_ptr(attr->key); 261 int ufd = attr->map_fd; 262 struct fd f = fdget(ufd); 263 struct bpf_map *map; 264 void *key; 265 int err; 266 267 if (CHECK_ATTR(BPF_MAP_DELETE_ELEM)) 268 return -EINVAL; 269 270 map = bpf_map_get(f); 271 if (IS_ERR(map)) 272 return PTR_ERR(map); 273 274 err = -ENOMEM; 275 key = kmalloc(map->key_size, GFP_USER); 276 if (!key) 277 goto err_put; 278 279 err = -EFAULT; 280 if (copy_from_user(key, ukey, map->key_size) != 0) 281 goto free_key; 282 283 rcu_read_lock(); 284 err = map->ops->map_delete_elem(map, key); 285 rcu_read_unlock(); 286 287 free_key: 288 kfree(key); 289 err_put: 290 fdput(f); 291 return err; 292 } 293 294 /* last field in 'union bpf_attr' used by this command */ 295 #define BPF_MAP_GET_NEXT_KEY_LAST_FIELD next_key 296 297 static int map_get_next_key(union bpf_attr *attr) 298 { 299 void __user *ukey = u64_to_ptr(attr->key); 300 void __user *unext_key = u64_to_ptr(attr->next_key); 301 int ufd = attr->map_fd; 302 struct fd f = fdget(ufd); 303 struct bpf_map *map; 304 void *key, *next_key; 305 int err; 306 307 if (CHECK_ATTR(BPF_MAP_GET_NEXT_KEY)) 308 return -EINVAL; 309 310 map = bpf_map_get(f); 311 if (IS_ERR(map)) 312 return PTR_ERR(map); 313 314 err = -ENOMEM; 315 key = kmalloc(map->key_size, GFP_USER); 316 if (!key) 317 goto err_put; 318 319 err = -EFAULT; 320 if (copy_from_user(key, ukey, map->key_size) != 0) 321 goto free_key; 322 323 err = -ENOMEM; 324 next_key = kmalloc(map->key_size, GFP_USER); 325 if (!next_key) 326 goto free_key; 327 328 rcu_read_lock(); 329 err = map->ops->map_get_next_key(map, key, next_key); 330 rcu_read_unlock(); 331 if (err) 332 goto free_next_key; 333 334 err = -EFAULT; 335 if (copy_to_user(unext_key, next_key, map->key_size) != 0) 336 goto free_next_key; 337 338 err = 0; 339 340 free_next_key: 341 kfree(next_key); 342 free_key: 343 kfree(key); 344 err_put: 345 fdput(f); 346 return err; 347 } 348 349 static LIST_HEAD(bpf_prog_types); 350 351 static int find_prog_type(enum bpf_prog_type type, struct bpf_prog *prog) 352 { 353 struct bpf_prog_type_list *tl; 354 355 list_for_each_entry(tl, &bpf_prog_types, list_node) { 356 if (tl->type == type) { 357 prog->aux->ops = tl->ops; 358 prog->type = type; 359 return 0; 360 } 361 } 362 363 return -EINVAL; 364 } 365 366 void bpf_register_prog_type(struct bpf_prog_type_list *tl) 367 { 368 list_add(&tl->list_node, &bpf_prog_types); 369 } 370 371 /* fixup insn->imm field of bpf_call instructions: 372 * if (insn->imm == BPF_FUNC_map_lookup_elem) 373 * insn->imm = bpf_map_lookup_elem - __bpf_call_base; 374 * else if (insn->imm == BPF_FUNC_map_update_elem) 375 * insn->imm = bpf_map_update_elem - __bpf_call_base; 376 * else ... 377 * 378 * this function is called after eBPF program passed verification 379 */ 380 static void fixup_bpf_calls(struct bpf_prog *prog) 381 { 382 const struct bpf_func_proto *fn; 383 int i; 384 385 for (i = 0; i < prog->len; i++) { 386 struct bpf_insn *insn = &prog->insnsi[i]; 387 388 if (insn->code == (BPF_JMP | BPF_CALL)) { 389 /* we reach here when program has bpf_call instructions 390 * and it passed bpf_check(), means that 391 * ops->get_func_proto must have been supplied, check it 392 */ 393 BUG_ON(!prog->aux->ops->get_func_proto); 394 395 fn = prog->aux->ops->get_func_proto(insn->imm); 396 /* all functions that have prototype and verifier allowed 397 * programs to call them, must be real in-kernel functions 398 */ 399 BUG_ON(!fn->func); 400 insn->imm = fn->func - __bpf_call_base; 401 } 402 } 403 } 404 405 /* drop refcnt on maps used by eBPF program and free auxilary data */ 406 static void free_used_maps(struct bpf_prog_aux *aux) 407 { 408 int i; 409 410 for (i = 0; i < aux->used_map_cnt; i++) 411 bpf_map_put(aux->used_maps[i]); 412 413 kfree(aux->used_maps); 414 } 415 416 void bpf_prog_put(struct bpf_prog *prog) 417 { 418 if (atomic_dec_and_test(&prog->aux->refcnt)) { 419 free_used_maps(prog->aux); 420 bpf_prog_free(prog); 421 } 422 } 423 EXPORT_SYMBOL_GPL(bpf_prog_put); 424 425 static int bpf_prog_release(struct inode *inode, struct file *filp) 426 { 427 struct bpf_prog *prog = filp->private_data; 428 429 bpf_prog_put(prog); 430 return 0; 431 } 432 433 static const struct file_operations bpf_prog_fops = { 434 .release = bpf_prog_release, 435 }; 436 437 static struct bpf_prog *get_prog(struct fd f) 438 { 439 struct bpf_prog *prog; 440 441 if (!f.file) 442 return ERR_PTR(-EBADF); 443 444 if (f.file->f_op != &bpf_prog_fops) { 445 fdput(f); 446 return ERR_PTR(-EINVAL); 447 } 448 449 prog = f.file->private_data; 450 451 return prog; 452 } 453 454 /* called by sockets/tracing/seccomp before attaching program to an event 455 * pairs with bpf_prog_put() 456 */ 457 struct bpf_prog *bpf_prog_get(u32 ufd) 458 { 459 struct fd f = fdget(ufd); 460 struct bpf_prog *prog; 461 462 prog = get_prog(f); 463 464 if (IS_ERR(prog)) 465 return prog; 466 467 atomic_inc(&prog->aux->refcnt); 468 fdput(f); 469 return prog; 470 } 471 EXPORT_SYMBOL_GPL(bpf_prog_get); 472 473 /* last field in 'union bpf_attr' used by this command */ 474 #define BPF_PROG_LOAD_LAST_FIELD kern_version 475 476 static int bpf_prog_load(union bpf_attr *attr) 477 { 478 enum bpf_prog_type type = attr->prog_type; 479 struct bpf_prog *prog; 480 int err; 481 char license[128]; 482 bool is_gpl; 483 484 if (CHECK_ATTR(BPF_PROG_LOAD)) 485 return -EINVAL; 486 487 /* copy eBPF program license from user space */ 488 if (strncpy_from_user(license, u64_to_ptr(attr->license), 489 sizeof(license) - 1) < 0) 490 return -EFAULT; 491 license[sizeof(license) - 1] = 0; 492 493 /* eBPF programs must be GPL compatible to use GPL-ed functions */ 494 is_gpl = license_is_gpl_compatible(license); 495 496 if (attr->insn_cnt >= BPF_MAXINSNS) 497 return -EINVAL; 498 499 if (type == BPF_PROG_TYPE_KPROBE && 500 attr->kern_version != LINUX_VERSION_CODE) 501 return -EINVAL; 502 503 /* plain bpf_prog allocation */ 504 prog = bpf_prog_alloc(bpf_prog_size(attr->insn_cnt), GFP_USER); 505 if (!prog) 506 return -ENOMEM; 507 508 prog->len = attr->insn_cnt; 509 510 err = -EFAULT; 511 if (copy_from_user(prog->insns, u64_to_ptr(attr->insns), 512 prog->len * sizeof(struct bpf_insn)) != 0) 513 goto free_prog; 514 515 prog->orig_prog = NULL; 516 prog->jited = false; 517 518 atomic_set(&prog->aux->refcnt, 1); 519 prog->gpl_compatible = is_gpl; 520 521 /* find program type: socket_filter vs tracing_filter */ 522 err = find_prog_type(type, prog); 523 if (err < 0) 524 goto free_prog; 525 526 /* run eBPF verifier */ 527 err = bpf_check(&prog, attr); 528 if (err < 0) 529 goto free_used_maps; 530 531 /* fixup BPF_CALL->imm field */ 532 fixup_bpf_calls(prog); 533 534 /* eBPF program is ready to be JITed */ 535 bpf_prog_select_runtime(prog); 536 537 err = anon_inode_getfd("bpf-prog", &bpf_prog_fops, prog, O_RDWR | O_CLOEXEC); 538 if (err < 0) 539 /* failed to allocate fd */ 540 goto free_used_maps; 541 542 return err; 543 544 free_used_maps: 545 free_used_maps(prog->aux); 546 free_prog: 547 bpf_prog_free(prog); 548 return err; 549 } 550 551 SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size) 552 { 553 union bpf_attr attr = {}; 554 int err; 555 556 /* the syscall is limited to root temporarily. This restriction will be 557 * lifted when security audit is clean. Note that eBPF+tracing must have 558 * this restriction, since it may pass kernel data to user space 559 */ 560 if (!capable(CAP_SYS_ADMIN)) 561 return -EPERM; 562 563 if (!access_ok(VERIFY_READ, uattr, 1)) 564 return -EFAULT; 565 566 if (size > PAGE_SIZE) /* silly large */ 567 return -E2BIG; 568 569 /* If we're handed a bigger struct than we know of, 570 * ensure all the unknown bits are 0 - i.e. new 571 * user-space does not rely on any kernel feature 572 * extensions we dont know about yet. 573 */ 574 if (size > sizeof(attr)) { 575 unsigned char __user *addr; 576 unsigned char __user *end; 577 unsigned char val; 578 579 addr = (void __user *)uattr + sizeof(attr); 580 end = (void __user *)uattr + size; 581 582 for (; addr < end; addr++) { 583 err = get_user(val, addr); 584 if (err) 585 return err; 586 if (val) 587 return -E2BIG; 588 } 589 size = sizeof(attr); 590 } 591 592 /* copy attributes from user space, may be less than sizeof(bpf_attr) */ 593 if (copy_from_user(&attr, uattr, size) != 0) 594 return -EFAULT; 595 596 switch (cmd) { 597 case BPF_MAP_CREATE: 598 err = map_create(&attr); 599 break; 600 case BPF_MAP_LOOKUP_ELEM: 601 err = map_lookup_elem(&attr); 602 break; 603 case BPF_MAP_UPDATE_ELEM: 604 err = map_update_elem(&attr); 605 break; 606 case BPF_MAP_DELETE_ELEM: 607 err = map_delete_elem(&attr); 608 break; 609 case BPF_MAP_GET_NEXT_KEY: 610 err = map_get_next_key(&attr); 611 break; 612 case BPF_PROG_LOAD: 613 err = bpf_prog_load(&attr); 614 break; 615 default: 616 err = -EINVAL; 617 break; 618 } 619 620 return err; 621 } 622