1 /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com 2 * 3 * This program is free software; you can redistribute it and/or 4 * modify it under the terms of version 2 of the GNU General Public 5 * License as published by the Free Software Foundation. 6 * 7 * This program is distributed in the hope that it will be useful, but 8 * WITHOUT ANY WARRANTY; without even the implied warranty of 9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 10 * General Public License for more details. 11 */ 12 #include <linux/bpf.h> 13 #include <linux/syscalls.h> 14 #include <linux/slab.h> 15 #include <linux/anon_inodes.h> 16 #include <linux/file.h> 17 #include <linux/license.h> 18 #include <linux/filter.h> 19 20 static LIST_HEAD(bpf_map_types); 21 22 static struct bpf_map *find_and_alloc_map(union bpf_attr *attr) 23 { 24 struct bpf_map_type_list *tl; 25 struct bpf_map *map; 26 27 list_for_each_entry(tl, &bpf_map_types, list_node) { 28 if (tl->type == attr->map_type) { 29 map = tl->ops->map_alloc(attr); 30 if (IS_ERR(map)) 31 return map; 32 map->ops = tl->ops; 33 map->map_type = attr->map_type; 34 return map; 35 } 36 } 37 return ERR_PTR(-EINVAL); 38 } 39 40 /* boot time registration of different map implementations */ 41 void bpf_register_map_type(struct bpf_map_type_list *tl) 42 { 43 list_add(&tl->list_node, &bpf_map_types); 44 } 45 46 /* called from workqueue */ 47 static void bpf_map_free_deferred(struct work_struct *work) 48 { 49 struct bpf_map *map = container_of(work, struct bpf_map, work); 50 51 /* implementation dependent freeing */ 52 map->ops->map_free(map); 53 } 54 55 /* decrement map refcnt and schedule it for freeing via workqueue 56 * (unrelying map implementation ops->map_free() might sleep) 57 */ 58 void bpf_map_put(struct bpf_map *map) 59 { 60 if (atomic_dec_and_test(&map->refcnt)) { 61 INIT_WORK(&map->work, bpf_map_free_deferred); 62 schedule_work(&map->work); 63 } 64 } 65 66 static int bpf_map_release(struct inode *inode, struct file *filp) 67 { 68 struct bpf_map *map = filp->private_data; 69 70 bpf_map_put(map); 71 return 0; 72 } 73 74 static const struct file_operations bpf_map_fops = { 75 .release = bpf_map_release, 76 }; 77 78 /* helper macro to check that unused fields 'union bpf_attr' are zero */ 79 #define CHECK_ATTR(CMD) \ 80 memchr_inv((void *) &attr->CMD##_LAST_FIELD + \ 81 sizeof(attr->CMD##_LAST_FIELD), 0, \ 82 sizeof(*attr) - \ 83 offsetof(union bpf_attr, CMD##_LAST_FIELD) - \ 84 sizeof(attr->CMD##_LAST_FIELD)) != NULL 85 86 #define BPF_MAP_CREATE_LAST_FIELD max_entries 87 /* called via syscall */ 88 static int map_create(union bpf_attr *attr) 89 { 90 struct bpf_map *map; 91 int err; 92 93 err = CHECK_ATTR(BPF_MAP_CREATE); 94 if (err) 95 return -EINVAL; 96 97 /* find map type and init map: hashtable vs rbtree vs bloom vs ... */ 98 map = find_and_alloc_map(attr); 99 if (IS_ERR(map)) 100 return PTR_ERR(map); 101 102 atomic_set(&map->refcnt, 1); 103 104 err = anon_inode_getfd("bpf-map", &bpf_map_fops, map, O_RDWR | O_CLOEXEC); 105 106 if (err < 0) 107 /* failed to allocate fd */ 108 goto free_map; 109 110 return err; 111 112 free_map: 113 map->ops->map_free(map); 114 return err; 115 } 116 117 /* if error is returned, fd is released. 118 * On success caller should complete fd access with matching fdput() 119 */ 120 struct bpf_map *bpf_map_get(struct fd f) 121 { 122 struct bpf_map *map; 123 124 if (!f.file) 125 return ERR_PTR(-EBADF); 126 127 if (f.file->f_op != &bpf_map_fops) { 128 fdput(f); 129 return ERR_PTR(-EINVAL); 130 } 131 132 map = f.file->private_data; 133 134 return map; 135 } 136 137 /* helper to convert user pointers passed inside __aligned_u64 fields */ 138 static void __user *u64_to_ptr(__u64 val) 139 { 140 return (void __user *) (unsigned long) val; 141 } 142 143 /* last field in 'union bpf_attr' used by this command */ 144 #define BPF_MAP_LOOKUP_ELEM_LAST_FIELD value 145 146 static int map_lookup_elem(union bpf_attr *attr) 147 { 148 void __user *ukey = u64_to_ptr(attr->key); 149 void __user *uvalue = u64_to_ptr(attr->value); 150 int ufd = attr->map_fd; 151 struct fd f = fdget(ufd); 152 struct bpf_map *map; 153 void *key, *value, *ptr; 154 int err; 155 156 if (CHECK_ATTR(BPF_MAP_LOOKUP_ELEM)) 157 return -EINVAL; 158 159 map = bpf_map_get(f); 160 if (IS_ERR(map)) 161 return PTR_ERR(map); 162 163 err = -ENOMEM; 164 key = kmalloc(map->key_size, GFP_USER); 165 if (!key) 166 goto err_put; 167 168 err = -EFAULT; 169 if (copy_from_user(key, ukey, map->key_size) != 0) 170 goto free_key; 171 172 err = -ENOMEM; 173 value = kmalloc(map->value_size, GFP_USER); 174 if (!value) 175 goto free_key; 176 177 rcu_read_lock(); 178 ptr = map->ops->map_lookup_elem(map, key); 179 if (ptr) 180 memcpy(value, ptr, map->value_size); 181 rcu_read_unlock(); 182 183 err = -ENOENT; 184 if (!ptr) 185 goto free_value; 186 187 err = -EFAULT; 188 if (copy_to_user(uvalue, value, map->value_size) != 0) 189 goto free_value; 190 191 err = 0; 192 193 free_value: 194 kfree(value); 195 free_key: 196 kfree(key); 197 err_put: 198 fdput(f); 199 return err; 200 } 201 202 #define BPF_MAP_UPDATE_ELEM_LAST_FIELD flags 203 204 static int map_update_elem(union bpf_attr *attr) 205 { 206 void __user *ukey = u64_to_ptr(attr->key); 207 void __user *uvalue = u64_to_ptr(attr->value); 208 int ufd = attr->map_fd; 209 struct fd f = fdget(ufd); 210 struct bpf_map *map; 211 void *key, *value; 212 int err; 213 214 if (CHECK_ATTR(BPF_MAP_UPDATE_ELEM)) 215 return -EINVAL; 216 217 map = bpf_map_get(f); 218 if (IS_ERR(map)) 219 return PTR_ERR(map); 220 221 err = -ENOMEM; 222 key = kmalloc(map->key_size, GFP_USER); 223 if (!key) 224 goto err_put; 225 226 err = -EFAULT; 227 if (copy_from_user(key, ukey, map->key_size) != 0) 228 goto free_key; 229 230 err = -ENOMEM; 231 value = kmalloc(map->value_size, GFP_USER); 232 if (!value) 233 goto free_key; 234 235 err = -EFAULT; 236 if (copy_from_user(value, uvalue, map->value_size) != 0) 237 goto free_value; 238 239 /* eBPF program that use maps are running under rcu_read_lock(), 240 * therefore all map accessors rely on this fact, so do the same here 241 */ 242 rcu_read_lock(); 243 err = map->ops->map_update_elem(map, key, value, attr->flags); 244 rcu_read_unlock(); 245 246 free_value: 247 kfree(value); 248 free_key: 249 kfree(key); 250 err_put: 251 fdput(f); 252 return err; 253 } 254 255 #define BPF_MAP_DELETE_ELEM_LAST_FIELD key 256 257 static int map_delete_elem(union bpf_attr *attr) 258 { 259 void __user *ukey = u64_to_ptr(attr->key); 260 int ufd = attr->map_fd; 261 struct fd f = fdget(ufd); 262 struct bpf_map *map; 263 void *key; 264 int err; 265 266 if (CHECK_ATTR(BPF_MAP_DELETE_ELEM)) 267 return -EINVAL; 268 269 map = bpf_map_get(f); 270 if (IS_ERR(map)) 271 return PTR_ERR(map); 272 273 err = -ENOMEM; 274 key = kmalloc(map->key_size, GFP_USER); 275 if (!key) 276 goto err_put; 277 278 err = -EFAULT; 279 if (copy_from_user(key, ukey, map->key_size) != 0) 280 goto free_key; 281 282 rcu_read_lock(); 283 err = map->ops->map_delete_elem(map, key); 284 rcu_read_unlock(); 285 286 free_key: 287 kfree(key); 288 err_put: 289 fdput(f); 290 return err; 291 } 292 293 /* last field in 'union bpf_attr' used by this command */ 294 #define BPF_MAP_GET_NEXT_KEY_LAST_FIELD next_key 295 296 static int map_get_next_key(union bpf_attr *attr) 297 { 298 void __user *ukey = u64_to_ptr(attr->key); 299 void __user *unext_key = u64_to_ptr(attr->next_key); 300 int ufd = attr->map_fd; 301 struct fd f = fdget(ufd); 302 struct bpf_map *map; 303 void *key, *next_key; 304 int err; 305 306 if (CHECK_ATTR(BPF_MAP_GET_NEXT_KEY)) 307 return -EINVAL; 308 309 map = bpf_map_get(f); 310 if (IS_ERR(map)) 311 return PTR_ERR(map); 312 313 err = -ENOMEM; 314 key = kmalloc(map->key_size, GFP_USER); 315 if (!key) 316 goto err_put; 317 318 err = -EFAULT; 319 if (copy_from_user(key, ukey, map->key_size) != 0) 320 goto free_key; 321 322 err = -ENOMEM; 323 next_key = kmalloc(map->key_size, GFP_USER); 324 if (!next_key) 325 goto free_key; 326 327 rcu_read_lock(); 328 err = map->ops->map_get_next_key(map, key, next_key); 329 rcu_read_unlock(); 330 if (err) 331 goto free_next_key; 332 333 err = -EFAULT; 334 if (copy_to_user(unext_key, next_key, map->key_size) != 0) 335 goto free_next_key; 336 337 err = 0; 338 339 free_next_key: 340 kfree(next_key); 341 free_key: 342 kfree(key); 343 err_put: 344 fdput(f); 345 return err; 346 } 347 348 static LIST_HEAD(bpf_prog_types); 349 350 static int find_prog_type(enum bpf_prog_type type, struct bpf_prog *prog) 351 { 352 struct bpf_prog_type_list *tl; 353 354 list_for_each_entry(tl, &bpf_prog_types, list_node) { 355 if (tl->type == type) { 356 prog->aux->ops = tl->ops; 357 prog->aux->prog_type = type; 358 return 0; 359 } 360 } 361 return -EINVAL; 362 } 363 364 void bpf_register_prog_type(struct bpf_prog_type_list *tl) 365 { 366 list_add(&tl->list_node, &bpf_prog_types); 367 } 368 369 /* fixup insn->imm field of bpf_call instructions: 370 * if (insn->imm == BPF_FUNC_map_lookup_elem) 371 * insn->imm = bpf_map_lookup_elem - __bpf_call_base; 372 * else if (insn->imm == BPF_FUNC_map_update_elem) 373 * insn->imm = bpf_map_update_elem - __bpf_call_base; 374 * else ... 375 * 376 * this function is called after eBPF program passed verification 377 */ 378 static void fixup_bpf_calls(struct bpf_prog *prog) 379 { 380 const struct bpf_func_proto *fn; 381 int i; 382 383 for (i = 0; i < prog->len; i++) { 384 struct bpf_insn *insn = &prog->insnsi[i]; 385 386 if (insn->code == (BPF_JMP | BPF_CALL)) { 387 /* we reach here when program has bpf_call instructions 388 * and it passed bpf_check(), means that 389 * ops->get_func_proto must have been supplied, check it 390 */ 391 BUG_ON(!prog->aux->ops->get_func_proto); 392 393 fn = prog->aux->ops->get_func_proto(insn->imm); 394 /* all functions that have prototype and verifier allowed 395 * programs to call them, must be real in-kernel functions 396 */ 397 BUG_ON(!fn->func); 398 insn->imm = fn->func - __bpf_call_base; 399 } 400 } 401 } 402 403 /* drop refcnt on maps used by eBPF program and free auxilary data */ 404 static void free_used_maps(struct bpf_prog_aux *aux) 405 { 406 int i; 407 408 for (i = 0; i < aux->used_map_cnt; i++) 409 bpf_map_put(aux->used_maps[i]); 410 411 kfree(aux->used_maps); 412 } 413 414 void bpf_prog_put(struct bpf_prog *prog) 415 { 416 if (atomic_dec_and_test(&prog->aux->refcnt)) { 417 free_used_maps(prog->aux); 418 bpf_prog_free(prog); 419 } 420 } 421 422 static int bpf_prog_release(struct inode *inode, struct file *filp) 423 { 424 struct bpf_prog *prog = filp->private_data; 425 426 bpf_prog_put(prog); 427 return 0; 428 } 429 430 static const struct file_operations bpf_prog_fops = { 431 .release = bpf_prog_release, 432 }; 433 434 static struct bpf_prog *get_prog(struct fd f) 435 { 436 struct bpf_prog *prog; 437 438 if (!f.file) 439 return ERR_PTR(-EBADF); 440 441 if (f.file->f_op != &bpf_prog_fops) { 442 fdput(f); 443 return ERR_PTR(-EINVAL); 444 } 445 446 prog = f.file->private_data; 447 448 return prog; 449 } 450 451 /* called by sockets/tracing/seccomp before attaching program to an event 452 * pairs with bpf_prog_put() 453 */ 454 struct bpf_prog *bpf_prog_get(u32 ufd) 455 { 456 struct fd f = fdget(ufd); 457 struct bpf_prog *prog; 458 459 prog = get_prog(f); 460 461 if (IS_ERR(prog)) 462 return prog; 463 464 atomic_inc(&prog->aux->refcnt); 465 fdput(f); 466 return prog; 467 } 468 469 /* last field in 'union bpf_attr' used by this command */ 470 #define BPF_PROG_LOAD_LAST_FIELD log_buf 471 472 static int bpf_prog_load(union bpf_attr *attr) 473 { 474 enum bpf_prog_type type = attr->prog_type; 475 struct bpf_prog *prog; 476 int err; 477 char license[128]; 478 bool is_gpl; 479 480 if (CHECK_ATTR(BPF_PROG_LOAD)) 481 return -EINVAL; 482 483 /* copy eBPF program license from user space */ 484 if (strncpy_from_user(license, u64_to_ptr(attr->license), 485 sizeof(license) - 1) < 0) 486 return -EFAULT; 487 license[sizeof(license) - 1] = 0; 488 489 /* eBPF programs must be GPL compatible to use GPL-ed functions */ 490 is_gpl = license_is_gpl_compatible(license); 491 492 if (attr->insn_cnt >= BPF_MAXINSNS) 493 return -EINVAL; 494 495 /* plain bpf_prog allocation */ 496 prog = bpf_prog_alloc(bpf_prog_size(attr->insn_cnt), GFP_USER); 497 if (!prog) 498 return -ENOMEM; 499 500 prog->len = attr->insn_cnt; 501 502 err = -EFAULT; 503 if (copy_from_user(prog->insns, u64_to_ptr(attr->insns), 504 prog->len * sizeof(struct bpf_insn)) != 0) 505 goto free_prog; 506 507 prog->orig_prog = NULL; 508 prog->jited = false; 509 510 atomic_set(&prog->aux->refcnt, 1); 511 prog->aux->is_gpl_compatible = is_gpl; 512 513 /* find program type: socket_filter vs tracing_filter */ 514 err = find_prog_type(type, prog); 515 if (err < 0) 516 goto free_prog; 517 518 /* run eBPF verifier */ 519 err = bpf_check(prog, attr); 520 521 if (err < 0) 522 goto free_used_maps; 523 524 /* fixup BPF_CALL->imm field */ 525 fixup_bpf_calls(prog); 526 527 /* eBPF program is ready to be JITed */ 528 bpf_prog_select_runtime(prog); 529 530 err = anon_inode_getfd("bpf-prog", &bpf_prog_fops, prog, O_RDWR | O_CLOEXEC); 531 532 if (err < 0) 533 /* failed to allocate fd */ 534 goto free_used_maps; 535 536 return err; 537 538 free_used_maps: 539 free_used_maps(prog->aux); 540 free_prog: 541 bpf_prog_free(prog); 542 return err; 543 } 544 545 SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size) 546 { 547 union bpf_attr attr = {}; 548 int err; 549 550 /* the syscall is limited to root temporarily. This restriction will be 551 * lifted when security audit is clean. Note that eBPF+tracing must have 552 * this restriction, since it may pass kernel data to user space 553 */ 554 if (!capable(CAP_SYS_ADMIN)) 555 return -EPERM; 556 557 if (!access_ok(VERIFY_READ, uattr, 1)) 558 return -EFAULT; 559 560 if (size > PAGE_SIZE) /* silly large */ 561 return -E2BIG; 562 563 /* If we're handed a bigger struct than we know of, 564 * ensure all the unknown bits are 0 - i.e. new 565 * user-space does not rely on any kernel feature 566 * extensions we dont know about yet. 567 */ 568 if (size > sizeof(attr)) { 569 unsigned char __user *addr; 570 unsigned char __user *end; 571 unsigned char val; 572 573 addr = (void __user *)uattr + sizeof(attr); 574 end = (void __user *)uattr + size; 575 576 for (; addr < end; addr++) { 577 err = get_user(val, addr); 578 if (err) 579 return err; 580 if (val) 581 return -E2BIG; 582 } 583 size = sizeof(attr); 584 } 585 586 /* copy attributes from user space, may be less than sizeof(bpf_attr) */ 587 if (copy_from_user(&attr, uattr, size) != 0) 588 return -EFAULT; 589 590 switch (cmd) { 591 case BPF_MAP_CREATE: 592 err = map_create(&attr); 593 break; 594 case BPF_MAP_LOOKUP_ELEM: 595 err = map_lookup_elem(&attr); 596 break; 597 case BPF_MAP_UPDATE_ELEM: 598 err = map_update_elem(&attr); 599 break; 600 case BPF_MAP_DELETE_ELEM: 601 err = map_delete_elem(&attr); 602 break; 603 case BPF_MAP_GET_NEXT_KEY: 604 err = map_get_next_key(&attr); 605 break; 606 case BPF_PROG_LOAD: 607 err = bpf_prog_load(&attr); 608 break; 609 default: 610 err = -EINVAL; 611 break; 612 } 613 614 return err; 615 } 616