1 /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com 2 * 3 * This program is free software; you can redistribute it and/or 4 * modify it under the terms of version 2 of the GNU General Public 5 * License as published by the Free Software Foundation. 6 * 7 * This program is distributed in the hope that it will be useful, but 8 * WITHOUT ANY WARRANTY; without even the implied warranty of 9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 10 * General Public License for more details. 11 */ 12 #include <linux/bpf.h> 13 #include <linux/syscalls.h> 14 #include <linux/slab.h> 15 #include <linux/anon_inodes.h> 16 #include <linux/file.h> 17 #include <linux/license.h> 18 #include <linux/filter.h> 19 20 static LIST_HEAD(bpf_map_types); 21 22 static struct bpf_map *find_and_alloc_map(union bpf_attr *attr) 23 { 24 struct bpf_map_type_list *tl; 25 struct bpf_map *map; 26 27 list_for_each_entry(tl, &bpf_map_types, list_node) { 28 if (tl->type == attr->map_type) { 29 map = tl->ops->map_alloc(attr); 30 if (IS_ERR(map)) 31 return map; 32 map->ops = tl->ops; 33 map->map_type = attr->map_type; 34 return map; 35 } 36 } 37 return ERR_PTR(-EINVAL); 38 } 39 40 /* boot time registration of different map implementations */ 41 void bpf_register_map_type(struct bpf_map_type_list *tl) 42 { 43 list_add(&tl->list_node, &bpf_map_types); 44 } 45 46 /* called from workqueue */ 47 static void bpf_map_free_deferred(struct work_struct *work) 48 { 49 struct bpf_map *map = container_of(work, struct bpf_map, work); 50 51 /* implementation dependent freeing */ 52 map->ops->map_free(map); 53 } 54 55 /* decrement map refcnt and schedule it for freeing via workqueue 56 * (unrelying map implementation ops->map_free() might sleep) 57 */ 58 void bpf_map_put(struct bpf_map *map) 59 { 60 if (atomic_dec_and_test(&map->refcnt)) { 61 INIT_WORK(&map->work, bpf_map_free_deferred); 62 schedule_work(&map->work); 63 } 64 } 65 66 static int bpf_map_release(struct inode *inode, struct file *filp) 67 { 68 struct bpf_map *map = filp->private_data; 69 70 bpf_map_put(map); 71 return 0; 72 } 73 74 static const struct file_operations bpf_map_fops = { 75 .release = bpf_map_release, 76 }; 77 78 /* helper macro to check that unused fields 'union bpf_attr' are zero */ 79 #define CHECK_ATTR(CMD) \ 80 memchr_inv((void *) &attr->CMD##_LAST_FIELD + \ 81 sizeof(attr->CMD##_LAST_FIELD), 0, \ 82 sizeof(*attr) - \ 83 offsetof(union bpf_attr, CMD##_LAST_FIELD) - \ 84 sizeof(attr->CMD##_LAST_FIELD)) != NULL 85 86 #define BPF_MAP_CREATE_LAST_FIELD max_entries 87 /* called via syscall */ 88 static int map_create(union bpf_attr *attr) 89 { 90 struct bpf_map *map; 91 int err; 92 93 err = CHECK_ATTR(BPF_MAP_CREATE); 94 if (err) 95 return -EINVAL; 96 97 /* find map type and init map: hashtable vs rbtree vs bloom vs ... */ 98 map = find_and_alloc_map(attr); 99 if (IS_ERR(map)) 100 return PTR_ERR(map); 101 102 atomic_set(&map->refcnt, 1); 103 104 err = anon_inode_getfd("bpf-map", &bpf_map_fops, map, O_RDWR | O_CLOEXEC); 105 106 if (err < 0) 107 /* failed to allocate fd */ 108 goto free_map; 109 110 return err; 111 112 free_map: 113 map->ops->map_free(map); 114 return err; 115 } 116 117 /* if error is returned, fd is released. 118 * On success caller should complete fd access with matching fdput() 119 */ 120 struct bpf_map *bpf_map_get(struct fd f) 121 { 122 struct bpf_map *map; 123 124 if (!f.file) 125 return ERR_PTR(-EBADF); 126 127 if (f.file->f_op != &bpf_map_fops) { 128 fdput(f); 129 return ERR_PTR(-EINVAL); 130 } 131 132 map = f.file->private_data; 133 134 return map; 135 } 136 137 /* helper to convert user pointers passed inside __aligned_u64 fields */ 138 static void __user *u64_to_ptr(__u64 val) 139 { 140 return (void __user *) (unsigned long) val; 141 } 142 143 /* last field in 'union bpf_attr' used by this command */ 144 #define BPF_MAP_LOOKUP_ELEM_LAST_FIELD value 145 146 static int map_lookup_elem(union bpf_attr *attr) 147 { 148 void __user *ukey = u64_to_ptr(attr->key); 149 void __user *uvalue = u64_to_ptr(attr->value); 150 int ufd = attr->map_fd; 151 struct fd f = fdget(ufd); 152 struct bpf_map *map; 153 void *key, *value; 154 int err; 155 156 if (CHECK_ATTR(BPF_MAP_LOOKUP_ELEM)) 157 return -EINVAL; 158 159 map = bpf_map_get(f); 160 if (IS_ERR(map)) 161 return PTR_ERR(map); 162 163 err = -ENOMEM; 164 key = kmalloc(map->key_size, GFP_USER); 165 if (!key) 166 goto err_put; 167 168 err = -EFAULT; 169 if (copy_from_user(key, ukey, map->key_size) != 0) 170 goto free_key; 171 172 err = -ESRCH; 173 rcu_read_lock(); 174 value = map->ops->map_lookup_elem(map, key); 175 if (!value) 176 goto err_unlock; 177 178 err = -EFAULT; 179 if (copy_to_user(uvalue, value, map->value_size) != 0) 180 goto err_unlock; 181 182 err = 0; 183 184 err_unlock: 185 rcu_read_unlock(); 186 free_key: 187 kfree(key); 188 err_put: 189 fdput(f); 190 return err; 191 } 192 193 #define BPF_MAP_UPDATE_ELEM_LAST_FIELD value 194 195 static int map_update_elem(union bpf_attr *attr) 196 { 197 void __user *ukey = u64_to_ptr(attr->key); 198 void __user *uvalue = u64_to_ptr(attr->value); 199 int ufd = attr->map_fd; 200 struct fd f = fdget(ufd); 201 struct bpf_map *map; 202 void *key, *value; 203 int err; 204 205 if (CHECK_ATTR(BPF_MAP_UPDATE_ELEM)) 206 return -EINVAL; 207 208 map = bpf_map_get(f); 209 if (IS_ERR(map)) 210 return PTR_ERR(map); 211 212 err = -ENOMEM; 213 key = kmalloc(map->key_size, GFP_USER); 214 if (!key) 215 goto err_put; 216 217 err = -EFAULT; 218 if (copy_from_user(key, ukey, map->key_size) != 0) 219 goto free_key; 220 221 err = -ENOMEM; 222 value = kmalloc(map->value_size, GFP_USER); 223 if (!value) 224 goto free_key; 225 226 err = -EFAULT; 227 if (copy_from_user(value, uvalue, map->value_size) != 0) 228 goto free_value; 229 230 /* eBPF program that use maps are running under rcu_read_lock(), 231 * therefore all map accessors rely on this fact, so do the same here 232 */ 233 rcu_read_lock(); 234 err = map->ops->map_update_elem(map, key, value); 235 rcu_read_unlock(); 236 237 free_value: 238 kfree(value); 239 free_key: 240 kfree(key); 241 err_put: 242 fdput(f); 243 return err; 244 } 245 246 #define BPF_MAP_DELETE_ELEM_LAST_FIELD key 247 248 static int map_delete_elem(union bpf_attr *attr) 249 { 250 void __user *ukey = u64_to_ptr(attr->key); 251 int ufd = attr->map_fd; 252 struct fd f = fdget(ufd); 253 struct bpf_map *map; 254 void *key; 255 int err; 256 257 if (CHECK_ATTR(BPF_MAP_DELETE_ELEM)) 258 return -EINVAL; 259 260 map = bpf_map_get(f); 261 if (IS_ERR(map)) 262 return PTR_ERR(map); 263 264 err = -ENOMEM; 265 key = kmalloc(map->key_size, GFP_USER); 266 if (!key) 267 goto err_put; 268 269 err = -EFAULT; 270 if (copy_from_user(key, ukey, map->key_size) != 0) 271 goto free_key; 272 273 rcu_read_lock(); 274 err = map->ops->map_delete_elem(map, key); 275 rcu_read_unlock(); 276 277 free_key: 278 kfree(key); 279 err_put: 280 fdput(f); 281 return err; 282 } 283 284 /* last field in 'union bpf_attr' used by this command */ 285 #define BPF_MAP_GET_NEXT_KEY_LAST_FIELD next_key 286 287 static int map_get_next_key(union bpf_attr *attr) 288 { 289 void __user *ukey = u64_to_ptr(attr->key); 290 void __user *unext_key = u64_to_ptr(attr->next_key); 291 int ufd = attr->map_fd; 292 struct fd f = fdget(ufd); 293 struct bpf_map *map; 294 void *key, *next_key; 295 int err; 296 297 if (CHECK_ATTR(BPF_MAP_GET_NEXT_KEY)) 298 return -EINVAL; 299 300 map = bpf_map_get(f); 301 if (IS_ERR(map)) 302 return PTR_ERR(map); 303 304 err = -ENOMEM; 305 key = kmalloc(map->key_size, GFP_USER); 306 if (!key) 307 goto err_put; 308 309 err = -EFAULT; 310 if (copy_from_user(key, ukey, map->key_size) != 0) 311 goto free_key; 312 313 err = -ENOMEM; 314 next_key = kmalloc(map->key_size, GFP_USER); 315 if (!next_key) 316 goto free_key; 317 318 rcu_read_lock(); 319 err = map->ops->map_get_next_key(map, key, next_key); 320 rcu_read_unlock(); 321 if (err) 322 goto free_next_key; 323 324 err = -EFAULT; 325 if (copy_to_user(unext_key, next_key, map->key_size) != 0) 326 goto free_next_key; 327 328 err = 0; 329 330 free_next_key: 331 kfree(next_key); 332 free_key: 333 kfree(key); 334 err_put: 335 fdput(f); 336 return err; 337 } 338 339 static LIST_HEAD(bpf_prog_types); 340 341 static int find_prog_type(enum bpf_prog_type type, struct bpf_prog *prog) 342 { 343 struct bpf_prog_type_list *tl; 344 345 list_for_each_entry(tl, &bpf_prog_types, list_node) { 346 if (tl->type == type) { 347 prog->aux->ops = tl->ops; 348 prog->aux->prog_type = type; 349 return 0; 350 } 351 } 352 return -EINVAL; 353 } 354 355 void bpf_register_prog_type(struct bpf_prog_type_list *tl) 356 { 357 list_add(&tl->list_node, &bpf_prog_types); 358 } 359 360 /* fixup insn->imm field of bpf_call instructions: 361 * if (insn->imm == BPF_FUNC_map_lookup_elem) 362 * insn->imm = bpf_map_lookup_elem - __bpf_call_base; 363 * else if (insn->imm == BPF_FUNC_map_update_elem) 364 * insn->imm = bpf_map_update_elem - __bpf_call_base; 365 * else ... 366 * 367 * this function is called after eBPF program passed verification 368 */ 369 static void fixup_bpf_calls(struct bpf_prog *prog) 370 { 371 const struct bpf_func_proto *fn; 372 int i; 373 374 for (i = 0; i < prog->len; i++) { 375 struct bpf_insn *insn = &prog->insnsi[i]; 376 377 if (insn->code == (BPF_JMP | BPF_CALL)) { 378 /* we reach here when program has bpf_call instructions 379 * and it passed bpf_check(), means that 380 * ops->get_func_proto must have been supplied, check it 381 */ 382 BUG_ON(!prog->aux->ops->get_func_proto); 383 384 fn = prog->aux->ops->get_func_proto(insn->imm); 385 /* all functions that have prototype and verifier allowed 386 * programs to call them, must be real in-kernel functions 387 */ 388 BUG_ON(!fn->func); 389 insn->imm = fn->func - __bpf_call_base; 390 } 391 } 392 } 393 394 /* drop refcnt on maps used by eBPF program and free auxilary data */ 395 static void free_used_maps(struct bpf_prog_aux *aux) 396 { 397 int i; 398 399 for (i = 0; i < aux->used_map_cnt; i++) 400 bpf_map_put(aux->used_maps[i]); 401 402 kfree(aux->used_maps); 403 } 404 405 void bpf_prog_put(struct bpf_prog *prog) 406 { 407 if (atomic_dec_and_test(&prog->aux->refcnt)) { 408 free_used_maps(prog->aux); 409 bpf_prog_free(prog); 410 } 411 } 412 413 static int bpf_prog_release(struct inode *inode, struct file *filp) 414 { 415 struct bpf_prog *prog = filp->private_data; 416 417 bpf_prog_put(prog); 418 return 0; 419 } 420 421 static const struct file_operations bpf_prog_fops = { 422 .release = bpf_prog_release, 423 }; 424 425 static struct bpf_prog *get_prog(struct fd f) 426 { 427 struct bpf_prog *prog; 428 429 if (!f.file) 430 return ERR_PTR(-EBADF); 431 432 if (f.file->f_op != &bpf_prog_fops) { 433 fdput(f); 434 return ERR_PTR(-EINVAL); 435 } 436 437 prog = f.file->private_data; 438 439 return prog; 440 } 441 442 /* called by sockets/tracing/seccomp before attaching program to an event 443 * pairs with bpf_prog_put() 444 */ 445 struct bpf_prog *bpf_prog_get(u32 ufd) 446 { 447 struct fd f = fdget(ufd); 448 struct bpf_prog *prog; 449 450 prog = get_prog(f); 451 452 if (IS_ERR(prog)) 453 return prog; 454 455 atomic_inc(&prog->aux->refcnt); 456 fdput(f); 457 return prog; 458 } 459 460 /* last field in 'union bpf_attr' used by this command */ 461 #define BPF_PROG_LOAD_LAST_FIELD log_buf 462 463 static int bpf_prog_load(union bpf_attr *attr) 464 { 465 enum bpf_prog_type type = attr->prog_type; 466 struct bpf_prog *prog; 467 int err; 468 char license[128]; 469 bool is_gpl; 470 471 if (CHECK_ATTR(BPF_PROG_LOAD)) 472 return -EINVAL; 473 474 /* copy eBPF program license from user space */ 475 if (strncpy_from_user(license, u64_to_ptr(attr->license), 476 sizeof(license) - 1) < 0) 477 return -EFAULT; 478 license[sizeof(license) - 1] = 0; 479 480 /* eBPF programs must be GPL compatible to use GPL-ed functions */ 481 is_gpl = license_is_gpl_compatible(license); 482 483 if (attr->insn_cnt >= BPF_MAXINSNS) 484 return -EINVAL; 485 486 /* plain bpf_prog allocation */ 487 prog = bpf_prog_alloc(bpf_prog_size(attr->insn_cnt), GFP_USER); 488 if (!prog) 489 return -ENOMEM; 490 491 prog->len = attr->insn_cnt; 492 493 err = -EFAULT; 494 if (copy_from_user(prog->insns, u64_to_ptr(attr->insns), 495 prog->len * sizeof(struct bpf_insn)) != 0) 496 goto free_prog; 497 498 prog->orig_prog = NULL; 499 prog->jited = false; 500 501 atomic_set(&prog->aux->refcnt, 1); 502 prog->aux->is_gpl_compatible = is_gpl; 503 504 /* find program type: socket_filter vs tracing_filter */ 505 err = find_prog_type(type, prog); 506 if (err < 0) 507 goto free_prog; 508 509 /* run eBPF verifier */ 510 err = bpf_check(prog, attr); 511 512 if (err < 0) 513 goto free_used_maps; 514 515 /* fixup BPF_CALL->imm field */ 516 fixup_bpf_calls(prog); 517 518 /* eBPF program is ready to be JITed */ 519 bpf_prog_select_runtime(prog); 520 521 err = anon_inode_getfd("bpf-prog", &bpf_prog_fops, prog, O_RDWR | O_CLOEXEC); 522 523 if (err < 0) 524 /* failed to allocate fd */ 525 goto free_used_maps; 526 527 return err; 528 529 free_used_maps: 530 free_used_maps(prog->aux); 531 free_prog: 532 bpf_prog_free(prog); 533 return err; 534 } 535 536 SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size) 537 { 538 union bpf_attr attr = {}; 539 int err; 540 541 /* the syscall is limited to root temporarily. This restriction will be 542 * lifted when security audit is clean. Note that eBPF+tracing must have 543 * this restriction, since it may pass kernel data to user space 544 */ 545 if (!capable(CAP_SYS_ADMIN)) 546 return -EPERM; 547 548 if (!access_ok(VERIFY_READ, uattr, 1)) 549 return -EFAULT; 550 551 if (size > PAGE_SIZE) /* silly large */ 552 return -E2BIG; 553 554 /* If we're handed a bigger struct than we know of, 555 * ensure all the unknown bits are 0 - i.e. new 556 * user-space does not rely on any kernel feature 557 * extensions we dont know about yet. 558 */ 559 if (size > sizeof(attr)) { 560 unsigned char __user *addr; 561 unsigned char __user *end; 562 unsigned char val; 563 564 addr = (void __user *)uattr + sizeof(attr); 565 end = (void __user *)uattr + size; 566 567 for (; addr < end; addr++) { 568 err = get_user(val, addr); 569 if (err) 570 return err; 571 if (val) 572 return -E2BIG; 573 } 574 size = sizeof(attr); 575 } 576 577 /* copy attributes from user space, may be less than sizeof(bpf_attr) */ 578 if (copy_from_user(&attr, uattr, size) != 0) 579 return -EFAULT; 580 581 switch (cmd) { 582 case BPF_MAP_CREATE: 583 err = map_create(&attr); 584 break; 585 case BPF_MAP_LOOKUP_ELEM: 586 err = map_lookup_elem(&attr); 587 break; 588 case BPF_MAP_UPDATE_ELEM: 589 err = map_update_elem(&attr); 590 break; 591 case BPF_MAP_DELETE_ELEM: 592 err = map_delete_elem(&attr); 593 break; 594 case BPF_MAP_GET_NEXT_KEY: 595 err = map_get_next_key(&attr); 596 break; 597 case BPF_PROG_LOAD: 598 err = bpf_prog_load(&attr); 599 break; 600 default: 601 err = -EINVAL; 602 break; 603 } 604 605 return err; 606 } 607