// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2020 Facebook */

#include <linux/fs.h>
#include <linux/anon_inodes.h>
#include <linux/filter.h>
#include <linux/bpf.h>
#include <linux/rcupdate_trace.h>

struct bpf_iter_target_info {
	struct list_head list;
	const struct bpf_iter_reg *reg_info;
	u32 btf_id;	/* cached value */
};

struct bpf_iter_link {
	struct bpf_link link;
	struct bpf_iter_aux_info aux;
	struct bpf_iter_target_info *tinfo;
};

struct bpf_iter_priv_data {
	struct bpf_iter_target_info *tinfo;
	const struct bpf_iter_seq_info *seq_info;
	struct bpf_prog *prog;
	u64 session_id;
	u64 seq_num;
	bool done_stop;
	u8 target_private[] __aligned(8);
};

static struct list_head targets = LIST_HEAD_INIT(targets);
static DEFINE_MUTEX(targets_mutex);

/* protect bpf_iter_link changes */
static DEFINE_MUTEX(link_mutex);

/* incremented on every opened seq_file */
static atomic64_t session_id;

static int prepare_seq_file(struct file *file, struct bpf_iter_link *link,
			    const struct bpf_iter_seq_info *seq_info);

static void bpf_iter_inc_seq_num(struct seq_file *seq)
{
	struct bpf_iter_priv_data *iter_priv;

	iter_priv = container_of(seq->private, struct bpf_iter_priv_data,
				 target_private);
	iter_priv->seq_num++;
}

static void bpf_iter_dec_seq_num(struct seq_file *seq)
{
	struct bpf_iter_priv_data *iter_priv;

	iter_priv = container_of(seq->private, struct bpf_iter_priv_data,
				 target_private);
	iter_priv->seq_num--;
}

static void bpf_iter_done_stop(struct seq_file *seq)
{
	struct bpf_iter_priv_data *iter_priv;

	iter_priv = container_of(seq->private, struct bpf_iter_priv_data,
				 target_private);
	iter_priv->done_stop = true;
}

static bool bpf_iter_support_resched(struct seq_file *seq)
{
	struct bpf_iter_priv_data *iter_priv;

	iter_priv = container_of(seq->private, struct bpf_iter_priv_data,
				 target_private);
	return iter_priv->tinfo->reg_info->feature & BPF_ITER_RESCHED;
}
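
/* seq->private for a bpf_iter seq_file points at target_private[] above,
 * not at the start of the allocation, so the helpers recover the
 * enclosing struct with container_of():
 *
 *	+---------------------------------+
 *	| tinfo, seq_info, prog,          |
 *	| session_id, seq_num, done_stop  |  struct bpf_iter_priv_data
 *	+---------------------------------+
 *	| target_private[]                |  <-- seq->private
 *	+---------------------------------+
 */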

/* maximum visited objects before bailing out */
#define MAX_ITER_OBJECTS	1000000

/* bpf_seq_read, a customized and simpler version for bpf iterator.
 * The following are differences from seq_read():
 *  . fixed buffer size (PAGE_SIZE << 3)
 *  . assuming NULL ->llseek()
 *  . stop() may call bpf program, handling potential overflow there
 */
static ssize_t bpf_seq_read(struct file *file, char __user *buf, size_t size,
			    loff_t *ppos)
{
	struct seq_file *seq = file->private_data;
	size_t n, offs, copied = 0;
	int err = 0, num_objs = 0;
	bool can_resched;
	void *p;

	mutex_lock(&seq->lock);

	if (!seq->buf) {
		seq->size = PAGE_SIZE << 3;
		seq->buf = kvmalloc(seq->size, GFP_KERNEL);
		if (!seq->buf) {
			err = -ENOMEM;
			goto done;
		}
	}

	if (seq->count) {
		n = min(seq->count, size);
		err = copy_to_user(buf, seq->buf + seq->from, n);
		if (err) {
			err = -EFAULT;
			goto done;
		}
		seq->count -= n;
		seq->from += n;
		copied = n;
		goto done;
	}

	seq->from = 0;
	p = seq->op->start(seq, &seq->index);
	if (!p)
		goto stop;
	if (IS_ERR(p)) {
		err = PTR_ERR(p);
		seq->op->stop(seq, p);
		seq->count = 0;
		goto done;
	}

	err = seq->op->show(seq, p);
	if (err > 0) {
		/* object is skipped, decrease seq_num, so next
		 * valid object can reuse the same seq_num.
		 */
		bpf_iter_dec_seq_num(seq);
		seq->count = 0;
	} else if (err < 0 || seq_has_overflowed(seq)) {
		if (!err)
			err = -E2BIG;
		seq->op->stop(seq, p);
		seq->count = 0;
		goto done;
	}

	can_resched = bpf_iter_support_resched(seq);
	while (1) {
		loff_t pos = seq->index;

		num_objs++;
		offs = seq->count;
		p = seq->op->next(seq, p, &seq->index);
		if (pos == seq->index) {
			pr_info_ratelimited("buggy seq_file .next function %ps "
					    "did not update position index\n",
					    seq->op->next);
			seq->index++;
		}

		if (IS_ERR_OR_NULL(p))
			break;

		/* got a valid next object, increase seq_num */
		bpf_iter_inc_seq_num(seq);

		if (seq->count >= size)
			break;

		if (num_objs >= MAX_ITER_OBJECTS) {
			if (offs == 0) {
				err = -EAGAIN;
				seq->op->stop(seq, p);
				goto done;
			}
			break;
		}

		err = seq->op->show(seq, p);
		if (err > 0) {
			bpf_iter_dec_seq_num(seq);
			seq->count = offs;
		} else if (err < 0 || seq_has_overflowed(seq)) {
			seq->count = offs;
			if (offs == 0) {
				if (!err)
					err = -E2BIG;
				seq->op->stop(seq, p);
				goto done;
			}
			break;
		}

		if (can_resched)
			cond_resched();
	}
stop:
	offs = seq->count;
	/* bpf program called if !p */
	seq->op->stop(seq, p);
	if (!p) {
		if (!seq_has_overflowed(seq)) {
			bpf_iter_done_stop(seq);
		} else {
			seq->count = offs;
			if (offs == 0) {
				err = -E2BIG;
				goto done;
			}
		}
	}

	n = min(seq->count, size);
	err = copy_to_user(buf, seq->buf, n);
	if (err) {
		err = -EFAULT;
		goto done;
	}
	copied = n;
	seq->count -= n;
	seq->from = n;
done:
	if (!copied)
		copied = err;
	else
		*ppos += copied;
	mutex_unlock(&seq->lock);
	return copied;
}

static const struct bpf_iter_seq_info *
__get_seq_info(struct bpf_iter_link *link)
{
	const struct bpf_iter_seq_info *seq_info;

	if (link->aux.map) {
		seq_info = link->aux.map->ops->iter_seq_info;
		if (seq_info)
			return seq_info;
	}

	return link->tinfo->reg_info->seq_info;
}

static int iter_open(struct inode *inode, struct file *file)
{
	struct bpf_iter_link *link = inode->i_private;

	return prepare_seq_file(file, link, __get_seq_info(link));
}

static int iter_release(struct inode *inode, struct file *file)
{
	struct bpf_iter_priv_data *iter_priv;
	struct seq_file *seq;

	seq = file->private_data;
	if (!seq)
		return 0;

	iter_priv = container_of(seq->private, struct bpf_iter_priv_data,
				 target_private);

	if (iter_priv->seq_info->fini_seq_private)
		iter_priv->seq_info->fini_seq_private(seq->private);

	bpf_prog_put(iter_priv->prog);
	seq->private = iter_priv;

	return seq_release_private(inode, file);
}

const struct file_operations bpf_iter_fops = {
	.open		= iter_open,
	.llseek		= no_llseek,
	.read		= bpf_seq_read,
	.release	= iter_release,
};

/* The argument reg_info will be cached in bpf_iter_target_info.
 * The common practice is to declare the target reg_info as
 * a static const variable and pass it as an argument to
 * bpf_iter_reg_target().
 */
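
/* A minimal registration sketch (hypothetical "foo" target, for
 * illustration only; foo_seq_info is a placeholder, not a real symbol):
 *
 *	static const struct bpf_iter_reg foo_reg_info = {
 *		.target		= "foo",
 *		.seq_info	= &foo_seq_info,
 *	};
 *
 *	static int __init foo_iter_init(void)
 *	{
 *		return bpf_iter_reg_target(&foo_reg_info);
 *	}
 */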

int bpf_iter_reg_target(const struct bpf_iter_reg *reg_info)
{
	struct bpf_iter_target_info *tinfo;

	tinfo = kzalloc(sizeof(*tinfo), GFP_KERNEL);
	if (!tinfo)
		return -ENOMEM;

	tinfo->reg_info = reg_info;
	INIT_LIST_HEAD(&tinfo->list);

	mutex_lock(&targets_mutex);
	list_add(&tinfo->list, &targets);
	mutex_unlock(&targets_mutex);

	return 0;
}

void bpf_iter_unreg_target(const struct bpf_iter_reg *reg_info)
{
	struct bpf_iter_target_info *tinfo;
	bool found = false;

	mutex_lock(&targets_mutex);
	list_for_each_entry(tinfo, &targets, list) {
		if (reg_info == tinfo->reg_info) {
			list_del(&tinfo->list);
			kfree(tinfo);
			found = true;
			break;
		}
	}
	mutex_unlock(&targets_mutex);

	WARN_ON(!found);
}

static void cache_btf_id(struct bpf_iter_target_info *tinfo,
			 struct bpf_prog *prog)
{
	tinfo->btf_id = prog->aux->attach_btf_id;
}

bool bpf_iter_prog_supported(struct bpf_prog *prog)
{
	const char *attach_fname = prog->aux->attach_func_name;
	struct bpf_iter_target_info *tinfo = NULL, *iter;
	u32 prog_btf_id = prog->aux->attach_btf_id;
	const char *prefix = BPF_ITER_FUNC_PREFIX;
	int prefix_len = strlen(prefix);

	if (strncmp(attach_fname, prefix, prefix_len))
		return false;

	mutex_lock(&targets_mutex);
	list_for_each_entry(iter, &targets, list) {
		if (iter->btf_id && iter->btf_id == prog_btf_id) {
			tinfo = iter;
			break;
		}
		if (!strcmp(attach_fname + prefix_len, iter->reg_info->target)) {
			cache_btf_id(iter, prog);
			tinfo = iter;
			break;
		}
	}
	mutex_unlock(&targets_mutex);

	if (tinfo) {
		prog->aux->ctx_arg_info_size = tinfo->reg_info->ctx_arg_info_size;
		prog->aux->ctx_arg_info = tinfo->reg_info->ctx_arg_info;
	}

	return tinfo != NULL;
}
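
/* In bpf_iter_prog_supported() above, a program targeting e.g. "task"
 * attaches to the BTF function "bpf_iter_task", i.e. BPF_ITER_FUNC_PREFIX
 * followed by the target name. Once matched by name, the target's btf_id
 * is cached so subsequent lookups compare ids instead of strings.
 */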

const struct bpf_func_proto *
bpf_iter_get_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
	const struct bpf_iter_target_info *tinfo;
	const struct bpf_func_proto *fn = NULL;

	mutex_lock(&targets_mutex);
	list_for_each_entry(tinfo, &targets, list) {
		if (tinfo->btf_id == prog->aux->attach_btf_id) {
			const struct bpf_iter_reg *reg_info;

			reg_info = tinfo->reg_info;
			if (reg_info->get_func_proto)
				fn = reg_info->get_func_proto(func_id, prog);
			break;
		}
	}
	mutex_unlock(&targets_mutex);

	return fn;
}

static void bpf_iter_link_release(struct bpf_link *link)
{
	struct bpf_iter_link *iter_link =
		container_of(link, struct bpf_iter_link, link);

	if (iter_link->tinfo->reg_info->detach_target)
		iter_link->tinfo->reg_info->detach_target(&iter_link->aux);
}

static void bpf_iter_link_dealloc(struct bpf_link *link)
{
	struct bpf_iter_link *iter_link =
		container_of(link, struct bpf_iter_link, link);

	kfree(iter_link);
}

static int bpf_iter_link_replace(struct bpf_link *link,
				 struct bpf_prog *new_prog,
				 struct bpf_prog *old_prog)
{
	int ret = 0;

	mutex_lock(&link_mutex);
	if (old_prog && link->prog != old_prog) {
		ret = -EPERM;
		goto out_unlock;
	}

	if (link->prog->type != new_prog->type ||
	    link->prog->expected_attach_type != new_prog->expected_attach_type ||
	    link->prog->aux->attach_btf_id != new_prog->aux->attach_btf_id) {
		ret = -EINVAL;
		goto out_unlock;
	}

	old_prog = xchg(&link->prog, new_prog);
	bpf_prog_put(old_prog);

out_unlock:
	mutex_unlock(&link_mutex);
	return ret;
}

static void bpf_iter_link_show_fdinfo(const struct bpf_link *link,
				      struct seq_file *seq)
{
	struct bpf_iter_link *iter_link =
		container_of(link, struct bpf_iter_link, link);
	bpf_iter_show_fdinfo_t show_fdinfo;

	seq_printf(seq,
		   "target_name:\t%s\n",
		   iter_link->tinfo->reg_info->target);

	show_fdinfo = iter_link->tinfo->reg_info->show_fdinfo;
	if (show_fdinfo)
		show_fdinfo(&iter_link->aux, seq);
}

static int bpf_iter_link_fill_link_info(const struct bpf_link *link,
					struct bpf_link_info *info)
{
	struct bpf_iter_link *iter_link =
		container_of(link, struct bpf_iter_link, link);
	char __user *ubuf = u64_to_user_ptr(info->iter.target_name);
	bpf_iter_fill_link_info_t fill_link_info;
	u32 ulen = info->iter.target_name_len;
	const char *target_name;
	u32 target_len;

	if (!ulen ^ !ubuf)
		return -EINVAL;

	target_name = iter_link->tinfo->reg_info->target;
	target_len = strlen(target_name);
	info->iter.target_name_len = target_len + 1;

	if (ubuf) {
		if (ulen >= target_len + 1) {
			if (copy_to_user(ubuf, target_name, target_len + 1))
				return -EFAULT;
		} else {
			char zero = '\0';

			if (copy_to_user(ubuf, target_name, ulen - 1))
				return -EFAULT;
			if (put_user(zero, ubuf + ulen - 1))
				return -EFAULT;
			return -ENOSPC;
		}
	}

	fill_link_info = iter_link->tinfo->reg_info->fill_link_info;
	if (fill_link_info)
		return fill_link_info(&iter_link->aux, info);

	return 0;
}

static const struct bpf_link_ops bpf_iter_link_lops = {
	.release = bpf_iter_link_release,
	.dealloc = bpf_iter_link_dealloc,
	.update_prog = bpf_iter_link_replace,
	.show_fdinfo = bpf_iter_link_show_fdinfo,
	.fill_link_info = bpf_iter_link_fill_link_info,
};

bool bpf_link_is_iter(struct bpf_link *link)
{
	return link->ops == &bpf_iter_link_lops;
}

int bpf_iter_link_attach(const union bpf_attr *attr, bpfptr_t uattr,
			 struct bpf_prog *prog)
{
	struct bpf_iter_target_info *tinfo = NULL, *iter;
	struct bpf_link_primer link_primer;
	union bpf_iter_link_info linfo;
	struct bpf_iter_link *link;
	u32 prog_btf_id, linfo_len;
	bpfptr_t ulinfo;
	int err;

	if (attr->link_create.target_fd || attr->link_create.flags)
		return -EINVAL;

	memset(&linfo, 0, sizeof(union bpf_iter_link_info));

	ulinfo = make_bpfptr(attr->link_create.iter_info, uattr.is_kernel);
	linfo_len = attr->link_create.iter_info_len;
	if (bpfptr_is_null(ulinfo) ^ !linfo_len)
		return -EINVAL;

	if (!bpfptr_is_null(ulinfo)) {
		err = bpf_check_uarg_tail_zero(ulinfo, sizeof(linfo),
					       linfo_len);
		if (err)
			return err;
		linfo_len = min_t(u32, linfo_len, sizeof(linfo));
		if (copy_from_bpfptr(&linfo, ulinfo, linfo_len))
			return -EFAULT;
	}

	prog_btf_id = prog->aux->attach_btf_id;
	mutex_lock(&targets_mutex);
	list_for_each_entry(iter, &targets, list) {
		if (iter->btf_id == prog_btf_id) {
			tinfo = iter;
			break;
		}
	}
	mutex_unlock(&targets_mutex);
	if (!tinfo)
		return -ENOENT;

	link = kzalloc(sizeof(*link), GFP_USER | __GFP_NOWARN);
	if (!link)
		return -ENOMEM;

	bpf_link_init(&link->link, BPF_LINK_TYPE_ITER, &bpf_iter_link_lops, prog);
	link->tinfo = tinfo;

	err = bpf_link_prime(&link->link, &link_primer);
	if (err) {
		kfree(link);
		return err;
	}

	if (tinfo->reg_info->attach_target) {
		err = tinfo->reg_info->attach_target(prog, &linfo, &link->aux);
		if (err) {
			bpf_link_cleanup(&link_primer);
			return err;
		}
	}

	return bpf_link_settle(&link_primer);
}
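
/* Typical userspace flow driving the attach path above (a sketch using
 * libbpf; error handling omitted):
 *
 *	struct bpf_link *link;
 *	char buf[4096];
 *	int iter_fd;
 *
 *	link = bpf_program__attach_iter(prog, NULL);	// LINK_CREATE -> bpf_iter_link_attach()
 *	iter_fd = bpf_iter_create(bpf_link__fd(link));	// BPF_ITER_CREATE -> bpf_iter_new_fd()
 *	while (read(iter_fd, buf, sizeof(buf)) > 0)	// read() -> bpf_seq_read()
 *		;
 */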

static void init_seq_meta(struct bpf_iter_priv_data *priv_data,
			  struct bpf_iter_target_info *tinfo,
			  const struct bpf_iter_seq_info *seq_info,
			  struct bpf_prog *prog)
{
	priv_data->tinfo = tinfo;
	priv_data->seq_info = seq_info;
	priv_data->prog = prog;
	priv_data->session_id = atomic64_inc_return(&session_id);
	priv_data->seq_num = 0;
	priv_data->done_stop = false;
}

static int prepare_seq_file(struct file *file, struct bpf_iter_link *link,
			    const struct bpf_iter_seq_info *seq_info)
{
	struct bpf_iter_priv_data *priv_data;
	struct bpf_iter_target_info *tinfo;
	struct bpf_prog *prog;
	u32 total_priv_dsize;
	struct seq_file *seq;
	int err = 0;

	mutex_lock(&link_mutex);
	prog = link->link.prog;
	bpf_prog_inc(prog);
	mutex_unlock(&link_mutex);

	tinfo = link->tinfo;
	total_priv_dsize = offsetof(struct bpf_iter_priv_data, target_private) +
			   seq_info->seq_priv_size;
	priv_data = __seq_open_private(file, seq_info->seq_ops,
				       total_priv_dsize);
	if (!priv_data) {
		err = -ENOMEM;
		goto release_prog;
	}

	if (seq_info->init_seq_private) {
		err = seq_info->init_seq_private(priv_data->target_private, &link->aux);
		if (err)
			goto release_seq_file;
	}

	init_seq_meta(priv_data, tinfo, seq_info, prog);
	seq = file->private_data;
	seq->private = priv_data->target_private;

	return 0;

release_seq_file:
	seq_release_private(file->f_inode, file);
	file->private_data = NULL;
release_prog:
	bpf_prog_put(prog);
	return err;
}

int bpf_iter_new_fd(struct bpf_link *link)
{
	struct bpf_iter_link *iter_link;
	struct file *file;
	unsigned int flags;
	int err, fd;

	if (link->ops != &bpf_iter_link_lops)
		return -EINVAL;

	flags = O_RDONLY | O_CLOEXEC;
	fd = get_unused_fd_flags(flags);
	if (fd < 0)
		return fd;

	file = anon_inode_getfile("bpf_iter", &bpf_iter_fops, NULL, flags);
	if (IS_ERR(file)) {
		err = PTR_ERR(file);
		goto free_fd;
	}

	iter_link = container_of(link, struct bpf_iter_link, link);
	err = prepare_seq_file(file, iter_link, __get_seq_info(iter_link));
	if (err)
		goto free_file;

	fd_install(fd, file);
	return fd;

free_file:
	fput(file);
free_fd:
	put_unused_fd(fd);
	return err;
}
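
/* Iterator targets call bpf_iter_get_info() below from their
 * seq_ops->show() and ->stop() handlers to fetch the attached program
 * and per-session metadata; the common pattern is roughly (a sketch):
 *
 *	struct bpf_iter_meta meta = { .seq = seq };
 *	struct bpf_prog *prog = bpf_iter_get_info(&meta, in_stop);
 *
 *	if (prog)
 *		ret = bpf_iter_run_prog(prog, &ctx);
 */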

struct bpf_prog *bpf_iter_get_info(struct bpf_iter_meta *meta, bool in_stop)
{
	struct bpf_iter_priv_data *iter_priv;
	struct seq_file *seq;
	void *seq_priv;

	seq = meta->seq;
	if (seq->file->f_op != &bpf_iter_fops)
		return NULL;

	seq_priv = seq->private;
	iter_priv = container_of(seq_priv, struct bpf_iter_priv_data,
				 target_private);

	if (in_stop && iter_priv->done_stop)
		return NULL;

	meta->session_id = iter_priv->session_id;
	meta->seq_num = iter_priv->seq_num;

	return iter_priv->prog;
}

int bpf_iter_run_prog(struct bpf_prog *prog, void *ctx)
{
	int ret;

	if (prog->aux->sleepable) {
		rcu_read_lock_trace();
		migrate_disable();
		might_fault();
		ret = bpf_prog_run(prog, ctx);
		migrate_enable();
		rcu_read_unlock_trace();
	} else {
		rcu_read_lock();
		migrate_disable();
		ret = bpf_prog_run(prog, ctx);
		migrate_enable();
		rcu_read_unlock();
	}

	/* bpf program can only return 0 or 1:
	 *  0 : okay
	 *  1 : retry the same object
	 * The bpf_iter_run_prog() return value
	 * becomes the seq_ops->show() return value.
	 */
	return ret == 0 ? 0 : -EAGAIN;
}

BPF_CALL_4(bpf_for_each_map_elem, struct bpf_map *, map, void *, callback_fn,
	   void *, callback_ctx, u64, flags)
{
	return map->ops->map_for_each_callback(map, callback_fn, callback_ctx, flags);
}

const struct bpf_func_proto bpf_for_each_map_elem_proto = {
	.func = bpf_for_each_map_elem,
	.gpl_only = false,
	.ret_type = RET_INTEGER,
	.arg1_type = ARG_CONST_MAP_PTR,
	.arg2_type = ARG_PTR_TO_FUNC,
	.arg3_type = ARG_PTR_TO_STACK_OR_NULL,
	.arg4_type = ARG_ANYTHING,
};

BPF_CALL_4(bpf_loop, u32, nr_loops, void *, callback_fn, void *, callback_ctx,
	   u64, flags)
{
	bpf_callback_t callback = (bpf_callback_t)callback_fn;
	u64 ret;
	u32 i;

	/* Note: these safety checks are also performed when bpf_loop
	 * is inlined; take care to keep this code in sync. See
	 * verifier.c:inline_bpf_loop().
	 */
	if (flags)
		return -EINVAL;
	if (nr_loops > BPF_MAX_LOOPS)
		return -E2BIG;

	for (i = 0; i < nr_loops; i++) {
		ret = callback((u64)i, (u64)(long)callback_ctx, 0, 0, 0);
		/* return value: 0 - continue, 1 - stop and return */
		if (ret)
			return i + 1;
	}

	return i;
}

const struct bpf_func_proto bpf_loop_proto = {
	.func = bpf_loop,
	.gpl_only = false,
	.ret_type = RET_INTEGER,
	.arg1_type = ARG_ANYTHING,
	.arg2_type = ARG_PTR_TO_FUNC,
	.arg3_type = ARG_PTR_TO_STACK_OR_NULL,
	.arg4_type = ARG_ANYTHING,
};
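
/* BPF-side usage of bpf_loop(), for reference (a sketch; names are
 * illustrative). The callback runs up to nr_loops times unless it
 * returns 1 to stop early:
 *
 *	static long cb(u64 index, void *ctx)
 *	{
 *		...
 *		return 0;	// 0 - continue, 1 - stop and return
 *	}
 *
 *	// inside a bpf program:
 *	bpf_loop(128, cb, &my_ctx, 0);
 */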