// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2020 Facebook */

#include <linux/fs.h>
#include <linux/anon_inodes.h>
#include <linux/filter.h>
#include <linux/bpf.h>
#include <linux/rcupdate_trace.h>

struct bpf_iter_target_info {
	struct list_head list;
	const struct bpf_iter_reg *reg_info;
	u32 btf_id;	/* cached value */
};

struct bpf_iter_link {
	struct bpf_link link;
	struct bpf_iter_aux_info aux;
	struct bpf_iter_target_info *tinfo;
};

struct bpf_iter_priv_data {
	struct bpf_iter_target_info *tinfo;
	const struct bpf_iter_seq_info *seq_info;
	struct bpf_prog *prog;
	u64 session_id;
	u64 seq_num;
	bool done_stop;
	u8 target_private[] __aligned(8);
};

static struct list_head targets = LIST_HEAD_INIT(targets);
static DEFINE_MUTEX(targets_mutex);

/* protect bpf_iter_link changes */
static DEFINE_MUTEX(link_mutex);

/* incremented on every opened seq_file */
static atomic64_t session_id;

static int prepare_seq_file(struct file *file, struct bpf_iter_link *link,
			    const struct bpf_iter_seq_info *seq_info);

static void bpf_iter_inc_seq_num(struct seq_file *seq)
{
	struct bpf_iter_priv_data *iter_priv;

	iter_priv = container_of(seq->private, struct bpf_iter_priv_data,
				 target_private);
	iter_priv->seq_num++;
}

static void bpf_iter_dec_seq_num(struct seq_file *seq)
{
	struct bpf_iter_priv_data *iter_priv;

	iter_priv = container_of(seq->private, struct bpf_iter_priv_data,
				 target_private);
	iter_priv->seq_num--;
}

static void bpf_iter_done_stop(struct seq_file *seq)
{
	struct bpf_iter_priv_data *iter_priv;

	iter_priv = container_of(seq->private, struct bpf_iter_priv_data,
				 target_private);
	iter_priv->done_stop = true;
}

static bool bpf_iter_support_resched(struct seq_file *seq)
{
	struct bpf_iter_priv_data *iter_priv;

	iter_priv = container_of(seq->private, struct bpf_iter_priv_data,
				 target_private);
	return iter_priv->tinfo->reg_info->feature & BPF_ITER_RESCHED;
}

/* maximum visited objects before bailing out */
#define MAX_ITER_OBJECTS	1000000

/* bpf_seq_read, a customized and simpler version for bpf iterator.
 * The following are differences from seq_read():
 *  . fixed buffer size (PAGE_SIZE << 3)
 *  . assuming no_llseek
 *  . stop() may call bpf program, handling potential overflow there
 */
static ssize_t bpf_seq_read(struct file *file, char __user *buf, size_t size,
			    loff_t *ppos)
{
	struct seq_file *seq = file->private_data;
	size_t n, offs, copied = 0;
	int err = 0, num_objs = 0;
	bool can_resched;
	void *p;

	mutex_lock(&seq->lock);

	if (!seq->buf) {
		seq->size = PAGE_SIZE << 3;
		seq->buf = kvmalloc(seq->size, GFP_KERNEL);
		if (!seq->buf) {
			err = -ENOMEM;
			goto done;
		}
	}

	if (seq->count) {
		n = min(seq->count, size);
		err = copy_to_user(buf, seq->buf + seq->from, n);
		if (err) {
			err = -EFAULT;
			goto done;
		}
		seq->count -= n;
		seq->from += n;
		copied = n;
		goto done;
	}

	seq->from = 0;
	p = seq->op->start(seq, &seq->index);
	if (!p)
		goto stop;
	if (IS_ERR(p)) {
		err = PTR_ERR(p);
		seq->op->stop(seq, p);
		seq->count = 0;
		goto done;
	}

	err = seq->op->show(seq, p);
	if (err > 0) {
		/* object is skipped, decrease seq_num, so next
		 * valid object can reuse the same seq_num.
		 */
		bpf_iter_dec_seq_num(seq);
		seq->count = 0;
	} else if (err < 0 || seq_has_overflowed(seq)) {
		if (!err)
			err = -E2BIG;
		seq->op->stop(seq, p);
		seq->count = 0;
		goto done;
	}

	can_resched = bpf_iter_support_resched(seq);
	while (1) {
		loff_t pos = seq->index;

		num_objs++;
		offs = seq->count;
		p = seq->op->next(seq, p, &seq->index);
		if (pos == seq->index) {
			pr_info_ratelimited("buggy seq_file .next function %ps "
					    "did not update position index\n",
					    seq->op->next);
			seq->index++;
		}

		if (IS_ERR_OR_NULL(p))
			break;

		/* got a valid next object, increase seq_num */
		bpf_iter_inc_seq_num(seq);

		if (seq->count >= size)
			break;

		if (num_objs >= MAX_ITER_OBJECTS) {
			if (offs == 0) {
				err = -EAGAIN;
				seq->op->stop(seq, p);
				goto done;
			}
			break;
		}

		err = seq->op->show(seq, p);
		if (err > 0) {
			bpf_iter_dec_seq_num(seq);
			seq->count = offs;
		} else if (err < 0 || seq_has_overflowed(seq)) {
			seq->count = offs;
			if (offs == 0) {
				if (!err)
					err = -E2BIG;
				seq->op->stop(seq, p);
				goto done;
			}
			break;
		}

		if (can_resched)
			cond_resched();
	}
stop:
	offs = seq->count;
	/* bpf program called if !p */
	seq->op->stop(seq, p);
	if (!p) {
		if (!seq_has_overflowed(seq)) {
			bpf_iter_done_stop(seq);
		} else {
			seq->count = offs;
			if (offs == 0) {
				err = -E2BIG;
				goto done;
			}
		}
	}

	n = min(seq->count, size);
	err = copy_to_user(buf, seq->buf, n);
	if (err) {
		err = -EFAULT;
		goto done;
	}
	copied = n;
	seq->count -= n;
	seq->from = n;
done:
	if (!copied)
		copied = err;
	else
		*ppos += copied;
	mutex_unlock(&seq->lock);
	return copied;
}

static const struct bpf_iter_seq_info *
__get_seq_info(struct bpf_iter_link *link)
{
	const struct bpf_iter_seq_info *seq_info;

	if (link->aux.map) {
		seq_info = link->aux.map->ops->iter_seq_info;
		if (seq_info)
			return seq_info;
	}

	return link->tinfo->reg_info->seq_info;
}

static int iter_open(struct inode *inode, struct file *file)
{
	struct bpf_iter_link *link = inode->i_private;

	return prepare_seq_file(file, link, __get_seq_info(link));
}

static int iter_release(struct inode *inode, struct file *file)
{
	struct bpf_iter_priv_data *iter_priv;
	struct seq_file *seq;

	seq = file->private_data;
	if (!seq)
		return 0;

	iter_priv = container_of(seq->private, struct bpf_iter_priv_data,
				 target_private);

	if (iter_priv->seq_info->fini_seq_private)
		iter_priv->seq_info->fini_seq_private(seq->private);

	bpf_prog_put(iter_priv->prog);
	seq->private = iter_priv;

	return seq_release_private(inode, file);
}

const struct file_operations bpf_iter_fops = {
	.open		= iter_open,
	.llseek		= no_llseek,
	.read		= bpf_seq_read,
	.release	= iter_release,
};

/* The argument reg_info will be cached in bpf_iter_target_info.
 * The common practice is to declare the target's reg_info as
 * a const static variable and pass it as an argument to
 * bpf_iter_reg_target().
 */
int bpf_iter_reg_target(const struct bpf_iter_reg *reg_info)
{
	struct bpf_iter_target_info *tinfo;

	tinfo = kzalloc(sizeof(*tinfo), GFP_KERNEL);
	if (!tinfo)
		return -ENOMEM;

	tinfo->reg_info = reg_info;
	INIT_LIST_HEAD(&tinfo->list);

	mutex_lock(&targets_mutex);
	list_add(&tinfo->list, &targets);
	mutex_unlock(&targets_mutex);

	return 0;
}

void bpf_iter_unreg_target(const struct bpf_iter_reg *reg_info)
{
	struct bpf_iter_target_info *tinfo;
	bool found = false;

	mutex_lock(&targets_mutex);
	list_for_each_entry(tinfo, &targets, list) {
		if (reg_info == tinfo->reg_info) {
			list_del(&tinfo->list);
			kfree(tinfo);
			found = true;
			break;
		}
	}
	mutex_unlock(&targets_mutex);

	WARN_ON(found == false);
}
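
/* Illustrative sketch, not part of the original file: the registration
 * pattern a typical target follows. All "foo" names (ctx struct, seq_ops,
 * priv struct, init/fini callbacks) are hypothetical placeholders for a
 * real target's definitions; real examples live in e.g. task_iter.c.
 * Note the attach function name must be BPF_ITER_FUNC_PREFIX + target,
 * which is what DEFINE_BPF_ITER_FUNC() arranges and what
 * bpf_iter_prog_supported() below matches against.
 *
 *	DEFINE_BPF_ITER_FUNC(foo, struct bpf_iter_meta *meta, struct foo *foo)
 *
 *	struct bpf_iter__foo {
 *		__bpf_md_ptr(struct bpf_iter_meta *, meta);
 *		__bpf_md_ptr(struct foo *, foo);
 *	};
 *
 *	static const struct bpf_iter_seq_info foo_seq_info = {
 *		.seq_ops		= &foo_seq_ops,
 *		.init_seq_private	= foo_init_seq_private,
 *		.fini_seq_private	= foo_fini_seq_private,
 *		.seq_priv_size		= sizeof(struct foo_iter_seq_priv),
 *	};
 *
 *	static const struct bpf_iter_reg foo_reg_info = {
 *		.target			= "foo",
 *		.feature		= BPF_ITER_RESCHED,
 *		.ctx_arg_info_size	= 1,
 *		.ctx_arg_info		= {
 *			{ offsetof(struct bpf_iter__foo, foo),
 *			  PTR_TO_BTF_ID_OR_NULL },
 *		},
 *		.seq_info		= &foo_seq_info,
 *	};
 *
 *	static int __init foo_iter_init(void)
 *	{
 *		return bpf_iter_reg_target(&foo_reg_info);
 *	}
 *
 * A module-backed target would pair this with bpf_iter_unreg_target()
 * on teardown.
 */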

static void cache_btf_id(struct bpf_iter_target_info *tinfo,
			 struct bpf_prog *prog)
{
	tinfo->btf_id = prog->aux->attach_btf_id;
}

bool bpf_iter_prog_supported(struct bpf_prog *prog)
{
	const char *attach_fname = prog->aux->attach_func_name;
	u32 prog_btf_id = prog->aux->attach_btf_id;
	const char *prefix = BPF_ITER_FUNC_PREFIX;
	struct bpf_iter_target_info *tinfo;
	int prefix_len = strlen(prefix);
	bool supported = false;

	if (strncmp(attach_fname, prefix, prefix_len))
		return false;

	mutex_lock(&targets_mutex);
	list_for_each_entry(tinfo, &targets, list) {
		if (tinfo->btf_id && tinfo->btf_id == prog_btf_id) {
			supported = true;
			break;
		}
		if (!strcmp(attach_fname + prefix_len, tinfo->reg_info->target)) {
			cache_btf_id(tinfo, prog);
			supported = true;
			break;
		}
	}
	mutex_unlock(&targets_mutex);

	if (supported) {
		prog->aux->ctx_arg_info_size = tinfo->reg_info->ctx_arg_info_size;
		prog->aux->ctx_arg_info = tinfo->reg_info->ctx_arg_info;
	}

	return supported;
}

const struct bpf_func_proto *
bpf_iter_get_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
	const struct bpf_iter_target_info *tinfo;
	const struct bpf_func_proto *fn = NULL;

	mutex_lock(&targets_mutex);
	list_for_each_entry(tinfo, &targets, list) {
		if (tinfo->btf_id == prog->aux->attach_btf_id) {
			const struct bpf_iter_reg *reg_info;

			reg_info = tinfo->reg_info;
			if (reg_info->get_func_proto)
				fn = reg_info->get_func_proto(func_id, prog);
			break;
		}
	}
	mutex_unlock(&targets_mutex);

	return fn;
}

static void bpf_iter_link_release(struct bpf_link *link)
{
	struct bpf_iter_link *iter_link =
		container_of(link, struct bpf_iter_link, link);

	if (iter_link->tinfo->reg_info->detach_target)
		iter_link->tinfo->reg_info->detach_target(&iter_link->aux);
}

static void bpf_iter_link_dealloc(struct bpf_link *link)
{
	struct bpf_iter_link *iter_link =
		container_of(link, struct bpf_iter_link, link);

	kfree(iter_link);
}

static int bpf_iter_link_replace(struct bpf_link *link,
				 struct bpf_prog *new_prog,
				 struct bpf_prog *old_prog)
{
	int ret = 0;

	mutex_lock(&link_mutex);
	if (old_prog && link->prog != old_prog) {
		ret = -EPERM;
		goto out_unlock;
	}

	if (link->prog->type != new_prog->type ||
	    link->prog->expected_attach_type != new_prog->expected_attach_type ||
	    link->prog->aux->attach_btf_id != new_prog->aux->attach_btf_id) {
		ret = -EINVAL;
		goto out_unlock;
	}

	old_prog = xchg(&link->prog, new_prog);
	bpf_prog_put(old_prog);

out_unlock:
	mutex_unlock(&link_mutex);
	return ret;
}

static void bpf_iter_link_show_fdinfo(const struct bpf_link *link,
				      struct seq_file *seq)
{
	struct bpf_iter_link *iter_link =
		container_of(link, struct bpf_iter_link, link);
	bpf_iter_show_fdinfo_t show_fdinfo;

	seq_printf(seq,
		   "target_name:\t%s\n",
		   iter_link->tinfo->reg_info->target);

	show_fdinfo = iter_link->tinfo->reg_info->show_fdinfo;
	if (show_fdinfo)
		show_fdinfo(&iter_link->aux, seq);
}

static int bpf_iter_link_fill_link_info(const struct bpf_link *link,
					struct bpf_link_info *info)
{
	struct bpf_iter_link *iter_link =
		container_of(link, struct bpf_iter_link, link);
	char __user *ubuf = u64_to_user_ptr(info->iter.target_name);
	bpf_iter_fill_link_info_t fill_link_info;
	u32 ulen = info->iter.target_name_len;
	const char *target_name;
	u32 target_len;

	if (!ulen ^ !ubuf)
		return -EINVAL;

	target_name = iter_link->tinfo->reg_info->target;
	target_len = strlen(target_name);
	info->iter.target_name_len = target_len + 1;

	if (ubuf) {
		if (ulen >= target_len + 1) {
			if (copy_to_user(ubuf, target_name, target_len + 1))
				return -EFAULT;
		} else {
			char zero = '\0';

			if (copy_to_user(ubuf, target_name, ulen - 1))
				return -EFAULT;
			if (put_user(zero, ubuf + ulen - 1))
				return -EFAULT;
			return -ENOSPC;
		}
	}

	fill_link_info = iter_link->tinfo->reg_info->fill_link_info;
	if (fill_link_info)
		return fill_link_info(&iter_link->aux, info);

	return 0;
}

static const struct bpf_link_ops bpf_iter_link_lops = {
	.release = bpf_iter_link_release,
	.dealloc = bpf_iter_link_dealloc,
	.update_prog = bpf_iter_link_replace,
	.show_fdinfo = bpf_iter_link_show_fdinfo,
	.fill_link_info = bpf_iter_link_fill_link_info,
};

bool bpf_link_is_iter(struct bpf_link *link)
{
	return link->ops == &bpf_iter_link_lops;
}

int bpf_iter_link_attach(const union bpf_attr *attr, bpfptr_t uattr,
			 struct bpf_prog *prog)
{
	struct bpf_link_primer link_primer;
	struct bpf_iter_target_info *tinfo;
	union bpf_iter_link_info linfo;
	struct bpf_iter_link *link;
	u32 prog_btf_id, linfo_len;
	bool existed = false;
	bpfptr_t ulinfo;
	int err;

	if (attr->link_create.target_fd || attr->link_create.flags)
		return -EINVAL;

	memset(&linfo, 0, sizeof(union bpf_iter_link_info));

	ulinfo = make_bpfptr(attr->link_create.iter_info, uattr.is_kernel);
	linfo_len = attr->link_create.iter_info_len;
	if (bpfptr_is_null(ulinfo) ^ !linfo_len)
		return -EINVAL;

	if (!bpfptr_is_null(ulinfo)) {
		err = bpf_check_uarg_tail_zero(ulinfo, sizeof(linfo),
					       linfo_len);
		if (err)
			return err;
		linfo_len = min_t(u32, linfo_len, sizeof(linfo));
		if (copy_from_bpfptr(&linfo, ulinfo, linfo_len))
			return -EFAULT;
	}

	prog_btf_id = prog->aux->attach_btf_id;
	mutex_lock(&targets_mutex);
	list_for_each_entry(tinfo, &targets, list) {
		if (tinfo->btf_id == prog_btf_id) {
			existed = true;
			break;
		}
	}
	mutex_unlock(&targets_mutex);
	if (!existed)
		return -ENOENT;

	link = kzalloc(sizeof(*link), GFP_USER | __GFP_NOWARN);
	if (!link)
		return -ENOMEM;

	bpf_link_init(&link->link, BPF_LINK_TYPE_ITER, &bpf_iter_link_lops, prog);
	link->tinfo = tinfo;

	err = bpf_link_prime(&link->link, &link_primer);
	if (err) {
		kfree(link);
		return err;
	}

	if (tinfo->reg_info->attach_target) {
		err = tinfo->reg_info->attach_target(prog, &linfo, &link->aux);
		if (err) {
			bpf_link_cleanup(&link_primer);
			return err;
		}
	}

	return bpf_link_settle(&link_primer);
}
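
/* Illustrative userspace sketch, not part of this file: attaching an
 * iterator program via libbpf and draining it with read(). Assumes a
 * libbpf skeleton; "skel" and its "dump_foo" program are hypothetical,
 * and error handling is elided. The attach call ends up in
 * bpf_iter_link_attach() above; the fd creation and the read()s land in
 * bpf_iter_new_fd() and bpf_seq_read() respectively.
 *
 *	DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
 *	union bpf_iter_link_info linfo;
 *	struct bpf_link *link;
 *	char buf[4096];
 *	int iter_fd, len;
 *
 *	memset(&linfo, 0, sizeof(linfo));
 *	linfo.map.map_fd = map_fd;	// only for map element iterators
 *	opts.link_info = &linfo;
 *	opts.link_info_len = sizeof(linfo);
 *
 *	link = bpf_program__attach_iter(skel->progs.dump_foo, &opts);
 *	iter_fd = bpf_iter_create(bpf_link__fd(link));
 *	while ((len = read(iter_fd, buf, sizeof(buf))) > 0)
 *		write(STDOUT_FILENO, buf, len);
 *	close(iter_fd);
 *	bpf_link__destroy(link);
 */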

static void init_seq_meta(struct bpf_iter_priv_data *priv_data,
			  struct bpf_iter_target_info *tinfo,
			  const struct bpf_iter_seq_info *seq_info,
			  struct bpf_prog *prog)
{
	priv_data->tinfo = tinfo;
	priv_data->seq_info = seq_info;
	priv_data->prog = prog;
	priv_data->session_id = atomic64_inc_return(&session_id);
	priv_data->seq_num = 0;
	priv_data->done_stop = false;
}

static int prepare_seq_file(struct file *file, struct bpf_iter_link *link,
			    const struct bpf_iter_seq_info *seq_info)
{
	struct bpf_iter_priv_data *priv_data;
	struct bpf_iter_target_info *tinfo;
	struct bpf_prog *prog;
	u32 total_priv_dsize;
	struct seq_file *seq;
	int err = 0;

	mutex_lock(&link_mutex);
	prog = link->link.prog;
	bpf_prog_inc(prog);
	mutex_unlock(&link_mutex);

	tinfo = link->tinfo;
	total_priv_dsize = offsetof(struct bpf_iter_priv_data, target_private) +
			   seq_info->seq_priv_size;
	priv_data = __seq_open_private(file, seq_info->seq_ops,
				       total_priv_dsize);
	if (!priv_data) {
		err = -ENOMEM;
		goto release_prog;
	}

	if (seq_info->init_seq_private) {
		err = seq_info->init_seq_private(priv_data->target_private, &link->aux);
		if (err)
			goto release_seq_file;
	}

	init_seq_meta(priv_data, tinfo, seq_info, prog);
	seq = file->private_data;
	seq->private = priv_data->target_private;

	return 0;

release_seq_file:
	seq_release_private(file->f_inode, file);
	file->private_data = NULL;
release_prog:
	bpf_prog_put(prog);
	return err;
}

int bpf_iter_new_fd(struct bpf_link *link)
{
	struct bpf_iter_link *iter_link;
	struct file *file;
	unsigned int flags;
	int err, fd;

	if (link->ops != &bpf_iter_link_lops)
		return -EINVAL;

	flags = O_RDONLY | O_CLOEXEC;
	fd = get_unused_fd_flags(flags);
	if (fd < 0)
		return fd;

	file = anon_inode_getfile("bpf_iter", &bpf_iter_fops, NULL, flags);
	if (IS_ERR(file)) {
		err = PTR_ERR(file);
		goto free_fd;
	}

	iter_link = container_of(link, struct bpf_iter_link, link);
	err = prepare_seq_file(file, iter_link, __get_seq_info(iter_link));
	if (err)
		goto free_file;

	fd_install(fd, file);
	return fd;

free_file:
	fput(file);
free_fd:
	put_unused_fd(fd);
	return err;
}

struct bpf_prog *bpf_iter_get_info(struct bpf_iter_meta *meta, bool in_stop)
{
	struct bpf_iter_priv_data *iter_priv;
	struct seq_file *seq;
	void *seq_priv;

	seq = meta->seq;
	if (seq->file->f_op != &bpf_iter_fops)
		return NULL;

	seq_priv = seq->private;
	iter_priv = container_of(seq_priv, struct bpf_iter_priv_data,
				 target_private);

	if (in_stop && iter_priv->done_stop)
		return NULL;

	meta->session_id = iter_priv->session_id;
	meta->seq_num = iter_priv->seq_num;

	return iter_priv->prog;
}

int bpf_iter_run_prog(struct bpf_prog *prog, void *ctx)
{
	int ret;

	if (prog->aux->sleepable) {
		rcu_read_lock_trace();
		migrate_disable();
		might_fault();
		ret = bpf_prog_run(prog, ctx);
		migrate_enable();
		rcu_read_unlock_trace();
	} else {
		rcu_read_lock();
		migrate_disable();
		ret = bpf_prog_run(prog, ctx);
		migrate_enable();
		rcu_read_unlock();
	}

	/* bpf program can only return 0 or 1:
	 *  0 : okay
	 *  1 : retry the same object
	 * The bpf_iter_run_prog() return value becomes the
	 * seq_ops->show() return value.
	 */
	return ret == 0 ? 0 : -EAGAIN;
}
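
/* Illustrative sketch, not part of this file: how a target's
 * seq_ops->show() typically drives bpf_iter_get_info() and
 * bpf_iter_run_prog() above. The "foo" names are hypothetical; real
 * targets (e.g. map_iter.c, task_iter.c) follow this shape, passing
 * in_stop = true and a NULL object pointer from the stop() path.
 *
 *	static int foo_seq_show(struct seq_file *seq, void *v)
 *	{
 *		struct bpf_iter_meta meta;
 *		struct bpf_iter__foo ctx;
 *		struct bpf_prog *prog;
 *
 *		meta.seq = seq;
 *		prog = bpf_iter_get_info(&meta, false);
 *		if (!prog)
 *			return 0;
 *
 *		ctx.meta = &meta;
 *		ctx.foo = v;
 *		return bpf_iter_run_prog(prog, &ctx);
 *	}
 *
 * A positive return (-EAGAIN mapped by bpf_seq_read()) makes the object
 * be shown again on the next read() with the same seq_num.
 */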

BPF_CALL_4(bpf_for_each_map_elem, struct bpf_map *, map, void *, callback_fn,
	   void *, callback_ctx, u64, flags)
{
	return map->ops->map_for_each_callback(map, callback_fn, callback_ctx, flags);
}

const struct bpf_func_proto bpf_for_each_map_elem_proto = {
	.func = bpf_for_each_map_elem,
	.gpl_only = false,
	.ret_type = RET_INTEGER,
	.arg1_type = ARG_CONST_MAP_PTR,
	.arg2_type = ARG_PTR_TO_FUNC,
	.arg3_type = ARG_PTR_TO_STACK_OR_NULL,
	.arg4_type = ARG_ANYTHING,
};

/* maximum number of loops */
#define MAX_LOOPS	BIT(23)

BPF_CALL_4(bpf_loop, u32, nr_loops, void *, callback_fn, void *, callback_ctx,
	   u64, flags)
{
	bpf_callback_t callback = (bpf_callback_t)callback_fn;
	u64 ret;
	u32 i;

	if (flags)
		return -EINVAL;
	if (nr_loops > MAX_LOOPS)
		return -E2BIG;

	for (i = 0; i < nr_loops; i++) {
		ret = callback((u64)i, (u64)(long)callback_ctx, 0, 0, 0);
		/* return value: 0 - continue, 1 - stop and return */
		if (ret)
			return i + 1;
	}

	return i;
}

const struct bpf_func_proto bpf_loop_proto = {
	.func = bpf_loop,
	.gpl_only = false,
	.ret_type = RET_INTEGER,
	.arg1_type = ARG_ANYTHING,
	.arg2_type = ARG_PTR_TO_FUNC,
	.arg3_type = ARG_PTR_TO_STACK_OR_NULL,
	.arg4_type = ARG_ANYTHING,
};
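
/* Illustrative BPF-side sketch, not part of this file: using the
 * bpf_loop() helper implemented above from a BPF program. The callback
 * returns 0 to continue and 1 to stop early, and bpf_loop() returns the
 * number of iterations performed. "sum_cb" is a hypothetical name, and
 * the surrounding program boilerplate (SEC(), includes) is elided.
 *
 *	static int sum_cb(__u32 idx, void *data)
 *	{
 *		__u64 *sum = data;
 *
 *		*sum += idx;
 *		return 0;	// 0 - continue, 1 - stop early
 *	}
 *
 *	// in some program body:
 *	__u64 sum = 0;
 *	long nr = bpf_loop(100, sum_cb, &sum, 0);	// nr == 100 here
 */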