1 // SPDX-License-Identifier: GPL-2.0-only 2 /* Copyright (c) 2020 Facebook */ 3 4 #include <linux/fs.h> 5 #include <linux/anon_inodes.h> 6 #include <linux/filter.h> 7 #include <linux/bpf.h> 8 9 struct bpf_iter_target_info { 10 struct list_head list; 11 const struct bpf_iter_reg *reg_info; 12 u32 btf_id; /* cached value */ 13 }; 14 15 struct bpf_iter_link { 16 struct bpf_link link; 17 struct bpf_iter_aux_info aux; 18 struct bpf_iter_target_info *tinfo; 19 }; 20 21 struct bpf_iter_priv_data { 22 struct bpf_iter_target_info *tinfo; 23 const struct bpf_iter_seq_info *seq_info; 24 struct bpf_prog *prog; 25 u64 session_id; 26 u64 seq_num; 27 bool done_stop; 28 u8 target_private[] __aligned(8); 29 }; 30 31 static struct list_head targets = LIST_HEAD_INIT(targets); 32 static DEFINE_MUTEX(targets_mutex); 33 34 /* protect bpf_iter_link changes */ 35 static DEFINE_MUTEX(link_mutex); 36 37 /* incremented on every opened seq_file */ 38 static atomic64_t session_id; 39 40 static int prepare_seq_file(struct file *file, struct bpf_iter_link *link, 41 const struct bpf_iter_seq_info *seq_info); 42 43 static void bpf_iter_inc_seq_num(struct seq_file *seq) 44 { 45 struct bpf_iter_priv_data *iter_priv; 46 47 iter_priv = container_of(seq->private, struct bpf_iter_priv_data, 48 target_private); 49 iter_priv->seq_num++; 50 } 51 52 static void bpf_iter_dec_seq_num(struct seq_file *seq) 53 { 54 struct bpf_iter_priv_data *iter_priv; 55 56 iter_priv = container_of(seq->private, struct bpf_iter_priv_data, 57 target_private); 58 iter_priv->seq_num--; 59 } 60 61 static void bpf_iter_done_stop(struct seq_file *seq) 62 { 63 struct bpf_iter_priv_data *iter_priv; 64 65 iter_priv = container_of(seq->private, struct bpf_iter_priv_data, 66 target_private); 67 iter_priv->done_stop = true; 68 } 69 70 /* maximum visited objects before bailing out */ 71 #define MAX_ITER_OBJECTS 1000000 72 73 /* bpf_seq_read, a customized and simpler version for bpf iterator. 74 * no_llseek is assumed for this file. 75 * The following are differences from seq_read(): 76 * . fixed buffer size (PAGE_SIZE) 77 * . assuming no_llseek 78 * . stop() may call bpf program, handling potential overflow there 79 */ 80 static ssize_t bpf_seq_read(struct file *file, char __user *buf, size_t size, 81 loff_t *ppos) 82 { 83 struct seq_file *seq = file->private_data; 84 size_t n, offs, copied = 0; 85 int err = 0, num_objs = 0; 86 void *p; 87 88 mutex_lock(&seq->lock); 89 90 if (!seq->buf) { 91 seq->size = PAGE_SIZE; 92 seq->buf = kmalloc(seq->size, GFP_KERNEL); 93 if (!seq->buf) { 94 err = -ENOMEM; 95 goto done; 96 } 97 } 98 99 if (seq->count) { 100 n = min(seq->count, size); 101 err = copy_to_user(buf, seq->buf + seq->from, n); 102 if (err) { 103 err = -EFAULT; 104 goto done; 105 } 106 seq->count -= n; 107 seq->from += n; 108 copied = n; 109 goto done; 110 } 111 112 seq->from = 0; 113 p = seq->op->start(seq, &seq->index); 114 if (!p) 115 goto stop; 116 if (IS_ERR(p)) { 117 err = PTR_ERR(p); 118 seq->op->stop(seq, p); 119 seq->count = 0; 120 goto done; 121 } 122 123 err = seq->op->show(seq, p); 124 if (err > 0) { 125 /* object is skipped, decrease seq_num, so next 126 * valid object can reuse the same seq_num. 127 */ 128 bpf_iter_dec_seq_num(seq); 129 seq->count = 0; 130 } else if (err < 0 || seq_has_overflowed(seq)) { 131 if (!err) 132 err = -E2BIG; 133 seq->op->stop(seq, p); 134 seq->count = 0; 135 goto done; 136 } 137 138 while (1) { 139 loff_t pos = seq->index; 140 141 num_objs++; 142 offs = seq->count; 143 p = seq->op->next(seq, p, &seq->index); 144 if (pos == seq->index) { 145 pr_info_ratelimited("buggy seq_file .next function %ps " 146 "did not updated position index\n", 147 seq->op->next); 148 seq->index++; 149 } 150 151 if (IS_ERR_OR_NULL(p)) 152 break; 153 154 /* got a valid next object, increase seq_num */ 155 bpf_iter_inc_seq_num(seq); 156 157 if (seq->count >= size) 158 break; 159 160 if (num_objs >= MAX_ITER_OBJECTS) { 161 if (offs == 0) { 162 err = -EAGAIN; 163 seq->op->stop(seq, p); 164 goto done; 165 } 166 break; 167 } 168 169 err = seq->op->show(seq, p); 170 if (err > 0) { 171 bpf_iter_dec_seq_num(seq); 172 seq->count = offs; 173 } else if (err < 0 || seq_has_overflowed(seq)) { 174 seq->count = offs; 175 if (offs == 0) { 176 if (!err) 177 err = -E2BIG; 178 seq->op->stop(seq, p); 179 goto done; 180 } 181 break; 182 } 183 } 184 stop: 185 offs = seq->count; 186 /* bpf program called if !p */ 187 seq->op->stop(seq, p); 188 if (!p) { 189 if (!seq_has_overflowed(seq)) { 190 bpf_iter_done_stop(seq); 191 } else { 192 seq->count = offs; 193 if (offs == 0) { 194 err = -E2BIG; 195 goto done; 196 } 197 } 198 } 199 200 n = min(seq->count, size); 201 err = copy_to_user(buf, seq->buf, n); 202 if (err) { 203 err = -EFAULT; 204 goto done; 205 } 206 copied = n; 207 seq->count -= n; 208 seq->from = n; 209 done: 210 if (!copied) 211 copied = err; 212 else 213 *ppos += copied; 214 mutex_unlock(&seq->lock); 215 return copied; 216 } 217 218 static const struct bpf_iter_seq_info * 219 __get_seq_info(struct bpf_iter_link *link) 220 { 221 const struct bpf_iter_seq_info *seq_info; 222 223 if (link->aux.map) { 224 seq_info = link->aux.map->ops->iter_seq_info; 225 if (seq_info) 226 return seq_info; 227 } 228 229 return link->tinfo->reg_info->seq_info; 230 } 231 232 static int iter_open(struct inode *inode, struct file *file) 233 { 234 struct bpf_iter_link *link = inode->i_private; 235 236 return prepare_seq_file(file, link, __get_seq_info(link)); 237 } 238 239 static int iter_release(struct inode *inode, struct file *file) 240 { 241 struct bpf_iter_priv_data *iter_priv; 242 struct seq_file *seq; 243 244 seq = file->private_data; 245 if (!seq) 246 return 0; 247 248 iter_priv = container_of(seq->private, struct bpf_iter_priv_data, 249 target_private); 250 251 if (iter_priv->seq_info->fini_seq_private) 252 iter_priv->seq_info->fini_seq_private(seq->private); 253 254 bpf_prog_put(iter_priv->prog); 255 seq->private = iter_priv; 256 257 return seq_release_private(inode, file); 258 } 259 260 const struct file_operations bpf_iter_fops = { 261 .open = iter_open, 262 .llseek = no_llseek, 263 .read = bpf_seq_read, 264 .release = iter_release, 265 }; 266 267 /* The argument reg_info will be cached in bpf_iter_target_info. 268 * The common practice is to declare target reg_info as 269 * a const static variable and passed as an argument to 270 * bpf_iter_reg_target(). 271 */ 272 int bpf_iter_reg_target(const struct bpf_iter_reg *reg_info) 273 { 274 struct bpf_iter_target_info *tinfo; 275 276 tinfo = kmalloc(sizeof(*tinfo), GFP_KERNEL); 277 if (!tinfo) 278 return -ENOMEM; 279 280 tinfo->reg_info = reg_info; 281 INIT_LIST_HEAD(&tinfo->list); 282 283 mutex_lock(&targets_mutex); 284 list_add(&tinfo->list, &targets); 285 mutex_unlock(&targets_mutex); 286 287 return 0; 288 } 289 290 void bpf_iter_unreg_target(const struct bpf_iter_reg *reg_info) 291 { 292 struct bpf_iter_target_info *tinfo; 293 bool found = false; 294 295 mutex_lock(&targets_mutex); 296 list_for_each_entry(tinfo, &targets, list) { 297 if (reg_info == tinfo->reg_info) { 298 list_del(&tinfo->list); 299 kfree(tinfo); 300 found = true; 301 break; 302 } 303 } 304 mutex_unlock(&targets_mutex); 305 306 WARN_ON(found == false); 307 } 308 309 static void cache_btf_id(struct bpf_iter_target_info *tinfo, 310 struct bpf_prog *prog) 311 { 312 tinfo->btf_id = prog->aux->attach_btf_id; 313 } 314 315 bool bpf_iter_prog_supported(struct bpf_prog *prog) 316 { 317 const char *attach_fname = prog->aux->attach_func_name; 318 u32 prog_btf_id = prog->aux->attach_btf_id; 319 const char *prefix = BPF_ITER_FUNC_PREFIX; 320 struct bpf_iter_target_info *tinfo; 321 int prefix_len = strlen(prefix); 322 bool supported = false; 323 324 if (strncmp(attach_fname, prefix, prefix_len)) 325 return false; 326 327 mutex_lock(&targets_mutex); 328 list_for_each_entry(tinfo, &targets, list) { 329 if (tinfo->btf_id && tinfo->btf_id == prog_btf_id) { 330 supported = true; 331 break; 332 } 333 if (!strcmp(attach_fname + prefix_len, tinfo->reg_info->target)) { 334 cache_btf_id(tinfo, prog); 335 supported = true; 336 break; 337 } 338 } 339 mutex_unlock(&targets_mutex); 340 341 if (supported) { 342 prog->aux->ctx_arg_info_size = tinfo->reg_info->ctx_arg_info_size; 343 prog->aux->ctx_arg_info = tinfo->reg_info->ctx_arg_info; 344 } 345 346 return supported; 347 } 348 349 static void bpf_iter_link_release(struct bpf_link *link) 350 { 351 struct bpf_iter_link *iter_link = 352 container_of(link, struct bpf_iter_link, link); 353 354 if (iter_link->tinfo->reg_info->detach_target) 355 iter_link->tinfo->reg_info->detach_target(&iter_link->aux); 356 } 357 358 static void bpf_iter_link_dealloc(struct bpf_link *link) 359 { 360 struct bpf_iter_link *iter_link = 361 container_of(link, struct bpf_iter_link, link); 362 363 kfree(iter_link); 364 } 365 366 static int bpf_iter_link_replace(struct bpf_link *link, 367 struct bpf_prog *new_prog, 368 struct bpf_prog *old_prog) 369 { 370 int ret = 0; 371 372 mutex_lock(&link_mutex); 373 if (old_prog && link->prog != old_prog) { 374 ret = -EPERM; 375 goto out_unlock; 376 } 377 378 if (link->prog->type != new_prog->type || 379 link->prog->expected_attach_type != new_prog->expected_attach_type || 380 link->prog->aux->attach_btf_id != new_prog->aux->attach_btf_id) { 381 ret = -EINVAL; 382 goto out_unlock; 383 } 384 385 old_prog = xchg(&link->prog, new_prog); 386 bpf_prog_put(old_prog); 387 388 out_unlock: 389 mutex_unlock(&link_mutex); 390 return ret; 391 } 392 393 static void bpf_iter_link_show_fdinfo(const struct bpf_link *link, 394 struct seq_file *seq) 395 { 396 struct bpf_iter_link *iter_link = 397 container_of(link, struct bpf_iter_link, link); 398 bpf_iter_show_fdinfo_t show_fdinfo; 399 400 seq_printf(seq, 401 "target_name:\t%s\n", 402 iter_link->tinfo->reg_info->target); 403 404 show_fdinfo = iter_link->tinfo->reg_info->show_fdinfo; 405 if (show_fdinfo) 406 show_fdinfo(&iter_link->aux, seq); 407 } 408 409 static int bpf_iter_link_fill_link_info(const struct bpf_link *link, 410 struct bpf_link_info *info) 411 { 412 struct bpf_iter_link *iter_link = 413 container_of(link, struct bpf_iter_link, link); 414 char __user *ubuf = u64_to_user_ptr(info->iter.target_name); 415 bpf_iter_fill_link_info_t fill_link_info; 416 u32 ulen = info->iter.target_name_len; 417 const char *target_name; 418 u32 target_len; 419 420 if (!ulen ^ !ubuf) 421 return -EINVAL; 422 423 target_name = iter_link->tinfo->reg_info->target; 424 target_len = strlen(target_name); 425 info->iter.target_name_len = target_len + 1; 426 427 if (ubuf) { 428 if (ulen >= target_len + 1) { 429 if (copy_to_user(ubuf, target_name, target_len + 1)) 430 return -EFAULT; 431 } else { 432 char zero = '\0'; 433 434 if (copy_to_user(ubuf, target_name, ulen - 1)) 435 return -EFAULT; 436 if (put_user(zero, ubuf + ulen - 1)) 437 return -EFAULT; 438 return -ENOSPC; 439 } 440 } 441 442 fill_link_info = iter_link->tinfo->reg_info->fill_link_info; 443 if (fill_link_info) 444 return fill_link_info(&iter_link->aux, info); 445 446 return 0; 447 } 448 449 static const struct bpf_link_ops bpf_iter_link_lops = { 450 .release = bpf_iter_link_release, 451 .dealloc = bpf_iter_link_dealloc, 452 .update_prog = bpf_iter_link_replace, 453 .show_fdinfo = bpf_iter_link_show_fdinfo, 454 .fill_link_info = bpf_iter_link_fill_link_info, 455 }; 456 457 bool bpf_link_is_iter(struct bpf_link *link) 458 { 459 return link->ops == &bpf_iter_link_lops; 460 } 461 462 int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) 463 { 464 union bpf_iter_link_info __user *ulinfo; 465 struct bpf_link_primer link_primer; 466 struct bpf_iter_target_info *tinfo; 467 union bpf_iter_link_info linfo; 468 struct bpf_iter_link *link; 469 u32 prog_btf_id, linfo_len; 470 bool existed = false; 471 int err; 472 473 if (attr->link_create.target_fd || attr->link_create.flags) 474 return -EINVAL; 475 476 memset(&linfo, 0, sizeof(union bpf_iter_link_info)); 477 478 ulinfo = u64_to_user_ptr(attr->link_create.iter_info); 479 linfo_len = attr->link_create.iter_info_len; 480 if (!ulinfo ^ !linfo_len) 481 return -EINVAL; 482 483 if (ulinfo) { 484 err = bpf_check_uarg_tail_zero(ulinfo, sizeof(linfo), 485 linfo_len); 486 if (err) 487 return err; 488 linfo_len = min_t(u32, linfo_len, sizeof(linfo)); 489 if (copy_from_user(&linfo, ulinfo, linfo_len)) 490 return -EFAULT; 491 } 492 493 prog_btf_id = prog->aux->attach_btf_id; 494 mutex_lock(&targets_mutex); 495 list_for_each_entry(tinfo, &targets, list) { 496 if (tinfo->btf_id == prog_btf_id) { 497 existed = true; 498 break; 499 } 500 } 501 mutex_unlock(&targets_mutex); 502 if (!existed) 503 return -ENOENT; 504 505 link = kzalloc(sizeof(*link), GFP_USER | __GFP_NOWARN); 506 if (!link) 507 return -ENOMEM; 508 509 bpf_link_init(&link->link, BPF_LINK_TYPE_ITER, &bpf_iter_link_lops, prog); 510 link->tinfo = tinfo; 511 512 err = bpf_link_prime(&link->link, &link_primer); 513 if (err) { 514 kfree(link); 515 return err; 516 } 517 518 if (tinfo->reg_info->attach_target) { 519 err = tinfo->reg_info->attach_target(prog, &linfo, &link->aux); 520 if (err) { 521 bpf_link_cleanup(&link_primer); 522 return err; 523 } 524 } 525 526 return bpf_link_settle(&link_primer); 527 } 528 529 static void init_seq_meta(struct bpf_iter_priv_data *priv_data, 530 struct bpf_iter_target_info *tinfo, 531 const struct bpf_iter_seq_info *seq_info, 532 struct bpf_prog *prog) 533 { 534 priv_data->tinfo = tinfo; 535 priv_data->seq_info = seq_info; 536 priv_data->prog = prog; 537 priv_data->session_id = atomic64_inc_return(&session_id); 538 priv_data->seq_num = 0; 539 priv_data->done_stop = false; 540 } 541 542 static int prepare_seq_file(struct file *file, struct bpf_iter_link *link, 543 const struct bpf_iter_seq_info *seq_info) 544 { 545 struct bpf_iter_priv_data *priv_data; 546 struct bpf_iter_target_info *tinfo; 547 struct bpf_prog *prog; 548 u32 total_priv_dsize; 549 struct seq_file *seq; 550 int err = 0; 551 552 mutex_lock(&link_mutex); 553 prog = link->link.prog; 554 bpf_prog_inc(prog); 555 mutex_unlock(&link_mutex); 556 557 tinfo = link->tinfo; 558 total_priv_dsize = offsetof(struct bpf_iter_priv_data, target_private) + 559 seq_info->seq_priv_size; 560 priv_data = __seq_open_private(file, seq_info->seq_ops, 561 total_priv_dsize); 562 if (!priv_data) { 563 err = -ENOMEM; 564 goto release_prog; 565 } 566 567 if (seq_info->init_seq_private) { 568 err = seq_info->init_seq_private(priv_data->target_private, &link->aux); 569 if (err) 570 goto release_seq_file; 571 } 572 573 init_seq_meta(priv_data, tinfo, seq_info, prog); 574 seq = file->private_data; 575 seq->private = priv_data->target_private; 576 577 return 0; 578 579 release_seq_file: 580 seq_release_private(file->f_inode, file); 581 file->private_data = NULL; 582 release_prog: 583 bpf_prog_put(prog); 584 return err; 585 } 586 587 int bpf_iter_new_fd(struct bpf_link *link) 588 { 589 struct bpf_iter_link *iter_link; 590 struct file *file; 591 unsigned int flags; 592 int err, fd; 593 594 if (link->ops != &bpf_iter_link_lops) 595 return -EINVAL; 596 597 flags = O_RDONLY | O_CLOEXEC; 598 fd = get_unused_fd_flags(flags); 599 if (fd < 0) 600 return fd; 601 602 file = anon_inode_getfile("bpf_iter", &bpf_iter_fops, NULL, flags); 603 if (IS_ERR(file)) { 604 err = PTR_ERR(file); 605 goto free_fd; 606 } 607 608 iter_link = container_of(link, struct bpf_iter_link, link); 609 err = prepare_seq_file(file, iter_link, __get_seq_info(iter_link)); 610 if (err) 611 goto free_file; 612 613 fd_install(fd, file); 614 return fd; 615 616 free_file: 617 fput(file); 618 free_fd: 619 put_unused_fd(fd); 620 return err; 621 } 622 623 struct bpf_prog *bpf_iter_get_info(struct bpf_iter_meta *meta, bool in_stop) 624 { 625 struct bpf_iter_priv_data *iter_priv; 626 struct seq_file *seq; 627 void *seq_priv; 628 629 seq = meta->seq; 630 if (seq->file->f_op != &bpf_iter_fops) 631 return NULL; 632 633 seq_priv = seq->private; 634 iter_priv = container_of(seq_priv, struct bpf_iter_priv_data, 635 target_private); 636 637 if (in_stop && iter_priv->done_stop) 638 return NULL; 639 640 meta->session_id = iter_priv->session_id; 641 meta->seq_num = iter_priv->seq_num; 642 643 return iter_priv->prog; 644 } 645 646 int bpf_iter_run_prog(struct bpf_prog *prog, void *ctx) 647 { 648 int ret; 649 650 rcu_read_lock(); 651 migrate_disable(); 652 ret = BPF_PROG_RUN(prog, ctx); 653 migrate_enable(); 654 rcu_read_unlock(); 655 656 /* bpf program can only return 0 or 1: 657 * 0 : okay 658 * 1 : retry the same object 659 * The bpf_iter_run_prog() return value 660 * will be seq_ops->show() return value. 661 */ 662 return ret == 0 ? 0 : -EAGAIN; 663 } 664