1 // SPDX-License-Identifier: GPL-2.0-only 2 /* Copyright (c) 2020 Facebook */ 3 4 #include <linux/fs.h> 5 #include <linux/anon_inodes.h> 6 #include <linux/filter.h> 7 #include <linux/bpf.h> 8 9 struct bpf_iter_target_info { 10 struct list_head list; 11 const struct bpf_iter_reg *reg_info; 12 u32 btf_id; /* cached value */ 13 }; 14 15 struct bpf_iter_link { 16 struct bpf_link link; 17 struct bpf_iter_aux_info aux; 18 struct bpf_iter_target_info *tinfo; 19 }; 20 21 struct bpf_iter_priv_data { 22 struct bpf_iter_target_info *tinfo; 23 const struct bpf_iter_seq_info *seq_info; 24 struct bpf_prog *prog; 25 u64 session_id; 26 u64 seq_num; 27 bool done_stop; 28 u8 target_private[] __aligned(8); 29 }; 30 31 static struct list_head targets = LIST_HEAD_INIT(targets); 32 static DEFINE_MUTEX(targets_mutex); 33 34 /* protect bpf_iter_link changes */ 35 static DEFINE_MUTEX(link_mutex); 36 37 /* incremented on every opened seq_file */ 38 static atomic64_t session_id; 39 40 static int prepare_seq_file(struct file *file, struct bpf_iter_link *link, 41 const struct bpf_iter_seq_info *seq_info); 42 43 static void bpf_iter_inc_seq_num(struct seq_file *seq) 44 { 45 struct bpf_iter_priv_data *iter_priv; 46 47 iter_priv = container_of(seq->private, struct bpf_iter_priv_data, 48 target_private); 49 iter_priv->seq_num++; 50 } 51 52 static void bpf_iter_dec_seq_num(struct seq_file *seq) 53 { 54 struct bpf_iter_priv_data *iter_priv; 55 56 iter_priv = container_of(seq->private, struct bpf_iter_priv_data, 57 target_private); 58 iter_priv->seq_num--; 59 } 60 61 static void bpf_iter_done_stop(struct seq_file *seq) 62 { 63 struct bpf_iter_priv_data *iter_priv; 64 65 iter_priv = container_of(seq->private, struct bpf_iter_priv_data, 66 target_private); 67 iter_priv->done_stop = true; 68 } 69 70 /* maximum visited objects before bailing out */ 71 #define MAX_ITER_OBJECTS 1000000 72 73 /* bpf_seq_read, a customized and simpler version for bpf iterator. 74 * no_llseek is assumed for this file. 75 * The following are differences from seq_read(): 76 * . fixed buffer size (PAGE_SIZE) 77 * . assuming no_llseek 78 * . stop() may call bpf program, handling potential overflow there 79 */ 80 static ssize_t bpf_seq_read(struct file *file, char __user *buf, size_t size, 81 loff_t *ppos) 82 { 83 struct seq_file *seq = file->private_data; 84 size_t n, offs, copied = 0; 85 int err = 0, num_objs = 0; 86 void *p; 87 88 mutex_lock(&seq->lock); 89 90 if (!seq->buf) { 91 seq->size = PAGE_SIZE; 92 seq->buf = kmalloc(seq->size, GFP_KERNEL); 93 if (!seq->buf) { 94 err = -ENOMEM; 95 goto done; 96 } 97 } 98 99 if (seq->count) { 100 n = min(seq->count, size); 101 err = copy_to_user(buf, seq->buf + seq->from, n); 102 if (err) { 103 err = -EFAULT; 104 goto done; 105 } 106 seq->count -= n; 107 seq->from += n; 108 copied = n; 109 goto done; 110 } 111 112 seq->from = 0; 113 p = seq->op->start(seq, &seq->index); 114 if (!p) 115 goto stop; 116 if (IS_ERR(p)) { 117 err = PTR_ERR(p); 118 seq->op->stop(seq, p); 119 seq->count = 0; 120 goto done; 121 } 122 123 err = seq->op->show(seq, p); 124 if (err > 0) { 125 /* object is skipped, decrease seq_num, so next 126 * valid object can reuse the same seq_num. 127 */ 128 bpf_iter_dec_seq_num(seq); 129 seq->count = 0; 130 } else if (err < 0 || seq_has_overflowed(seq)) { 131 if (!err) 132 err = -E2BIG; 133 seq->op->stop(seq, p); 134 seq->count = 0; 135 goto done; 136 } 137 138 while (1) { 139 loff_t pos = seq->index; 140 141 num_objs++; 142 offs = seq->count; 143 p = seq->op->next(seq, p, &seq->index); 144 if (pos == seq->index) { 145 pr_info_ratelimited("buggy seq_file .next function %ps " 146 "did not updated position index\n", 147 seq->op->next); 148 seq->index++; 149 } 150 151 if (IS_ERR_OR_NULL(p)) 152 break; 153 154 /* got a valid next object, increase seq_num */ 155 bpf_iter_inc_seq_num(seq); 156 157 if (seq->count >= size) 158 break; 159 160 if (num_objs >= MAX_ITER_OBJECTS) { 161 if (offs == 0) { 162 err = -EAGAIN; 163 seq->op->stop(seq, p); 164 goto done; 165 } 166 break; 167 } 168 169 err = seq->op->show(seq, p); 170 if (err > 0) { 171 bpf_iter_dec_seq_num(seq); 172 seq->count = offs; 173 } else if (err < 0 || seq_has_overflowed(seq)) { 174 seq->count = offs; 175 if (offs == 0) { 176 if (!err) 177 err = -E2BIG; 178 seq->op->stop(seq, p); 179 goto done; 180 } 181 break; 182 } 183 } 184 stop: 185 offs = seq->count; 186 /* bpf program called if !p */ 187 seq->op->stop(seq, p); 188 if (!p) { 189 if (!seq_has_overflowed(seq)) { 190 bpf_iter_done_stop(seq); 191 } else { 192 seq->count = offs; 193 if (offs == 0) { 194 err = -E2BIG; 195 goto done; 196 } 197 } 198 } 199 200 n = min(seq->count, size); 201 err = copy_to_user(buf, seq->buf, n); 202 if (err) { 203 err = -EFAULT; 204 goto done; 205 } 206 copied = n; 207 seq->count -= n; 208 seq->from = n; 209 done: 210 if (!copied) 211 copied = err; 212 else 213 *ppos += copied; 214 mutex_unlock(&seq->lock); 215 return copied; 216 } 217 218 static const struct bpf_iter_seq_info * 219 __get_seq_info(struct bpf_iter_link *link) 220 { 221 const struct bpf_iter_seq_info *seq_info; 222 223 if (link->aux.map) { 224 seq_info = link->aux.map->ops->iter_seq_info; 225 if (seq_info) 226 return seq_info; 227 } 228 229 return link->tinfo->reg_info->seq_info; 230 } 231 232 static int iter_open(struct inode *inode, struct file *file) 233 { 234 struct bpf_iter_link *link = inode->i_private; 235 236 return prepare_seq_file(file, link, __get_seq_info(link)); 237 } 238 239 static int iter_release(struct inode *inode, struct file *file) 240 { 241 struct bpf_iter_priv_data *iter_priv; 242 struct seq_file *seq; 243 244 seq = file->private_data; 245 if (!seq) 246 return 0; 247 248 iter_priv = container_of(seq->private, struct bpf_iter_priv_data, 249 target_private); 250 251 if (iter_priv->seq_info->fini_seq_private) 252 iter_priv->seq_info->fini_seq_private(seq->private); 253 254 bpf_prog_put(iter_priv->prog); 255 seq->private = iter_priv; 256 257 return seq_release_private(inode, file); 258 } 259 260 const struct file_operations bpf_iter_fops = { 261 .open = iter_open, 262 .llseek = no_llseek, 263 .read = bpf_seq_read, 264 .release = iter_release, 265 }; 266 267 /* The argument reg_info will be cached in bpf_iter_target_info. 268 * The common practice is to declare target reg_info as 269 * a const static variable and passed as an argument to 270 * bpf_iter_reg_target(). 271 */ 272 int bpf_iter_reg_target(const struct bpf_iter_reg *reg_info) 273 { 274 struct bpf_iter_target_info *tinfo; 275 276 tinfo = kmalloc(sizeof(*tinfo), GFP_KERNEL); 277 if (!tinfo) 278 return -ENOMEM; 279 280 tinfo->reg_info = reg_info; 281 INIT_LIST_HEAD(&tinfo->list); 282 283 mutex_lock(&targets_mutex); 284 list_add(&tinfo->list, &targets); 285 mutex_unlock(&targets_mutex); 286 287 return 0; 288 } 289 290 void bpf_iter_unreg_target(const struct bpf_iter_reg *reg_info) 291 { 292 struct bpf_iter_target_info *tinfo; 293 bool found = false; 294 295 mutex_lock(&targets_mutex); 296 list_for_each_entry(tinfo, &targets, list) { 297 if (reg_info == tinfo->reg_info) { 298 list_del(&tinfo->list); 299 kfree(tinfo); 300 found = true; 301 break; 302 } 303 } 304 mutex_unlock(&targets_mutex); 305 306 WARN_ON(found == false); 307 } 308 309 static void cache_btf_id(struct bpf_iter_target_info *tinfo, 310 struct bpf_prog *prog) 311 { 312 tinfo->btf_id = prog->aux->attach_btf_id; 313 } 314 315 bool bpf_iter_prog_supported(struct bpf_prog *prog) 316 { 317 const char *attach_fname = prog->aux->attach_func_name; 318 u32 prog_btf_id = prog->aux->attach_btf_id; 319 const char *prefix = BPF_ITER_FUNC_PREFIX; 320 struct bpf_iter_target_info *tinfo; 321 int prefix_len = strlen(prefix); 322 bool supported = false; 323 324 if (strncmp(attach_fname, prefix, prefix_len)) 325 return false; 326 327 mutex_lock(&targets_mutex); 328 list_for_each_entry(tinfo, &targets, list) { 329 if (tinfo->btf_id && tinfo->btf_id == prog_btf_id) { 330 supported = true; 331 break; 332 } 333 if (!strcmp(attach_fname + prefix_len, tinfo->reg_info->target)) { 334 cache_btf_id(tinfo, prog); 335 supported = true; 336 break; 337 } 338 } 339 mutex_unlock(&targets_mutex); 340 341 if (supported) { 342 prog->aux->ctx_arg_info_size = tinfo->reg_info->ctx_arg_info_size; 343 prog->aux->ctx_arg_info = tinfo->reg_info->ctx_arg_info; 344 } 345 346 return supported; 347 } 348 349 static void bpf_iter_link_release(struct bpf_link *link) 350 { 351 struct bpf_iter_link *iter_link = 352 container_of(link, struct bpf_iter_link, link); 353 354 if (iter_link->tinfo->reg_info->detach_target) 355 iter_link->tinfo->reg_info->detach_target(&iter_link->aux); 356 } 357 358 static void bpf_iter_link_dealloc(struct bpf_link *link) 359 { 360 struct bpf_iter_link *iter_link = 361 container_of(link, struct bpf_iter_link, link); 362 363 kfree(iter_link); 364 } 365 366 static int bpf_iter_link_replace(struct bpf_link *link, 367 struct bpf_prog *new_prog, 368 struct bpf_prog *old_prog) 369 { 370 int ret = 0; 371 372 mutex_lock(&link_mutex); 373 if (old_prog && link->prog != old_prog) { 374 ret = -EPERM; 375 goto out_unlock; 376 } 377 378 if (link->prog->type != new_prog->type || 379 link->prog->expected_attach_type != new_prog->expected_attach_type || 380 link->prog->aux->attach_btf_id != new_prog->aux->attach_btf_id) { 381 ret = -EINVAL; 382 goto out_unlock; 383 } 384 385 old_prog = xchg(&link->prog, new_prog); 386 bpf_prog_put(old_prog); 387 388 out_unlock: 389 mutex_unlock(&link_mutex); 390 return ret; 391 } 392 393 static const struct bpf_link_ops bpf_iter_link_lops = { 394 .release = bpf_iter_link_release, 395 .dealloc = bpf_iter_link_dealloc, 396 .update_prog = bpf_iter_link_replace, 397 }; 398 399 bool bpf_link_is_iter(struct bpf_link *link) 400 { 401 return link->ops == &bpf_iter_link_lops; 402 } 403 404 int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) 405 { 406 union bpf_iter_link_info __user *ulinfo; 407 struct bpf_link_primer link_primer; 408 struct bpf_iter_target_info *tinfo; 409 union bpf_iter_link_info linfo; 410 struct bpf_iter_link *link; 411 u32 prog_btf_id, linfo_len; 412 bool existed = false; 413 int err; 414 415 if (attr->link_create.target_fd || attr->link_create.flags) 416 return -EINVAL; 417 418 memset(&linfo, 0, sizeof(union bpf_iter_link_info)); 419 420 ulinfo = u64_to_user_ptr(attr->link_create.iter_info); 421 linfo_len = attr->link_create.iter_info_len; 422 if (!ulinfo ^ !linfo_len) 423 return -EINVAL; 424 425 if (ulinfo) { 426 err = bpf_check_uarg_tail_zero(ulinfo, sizeof(linfo), 427 linfo_len); 428 if (err) 429 return err; 430 linfo_len = min_t(u32, linfo_len, sizeof(linfo)); 431 if (copy_from_user(&linfo, ulinfo, linfo_len)) 432 return -EFAULT; 433 } 434 435 prog_btf_id = prog->aux->attach_btf_id; 436 mutex_lock(&targets_mutex); 437 list_for_each_entry(tinfo, &targets, list) { 438 if (tinfo->btf_id == prog_btf_id) { 439 existed = true; 440 break; 441 } 442 } 443 mutex_unlock(&targets_mutex); 444 if (!existed) 445 return -ENOENT; 446 447 link = kzalloc(sizeof(*link), GFP_USER | __GFP_NOWARN); 448 if (!link) 449 return -ENOMEM; 450 451 bpf_link_init(&link->link, BPF_LINK_TYPE_ITER, &bpf_iter_link_lops, prog); 452 link->tinfo = tinfo; 453 454 err = bpf_link_prime(&link->link, &link_primer); 455 if (err) { 456 kfree(link); 457 return err; 458 } 459 460 if (tinfo->reg_info->attach_target) { 461 err = tinfo->reg_info->attach_target(prog, &linfo, &link->aux); 462 if (err) { 463 bpf_link_cleanup(&link_primer); 464 return err; 465 } 466 } 467 468 return bpf_link_settle(&link_primer); 469 } 470 471 static void init_seq_meta(struct bpf_iter_priv_data *priv_data, 472 struct bpf_iter_target_info *tinfo, 473 const struct bpf_iter_seq_info *seq_info, 474 struct bpf_prog *prog) 475 { 476 priv_data->tinfo = tinfo; 477 priv_data->seq_info = seq_info; 478 priv_data->prog = prog; 479 priv_data->session_id = atomic64_inc_return(&session_id); 480 priv_data->seq_num = 0; 481 priv_data->done_stop = false; 482 } 483 484 static int prepare_seq_file(struct file *file, struct bpf_iter_link *link, 485 const struct bpf_iter_seq_info *seq_info) 486 { 487 struct bpf_iter_priv_data *priv_data; 488 struct bpf_iter_target_info *tinfo; 489 struct bpf_prog *prog; 490 u32 total_priv_dsize; 491 struct seq_file *seq; 492 int err = 0; 493 494 mutex_lock(&link_mutex); 495 prog = link->link.prog; 496 bpf_prog_inc(prog); 497 mutex_unlock(&link_mutex); 498 499 tinfo = link->tinfo; 500 total_priv_dsize = offsetof(struct bpf_iter_priv_data, target_private) + 501 seq_info->seq_priv_size; 502 priv_data = __seq_open_private(file, seq_info->seq_ops, 503 total_priv_dsize); 504 if (!priv_data) { 505 err = -ENOMEM; 506 goto release_prog; 507 } 508 509 if (seq_info->init_seq_private) { 510 err = seq_info->init_seq_private(priv_data->target_private, &link->aux); 511 if (err) 512 goto release_seq_file; 513 } 514 515 init_seq_meta(priv_data, tinfo, seq_info, prog); 516 seq = file->private_data; 517 seq->private = priv_data->target_private; 518 519 return 0; 520 521 release_seq_file: 522 seq_release_private(file->f_inode, file); 523 file->private_data = NULL; 524 release_prog: 525 bpf_prog_put(prog); 526 return err; 527 } 528 529 int bpf_iter_new_fd(struct bpf_link *link) 530 { 531 struct bpf_iter_link *iter_link; 532 struct file *file; 533 unsigned int flags; 534 int err, fd; 535 536 if (link->ops != &bpf_iter_link_lops) 537 return -EINVAL; 538 539 flags = O_RDONLY | O_CLOEXEC; 540 fd = get_unused_fd_flags(flags); 541 if (fd < 0) 542 return fd; 543 544 file = anon_inode_getfile("bpf_iter", &bpf_iter_fops, NULL, flags); 545 if (IS_ERR(file)) { 546 err = PTR_ERR(file); 547 goto free_fd; 548 } 549 550 iter_link = container_of(link, struct bpf_iter_link, link); 551 err = prepare_seq_file(file, iter_link, __get_seq_info(iter_link)); 552 if (err) 553 goto free_file; 554 555 fd_install(fd, file); 556 return fd; 557 558 free_file: 559 fput(file); 560 free_fd: 561 put_unused_fd(fd); 562 return err; 563 } 564 565 struct bpf_prog *bpf_iter_get_info(struct bpf_iter_meta *meta, bool in_stop) 566 { 567 struct bpf_iter_priv_data *iter_priv; 568 struct seq_file *seq; 569 void *seq_priv; 570 571 seq = meta->seq; 572 if (seq->file->f_op != &bpf_iter_fops) 573 return NULL; 574 575 seq_priv = seq->private; 576 iter_priv = container_of(seq_priv, struct bpf_iter_priv_data, 577 target_private); 578 579 if (in_stop && iter_priv->done_stop) 580 return NULL; 581 582 meta->session_id = iter_priv->session_id; 583 meta->seq_num = iter_priv->seq_num; 584 585 return iter_priv->prog; 586 } 587 588 int bpf_iter_run_prog(struct bpf_prog *prog, void *ctx) 589 { 590 int ret; 591 592 rcu_read_lock(); 593 migrate_disable(); 594 ret = BPF_PROG_RUN(prog, ctx); 595 migrate_enable(); 596 rcu_read_unlock(); 597 598 /* bpf program can only return 0 or 1: 599 * 0 : okay 600 * 1 : retry the same object 601 * The bpf_iter_run_prog() return value 602 * will be seq_ops->show() return value. 603 */ 604 return ret == 0 ? 0 : -EAGAIN; 605 } 606