// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2020 Facebook */

#include <linux/fs.h>
#include <linux/anon_inodes.h>
#include <linux/filter.h>
#include <linux/bpf.h>

struct bpf_iter_target_info {
	struct list_head list;
	const struct bpf_iter_reg *reg_info;
	u32 btf_id;	/* cached value */
};

struct bpf_iter_link {
	struct bpf_link link;
	struct bpf_iter_aux_info aux;
	struct bpf_iter_target_info *tinfo;
};

struct bpf_iter_priv_data {
	struct bpf_iter_target_info *tinfo;
	const struct bpf_iter_seq_info *seq_info;
	struct bpf_prog *prog;
	u64 session_id;
	u64 seq_num;
	bool done_stop;
	u8 target_private[] __aligned(8);
};

static struct list_head targets = LIST_HEAD_INIT(targets);
static DEFINE_MUTEX(targets_mutex);

/* protect bpf_iter_link changes */
static DEFINE_MUTEX(link_mutex);

/* incremented on every opened seq_file */
static atomic64_t session_id;

static int prepare_seq_file(struct file *file, struct bpf_iter_link *link,
			    const struct bpf_iter_seq_info *seq_info);

static void bpf_iter_inc_seq_num(struct seq_file *seq)
{
	struct bpf_iter_priv_data *iter_priv;

	iter_priv = container_of(seq->private, struct bpf_iter_priv_data,
				 target_private);
	iter_priv->seq_num++;
}

static void bpf_iter_dec_seq_num(struct seq_file *seq)
{
	struct bpf_iter_priv_data *iter_priv;

	iter_priv = container_of(seq->private, struct bpf_iter_priv_data,
				 target_private);
	iter_priv->seq_num--;
}

static void bpf_iter_done_stop(struct seq_file *seq)
{
	struct bpf_iter_priv_data *iter_priv;

	iter_priv = container_of(seq->private, struct bpf_iter_priv_data,
				 target_private);
	iter_priv->done_stop = true;
}
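/* Layout note (illustrative, derived from prepare_seq_file() below):
 * __seq_open_private() allocates a struct bpf_iter_priv_data and
 * seq->private is pointed at its flexible target_private[] member,
 * which is why the helpers above recover the enclosing struct with
 * container_of():
 *
 *	+------------------------------+ <-- allocation start (iter_priv)
 *	| tinfo, seq_info, prog        |
 *	| session_id, seq_num          |
 *	| done_stop                    |
 *	+------------------------------+ <-- seq->private (target_private[])
 *	| target-specific private data |
 *	+------------------------------+
 */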
/* bpf_seq_read, a customized and simpler version of seq_read() for bpf
 * iterators. no_llseek is assumed for this file.
 * The following are differences from seq_read():
 *  . fixed buffer size (PAGE_SIZE)
 *  . stop() may call a bpf program, which handles potential overflow
 */
static ssize_t bpf_seq_read(struct file *file, char __user *buf, size_t size,
			    loff_t *ppos)
{
	struct seq_file *seq = file->private_data;
	size_t n, offs, copied = 0;
	int err = 0;
	void *p;

	mutex_lock(&seq->lock);

	if (!seq->buf) {
		seq->size = PAGE_SIZE;
		seq->buf = kmalloc(seq->size, GFP_KERNEL);
		if (!seq->buf) {
			err = -ENOMEM;
			goto done;
		}
	}

	if (seq->count) {
		n = min(seq->count, size);
		err = copy_to_user(buf, seq->buf + seq->from, n);
		if (err) {
			err = -EFAULT;
			goto done;
		}
		seq->count -= n;
		seq->from += n;
		copied = n;
		goto done;
	}

	seq->from = 0;
	p = seq->op->start(seq, &seq->index);
	if (!p)
		goto stop;
	if (IS_ERR(p)) {
		err = PTR_ERR(p);
		seq->op->stop(seq, p);
		seq->count = 0;
		goto done;
	}

	err = seq->op->show(seq, p);
	if (err > 0) {
		/* object is skipped, decrease seq_num, so next
		 * valid object can reuse the same seq_num.
		 */
		bpf_iter_dec_seq_num(seq);
		seq->count = 0;
	} else if (err < 0 || seq_has_overflowed(seq)) {
		if (!err)
			err = -E2BIG;
		seq->op->stop(seq, p);
		seq->count = 0;
		goto done;
	}

	while (1) {
		loff_t pos = seq->index;

		offs = seq->count;
		p = seq->op->next(seq, p, &seq->index);
		if (pos == seq->index) {
			pr_info_ratelimited("buggy seq_file .next function %ps "
					    "did not update position index\n",
					    seq->op->next);
			seq->index++;
		}

		if (IS_ERR_OR_NULL(p))
			break;

		/* got a valid next object, increase seq_num */
		bpf_iter_inc_seq_num(seq);

		if (seq->count >= size)
			break;

		err = seq->op->show(seq, p);
		if (err > 0) {
			bpf_iter_dec_seq_num(seq);
			seq->count = offs;
		} else if (err < 0 || seq_has_overflowed(seq)) {
			seq->count = offs;
			if (offs == 0) {
				if (!err)
					err = -E2BIG;
				seq->op->stop(seq, p);
				goto done;
			}
			break;
		}
	}
stop:
	offs = seq->count;
	/* bpf program called if !p */
	seq->op->stop(seq, p);
	if (!p) {
		if (!seq_has_overflowed(seq)) {
			bpf_iter_done_stop(seq);
		} else {
			seq->count = offs;
			if (offs == 0) {
				err = -E2BIG;
				goto done;
			}
		}
	}

	n = min(seq->count, size);
	err = copy_to_user(buf, seq->buf, n);
	if (err) {
		err = -EFAULT;
		goto done;
	}
	copied = n;
	seq->count -= n;
	seq->from = n;
done:
	if (!copied)
		copied = err;
	else
		*ppos += copied;
	mutex_unlock(&seq->lock);
	return copied;
}

static const struct bpf_iter_seq_info *
__get_seq_info(struct bpf_iter_link *link)
{
	const struct bpf_iter_seq_info *seq_info;

	if (link->aux.map) {
		seq_info = link->aux.map->ops->iter_seq_info;
		if (seq_info)
			return seq_info;
	}

	return link->tinfo->reg_info->seq_info;
}

static int iter_open(struct inode *inode, struct file *file)
{
	struct bpf_iter_link *link = inode->i_private;

	return prepare_seq_file(file, link, __get_seq_info(link));
}

static int iter_release(struct inode *inode, struct file *file)
{
	struct bpf_iter_priv_data *iter_priv;
	struct seq_file *seq;

	seq = file->private_data;
	if (!seq)
		return 0;

	iter_priv = container_of(seq->private, struct bpf_iter_priv_data,
				 target_private);

	if (iter_priv->seq_info->fini_seq_private)
		iter_priv->seq_info->fini_seq_private(seq->private);

	bpf_prog_put(iter_priv->prog);
	/* restore seq->private to the allocation start so that
	 * seq_release_private() frees the whole priv_data
	 */
	seq->private = iter_priv;

	return seq_release_private(inode, file);
}

const struct file_operations bpf_iter_fops = {
	.open		= iter_open,
	.llseek		= no_llseek,
	.read		= bpf_seq_read,
	.release	= iter_release,
};

/* The argument reg_info will be cached in bpf_iter_target_info.
 * The common practice is to declare the target's reg_info as
 * a static const variable and pass it as an argument to
 * bpf_iter_reg_target().
 */
int bpf_iter_reg_target(const struct bpf_iter_reg *reg_info)
{
	struct bpf_iter_target_info *tinfo;

	tinfo = kmalloc(sizeof(*tinfo), GFP_KERNEL);
	if (!tinfo)
		return -ENOMEM;

	tinfo->reg_info = reg_info;
	INIT_LIST_HEAD(&tinfo->list);

	mutex_lock(&targets_mutex);
	list_add(&tinfo->list, &targets);
	mutex_unlock(&targets_mutex);

	return 0;
}
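/* Example: how a target typically registers itself (a minimal sketch;
 * the "foo" target and its seq_ops/init/fini callbacks are hypothetical,
 * and only struct fields already used in this file are assumed):
 *
 *	static const struct bpf_iter_seq_info foo_seq_info = {
 *		.seq_ops		= &foo_seq_ops,
 *		.init_seq_private	= foo_init_seq_private,
 *		.fini_seq_private	= foo_fini_seq_private,
 *		.seq_priv_size		= sizeof(struct foo_iter_seq_priv),
 *	};
 *
 *	static struct bpf_iter_reg foo_reg_info = {
 *		.target		= "foo",
 *		.seq_info	= &foo_seq_info,
 *	};
 *
 *	static int __init foo_iter_init(void)
 *	{
 *		return bpf_iter_reg_target(&foo_reg_info);
 *	}
 */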
void bpf_iter_unreg_target(const struct bpf_iter_reg *reg_info)
{
	struct bpf_iter_target_info *tinfo;
	bool found = false;

	mutex_lock(&targets_mutex);
	list_for_each_entry(tinfo, &targets, list) {
		if (reg_info == tinfo->reg_info) {
			list_del(&tinfo->list);
			kfree(tinfo);
			found = true;
			break;
		}
	}
	mutex_unlock(&targets_mutex);

	WARN_ON(found == false);
}

static void cache_btf_id(struct bpf_iter_target_info *tinfo,
			 struct bpf_prog *prog)
{
	tinfo->btf_id = prog->aux->attach_btf_id;
}

bool bpf_iter_prog_supported(struct bpf_prog *prog)
{
	const char *attach_fname = prog->aux->attach_func_name;
	u32 prog_btf_id = prog->aux->attach_btf_id;
	const char *prefix = BPF_ITER_FUNC_PREFIX;
	struct bpf_iter_target_info *tinfo;
	int prefix_len = strlen(prefix);
	bool supported = false;

	if (strncmp(attach_fname, prefix, prefix_len))
		return false;

	mutex_lock(&targets_mutex);
	list_for_each_entry(tinfo, &targets, list) {
		if (tinfo->btf_id && tinfo->btf_id == prog_btf_id) {
			supported = true;
			break;
		}
		if (!strcmp(attach_fname + prefix_len, tinfo->reg_info->target)) {
			cache_btf_id(tinfo, prog);
			supported = true;
			break;
		}
	}
	mutex_unlock(&targets_mutex);

	if (supported) {
		prog->aux->ctx_arg_info_size = tinfo->reg_info->ctx_arg_info_size;
		prog->aux->ctx_arg_info = tinfo->reg_info->ctx_arg_info;
	}

	return supported;
}
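/* From the BPF program side (illustrative sketch, not part of this file):
 * the program's attach function name is BPF_ITER_FUNC_PREFIX followed by
 * the target name, as checked above; libbpf derives it from the section
 * name. Modeled on the in-tree "task" target:
 *
 *	SEC("iter/task")
 *	int dump_task(struct bpf_iter__task *ctx)
 *	{
 *		struct seq_file *seq = ctx->meta->seq;
 *		struct task_struct *task = ctx->task;
 *
 *		if (!task)
 *			return 0;
 *		BPF_SEQ_PRINTF(seq, "pid=%d\n", task->pid);
 *		return 0;
 *	}
 */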
static void bpf_iter_link_release(struct bpf_link *link)
{
	struct bpf_iter_link *iter_link =
		container_of(link, struct bpf_iter_link, link);

	if (iter_link->tinfo->reg_info->detach_target)
		iter_link->tinfo->reg_info->detach_target(&iter_link->aux);
}

static void bpf_iter_link_dealloc(struct bpf_link *link)
{
	struct bpf_iter_link *iter_link =
		container_of(link, struct bpf_iter_link, link);

	kfree(iter_link);
}

static int bpf_iter_link_replace(struct bpf_link *link,
				 struct bpf_prog *new_prog,
				 struct bpf_prog *old_prog)
{
	int ret = 0;

	mutex_lock(&link_mutex);
	if (old_prog && link->prog != old_prog) {
		ret = -EPERM;
		goto out_unlock;
	}

	if (link->prog->type != new_prog->type ||
	    link->prog->expected_attach_type != new_prog->expected_attach_type ||
	    link->prog->aux->attach_btf_id != new_prog->aux->attach_btf_id) {
		ret = -EINVAL;
		goto out_unlock;
	}

	old_prog = xchg(&link->prog, new_prog);
	bpf_prog_put(old_prog);

out_unlock:
	mutex_unlock(&link_mutex);
	return ret;
}

static const struct bpf_link_ops bpf_iter_link_lops = {
	.release = bpf_iter_link_release,
	.dealloc = bpf_iter_link_dealloc,
	.update_prog = bpf_iter_link_replace,
};

bool bpf_link_is_iter(struct bpf_link *link)
{
	return link->ops == &bpf_iter_link_lops;
}

int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
{
	union bpf_iter_link_info __user *ulinfo;
	struct bpf_link_primer link_primer;
	struct bpf_iter_target_info *tinfo;
	union bpf_iter_link_info linfo;
	struct bpf_iter_link *link;
	u32 prog_btf_id, linfo_len;
	bool existed = false;
	int err;

	if (attr->link_create.target_fd || attr->link_create.flags)
		return -EINVAL;

	memset(&linfo, 0, sizeof(union bpf_iter_link_info));

	ulinfo = u64_to_user_ptr(attr->link_create.iter_info);
	linfo_len = attr->link_create.iter_info_len;
	/* iter_info and iter_info_len must be both set or both unset */
	if (!ulinfo ^ !linfo_len)
		return -EINVAL;

	if (ulinfo) {
		err = bpf_check_uarg_tail_zero(ulinfo, sizeof(linfo),
					       linfo_len);
		if (err)
			return err;
		linfo_len = min_t(u32, linfo_len, sizeof(linfo));
		if (copy_from_user(&linfo, ulinfo, linfo_len))
			return -EFAULT;
	}

	prog_btf_id = prog->aux->attach_btf_id;
	mutex_lock(&targets_mutex);
	list_for_each_entry(tinfo, &targets, list) {
		if (tinfo->btf_id == prog_btf_id) {
			existed = true;
			break;
		}
	}
	mutex_unlock(&targets_mutex);
	if (!existed)
		return -ENOENT;

	link = kzalloc(sizeof(*link), GFP_USER | __GFP_NOWARN);
	if (!link)
		return -ENOMEM;

	bpf_link_init(&link->link, BPF_LINK_TYPE_ITER, &bpf_iter_link_lops, prog);
	link->tinfo = tinfo;

	err = bpf_link_prime(&link->link, &link_primer);
	if (err) {
		kfree(link);
		return err;
	}

	if (tinfo->reg_info->attach_target) {
		err = tinfo->reg_info->attach_target(prog, &linfo, &link->aux);
		if (err) {
			bpf_link_cleanup(&link_primer);
			return err;
		}
	}

	return bpf_link_settle(&link_primer);
}

static void init_seq_meta(struct bpf_iter_priv_data *priv_data,
			  struct bpf_iter_target_info *tinfo,
			  const struct bpf_iter_seq_info *seq_info,
			  struct bpf_prog *prog)
{
	priv_data->tinfo = tinfo;
	priv_data->seq_info = seq_info;
	priv_data->prog = prog;
	priv_data->session_id = atomic64_inc_return(&session_id);
	priv_data->seq_num = 0;
	priv_data->done_stop = false;
}

static int prepare_seq_file(struct file *file, struct bpf_iter_link *link,
			    const struct bpf_iter_seq_info *seq_info)
{
	struct bpf_iter_priv_data *priv_data;
	struct bpf_iter_target_info *tinfo;
	struct bpf_prog *prog;
	u32 total_priv_dsize;
	struct seq_file *seq;
	int err = 0;

	mutex_lock(&link_mutex);
	prog = link->link.prog;
	bpf_prog_inc(prog);
	mutex_unlock(&link_mutex);

	tinfo = link->tinfo;
	total_priv_dsize = offsetof(struct bpf_iter_priv_data, target_private) +
			   seq_info->seq_priv_size;
	priv_data = __seq_open_private(file, seq_info->seq_ops,
				       total_priv_dsize);
	if (!priv_data) {
		err = -ENOMEM;
		goto release_prog;
	}

	if (seq_info->init_seq_private) {
		err = seq_info->init_seq_private(priv_data->target_private, &link->aux);
		if (err)
			goto release_seq_file;
	}

	init_seq_meta(priv_data, tinfo, seq_info, prog);
	seq = file->private_data;
	seq->private = priv_data->target_private;

	return 0;

release_seq_file:
	seq_release_private(file->f_inode, file);
	file->private_data = NULL;
release_prog:
	bpf_prog_put(prog);
	return err;
}
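/* Typical user-space sequence driving this code (a sketch; error
 * handling omitted): BPF_LINK_CREATE reaches bpf_iter_link_attach()
 * above, BPF_ITER_CREATE reaches bpf_iter_new_fd() below, and read()
 * on the resulting fd is served by bpf_seq_read():
 *
 *	link_fd = bpf(BPF_LINK_CREATE, &attr, sizeof(attr));
 *	attr2.iter_create.link_fd = link_fd;
 *	iter_fd = bpf(BPF_ITER_CREATE, &attr2, sizeof(attr2));
 *	while ((n = read(iter_fd, buf, sizeof(buf))) > 0)
 *		write(STDOUT_FILENO, buf, n);
 */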
int bpf_iter_new_fd(struct bpf_link *link)
{
	struct bpf_iter_link *iter_link;
	struct file *file;
	unsigned int flags;
	int err, fd;

	if (link->ops != &bpf_iter_link_lops)
		return -EINVAL;

	flags = O_RDONLY | O_CLOEXEC;
	fd = get_unused_fd_flags(flags);
	if (fd < 0)
		return fd;

	file = anon_inode_getfile("bpf_iter", &bpf_iter_fops, NULL, flags);
	if (IS_ERR(file)) {
		err = PTR_ERR(file);
		goto free_fd;
	}

	iter_link = container_of(link, struct bpf_iter_link, link);
	err = prepare_seq_file(file, iter_link, __get_seq_info(iter_link));
	if (err)
		goto free_file;

	fd_install(fd, file);
	return fd;

free_file:
	fput(file);
free_fd:
	put_unused_fd(fd);
	return err;
}

struct bpf_prog *bpf_iter_get_info(struct bpf_iter_meta *meta, bool in_stop)
{
	struct bpf_iter_priv_data *iter_priv;
	struct seq_file *seq;
	void *seq_priv;

	seq = meta->seq;
	if (seq->file->f_op != &bpf_iter_fops)
		return NULL;

	seq_priv = seq->private;
	iter_priv = container_of(seq_priv, struct bpf_iter_priv_data,
				 target_private);

	if (in_stop && iter_priv->done_stop)
		return NULL;

	meta->session_id = iter_priv->session_id;
	meta->seq_num = iter_priv->seq_num;

	return iter_priv->prog;
}

int bpf_iter_run_prog(struct bpf_prog *prog, void *ctx)
{
	int ret;

	rcu_read_lock();
	migrate_disable();
	ret = BPF_PROG_RUN(prog, ctx);
	migrate_enable();
	rcu_read_unlock();

	/* The bpf program can only return 0 or 1:
	 *  0 : okay
	 *  1 : retry the same object
	 * The return value here becomes the seq_ops->show()
	 * return value.
	 */
	return ret == 0 ? 0 : -EAGAIN;
}
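/* How a target's seq_ops typically use the two helpers above
 * (illustrative sketch modeled on in-tree targets; the "foo" context
 * struct and object type are hypothetical). show() runs the program
 * once per object; stop() runs it a final time with a NULL object
 * unless done_stop was already set:
 *
 *	static int foo_seq_show(struct seq_file *seq, void *v)
 *	{
 *		struct bpf_iter_meta meta = { .seq = seq, };
 *		struct bpf_iter__foo ctx = { .meta = &meta, .foo = v, };
 *		struct bpf_prog *prog;
 *
 *		prog = bpf_iter_get_info(&meta, false);
 *		if (!prog)
 *			return 0;
 *		return bpf_iter_run_prog(prog, &ctx);
 *	}
 */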