1 // SPDX-License-Identifier: GPL-2.0-only 2 /* Copyright (c) 2020 Facebook */ 3 4 #include <linux/init.h> 5 #include <linux/namei.h> 6 #include <linux/pid_namespace.h> 7 #include <linux/fs.h> 8 #include <linux/fdtable.h> 9 #include <linux/filter.h> 10 #include <linux/btf_ids.h> 11 #include "mmap_unlock_work.h" 12 13 static const char * const iter_task_type_names[] = { 14 "ALL", 15 "TID", 16 "PID", 17 }; 18 19 struct bpf_iter_seq_task_common { 20 struct pid_namespace *ns; 21 enum bpf_iter_task_type type; 22 u32 pid; 23 u32 pid_visiting; 24 }; 25 26 struct bpf_iter_seq_task_info { 27 /* The first field must be struct bpf_iter_seq_task_common. 28 * this is assumed by {init, fini}_seq_pidns() callback functions. 29 */ 30 struct bpf_iter_seq_task_common common; 31 u32 tid; 32 }; 33 34 static struct task_struct *task_group_seq_get_next(struct bpf_iter_seq_task_common *common, 35 u32 *tid, 36 bool skip_if_dup_files) 37 { 38 struct task_struct *task, *next_task; 39 struct pid *pid; 40 u32 saved_tid; 41 42 if (!*tid) { 43 /* The first time, the iterator calls this function. */ 44 pid = find_pid_ns(common->pid, common->ns); 45 if (!pid) 46 return NULL; 47 48 task = get_pid_task(pid, PIDTYPE_TGID); 49 if (!task) 50 return NULL; 51 52 *tid = common->pid; 53 common->pid_visiting = common->pid; 54 55 return task; 56 } 57 58 /* If the control returns to user space and comes back to the 59 * kernel again, *tid and common->pid_visiting should be the 60 * same for task_seq_start() to pick up the correct task. 61 */ 62 if (*tid == common->pid_visiting) { 63 pid = find_pid_ns(common->pid_visiting, common->ns); 64 task = get_pid_task(pid, PIDTYPE_PID); 65 66 return task; 67 } 68 69 pid = find_pid_ns(common->pid_visiting, common->ns); 70 if (!pid) 71 return NULL; 72 73 task = get_pid_task(pid, PIDTYPE_PID); 74 if (!task) 75 return NULL; 76 77 retry: 78 if (!pid_alive(task)) { 79 put_task_struct(task); 80 return NULL; 81 } 82 83 next_task = next_thread(task); 84 put_task_struct(task); 85 if (!next_task) 86 return NULL; 87 88 saved_tid = *tid; 89 *tid = __task_pid_nr_ns(next_task, PIDTYPE_PID, common->ns); 90 if (!*tid || *tid == common->pid) { 91 /* Run out of tasks of a process. The tasks of a 92 * thread_group are linked as circular linked list. 93 */ 94 *tid = saved_tid; 95 return NULL; 96 } 97 98 get_task_struct(next_task); 99 common->pid_visiting = *tid; 100 101 if (skip_if_dup_files && task->files == task->group_leader->files) { 102 task = next_task; 103 goto retry; 104 } 105 106 return next_task; 107 } 108 109 static struct task_struct *task_seq_get_next(struct bpf_iter_seq_task_common *common, 110 u32 *tid, 111 bool skip_if_dup_files) 112 { 113 struct task_struct *task = NULL; 114 struct pid *pid; 115 116 if (common->type == BPF_TASK_ITER_TID) { 117 if (*tid && *tid != common->pid) 118 return NULL; 119 rcu_read_lock(); 120 pid = find_pid_ns(common->pid, common->ns); 121 if (pid) { 122 task = get_pid_task(pid, PIDTYPE_PID); 123 *tid = common->pid; 124 } 125 rcu_read_unlock(); 126 127 return task; 128 } 129 130 if (common->type == BPF_TASK_ITER_TGID) { 131 rcu_read_lock(); 132 task = task_group_seq_get_next(common, tid, skip_if_dup_files); 133 rcu_read_unlock(); 134 135 return task; 136 } 137 138 rcu_read_lock(); 139 retry: 140 pid = find_ge_pid(*tid, common->ns); 141 if (pid) { 142 *tid = pid_nr_ns(pid, common->ns); 143 task = get_pid_task(pid, PIDTYPE_PID); 144 if (!task) { 145 ++*tid; 146 goto retry; 147 } else if (skip_if_dup_files && !thread_group_leader(task) && 148 task->files == task->group_leader->files) { 149 put_task_struct(task); 150 task = NULL; 151 ++*tid; 152 goto retry; 153 } 154 } 155 rcu_read_unlock(); 156 157 return task; 158 } 159 160 static void *task_seq_start(struct seq_file *seq, loff_t *pos) 161 { 162 struct bpf_iter_seq_task_info *info = seq->private; 163 struct task_struct *task; 164 165 task = task_seq_get_next(&info->common, &info->tid, false); 166 if (!task) 167 return NULL; 168 169 if (*pos == 0) 170 ++*pos; 171 return task; 172 } 173 174 static void *task_seq_next(struct seq_file *seq, void *v, loff_t *pos) 175 { 176 struct bpf_iter_seq_task_info *info = seq->private; 177 struct task_struct *task; 178 179 ++*pos; 180 ++info->tid; 181 put_task_struct((struct task_struct *)v); 182 task = task_seq_get_next(&info->common, &info->tid, false); 183 if (!task) 184 return NULL; 185 186 return task; 187 } 188 189 struct bpf_iter__task { 190 __bpf_md_ptr(struct bpf_iter_meta *, meta); 191 __bpf_md_ptr(struct task_struct *, task); 192 }; 193 194 DEFINE_BPF_ITER_FUNC(task, struct bpf_iter_meta *meta, struct task_struct *task) 195 196 static int __task_seq_show(struct seq_file *seq, struct task_struct *task, 197 bool in_stop) 198 { 199 struct bpf_iter_meta meta; 200 struct bpf_iter__task ctx; 201 struct bpf_prog *prog; 202 203 meta.seq = seq; 204 prog = bpf_iter_get_info(&meta, in_stop); 205 if (!prog) 206 return 0; 207 208 ctx.meta = &meta; 209 ctx.task = task; 210 return bpf_iter_run_prog(prog, &ctx); 211 } 212 213 static int task_seq_show(struct seq_file *seq, void *v) 214 { 215 return __task_seq_show(seq, v, false); 216 } 217 218 static void task_seq_stop(struct seq_file *seq, void *v) 219 { 220 if (!v) 221 (void)__task_seq_show(seq, v, true); 222 else 223 put_task_struct((struct task_struct *)v); 224 } 225 226 static int bpf_iter_attach_task(struct bpf_prog *prog, 227 union bpf_iter_link_info *linfo, 228 struct bpf_iter_aux_info *aux) 229 { 230 unsigned int flags; 231 struct pid *pid; 232 pid_t tgid; 233 234 if ((!!linfo->task.tid + !!linfo->task.pid + !!linfo->task.pid_fd) > 1) 235 return -EINVAL; 236 237 aux->task.type = BPF_TASK_ITER_ALL; 238 if (linfo->task.tid != 0) { 239 aux->task.type = BPF_TASK_ITER_TID; 240 aux->task.pid = linfo->task.tid; 241 } 242 if (linfo->task.pid != 0) { 243 aux->task.type = BPF_TASK_ITER_TGID; 244 aux->task.pid = linfo->task.pid; 245 } 246 if (linfo->task.pid_fd != 0) { 247 aux->task.type = BPF_TASK_ITER_TGID; 248 249 pid = pidfd_get_pid(linfo->task.pid_fd, &flags); 250 if (IS_ERR(pid)) 251 return PTR_ERR(pid); 252 253 tgid = pid_nr_ns(pid, task_active_pid_ns(current)); 254 aux->task.pid = tgid; 255 put_pid(pid); 256 } 257 258 return 0; 259 } 260 261 static const struct seq_operations task_seq_ops = { 262 .start = task_seq_start, 263 .next = task_seq_next, 264 .stop = task_seq_stop, 265 .show = task_seq_show, 266 }; 267 268 struct bpf_iter_seq_task_file_info { 269 /* The first field must be struct bpf_iter_seq_task_common. 270 * this is assumed by {init, fini}_seq_pidns() callback functions. 271 */ 272 struct bpf_iter_seq_task_common common; 273 struct task_struct *task; 274 u32 tid; 275 u32 fd; 276 }; 277 278 static struct file * 279 task_file_seq_get_next(struct bpf_iter_seq_task_file_info *info) 280 { 281 u32 saved_tid = info->tid; 282 struct task_struct *curr_task; 283 unsigned int curr_fd = info->fd; 284 285 /* If this function returns a non-NULL file object, 286 * it held a reference to the task/file. 287 * Otherwise, it does not hold any reference. 288 */ 289 again: 290 if (info->task) { 291 curr_task = info->task; 292 curr_fd = info->fd; 293 } else { 294 curr_task = task_seq_get_next(&info->common, &info->tid, true); 295 if (!curr_task) { 296 info->task = NULL; 297 return NULL; 298 } 299 300 /* set info->task */ 301 info->task = curr_task; 302 if (saved_tid == info->tid) 303 curr_fd = info->fd; 304 else 305 curr_fd = 0; 306 } 307 308 rcu_read_lock(); 309 for (;; curr_fd++) { 310 struct file *f; 311 f = task_lookup_next_fd_rcu(curr_task, &curr_fd); 312 if (!f) 313 break; 314 if (!get_file_rcu(f)) 315 continue; 316 317 /* set info->fd */ 318 info->fd = curr_fd; 319 rcu_read_unlock(); 320 return f; 321 } 322 323 /* the current task is done, go to the next task */ 324 rcu_read_unlock(); 325 put_task_struct(curr_task); 326 327 if (info->common.type == BPF_TASK_ITER_TID) { 328 info->task = NULL; 329 return NULL; 330 } 331 332 info->task = NULL; 333 info->fd = 0; 334 saved_tid = ++(info->tid); 335 goto again; 336 } 337 338 static void *task_file_seq_start(struct seq_file *seq, loff_t *pos) 339 { 340 struct bpf_iter_seq_task_file_info *info = seq->private; 341 struct file *file; 342 343 info->task = NULL; 344 file = task_file_seq_get_next(info); 345 if (file && *pos == 0) 346 ++*pos; 347 348 return file; 349 } 350 351 static void *task_file_seq_next(struct seq_file *seq, void *v, loff_t *pos) 352 { 353 struct bpf_iter_seq_task_file_info *info = seq->private; 354 355 ++*pos; 356 ++info->fd; 357 fput((struct file *)v); 358 return task_file_seq_get_next(info); 359 } 360 361 struct bpf_iter__task_file { 362 __bpf_md_ptr(struct bpf_iter_meta *, meta); 363 __bpf_md_ptr(struct task_struct *, task); 364 u32 fd __aligned(8); 365 __bpf_md_ptr(struct file *, file); 366 }; 367 368 DEFINE_BPF_ITER_FUNC(task_file, struct bpf_iter_meta *meta, 369 struct task_struct *task, u32 fd, 370 struct file *file) 371 372 static int __task_file_seq_show(struct seq_file *seq, struct file *file, 373 bool in_stop) 374 { 375 struct bpf_iter_seq_task_file_info *info = seq->private; 376 struct bpf_iter__task_file ctx; 377 struct bpf_iter_meta meta; 378 struct bpf_prog *prog; 379 380 meta.seq = seq; 381 prog = bpf_iter_get_info(&meta, in_stop); 382 if (!prog) 383 return 0; 384 385 ctx.meta = &meta; 386 ctx.task = info->task; 387 ctx.fd = info->fd; 388 ctx.file = file; 389 return bpf_iter_run_prog(prog, &ctx); 390 } 391 392 static int task_file_seq_show(struct seq_file *seq, void *v) 393 { 394 return __task_file_seq_show(seq, v, false); 395 } 396 397 static void task_file_seq_stop(struct seq_file *seq, void *v) 398 { 399 struct bpf_iter_seq_task_file_info *info = seq->private; 400 401 if (!v) { 402 (void)__task_file_seq_show(seq, v, true); 403 } else { 404 fput((struct file *)v); 405 put_task_struct(info->task); 406 info->task = NULL; 407 } 408 } 409 410 static int init_seq_pidns(void *priv_data, struct bpf_iter_aux_info *aux) 411 { 412 struct bpf_iter_seq_task_common *common = priv_data; 413 414 common->ns = get_pid_ns(task_active_pid_ns(current)); 415 common->type = aux->task.type; 416 common->pid = aux->task.pid; 417 418 return 0; 419 } 420 421 static void fini_seq_pidns(void *priv_data) 422 { 423 struct bpf_iter_seq_task_common *common = priv_data; 424 425 put_pid_ns(common->ns); 426 } 427 428 static const struct seq_operations task_file_seq_ops = { 429 .start = task_file_seq_start, 430 .next = task_file_seq_next, 431 .stop = task_file_seq_stop, 432 .show = task_file_seq_show, 433 }; 434 435 struct bpf_iter_seq_task_vma_info { 436 /* The first field must be struct bpf_iter_seq_task_common. 437 * this is assumed by {init, fini}_seq_pidns() callback functions. 438 */ 439 struct bpf_iter_seq_task_common common; 440 struct task_struct *task; 441 struct mm_struct *mm; 442 struct vm_area_struct *vma; 443 u32 tid; 444 unsigned long prev_vm_start; 445 unsigned long prev_vm_end; 446 }; 447 448 enum bpf_task_vma_iter_find_op { 449 task_vma_iter_first_vma, /* use find_vma() with addr 0 */ 450 task_vma_iter_next_vma, /* use vma_next() with curr_vma */ 451 task_vma_iter_find_vma, /* use find_vma() to find next vma */ 452 }; 453 454 static struct vm_area_struct * 455 task_vma_seq_get_next(struct bpf_iter_seq_task_vma_info *info) 456 { 457 enum bpf_task_vma_iter_find_op op; 458 struct vm_area_struct *curr_vma; 459 struct task_struct *curr_task; 460 struct mm_struct *curr_mm; 461 u32 saved_tid = info->tid; 462 463 /* If this function returns a non-NULL vma, it holds a reference to 464 * the task_struct, holds a refcount on mm->mm_users, and holds 465 * read lock on vma->mm->mmap_lock. 466 * If this function returns NULL, it does not hold any reference or 467 * lock. 468 */ 469 if (info->task) { 470 curr_task = info->task; 471 curr_vma = info->vma; 472 curr_mm = info->mm; 473 /* In case of lock contention, drop mmap_lock to unblock 474 * the writer. 475 * 476 * After relock, call find(mm, prev_vm_end - 1) to find 477 * new vma to process. 478 * 479 * +------+------+-----------+ 480 * | VMA1 | VMA2 | VMA3 | 481 * +------+------+-----------+ 482 * | | | | 483 * 4k 8k 16k 400k 484 * 485 * For example, curr_vma == VMA2. Before unlock, we set 486 * 487 * prev_vm_start = 8k 488 * prev_vm_end = 16k 489 * 490 * There are a few cases: 491 * 492 * 1) VMA2 is freed, but VMA3 exists. 493 * 494 * find_vma() will return VMA3, just process VMA3. 495 * 496 * 2) VMA2 still exists. 497 * 498 * find_vma() will return VMA2, process VMA2->next. 499 * 500 * 3) no more vma in this mm. 501 * 502 * Process the next task. 503 * 504 * 4) find_vma() returns a different vma, VMA2'. 505 * 506 * 4.1) If VMA2 covers same range as VMA2', skip VMA2', 507 * because we already covered the range; 508 * 4.2) VMA2 and VMA2' covers different ranges, process 509 * VMA2'. 510 */ 511 if (mmap_lock_is_contended(curr_mm)) { 512 info->prev_vm_start = curr_vma->vm_start; 513 info->prev_vm_end = curr_vma->vm_end; 514 op = task_vma_iter_find_vma; 515 mmap_read_unlock(curr_mm); 516 if (mmap_read_lock_killable(curr_mm)) { 517 mmput(curr_mm); 518 goto finish; 519 } 520 } else { 521 op = task_vma_iter_next_vma; 522 } 523 } else { 524 again: 525 curr_task = task_seq_get_next(&info->common, &info->tid, true); 526 if (!curr_task) { 527 info->tid++; 528 goto finish; 529 } 530 531 if (saved_tid != info->tid) { 532 /* new task, process the first vma */ 533 op = task_vma_iter_first_vma; 534 } else { 535 /* Found the same tid, which means the user space 536 * finished data in previous buffer and read more. 537 * We dropped mmap_lock before returning to user 538 * space, so it is necessary to use find_vma() to 539 * find the next vma to process. 540 */ 541 op = task_vma_iter_find_vma; 542 } 543 544 curr_mm = get_task_mm(curr_task); 545 if (!curr_mm) 546 goto next_task; 547 548 if (mmap_read_lock_killable(curr_mm)) { 549 mmput(curr_mm); 550 goto finish; 551 } 552 } 553 554 switch (op) { 555 case task_vma_iter_first_vma: 556 curr_vma = find_vma(curr_mm, 0); 557 break; 558 case task_vma_iter_next_vma: 559 curr_vma = find_vma(curr_mm, curr_vma->vm_end); 560 break; 561 case task_vma_iter_find_vma: 562 /* We dropped mmap_lock so it is necessary to use find_vma 563 * to find the next vma. This is similar to the mechanism 564 * in show_smaps_rollup(). 565 */ 566 curr_vma = find_vma(curr_mm, info->prev_vm_end - 1); 567 /* case 1) and 4.2) above just use curr_vma */ 568 569 /* check for case 2) or case 4.1) above */ 570 if (curr_vma && 571 curr_vma->vm_start == info->prev_vm_start && 572 curr_vma->vm_end == info->prev_vm_end) 573 curr_vma = find_vma(curr_mm, curr_vma->vm_end); 574 break; 575 } 576 if (!curr_vma) { 577 /* case 3) above, or case 2) 4.1) with vma->next == NULL */ 578 mmap_read_unlock(curr_mm); 579 mmput(curr_mm); 580 goto next_task; 581 } 582 info->task = curr_task; 583 info->vma = curr_vma; 584 info->mm = curr_mm; 585 return curr_vma; 586 587 next_task: 588 if (info->common.type == BPF_TASK_ITER_TID) 589 goto finish; 590 591 put_task_struct(curr_task); 592 info->task = NULL; 593 info->mm = NULL; 594 info->tid++; 595 goto again; 596 597 finish: 598 if (curr_task) 599 put_task_struct(curr_task); 600 info->task = NULL; 601 info->vma = NULL; 602 info->mm = NULL; 603 return NULL; 604 } 605 606 static void *task_vma_seq_start(struct seq_file *seq, loff_t *pos) 607 { 608 struct bpf_iter_seq_task_vma_info *info = seq->private; 609 struct vm_area_struct *vma; 610 611 vma = task_vma_seq_get_next(info); 612 if (vma && *pos == 0) 613 ++*pos; 614 615 return vma; 616 } 617 618 static void *task_vma_seq_next(struct seq_file *seq, void *v, loff_t *pos) 619 { 620 struct bpf_iter_seq_task_vma_info *info = seq->private; 621 622 ++*pos; 623 return task_vma_seq_get_next(info); 624 } 625 626 struct bpf_iter__task_vma { 627 __bpf_md_ptr(struct bpf_iter_meta *, meta); 628 __bpf_md_ptr(struct task_struct *, task); 629 __bpf_md_ptr(struct vm_area_struct *, vma); 630 }; 631 632 DEFINE_BPF_ITER_FUNC(task_vma, struct bpf_iter_meta *meta, 633 struct task_struct *task, struct vm_area_struct *vma) 634 635 static int __task_vma_seq_show(struct seq_file *seq, bool in_stop) 636 { 637 struct bpf_iter_seq_task_vma_info *info = seq->private; 638 struct bpf_iter__task_vma ctx; 639 struct bpf_iter_meta meta; 640 struct bpf_prog *prog; 641 642 meta.seq = seq; 643 prog = bpf_iter_get_info(&meta, in_stop); 644 if (!prog) 645 return 0; 646 647 ctx.meta = &meta; 648 ctx.task = info->task; 649 ctx.vma = info->vma; 650 return bpf_iter_run_prog(prog, &ctx); 651 } 652 653 static int task_vma_seq_show(struct seq_file *seq, void *v) 654 { 655 return __task_vma_seq_show(seq, false); 656 } 657 658 static void task_vma_seq_stop(struct seq_file *seq, void *v) 659 { 660 struct bpf_iter_seq_task_vma_info *info = seq->private; 661 662 if (!v) { 663 (void)__task_vma_seq_show(seq, true); 664 } else { 665 /* info->vma has not been seen by the BPF program. If the 666 * user space reads more, task_vma_seq_get_next should 667 * return this vma again. Set prev_vm_start to ~0UL, 668 * so that we don't skip the vma returned by the next 669 * find_vma() (case task_vma_iter_find_vma in 670 * task_vma_seq_get_next()). 671 */ 672 info->prev_vm_start = ~0UL; 673 info->prev_vm_end = info->vma->vm_end; 674 mmap_read_unlock(info->mm); 675 mmput(info->mm); 676 info->mm = NULL; 677 put_task_struct(info->task); 678 info->task = NULL; 679 } 680 } 681 682 static const struct seq_operations task_vma_seq_ops = { 683 .start = task_vma_seq_start, 684 .next = task_vma_seq_next, 685 .stop = task_vma_seq_stop, 686 .show = task_vma_seq_show, 687 }; 688 689 static const struct bpf_iter_seq_info task_seq_info = { 690 .seq_ops = &task_seq_ops, 691 .init_seq_private = init_seq_pidns, 692 .fini_seq_private = fini_seq_pidns, 693 .seq_priv_size = sizeof(struct bpf_iter_seq_task_info), 694 }; 695 696 static int bpf_iter_fill_link_info(const struct bpf_iter_aux_info *aux, struct bpf_link_info *info) 697 { 698 switch (aux->task.type) { 699 case BPF_TASK_ITER_TID: 700 info->iter.task.tid = aux->task.pid; 701 break; 702 case BPF_TASK_ITER_TGID: 703 info->iter.task.pid = aux->task.pid; 704 break; 705 default: 706 break; 707 } 708 return 0; 709 } 710 711 static void bpf_iter_task_show_fdinfo(const struct bpf_iter_aux_info *aux, struct seq_file *seq) 712 { 713 seq_printf(seq, "task_type:\t%s\n", iter_task_type_names[aux->task.type]); 714 if (aux->task.type == BPF_TASK_ITER_TID) 715 seq_printf(seq, "tid:\t%u\n", aux->task.pid); 716 else if (aux->task.type == BPF_TASK_ITER_TGID) 717 seq_printf(seq, "pid:\t%u\n", aux->task.pid); 718 } 719 720 static struct bpf_iter_reg task_reg_info = { 721 .target = "task", 722 .attach_target = bpf_iter_attach_task, 723 .feature = BPF_ITER_RESCHED, 724 .ctx_arg_info_size = 1, 725 .ctx_arg_info = { 726 { offsetof(struct bpf_iter__task, task), 727 PTR_TO_BTF_ID_OR_NULL }, 728 }, 729 .seq_info = &task_seq_info, 730 .fill_link_info = bpf_iter_fill_link_info, 731 .show_fdinfo = bpf_iter_task_show_fdinfo, 732 }; 733 734 static const struct bpf_iter_seq_info task_file_seq_info = { 735 .seq_ops = &task_file_seq_ops, 736 .init_seq_private = init_seq_pidns, 737 .fini_seq_private = fini_seq_pidns, 738 .seq_priv_size = sizeof(struct bpf_iter_seq_task_file_info), 739 }; 740 741 static struct bpf_iter_reg task_file_reg_info = { 742 .target = "task_file", 743 .attach_target = bpf_iter_attach_task, 744 .feature = BPF_ITER_RESCHED, 745 .ctx_arg_info_size = 2, 746 .ctx_arg_info = { 747 { offsetof(struct bpf_iter__task_file, task), 748 PTR_TO_BTF_ID_OR_NULL }, 749 { offsetof(struct bpf_iter__task_file, file), 750 PTR_TO_BTF_ID_OR_NULL }, 751 }, 752 .seq_info = &task_file_seq_info, 753 .fill_link_info = bpf_iter_fill_link_info, 754 .show_fdinfo = bpf_iter_task_show_fdinfo, 755 }; 756 757 static const struct bpf_iter_seq_info task_vma_seq_info = { 758 .seq_ops = &task_vma_seq_ops, 759 .init_seq_private = init_seq_pidns, 760 .fini_seq_private = fini_seq_pidns, 761 .seq_priv_size = sizeof(struct bpf_iter_seq_task_vma_info), 762 }; 763 764 static struct bpf_iter_reg task_vma_reg_info = { 765 .target = "task_vma", 766 .attach_target = bpf_iter_attach_task, 767 .feature = BPF_ITER_RESCHED, 768 .ctx_arg_info_size = 2, 769 .ctx_arg_info = { 770 { offsetof(struct bpf_iter__task_vma, task), 771 PTR_TO_BTF_ID_OR_NULL }, 772 { offsetof(struct bpf_iter__task_vma, vma), 773 PTR_TO_BTF_ID_OR_NULL }, 774 }, 775 .seq_info = &task_vma_seq_info, 776 .fill_link_info = bpf_iter_fill_link_info, 777 .show_fdinfo = bpf_iter_task_show_fdinfo, 778 }; 779 780 BPF_CALL_5(bpf_find_vma, struct task_struct *, task, u64, start, 781 bpf_callback_t, callback_fn, void *, callback_ctx, u64, flags) 782 { 783 struct mmap_unlock_irq_work *work = NULL; 784 struct vm_area_struct *vma; 785 bool irq_work_busy = false; 786 struct mm_struct *mm; 787 int ret = -ENOENT; 788 789 if (flags) 790 return -EINVAL; 791 792 if (!task) 793 return -ENOENT; 794 795 mm = task->mm; 796 if (!mm) 797 return -ENOENT; 798 799 irq_work_busy = bpf_mmap_unlock_get_irq_work(&work); 800 801 if (irq_work_busy || !mmap_read_trylock(mm)) 802 return -EBUSY; 803 804 vma = find_vma(mm, start); 805 806 if (vma && vma->vm_start <= start && vma->vm_end > start) { 807 callback_fn((u64)(long)task, (u64)(long)vma, 808 (u64)(long)callback_ctx, 0, 0); 809 ret = 0; 810 } 811 bpf_mmap_unlock_mm(work, mm); 812 return ret; 813 } 814 815 const struct bpf_func_proto bpf_find_vma_proto = { 816 .func = bpf_find_vma, 817 .ret_type = RET_INTEGER, 818 .arg1_type = ARG_PTR_TO_BTF_ID, 819 .arg1_btf_id = &btf_tracing_ids[BTF_TRACING_TYPE_TASK], 820 .arg2_type = ARG_ANYTHING, 821 .arg3_type = ARG_PTR_TO_FUNC, 822 .arg4_type = ARG_PTR_TO_STACK_OR_NULL, 823 .arg5_type = ARG_ANYTHING, 824 }; 825 826 DEFINE_PER_CPU(struct mmap_unlock_irq_work, mmap_unlock_work); 827 828 static void do_mmap_read_unlock(struct irq_work *entry) 829 { 830 struct mmap_unlock_irq_work *work; 831 832 if (WARN_ON_ONCE(IS_ENABLED(CONFIG_PREEMPT_RT))) 833 return; 834 835 work = container_of(entry, struct mmap_unlock_irq_work, irq_work); 836 mmap_read_unlock_non_owner(work->mm); 837 } 838 839 static int __init task_iter_init(void) 840 { 841 struct mmap_unlock_irq_work *work; 842 int ret, cpu; 843 844 for_each_possible_cpu(cpu) { 845 work = per_cpu_ptr(&mmap_unlock_work, cpu); 846 init_irq_work(&work->irq_work, do_mmap_read_unlock); 847 } 848 849 task_reg_info.ctx_arg_info[0].btf_id = btf_tracing_ids[BTF_TRACING_TYPE_TASK]; 850 ret = bpf_iter_reg_target(&task_reg_info); 851 if (ret) 852 return ret; 853 854 task_file_reg_info.ctx_arg_info[0].btf_id = btf_tracing_ids[BTF_TRACING_TYPE_TASK]; 855 task_file_reg_info.ctx_arg_info[1].btf_id = btf_tracing_ids[BTF_TRACING_TYPE_FILE]; 856 ret = bpf_iter_reg_target(&task_file_reg_info); 857 if (ret) 858 return ret; 859 860 task_vma_reg_info.ctx_arg_info[0].btf_id = btf_tracing_ids[BTF_TRACING_TYPE_TASK]; 861 task_vma_reg_info.ctx_arg_info[1].btf_id = btf_tracing_ids[BTF_TRACING_TYPE_VMA]; 862 return bpf_iter_reg_target(&task_vma_reg_info); 863 } 864 late_initcall(task_iter_init); 865