/* SPDX-License-Identifier: GPL-2.0 */
#undef TRACE_SYSTEM
#define TRACE_SYSTEM sched

#if !defined(_TRACE_SCHED_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_SCHED_H

#include <linux/sched/numa_balancing.h>
#include <linux/tracepoint.h>
#include <linux/binfmts.h>

/*
 * Tracepoint for calling kthread_stop, performed to end a kthread:
 */
TRACE_EVENT(sched_kthread_stop,

	TP_PROTO(struct task_struct *t),

	TP_ARGS(t),

	TP_STRUCT__entry(
		__array(	char,	comm,	TASK_COMM_LEN	)
		__field(	pid_t,	pid			)
	),

	TP_fast_assign(
		memcpy(__entry->comm, t->comm, TASK_COMM_LEN);
		__entry->pid	= t->pid;
	),

	TP_printk("comm=%s pid=%d", __entry->comm, __entry->pid)
);

/*
 * Tracepoint for the return value of the kthread stopping:
 */
TRACE_EVENT(sched_kthread_stop_ret,

	TP_PROTO(int ret),

	TP_ARGS(ret),

	TP_STRUCT__entry(
		__field(	int,	ret	)
	),

	TP_fast_assign(
		__entry->ret	= ret;
	),

	TP_printk("ret=%d", __entry->ret)
);

/*
 * Tracepoint for waking up a task:
 */
DECLARE_EVENT_CLASS(sched_wakeup_template,

	TP_PROTO(struct task_struct *p),

	TP_ARGS(__perf_task(p)),

	TP_STRUCT__entry(
		__array(	char,	comm,	TASK_COMM_LEN	)
		__field(	pid_t,	pid			)
		__field(	int,	prio			)
		__field(	int,	success			)
		__field(	int,	target_cpu		)
	),

	TP_fast_assign(
		memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
		__entry->pid		= p->pid;
		__entry->prio		= p->prio; /* XXX SCHED_DEADLINE */
		__entry->success	= 1; /* rudiment, kill when possible */
		__entry->target_cpu	= task_cpu(p);
	),

	TP_printk("comm=%s pid=%d prio=%d target_cpu=%03d",
		  __entry->comm, __entry->pid, __entry->prio,
		  __entry->target_cpu)
);

/*
 * Tracepoint called when waking a task; this tracepoint is guaranteed to be
 * called from the waking context.
 */
DEFINE_EVENT(sched_wakeup_template, sched_waking,
	     TP_PROTO(struct task_struct *p),
	     TP_ARGS(p));

/*
 * Tracepoint called when the task is actually woken; p->state == TASK_RUNNING.
 * It is not always called from the waking context.
 */
DEFINE_EVENT(sched_wakeup_template, sched_wakeup,
	     TP_PROTO(struct task_struct *p),
	     TP_ARGS(p));

/*
 * Tracepoint for waking up a new task:
 */
DEFINE_EVENT(sched_wakeup_template, sched_wakeup_new,
	     TP_PROTO(struct task_struct *p),
	     TP_ARGS(p));

#ifdef CREATE_TRACE_POINTS
static inline long __trace_sched_switch_state(bool preempt, struct task_struct *p)
{
	unsigned int state;

#ifdef CONFIG_SCHED_DEBUG
	BUG_ON(p != current);
#endif /* CONFIG_SCHED_DEBUG */

	/*
	 * Preemption ignores task state, therefore preempted tasks are always
	 * RUNNING (we will not have dequeued if state != RUNNING).
	 */
	if (preempt)
		return TASK_REPORT_MAX;

	/*
	 * task_state_index() uses fls() and returns a value in the 0-8 range.
	 * Decrement it by 1 (except for TASK_RUNNING, i.e. 0) before using it
	 * as a left-shift count to recover the corresponding task->state bit;
	 * e.g. TASK_INTERRUPTIBLE has index 1 and maps back to bit 0 (0x1).
	 */
	state = task_state_index(p);

	return state ? (1 << (state - 1)) : state;
}
#endif /* CREATE_TRACE_POINTS */

/*
 * Tracepoint for task switches, performed by the scheduler:
 */
TRACE_EVENT(sched_switch,

	TP_PROTO(bool preempt,
		 struct task_struct *prev,
		 struct task_struct *next),

	TP_ARGS(preempt, prev, next),

	TP_STRUCT__entry(
		__array(	char,	prev_comm,	TASK_COMM_LEN	)
		__field(	pid_t,	prev_pid			)
		__field(	int,	prev_prio			)
		__field(	long,	prev_state			)
		__array(	char,	next_comm,	TASK_COMM_LEN	)
		__field(	pid_t,	next_pid			)
		__field(	int,	next_prio			)
	),

	TP_fast_assign(
		memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN);
		__entry->prev_pid	= prev->pid;
		__entry->prev_prio	= prev->prio;
		__entry->prev_state	= __trace_sched_switch_state(preempt, prev);
		memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN);
		__entry->next_pid	= next->pid;
		__entry->next_prio	= next->prio;
		/* XXX SCHED_DEADLINE */
	),

	TP_printk("prev_comm=%s prev_pid=%d prev_prio=%d prev_state=%s%s ==> next_comm=%s next_pid=%d next_prio=%d",
		__entry->prev_comm, __entry->prev_pid, __entry->prev_prio,

		(__entry->prev_state & (TASK_REPORT_MAX - 1)) ?
		  __print_flags(__entry->prev_state & (TASK_REPORT_MAX - 1), "|",
				{ TASK_INTERRUPTIBLE, "S" },
				{ TASK_UNINTERRUPTIBLE, "D" },
				{ __TASK_STOPPED, "T" },
				{ __TASK_TRACED, "t" },
				{ EXIT_DEAD, "X" },
				{ EXIT_ZOMBIE, "Z" },
				{ TASK_PARKED, "P" },
				{ TASK_DEAD, "I" }) :
		  "R",

		__entry->prev_state & TASK_REPORT_MAX ? "+" : "",
		__entry->next_comm, __entry->next_pid, __entry->next_prio)
);
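
/*
 * Illustrative rendering only (task names, PIDs and priorities below are
 * made up): with the format above, a sleep-induced switch and a preemption
 * print roughly as
 *
 *   prev_comm=cat prev_pid=2143 prev_prio=120 prev_state=S ==> next_comm=swapper/1 next_pid=0 next_prio=120
 *   prev_comm=cat prev_pid=2143 prev_prio=120 prev_state=R+ ==> next_comm=ksoftirqd/1 next_pid=16 next_prio=120
 *
 * "R+" is the preemption case: __trace_sched_switch_state() returned
 * TASK_REPORT_MAX, so no state flag matches and only the "+" suffix is added.
 */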

/*
 * Tracepoint for a task being migrated:
 */
TRACE_EVENT(sched_migrate_task,

	TP_PROTO(struct task_struct *p, int dest_cpu),

	TP_ARGS(p, dest_cpu),

	TP_STRUCT__entry(
		__array(	char,	comm,	TASK_COMM_LEN	)
		__field(	pid_t,	pid			)
		__field(	int,	prio			)
		__field(	int,	orig_cpu		)
		__field(	int,	dest_cpu		)
	),

	TP_fast_assign(
		memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
		__entry->pid		= p->pid;
		__entry->prio		= p->prio; /* XXX SCHED_DEADLINE */
		__entry->orig_cpu	= task_cpu(p);
		__entry->dest_cpu	= dest_cpu;
	),

	TP_printk("comm=%s pid=%d prio=%d orig_cpu=%d dest_cpu=%d",
		  __entry->comm, __entry->pid, __entry->prio,
		  __entry->orig_cpu, __entry->dest_cpu)
);

DECLARE_EVENT_CLASS(sched_process_template,

	TP_PROTO(struct task_struct *p),

	TP_ARGS(p),

	TP_STRUCT__entry(
		__array(	char,	comm,	TASK_COMM_LEN	)
		__field(	pid_t,	pid			)
		__field(	int,	prio			)
	),

	TP_fast_assign(
		memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
		__entry->pid		= p->pid;
		__entry->prio		= p->prio; /* XXX SCHED_DEADLINE */
	),

	TP_printk("comm=%s pid=%d prio=%d",
		  __entry->comm, __entry->pid, __entry->prio)
);

/*
 * Tracepoint for freeing a task:
 */
DEFINE_EVENT(sched_process_template, sched_process_free,
	     TP_PROTO(struct task_struct *p),
	     TP_ARGS(p));

/*
 * Tracepoint for a task exiting:
 */
DEFINE_EVENT(sched_process_template, sched_process_exit,
	     TP_PROTO(struct task_struct *p),
	     TP_ARGS(p));

/*
 * Tracepoint for waiting on task to unschedule:
 */
DEFINE_EVENT(sched_process_template, sched_wait_task,
	     TP_PROTO(struct task_struct *p),
	     TP_ARGS(p));

/*
 * Tracepoint for a waiting task:
 */
TRACE_EVENT(sched_process_wait,

	TP_PROTO(struct pid *pid),

	TP_ARGS(pid),

	TP_STRUCT__entry(
		__array(	char,	comm,	TASK_COMM_LEN	)
		__field(	pid_t,	pid			)
		__field(	int,	prio			)
	),

	TP_fast_assign(
		memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
		__entry->pid		= pid_nr(pid);
		__entry->prio		= current->prio; /* XXX SCHED_DEADLINE */
	),

	TP_printk("comm=%s pid=%d prio=%d",
		  __entry->comm, __entry->pid, __entry->prio)
);

/*
 * Tracepoint for do_fork:
 */
TRACE_EVENT(sched_process_fork,

	TP_PROTO(struct task_struct *parent, struct task_struct *child),

	TP_ARGS(parent, child),

	TP_STRUCT__entry(
		__array(	char,	parent_comm,	TASK_COMM_LEN	)
		__field(	pid_t,	parent_pid			)
		__array(	char,	child_comm,	TASK_COMM_LEN	)
		__field(	pid_t,	child_pid			)
	),

	TP_fast_assign(
		memcpy(__entry->parent_comm, parent->comm, TASK_COMM_LEN);
		__entry->parent_pid	= parent->pid;
		memcpy(__entry->child_comm, child->comm, TASK_COMM_LEN);
		__entry->child_pid	= child->pid;
	),

	TP_printk("comm=%s pid=%d child_comm=%s child_pid=%d",
		  __entry->parent_comm, __entry->parent_pid,
		  __entry->child_comm, __entry->child_pid)
);

/*
 * Tracepoint for exec:
 */
TRACE_EVENT(sched_process_exec,

	TP_PROTO(struct task_struct *p, pid_t old_pid,
		 struct linux_binprm *bprm),

	TP_ARGS(p, old_pid, bprm),

	TP_STRUCT__entry(
		__string(	filename,	bprm->filename	)
		__field(	pid_t,		pid		)
		__field(	pid_t,		old_pid		)
	),

	TP_fast_assign(
		__assign_str(filename, bprm->filename);
		__entry->pid		= p->pid;
		__entry->old_pid	= old_pid;
	),

	TP_printk("filename=%s pid=%d old_pid=%d", __get_str(filename),
		  __entry->pid, __entry->old_pid)
);
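
/*
 * Illustrative usage only (not part of this header): every TRACE_EVENT()
 * above is exported through tracefs and can be enabled from user space,
 * e.g. to watch forks:
 *
 *   # cd /sys/kernel/tracing          (or /sys/kernel/debug/tracing)
 *   # echo 1 > events/sched/sched_process_fork/enable
 *   # cat trace_pipe
 */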

#ifdef CONFIG_SCHEDSTATS
#define DEFINE_EVENT_SCHEDSTAT DEFINE_EVENT
#define DECLARE_EVENT_CLASS_SCHEDSTAT DECLARE_EVENT_CLASS
#else
#define DEFINE_EVENT_SCHEDSTAT DEFINE_EVENT_NOP
#define DECLARE_EVENT_CLASS_SCHEDSTAT DECLARE_EVENT_CLASS_NOP
#endif

/*
 * XXX the below sched_stat tracepoints only apply to SCHED_OTHER/BATCH/IDLE;
 *     adding sched_stat support to SCHED_FIFO/RR would be welcome.
 */
DECLARE_EVENT_CLASS_SCHEDSTAT(sched_stat_template,

	TP_PROTO(struct task_struct *tsk, u64 delay),

	TP_ARGS(__perf_task(tsk), __perf_count(delay)),

	TP_STRUCT__entry(
		__array( char,	comm,	TASK_COMM_LEN	)
		__field( pid_t,	pid			)
		__field( u64,	delay			)
	),

	TP_fast_assign(
		memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
		__entry->pid	= tsk->pid;
		__entry->delay	= delay;
	),

	TP_printk("comm=%s pid=%d delay=%Lu [ns]",
		  __entry->comm, __entry->pid,
		  (unsigned long long)__entry->delay)
);
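
/*
 * Note (roughly speaking): __perf_task()/__perf_count() above only affect the
 * perf side of these events. When one of them is used as a perf event, the
 * sample is attributed to @tsk and @delay is used as the event count instead
 * of the default of 1; the regular ftrace output is unaffected.
 */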

/*
 * Tracepoint for accounting wait time (time the task is runnable
 * but not actually running due to scheduler contention).
 */
DEFINE_EVENT_SCHEDSTAT(sched_stat_template, sched_stat_wait,
	     TP_PROTO(struct task_struct *tsk, u64 delay),
	     TP_ARGS(tsk, delay));

/*
 * Tracepoint for accounting sleep time (time the task is not runnable,
 * including iowait, see below).
 */
DEFINE_EVENT_SCHEDSTAT(sched_stat_template, sched_stat_sleep,
	     TP_PROTO(struct task_struct *tsk, u64 delay),
	     TP_ARGS(tsk, delay));

/*
 * Tracepoint for accounting iowait time (time the task is not runnable
 * due to waiting on IO to complete).
 */
DEFINE_EVENT_SCHEDSTAT(sched_stat_template, sched_stat_iowait,
	     TP_PROTO(struct task_struct *tsk, u64 delay),
	     TP_ARGS(tsk, delay));

/*
 * Tracepoint for accounting blocked time (time the task is in uninterruptible
 * sleep).
 */
DEFINE_EVENT_SCHEDSTAT(sched_stat_template, sched_stat_blocked,
	     TP_PROTO(struct task_struct *tsk, u64 delay),
	     TP_ARGS(tsk, delay));

/*
 * Tracepoint for accounting runtime (time the task is executing
 * on a CPU).
 */
DECLARE_EVENT_CLASS(sched_stat_runtime,

	TP_PROTO(struct task_struct *tsk, u64 runtime, u64 vruntime),

	TP_ARGS(tsk, __perf_count(runtime), vruntime),

	TP_STRUCT__entry(
		__array( char,	comm,	TASK_COMM_LEN	)
		__field( pid_t,	pid			)
		__field( u64,	runtime			)
		__field( u64,	vruntime		)
	),

	TP_fast_assign(
		memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
		__entry->pid		= tsk->pid;
		__entry->runtime	= runtime;
		__entry->vruntime	= vruntime;
	),

	TP_printk("comm=%s pid=%d runtime=%Lu [ns] vruntime=%Lu [ns]",
		  __entry->comm, __entry->pid,
		  (unsigned long long)__entry->runtime,
		  (unsigned long long)__entry->vruntime)
);

DEFINE_EVENT(sched_stat_runtime, sched_stat_runtime,
	     TP_PROTO(struct task_struct *tsk, u64 runtime, u64 vruntime),
	     TP_ARGS(tsk, runtime, vruntime));

/*
 * Tracepoint for showing priority inheritance modifying a task's
 * priority.
 */
TRACE_EVENT(sched_pi_setprio,

	TP_PROTO(struct task_struct *tsk, struct task_struct *pi_task),

	TP_ARGS(tsk, pi_task),

	TP_STRUCT__entry(
		__array( char,	comm,	TASK_COMM_LEN	)
		__field( pid_t,	pid			)
		__field( int,	oldprio			)
		__field( int,	newprio			)
	),

	TP_fast_assign(
		memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
		__entry->pid		= tsk->pid;
		__entry->oldprio	= tsk->prio;
		__entry->newprio	= pi_task ?
				min(tsk->normal_prio, pi_task->prio) :
				tsk->normal_prio;
		/* XXX SCHED_DEADLINE bits missing */
	),

	TP_printk("comm=%s pid=%d oldprio=%d newprio=%d",
		  __entry->comm, __entry->pid,
		  __entry->oldprio, __entry->newprio)
);

#ifdef CONFIG_DETECT_HUNG_TASK
TRACE_EVENT(sched_process_hang,
	TP_PROTO(struct task_struct *tsk),
	TP_ARGS(tsk),

	TP_STRUCT__entry(
		__array( char,	comm,	TASK_COMM_LEN	)
		__field( pid_t,	pid			)
	),

	TP_fast_assign(
		memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
		__entry->pid = tsk->pid;
	),

	TP_printk("comm=%s pid=%d", __entry->comm, __entry->pid)
);
#endif /* CONFIG_DETECT_HUNG_TASK */

/*
 * Tracks migration of tasks from one runqueue to another. Can be used to
 * detect if automatic NUMA balancing is bouncing between nodes.
 */
TRACE_EVENT(sched_move_numa,

	TP_PROTO(struct task_struct *tsk, int src_cpu, int dst_cpu),

	TP_ARGS(tsk, src_cpu, dst_cpu),

	TP_STRUCT__entry(
		__field( pid_t,	pid			)
		__field( pid_t,	tgid			)
		__field( pid_t,	ngid			)
		__field( int,	src_cpu			)
		__field( int,	src_nid			)
		__field( int,	dst_cpu			)
		__field( int,	dst_nid			)
	),

	TP_fast_assign(
		__entry->pid		= task_pid_nr(tsk);
		__entry->tgid		= task_tgid_nr(tsk);
		__entry->ngid		= task_numa_group_id(tsk);
		__entry->src_cpu	= src_cpu;
		__entry->src_nid	= cpu_to_node(src_cpu);
		__entry->dst_cpu	= dst_cpu;
		__entry->dst_nid	= cpu_to_node(dst_cpu);
	),

	TP_printk("pid=%d tgid=%d ngid=%d src_cpu=%d src_nid=%d dst_cpu=%d dst_nid=%d",
		  __entry->pid, __entry->tgid, __entry->ngid,
		  __entry->src_cpu, __entry->src_nid,
		  __entry->dst_cpu, __entry->dst_nid)
);

DECLARE_EVENT_CLASS(sched_numa_pair_template,

	TP_PROTO(struct task_struct *src_tsk, int src_cpu,
		 struct task_struct *dst_tsk, int dst_cpu),

	TP_ARGS(src_tsk, src_cpu, dst_tsk, dst_cpu),

	TP_STRUCT__entry(
		__field( pid_t,	src_pid			)
		__field( pid_t,	src_tgid		)
		__field( pid_t,	src_ngid		)
		__field( int,	src_cpu			)
		__field( int,	src_nid			)
		__field( pid_t,	dst_pid			)
		__field( pid_t,	dst_tgid		)
		__field( pid_t,	dst_ngid		)
		__field( int,	dst_cpu			)
		__field( int,	dst_nid			)
	),

	TP_fast_assign(
		__entry->src_pid	= task_pid_nr(src_tsk);
		__entry->src_tgid	= task_tgid_nr(src_tsk);
		__entry->src_ngid	= task_numa_group_id(src_tsk);
		__entry->src_cpu	= src_cpu;
		__entry->src_nid	= cpu_to_node(src_cpu);
		__entry->dst_pid	= dst_tsk ? task_pid_nr(dst_tsk) : 0;
		__entry->dst_tgid	= dst_tsk ? task_tgid_nr(dst_tsk) : 0;
		__entry->dst_ngid	= dst_tsk ? task_numa_group_id(dst_tsk) : 0;
		__entry->dst_cpu	= dst_cpu;
		__entry->dst_nid	= dst_cpu >= 0 ? cpu_to_node(dst_cpu) : -1;
	),

	TP_printk("src_pid=%d src_tgid=%d src_ngid=%d src_cpu=%d src_nid=%d dst_pid=%d dst_tgid=%d dst_ngid=%d dst_cpu=%d dst_nid=%d",
		  __entry->src_pid, __entry->src_tgid, __entry->src_ngid,
		  __entry->src_cpu, __entry->src_nid,
		  __entry->dst_pid, __entry->dst_tgid, __entry->dst_ngid,
		  __entry->dst_cpu, __entry->dst_nid)
);

DEFINE_EVENT(sched_numa_pair_template, sched_stick_numa,

	TP_PROTO(struct task_struct *src_tsk, int src_cpu,
		 struct task_struct *dst_tsk, int dst_cpu),

	TP_ARGS(src_tsk, src_cpu, dst_tsk, dst_cpu)
);

DEFINE_EVENT(sched_numa_pair_template, sched_swap_numa,

	TP_PROTO(struct task_struct *src_tsk, int src_cpu,
		 struct task_struct *dst_tsk, int dst_cpu),

	TP_ARGS(src_tsk, src_cpu, dst_tsk, dst_cpu)
);

/*
 * Tracepoint for waking a polling cpu without an IPI.
 */
TRACE_EVENT(sched_wake_idle_without_ipi,

	TP_PROTO(int cpu),

	TP_ARGS(cpu),

	TP_STRUCT__entry(
		__field( int,	cpu	)
	),

	TP_fast_assign(
		__entry->cpu	= cpu;
	),

	TP_printk("cpu=%d", __entry->cpu)
);

/*
 * The following tracepoints are not exported in tracefs and provide hooking
 * mechanisms only for testing and debugging purposes.
 *
 * Postfixed with _tp to make them easily identifiable in the code.
 */
DECLARE_TRACE(pelt_cfs_tp,
	TP_PROTO(struct cfs_rq *cfs_rq),
	TP_ARGS(cfs_rq));

DECLARE_TRACE(pelt_rt_tp,
	TP_PROTO(struct rq *rq),
	TP_ARGS(rq));

DECLARE_TRACE(pelt_dl_tp,
	TP_PROTO(struct rq *rq),
	TP_ARGS(rq));

DECLARE_TRACE(pelt_thermal_tp,
	TP_PROTO(struct rq *rq),
	TP_ARGS(rq));

DECLARE_TRACE(pelt_irq_tp,
	TP_PROTO(struct rq *rq),
	TP_ARGS(rq));

DECLARE_TRACE(pelt_se_tp,
	TP_PROTO(struct sched_entity *se),
	TP_ARGS(se));

DECLARE_TRACE(sched_cpu_capacity_tp,
	TP_PROTO(struct rq *rq),
	TP_ARGS(rq));

DECLARE_TRACE(sched_overutilized_tp,
	TP_PROTO(struct root_domain *rd, bool overutilized),
	TP_ARGS(rd, overutilized));

DECLARE_TRACE(sched_util_est_cfs_tp,
	TP_PROTO(struct cfs_rq *cfs_rq),
	TP_ARGS(cfs_rq));

DECLARE_TRACE(sched_util_est_se_tp,
	TP_PROTO(struct sched_entity *se),
	TP_ARGS(se));

DECLARE_TRACE(sched_update_nr_running_tp,
	TP_PROTO(struct rq *rq, int change),
	TP_ARGS(rq, change));
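
/*
 * Illustrative sketch only (the probe name is hypothetical): since the _tp
 * tracepoints above are bare DECLARE_TRACE()s, they are consumed from kernel
 * code by registering a probe, e.g.:
 *
 *	static void probe_overutilized(void *data, struct root_domain *rd,
 *				       bool overutilized)
 *	{
 *		pr_debug("rd overutilized=%d\n", overutilized);
 *	}
 *
 *	register_trace_sched_overutilized_tp(probe_overutilized, NULL);
 *	...
 *	unregister_trace_sched_overutilized_tp(probe_overutilized, NULL);
 *
 * The register/unregister helpers are generated by DECLARE_TRACE(); the
 * probe's leading void *data argument receives the pointer passed at
 * registration time.
 */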

#endif /* _TRACE_SCHED_H */

/* This part must be outside protection */
#include <trace/define_trace.h>