/* SPDX-License-Identifier: GPL-2.0 */
#undef TRACE_SYSTEM
#define TRACE_SYSTEM sched

#if !defined(_TRACE_SCHED_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_SCHED_H

#include <linux/sched/numa_balancing.h>
#include <linux/tracepoint.h>
#include <linux/binfmts.h>

/*
 * Tracepoint for calling kthread_stop, performed to end a kthread:
 */
TRACE_EVENT(sched_kthread_stop,

	TP_PROTO(struct task_struct *t),

	TP_ARGS(t),

	TP_STRUCT__entry(
		__array( char, comm, TASK_COMM_LEN )
		__field( pid_t, pid )
	),

	TP_fast_assign(
		memcpy(__entry->comm, t->comm, TASK_COMM_LEN);
		__entry->pid = t->pid;
	),

	TP_printk("comm=%s pid=%d", __entry->comm, __entry->pid)
);

/*
 * Tracepoint for the return value of the kthread stopping:
 */
TRACE_EVENT(sched_kthread_stop_ret,

	TP_PROTO(int ret),

	TP_ARGS(ret),

	TP_STRUCT__entry(
		__field( int, ret )
	),

	TP_fast_assign(
		__entry->ret = ret;
	),

	TP_printk("ret=%d", __entry->ret)
);

/*
 * Tracepoint for waking up a task:
 */
DECLARE_EVENT_CLASS(sched_wakeup_template,

	TP_PROTO(struct task_struct *p),

	TP_ARGS(__perf_task(p)),

	TP_STRUCT__entry(
		__array( char, comm, TASK_COMM_LEN )
		__field( pid_t, pid )
		__field( int, prio )
		__field( int, success )
		__field( int, target_cpu )
	),

	TP_fast_assign(
		memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
		__entry->pid = p->pid;
		__entry->prio = p->prio; /* XXX SCHED_DEADLINE */
		__entry->success = 1; /* rudiment, kill when possible */
		__entry->target_cpu = task_cpu(p);
	),

	TP_printk("comm=%s pid=%d prio=%d target_cpu=%03d",
		  __entry->comm, __entry->pid, __entry->prio,
		  __entry->target_cpu)
);

/*
 * Tracepoint called when waking a task; this tracepoint is guaranteed to be
 * called from the waking context.
 */
DEFINE_EVENT(sched_wakeup_template, sched_waking,
	     TP_PROTO(struct task_struct *p),
	     TP_ARGS(p));

/*
 * Tracepoint called when the task is actually woken; p->state == TASK_RUNNING.
 * It is not always called from the waking context.
 */
DEFINE_EVENT(sched_wakeup_template, sched_wakeup,
	     TP_PROTO(struct task_struct *p),
	     TP_ARGS(p));

/*
 * Tracepoint for waking up a new task:
 */
DEFINE_EVENT(sched_wakeup_template, sched_wakeup_new,
	     TP_PROTO(struct task_struct *p),
	     TP_ARGS(p));

#ifdef CREATE_TRACE_POINTS
static inline long __trace_sched_switch_state(bool preempt, struct task_struct *p)
{
	unsigned int state;

#ifdef CONFIG_SCHED_DEBUG
	BUG_ON(p != current);
#endif /* CONFIG_SCHED_DEBUG */

	/*
	 * Preemption ignores task state, therefore preempted tasks are always
	 * RUNNING (we will not have dequeued if state != RUNNING).
	 */
	if (preempt)
		return TASK_REPORT_MAX;

	/*
	 * task_state_index() uses fls() and returns a value in the 0-8 range.
	 * Decrement it by 1 (except for the TASK_RUNNING state, i.e. 0) before
	 * using it as a left-shift count to recover the original task->state
	 * bit.
	 */
	state = task_state_index(p);

	return state ? (1 << (state - 1)) : state;
}
#endif /* CREATE_TRACE_POINTS */
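/*
 * A worked example of the mapping above, assuming the usual state bit
 * values from <linux/sched.h>: for a task in TASK_UNINTERRUPTIBLE
 * (0x0002), task_state_index() returns fls(0x0002) == 2, and
 * 1 << (2 - 1) == 0x0002 recovers the original state bit;
 * TASK_RUNNING (0) passes through unchanged.
 */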
/*
 * Tracepoint for task switches, performed by the scheduler:
 */
TRACE_EVENT(sched_switch,

	TP_PROTO(bool preempt,
		 struct task_struct *prev,
		 struct task_struct *next),

	TP_ARGS(preempt, prev, next),

	TP_STRUCT__entry(
		__array( char, prev_comm, TASK_COMM_LEN )
		__field( pid_t, prev_pid )
		__field( int, prev_prio )
		__field( long, prev_state )
		__array( char, next_comm, TASK_COMM_LEN )
		__field( pid_t, next_pid )
		__field( int, next_prio )
	),

	TP_fast_assign(
		memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN);
		__entry->prev_pid = prev->pid;
		__entry->prev_prio = prev->prio;
		__entry->prev_state = __trace_sched_switch_state(preempt, prev);
		memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN);
		__entry->next_pid = next->pid;
		__entry->next_prio = next->prio;
		/* XXX SCHED_DEADLINE */
	),

	TP_printk("prev_comm=%s prev_pid=%d prev_prio=%d prev_state=%s%s ==> next_comm=%s next_pid=%d next_prio=%d",
		__entry->prev_comm, __entry->prev_pid, __entry->prev_prio,

		(__entry->prev_state & (TASK_REPORT_MAX - 1)) ?
		  __print_flags(__entry->prev_state & (TASK_REPORT_MAX - 1), "|",
				{ TASK_INTERRUPTIBLE, "S" },
				{ TASK_UNINTERRUPTIBLE, "D" },
				{ __TASK_STOPPED, "T" },
				{ __TASK_TRACED, "t" },
				{ EXIT_DEAD, "X" },
				{ EXIT_ZOMBIE, "Z" },
				{ TASK_PARKED, "P" },
				{ TASK_DEAD, "I" }) :
		  "R",

		__entry->prev_state & TASK_REPORT_MAX ? "+" : "",
		__entry->next_comm, __entry->next_pid, __entry->next_prio)
);
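/*
 * A minimal sketch (not part of this header) of hooking the tracepoint
 * above from kernel code. register_trace_sched_switch() is generated by
 * the TRACE_EVENT() machinery; the probe receives the TP_PROTO arguments
 * preceded by the private data pointer passed at registration. The probe
 * name here is illustrative:
 *
 *	static void probe_sched_switch(void *data, bool preempt,
 *				       struct task_struct *prev,
 *				       struct task_struct *next)
 *	{
 *		pr_info("switch: %s -> %s\n", prev->comm, next->comm);
 *	}
 *
 *	register_trace_sched_switch(probe_sched_switch, NULL);
 *	unregister_trace_sched_switch(probe_sched_switch, NULL);
 */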
/*
 * Tracepoint for a task being migrated:
 */
TRACE_EVENT(sched_migrate_task,

	TP_PROTO(struct task_struct *p, int dest_cpu),

	TP_ARGS(p, dest_cpu),

	TP_STRUCT__entry(
		__array( char, comm, TASK_COMM_LEN )
		__field( pid_t, pid )
		__field( int, prio )
		__field( int, orig_cpu )
		__field( int, dest_cpu )
	),

	TP_fast_assign(
		memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
		__entry->pid = p->pid;
		__entry->prio = p->prio; /* XXX SCHED_DEADLINE */
		__entry->orig_cpu = task_cpu(p);
		__entry->dest_cpu = dest_cpu;
	),

	TP_printk("comm=%s pid=%d prio=%d orig_cpu=%d dest_cpu=%d",
		  __entry->comm, __entry->pid, __entry->prio,
		  __entry->orig_cpu, __entry->dest_cpu)
);

DECLARE_EVENT_CLASS(sched_process_template,

	TP_PROTO(struct task_struct *p),

	TP_ARGS(p),

	TP_STRUCT__entry(
		__array( char, comm, TASK_COMM_LEN )
		__field( pid_t, pid )
		__field( int, prio )
	),

	TP_fast_assign(
		memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
		__entry->pid = p->pid;
		__entry->prio = p->prio; /* XXX SCHED_DEADLINE */
	),

	TP_printk("comm=%s pid=%d prio=%d",
		  __entry->comm, __entry->pid, __entry->prio)
);

/*
 * Tracepoint for freeing a task:
 */
DEFINE_EVENT(sched_process_template, sched_process_free,
	     TP_PROTO(struct task_struct *p),
	     TP_ARGS(p));

/*
 * Tracepoint for a task exiting:
 */
DEFINE_EVENT(sched_process_template, sched_process_exit,
	     TP_PROTO(struct task_struct *p),
	     TP_ARGS(p));

/*
 * Tracepoint for waiting on task to unschedule:
 */
DEFINE_EVENT(sched_process_template, sched_wait_task,
	     TP_PROTO(struct task_struct *p),
	     TP_ARGS(p));

/*
 * Tracepoint for a waiting task:
 */
TRACE_EVENT(sched_process_wait,

	TP_PROTO(struct pid *pid),

	TP_ARGS(pid),

	TP_STRUCT__entry(
		__array( char, comm, TASK_COMM_LEN )
		__field( pid_t, pid )
		__field( int, prio )
	),

	TP_fast_assign(
		memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
		__entry->pid = pid_nr(pid);
		__entry->prio = current->prio; /* XXX SCHED_DEADLINE */
	),

	TP_printk("comm=%s pid=%d prio=%d",
		  __entry->comm, __entry->pid, __entry->prio)
);

/*
 * Tracepoint for do_fork:
 */
TRACE_EVENT(sched_process_fork,

	TP_PROTO(struct task_struct *parent, struct task_struct *child),

	TP_ARGS(parent, child),

	TP_STRUCT__entry(
		__array( char, parent_comm, TASK_COMM_LEN )
		__field( pid_t, parent_pid )
		__array( char, child_comm, TASK_COMM_LEN )
		__field( pid_t, child_pid )
	),

	TP_fast_assign(
		memcpy(__entry->parent_comm, parent->comm, TASK_COMM_LEN);
		__entry->parent_pid = parent->pid;
		memcpy(__entry->child_comm, child->comm, TASK_COMM_LEN);
		__entry->child_pid = child->pid;
	),

	TP_printk("comm=%s pid=%d child_comm=%s child_pid=%d",
		  __entry->parent_comm, __entry->parent_pid,
		  __entry->child_comm, __entry->child_pid)
);

/*
 * Tracepoint for exec:
 */
TRACE_EVENT(sched_process_exec,

	TP_PROTO(struct task_struct *p, pid_t old_pid,
		 struct linux_binprm *bprm),

	TP_ARGS(p, old_pid, bprm),

	TP_STRUCT__entry(
		__string( filename, bprm->filename )
		__field( pid_t, pid )
		__field( pid_t, old_pid )
	),

	TP_fast_assign(
		__assign_str(filename, bprm->filename);
		__entry->pid = p->pid;
		__entry->old_pid = old_pid;
	),

	TP_printk("filename=%s pid=%d old_pid=%d", __get_str(filename),
		  __entry->pid, __entry->old_pid)
);
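/*
 * For illustration, with the TP_printk() format above a hypothetical
 * exec of /bin/true by pid 4242 would render in the trace buffer as:
 *
 *	filename=/bin/true pid=4242 old_pid=4242
 *
 * pid and old_pid differ only when a non-leader thread calls exec and
 * takes over the thread group leader's pid.
 */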
/*
 * XXX the below sched_stat tracepoints only apply to SCHED_OTHER/BATCH/IDLE;
 * adding sched_stat support to SCHED_FIFO/RR would be welcome.
 */
DECLARE_EVENT_CLASS(sched_stat_template,

	TP_PROTO(struct task_struct *tsk, u64 delay),

	TP_ARGS(__perf_task(tsk), __perf_count(delay)),

	TP_STRUCT__entry(
		__array( char, comm, TASK_COMM_LEN )
		__field( pid_t, pid )
		__field( u64, delay )
	),

	TP_fast_assign(
		memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
		__entry->pid = tsk->pid;
		__entry->delay = delay;
	),

	TP_printk("comm=%s pid=%d delay=%Lu [ns]",
		  __entry->comm, __entry->pid,
		  (unsigned long long)__entry->delay)
);

/*
 * Tracepoint for accounting wait time (time the task is runnable
 * but not actually running due to scheduler contention).
 */
DEFINE_EVENT(sched_stat_template, sched_stat_wait,
	     TP_PROTO(struct task_struct *tsk, u64 delay),
	     TP_ARGS(tsk, delay));

/*
 * Tracepoint for accounting sleep time (time the task is not runnable,
 * including iowait, see below).
 */
DEFINE_EVENT(sched_stat_template, sched_stat_sleep,
	     TP_PROTO(struct task_struct *tsk, u64 delay),
	     TP_ARGS(tsk, delay));

/*
 * Tracepoint for accounting iowait time (time the task is not runnable
 * due to waiting on IO to complete).
 */
DEFINE_EVENT(sched_stat_template, sched_stat_iowait,
	     TP_PROTO(struct task_struct *tsk, u64 delay),
	     TP_ARGS(tsk, delay));

/*
 * Tracepoint for accounting blocked time (time the task spends in
 * uninterruptible sleep).
 */
DEFINE_EVENT(sched_stat_template, sched_stat_blocked,
	     TP_PROTO(struct task_struct *tsk, u64 delay),
	     TP_ARGS(tsk, delay));

/*
 * Tracepoint for accounting runtime (time the task is executing
 * on a CPU).
 */
DECLARE_EVENT_CLASS(sched_stat_runtime,

	TP_PROTO(struct task_struct *tsk, u64 runtime, u64 vruntime),

	TP_ARGS(tsk, __perf_count(runtime), vruntime),

	TP_STRUCT__entry(
		__array( char, comm, TASK_COMM_LEN )
		__field( pid_t, pid )
		__field( u64, runtime )
		__field( u64, vruntime )
	),

	TP_fast_assign(
		memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
		__entry->pid = tsk->pid;
		__entry->runtime = runtime;
		__entry->vruntime = vruntime;
	),

	TP_printk("comm=%s pid=%d runtime=%Lu [ns] vruntime=%Lu [ns]",
		  __entry->comm, __entry->pid,
		  (unsigned long long)__entry->runtime,
		  (unsigned long long)__entry->vruntime)
);

DEFINE_EVENT(sched_stat_runtime, sched_stat_runtime,
	     TP_PROTO(struct task_struct *tsk, u64 runtime, u64 vruntime),
	     TP_ARGS(tsk, runtime, vruntime));

/*
 * Tracepoint for showing priority inheritance modifying a task's
 * priority.
 */
TRACE_EVENT(sched_pi_setprio,

	TP_PROTO(struct task_struct *tsk, struct task_struct *pi_task),

	TP_ARGS(tsk, pi_task),

	TP_STRUCT__entry(
		__array( char, comm, TASK_COMM_LEN )
		__field( pid_t, pid )
		__field( int, oldprio )
		__field( int, newprio )
	),

	TP_fast_assign(
		memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
		__entry->pid = tsk->pid;
		__entry->oldprio = tsk->prio;
		__entry->newprio = pi_task ?
				min(tsk->normal_prio, pi_task->prio) :
				tsk->normal_prio;
		/* XXX SCHED_DEADLINE bits missing */
	),

	TP_printk("comm=%s pid=%d oldprio=%d newprio=%d",
		  __entry->comm, __entry->pid,
		  __entry->oldprio, __entry->newprio)
);
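/*
 * A worked example for the newprio computation above (kernel prio values
 * grow downward: 0-99 for RT, 100-139 for normal tasks): if a
 * SCHED_NORMAL task with normal_prio 120 is boosted by an RT waiter
 * whose prio is 98, newprio = min(120, 98) = 98; once the last waiter
 * is gone (pi_task == NULL), newprio falls back to normal_prio.
 */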
#ifdef CONFIG_DETECT_HUNG_TASK
TRACE_EVENT(sched_process_hang,
	TP_PROTO(struct task_struct *tsk),
	TP_ARGS(tsk),

	TP_STRUCT__entry(
		__array( char, comm, TASK_COMM_LEN )
		__field( pid_t, pid )
	),

	TP_fast_assign(
		memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
		__entry->pid = tsk->pid;
	),

	TP_printk("comm=%s pid=%d", __entry->comm, __entry->pid)
);
#endif /* CONFIG_DETECT_HUNG_TASK */

DECLARE_EVENT_CLASS(sched_move_task_template,

	TP_PROTO(struct task_struct *tsk, int src_cpu, int dst_cpu),

	TP_ARGS(tsk, src_cpu, dst_cpu),

	TP_STRUCT__entry(
		__field( pid_t, pid )
		__field( pid_t, tgid )
		__field( pid_t, ngid )
		__field( int, src_cpu )
		__field( int, src_nid )
		__field( int, dst_cpu )
		__field( int, dst_nid )
	),

	TP_fast_assign(
		__entry->pid = task_pid_nr(tsk);
		__entry->tgid = task_tgid_nr(tsk);
		__entry->ngid = task_numa_group_id(tsk);
		__entry->src_cpu = src_cpu;
		__entry->src_nid = cpu_to_node(src_cpu);
		__entry->dst_cpu = dst_cpu;
		__entry->dst_nid = cpu_to_node(dst_cpu);
	),

	TP_printk("pid=%d tgid=%d ngid=%d src_cpu=%d src_nid=%d dst_cpu=%d dst_nid=%d",
		  __entry->pid, __entry->tgid, __entry->ngid,
		  __entry->src_cpu, __entry->src_nid,
		  __entry->dst_cpu, __entry->dst_nid)
);

/*
 * Tracks migration of tasks from one runqueue to another. Can be used to
 * detect whether automatic NUMA balancing is bouncing between nodes.
 */
DEFINE_EVENT(sched_move_task_template, sched_move_numa,
	TP_PROTO(struct task_struct *tsk, int src_cpu, int dst_cpu),

	TP_ARGS(tsk, src_cpu, dst_cpu)
);

DEFINE_EVENT(sched_move_task_template, sched_stick_numa,
	TP_PROTO(struct task_struct *tsk, int src_cpu, int dst_cpu),

	TP_ARGS(tsk, src_cpu, dst_cpu)
);

TRACE_EVENT(sched_swap_numa,

	TP_PROTO(struct task_struct *src_tsk, int src_cpu,
		 struct task_struct *dst_tsk, int dst_cpu),

	TP_ARGS(src_tsk, src_cpu, dst_tsk, dst_cpu),

	TP_STRUCT__entry(
		__field( pid_t, src_pid )
		__field( pid_t, src_tgid )
		__field( pid_t, src_ngid )
		__field( int, src_cpu )
		__field( int, src_nid )
		__field( pid_t, dst_pid )
		__field( pid_t, dst_tgid )
		__field( pid_t, dst_ngid )
		__field( int, dst_cpu )
		__field( int, dst_nid )
	),

	TP_fast_assign(
		__entry->src_pid = task_pid_nr(src_tsk);
		__entry->src_tgid = task_tgid_nr(src_tsk);
		__entry->src_ngid = task_numa_group_id(src_tsk);
		__entry->src_cpu = src_cpu;
		__entry->src_nid = cpu_to_node(src_cpu);
		__entry->dst_pid = task_pid_nr(dst_tsk);
		__entry->dst_tgid = task_tgid_nr(dst_tsk);
		__entry->dst_ngid = task_numa_group_id(dst_tsk);
		__entry->dst_cpu = dst_cpu;
		__entry->dst_nid = cpu_to_node(dst_cpu);
	),

	TP_printk("src_pid=%d src_tgid=%d src_ngid=%d src_cpu=%d src_nid=%d dst_pid=%d dst_tgid=%d dst_ngid=%d dst_cpu=%d dst_nid=%d",
		  __entry->src_pid, __entry->src_tgid, __entry->src_ngid,
		  __entry->src_cpu, __entry->src_nid,
		  __entry->dst_pid, __entry->dst_tgid, __entry->dst_ngid,
		  __entry->dst_cpu, __entry->dst_nid)
);

/*
 * Tracepoint for waking a polling CPU without an IPI.
 */
TRACE_EVENT(sched_wake_idle_without_ipi,

	TP_PROTO(int cpu),

	TP_ARGS(cpu),

	TP_STRUCT__entry(
		__field( int, cpu )
	),

	TP_fast_assign(
		__entry->cpu = cpu;
	),

	TP_printk("cpu=%d", __entry->cpu)
);
#endif /* _TRACE_SCHED_H */

/* This part must be outside protection */
#include <trace/define_trace.h>
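/*
 * A usage sketch from userspace, assuming tracefs is mounted at
 * /sys/kernel/tracing: each event declared in this file appears under
 * events/sched/ and can be enabled individually, e.g. to watch whether
 * NUMA balancing is bouncing tasks between nodes:
 *
 *	echo 1 > /sys/kernel/tracing/events/sched/sched_move_numa/enable
 *	cat /sys/kernel/tracing/trace_pipe
 */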