1 /* SPDX-License-Identifier: GPL-2.0 */ 2 #undef TRACE_SYSTEM 3 #define TRACE_SYSTEM sched 4 5 #if !defined(_TRACE_SCHED_H) || defined(TRACE_HEADER_MULTI_READ) 6 #define _TRACE_SCHED_H 7 8 #include <linux/sched/numa_balancing.h> 9 #include <linux/tracepoint.h> 10 #include <linux/binfmts.h> 11 12 /* 13 * Tracepoint for calling kthread_stop, performed to end a kthread: 14 */ 15 TRACE_EVENT(sched_kthread_stop, 16 17 TP_PROTO(struct task_struct *t), 18 19 TP_ARGS(t), 20 21 TP_STRUCT__entry( 22 __array( char, comm, TASK_COMM_LEN ) 23 __field( pid_t, pid ) 24 ), 25 26 TP_fast_assign( 27 memcpy(__entry->comm, t->comm, TASK_COMM_LEN); 28 __entry->pid = t->pid; 29 ), 30 31 TP_printk("comm=%s pid=%d", __entry->comm, __entry->pid) 32 ); 33 34 /* 35 * Tracepoint for the return value of the kthread stopping: 36 */ 37 TRACE_EVENT(sched_kthread_stop_ret, 38 39 TP_PROTO(int ret), 40 41 TP_ARGS(ret), 42 43 TP_STRUCT__entry( 44 __field( int, ret ) 45 ), 46 47 TP_fast_assign( 48 __entry->ret = ret; 49 ), 50 51 TP_printk("ret=%d", __entry->ret) 52 ); 53 54 /* 55 * Tracepoint for waking up a task: 56 */ 57 DECLARE_EVENT_CLASS(sched_wakeup_template, 58 59 TP_PROTO(struct task_struct *p), 60 61 TP_ARGS(__perf_task(p)), 62 63 TP_STRUCT__entry( 64 __array( char, comm, TASK_COMM_LEN ) 65 __field( pid_t, pid ) 66 __field( int, prio ) 67 __field( int, success ) 68 __field( int, target_cpu ) 69 ), 70 71 TP_fast_assign( 72 memcpy(__entry->comm, p->comm, TASK_COMM_LEN); 73 __entry->pid = p->pid; 74 __entry->prio = p->prio; /* XXX SCHED_DEADLINE */ 75 __entry->success = 1; /* rudiment, kill when possible */ 76 __entry->target_cpu = task_cpu(p); 77 ), 78 79 TP_printk("comm=%s pid=%d prio=%d target_cpu=%03d", 80 __entry->comm, __entry->pid, __entry->prio, 81 __entry->target_cpu) 82 ); 83 84 /* 85 * Tracepoint called when waking a task; this tracepoint is guaranteed to be 86 * called from the waking context. 87 */ 88 DEFINE_EVENT(sched_wakeup_template, sched_waking, 89 TP_PROTO(struct task_struct *p), 90 TP_ARGS(p)); 91 92 /* 93 * Tracepoint called when the task is actually woken; p->state == TASK_RUNNNG. 94 * It it not always called from the waking context. 95 */ 96 DEFINE_EVENT(sched_wakeup_template, sched_wakeup, 97 TP_PROTO(struct task_struct *p), 98 TP_ARGS(p)); 99 100 /* 101 * Tracepoint for waking up a new task: 102 */ 103 DEFINE_EVENT(sched_wakeup_template, sched_wakeup_new, 104 TP_PROTO(struct task_struct *p), 105 TP_ARGS(p)); 106 107 #ifdef CREATE_TRACE_POINTS 108 static inline long __trace_sched_switch_state(bool preempt, struct task_struct *p) 109 { 110 #ifdef CONFIG_SCHED_DEBUG 111 BUG_ON(p != current); 112 #endif /* CONFIG_SCHED_DEBUG */ 113 114 /* 115 * Preemption ignores task state, therefore preempted tasks are always 116 * RUNNING (we will not have dequeued if state != RUNNING). 117 */ 118 if (preempt) 119 return TASK_REPORT_MAX; 120 121 return 1 << task_state_index(p); 122 } 123 #endif /* CREATE_TRACE_POINTS */ 124 125 /* 126 * Tracepoint for task switches, performed by the scheduler: 127 */ 128 TRACE_EVENT(sched_switch, 129 130 TP_PROTO(bool preempt, 131 struct task_struct *prev, 132 struct task_struct *next), 133 134 TP_ARGS(preempt, prev, next), 135 136 TP_STRUCT__entry( 137 __array( char, prev_comm, TASK_COMM_LEN ) 138 __field( pid_t, prev_pid ) 139 __field( int, prev_prio ) 140 __field( long, prev_state ) 141 __array( char, next_comm, TASK_COMM_LEN ) 142 __field( pid_t, next_pid ) 143 __field( int, next_prio ) 144 ), 145 146 TP_fast_assign( 147 memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN); 148 __entry->prev_pid = prev->pid; 149 __entry->prev_prio = prev->prio; 150 __entry->prev_state = __trace_sched_switch_state(preempt, prev); 151 memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN); 152 __entry->next_pid = next->pid; 153 __entry->next_prio = next->prio; 154 /* XXX SCHED_DEADLINE */ 155 ), 156 157 TP_printk("prev_comm=%s prev_pid=%d prev_prio=%d prev_state=%s%s ==> next_comm=%s next_pid=%d next_prio=%d", 158 __entry->prev_comm, __entry->prev_pid, __entry->prev_prio, 159 160 (__entry->prev_state & (TASK_REPORT_MAX - 1)) ? 161 __print_flags(__entry->prev_state & (TASK_REPORT_MAX - 1), "|", 162 { TASK_INTERRUPTIBLE, "S" }, 163 { TASK_UNINTERRUPTIBLE, "D" }, 164 { __TASK_STOPPED, "T" }, 165 { __TASK_TRACED, "t" }, 166 { EXIT_DEAD, "X" }, 167 { EXIT_ZOMBIE, "Z" }, 168 { TASK_PARKED, "P" }, 169 { TASK_DEAD, "I" }) : 170 "R", 171 172 __entry->prev_state & TASK_REPORT_MAX ? "+" : "", 173 __entry->next_comm, __entry->next_pid, __entry->next_prio) 174 ); 175 176 /* 177 * Tracepoint for a task being migrated: 178 */ 179 TRACE_EVENT(sched_migrate_task, 180 181 TP_PROTO(struct task_struct *p, int dest_cpu), 182 183 TP_ARGS(p, dest_cpu), 184 185 TP_STRUCT__entry( 186 __array( char, comm, TASK_COMM_LEN ) 187 __field( pid_t, pid ) 188 __field( int, prio ) 189 __field( int, orig_cpu ) 190 __field( int, dest_cpu ) 191 ), 192 193 TP_fast_assign( 194 memcpy(__entry->comm, p->comm, TASK_COMM_LEN); 195 __entry->pid = p->pid; 196 __entry->prio = p->prio; /* XXX SCHED_DEADLINE */ 197 __entry->orig_cpu = task_cpu(p); 198 __entry->dest_cpu = dest_cpu; 199 ), 200 201 TP_printk("comm=%s pid=%d prio=%d orig_cpu=%d dest_cpu=%d", 202 __entry->comm, __entry->pid, __entry->prio, 203 __entry->orig_cpu, __entry->dest_cpu) 204 ); 205 206 DECLARE_EVENT_CLASS(sched_process_template, 207 208 TP_PROTO(struct task_struct *p), 209 210 TP_ARGS(p), 211 212 TP_STRUCT__entry( 213 __array( char, comm, TASK_COMM_LEN ) 214 __field( pid_t, pid ) 215 __field( int, prio ) 216 ), 217 218 TP_fast_assign( 219 memcpy(__entry->comm, p->comm, TASK_COMM_LEN); 220 __entry->pid = p->pid; 221 __entry->prio = p->prio; /* XXX SCHED_DEADLINE */ 222 ), 223 224 TP_printk("comm=%s pid=%d prio=%d", 225 __entry->comm, __entry->pid, __entry->prio) 226 ); 227 228 /* 229 * Tracepoint for freeing a task: 230 */ 231 DEFINE_EVENT(sched_process_template, sched_process_free, 232 TP_PROTO(struct task_struct *p), 233 TP_ARGS(p)); 234 235 236 /* 237 * Tracepoint for a task exiting: 238 */ 239 DEFINE_EVENT(sched_process_template, sched_process_exit, 240 TP_PROTO(struct task_struct *p), 241 TP_ARGS(p)); 242 243 /* 244 * Tracepoint for waiting on task to unschedule: 245 */ 246 DEFINE_EVENT(sched_process_template, sched_wait_task, 247 TP_PROTO(struct task_struct *p), 248 TP_ARGS(p)); 249 250 /* 251 * Tracepoint for a waiting task: 252 */ 253 TRACE_EVENT(sched_process_wait, 254 255 TP_PROTO(struct pid *pid), 256 257 TP_ARGS(pid), 258 259 TP_STRUCT__entry( 260 __array( char, comm, TASK_COMM_LEN ) 261 __field( pid_t, pid ) 262 __field( int, prio ) 263 ), 264 265 TP_fast_assign( 266 memcpy(__entry->comm, current->comm, TASK_COMM_LEN); 267 __entry->pid = pid_nr(pid); 268 __entry->prio = current->prio; /* XXX SCHED_DEADLINE */ 269 ), 270 271 TP_printk("comm=%s pid=%d prio=%d", 272 __entry->comm, __entry->pid, __entry->prio) 273 ); 274 275 /* 276 * Tracepoint for do_fork: 277 */ 278 TRACE_EVENT(sched_process_fork, 279 280 TP_PROTO(struct task_struct *parent, struct task_struct *child), 281 282 TP_ARGS(parent, child), 283 284 TP_STRUCT__entry( 285 __array( char, parent_comm, TASK_COMM_LEN ) 286 __field( pid_t, parent_pid ) 287 __array( char, child_comm, TASK_COMM_LEN ) 288 __field( pid_t, child_pid ) 289 ), 290 291 TP_fast_assign( 292 memcpy(__entry->parent_comm, parent->comm, TASK_COMM_LEN); 293 __entry->parent_pid = parent->pid; 294 memcpy(__entry->child_comm, child->comm, TASK_COMM_LEN); 295 __entry->child_pid = child->pid; 296 ), 297 298 TP_printk("comm=%s pid=%d child_comm=%s child_pid=%d", 299 __entry->parent_comm, __entry->parent_pid, 300 __entry->child_comm, __entry->child_pid) 301 ); 302 303 /* 304 * Tracepoint for exec: 305 */ 306 TRACE_EVENT(sched_process_exec, 307 308 TP_PROTO(struct task_struct *p, pid_t old_pid, 309 struct linux_binprm *bprm), 310 311 TP_ARGS(p, old_pid, bprm), 312 313 TP_STRUCT__entry( 314 __string( filename, bprm->filename ) 315 __field( pid_t, pid ) 316 __field( pid_t, old_pid ) 317 ), 318 319 TP_fast_assign( 320 __assign_str(filename, bprm->filename); 321 __entry->pid = p->pid; 322 __entry->old_pid = old_pid; 323 ), 324 325 TP_printk("filename=%s pid=%d old_pid=%d", __get_str(filename), 326 __entry->pid, __entry->old_pid) 327 ); 328 329 /* 330 * XXX the below sched_stat tracepoints only apply to SCHED_OTHER/BATCH/IDLE 331 * adding sched_stat support to SCHED_FIFO/RR would be welcome. 332 */ 333 DECLARE_EVENT_CLASS(sched_stat_template, 334 335 TP_PROTO(struct task_struct *tsk, u64 delay), 336 337 TP_ARGS(__perf_task(tsk), __perf_count(delay)), 338 339 TP_STRUCT__entry( 340 __array( char, comm, TASK_COMM_LEN ) 341 __field( pid_t, pid ) 342 __field( u64, delay ) 343 ), 344 345 TP_fast_assign( 346 memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN); 347 __entry->pid = tsk->pid; 348 __entry->delay = delay; 349 ), 350 351 TP_printk("comm=%s pid=%d delay=%Lu [ns]", 352 __entry->comm, __entry->pid, 353 (unsigned long long)__entry->delay) 354 ); 355 356 357 /* 358 * Tracepoint for accounting wait time (time the task is runnable 359 * but not actually running due to scheduler contention). 360 */ 361 DEFINE_EVENT(sched_stat_template, sched_stat_wait, 362 TP_PROTO(struct task_struct *tsk, u64 delay), 363 TP_ARGS(tsk, delay)); 364 365 /* 366 * Tracepoint for accounting sleep time (time the task is not runnable, 367 * including iowait, see below). 368 */ 369 DEFINE_EVENT(sched_stat_template, sched_stat_sleep, 370 TP_PROTO(struct task_struct *tsk, u64 delay), 371 TP_ARGS(tsk, delay)); 372 373 /* 374 * Tracepoint for accounting iowait time (time the task is not runnable 375 * due to waiting on IO to complete). 376 */ 377 DEFINE_EVENT(sched_stat_template, sched_stat_iowait, 378 TP_PROTO(struct task_struct *tsk, u64 delay), 379 TP_ARGS(tsk, delay)); 380 381 /* 382 * Tracepoint for accounting blocked time (time the task is in uninterruptible). 383 */ 384 DEFINE_EVENT(sched_stat_template, sched_stat_blocked, 385 TP_PROTO(struct task_struct *tsk, u64 delay), 386 TP_ARGS(tsk, delay)); 387 388 /* 389 * Tracepoint for accounting runtime (time the task is executing 390 * on a CPU). 391 */ 392 DECLARE_EVENT_CLASS(sched_stat_runtime, 393 394 TP_PROTO(struct task_struct *tsk, u64 runtime, u64 vruntime), 395 396 TP_ARGS(tsk, __perf_count(runtime), vruntime), 397 398 TP_STRUCT__entry( 399 __array( char, comm, TASK_COMM_LEN ) 400 __field( pid_t, pid ) 401 __field( u64, runtime ) 402 __field( u64, vruntime ) 403 ), 404 405 TP_fast_assign( 406 memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN); 407 __entry->pid = tsk->pid; 408 __entry->runtime = runtime; 409 __entry->vruntime = vruntime; 410 ), 411 412 TP_printk("comm=%s pid=%d runtime=%Lu [ns] vruntime=%Lu [ns]", 413 __entry->comm, __entry->pid, 414 (unsigned long long)__entry->runtime, 415 (unsigned long long)__entry->vruntime) 416 ); 417 418 DEFINE_EVENT(sched_stat_runtime, sched_stat_runtime, 419 TP_PROTO(struct task_struct *tsk, u64 runtime, u64 vruntime), 420 TP_ARGS(tsk, runtime, vruntime)); 421 422 /* 423 * Tracepoint for showing priority inheritance modifying a tasks 424 * priority. 425 */ 426 TRACE_EVENT(sched_pi_setprio, 427 428 TP_PROTO(struct task_struct *tsk, struct task_struct *pi_task), 429 430 TP_ARGS(tsk, pi_task), 431 432 TP_STRUCT__entry( 433 __array( char, comm, TASK_COMM_LEN ) 434 __field( pid_t, pid ) 435 __field( int, oldprio ) 436 __field( int, newprio ) 437 ), 438 439 TP_fast_assign( 440 memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN); 441 __entry->pid = tsk->pid; 442 __entry->oldprio = tsk->prio; 443 __entry->newprio = pi_task ? 444 min(tsk->normal_prio, pi_task->prio) : 445 tsk->normal_prio; 446 /* XXX SCHED_DEADLINE bits missing */ 447 ), 448 449 TP_printk("comm=%s pid=%d oldprio=%d newprio=%d", 450 __entry->comm, __entry->pid, 451 __entry->oldprio, __entry->newprio) 452 ); 453 454 #ifdef CONFIG_DETECT_HUNG_TASK 455 TRACE_EVENT(sched_process_hang, 456 TP_PROTO(struct task_struct *tsk), 457 TP_ARGS(tsk), 458 459 TP_STRUCT__entry( 460 __array( char, comm, TASK_COMM_LEN ) 461 __field( pid_t, pid ) 462 ), 463 464 TP_fast_assign( 465 memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN); 466 __entry->pid = tsk->pid; 467 ), 468 469 TP_printk("comm=%s pid=%d", __entry->comm, __entry->pid) 470 ); 471 #endif /* CONFIG_DETECT_HUNG_TASK */ 472 473 DECLARE_EVENT_CLASS(sched_move_task_template, 474 475 TP_PROTO(struct task_struct *tsk, int src_cpu, int dst_cpu), 476 477 TP_ARGS(tsk, src_cpu, dst_cpu), 478 479 TP_STRUCT__entry( 480 __field( pid_t, pid ) 481 __field( pid_t, tgid ) 482 __field( pid_t, ngid ) 483 __field( int, src_cpu ) 484 __field( int, src_nid ) 485 __field( int, dst_cpu ) 486 __field( int, dst_nid ) 487 ), 488 489 TP_fast_assign( 490 __entry->pid = task_pid_nr(tsk); 491 __entry->tgid = task_tgid_nr(tsk); 492 __entry->ngid = task_numa_group_id(tsk); 493 __entry->src_cpu = src_cpu; 494 __entry->src_nid = cpu_to_node(src_cpu); 495 __entry->dst_cpu = dst_cpu; 496 __entry->dst_nid = cpu_to_node(dst_cpu); 497 ), 498 499 TP_printk("pid=%d tgid=%d ngid=%d src_cpu=%d src_nid=%d dst_cpu=%d dst_nid=%d", 500 __entry->pid, __entry->tgid, __entry->ngid, 501 __entry->src_cpu, __entry->src_nid, 502 __entry->dst_cpu, __entry->dst_nid) 503 ); 504 505 /* 506 * Tracks migration of tasks from one runqueue to another. Can be used to 507 * detect if automatic NUMA balancing is bouncing between nodes 508 */ 509 DEFINE_EVENT(sched_move_task_template, sched_move_numa, 510 TP_PROTO(struct task_struct *tsk, int src_cpu, int dst_cpu), 511 512 TP_ARGS(tsk, src_cpu, dst_cpu) 513 ); 514 515 DEFINE_EVENT(sched_move_task_template, sched_stick_numa, 516 TP_PROTO(struct task_struct *tsk, int src_cpu, int dst_cpu), 517 518 TP_ARGS(tsk, src_cpu, dst_cpu) 519 ); 520 521 TRACE_EVENT(sched_swap_numa, 522 523 TP_PROTO(struct task_struct *src_tsk, int src_cpu, 524 struct task_struct *dst_tsk, int dst_cpu), 525 526 TP_ARGS(src_tsk, src_cpu, dst_tsk, dst_cpu), 527 528 TP_STRUCT__entry( 529 __field( pid_t, src_pid ) 530 __field( pid_t, src_tgid ) 531 __field( pid_t, src_ngid ) 532 __field( int, src_cpu ) 533 __field( int, src_nid ) 534 __field( pid_t, dst_pid ) 535 __field( pid_t, dst_tgid ) 536 __field( pid_t, dst_ngid ) 537 __field( int, dst_cpu ) 538 __field( int, dst_nid ) 539 ), 540 541 TP_fast_assign( 542 __entry->src_pid = task_pid_nr(src_tsk); 543 __entry->src_tgid = task_tgid_nr(src_tsk); 544 __entry->src_ngid = task_numa_group_id(src_tsk); 545 __entry->src_cpu = src_cpu; 546 __entry->src_nid = cpu_to_node(src_cpu); 547 __entry->dst_pid = task_pid_nr(dst_tsk); 548 __entry->dst_tgid = task_tgid_nr(dst_tsk); 549 __entry->dst_ngid = task_numa_group_id(dst_tsk); 550 __entry->dst_cpu = dst_cpu; 551 __entry->dst_nid = cpu_to_node(dst_cpu); 552 ), 553 554 TP_printk("src_pid=%d src_tgid=%d src_ngid=%d src_cpu=%d src_nid=%d dst_pid=%d dst_tgid=%d dst_ngid=%d dst_cpu=%d dst_nid=%d", 555 __entry->src_pid, __entry->src_tgid, __entry->src_ngid, 556 __entry->src_cpu, __entry->src_nid, 557 __entry->dst_pid, __entry->dst_tgid, __entry->dst_ngid, 558 __entry->dst_cpu, __entry->dst_nid) 559 ); 560 561 /* 562 * Tracepoint for waking a polling cpu without an IPI. 563 */ 564 TRACE_EVENT(sched_wake_idle_without_ipi, 565 566 TP_PROTO(int cpu), 567 568 TP_ARGS(cpu), 569 570 TP_STRUCT__entry( 571 __field( int, cpu ) 572 ), 573 574 TP_fast_assign( 575 __entry->cpu = cpu; 576 ), 577 578 TP_printk("cpu=%d", __entry->cpu) 579 ); 580 #endif /* _TRACE_SCHED_H */ 581 582 /* This part must be outside protection */ 583 #include <trace/define_trace.h> 584