/*
 * trace task wakeup timings
 *
 * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
 * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
 *
 * Based on code from the latency_tracer, that is:
 *
 * Copyright (C) 2004-2006 Ingo Molnar
 * Copyright (C) 2004 William Lee Irwin III
 */
#include <linux/module.h>
#include <linux/fs.h>
#include <linux/debugfs.h>
#include <linux/kallsyms.h>
#include <linux/uaccess.h>
#include <linux/ftrace.h>
#include <linux/marker.h>

#include "trace.h"

static struct trace_array	*wakeup_trace;
static int __read_mostly	tracer_enabled;

static struct task_struct	*wakeup_task;
static int			wakeup_cpu;
static unsigned			wakeup_prio = -1;

static raw_spinlock_t wakeup_lock =
	(raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;

static void __wakeup_reset(struct trace_array *tr);

#ifdef CONFIG_FTRACE
/*
 * The wakeup tracer uses its own tracer function to keep the overhead down:
 */
static void
wakeup_tracer_call(unsigned long ip, unsigned long parent_ip)
{
	struct trace_array *tr = wakeup_trace;
	struct trace_array_cpu *data;
	unsigned long flags;
	long disabled;
	int resched;
	int cpu;

	if (likely(!wakeup_task))
		return;

	resched = need_resched();
	preempt_disable_notrace();

	cpu = raw_smp_processor_id();
	data = tr->data[cpu];
	disabled = atomic_inc_return(&data->disabled);
	if (unlikely(disabled != 1))
		goto out;

	local_irq_save(flags);
	__raw_spin_lock(&wakeup_lock);

	if (unlikely(!wakeup_task))
		goto unlock;

	/*
	 * The task can't disappear because it needs to
	 * wake up first, and we have the wakeup_lock.
	 */
	if (task_cpu(wakeup_task) != cpu)
		goto unlock;

	trace_function(tr, data, ip, parent_ip, flags);

unlock:
	__raw_spin_unlock(&wakeup_lock);
	local_irq_restore(flags);

out:
	atomic_dec(&data->disabled);

	/*
	 * To prevent recursion from the scheduler, if the
	 * resched flag was set before we entered, then
	 * don't reschedule.
	 */
	if (resched)
		preempt_enable_no_resched_notrace();
	else
		preempt_enable_notrace();
}

static struct ftrace_ops trace_ops __read_mostly =
{
	.func = wakeup_tracer_call,
};
#endif /* CONFIG_FTRACE */

/*
 * Should this new latency be reported/recorded?
 */
static int report_latency(cycle_t delta)
{
	if (tracing_thresh) {
		if (delta < tracing_thresh)
			return 0;
	} else {
		if (delta <= tracing_max_latency)
			return 0;
	}
	return 1;
}
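
/*
 * Context-switch probe: once the task we armed in wakeup_check_start()
 * (wakeup_task) is finally scheduled in, compute the wakeup latency and,
 * if it beats the current maximum, record the snapshot via update_max_tr().
 */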
static void notrace
wakeup_sched_switch(void *private, void *rq, struct task_struct *prev,
		    struct task_struct *next)
{
	unsigned long latency = 0, t0 = 0, t1 = 0;
	struct trace_array **ptr = private;
	struct trace_array *tr = *ptr;
	struct trace_array_cpu *data;
	cycle_t T0, T1, delta;
	unsigned long flags;
	long disabled;
	int cpu;

	if (unlikely(!tracer_enabled))
		return;

	/*
	 * When we start a new trace, we set wakeup_task to NULL
	 * and then set tracer_enabled = 1. We want to make sure
	 * that another CPU does not see the tracer_enabled = 1
	 * and the wakeup_task with an older task, that might
	 * actually be the same as next.
	 */
	smp_rmb();

	if (next != wakeup_task)
		return;

	/* The task we are waiting for is waking up */
	data = tr->data[wakeup_cpu];

	/* disable local data, not wakeup_cpu data */
	cpu = raw_smp_processor_id();
	disabled = atomic_inc_return(&tr->data[cpu]->disabled);
	if (unlikely(disabled != 1))
		goto out;

	local_irq_save(flags);
	__raw_spin_lock(&wakeup_lock);

	/* We could race with grabbing wakeup_lock */
	if (unlikely(!tracer_enabled || next != wakeup_task))
		goto out_unlock;

	trace_function(tr, data, CALLER_ADDR1, CALLER_ADDR2, flags);

	/*
	 * usecs conversion is slow so we try to delay the conversion
	 * as long as possible:
	 */
	T0 = data->preempt_timestamp;
	T1 = ftrace_now(cpu);
	delta = T1 - T0;

	if (!report_latency(delta))
		goto out_unlock;

	latency = nsecs_to_usecs(delta);

	tracing_max_latency = delta;
	t0 = nsecs_to_usecs(T0);
	t1 = nsecs_to_usecs(T1);

	update_max_tr(tr, wakeup_task, wakeup_cpu);

out_unlock:
	__wakeup_reset(tr);
	__raw_spin_unlock(&wakeup_lock);
	local_irq_restore(flags);
out:
	atomic_dec(&tr->data[cpu]->disabled);
}

static notrace void
sched_switch_callback(void *probe_data, void *call_data,
		      const char *format, va_list *args)
{
	struct task_struct *prev;
	struct task_struct *next;
	struct rq *__rq;

	/* skip prev_pid %d next_pid %d prev_state %ld */
	(void)va_arg(*args, int);
	(void)va_arg(*args, int);
	(void)va_arg(*args, long);
	__rq = va_arg(*args, typeof(__rq));
	prev = va_arg(*args, typeof(prev));
	next = va_arg(*args, typeof(next));

	tracing_record_cmdline(prev);

	/*
	 * If tracer_switch_func only points to the local
	 * switch func, it still needs the ptr passed to it.
	 */
	wakeup_sched_switch(probe_data, __rq, prev, next);
}

static void __wakeup_reset(struct trace_array *tr)
{
	struct trace_array_cpu *data;
	int cpu;

	for_each_possible_cpu(cpu) {
		data = tr->data[cpu];
		tracing_reset(data);
	}

	wakeup_cpu = -1;
	wakeup_prio = -1;

	if (wakeup_task)
		put_task_struct(wakeup_task);

	wakeup_task = NULL;
}

static void wakeup_reset(struct trace_array *tr)
{
	unsigned long flags;

	local_irq_save(flags);
	__raw_spin_lock(&wakeup_lock);
	__wakeup_reset(tr);
	__raw_spin_unlock(&wakeup_lock);
	local_irq_restore(flags);
}
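
/*
 * Wakeup probe helper: if the newly woken task is an RT task whose
 * priority is higher (numerically lower) than both the task we are
 * already tracking and the task currently running, reset the trace
 * and start timing this wakeup instead.
 */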
static void
wakeup_check_start(struct trace_array *tr, struct task_struct *p,
		   struct task_struct *curr)
{
	int cpu = smp_processor_id();
	unsigned long flags;
	long disabled;

	if (likely(!rt_task(p)) ||
			p->prio >= wakeup_prio ||
			p->prio >= curr->prio)
		return;

	disabled = atomic_inc_return(&tr->data[cpu]->disabled);
	if (unlikely(disabled != 1))
		goto out;

	/* interrupts should be off from try_to_wake_up */
	__raw_spin_lock(&wakeup_lock);

	/* check for races. */
	if (!tracer_enabled || p->prio >= wakeup_prio)
		goto out_locked;

	/* reset the trace */
	__wakeup_reset(tr);

	wakeup_cpu = task_cpu(p);
	wakeup_prio = p->prio;

	wakeup_task = p;
	get_task_struct(wakeup_task);

	local_save_flags(flags);

	tr->data[wakeup_cpu]->preempt_timestamp = ftrace_now(cpu);
	trace_function(tr, tr->data[wakeup_cpu],
		       CALLER_ADDR1, CALLER_ADDR2, flags);

out_locked:
	__raw_spin_unlock(&wakeup_lock);
out:
	atomic_dec(&tr->data[cpu]->disabled);
}

static notrace void
wake_up_callback(void *probe_data, void *call_data,
		 const char *format, va_list *args)
{
	struct trace_array **ptr = probe_data;
	struct trace_array *tr = *ptr;
	struct task_struct *curr;
	struct task_struct *task;
	struct rq *__rq;

	if (likely(!tracer_enabled))
		return;

	/* Skip pid %d state %ld */
	(void)va_arg(*args, int);
	(void)va_arg(*args, long);
	/* now get the meat: "rq %p task %p rq->curr %p" */
	__rq = va_arg(*args, typeof(__rq));
	task = va_arg(*args, typeof(task));
	curr = va_arg(*args, typeof(curr));

	tracing_record_cmdline(task);
	tracing_record_cmdline(curr);

	wakeup_check_start(tr, task, curr);
}
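
/*
 * Hook the scheduler markers and the ftrace function hook, reset any
 * previous trace state, and only then set tracer_enabled so the probes
 * never see a half-initialized tracer.
 */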
static void start_wakeup_tracer(struct trace_array *tr)
{
	int ret;

	ret = marker_probe_register("kernel_sched_wakeup",
			"pid %d state %ld ## rq %p task %p rq->curr %p",
			wake_up_callback,
			&wakeup_trace);
	if (ret) {
		pr_info("wakeup trace: Couldn't add marker"
			" probe to kernel_sched_wakeup\n");
		return;
	}

	ret = marker_probe_register("kernel_sched_wakeup_new",
			"pid %d state %ld ## rq %p task %p rq->curr %p",
			wake_up_callback,
			&wakeup_trace);
	if (ret) {
		pr_info("wakeup trace: Couldn't add marker"
			" probe to kernel_sched_wakeup_new\n");
		goto fail_deprobe;
	}

	ret = marker_probe_register("kernel_sched_schedule",
			"prev_pid %d next_pid %d prev_state %ld "
			"## rq %p prev %p next %p",
			sched_switch_callback,
			&wakeup_trace);
	if (ret) {
		pr_info("sched trace: Couldn't add marker"
			" probe to kernel_sched_schedule\n");
		goto fail_deprobe_wake_new;
	}

	wakeup_reset(tr);

	/*
	 * Don't let the tracer_enabled = 1 show up before
	 * the wakeup_task is reset. This may be overkill since
	 * wakeup_reset does a spin_unlock after setting the
	 * wakeup_task to NULL, but I want to be safe.
	 * This is a slow path anyway.
	 */
	smp_wmb();

	register_ftrace_function(&trace_ops);

	tracer_enabled = 1;

	return;
fail_deprobe_wake_new:
	marker_probe_unregister("kernel_sched_wakeup_new",
				wake_up_callback,
				&wakeup_trace);
fail_deprobe:
	marker_probe_unregister("kernel_sched_wakeup",
				wake_up_callback,
				&wakeup_trace);
}

static void stop_wakeup_tracer(struct trace_array *tr)
{
	tracer_enabled = 0;
	unregister_ftrace_function(&trace_ops);
	marker_probe_unregister("kernel_sched_schedule",
				sched_switch_callback,
				&wakeup_trace);
	marker_probe_unregister("kernel_sched_wakeup_new",
				wake_up_callback,
				&wakeup_trace);
	marker_probe_unregister("kernel_sched_wakeup",
				wake_up_callback,
				&wakeup_trace);
}

static void wakeup_tracer_init(struct trace_array *tr)
{
	wakeup_trace = tr;

	if (tr->ctrl)
		start_wakeup_tracer(tr);
}

static void wakeup_tracer_reset(struct trace_array *tr)
{
	if (tr->ctrl) {
		stop_wakeup_tracer(tr);
		/* make sure we put back any tasks we are tracing */
		wakeup_reset(tr);
	}
}

static void wakeup_tracer_ctrl_update(struct trace_array *tr)
{
	if (tr->ctrl)
		start_wakeup_tracer(tr);
	else
		stop_wakeup_tracer(tr);
}

static void wakeup_tracer_open(struct trace_iterator *iter)
{
	/* stop the trace while dumping */
	if (iter->tr->ctrl)
		stop_wakeup_tracer(iter->tr);
}

static void wakeup_tracer_close(struct trace_iterator *iter)
{
	/* forget about any processes we were recording */
	if (iter->tr->ctrl)
		start_wakeup_tracer(iter->tr);
}

static struct tracer wakeup_tracer __read_mostly =
{
	.name		= "wakeup",
	.init		= wakeup_tracer_init,
	.reset		= wakeup_tracer_reset,
	.open		= wakeup_tracer_open,
	.close		= wakeup_tracer_close,
	.ctrl_update	= wakeup_tracer_ctrl_update,
	.print_max	= 1,
#ifdef CONFIG_FTRACE_SELFTEST
	.selftest	= trace_selftest_startup_wakeup,
#endif
};

__init static int init_wakeup_tracer(void)
{
	int ret;

	ret = register_tracer(&wakeup_tracer);
	if (ret)
		return ret;

	return 0;
}
device_initcall(init_wakeup_tracer);
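
/*
 * Illustrative usage sketch (not part of the tracer): with the ftrace
 * debugfs interface of this kernel generation, and assuming debugfs is
 * mounted at /sys/kernel/debug, something like
 *
 *	echo wakeup > /sys/kernel/debug/tracing/current_tracer
 *	echo 0 > /sys/kernel/debug/tracing/tracing_max_latency
 *	... run an RT workload ...
 *	cat /sys/kernel/debug/tracing/latency_trace
 *
 * leaves the worst-case wakeup latency in tracing_max_latency and a
 * snapshot of what ran between the wakeup and the switch-in.
 */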