1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * OS Noise Tracer: computes the OS Noise suffered by a running thread. 4 * Timerlat Tracer: measures the wakeup latency of a timer triggered IRQ and thread. 5 * 6 * Based on "hwlat_detector" tracer by: 7 * Copyright (C) 2008-2009 Jon Masters, Red Hat, Inc. <jcm@redhat.com> 8 * Copyright (C) 2013-2016 Steven Rostedt, Red Hat, Inc. <srostedt@redhat.com> 9 * With feedback from Clark Williams <williams@redhat.com> 10 * 11 * And also based on the rtsl tracer presented on: 12 * DE OLIVEIRA, Daniel Bristot, et al. Demystifying the real-time linux 13 * scheduling latency. In: 32nd Euromicro Conference on Real-Time Systems 14 * (ECRTS 2020). Schloss Dagstuhl-Leibniz-Zentrum fur Informatik, 2020. 15 * 16 * Copyright (C) 2021 Daniel Bristot de Oliveira, Red Hat, Inc. <bristot@redhat.com> 17 */ 18 19 #include <linux/kthread.h> 20 #include <linux/tracefs.h> 21 #include <linux/uaccess.h> 22 #include <linux/cpumask.h> 23 #include <linux/delay.h> 24 #include <linux/sched/clock.h> 25 #include <uapi/linux/sched/types.h> 26 #include <linux/sched.h> 27 #include "trace.h" 28 29 #ifdef CONFIG_X86_LOCAL_APIC 30 #include <asm/trace/irq_vectors.h> 31 #undef TRACE_INCLUDE_PATH 32 #undef TRACE_INCLUDE_FILE 33 #endif /* CONFIG_X86_LOCAL_APIC */ 34 35 #include <trace/events/irq.h> 36 #include <trace/events/sched.h> 37 38 #define CREATE_TRACE_POINTS 39 #include <trace/events/osnoise.h> 40 41 static struct trace_array *osnoise_trace; 42 43 /* 44 * Default values. 45 */ 46 #define BANNER "osnoise: " 47 #define DEFAULT_SAMPLE_PERIOD 1000000 /* 1s */ 48 #define DEFAULT_SAMPLE_RUNTIME 1000000 /* 1s */ 49 50 #define DEFAULT_TIMERLAT_PERIOD 1000 /* 1ms */ 51 #define DEFAULT_TIMERLAT_PRIO 95 /* FIFO 95 */ 52 53 /* 54 * NMI runtime info. 55 */ 56 struct osn_nmi { 57 u64 count; 58 u64 delta_start; 59 }; 60 61 /* 62 * IRQ runtime info. 63 */ 64 struct osn_irq { 65 u64 count; 66 u64 arrival_time; 67 u64 delta_start; 68 }; 69 70 #define IRQ_CONTEXT 0 71 #define THREAD_CONTEXT 1 72 /* 73 * sofirq runtime info. 74 */ 75 struct osn_softirq { 76 u64 count; 77 u64 arrival_time; 78 u64 delta_start; 79 }; 80 81 /* 82 * thread runtime info. 83 */ 84 struct osn_thread { 85 u64 count; 86 u64 arrival_time; 87 u64 delta_start; 88 }; 89 90 /* 91 * Runtime information: this structure saves the runtime information used by 92 * one sampling thread. 93 */ 94 struct osnoise_variables { 95 struct task_struct *kthread; 96 bool sampling; 97 pid_t pid; 98 struct osn_nmi nmi; 99 struct osn_irq irq; 100 struct osn_softirq softirq; 101 struct osn_thread thread; 102 local_t int_counter; 103 }; 104 105 /* 106 * Per-cpu runtime information. 107 */ 108 DEFINE_PER_CPU(struct osnoise_variables, per_cpu_osnoise_var); 109 110 /* 111 * this_cpu_osn_var - Return the per-cpu osnoise_variables on its relative CPU 112 */ 113 static inline struct osnoise_variables *this_cpu_osn_var(void) 114 { 115 return this_cpu_ptr(&per_cpu_osnoise_var); 116 } 117 118 #ifdef CONFIG_TIMERLAT_TRACER 119 /* 120 * Runtime information for the timer mode. 121 */ 122 struct timerlat_variables { 123 struct task_struct *kthread; 124 struct hrtimer timer; 125 u64 rel_period; 126 u64 abs_period; 127 bool tracing_thread; 128 u64 count; 129 }; 130 131 DEFINE_PER_CPU(struct timerlat_variables, per_cpu_timerlat_var); 132 133 /* 134 * this_cpu_tmr_var - Return the per-cpu timerlat_variables on its relative CPU 135 */ 136 static inline struct timerlat_variables *this_cpu_tmr_var(void) 137 { 138 return this_cpu_ptr(&per_cpu_timerlat_var); 139 } 140 141 /* 142 * tlat_var_reset - Reset the values of the given timerlat_variables 143 */ 144 static inline void tlat_var_reset(void) 145 { 146 struct timerlat_variables *tlat_var; 147 int cpu; 148 /* 149 * So far, all the values are initialized as 0, so 150 * zeroing the structure is perfect. 151 */ 152 for_each_cpu(cpu, cpu_online_mask) { 153 tlat_var = per_cpu_ptr(&per_cpu_timerlat_var, cpu); 154 memset(tlat_var, 0, sizeof(*tlat_var)); 155 } 156 } 157 #else /* CONFIG_TIMERLAT_TRACER */ 158 #define tlat_var_reset() do {} while (0) 159 #endif /* CONFIG_TIMERLAT_TRACER */ 160 161 /* 162 * osn_var_reset - Reset the values of the given osnoise_variables 163 */ 164 static inline void osn_var_reset(void) 165 { 166 struct osnoise_variables *osn_var; 167 int cpu; 168 169 /* 170 * So far, all the values are initialized as 0, so 171 * zeroing the structure is perfect. 172 */ 173 for_each_cpu(cpu, cpu_online_mask) { 174 osn_var = per_cpu_ptr(&per_cpu_osnoise_var, cpu); 175 memset(osn_var, 0, sizeof(*osn_var)); 176 } 177 } 178 179 /* 180 * osn_var_reset_all - Reset the value of all per-cpu osnoise_variables 181 */ 182 static inline void osn_var_reset_all(void) 183 { 184 osn_var_reset(); 185 tlat_var_reset(); 186 } 187 188 /* 189 * Tells NMIs to call back to the osnoise tracer to record timestamps. 190 */ 191 bool trace_osnoise_callback_enabled; 192 193 /* 194 * osnoise sample structure definition. Used to store the statistics of a 195 * sample run. 196 */ 197 struct osnoise_sample { 198 u64 runtime; /* runtime */ 199 u64 noise; /* noise */ 200 u64 max_sample; /* max single noise sample */ 201 int hw_count; /* # HW (incl. hypervisor) interference */ 202 int nmi_count; /* # NMIs during this sample */ 203 int irq_count; /* # IRQs during this sample */ 204 int softirq_count; /* # softirqs during this sample */ 205 int thread_count; /* # threads during this sample */ 206 }; 207 208 #ifdef CONFIG_TIMERLAT_TRACER 209 /* 210 * timerlat sample structure definition. Used to store the statistics of 211 * a sample run. 212 */ 213 struct timerlat_sample { 214 u64 timer_latency; /* timer_latency */ 215 unsigned int seqnum; /* unique sequence */ 216 int context; /* timer context */ 217 }; 218 #endif 219 220 /* 221 * Protect the interface. 222 */ 223 struct mutex interface_lock; 224 225 /* 226 * Tracer data. 227 */ 228 static struct osnoise_data { 229 u64 sample_period; /* total sampling period */ 230 u64 sample_runtime; /* active sampling portion of period */ 231 u64 stop_tracing; /* stop trace in the internal operation (loop/irq) */ 232 u64 stop_tracing_total; /* stop trace in the final operation (report/thread) */ 233 #ifdef CONFIG_TIMERLAT_TRACER 234 u64 timerlat_period; /* timerlat period */ 235 u64 print_stack; /* print IRQ stack if total > */ 236 int timerlat_tracer; /* timerlat tracer */ 237 #endif 238 bool tainted; /* infor users and developers about a problem */ 239 } osnoise_data = { 240 .sample_period = DEFAULT_SAMPLE_PERIOD, 241 .sample_runtime = DEFAULT_SAMPLE_RUNTIME, 242 .stop_tracing = 0, 243 .stop_tracing_total = 0, 244 #ifdef CONFIG_TIMERLAT_TRACER 245 .print_stack = 0, 246 .timerlat_period = DEFAULT_TIMERLAT_PERIOD, 247 .timerlat_tracer = 0, 248 #endif 249 }; 250 251 /* 252 * Boolean variable used to inform that the tracer is currently sampling. 253 */ 254 static bool osnoise_busy; 255 256 #ifdef CONFIG_PREEMPT_RT 257 /* 258 * Print the osnoise header info. 259 */ 260 static void print_osnoise_headers(struct seq_file *s) 261 { 262 if (osnoise_data.tainted) 263 seq_puts(s, "# osnoise is tainted!\n"); 264 265 seq_puts(s, "# _-------=> irqs-off\n"); 266 seq_puts(s, "# / _------=> need-resched\n"); 267 seq_puts(s, "# | / _-----=> need-resched-lazy\n"); 268 seq_puts(s, "# || / _----=> hardirq/softirq\n"); 269 seq_puts(s, "# ||| / _---=> preempt-depth\n"); 270 seq_puts(s, "# |||| / _--=> preempt-lazy-depth\n"); 271 seq_puts(s, "# ||||| / _-=> migrate-disable\n"); 272 273 seq_puts(s, "# |||||| / "); 274 seq_puts(s, " MAX\n"); 275 276 seq_puts(s, "# ||||| / "); 277 seq_puts(s, " SINGLE Interference counters:\n"); 278 279 seq_puts(s, "# ||||||| RUNTIME "); 280 seq_puts(s, " NOISE %% OF CPU NOISE +-----------------------------+\n"); 281 282 seq_puts(s, "# TASK-PID CPU# ||||||| TIMESTAMP IN US "); 283 seq_puts(s, " IN US AVAILABLE IN US HW NMI IRQ SIRQ THREAD\n"); 284 285 seq_puts(s, "# | | | ||||||| | | "); 286 seq_puts(s, " | | | | | | | |\n"); 287 } 288 #else /* CONFIG_PREEMPT_RT */ 289 static void print_osnoise_headers(struct seq_file *s) 290 { 291 if (osnoise_data.tainted) 292 seq_puts(s, "# osnoise is tainted!\n"); 293 294 seq_puts(s, "# _-----=> irqs-off\n"); 295 seq_puts(s, "# / _----=> need-resched\n"); 296 seq_puts(s, "# | / _---=> hardirq/softirq\n"); 297 seq_puts(s, "# || / _--=> preempt-depth\n"); 298 seq_puts(s, "# ||| / _-=> migrate-disable "); 299 seq_puts(s, " MAX\n"); 300 seq_puts(s, "# |||| / delay "); 301 seq_puts(s, " SINGLE Interference counters:\n"); 302 303 seq_puts(s, "# ||||| RUNTIME "); 304 seq_puts(s, " NOISE %% OF CPU NOISE +-----------------------------+\n"); 305 306 seq_puts(s, "# TASK-PID CPU# ||||| TIMESTAMP IN US "); 307 seq_puts(s, " IN US AVAILABLE IN US HW NMI IRQ SIRQ THREAD\n"); 308 309 seq_puts(s, "# | | | ||||| | | "); 310 seq_puts(s, " | | | | | | | |\n"); 311 } 312 #endif /* CONFIG_PREEMPT_RT */ 313 314 /* 315 * osnoise_taint - report an osnoise error. 316 */ 317 #define osnoise_taint(msg) ({ \ 318 struct trace_array *tr = osnoise_trace; \ 319 \ 320 trace_array_printk_buf(tr->array_buffer.buffer, _THIS_IP_, msg); \ 321 osnoise_data.tainted = true; \ 322 }) 323 324 /* 325 * Record an osnoise_sample into the tracer buffer. 326 */ 327 static void trace_osnoise_sample(struct osnoise_sample *sample) 328 { 329 struct trace_array *tr = osnoise_trace; 330 struct trace_buffer *buffer = tr->array_buffer.buffer; 331 struct trace_event_call *call = &event_osnoise; 332 struct ring_buffer_event *event; 333 struct osnoise_entry *entry; 334 335 event = trace_buffer_lock_reserve(buffer, TRACE_OSNOISE, sizeof(*entry), 336 tracing_gen_ctx()); 337 if (!event) 338 return; 339 entry = ring_buffer_event_data(event); 340 entry->runtime = sample->runtime; 341 entry->noise = sample->noise; 342 entry->max_sample = sample->max_sample; 343 entry->hw_count = sample->hw_count; 344 entry->nmi_count = sample->nmi_count; 345 entry->irq_count = sample->irq_count; 346 entry->softirq_count = sample->softirq_count; 347 entry->thread_count = sample->thread_count; 348 349 if (!call_filter_check_discard(call, entry, buffer, event)) 350 trace_buffer_unlock_commit_nostack(buffer, event); 351 } 352 353 #ifdef CONFIG_TIMERLAT_TRACER 354 /* 355 * Print the timerlat header info. 356 */ 357 #ifdef CONFIG_PREEMPT_RT 358 static void print_timerlat_headers(struct seq_file *s) 359 { 360 seq_puts(s, "# _-------=> irqs-off\n"); 361 seq_puts(s, "# / _------=> need-resched\n"); 362 seq_puts(s, "# | / _-----=> need-resched-lazy\n"); 363 seq_puts(s, "# || / _----=> hardirq/softirq\n"); 364 seq_puts(s, "# ||| / _---=> preempt-depth\n"); 365 seq_puts(s, "# |||| / _--=> preempt-lazy-depth\n"); 366 seq_puts(s, "# ||||| / _-=> migrate-disable\n"); 367 seq_puts(s, "# |||||| /\n"); 368 seq_puts(s, "# ||||||| ACTIVATION\n"); 369 seq_puts(s, "# TASK-PID CPU# ||||||| TIMESTAMP ID "); 370 seq_puts(s, " CONTEXT LATENCY\n"); 371 seq_puts(s, "# | | | ||||||| | | "); 372 seq_puts(s, " | |\n"); 373 } 374 #else /* CONFIG_PREEMPT_RT */ 375 static void print_timerlat_headers(struct seq_file *s) 376 { 377 seq_puts(s, "# _-----=> irqs-off\n"); 378 seq_puts(s, "# / _----=> need-resched\n"); 379 seq_puts(s, "# | / _---=> hardirq/softirq\n"); 380 seq_puts(s, "# || / _--=> preempt-depth\n"); 381 seq_puts(s, "# ||| / _-=> migrate-disable\n"); 382 seq_puts(s, "# |||| / delay\n"); 383 seq_puts(s, "# ||||| ACTIVATION\n"); 384 seq_puts(s, "# TASK-PID CPU# ||||| TIMESTAMP ID "); 385 seq_puts(s, " CONTEXT LATENCY\n"); 386 seq_puts(s, "# | | | ||||| | | "); 387 seq_puts(s, " | |\n"); 388 } 389 #endif /* CONFIG_PREEMPT_RT */ 390 391 /* 392 * Record an timerlat_sample into the tracer buffer. 393 */ 394 static void trace_timerlat_sample(struct timerlat_sample *sample) 395 { 396 struct trace_array *tr = osnoise_trace; 397 struct trace_event_call *call = &event_osnoise; 398 struct trace_buffer *buffer = tr->array_buffer.buffer; 399 struct ring_buffer_event *event; 400 struct timerlat_entry *entry; 401 402 event = trace_buffer_lock_reserve(buffer, TRACE_TIMERLAT, sizeof(*entry), 403 tracing_gen_ctx()); 404 if (!event) 405 return; 406 entry = ring_buffer_event_data(event); 407 entry->seqnum = sample->seqnum; 408 entry->context = sample->context; 409 entry->timer_latency = sample->timer_latency; 410 411 if (!call_filter_check_discard(call, entry, buffer, event)) 412 trace_buffer_unlock_commit_nostack(buffer, event); 413 } 414 415 #ifdef CONFIG_STACKTRACE 416 417 #define MAX_CALLS 256 418 419 /* 420 * Stack trace will take place only at IRQ level, so, no need 421 * to control nesting here. 422 */ 423 struct trace_stack { 424 int stack_size; 425 int nr_entries; 426 unsigned long calls[MAX_CALLS]; 427 }; 428 429 static DEFINE_PER_CPU(struct trace_stack, trace_stack); 430 431 /* 432 * timerlat_save_stack - save a stack trace without printing 433 * 434 * Save the current stack trace without printing. The 435 * stack will be printed later, after the end of the measurement. 436 */ 437 static void timerlat_save_stack(int skip) 438 { 439 unsigned int size, nr_entries; 440 struct trace_stack *fstack; 441 442 fstack = this_cpu_ptr(&trace_stack); 443 444 size = ARRAY_SIZE(fstack->calls); 445 446 nr_entries = stack_trace_save(fstack->calls, size, skip); 447 448 fstack->stack_size = nr_entries * sizeof(unsigned long); 449 fstack->nr_entries = nr_entries; 450 451 return; 452 453 } 454 /* 455 * timerlat_dump_stack - dump a stack trace previously saved 456 * 457 * Dump a saved stack trace into the trace buffer. 458 */ 459 static void timerlat_dump_stack(void) 460 { 461 struct trace_event_call *call = &event_osnoise; 462 struct trace_array *tr = osnoise_trace; 463 struct trace_buffer *buffer = tr->array_buffer.buffer; 464 struct ring_buffer_event *event; 465 struct trace_stack *fstack; 466 struct stack_entry *entry; 467 unsigned int size; 468 469 preempt_disable_notrace(); 470 fstack = this_cpu_ptr(&trace_stack); 471 size = fstack->stack_size; 472 473 event = trace_buffer_lock_reserve(buffer, TRACE_STACK, sizeof(*entry) + size, 474 tracing_gen_ctx()); 475 if (!event) 476 goto out; 477 478 entry = ring_buffer_event_data(event); 479 480 memcpy(&entry->caller, fstack->calls, size); 481 entry->size = fstack->nr_entries; 482 483 if (!call_filter_check_discard(call, entry, buffer, event)) 484 trace_buffer_unlock_commit_nostack(buffer, event); 485 486 out: 487 preempt_enable_notrace(); 488 } 489 #else 490 #define timerlat_dump_stack() do {} while (0) 491 #define timerlat_save_stack(a) do {} while (0) 492 #endif /* CONFIG_STACKTRACE */ 493 #endif /* CONFIG_TIMERLAT_TRACER */ 494 495 /* 496 * Macros to encapsulate the time capturing infrastructure. 497 */ 498 #define time_get() trace_clock_local() 499 #define time_to_us(x) div_u64(x, 1000) 500 #define time_sub(a, b) ((a) - (b)) 501 502 /* 503 * cond_move_irq_delta_start - Forward the delta_start of a running IRQ 504 * 505 * If an IRQ is preempted by an NMI, its delta_start is pushed forward 506 * to discount the NMI interference. 507 * 508 * See get_int_safe_duration(). 509 */ 510 static inline void 511 cond_move_irq_delta_start(struct osnoise_variables *osn_var, u64 duration) 512 { 513 if (osn_var->irq.delta_start) 514 osn_var->irq.delta_start += duration; 515 } 516 517 #ifndef CONFIG_PREEMPT_RT 518 /* 519 * cond_move_softirq_delta_start - Forward the delta_start of a running softirq. 520 * 521 * If a softirq is preempted by an IRQ or NMI, its delta_start is pushed 522 * forward to discount the interference. 523 * 524 * See get_int_safe_duration(). 525 */ 526 static inline void 527 cond_move_softirq_delta_start(struct osnoise_variables *osn_var, u64 duration) 528 { 529 if (osn_var->softirq.delta_start) 530 osn_var->softirq.delta_start += duration; 531 } 532 #else /* CONFIG_PREEMPT_RT */ 533 #define cond_move_softirq_delta_start(osn_var, duration) do {} while (0) 534 #endif 535 536 /* 537 * cond_move_thread_delta_start - Forward the delta_start of a running thread 538 * 539 * If a noisy thread is preempted by an softirq, IRQ or NMI, its delta_start 540 * is pushed forward to discount the interference. 541 * 542 * See get_int_safe_duration(). 543 */ 544 static inline void 545 cond_move_thread_delta_start(struct osnoise_variables *osn_var, u64 duration) 546 { 547 if (osn_var->thread.delta_start) 548 osn_var->thread.delta_start += duration; 549 } 550 551 /* 552 * get_int_safe_duration - Get the duration of a window 553 * 554 * The irq, softirq and thread varaibles need to have its duration without 555 * the interference from higher priority interrupts. Instead of keeping a 556 * variable to discount the interrupt interference from these variables, the 557 * starting time of these variables are pushed forward with the interrupt's 558 * duration. In this way, a single variable is used to: 559 * 560 * - Know if a given window is being measured. 561 * - Account its duration. 562 * - Discount the interference. 563 * 564 * To avoid getting inconsistent values, e.g.,: 565 * 566 * now = time_get() 567 * ---> interrupt! 568 * delta_start -= int duration; 569 * <--- 570 * duration = now - delta_start; 571 * 572 * result: negative duration if the variable duration before the 573 * interrupt was smaller than the interrupt execution. 574 * 575 * A counter of interrupts is used. If the counter increased, try 576 * to capture an interference safe duration. 577 */ 578 static inline s64 579 get_int_safe_duration(struct osnoise_variables *osn_var, u64 *delta_start) 580 { 581 u64 int_counter, now; 582 s64 duration; 583 584 do { 585 int_counter = local_read(&osn_var->int_counter); 586 /* synchronize with interrupts */ 587 barrier(); 588 589 now = time_get(); 590 duration = (now - *delta_start); 591 592 /* synchronize with interrupts */ 593 barrier(); 594 } while (int_counter != local_read(&osn_var->int_counter)); 595 596 /* 597 * This is an evidence of race conditions that cause 598 * a value to be "discounted" too much. 599 */ 600 if (duration < 0) 601 osnoise_taint("Negative duration!\n"); 602 603 *delta_start = 0; 604 605 return duration; 606 } 607 608 /* 609 * 610 * set_int_safe_time - Save the current time on *time, aware of interference 611 * 612 * Get the time, taking into consideration a possible interference from 613 * higher priority interrupts. 614 * 615 * See get_int_safe_duration() for an explanation. 616 */ 617 static u64 618 set_int_safe_time(struct osnoise_variables *osn_var, u64 *time) 619 { 620 u64 int_counter; 621 622 do { 623 int_counter = local_read(&osn_var->int_counter); 624 /* synchronize with interrupts */ 625 barrier(); 626 627 *time = time_get(); 628 629 /* synchronize with interrupts */ 630 barrier(); 631 } while (int_counter != local_read(&osn_var->int_counter)); 632 633 return int_counter; 634 } 635 636 #ifdef CONFIG_TIMERLAT_TRACER 637 /* 638 * copy_int_safe_time - Copy *src into *desc aware of interference 639 */ 640 static u64 641 copy_int_safe_time(struct osnoise_variables *osn_var, u64 *dst, u64 *src) 642 { 643 u64 int_counter; 644 645 do { 646 int_counter = local_read(&osn_var->int_counter); 647 /* synchronize with interrupts */ 648 barrier(); 649 650 *dst = *src; 651 652 /* synchronize with interrupts */ 653 barrier(); 654 } while (int_counter != local_read(&osn_var->int_counter)); 655 656 return int_counter; 657 } 658 #endif /* CONFIG_TIMERLAT_TRACER */ 659 660 /* 661 * trace_osnoise_callback - NMI entry/exit callback 662 * 663 * This function is called at the entry and exit NMI code. The bool enter 664 * distinguishes between either case. This function is used to note a NMI 665 * occurrence, compute the noise caused by the NMI, and to remove the noise 666 * it is potentially causing on other interference variables. 667 */ 668 void trace_osnoise_callback(bool enter) 669 { 670 struct osnoise_variables *osn_var = this_cpu_osn_var(); 671 u64 duration; 672 673 if (!osn_var->sampling) 674 return; 675 676 /* 677 * Currently trace_clock_local() calls sched_clock() and the 678 * generic version is not NMI safe. 679 */ 680 if (!IS_ENABLED(CONFIG_GENERIC_SCHED_CLOCK)) { 681 if (enter) { 682 osn_var->nmi.delta_start = time_get(); 683 local_inc(&osn_var->int_counter); 684 } else { 685 duration = time_get() - osn_var->nmi.delta_start; 686 687 trace_nmi_noise(osn_var->nmi.delta_start, duration); 688 689 cond_move_irq_delta_start(osn_var, duration); 690 cond_move_softirq_delta_start(osn_var, duration); 691 cond_move_thread_delta_start(osn_var, duration); 692 } 693 } 694 695 if (enter) 696 osn_var->nmi.count++; 697 } 698 699 /* 700 * osnoise_trace_irq_entry - Note the starting of an IRQ 701 * 702 * Save the starting time of an IRQ. As IRQs are non-preemptive to other IRQs, 703 * it is safe to use a single variable (ons_var->irq) to save the statistics. 704 * The arrival_time is used to report... the arrival time. The delta_start 705 * is used to compute the duration at the IRQ exit handler. See 706 * cond_move_irq_delta_start(). 707 */ 708 void osnoise_trace_irq_entry(int id) 709 { 710 struct osnoise_variables *osn_var = this_cpu_osn_var(); 711 712 if (!osn_var->sampling) 713 return; 714 /* 715 * This value will be used in the report, but not to compute 716 * the execution time, so it is safe to get it unsafe. 717 */ 718 osn_var->irq.arrival_time = time_get(); 719 set_int_safe_time(osn_var, &osn_var->irq.delta_start); 720 osn_var->irq.count++; 721 722 local_inc(&osn_var->int_counter); 723 } 724 725 /* 726 * osnoise_irq_exit - Note the end of an IRQ, sava data and trace 727 * 728 * Computes the duration of the IRQ noise, and trace it. Also discounts the 729 * interference from other sources of noise could be currently being accounted. 730 */ 731 void osnoise_trace_irq_exit(int id, const char *desc) 732 { 733 struct osnoise_variables *osn_var = this_cpu_osn_var(); 734 int duration; 735 736 if (!osn_var->sampling) 737 return; 738 739 duration = get_int_safe_duration(osn_var, &osn_var->irq.delta_start); 740 trace_irq_noise(id, desc, osn_var->irq.arrival_time, duration); 741 osn_var->irq.arrival_time = 0; 742 cond_move_softirq_delta_start(osn_var, duration); 743 cond_move_thread_delta_start(osn_var, duration); 744 } 745 746 /* 747 * trace_irqentry_callback - Callback to the irq:irq_entry traceevent 748 * 749 * Used to note the starting of an IRQ occurece. 750 */ 751 static void trace_irqentry_callback(void *data, int irq, 752 struct irqaction *action) 753 { 754 osnoise_trace_irq_entry(irq); 755 } 756 757 /* 758 * trace_irqexit_callback - Callback to the irq:irq_exit traceevent 759 * 760 * Used to note the end of an IRQ occurece. 761 */ 762 static void trace_irqexit_callback(void *data, int irq, 763 struct irqaction *action, int ret) 764 { 765 osnoise_trace_irq_exit(irq, action->name); 766 } 767 768 /* 769 * arch specific register function. 770 */ 771 int __weak osnoise_arch_register(void) 772 { 773 return 0; 774 } 775 776 /* 777 * arch specific unregister function. 778 */ 779 void __weak osnoise_arch_unregister(void) 780 { 781 return; 782 } 783 784 /* 785 * hook_irq_events - Hook IRQ handling events 786 * 787 * This function hooks the IRQ related callbacks to the respective trace 788 * events. 789 */ 790 static int hook_irq_events(void) 791 { 792 int ret; 793 794 ret = register_trace_irq_handler_entry(trace_irqentry_callback, NULL); 795 if (ret) 796 goto out_err; 797 798 ret = register_trace_irq_handler_exit(trace_irqexit_callback, NULL); 799 if (ret) 800 goto out_unregister_entry; 801 802 ret = osnoise_arch_register(); 803 if (ret) 804 goto out_irq_exit; 805 806 return 0; 807 808 out_irq_exit: 809 unregister_trace_irq_handler_exit(trace_irqexit_callback, NULL); 810 out_unregister_entry: 811 unregister_trace_irq_handler_entry(trace_irqentry_callback, NULL); 812 out_err: 813 return -EINVAL; 814 } 815 816 /* 817 * unhook_irq_events - Unhook IRQ handling events 818 * 819 * This function unhooks the IRQ related callbacks to the respective trace 820 * events. 821 */ 822 static void unhook_irq_events(void) 823 { 824 osnoise_arch_unregister(); 825 unregister_trace_irq_handler_exit(trace_irqexit_callback, NULL); 826 unregister_trace_irq_handler_entry(trace_irqentry_callback, NULL); 827 } 828 829 #ifndef CONFIG_PREEMPT_RT 830 /* 831 * trace_softirq_entry_callback - Note the starting of a softirq 832 * 833 * Save the starting time of a softirq. As softirqs are non-preemptive to 834 * other softirqs, it is safe to use a single variable (ons_var->softirq) 835 * to save the statistics. The arrival_time is used to report... the 836 * arrival time. The delta_start is used to compute the duration at the 837 * softirq exit handler. See cond_move_softirq_delta_start(). 838 */ 839 static void trace_softirq_entry_callback(void *data, unsigned int vec_nr) 840 { 841 struct osnoise_variables *osn_var = this_cpu_osn_var(); 842 843 if (!osn_var->sampling) 844 return; 845 /* 846 * This value will be used in the report, but not to compute 847 * the execution time, so it is safe to get it unsafe. 848 */ 849 osn_var->softirq.arrival_time = time_get(); 850 set_int_safe_time(osn_var, &osn_var->softirq.delta_start); 851 osn_var->softirq.count++; 852 853 local_inc(&osn_var->int_counter); 854 } 855 856 /* 857 * trace_softirq_exit_callback - Note the end of an softirq 858 * 859 * Computes the duration of the softirq noise, and trace it. Also discounts the 860 * interference from other sources of noise could be currently being accounted. 861 */ 862 static void trace_softirq_exit_callback(void *data, unsigned int vec_nr) 863 { 864 struct osnoise_variables *osn_var = this_cpu_osn_var(); 865 int duration; 866 867 if (!osn_var->sampling) 868 return; 869 870 #ifdef CONFIG_TIMERLAT_TRACER 871 /* 872 * If the timerlat is enabled, but the irq handler did 873 * not run yet enabling timerlat_tracer, do not trace. 874 */ 875 if (unlikely(osnoise_data.timerlat_tracer)) { 876 struct timerlat_variables *tlat_var; 877 tlat_var = this_cpu_tmr_var(); 878 if (!tlat_var->tracing_thread) { 879 osn_var->softirq.arrival_time = 0; 880 osn_var->softirq.delta_start = 0; 881 return; 882 } 883 } 884 #endif 885 886 duration = get_int_safe_duration(osn_var, &osn_var->softirq.delta_start); 887 trace_softirq_noise(vec_nr, osn_var->softirq.arrival_time, duration); 888 cond_move_thread_delta_start(osn_var, duration); 889 osn_var->softirq.arrival_time = 0; 890 } 891 892 /* 893 * hook_softirq_events - Hook softirq handling events 894 * 895 * This function hooks the softirq related callbacks to the respective trace 896 * events. 897 */ 898 static int hook_softirq_events(void) 899 { 900 int ret; 901 902 ret = register_trace_softirq_entry(trace_softirq_entry_callback, NULL); 903 if (ret) 904 goto out_err; 905 906 ret = register_trace_softirq_exit(trace_softirq_exit_callback, NULL); 907 if (ret) 908 goto out_unreg_entry; 909 910 return 0; 911 912 out_unreg_entry: 913 unregister_trace_softirq_entry(trace_softirq_entry_callback, NULL); 914 out_err: 915 return -EINVAL; 916 } 917 918 /* 919 * unhook_softirq_events - Unhook softirq handling events 920 * 921 * This function hooks the softirq related callbacks to the respective trace 922 * events. 923 */ 924 static void unhook_softirq_events(void) 925 { 926 unregister_trace_softirq_entry(trace_softirq_entry_callback, NULL); 927 unregister_trace_softirq_exit(trace_softirq_exit_callback, NULL); 928 } 929 #else /* CONFIG_PREEMPT_RT */ 930 /* 931 * softirq are threads on the PREEMPT_RT mode. 932 */ 933 static int hook_softirq_events(void) 934 { 935 return 0; 936 } 937 static void unhook_softirq_events(void) 938 { 939 } 940 #endif 941 942 /* 943 * thread_entry - Record the starting of a thread noise window 944 * 945 * It saves the context switch time for a noisy thread, and increments 946 * the interference counters. 947 */ 948 static void 949 thread_entry(struct osnoise_variables *osn_var, struct task_struct *t) 950 { 951 if (!osn_var->sampling) 952 return; 953 /* 954 * The arrival time will be used in the report, but not to compute 955 * the execution time, so it is safe to get it unsafe. 956 */ 957 osn_var->thread.arrival_time = time_get(); 958 959 set_int_safe_time(osn_var, &osn_var->thread.delta_start); 960 961 osn_var->thread.count++; 962 local_inc(&osn_var->int_counter); 963 } 964 965 /* 966 * thread_exit - Report the end of a thread noise window 967 * 968 * It computes the total noise from a thread, tracing if needed. 969 */ 970 static void 971 thread_exit(struct osnoise_variables *osn_var, struct task_struct *t) 972 { 973 int duration; 974 975 if (!osn_var->sampling) 976 return; 977 978 #ifdef CONFIG_TIMERLAT_TRACER 979 if (osnoise_data.timerlat_tracer) { 980 struct timerlat_variables *tlat_var; 981 tlat_var = this_cpu_tmr_var(); 982 if (!tlat_var->tracing_thread) { 983 osn_var->thread.delta_start = 0; 984 osn_var->thread.arrival_time = 0; 985 return; 986 } 987 } 988 #endif 989 990 duration = get_int_safe_duration(osn_var, &osn_var->thread.delta_start); 991 992 trace_thread_noise(t, osn_var->thread.arrival_time, duration); 993 994 osn_var->thread.arrival_time = 0; 995 } 996 997 /* 998 * trace_sched_switch - sched:sched_switch trace event handler 999 * 1000 * This function is hooked to the sched:sched_switch trace event, and it is 1001 * used to record the beginning and to report the end of a thread noise window. 1002 */ 1003 static void 1004 trace_sched_switch_callback(void *data, bool preempt, struct task_struct *p, 1005 struct task_struct *n) 1006 { 1007 struct osnoise_variables *osn_var = this_cpu_osn_var(); 1008 1009 if (p->pid != osn_var->pid) 1010 thread_exit(osn_var, p); 1011 1012 if (n->pid != osn_var->pid) 1013 thread_entry(osn_var, n); 1014 } 1015 1016 /* 1017 * hook_thread_events - Hook the insturmentation for thread noise 1018 * 1019 * Hook the osnoise tracer callbacks to handle the noise from other 1020 * threads on the necessary kernel events. 1021 */ 1022 static int hook_thread_events(void) 1023 { 1024 int ret; 1025 1026 ret = register_trace_sched_switch(trace_sched_switch_callback, NULL); 1027 if (ret) 1028 return -EINVAL; 1029 1030 return 0; 1031 } 1032 1033 /* 1034 * unhook_thread_events - *nhook the insturmentation for thread noise 1035 * 1036 * Unook the osnoise tracer callbacks to handle the noise from other 1037 * threads on the necessary kernel events. 1038 */ 1039 static void unhook_thread_events(void) 1040 { 1041 unregister_trace_sched_switch(trace_sched_switch_callback, NULL); 1042 } 1043 1044 /* 1045 * save_osn_sample_stats - Save the osnoise_sample statistics 1046 * 1047 * Save the osnoise_sample statistics before the sampling phase. These 1048 * values will be used later to compute the diff betwneen the statistics 1049 * before and after the osnoise sampling. 1050 */ 1051 static void 1052 save_osn_sample_stats(struct osnoise_variables *osn_var, struct osnoise_sample *s) 1053 { 1054 s->nmi_count = osn_var->nmi.count; 1055 s->irq_count = osn_var->irq.count; 1056 s->softirq_count = osn_var->softirq.count; 1057 s->thread_count = osn_var->thread.count; 1058 } 1059 1060 /* 1061 * diff_osn_sample_stats - Compute the osnoise_sample statistics 1062 * 1063 * After a sample period, compute the difference on the osnoise_sample 1064 * statistics. The struct osnoise_sample *s contains the statistics saved via 1065 * save_osn_sample_stats() before the osnoise sampling. 1066 */ 1067 static void 1068 diff_osn_sample_stats(struct osnoise_variables *osn_var, struct osnoise_sample *s) 1069 { 1070 s->nmi_count = osn_var->nmi.count - s->nmi_count; 1071 s->irq_count = osn_var->irq.count - s->irq_count; 1072 s->softirq_count = osn_var->softirq.count - s->softirq_count; 1073 s->thread_count = osn_var->thread.count - s->thread_count; 1074 } 1075 1076 /* 1077 * osnoise_stop_tracing - Stop tracing and the tracer. 1078 */ 1079 static __always_inline void osnoise_stop_tracing(void) 1080 { 1081 struct trace_array *tr = osnoise_trace; 1082 1083 trace_array_printk_buf(tr->array_buffer.buffer, _THIS_IP_, 1084 "stop tracing hit on cpu %d\n", smp_processor_id()); 1085 1086 tracer_tracing_off(tr); 1087 } 1088 1089 /* 1090 * run_osnoise - Sample the time and look for osnoise 1091 * 1092 * Used to capture the time, looking for potential osnoise latency repeatedly. 1093 * Different from hwlat_detector, it is called with preemption and interrupts 1094 * enabled. This allows irqs, softirqs and threads to run, interfering on the 1095 * osnoise sampling thread, as they would do with a regular thread. 1096 */ 1097 static int run_osnoise(void) 1098 { 1099 struct osnoise_variables *osn_var = this_cpu_osn_var(); 1100 struct trace_array *tr = osnoise_trace; 1101 u64 start, sample, last_sample; 1102 u64 last_int_count, int_count; 1103 s64 noise = 0, max_noise = 0; 1104 s64 total, last_total = 0; 1105 struct osnoise_sample s; 1106 unsigned int threshold; 1107 u64 runtime, stop_in; 1108 u64 sum_noise = 0; 1109 int hw_count = 0; 1110 int ret = -1; 1111 1112 /* 1113 * Considers the current thread as the workload. 1114 */ 1115 osn_var->pid = current->pid; 1116 1117 /* 1118 * Save the current stats for the diff 1119 */ 1120 save_osn_sample_stats(osn_var, &s); 1121 1122 /* 1123 * if threshold is 0, use the default value of 5 us. 1124 */ 1125 threshold = tracing_thresh ? : 5000; 1126 1127 /* 1128 * Make sure NMIs see sampling first 1129 */ 1130 osn_var->sampling = true; 1131 barrier(); 1132 1133 /* 1134 * Transform the *_us config to nanoseconds to avoid the 1135 * division on the main loop. 1136 */ 1137 runtime = osnoise_data.sample_runtime * NSEC_PER_USEC; 1138 stop_in = osnoise_data.stop_tracing * NSEC_PER_USEC; 1139 1140 /* 1141 * Start timestemp 1142 */ 1143 start = time_get(); 1144 1145 /* 1146 * "previous" loop. 1147 */ 1148 last_int_count = set_int_safe_time(osn_var, &last_sample); 1149 1150 do { 1151 /* 1152 * Get sample! 1153 */ 1154 int_count = set_int_safe_time(osn_var, &sample); 1155 1156 noise = time_sub(sample, last_sample); 1157 1158 /* 1159 * This shouldn't happen. 1160 */ 1161 if (noise < 0) { 1162 osnoise_taint("negative noise!"); 1163 goto out; 1164 } 1165 1166 /* 1167 * Sample runtime. 1168 */ 1169 total = time_sub(sample, start); 1170 1171 /* 1172 * Check for possible overflows. 1173 */ 1174 if (total < last_total) { 1175 osnoise_taint("total overflow!"); 1176 break; 1177 } 1178 1179 last_total = total; 1180 1181 if (noise >= threshold) { 1182 int interference = int_count - last_int_count; 1183 1184 if (noise > max_noise) 1185 max_noise = noise; 1186 1187 if (!interference) 1188 hw_count++; 1189 1190 sum_noise += noise; 1191 1192 trace_sample_threshold(last_sample, noise, interference); 1193 1194 if (osnoise_data.stop_tracing) 1195 if (noise > stop_in) 1196 osnoise_stop_tracing(); 1197 } 1198 1199 /* 1200 * For the non-preemptive kernel config: let threads runs, if 1201 * they so wish. 1202 */ 1203 cond_resched(); 1204 1205 last_sample = sample; 1206 last_int_count = int_count; 1207 1208 } while (total < runtime && !kthread_should_stop()); 1209 1210 /* 1211 * Finish the above in the view for interrupts. 1212 */ 1213 barrier(); 1214 1215 osn_var->sampling = false; 1216 1217 /* 1218 * Make sure sampling data is no longer updated. 1219 */ 1220 barrier(); 1221 1222 /* 1223 * Save noise info. 1224 */ 1225 s.noise = time_to_us(sum_noise); 1226 s.runtime = time_to_us(total); 1227 s.max_sample = time_to_us(max_noise); 1228 s.hw_count = hw_count; 1229 1230 /* Save interference stats info */ 1231 diff_osn_sample_stats(osn_var, &s); 1232 1233 trace_osnoise_sample(&s); 1234 1235 /* Keep a running maximum ever recorded osnoise "latency" */ 1236 if (max_noise > tr->max_latency) { 1237 tr->max_latency = max_noise; 1238 latency_fsnotify(tr); 1239 } 1240 1241 if (osnoise_data.stop_tracing_total) 1242 if (s.noise > osnoise_data.stop_tracing_total) 1243 osnoise_stop_tracing(); 1244 1245 return 0; 1246 out: 1247 return ret; 1248 } 1249 1250 static struct cpumask osnoise_cpumask; 1251 static struct cpumask save_cpumask; 1252 1253 /* 1254 * osnoise_main - The osnoise detection kernel thread 1255 * 1256 * Calls run_osnoise() function to measure the osnoise for the configured runtime, 1257 * every period. 1258 */ 1259 static int osnoise_main(void *data) 1260 { 1261 u64 interval; 1262 1263 while (!kthread_should_stop()) { 1264 1265 run_osnoise(); 1266 1267 mutex_lock(&interface_lock); 1268 interval = osnoise_data.sample_period - osnoise_data.sample_runtime; 1269 mutex_unlock(&interface_lock); 1270 1271 do_div(interval, USEC_PER_MSEC); 1272 1273 /* 1274 * differently from hwlat_detector, the osnoise tracer can run 1275 * without a pause because preemption is on. 1276 */ 1277 if (interval < 1) { 1278 /* Let synchronize_rcu_tasks() make progress */ 1279 cond_resched_tasks_rcu_qs(); 1280 continue; 1281 } 1282 1283 if (msleep_interruptible(interval)) 1284 break; 1285 } 1286 1287 return 0; 1288 } 1289 1290 #ifdef CONFIG_TIMERLAT_TRACER 1291 /* 1292 * timerlat_irq - hrtimer handler for timerlat. 1293 */ 1294 static enum hrtimer_restart timerlat_irq(struct hrtimer *timer) 1295 { 1296 struct osnoise_variables *osn_var = this_cpu_osn_var(); 1297 struct trace_array *tr = osnoise_trace; 1298 struct timerlat_variables *tlat; 1299 struct timerlat_sample s; 1300 u64 now; 1301 u64 diff; 1302 1303 /* 1304 * I am not sure if the timer was armed for this CPU. So, get 1305 * the timerlat struct from the timer itself, not from this 1306 * CPU. 1307 */ 1308 tlat = container_of(timer, struct timerlat_variables, timer); 1309 1310 now = ktime_to_ns(hrtimer_cb_get_time(&tlat->timer)); 1311 1312 /* 1313 * Enable the osnoise: events for thread an softirq. 1314 */ 1315 tlat->tracing_thread = true; 1316 1317 osn_var->thread.arrival_time = time_get(); 1318 1319 /* 1320 * A hardirq is running: the timer IRQ. It is for sure preempting 1321 * a thread, and potentially preempting a softirq. 1322 * 1323 * At this point, it is not interesting to know the duration of the 1324 * preempted thread (and maybe softirq), but how much time they will 1325 * delay the beginning of the execution of the timer thread. 1326 * 1327 * To get the correct (net) delay added by the softirq, its delta_start 1328 * is set as the IRQ one. In this way, at the return of the IRQ, the delta 1329 * start of the sofitrq will be zeroed, accounting then only the time 1330 * after that. 1331 * 1332 * The thread follows the same principle. However, if a softirq is 1333 * running, the thread needs to receive the softirq delta_start. The 1334 * reason being is that the softirq will be the last to be unfolded, 1335 * resseting the thread delay to zero. 1336 */ 1337 #ifndef CONFIG_PREEMPT_RT 1338 if (osn_var->softirq.delta_start) { 1339 copy_int_safe_time(osn_var, &osn_var->thread.delta_start, 1340 &osn_var->softirq.delta_start); 1341 1342 copy_int_safe_time(osn_var, &osn_var->softirq.delta_start, 1343 &osn_var->irq.delta_start); 1344 } else { 1345 copy_int_safe_time(osn_var, &osn_var->thread.delta_start, 1346 &osn_var->irq.delta_start); 1347 } 1348 #else /* CONFIG_PREEMPT_RT */ 1349 /* 1350 * The sofirqs run as threads on RT, so there is not need 1351 * to keep track of it. 1352 */ 1353 copy_int_safe_time(osn_var, &osn_var->thread.delta_start, &osn_var->irq.delta_start); 1354 #endif /* CONFIG_PREEMPT_RT */ 1355 1356 /* 1357 * Compute the current time with the expected time. 1358 */ 1359 diff = now - tlat->abs_period; 1360 1361 tlat->count++; 1362 s.seqnum = tlat->count; 1363 s.timer_latency = diff; 1364 s.context = IRQ_CONTEXT; 1365 1366 trace_timerlat_sample(&s); 1367 1368 /* Keep a running maximum ever recorded os noise "latency" */ 1369 if (diff > tr->max_latency) { 1370 tr->max_latency = diff; 1371 latency_fsnotify(tr); 1372 } 1373 1374 if (osnoise_data.stop_tracing) 1375 if (time_to_us(diff) >= osnoise_data.stop_tracing) 1376 osnoise_stop_tracing(); 1377 1378 wake_up_process(tlat->kthread); 1379 1380 if (osnoise_data.print_stack) 1381 timerlat_save_stack(0); 1382 1383 return HRTIMER_NORESTART; 1384 } 1385 1386 /* 1387 * wait_next_period - Wait for the next period for timerlat 1388 */ 1389 static int wait_next_period(struct timerlat_variables *tlat) 1390 { 1391 ktime_t next_abs_period, now; 1392 u64 rel_period = osnoise_data.timerlat_period * 1000; 1393 1394 now = hrtimer_cb_get_time(&tlat->timer); 1395 next_abs_period = ns_to_ktime(tlat->abs_period + rel_period); 1396 1397 /* 1398 * Save the next abs_period. 1399 */ 1400 tlat->abs_period = (u64) ktime_to_ns(next_abs_period); 1401 1402 /* 1403 * If the new abs_period is in the past, skip the activation. 1404 */ 1405 while (ktime_compare(now, next_abs_period) > 0) { 1406 next_abs_period = ns_to_ktime(tlat->abs_period + rel_period); 1407 tlat->abs_period = (u64) ktime_to_ns(next_abs_period); 1408 } 1409 1410 set_current_state(TASK_INTERRUPTIBLE); 1411 1412 hrtimer_start(&tlat->timer, next_abs_period, HRTIMER_MODE_ABS_PINNED_HARD); 1413 schedule(); 1414 return 1; 1415 } 1416 1417 /* 1418 * timerlat_main- Timerlat main 1419 */ 1420 static int timerlat_main(void *data) 1421 { 1422 struct osnoise_variables *osn_var = this_cpu_osn_var(); 1423 struct timerlat_variables *tlat = this_cpu_tmr_var(); 1424 struct timerlat_sample s; 1425 struct sched_param sp; 1426 u64 now, diff; 1427 1428 /* 1429 * Make the thread RT, that is how cyclictest is usually used. 1430 */ 1431 sp.sched_priority = DEFAULT_TIMERLAT_PRIO; 1432 sched_setscheduler_nocheck(current, SCHED_FIFO, &sp); 1433 1434 tlat->count = 0; 1435 tlat->tracing_thread = false; 1436 1437 hrtimer_init(&tlat->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED_HARD); 1438 tlat->timer.function = timerlat_irq; 1439 tlat->kthread = current; 1440 osn_var->pid = current->pid; 1441 /* 1442 * Anotate the arrival time. 1443 */ 1444 tlat->abs_period = hrtimer_cb_get_time(&tlat->timer); 1445 1446 wait_next_period(tlat); 1447 1448 osn_var->sampling = 1; 1449 1450 while (!kthread_should_stop()) { 1451 now = ktime_to_ns(hrtimer_cb_get_time(&tlat->timer)); 1452 diff = now - tlat->abs_period; 1453 1454 s.seqnum = tlat->count; 1455 s.timer_latency = diff; 1456 s.context = THREAD_CONTEXT; 1457 1458 trace_timerlat_sample(&s); 1459 1460 #ifdef CONFIG_STACKTRACE 1461 if (osnoise_data.print_stack) 1462 if (osnoise_data.print_stack <= time_to_us(diff)) 1463 timerlat_dump_stack(); 1464 #endif /* CONFIG_STACKTRACE */ 1465 1466 tlat->tracing_thread = false; 1467 if (osnoise_data.stop_tracing_total) 1468 if (time_to_us(diff) >= osnoise_data.stop_tracing_total) 1469 osnoise_stop_tracing(); 1470 1471 wait_next_period(tlat); 1472 } 1473 1474 hrtimer_cancel(&tlat->timer); 1475 return 0; 1476 } 1477 #endif /* CONFIG_TIMERLAT_TRACER */ 1478 1479 /* 1480 * stop_kthread - stop a workload thread 1481 */ 1482 static void stop_kthread(unsigned int cpu) 1483 { 1484 struct task_struct *kthread; 1485 1486 kthread = per_cpu(per_cpu_osnoise_var, cpu).kthread; 1487 if (kthread) 1488 kthread_stop(kthread); 1489 per_cpu(per_cpu_osnoise_var, cpu).kthread = NULL; 1490 } 1491 1492 /* 1493 * stop_per_cpu_kthread - Stop per-cpu threads 1494 * 1495 * Stop the osnoise sampling htread. Use this on unload and at system 1496 * shutdown. 1497 */ 1498 static void stop_per_cpu_kthreads(void) 1499 { 1500 int cpu; 1501 1502 cpus_read_lock(); 1503 1504 for_each_online_cpu(cpu) 1505 stop_kthread(cpu); 1506 1507 cpus_read_unlock(); 1508 } 1509 1510 /* 1511 * start_kthread - Start a workload tread 1512 */ 1513 static int start_kthread(unsigned int cpu) 1514 { 1515 struct task_struct *kthread; 1516 void *main = osnoise_main; 1517 char comm[24]; 1518 1519 #ifdef CONFIG_TIMERLAT_TRACER 1520 if (osnoise_data.timerlat_tracer) { 1521 snprintf(comm, 24, "timerlat/%d", cpu); 1522 main = timerlat_main; 1523 } else { 1524 snprintf(comm, 24, "osnoise/%d", cpu); 1525 } 1526 #else 1527 snprintf(comm, 24, "osnoise/%d", cpu); 1528 #endif 1529 kthread = kthread_create_on_cpu(main, NULL, cpu, comm); 1530 1531 if (IS_ERR(kthread)) { 1532 pr_err(BANNER "could not start sampling thread\n"); 1533 stop_per_cpu_kthreads(); 1534 return -ENOMEM; 1535 } 1536 1537 per_cpu(per_cpu_osnoise_var, cpu).kthread = kthread; 1538 wake_up_process(kthread); 1539 1540 return 0; 1541 } 1542 1543 /* 1544 * start_per_cpu_kthread - Kick off per-cpu osnoise sampling kthreads 1545 * 1546 * This starts the kernel thread that will look for osnoise on many 1547 * cpus. 1548 */ 1549 static int start_per_cpu_kthreads(struct trace_array *tr) 1550 { 1551 struct cpumask *current_mask = &save_cpumask; 1552 int retval = 0; 1553 int cpu; 1554 1555 cpus_read_lock(); 1556 /* 1557 * Run only on CPUs in which trace and osnoise are allowed to run. 1558 */ 1559 cpumask_and(current_mask, tr->tracing_cpumask, &osnoise_cpumask); 1560 /* 1561 * And the CPU is online. 1562 */ 1563 cpumask_and(current_mask, cpu_online_mask, current_mask); 1564 1565 for_each_possible_cpu(cpu) 1566 per_cpu(per_cpu_osnoise_var, cpu).kthread = NULL; 1567 1568 for_each_cpu(cpu, current_mask) { 1569 retval = start_kthread(cpu); 1570 if (retval) { 1571 stop_per_cpu_kthreads(); 1572 break; 1573 } 1574 } 1575 1576 cpus_read_unlock(); 1577 1578 return retval; 1579 } 1580 1581 #ifdef CONFIG_HOTPLUG_CPU 1582 static void osnoise_hotplug_workfn(struct work_struct *dummy) 1583 { 1584 struct trace_array *tr = osnoise_trace; 1585 unsigned int cpu = smp_processor_id(); 1586 1587 1588 mutex_lock(&trace_types_lock); 1589 1590 if (!osnoise_busy) 1591 goto out_unlock_trace; 1592 1593 mutex_lock(&interface_lock); 1594 cpus_read_lock(); 1595 1596 if (!cpumask_test_cpu(cpu, &osnoise_cpumask)) 1597 goto out_unlock; 1598 1599 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask)) 1600 goto out_unlock; 1601 1602 start_kthread(cpu); 1603 1604 out_unlock: 1605 cpus_read_unlock(); 1606 mutex_unlock(&interface_lock); 1607 out_unlock_trace: 1608 mutex_unlock(&trace_types_lock); 1609 } 1610 1611 static DECLARE_WORK(osnoise_hotplug_work, osnoise_hotplug_workfn); 1612 1613 /* 1614 * osnoise_cpu_init - CPU hotplug online callback function 1615 */ 1616 static int osnoise_cpu_init(unsigned int cpu) 1617 { 1618 schedule_work_on(cpu, &osnoise_hotplug_work); 1619 return 0; 1620 } 1621 1622 /* 1623 * osnoise_cpu_die - CPU hotplug offline callback function 1624 */ 1625 static int osnoise_cpu_die(unsigned int cpu) 1626 { 1627 stop_kthread(cpu); 1628 return 0; 1629 } 1630 1631 static void osnoise_init_hotplug_support(void) 1632 { 1633 int ret; 1634 1635 ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "trace/osnoise:online", 1636 osnoise_cpu_init, osnoise_cpu_die); 1637 if (ret < 0) 1638 pr_warn(BANNER "Error to init cpu hotplug support\n"); 1639 1640 return; 1641 } 1642 #else /* CONFIG_HOTPLUG_CPU */ 1643 static void osnoise_init_hotplug_support(void) 1644 { 1645 return; 1646 } 1647 #endif /* CONFIG_HOTPLUG_CPU */ 1648 1649 /* 1650 * osnoise_cpus_read - Read function for reading the "cpus" file 1651 * @filp: The active open file structure 1652 * @ubuf: The userspace provided buffer to read value into 1653 * @cnt: The maximum number of bytes to read 1654 * @ppos: The current "file" position 1655 * 1656 * Prints the "cpus" output into the user-provided buffer. 1657 */ 1658 static ssize_t 1659 osnoise_cpus_read(struct file *filp, char __user *ubuf, size_t count, 1660 loff_t *ppos) 1661 { 1662 char *mask_str; 1663 int len; 1664 1665 mutex_lock(&interface_lock); 1666 1667 len = snprintf(NULL, 0, "%*pbl\n", cpumask_pr_args(&osnoise_cpumask)) + 1; 1668 mask_str = kmalloc(len, GFP_KERNEL); 1669 if (!mask_str) { 1670 count = -ENOMEM; 1671 goto out_unlock; 1672 } 1673 1674 len = snprintf(mask_str, len, "%*pbl\n", cpumask_pr_args(&osnoise_cpumask)); 1675 if (len >= count) { 1676 count = -EINVAL; 1677 goto out_free; 1678 } 1679 1680 count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len); 1681 1682 out_free: 1683 kfree(mask_str); 1684 out_unlock: 1685 mutex_unlock(&interface_lock); 1686 1687 return count; 1688 } 1689 1690 static void osnoise_tracer_start(struct trace_array *tr); 1691 static void osnoise_tracer_stop(struct trace_array *tr); 1692 1693 /* 1694 * osnoise_cpus_write - Write function for "cpus" entry 1695 * @filp: The active open file structure 1696 * @ubuf: The user buffer that contains the value to write 1697 * @cnt: The maximum number of bytes to write to "file" 1698 * @ppos: The current position in @file 1699 * 1700 * This function provides a write implementation for the "cpus" 1701 * interface to the osnoise trace. By default, it lists all CPUs, 1702 * in this way, allowing osnoise threads to run on any online CPU 1703 * of the system. It serves to restrict the execution of osnoise to the 1704 * set of CPUs writing via this interface. Note that osnoise also 1705 * respects the "tracing_cpumask." Hence, osnoise threads will run only 1706 * on the set of CPUs allowed here AND on "tracing_cpumask." Why not 1707 * have just "tracing_cpumask?" Because the user might be interested 1708 * in tracing what is running on other CPUs. For instance, one might 1709 * run osnoise in one HT CPU while observing what is running on the 1710 * sibling HT CPU. 1711 */ 1712 static ssize_t 1713 osnoise_cpus_write(struct file *filp, const char __user *ubuf, size_t count, 1714 loff_t *ppos) 1715 { 1716 struct trace_array *tr = osnoise_trace; 1717 cpumask_var_t osnoise_cpumask_new; 1718 int running, err; 1719 char buf[256]; 1720 1721 if (count >= 256) 1722 return -EINVAL; 1723 1724 if (copy_from_user(buf, ubuf, count)) 1725 return -EFAULT; 1726 1727 if (!zalloc_cpumask_var(&osnoise_cpumask_new, GFP_KERNEL)) 1728 return -ENOMEM; 1729 1730 err = cpulist_parse(buf, osnoise_cpumask_new); 1731 if (err) 1732 goto err_free; 1733 1734 /* 1735 * trace_types_lock is taken to avoid concurrency on start/stop 1736 * and osnoise_busy. 1737 */ 1738 mutex_lock(&trace_types_lock); 1739 running = osnoise_busy; 1740 if (running) 1741 osnoise_tracer_stop(tr); 1742 1743 mutex_lock(&interface_lock); 1744 /* 1745 * osnoise_cpumask is read by CPU hotplug operations. 1746 */ 1747 cpus_read_lock(); 1748 1749 cpumask_copy(&osnoise_cpumask, osnoise_cpumask_new); 1750 1751 cpus_read_unlock(); 1752 mutex_unlock(&interface_lock); 1753 1754 if (running) 1755 osnoise_tracer_start(tr); 1756 mutex_unlock(&trace_types_lock); 1757 1758 free_cpumask_var(osnoise_cpumask_new); 1759 return count; 1760 1761 err_free: 1762 free_cpumask_var(osnoise_cpumask_new); 1763 1764 return err; 1765 } 1766 1767 /* 1768 * osnoise/runtime_us: cannot be greater than the period. 1769 */ 1770 static struct trace_min_max_param osnoise_runtime = { 1771 .lock = &interface_lock, 1772 .val = &osnoise_data.sample_runtime, 1773 .max = &osnoise_data.sample_period, 1774 .min = NULL, 1775 }; 1776 1777 /* 1778 * osnoise/period_us: cannot be smaller than the runtime. 1779 */ 1780 static struct trace_min_max_param osnoise_period = { 1781 .lock = &interface_lock, 1782 .val = &osnoise_data.sample_period, 1783 .max = NULL, 1784 .min = &osnoise_data.sample_runtime, 1785 }; 1786 1787 /* 1788 * osnoise/stop_tracing_us: no limit. 1789 */ 1790 static struct trace_min_max_param osnoise_stop_tracing_in = { 1791 .lock = &interface_lock, 1792 .val = &osnoise_data.stop_tracing, 1793 .max = NULL, 1794 .min = NULL, 1795 }; 1796 1797 /* 1798 * osnoise/stop_tracing_total_us: no limit. 1799 */ 1800 static struct trace_min_max_param osnoise_stop_tracing_total = { 1801 .lock = &interface_lock, 1802 .val = &osnoise_data.stop_tracing_total, 1803 .max = NULL, 1804 .min = NULL, 1805 }; 1806 1807 #ifdef CONFIG_TIMERLAT_TRACER 1808 /* 1809 * osnoise/print_stack: print the stacktrace of the IRQ handler if the total 1810 * latency is higher than val. 1811 */ 1812 static struct trace_min_max_param osnoise_print_stack = { 1813 .lock = &interface_lock, 1814 .val = &osnoise_data.print_stack, 1815 .max = NULL, 1816 .min = NULL, 1817 }; 1818 1819 /* 1820 * osnoise/timerlat_period: min 100 us, max 1 s 1821 */ 1822 u64 timerlat_min_period = 100; 1823 u64 timerlat_max_period = 1000000; 1824 static struct trace_min_max_param timerlat_period = { 1825 .lock = &interface_lock, 1826 .val = &osnoise_data.timerlat_period, 1827 .max = &timerlat_max_period, 1828 .min = &timerlat_min_period, 1829 }; 1830 #endif 1831 1832 static const struct file_operations cpus_fops = { 1833 .open = tracing_open_generic, 1834 .read = osnoise_cpus_read, 1835 .write = osnoise_cpus_write, 1836 .llseek = generic_file_llseek, 1837 }; 1838 1839 /* 1840 * init_tracefs - A function to initialize the tracefs interface files 1841 * 1842 * This function creates entries in tracefs for "osnoise" and "timerlat". 1843 * It creates these directories in the tracing directory, and within that 1844 * directory the use can change and view the configs. 1845 */ 1846 static int init_tracefs(void) 1847 { 1848 struct dentry *top_dir; 1849 struct dentry *tmp; 1850 int ret; 1851 1852 ret = tracing_init_dentry(); 1853 if (ret) 1854 return -ENOMEM; 1855 1856 top_dir = tracefs_create_dir("osnoise", NULL); 1857 if (!top_dir) 1858 return 0; 1859 1860 tmp = tracefs_create_file("period_us", TRACE_MODE_WRITE, top_dir, 1861 &osnoise_period, &trace_min_max_fops); 1862 if (!tmp) 1863 goto err; 1864 1865 tmp = tracefs_create_file("runtime_us", TRACE_MODE_WRITE, top_dir, 1866 &osnoise_runtime, &trace_min_max_fops); 1867 if (!tmp) 1868 goto err; 1869 1870 tmp = tracefs_create_file("stop_tracing_us", TRACE_MODE_WRITE, top_dir, 1871 &osnoise_stop_tracing_in, &trace_min_max_fops); 1872 if (!tmp) 1873 goto err; 1874 1875 tmp = tracefs_create_file("stop_tracing_total_us", TRACE_MODE_WRITE, top_dir, 1876 &osnoise_stop_tracing_total, &trace_min_max_fops); 1877 if (!tmp) 1878 goto err; 1879 1880 tmp = trace_create_file("cpus", TRACE_MODE_WRITE, top_dir, NULL, &cpus_fops); 1881 if (!tmp) 1882 goto err; 1883 #ifdef CONFIG_TIMERLAT_TRACER 1884 #ifdef CONFIG_STACKTRACE 1885 tmp = tracefs_create_file("print_stack", TRACE_MODE_WRITE, top_dir, 1886 &osnoise_print_stack, &trace_min_max_fops); 1887 if (!tmp) 1888 goto err; 1889 #endif 1890 1891 tmp = tracefs_create_file("timerlat_period_us", TRACE_MODE_WRITE, top_dir, 1892 &timerlat_period, &trace_min_max_fops); 1893 if (!tmp) 1894 goto err; 1895 #endif 1896 1897 return 0; 1898 1899 err: 1900 tracefs_remove(top_dir); 1901 return -ENOMEM; 1902 } 1903 1904 static int osnoise_hook_events(void) 1905 { 1906 int retval; 1907 1908 /* 1909 * Trace is already hooked, we are re-enabling from 1910 * a stop_tracing_*. 1911 */ 1912 if (trace_osnoise_callback_enabled) 1913 return 0; 1914 1915 retval = hook_irq_events(); 1916 if (retval) 1917 return -EINVAL; 1918 1919 retval = hook_softirq_events(); 1920 if (retval) 1921 goto out_unhook_irq; 1922 1923 retval = hook_thread_events(); 1924 /* 1925 * All fine! 1926 */ 1927 if (!retval) 1928 return 0; 1929 1930 unhook_softirq_events(); 1931 out_unhook_irq: 1932 unhook_irq_events(); 1933 return -EINVAL; 1934 } 1935 1936 static int __osnoise_tracer_start(struct trace_array *tr) 1937 { 1938 int retval; 1939 1940 osn_var_reset_all(); 1941 1942 retval = osnoise_hook_events(); 1943 if (retval) 1944 return retval; 1945 /* 1946 * Make sure NMIs see reseted values. 1947 */ 1948 barrier(); 1949 trace_osnoise_callback_enabled = true; 1950 1951 retval = start_per_cpu_kthreads(tr); 1952 if (retval) { 1953 unhook_irq_events(); 1954 return retval; 1955 } 1956 1957 osnoise_busy = true; 1958 1959 return 0; 1960 } 1961 1962 static void osnoise_tracer_start(struct trace_array *tr) 1963 { 1964 int retval; 1965 1966 if (osnoise_busy) 1967 return; 1968 1969 retval = __osnoise_tracer_start(tr); 1970 if (retval) 1971 pr_err(BANNER "Error starting osnoise tracer\n"); 1972 1973 } 1974 1975 static void osnoise_tracer_stop(struct trace_array *tr) 1976 { 1977 if (!osnoise_busy) 1978 return; 1979 1980 trace_osnoise_callback_enabled = false; 1981 barrier(); 1982 1983 stop_per_cpu_kthreads(); 1984 1985 unhook_irq_events(); 1986 unhook_softirq_events(); 1987 unhook_thread_events(); 1988 1989 osnoise_busy = false; 1990 } 1991 1992 static int osnoise_tracer_init(struct trace_array *tr) 1993 { 1994 1995 /* Only allow one instance to enable this */ 1996 if (osnoise_busy) 1997 return -EBUSY; 1998 1999 osnoise_trace = tr; 2000 tr->max_latency = 0; 2001 2002 osnoise_tracer_start(tr); 2003 2004 return 0; 2005 } 2006 2007 static void osnoise_tracer_reset(struct trace_array *tr) 2008 { 2009 osnoise_tracer_stop(tr); 2010 } 2011 2012 static struct tracer osnoise_tracer __read_mostly = { 2013 .name = "osnoise", 2014 .init = osnoise_tracer_init, 2015 .reset = osnoise_tracer_reset, 2016 .start = osnoise_tracer_start, 2017 .stop = osnoise_tracer_stop, 2018 .print_header = print_osnoise_headers, 2019 .allow_instances = true, 2020 }; 2021 2022 #ifdef CONFIG_TIMERLAT_TRACER 2023 static void timerlat_tracer_start(struct trace_array *tr) 2024 { 2025 int retval; 2026 2027 if (osnoise_busy) 2028 return; 2029 2030 osnoise_data.timerlat_tracer = 1; 2031 2032 retval = __osnoise_tracer_start(tr); 2033 if (retval) 2034 goto out_err; 2035 2036 return; 2037 out_err: 2038 pr_err(BANNER "Error starting timerlat tracer\n"); 2039 } 2040 2041 static void timerlat_tracer_stop(struct trace_array *tr) 2042 { 2043 int cpu; 2044 2045 if (!osnoise_busy) 2046 return; 2047 2048 for_each_online_cpu(cpu) 2049 per_cpu(per_cpu_osnoise_var, cpu).sampling = 0; 2050 2051 osnoise_tracer_stop(tr); 2052 2053 osnoise_data.timerlat_tracer = 0; 2054 } 2055 2056 static int timerlat_tracer_init(struct trace_array *tr) 2057 { 2058 /* Only allow one instance to enable this */ 2059 if (osnoise_busy) 2060 return -EBUSY; 2061 2062 osnoise_trace = tr; 2063 2064 tr->max_latency = 0; 2065 2066 timerlat_tracer_start(tr); 2067 2068 return 0; 2069 } 2070 2071 static void timerlat_tracer_reset(struct trace_array *tr) 2072 { 2073 timerlat_tracer_stop(tr); 2074 } 2075 2076 static struct tracer timerlat_tracer __read_mostly = { 2077 .name = "timerlat", 2078 .init = timerlat_tracer_init, 2079 .reset = timerlat_tracer_reset, 2080 .start = timerlat_tracer_start, 2081 .stop = timerlat_tracer_stop, 2082 .print_header = print_timerlat_headers, 2083 .allow_instances = true, 2084 }; 2085 #endif /* CONFIG_TIMERLAT_TRACER */ 2086 2087 __init static int init_osnoise_tracer(void) 2088 { 2089 int ret; 2090 2091 mutex_init(&interface_lock); 2092 2093 cpumask_copy(&osnoise_cpumask, cpu_all_mask); 2094 2095 ret = register_tracer(&osnoise_tracer); 2096 if (ret) { 2097 pr_err(BANNER "Error registering osnoise!\n"); 2098 return ret; 2099 } 2100 2101 #ifdef CONFIG_TIMERLAT_TRACER 2102 ret = register_tracer(&timerlat_tracer); 2103 if (ret) { 2104 pr_err(BANNER "Error registering timerlat\n"); 2105 return ret; 2106 } 2107 #endif 2108 osnoise_init_hotplug_support(); 2109 2110 init_tracefs(); 2111 2112 return 0; 2113 } 2114 late_initcall(init_osnoise_tracer); 2115