1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * OS Noise Tracer: computes the OS Noise suffered by a running thread. 4 * Timerlat Tracer: measures the wakeup latency of a timer triggered IRQ and thread. 5 * 6 * Based on "hwlat_detector" tracer by: 7 * Copyright (C) 2008-2009 Jon Masters, Red Hat, Inc. <jcm@redhat.com> 8 * Copyright (C) 2013-2016 Steven Rostedt, Red Hat, Inc. <srostedt@redhat.com> 9 * With feedback from Clark Williams <williams@redhat.com> 10 * 11 * And also based on the rtsl tracer presented on: 12 * DE OLIVEIRA, Daniel Bristot, et al. Demystifying the real-time linux 13 * scheduling latency. In: 32nd Euromicro Conference on Real-Time Systems 14 * (ECRTS 2020). Schloss Dagstuhl-Leibniz-Zentrum fur Informatik, 2020. 15 * 16 * Copyright (C) 2021 Daniel Bristot de Oliveira, Red Hat, Inc. <bristot@redhat.com> 17 */ 18 19 #include <linux/kthread.h> 20 #include <linux/tracefs.h> 21 #include <linux/uaccess.h> 22 #include <linux/cpumask.h> 23 #include <linux/delay.h> 24 #include <linux/sched/clock.h> 25 #include <uapi/linux/sched/types.h> 26 #include <linux/sched.h> 27 #include "trace.h" 28 29 #ifdef CONFIG_X86_LOCAL_APIC 30 #include <asm/trace/irq_vectors.h> 31 #undef TRACE_INCLUDE_PATH 32 #undef TRACE_INCLUDE_FILE 33 #endif /* CONFIG_X86_LOCAL_APIC */ 34 35 #include <trace/events/irq.h> 36 #include <trace/events/sched.h> 37 38 #define CREATE_TRACE_POINTS 39 #include <trace/events/osnoise.h> 40 41 static struct trace_array *osnoise_trace; 42 43 /* 44 * Default values. 45 */ 46 #define BANNER "osnoise: " 47 #define DEFAULT_SAMPLE_PERIOD 1000000 /* 1s */ 48 #define DEFAULT_SAMPLE_RUNTIME 1000000 /* 1s */ 49 50 #define DEFAULT_TIMERLAT_PERIOD 1000 /* 1ms */ 51 #define DEFAULT_TIMERLAT_PRIO 95 /* FIFO 95 */ 52 53 /* 54 * NMI runtime info. 55 */ 56 struct osn_nmi { 57 u64 count; 58 u64 delta_start; 59 }; 60 61 /* 62 * IRQ runtime info. 63 */ 64 struct osn_irq { 65 u64 count; 66 u64 arrival_time; 67 u64 delta_start; 68 }; 69 70 #define IRQ_CONTEXT 0 71 #define THREAD_CONTEXT 1 72 /* 73 * sofirq runtime info. 74 */ 75 struct osn_softirq { 76 u64 count; 77 u64 arrival_time; 78 u64 delta_start; 79 }; 80 81 /* 82 * thread runtime info. 83 */ 84 struct osn_thread { 85 u64 count; 86 u64 arrival_time; 87 u64 delta_start; 88 }; 89 90 /* 91 * Runtime information: this structure saves the runtime information used by 92 * one sampling thread. 93 */ 94 struct osnoise_variables { 95 struct task_struct *kthread; 96 bool sampling; 97 pid_t pid; 98 struct osn_nmi nmi; 99 struct osn_irq irq; 100 struct osn_softirq softirq; 101 struct osn_thread thread; 102 local_t int_counter; 103 }; 104 105 /* 106 * Per-cpu runtime information. 107 */ 108 DEFINE_PER_CPU(struct osnoise_variables, per_cpu_osnoise_var); 109 110 /* 111 * this_cpu_osn_var - Return the per-cpu osnoise_variables on its relative CPU 112 */ 113 static inline struct osnoise_variables *this_cpu_osn_var(void) 114 { 115 return this_cpu_ptr(&per_cpu_osnoise_var); 116 } 117 118 #ifdef CONFIG_TIMERLAT_TRACER 119 /* 120 * Runtime information for the timer mode. 121 */ 122 struct timerlat_variables { 123 struct task_struct *kthread; 124 struct hrtimer timer; 125 u64 rel_period; 126 u64 abs_period; 127 bool tracing_thread; 128 u64 count; 129 }; 130 131 DEFINE_PER_CPU(struct timerlat_variables, per_cpu_timerlat_var); 132 133 /* 134 * this_cpu_tmr_var - Return the per-cpu timerlat_variables on its relative CPU 135 */ 136 static inline struct timerlat_variables *this_cpu_tmr_var(void) 137 { 138 return this_cpu_ptr(&per_cpu_timerlat_var); 139 } 140 141 /* 142 * tlat_var_reset - Reset the values of the given timerlat_variables 143 */ 144 static inline void tlat_var_reset(void) 145 { 146 struct timerlat_variables *tlat_var; 147 int cpu; 148 /* 149 * So far, all the values are initialized as 0, so 150 * zeroing the structure is perfect. 151 */ 152 for_each_cpu(cpu, cpu_online_mask) { 153 tlat_var = per_cpu_ptr(&per_cpu_timerlat_var, cpu); 154 memset(tlat_var, 0, sizeof(*tlat_var)); 155 } 156 } 157 #else /* CONFIG_TIMERLAT_TRACER */ 158 #define tlat_var_reset() do {} while (0) 159 #endif /* CONFIG_TIMERLAT_TRACER */ 160 161 /* 162 * osn_var_reset - Reset the values of the given osnoise_variables 163 */ 164 static inline void osn_var_reset(void) 165 { 166 struct osnoise_variables *osn_var; 167 int cpu; 168 169 /* 170 * So far, all the values are initialized as 0, so 171 * zeroing the structure is perfect. 172 */ 173 for_each_cpu(cpu, cpu_online_mask) { 174 osn_var = per_cpu_ptr(&per_cpu_osnoise_var, cpu); 175 memset(osn_var, 0, sizeof(*osn_var)); 176 } 177 } 178 179 /* 180 * osn_var_reset_all - Reset the value of all per-cpu osnoise_variables 181 */ 182 static inline void osn_var_reset_all(void) 183 { 184 osn_var_reset(); 185 tlat_var_reset(); 186 } 187 188 /* 189 * Tells NMIs to call back to the osnoise tracer to record timestamps. 190 */ 191 bool trace_osnoise_callback_enabled; 192 193 /* 194 * osnoise sample structure definition. Used to store the statistics of a 195 * sample run. 196 */ 197 struct osnoise_sample { 198 u64 runtime; /* runtime */ 199 u64 noise; /* noise */ 200 u64 max_sample; /* max single noise sample */ 201 int hw_count; /* # HW (incl. hypervisor) interference */ 202 int nmi_count; /* # NMIs during this sample */ 203 int irq_count; /* # IRQs during this sample */ 204 int softirq_count; /* # softirqs during this sample */ 205 int thread_count; /* # threads during this sample */ 206 }; 207 208 #ifdef CONFIG_TIMERLAT_TRACER 209 /* 210 * timerlat sample structure definition. Used to store the statistics of 211 * a sample run. 212 */ 213 struct timerlat_sample { 214 u64 timer_latency; /* timer_latency */ 215 unsigned int seqnum; /* unique sequence */ 216 int context; /* timer context */ 217 }; 218 #endif 219 220 /* 221 * Protect the interface. 222 */ 223 struct mutex interface_lock; 224 225 /* 226 * Tracer data. 227 */ 228 static struct osnoise_data { 229 u64 sample_period; /* total sampling period */ 230 u64 sample_runtime; /* active sampling portion of period */ 231 u64 stop_tracing; /* stop trace in the internal operation (loop/irq) */ 232 u64 stop_tracing_total; /* stop trace in the final operation (report/thread) */ 233 #ifdef CONFIG_TIMERLAT_TRACER 234 u64 timerlat_period; /* timerlat period */ 235 u64 print_stack; /* print IRQ stack if total > */ 236 int timerlat_tracer; /* timerlat tracer */ 237 #endif 238 bool tainted; /* infor users and developers about a problem */ 239 } osnoise_data = { 240 .sample_period = DEFAULT_SAMPLE_PERIOD, 241 .sample_runtime = DEFAULT_SAMPLE_RUNTIME, 242 .stop_tracing = 0, 243 .stop_tracing_total = 0, 244 #ifdef CONFIG_TIMERLAT_TRACER 245 .print_stack = 0, 246 .timerlat_period = DEFAULT_TIMERLAT_PERIOD, 247 .timerlat_tracer = 0, 248 #endif 249 }; 250 251 /* 252 * Boolean variable used to inform that the tracer is currently sampling. 253 */ 254 static bool osnoise_busy; 255 256 /* 257 * Print the osnoise header info. 258 */ 259 static void print_osnoise_headers(struct seq_file *s) 260 { 261 if (osnoise_data.tainted) 262 seq_puts(s, "# osnoise is tainted!\n"); 263 264 seq_puts(s, "# _-----=> irqs-off\n"); 265 seq_puts(s, "# / _----=> need-resched\n"); 266 seq_puts(s, "# | / _---=> hardirq/softirq\n"); 267 seq_puts(s, "# || / _--=> preempt-depth "); 268 seq_puts(s, " MAX\n"); 269 270 seq_puts(s, "# || / "); 271 seq_puts(s, " SINGLE Interference counters:\n"); 272 273 seq_puts(s, "# |||| RUNTIME "); 274 seq_puts(s, " NOISE %% OF CPU NOISE +-----------------------------+\n"); 275 276 seq_puts(s, "# TASK-PID CPU# |||| TIMESTAMP IN US "); 277 seq_puts(s, " IN US AVAILABLE IN US HW NMI IRQ SIRQ THREAD\n"); 278 279 seq_puts(s, "# | | | |||| | | "); 280 seq_puts(s, " | | | | | | | |\n"); 281 } 282 283 /* 284 * osnoise_taint - report an osnoise error. 285 */ 286 #define osnoise_taint(msg) ({ \ 287 struct trace_array *tr = osnoise_trace; \ 288 \ 289 trace_array_printk_buf(tr->array_buffer.buffer, _THIS_IP_, msg); \ 290 osnoise_data.tainted = true; \ 291 }) 292 293 /* 294 * Record an osnoise_sample into the tracer buffer. 295 */ 296 static void trace_osnoise_sample(struct osnoise_sample *sample) 297 { 298 struct trace_array *tr = osnoise_trace; 299 struct trace_buffer *buffer = tr->array_buffer.buffer; 300 struct trace_event_call *call = &event_osnoise; 301 struct ring_buffer_event *event; 302 struct osnoise_entry *entry; 303 304 event = trace_buffer_lock_reserve(buffer, TRACE_OSNOISE, sizeof(*entry), 305 tracing_gen_ctx()); 306 if (!event) 307 return; 308 entry = ring_buffer_event_data(event); 309 entry->runtime = sample->runtime; 310 entry->noise = sample->noise; 311 entry->max_sample = sample->max_sample; 312 entry->hw_count = sample->hw_count; 313 entry->nmi_count = sample->nmi_count; 314 entry->irq_count = sample->irq_count; 315 entry->softirq_count = sample->softirq_count; 316 entry->thread_count = sample->thread_count; 317 318 if (!call_filter_check_discard(call, entry, buffer, event)) 319 trace_buffer_unlock_commit_nostack(buffer, event); 320 } 321 322 #ifdef CONFIG_TIMERLAT_TRACER 323 /* 324 * Print the timerlat header info. 325 */ 326 static void print_timerlat_headers(struct seq_file *s) 327 { 328 seq_puts(s, "# _-----=> irqs-off\n"); 329 seq_puts(s, "# / _----=> need-resched\n"); 330 seq_puts(s, "# | / _---=> hardirq/softirq\n"); 331 seq_puts(s, "# || / _--=> preempt-depth\n"); 332 seq_puts(s, "# || /\n"); 333 seq_puts(s, "# |||| ACTIVATION\n"); 334 seq_puts(s, "# TASK-PID CPU# |||| TIMESTAMP ID "); 335 seq_puts(s, " CONTEXT LATENCY\n"); 336 seq_puts(s, "# | | | |||| | | "); 337 seq_puts(s, " | |\n"); 338 } 339 340 /* 341 * Record an timerlat_sample into the tracer buffer. 342 */ 343 static void trace_timerlat_sample(struct timerlat_sample *sample) 344 { 345 struct trace_array *tr = osnoise_trace; 346 struct trace_event_call *call = &event_osnoise; 347 struct trace_buffer *buffer = tr->array_buffer.buffer; 348 struct ring_buffer_event *event; 349 struct timerlat_entry *entry; 350 351 event = trace_buffer_lock_reserve(buffer, TRACE_TIMERLAT, sizeof(*entry), 352 tracing_gen_ctx()); 353 if (!event) 354 return; 355 entry = ring_buffer_event_data(event); 356 entry->seqnum = sample->seqnum; 357 entry->context = sample->context; 358 entry->timer_latency = sample->timer_latency; 359 360 if (!call_filter_check_discard(call, entry, buffer, event)) 361 trace_buffer_unlock_commit_nostack(buffer, event); 362 } 363 364 #ifdef CONFIG_STACKTRACE 365 366 #define MAX_CALLS 256 367 368 /* 369 * Stack trace will take place only at IRQ level, so, no need 370 * to control nesting here. 371 */ 372 struct trace_stack { 373 int stack_size; 374 int nr_entries; 375 unsigned long calls[MAX_CALLS]; 376 }; 377 378 static DEFINE_PER_CPU(struct trace_stack, trace_stack); 379 380 /* 381 * timerlat_save_stack - save a stack trace without printing 382 * 383 * Save the current stack trace without printing. The 384 * stack will be printed later, after the end of the measurement. 385 */ 386 static void timerlat_save_stack(int skip) 387 { 388 unsigned int size, nr_entries; 389 struct trace_stack *fstack; 390 391 fstack = this_cpu_ptr(&trace_stack); 392 393 size = ARRAY_SIZE(fstack->calls); 394 395 nr_entries = stack_trace_save(fstack->calls, size, skip); 396 397 fstack->stack_size = nr_entries * sizeof(unsigned long); 398 fstack->nr_entries = nr_entries; 399 400 return; 401 402 } 403 /* 404 * timerlat_dump_stack - dump a stack trace previously saved 405 * 406 * Dump a saved stack trace into the trace buffer. 407 */ 408 static void timerlat_dump_stack(void) 409 { 410 struct trace_event_call *call = &event_osnoise; 411 struct trace_array *tr = osnoise_trace; 412 struct trace_buffer *buffer = tr->array_buffer.buffer; 413 struct ring_buffer_event *event; 414 struct trace_stack *fstack; 415 struct stack_entry *entry; 416 unsigned int size; 417 418 preempt_disable_notrace(); 419 fstack = this_cpu_ptr(&trace_stack); 420 size = fstack->stack_size; 421 422 event = trace_buffer_lock_reserve(buffer, TRACE_STACK, sizeof(*entry) + size, 423 tracing_gen_ctx()); 424 if (!event) 425 goto out; 426 427 entry = ring_buffer_event_data(event); 428 429 memcpy(&entry->caller, fstack->calls, size); 430 entry->size = fstack->nr_entries; 431 432 if (!call_filter_check_discard(call, entry, buffer, event)) 433 trace_buffer_unlock_commit_nostack(buffer, event); 434 435 out: 436 preempt_enable_notrace(); 437 } 438 #else 439 #define timerlat_dump_stack() do {} while (0) 440 #define timerlat_save_stack(a) do {} while (0) 441 #endif /* CONFIG_STACKTRACE */ 442 #endif /* CONFIG_TIMERLAT_TRACER */ 443 444 /* 445 * Macros to encapsulate the time capturing infrastructure. 446 */ 447 #define time_get() trace_clock_local() 448 #define time_to_us(x) div_u64(x, 1000) 449 #define time_sub(a, b) ((a) - (b)) 450 451 /* 452 * cond_move_irq_delta_start - Forward the delta_start of a running IRQ 453 * 454 * If an IRQ is preempted by an NMI, its delta_start is pushed forward 455 * to discount the NMI interference. 456 * 457 * See get_int_safe_duration(). 458 */ 459 static inline void 460 cond_move_irq_delta_start(struct osnoise_variables *osn_var, u64 duration) 461 { 462 if (osn_var->irq.delta_start) 463 osn_var->irq.delta_start += duration; 464 } 465 466 #ifndef CONFIG_PREEMPT_RT 467 /* 468 * cond_move_softirq_delta_start - Forward the delta_start of a running softirq. 469 * 470 * If a softirq is preempted by an IRQ or NMI, its delta_start is pushed 471 * forward to discount the interference. 472 * 473 * See get_int_safe_duration(). 474 */ 475 static inline void 476 cond_move_softirq_delta_start(struct osnoise_variables *osn_var, u64 duration) 477 { 478 if (osn_var->softirq.delta_start) 479 osn_var->softirq.delta_start += duration; 480 } 481 #else /* CONFIG_PREEMPT_RT */ 482 #define cond_move_softirq_delta_start(osn_var, duration) do {} while (0) 483 #endif 484 485 /* 486 * cond_move_thread_delta_start - Forward the delta_start of a running thread 487 * 488 * If a noisy thread is preempted by an softirq, IRQ or NMI, its delta_start 489 * is pushed forward to discount the interference. 490 * 491 * See get_int_safe_duration(). 492 */ 493 static inline void 494 cond_move_thread_delta_start(struct osnoise_variables *osn_var, u64 duration) 495 { 496 if (osn_var->thread.delta_start) 497 osn_var->thread.delta_start += duration; 498 } 499 500 /* 501 * get_int_safe_duration - Get the duration of a window 502 * 503 * The irq, softirq and thread varaibles need to have its duration without 504 * the interference from higher priority interrupts. Instead of keeping a 505 * variable to discount the interrupt interference from these variables, the 506 * starting time of these variables are pushed forward with the interrupt's 507 * duration. In this way, a single variable is used to: 508 * 509 * - Know if a given window is being measured. 510 * - Account its duration. 511 * - Discount the interference. 512 * 513 * To avoid getting inconsistent values, e.g.,: 514 * 515 * now = time_get() 516 * ---> interrupt! 517 * delta_start -= int duration; 518 * <--- 519 * duration = now - delta_start; 520 * 521 * result: negative duration if the variable duration before the 522 * interrupt was smaller than the interrupt execution. 523 * 524 * A counter of interrupts is used. If the counter increased, try 525 * to capture an interference safe duration. 526 */ 527 static inline s64 528 get_int_safe_duration(struct osnoise_variables *osn_var, u64 *delta_start) 529 { 530 u64 int_counter, now; 531 s64 duration; 532 533 do { 534 int_counter = local_read(&osn_var->int_counter); 535 /* synchronize with interrupts */ 536 barrier(); 537 538 now = time_get(); 539 duration = (now - *delta_start); 540 541 /* synchronize with interrupts */ 542 barrier(); 543 } while (int_counter != local_read(&osn_var->int_counter)); 544 545 /* 546 * This is an evidence of race conditions that cause 547 * a value to be "discounted" too much. 548 */ 549 if (duration < 0) 550 osnoise_taint("Negative duration!\n"); 551 552 *delta_start = 0; 553 554 return duration; 555 } 556 557 /* 558 * 559 * set_int_safe_time - Save the current time on *time, aware of interference 560 * 561 * Get the time, taking into consideration a possible interference from 562 * higher priority interrupts. 563 * 564 * See get_int_safe_duration() for an explanation. 565 */ 566 static u64 567 set_int_safe_time(struct osnoise_variables *osn_var, u64 *time) 568 { 569 u64 int_counter; 570 571 do { 572 int_counter = local_read(&osn_var->int_counter); 573 /* synchronize with interrupts */ 574 barrier(); 575 576 *time = time_get(); 577 578 /* synchronize with interrupts */ 579 barrier(); 580 } while (int_counter != local_read(&osn_var->int_counter)); 581 582 return int_counter; 583 } 584 585 #ifdef CONFIG_TIMERLAT_TRACER 586 /* 587 * copy_int_safe_time - Copy *src into *desc aware of interference 588 */ 589 static u64 590 copy_int_safe_time(struct osnoise_variables *osn_var, u64 *dst, u64 *src) 591 { 592 u64 int_counter; 593 594 do { 595 int_counter = local_read(&osn_var->int_counter); 596 /* synchronize with interrupts */ 597 barrier(); 598 599 *dst = *src; 600 601 /* synchronize with interrupts */ 602 barrier(); 603 } while (int_counter != local_read(&osn_var->int_counter)); 604 605 return int_counter; 606 } 607 #endif /* CONFIG_TIMERLAT_TRACER */ 608 609 /* 610 * trace_osnoise_callback - NMI entry/exit callback 611 * 612 * This function is called at the entry and exit NMI code. The bool enter 613 * distinguishes between either case. This function is used to note a NMI 614 * occurrence, compute the noise caused by the NMI, and to remove the noise 615 * it is potentially causing on other interference variables. 616 */ 617 void trace_osnoise_callback(bool enter) 618 { 619 struct osnoise_variables *osn_var = this_cpu_osn_var(); 620 u64 duration; 621 622 if (!osn_var->sampling) 623 return; 624 625 /* 626 * Currently trace_clock_local() calls sched_clock() and the 627 * generic version is not NMI safe. 628 */ 629 if (!IS_ENABLED(CONFIG_GENERIC_SCHED_CLOCK)) { 630 if (enter) { 631 osn_var->nmi.delta_start = time_get(); 632 local_inc(&osn_var->int_counter); 633 } else { 634 duration = time_get() - osn_var->nmi.delta_start; 635 636 trace_nmi_noise(osn_var->nmi.delta_start, duration); 637 638 cond_move_irq_delta_start(osn_var, duration); 639 cond_move_softirq_delta_start(osn_var, duration); 640 cond_move_thread_delta_start(osn_var, duration); 641 } 642 } 643 644 if (enter) 645 osn_var->nmi.count++; 646 } 647 648 /* 649 * osnoise_trace_irq_entry - Note the starting of an IRQ 650 * 651 * Save the starting time of an IRQ. As IRQs are non-preemptive to other IRQs, 652 * it is safe to use a single variable (ons_var->irq) to save the statistics. 653 * The arrival_time is used to report... the arrival time. The delta_start 654 * is used to compute the duration at the IRQ exit handler. See 655 * cond_move_irq_delta_start(). 656 */ 657 void osnoise_trace_irq_entry(int id) 658 { 659 struct osnoise_variables *osn_var = this_cpu_osn_var(); 660 661 if (!osn_var->sampling) 662 return; 663 /* 664 * This value will be used in the report, but not to compute 665 * the execution time, so it is safe to get it unsafe. 666 */ 667 osn_var->irq.arrival_time = time_get(); 668 set_int_safe_time(osn_var, &osn_var->irq.delta_start); 669 osn_var->irq.count++; 670 671 local_inc(&osn_var->int_counter); 672 } 673 674 /* 675 * osnoise_irq_exit - Note the end of an IRQ, sava data and trace 676 * 677 * Computes the duration of the IRQ noise, and trace it. Also discounts the 678 * interference from other sources of noise could be currently being accounted. 679 */ 680 void osnoise_trace_irq_exit(int id, const char *desc) 681 { 682 struct osnoise_variables *osn_var = this_cpu_osn_var(); 683 int duration; 684 685 if (!osn_var->sampling) 686 return; 687 688 duration = get_int_safe_duration(osn_var, &osn_var->irq.delta_start); 689 trace_irq_noise(id, desc, osn_var->irq.arrival_time, duration); 690 osn_var->irq.arrival_time = 0; 691 cond_move_softirq_delta_start(osn_var, duration); 692 cond_move_thread_delta_start(osn_var, duration); 693 } 694 695 /* 696 * trace_irqentry_callback - Callback to the irq:irq_entry traceevent 697 * 698 * Used to note the starting of an IRQ occurece. 699 */ 700 static void trace_irqentry_callback(void *data, int irq, 701 struct irqaction *action) 702 { 703 osnoise_trace_irq_entry(irq); 704 } 705 706 /* 707 * trace_irqexit_callback - Callback to the irq:irq_exit traceevent 708 * 709 * Used to note the end of an IRQ occurece. 710 */ 711 static void trace_irqexit_callback(void *data, int irq, 712 struct irqaction *action, int ret) 713 { 714 osnoise_trace_irq_exit(irq, action->name); 715 } 716 717 /* 718 * arch specific register function. 719 */ 720 int __weak osnoise_arch_register(void) 721 { 722 return 0; 723 } 724 725 /* 726 * arch specific unregister function. 727 */ 728 void __weak osnoise_arch_unregister(void) 729 { 730 return; 731 } 732 733 /* 734 * hook_irq_events - Hook IRQ handling events 735 * 736 * This function hooks the IRQ related callbacks to the respective trace 737 * events. 738 */ 739 int hook_irq_events(void) 740 { 741 int ret; 742 743 ret = register_trace_irq_handler_entry(trace_irqentry_callback, NULL); 744 if (ret) 745 goto out_err; 746 747 ret = register_trace_irq_handler_exit(trace_irqexit_callback, NULL); 748 if (ret) 749 goto out_unregister_entry; 750 751 ret = osnoise_arch_register(); 752 if (ret) 753 goto out_irq_exit; 754 755 return 0; 756 757 out_irq_exit: 758 unregister_trace_irq_handler_exit(trace_irqexit_callback, NULL); 759 out_unregister_entry: 760 unregister_trace_irq_handler_entry(trace_irqentry_callback, NULL); 761 out_err: 762 return -EINVAL; 763 } 764 765 /* 766 * unhook_irq_events - Unhook IRQ handling events 767 * 768 * This function unhooks the IRQ related callbacks to the respective trace 769 * events. 770 */ 771 void unhook_irq_events(void) 772 { 773 osnoise_arch_unregister(); 774 unregister_trace_irq_handler_exit(trace_irqexit_callback, NULL); 775 unregister_trace_irq_handler_entry(trace_irqentry_callback, NULL); 776 } 777 778 #ifndef CONFIG_PREEMPT_RT 779 /* 780 * trace_softirq_entry_callback - Note the starting of a softirq 781 * 782 * Save the starting time of a softirq. As softirqs are non-preemptive to 783 * other softirqs, it is safe to use a single variable (ons_var->softirq) 784 * to save the statistics. The arrival_time is used to report... the 785 * arrival time. The delta_start is used to compute the duration at the 786 * softirq exit handler. See cond_move_softirq_delta_start(). 787 */ 788 void trace_softirq_entry_callback(void *data, unsigned int vec_nr) 789 { 790 struct osnoise_variables *osn_var = this_cpu_osn_var(); 791 792 if (!osn_var->sampling) 793 return; 794 /* 795 * This value will be used in the report, but not to compute 796 * the execution time, so it is safe to get it unsafe. 797 */ 798 osn_var->softirq.arrival_time = time_get(); 799 set_int_safe_time(osn_var, &osn_var->softirq.delta_start); 800 osn_var->softirq.count++; 801 802 local_inc(&osn_var->int_counter); 803 } 804 805 /* 806 * trace_softirq_exit_callback - Note the end of an softirq 807 * 808 * Computes the duration of the softirq noise, and trace it. Also discounts the 809 * interference from other sources of noise could be currently being accounted. 810 */ 811 void trace_softirq_exit_callback(void *data, unsigned int vec_nr) 812 { 813 struct osnoise_variables *osn_var = this_cpu_osn_var(); 814 int duration; 815 816 if (!osn_var->sampling) 817 return; 818 819 #ifdef CONFIG_TIMERLAT_TRACER 820 /* 821 * If the timerlat is enabled, but the irq handler did 822 * not run yet enabling timerlat_tracer, do not trace. 823 */ 824 if (unlikely(osnoise_data.timerlat_tracer)) { 825 struct timerlat_variables *tlat_var; 826 tlat_var = this_cpu_tmr_var(); 827 if (!tlat_var->tracing_thread) { 828 osn_var->softirq.arrival_time = 0; 829 osn_var->softirq.delta_start = 0; 830 return; 831 } 832 } 833 #endif 834 835 duration = get_int_safe_duration(osn_var, &osn_var->softirq.delta_start); 836 trace_softirq_noise(vec_nr, osn_var->softirq.arrival_time, duration); 837 cond_move_thread_delta_start(osn_var, duration); 838 osn_var->softirq.arrival_time = 0; 839 } 840 841 /* 842 * hook_softirq_events - Hook softirq handling events 843 * 844 * This function hooks the softirq related callbacks to the respective trace 845 * events. 846 */ 847 static int hook_softirq_events(void) 848 { 849 int ret; 850 851 ret = register_trace_softirq_entry(trace_softirq_entry_callback, NULL); 852 if (ret) 853 goto out_err; 854 855 ret = register_trace_softirq_exit(trace_softirq_exit_callback, NULL); 856 if (ret) 857 goto out_unreg_entry; 858 859 return 0; 860 861 out_unreg_entry: 862 unregister_trace_softirq_entry(trace_softirq_entry_callback, NULL); 863 out_err: 864 return -EINVAL; 865 } 866 867 /* 868 * unhook_softirq_events - Unhook softirq handling events 869 * 870 * This function hooks the softirq related callbacks to the respective trace 871 * events. 872 */ 873 static void unhook_softirq_events(void) 874 { 875 unregister_trace_softirq_entry(trace_softirq_entry_callback, NULL); 876 unregister_trace_softirq_exit(trace_softirq_exit_callback, NULL); 877 } 878 #else /* CONFIG_PREEMPT_RT */ 879 /* 880 * softirq are threads on the PREEMPT_RT mode. 881 */ 882 static int hook_softirq_events(void) 883 { 884 return 0; 885 } 886 static void unhook_softirq_events(void) 887 { 888 } 889 #endif 890 891 /* 892 * thread_entry - Record the starting of a thread noise window 893 * 894 * It saves the context switch time for a noisy thread, and increments 895 * the interference counters. 896 */ 897 static void 898 thread_entry(struct osnoise_variables *osn_var, struct task_struct *t) 899 { 900 if (!osn_var->sampling) 901 return; 902 /* 903 * The arrival time will be used in the report, but not to compute 904 * the execution time, so it is safe to get it unsafe. 905 */ 906 osn_var->thread.arrival_time = time_get(); 907 908 set_int_safe_time(osn_var, &osn_var->thread.delta_start); 909 910 osn_var->thread.count++; 911 local_inc(&osn_var->int_counter); 912 } 913 914 /* 915 * thread_exit - Report the end of a thread noise window 916 * 917 * It computes the total noise from a thread, tracing if needed. 918 */ 919 static void 920 thread_exit(struct osnoise_variables *osn_var, struct task_struct *t) 921 { 922 int duration; 923 924 if (!osn_var->sampling) 925 return; 926 927 #ifdef CONFIG_TIMERLAT_TRACER 928 if (osnoise_data.timerlat_tracer) { 929 struct timerlat_variables *tlat_var; 930 tlat_var = this_cpu_tmr_var(); 931 if (!tlat_var->tracing_thread) { 932 osn_var->thread.delta_start = 0; 933 osn_var->thread.arrival_time = 0; 934 return; 935 } 936 } 937 #endif 938 939 duration = get_int_safe_duration(osn_var, &osn_var->thread.delta_start); 940 941 trace_thread_noise(t, osn_var->thread.arrival_time, duration); 942 943 osn_var->thread.arrival_time = 0; 944 } 945 946 /* 947 * trace_sched_switch - sched:sched_switch trace event handler 948 * 949 * This function is hooked to the sched:sched_switch trace event, and it is 950 * used to record the beginning and to report the end of a thread noise window. 951 */ 952 void 953 trace_sched_switch_callback(void *data, bool preempt, struct task_struct *p, 954 struct task_struct *n) 955 { 956 struct osnoise_variables *osn_var = this_cpu_osn_var(); 957 958 if (p->pid != osn_var->pid) 959 thread_exit(osn_var, p); 960 961 if (n->pid != osn_var->pid) 962 thread_entry(osn_var, n); 963 } 964 965 /* 966 * hook_thread_events - Hook the insturmentation for thread noise 967 * 968 * Hook the osnoise tracer callbacks to handle the noise from other 969 * threads on the necessary kernel events. 970 */ 971 int hook_thread_events(void) 972 { 973 int ret; 974 975 ret = register_trace_sched_switch(trace_sched_switch_callback, NULL); 976 if (ret) 977 return -EINVAL; 978 979 return 0; 980 } 981 982 /* 983 * unhook_thread_events - *nhook the insturmentation for thread noise 984 * 985 * Unook the osnoise tracer callbacks to handle the noise from other 986 * threads on the necessary kernel events. 987 */ 988 void unhook_thread_events(void) 989 { 990 unregister_trace_sched_switch(trace_sched_switch_callback, NULL); 991 } 992 993 /* 994 * save_osn_sample_stats - Save the osnoise_sample statistics 995 * 996 * Save the osnoise_sample statistics before the sampling phase. These 997 * values will be used later to compute the diff betwneen the statistics 998 * before and after the osnoise sampling. 999 */ 1000 void save_osn_sample_stats(struct osnoise_variables *osn_var, struct osnoise_sample *s) 1001 { 1002 s->nmi_count = osn_var->nmi.count; 1003 s->irq_count = osn_var->irq.count; 1004 s->softirq_count = osn_var->softirq.count; 1005 s->thread_count = osn_var->thread.count; 1006 } 1007 1008 /* 1009 * diff_osn_sample_stats - Compute the osnoise_sample statistics 1010 * 1011 * After a sample period, compute the difference on the osnoise_sample 1012 * statistics. The struct osnoise_sample *s contains the statistics saved via 1013 * save_osn_sample_stats() before the osnoise sampling. 1014 */ 1015 void diff_osn_sample_stats(struct osnoise_variables *osn_var, struct osnoise_sample *s) 1016 { 1017 s->nmi_count = osn_var->nmi.count - s->nmi_count; 1018 s->irq_count = osn_var->irq.count - s->irq_count; 1019 s->softirq_count = osn_var->softirq.count - s->softirq_count; 1020 s->thread_count = osn_var->thread.count - s->thread_count; 1021 } 1022 1023 /* 1024 * osnoise_stop_tracing - Stop tracing and the tracer. 1025 */ 1026 static void osnoise_stop_tracing(void) 1027 { 1028 struct trace_array *tr = osnoise_trace; 1029 tracer_tracing_off(tr); 1030 } 1031 1032 /* 1033 * run_osnoise - Sample the time and look for osnoise 1034 * 1035 * Used to capture the time, looking for potential osnoise latency repeatedly. 1036 * Different from hwlat_detector, it is called with preemption and interrupts 1037 * enabled. This allows irqs, softirqs and threads to run, interfering on the 1038 * osnoise sampling thread, as they would do with a regular thread. 1039 */ 1040 static int run_osnoise(void) 1041 { 1042 struct osnoise_variables *osn_var = this_cpu_osn_var(); 1043 u64 noise = 0, sum_noise = 0, max_noise = 0; 1044 struct trace_array *tr = osnoise_trace; 1045 u64 start, sample, last_sample; 1046 u64 last_int_count, int_count; 1047 s64 total, last_total = 0; 1048 struct osnoise_sample s; 1049 unsigned int threshold; 1050 int hw_count = 0; 1051 u64 runtime, stop_in; 1052 int ret = -1; 1053 1054 /* 1055 * Considers the current thread as the workload. 1056 */ 1057 osn_var->pid = current->pid; 1058 1059 /* 1060 * Save the current stats for the diff 1061 */ 1062 save_osn_sample_stats(osn_var, &s); 1063 1064 /* 1065 * if threshold is 0, use the default value of 5 us. 1066 */ 1067 threshold = tracing_thresh ? : 5000; 1068 1069 /* 1070 * Make sure NMIs see sampling first 1071 */ 1072 osn_var->sampling = true; 1073 barrier(); 1074 1075 /* 1076 * Transform the *_us config to nanoseconds to avoid the 1077 * division on the main loop. 1078 */ 1079 runtime = osnoise_data.sample_runtime * NSEC_PER_USEC; 1080 stop_in = osnoise_data.stop_tracing * NSEC_PER_USEC; 1081 1082 /* 1083 * Start timestemp 1084 */ 1085 start = time_get(); 1086 1087 /* 1088 * "previous" loop. 1089 */ 1090 last_int_count = set_int_safe_time(osn_var, &last_sample); 1091 1092 do { 1093 /* 1094 * Get sample! 1095 */ 1096 int_count = set_int_safe_time(osn_var, &sample); 1097 1098 noise = time_sub(sample, last_sample); 1099 1100 /* 1101 * This shouldn't happen. 1102 */ 1103 if (noise < 0) { 1104 osnoise_taint("negative noise!"); 1105 goto out; 1106 } 1107 1108 /* 1109 * Sample runtime. 1110 */ 1111 total = time_sub(sample, start); 1112 1113 /* 1114 * Check for possible overflows. 1115 */ 1116 if (total < last_total) { 1117 osnoise_taint("total overflow!"); 1118 break; 1119 } 1120 1121 last_total = total; 1122 1123 if (noise >= threshold) { 1124 int interference = int_count - last_int_count; 1125 1126 if (noise > max_noise) 1127 max_noise = noise; 1128 1129 if (!interference) 1130 hw_count++; 1131 1132 sum_noise += noise; 1133 1134 trace_sample_threshold(last_sample, noise, interference); 1135 1136 if (osnoise_data.stop_tracing) 1137 if (noise > stop_in) 1138 osnoise_stop_tracing(); 1139 } 1140 1141 /* 1142 * For the non-preemptive kernel config: let threads runs, if 1143 * they so wish. 1144 */ 1145 cond_resched(); 1146 1147 last_sample = sample; 1148 last_int_count = int_count; 1149 1150 } while (total < runtime && !kthread_should_stop()); 1151 1152 /* 1153 * Finish the above in the view for interrupts. 1154 */ 1155 barrier(); 1156 1157 osn_var->sampling = false; 1158 1159 /* 1160 * Make sure sampling data is no longer updated. 1161 */ 1162 barrier(); 1163 1164 /* 1165 * Save noise info. 1166 */ 1167 s.noise = time_to_us(sum_noise); 1168 s.runtime = time_to_us(total); 1169 s.max_sample = time_to_us(max_noise); 1170 s.hw_count = hw_count; 1171 1172 /* Save interference stats info */ 1173 diff_osn_sample_stats(osn_var, &s); 1174 1175 trace_osnoise_sample(&s); 1176 1177 /* Keep a running maximum ever recorded osnoise "latency" */ 1178 if (max_noise > tr->max_latency) { 1179 tr->max_latency = max_noise; 1180 latency_fsnotify(tr); 1181 } 1182 1183 if (osnoise_data.stop_tracing_total) 1184 if (s.noise > osnoise_data.stop_tracing_total) 1185 osnoise_stop_tracing(); 1186 1187 return 0; 1188 out: 1189 return ret; 1190 } 1191 1192 static struct cpumask osnoise_cpumask; 1193 static struct cpumask save_cpumask; 1194 1195 /* 1196 * osnoise_main - The osnoise detection kernel thread 1197 * 1198 * Calls run_osnoise() function to measure the osnoise for the configured runtime, 1199 * every period. 1200 */ 1201 static int osnoise_main(void *data) 1202 { 1203 s64 interval; 1204 1205 while (!kthread_should_stop()) { 1206 1207 run_osnoise(); 1208 1209 mutex_lock(&interface_lock); 1210 interval = osnoise_data.sample_period - osnoise_data.sample_runtime; 1211 mutex_unlock(&interface_lock); 1212 1213 do_div(interval, USEC_PER_MSEC); 1214 1215 /* 1216 * differently from hwlat_detector, the osnoise tracer can run 1217 * without a pause because preemption is on. 1218 */ 1219 if (interval < 1) { 1220 /* Let synchronize_rcu_tasks() make progress */ 1221 cond_resched_tasks_rcu_qs(); 1222 continue; 1223 } 1224 1225 if (msleep_interruptible(interval)) 1226 break; 1227 } 1228 1229 return 0; 1230 } 1231 1232 #ifdef CONFIG_TIMERLAT_TRACER 1233 /* 1234 * timerlat_irq - hrtimer handler for timerlat. 1235 */ 1236 static enum hrtimer_restart timerlat_irq(struct hrtimer *timer) 1237 { 1238 struct osnoise_variables *osn_var = this_cpu_osn_var(); 1239 struct trace_array *tr = osnoise_trace; 1240 struct timerlat_variables *tlat; 1241 struct timerlat_sample s; 1242 u64 now; 1243 u64 diff; 1244 1245 /* 1246 * I am not sure if the timer was armed for this CPU. So, get 1247 * the timerlat struct from the timer itself, not from this 1248 * CPU. 1249 */ 1250 tlat = container_of(timer, struct timerlat_variables, timer); 1251 1252 now = ktime_to_ns(hrtimer_cb_get_time(&tlat->timer)); 1253 1254 /* 1255 * Enable the osnoise: events for thread an softirq. 1256 */ 1257 tlat->tracing_thread = true; 1258 1259 osn_var->thread.arrival_time = time_get(); 1260 1261 /* 1262 * A hardirq is running: the timer IRQ. It is for sure preempting 1263 * a thread, and potentially preempting a softirq. 1264 * 1265 * At this point, it is not interesting to know the duration of the 1266 * preempted thread (and maybe softirq), but how much time they will 1267 * delay the beginning of the execution of the timer thread. 1268 * 1269 * To get the correct (net) delay added by the softirq, its delta_start 1270 * is set as the IRQ one. In this way, at the return of the IRQ, the delta 1271 * start of the sofitrq will be zeroed, accounting then only the time 1272 * after that. 1273 * 1274 * The thread follows the same principle. However, if a softirq is 1275 * running, the thread needs to receive the softirq delta_start. The 1276 * reason being is that the softirq will be the last to be unfolded, 1277 * resseting the thread delay to zero. 1278 */ 1279 #ifndef CONFIG_PREEMPT_RT 1280 if (osn_var->softirq.delta_start) { 1281 copy_int_safe_time(osn_var, &osn_var->thread.delta_start, 1282 &osn_var->softirq.delta_start); 1283 1284 copy_int_safe_time(osn_var, &osn_var->softirq.delta_start, 1285 &osn_var->irq.delta_start); 1286 } else { 1287 copy_int_safe_time(osn_var, &osn_var->thread.delta_start, 1288 &osn_var->irq.delta_start); 1289 } 1290 #else /* CONFIG_PREEMPT_RT */ 1291 /* 1292 * The sofirqs run as threads on RT, so there is not need 1293 * to keep track of it. 1294 */ 1295 copy_int_safe_time(osn_var, &osn_var->thread.delta_start, &osn_var->irq.delta_start); 1296 #endif /* CONFIG_PREEMPT_RT */ 1297 1298 /* 1299 * Compute the current time with the expected time. 1300 */ 1301 diff = now - tlat->abs_period; 1302 1303 tlat->count++; 1304 s.seqnum = tlat->count; 1305 s.timer_latency = diff; 1306 s.context = IRQ_CONTEXT; 1307 1308 trace_timerlat_sample(&s); 1309 1310 /* Keep a running maximum ever recorded os noise "latency" */ 1311 if (diff > tr->max_latency) { 1312 tr->max_latency = diff; 1313 latency_fsnotify(tr); 1314 } 1315 1316 if (osnoise_data.stop_tracing) 1317 if (time_to_us(diff) >= osnoise_data.stop_tracing) 1318 osnoise_stop_tracing(); 1319 1320 wake_up_process(tlat->kthread); 1321 1322 if (osnoise_data.print_stack) 1323 timerlat_save_stack(0); 1324 1325 return HRTIMER_NORESTART; 1326 } 1327 1328 /* 1329 * wait_next_period - Wait for the next period for timerlat 1330 */ 1331 static int wait_next_period(struct timerlat_variables *tlat) 1332 { 1333 ktime_t next_abs_period, now; 1334 u64 rel_period = osnoise_data.timerlat_period * 1000; 1335 1336 now = hrtimer_cb_get_time(&tlat->timer); 1337 next_abs_period = ns_to_ktime(tlat->abs_period + rel_period); 1338 1339 /* 1340 * Save the next abs_period. 1341 */ 1342 tlat->abs_period = (u64) ktime_to_ns(next_abs_period); 1343 1344 /* 1345 * If the new abs_period is in the past, skip the activation. 1346 */ 1347 while (ktime_compare(now, next_abs_period) > 0) { 1348 next_abs_period = ns_to_ktime(tlat->abs_period + rel_period); 1349 tlat->abs_period = (u64) ktime_to_ns(next_abs_period); 1350 } 1351 1352 set_current_state(TASK_INTERRUPTIBLE); 1353 1354 hrtimer_start(&tlat->timer, next_abs_period, HRTIMER_MODE_ABS_PINNED_HARD); 1355 schedule(); 1356 return 1; 1357 } 1358 1359 /* 1360 * timerlat_main- Timerlat main 1361 */ 1362 static int timerlat_main(void *data) 1363 { 1364 struct osnoise_variables *osn_var = this_cpu_osn_var(); 1365 struct timerlat_variables *tlat = this_cpu_tmr_var(); 1366 struct timerlat_sample s; 1367 struct sched_param sp; 1368 u64 now, diff; 1369 1370 /* 1371 * Make the thread RT, that is how cyclictest is usually used. 1372 */ 1373 sp.sched_priority = DEFAULT_TIMERLAT_PRIO; 1374 sched_setscheduler_nocheck(current, SCHED_FIFO, &sp); 1375 1376 tlat->count = 0; 1377 tlat->tracing_thread = false; 1378 1379 hrtimer_init(&tlat->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED_HARD); 1380 tlat->timer.function = timerlat_irq; 1381 tlat->kthread = current; 1382 osn_var->pid = current->pid; 1383 /* 1384 * Anotate the arrival time. 1385 */ 1386 tlat->abs_period = hrtimer_cb_get_time(&tlat->timer); 1387 1388 wait_next_period(tlat); 1389 1390 osn_var->sampling = 1; 1391 1392 while (!kthread_should_stop()) { 1393 now = ktime_to_ns(hrtimer_cb_get_time(&tlat->timer)); 1394 diff = now - tlat->abs_period; 1395 1396 s.seqnum = tlat->count; 1397 s.timer_latency = diff; 1398 s.context = THREAD_CONTEXT; 1399 1400 trace_timerlat_sample(&s); 1401 1402 #ifdef CONFIG_STACKTRACE 1403 if (osnoise_data.print_stack) 1404 if (osnoise_data.print_stack <= time_to_us(diff)) 1405 timerlat_dump_stack(); 1406 #endif /* CONFIG_STACKTRACE */ 1407 1408 tlat->tracing_thread = false; 1409 if (osnoise_data.stop_tracing_total) 1410 if (time_to_us(diff) >= osnoise_data.stop_tracing_total) 1411 osnoise_stop_tracing(); 1412 1413 wait_next_period(tlat); 1414 } 1415 1416 hrtimer_cancel(&tlat->timer); 1417 return 0; 1418 } 1419 #endif /* CONFIG_TIMERLAT_TRACER */ 1420 1421 /* 1422 * stop_kthread - stop a workload thread 1423 */ 1424 static void stop_kthread(unsigned int cpu) 1425 { 1426 struct task_struct *kthread; 1427 1428 kthread = per_cpu(per_cpu_osnoise_var, cpu).kthread; 1429 if (kthread) 1430 kthread_stop(kthread); 1431 per_cpu(per_cpu_osnoise_var, cpu).kthread = NULL; 1432 } 1433 1434 /* 1435 * stop_per_cpu_kthread - Stop per-cpu threads 1436 * 1437 * Stop the osnoise sampling htread. Use this on unload and at system 1438 * shutdown. 1439 */ 1440 static void stop_per_cpu_kthreads(void) 1441 { 1442 int cpu; 1443 1444 get_online_cpus(); 1445 1446 for_each_online_cpu(cpu) 1447 stop_kthread(cpu); 1448 1449 put_online_cpus(); 1450 } 1451 1452 /* 1453 * start_kthread - Start a workload tread 1454 */ 1455 static int start_kthread(unsigned int cpu) 1456 { 1457 struct task_struct *kthread; 1458 void *main = osnoise_main; 1459 char comm[24]; 1460 1461 #ifdef CONFIG_TIMERLAT_TRACER 1462 if (osnoise_data.timerlat_tracer) { 1463 snprintf(comm, 24, "timerlat/%d", cpu); 1464 main = timerlat_main; 1465 } else { 1466 snprintf(comm, 24, "osnoise/%d", cpu); 1467 } 1468 #else 1469 snprintf(comm, 24, "osnoise/%d", cpu); 1470 #endif 1471 kthread = kthread_create_on_cpu(main, NULL, cpu, comm); 1472 1473 if (IS_ERR(kthread)) { 1474 pr_err(BANNER "could not start sampling thread\n"); 1475 stop_per_cpu_kthreads(); 1476 return -ENOMEM; 1477 } 1478 1479 per_cpu(per_cpu_osnoise_var, cpu).kthread = kthread; 1480 wake_up_process(kthread); 1481 1482 return 0; 1483 } 1484 1485 /* 1486 * start_per_cpu_kthread - Kick off per-cpu osnoise sampling kthreads 1487 * 1488 * This starts the kernel thread that will look for osnoise on many 1489 * cpus. 1490 */ 1491 static int start_per_cpu_kthreads(struct trace_array *tr) 1492 { 1493 struct cpumask *current_mask = &save_cpumask; 1494 int retval; 1495 int cpu; 1496 1497 get_online_cpus(); 1498 /* 1499 * Run only on CPUs in which trace and osnoise are allowed to run. 1500 */ 1501 cpumask_and(current_mask, tr->tracing_cpumask, &osnoise_cpumask); 1502 /* 1503 * And the CPU is online. 1504 */ 1505 cpumask_and(current_mask, cpu_online_mask, current_mask); 1506 1507 for_each_possible_cpu(cpu) 1508 per_cpu(per_cpu_osnoise_var, cpu).kthread = NULL; 1509 1510 for_each_cpu(cpu, current_mask) { 1511 retval = start_kthread(cpu); 1512 if (retval) { 1513 stop_per_cpu_kthreads(); 1514 return retval; 1515 } 1516 } 1517 1518 put_online_cpus(); 1519 1520 return 0; 1521 } 1522 1523 #ifdef CONFIG_HOTPLUG_CPU 1524 static void osnoise_hotplug_workfn(struct work_struct *dummy) 1525 { 1526 struct trace_array *tr = osnoise_trace; 1527 unsigned int cpu = smp_processor_id(); 1528 1529 1530 mutex_lock(&trace_types_lock); 1531 1532 if (!osnoise_busy) 1533 goto out_unlock_trace; 1534 1535 mutex_lock(&interface_lock); 1536 get_online_cpus(); 1537 1538 if (!cpumask_test_cpu(cpu, &osnoise_cpumask)) 1539 goto out_unlock; 1540 1541 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask)) 1542 goto out_unlock; 1543 1544 start_kthread(cpu); 1545 1546 out_unlock: 1547 put_online_cpus(); 1548 mutex_unlock(&interface_lock); 1549 out_unlock_trace: 1550 mutex_unlock(&trace_types_lock); 1551 } 1552 1553 static DECLARE_WORK(osnoise_hotplug_work, osnoise_hotplug_workfn); 1554 1555 /* 1556 * osnoise_cpu_init - CPU hotplug online callback function 1557 */ 1558 static int osnoise_cpu_init(unsigned int cpu) 1559 { 1560 schedule_work_on(cpu, &osnoise_hotplug_work); 1561 return 0; 1562 } 1563 1564 /* 1565 * osnoise_cpu_die - CPU hotplug offline callback function 1566 */ 1567 static int osnoise_cpu_die(unsigned int cpu) 1568 { 1569 stop_kthread(cpu); 1570 return 0; 1571 } 1572 1573 static void osnoise_init_hotplug_support(void) 1574 { 1575 int ret; 1576 1577 ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "trace/osnoise:online", 1578 osnoise_cpu_init, osnoise_cpu_die); 1579 if (ret < 0) 1580 pr_warn(BANNER "Error to init cpu hotplug support\n"); 1581 1582 return; 1583 } 1584 #else /* CONFIG_HOTPLUG_CPU */ 1585 static void osnoise_init_hotplug_support(void) 1586 { 1587 return 0; 1588 } 1589 #endif /* CONFIG_HOTPLUG_CPU */ 1590 1591 /* 1592 * osnoise_cpus_read - Read function for reading the "cpus" file 1593 * @filp: The active open file structure 1594 * @ubuf: The userspace provided buffer to read value into 1595 * @cnt: The maximum number of bytes to read 1596 * @ppos: The current "file" position 1597 * 1598 * Prints the "cpus" output into the user-provided buffer. 1599 */ 1600 static ssize_t 1601 osnoise_cpus_read(struct file *filp, char __user *ubuf, size_t count, 1602 loff_t *ppos) 1603 { 1604 char *mask_str; 1605 int len; 1606 1607 mutex_lock(&interface_lock); 1608 1609 len = snprintf(NULL, 0, "%*pbl\n", cpumask_pr_args(&osnoise_cpumask)) + 1; 1610 mask_str = kmalloc(len, GFP_KERNEL); 1611 if (!mask_str) { 1612 count = -ENOMEM; 1613 goto out_unlock; 1614 } 1615 1616 len = snprintf(mask_str, len, "%*pbl\n", cpumask_pr_args(&osnoise_cpumask)); 1617 if (len >= count) { 1618 count = -EINVAL; 1619 goto out_free; 1620 } 1621 1622 count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len); 1623 1624 out_free: 1625 kfree(mask_str); 1626 out_unlock: 1627 mutex_unlock(&interface_lock); 1628 1629 return count; 1630 } 1631 1632 static void osnoise_tracer_start(struct trace_array *tr); 1633 static void osnoise_tracer_stop(struct trace_array *tr); 1634 1635 /* 1636 * osnoise_cpus_write - Write function for "cpus" entry 1637 * @filp: The active open file structure 1638 * @ubuf: The user buffer that contains the value to write 1639 * @cnt: The maximum number of bytes to write to "file" 1640 * @ppos: The current position in @file 1641 * 1642 * This function provides a write implementation for the "cpus" 1643 * interface to the osnoise trace. By default, it lists all CPUs, 1644 * in this way, allowing osnoise threads to run on any online CPU 1645 * of the system. It serves to restrict the execution of osnoise to the 1646 * set of CPUs writing via this interface. Note that osnoise also 1647 * respects the "tracing_cpumask." Hence, osnoise threads will run only 1648 * on the set of CPUs allowed here AND on "tracing_cpumask." Why not 1649 * have just "tracing_cpumask?" Because the user might be interested 1650 * in tracing what is running on other CPUs. For instance, one might 1651 * run osnoise in one HT CPU while observing what is running on the 1652 * sibling HT CPU. 1653 */ 1654 static ssize_t 1655 osnoise_cpus_write(struct file *filp, const char __user *ubuf, size_t count, 1656 loff_t *ppos) 1657 { 1658 struct trace_array *tr = osnoise_trace; 1659 cpumask_var_t osnoise_cpumask_new; 1660 int running, err; 1661 char buf[256]; 1662 1663 if (count >= 256) 1664 return -EINVAL; 1665 1666 if (copy_from_user(buf, ubuf, count)) 1667 return -EFAULT; 1668 1669 if (!zalloc_cpumask_var(&osnoise_cpumask_new, GFP_KERNEL)) 1670 return -ENOMEM; 1671 1672 err = cpulist_parse(buf, osnoise_cpumask_new); 1673 if (err) 1674 goto err_free; 1675 1676 /* 1677 * trace_types_lock is taken to avoid concurrency on start/stop 1678 * and osnoise_busy. 1679 */ 1680 mutex_lock(&trace_types_lock); 1681 running = osnoise_busy; 1682 if (running) 1683 osnoise_tracer_stop(tr); 1684 1685 mutex_lock(&interface_lock); 1686 /* 1687 * osnoise_cpumask is read by CPU hotplug operations. 1688 */ 1689 get_online_cpus(); 1690 1691 cpumask_copy(&osnoise_cpumask, osnoise_cpumask_new); 1692 1693 put_online_cpus(); 1694 mutex_unlock(&interface_lock); 1695 1696 if (running) 1697 osnoise_tracer_start(tr); 1698 mutex_unlock(&trace_types_lock); 1699 1700 free_cpumask_var(osnoise_cpumask_new); 1701 return count; 1702 1703 err_free: 1704 free_cpumask_var(osnoise_cpumask_new); 1705 1706 return err; 1707 } 1708 1709 /* 1710 * osnoise/runtime_us: cannot be greater than the period. 1711 */ 1712 static struct trace_min_max_param osnoise_runtime = { 1713 .lock = &interface_lock, 1714 .val = &osnoise_data.sample_runtime, 1715 .max = &osnoise_data.sample_period, 1716 .min = NULL, 1717 }; 1718 1719 /* 1720 * osnoise/period_us: cannot be smaller than the runtime. 1721 */ 1722 static struct trace_min_max_param osnoise_period = { 1723 .lock = &interface_lock, 1724 .val = &osnoise_data.sample_period, 1725 .max = NULL, 1726 .min = &osnoise_data.sample_runtime, 1727 }; 1728 1729 /* 1730 * osnoise/stop_tracing_us: no limit. 1731 */ 1732 static struct trace_min_max_param osnoise_stop_tracing_in = { 1733 .lock = &interface_lock, 1734 .val = &osnoise_data.stop_tracing, 1735 .max = NULL, 1736 .min = NULL, 1737 }; 1738 1739 /* 1740 * osnoise/stop_tracing_total_us: no limit. 1741 */ 1742 static struct trace_min_max_param osnoise_stop_tracing_total = { 1743 .lock = &interface_lock, 1744 .val = &osnoise_data.stop_tracing_total, 1745 .max = NULL, 1746 .min = NULL, 1747 }; 1748 1749 #ifdef CONFIG_TIMERLAT_TRACER 1750 /* 1751 * osnoise/print_stack: print the stacktrace of the IRQ handler if the total 1752 * latency is higher than val. 1753 */ 1754 static struct trace_min_max_param osnoise_print_stack = { 1755 .lock = &interface_lock, 1756 .val = &osnoise_data.print_stack, 1757 .max = NULL, 1758 .min = NULL, 1759 }; 1760 1761 /* 1762 * osnoise/timerlat_period: min 100 us, max 1 s 1763 */ 1764 u64 timerlat_min_period = 100; 1765 u64 timerlat_max_period = 1000000; 1766 static struct trace_min_max_param timerlat_period = { 1767 .lock = &interface_lock, 1768 .val = &osnoise_data.timerlat_period, 1769 .max = &timerlat_max_period, 1770 .min = &timerlat_min_period, 1771 }; 1772 #endif 1773 1774 static const struct file_operations cpus_fops = { 1775 .open = tracing_open_generic, 1776 .read = osnoise_cpus_read, 1777 .write = osnoise_cpus_write, 1778 .llseek = generic_file_llseek, 1779 }; 1780 1781 /* 1782 * init_tracefs - A function to initialize the tracefs interface files 1783 * 1784 * This function creates entries in tracefs for "osnoise" and "timerlat". 1785 * It creates these directories in the tracing directory, and within that 1786 * directory the use can change and view the configs. 1787 */ 1788 static int init_tracefs(void) 1789 { 1790 struct dentry *top_dir; 1791 struct dentry *tmp; 1792 int ret; 1793 1794 ret = tracing_init_dentry(); 1795 if (ret) 1796 return -ENOMEM; 1797 1798 top_dir = tracefs_create_dir("osnoise", NULL); 1799 if (!top_dir) 1800 return 0; 1801 1802 tmp = tracefs_create_file("period_us", 0640, top_dir, 1803 &osnoise_period, &trace_min_max_fops); 1804 if (!tmp) 1805 goto err; 1806 1807 tmp = tracefs_create_file("runtime_us", 0644, top_dir, 1808 &osnoise_runtime, &trace_min_max_fops); 1809 if (!tmp) 1810 goto err; 1811 1812 tmp = tracefs_create_file("stop_tracing_us", 0640, top_dir, 1813 &osnoise_stop_tracing_in, &trace_min_max_fops); 1814 if (!tmp) 1815 goto err; 1816 1817 tmp = tracefs_create_file("stop_tracing_total_us", 0640, top_dir, 1818 &osnoise_stop_tracing_total, &trace_min_max_fops); 1819 if (!tmp) 1820 goto err; 1821 1822 tmp = trace_create_file("cpus", 0644, top_dir, NULL, &cpus_fops); 1823 if (!tmp) 1824 goto err; 1825 #ifdef CONFIG_TIMERLAT_TRACER 1826 #ifdef CONFIG_STACKTRACE 1827 tmp = tracefs_create_file("print_stack", 0640, top_dir, 1828 &osnoise_print_stack, &trace_min_max_fops); 1829 if (!tmp) 1830 goto err; 1831 #endif 1832 1833 tmp = tracefs_create_file("timerlat_period_us", 0640, top_dir, 1834 &timerlat_period, &trace_min_max_fops); 1835 if (!tmp) 1836 goto err; 1837 #endif 1838 1839 return 0; 1840 1841 err: 1842 tracefs_remove(top_dir); 1843 return -ENOMEM; 1844 } 1845 1846 static int osnoise_hook_events(void) 1847 { 1848 int retval; 1849 1850 /* 1851 * Trace is already hooked, we are re-enabling from 1852 * a stop_tracing_*. 1853 */ 1854 if (trace_osnoise_callback_enabled) 1855 return 0; 1856 1857 retval = hook_irq_events(); 1858 if (retval) 1859 return -EINVAL; 1860 1861 retval = hook_softirq_events(); 1862 if (retval) 1863 goto out_unhook_irq; 1864 1865 retval = hook_thread_events(); 1866 /* 1867 * All fine! 1868 */ 1869 if (!retval) 1870 return 0; 1871 1872 unhook_softirq_events(); 1873 out_unhook_irq: 1874 unhook_irq_events(); 1875 return -EINVAL; 1876 } 1877 1878 static int __osnoise_tracer_start(struct trace_array *tr) 1879 { 1880 int retval; 1881 1882 osn_var_reset_all(); 1883 1884 retval = osnoise_hook_events(); 1885 if (retval) 1886 return retval; 1887 /* 1888 * Make sure NMIs see reseted values. 1889 */ 1890 barrier(); 1891 trace_osnoise_callback_enabled = true; 1892 1893 retval = start_per_cpu_kthreads(tr); 1894 if (retval) { 1895 unhook_irq_events(); 1896 return retval; 1897 } 1898 1899 osnoise_busy = true; 1900 1901 return 0; 1902 } 1903 1904 static void osnoise_tracer_start(struct trace_array *tr) 1905 { 1906 int retval; 1907 1908 if (osnoise_busy) 1909 return; 1910 1911 retval = __osnoise_tracer_start(tr); 1912 if (retval) 1913 pr_err(BANNER "Error starting osnoise tracer\n"); 1914 1915 } 1916 1917 static void osnoise_tracer_stop(struct trace_array *tr) 1918 { 1919 if (!osnoise_busy) 1920 return; 1921 1922 trace_osnoise_callback_enabled = false; 1923 barrier(); 1924 1925 stop_per_cpu_kthreads(); 1926 1927 unhook_irq_events(); 1928 unhook_softirq_events(); 1929 unhook_thread_events(); 1930 1931 osnoise_busy = false; 1932 } 1933 1934 static int osnoise_tracer_init(struct trace_array *tr) 1935 { 1936 1937 /* Only allow one instance to enable this */ 1938 if (osnoise_busy) 1939 return -EBUSY; 1940 1941 osnoise_trace = tr; 1942 tr->max_latency = 0; 1943 1944 osnoise_tracer_start(tr); 1945 1946 return 0; 1947 } 1948 1949 static void osnoise_tracer_reset(struct trace_array *tr) 1950 { 1951 osnoise_tracer_stop(tr); 1952 } 1953 1954 static struct tracer osnoise_tracer __read_mostly = { 1955 .name = "osnoise", 1956 .init = osnoise_tracer_init, 1957 .reset = osnoise_tracer_reset, 1958 .start = osnoise_tracer_start, 1959 .stop = osnoise_tracer_stop, 1960 .print_header = print_osnoise_headers, 1961 .allow_instances = true, 1962 }; 1963 1964 #ifdef CONFIG_TIMERLAT_TRACER 1965 static void timerlat_tracer_start(struct trace_array *tr) 1966 { 1967 int retval; 1968 1969 if (osnoise_busy) 1970 return; 1971 1972 osnoise_data.timerlat_tracer = 1; 1973 1974 retval = __osnoise_tracer_start(tr); 1975 if (retval) 1976 goto out_err; 1977 1978 return; 1979 out_err: 1980 pr_err(BANNER "Error starting timerlat tracer\n"); 1981 } 1982 1983 static void timerlat_tracer_stop(struct trace_array *tr) 1984 { 1985 int cpu; 1986 1987 if (!osnoise_busy) 1988 return; 1989 1990 for_each_online_cpu(cpu) 1991 per_cpu(per_cpu_osnoise_var, cpu).sampling = 0; 1992 1993 osnoise_tracer_stop(tr); 1994 1995 osnoise_data.timerlat_tracer = 0; 1996 } 1997 1998 static int timerlat_tracer_init(struct trace_array *tr) 1999 { 2000 /* Only allow one instance to enable this */ 2001 if (osnoise_busy) 2002 return -EBUSY; 2003 2004 osnoise_trace = tr; 2005 2006 tr->max_latency = 0; 2007 2008 timerlat_tracer_start(tr); 2009 2010 return 0; 2011 } 2012 2013 static void timerlat_tracer_reset(struct trace_array *tr) 2014 { 2015 timerlat_tracer_stop(tr); 2016 } 2017 2018 static struct tracer timerlat_tracer __read_mostly = { 2019 .name = "timerlat", 2020 .init = timerlat_tracer_init, 2021 .reset = timerlat_tracer_reset, 2022 .start = timerlat_tracer_start, 2023 .stop = timerlat_tracer_stop, 2024 .print_header = print_timerlat_headers, 2025 .allow_instances = true, 2026 }; 2027 #endif /* CONFIG_TIMERLAT_TRACER */ 2028 2029 __init static int init_osnoise_tracer(void) 2030 { 2031 int ret; 2032 2033 mutex_init(&interface_lock); 2034 2035 cpumask_copy(&osnoise_cpumask, cpu_all_mask); 2036 2037 ret = register_tracer(&osnoise_tracer); 2038 if (ret) { 2039 pr_err(BANNER "Error registering osnoise!\n"); 2040 return ret; 2041 } 2042 2043 #ifdef CONFIG_TIMERLAT_TRACER 2044 ret = register_tracer(&timerlat_tracer); 2045 if (ret) { 2046 pr_err(BANNER "Error registering timerlat\n"); 2047 return ret; 2048 } 2049 #endif 2050 osnoise_init_hotplug_support(); 2051 2052 init_tracefs(); 2053 2054 return 0; 2055 } 2056 late_initcall(init_osnoise_tracer); 2057