// SPDX-License-Identifier: GPL-2.0
/*
 * OS Noise Tracer: computes the OS Noise suffered by a running thread.
 * Timerlat Tracer: measures the wakeup latency of a timer triggered IRQ and thread.
 *
 * Based on "hwlat_detector" tracer by:
 *   Copyright (C) 2008-2009 Jon Masters, Red Hat, Inc. <jcm@redhat.com>
 *   Copyright (C) 2013-2016 Steven Rostedt, Red Hat, Inc. <srostedt@redhat.com>
 *   With feedback from Clark Williams <williams@redhat.com>
 *
 * And also based on the rtsl tracer presented on:
 *   DE OLIVEIRA, Daniel Bristot, et al. Demystifying the real-time linux
 *   scheduling latency. In: 32nd Euromicro Conference on Real-Time Systems
 *   (ECRTS 2020). Schloss Dagstuhl-Leibniz-Zentrum fur Informatik, 2020.
 *
 * Copyright (C) 2021 Daniel Bristot de Oliveira, Red Hat, Inc. <bristot@redhat.com>
 */

#include <linux/kthread.h>
#include <linux/tracefs.h>
#include <linux/uaccess.h>
#include <linux/cpumask.h>
#include <linux/delay.h>
#include <linux/sched/clock.h>
#include <uapi/linux/sched/types.h>
#include <linux/sched.h>
#include "trace.h"

#ifdef CONFIG_X86_LOCAL_APIC
#include <asm/trace/irq_vectors.h>
#undef TRACE_INCLUDE_PATH
#undef TRACE_INCLUDE_FILE
#endif /* CONFIG_X86_LOCAL_APIC */

#include <trace/events/irq.h>
#include <trace/events/sched.h>

#define CREATE_TRACE_POINTS
#include <trace/events/osnoise.h>

static struct trace_array	*osnoise_trace;

/*
 * Default values.
 */
#define BANNER			"osnoise: "
#define DEFAULT_SAMPLE_PERIOD	1000000			/* 1s */
#define DEFAULT_SAMPLE_RUNTIME	1000000			/* 1s */

#define DEFAULT_TIMERLAT_PERIOD	1000			/* 1ms */
#define DEFAULT_TIMERLAT_PRIO	95			/* FIFO 95 */

/*
 * NMI runtime info.
 */
struct osn_nmi {
	u64	count;
	u64	delta_start;
};

/*
 * IRQ runtime info.
 */
struct osn_irq {
	u64	count;
	u64	arrival_time;
	u64	delta_start;
};

#define IRQ_CONTEXT	0
#define THREAD_CONTEXT	1

/*
 * softirq runtime info.
 */
struct osn_softirq {
	u64	count;
	u64	arrival_time;
	u64	delta_start;
};

/*
 * thread runtime info.
 */
struct osn_thread {
	u64	count;
	u64	arrival_time;
	u64	delta_start;
};

/*
 * Runtime information: this structure saves the runtime information used by
 * one sampling thread.
 */
struct osnoise_variables {
	struct task_struct	*kthread;
	bool			sampling;
	pid_t			pid;
	struct osn_nmi		nmi;
	struct osn_irq		irq;
	struct osn_softirq	softirq;
	struct osn_thread	thread;
	local_t			int_counter;
};

/*
 * Per-cpu runtime information.
 */
DEFINE_PER_CPU(struct osnoise_variables, per_cpu_osnoise_var);

/*
 * this_cpu_osn_var - Return the per-cpu osnoise_variables on its relative CPU
 */
static inline struct osnoise_variables *this_cpu_osn_var(void)
{
	return this_cpu_ptr(&per_cpu_osnoise_var);
}
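/*
 * Usage pattern (a sketch, for illustration only): every interference
 * handler below follows the same shape, bumping int_counter so that the
 * sampling thread can detect that it was interrupted while reading the
 * clock (see get_int_safe_duration()):
 *
 *	osn_var = this_cpu_osn_var();
 *	if (!osn_var->sampling)
 *		return;
 *	// ...note the arrival time and delta_start, account the noise...
 *	local_inc(&osn_var->int_counter);
 */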
#ifdef CONFIG_TIMERLAT_TRACER
/*
 * Runtime information for the timer mode.
 */
struct timerlat_variables {
	struct task_struct	*kthread;
	struct hrtimer		timer;
	u64			rel_period;
	u64			abs_period;
	bool			tracing_thread;
	u64			count;
};

DEFINE_PER_CPU(struct timerlat_variables, per_cpu_timerlat_var);

/*
 * this_cpu_tmr_var - Return the per-cpu timerlat_variables on its relative CPU
 */
static inline struct timerlat_variables *this_cpu_tmr_var(void)
{
	return this_cpu_ptr(&per_cpu_timerlat_var);
}

/*
 * tlat_var_reset - Reset the values of the per-cpu timerlat_variables
 */
static inline void tlat_var_reset(void)
{
	struct timerlat_variables *tlat_var;
	int cpu;
	/*
	 * So far, all the values are initialized as 0, so
	 * zeroing the structure is perfect.
	 */
	for_each_cpu(cpu, cpu_online_mask) {
		tlat_var = per_cpu_ptr(&per_cpu_timerlat_var, cpu);
		memset(tlat_var, 0, sizeof(*tlat_var));
	}
}
#else /* CONFIG_TIMERLAT_TRACER */
#define tlat_var_reset()	do {} while (0)
#endif /* CONFIG_TIMERLAT_TRACER */

/*
 * osn_var_reset - Reset the values of the per-cpu osnoise_variables
 */
static inline void osn_var_reset(void)
{
	struct osnoise_variables *osn_var;
	int cpu;

	/*
	 * So far, all the values are initialized as 0, so
	 * zeroing the structure is perfect.
	 */
	for_each_cpu(cpu, cpu_online_mask) {
		osn_var = per_cpu_ptr(&per_cpu_osnoise_var, cpu);
		memset(osn_var, 0, sizeof(*osn_var));
	}
}

/*
 * osn_var_reset_all - Reset the values of all per-cpu osnoise_variables
 */
static inline void osn_var_reset_all(void)
{
	osn_var_reset();
	tlat_var_reset();
}

/*
 * Tells NMIs to call back to the osnoise tracer to record timestamps.
 */
bool trace_osnoise_callback_enabled;

/*
 * osnoise sample structure definition. Used to store the statistics of a
 * sample run.
 */
struct osnoise_sample {
	u64			runtime;	/* runtime */
	u64			noise;		/* noise */
	u64			max_sample;	/* max single noise sample */
	int			hw_count;	/* # HW (incl. hypervisor) interference */
	int			nmi_count;	/* # NMIs during this sample */
	int			irq_count;	/* # IRQs during this sample */
	int			softirq_count;	/* # softirqs during this sample */
	int			thread_count;	/* # threads during this sample */
};

#ifdef CONFIG_TIMERLAT_TRACER
/*
 * timerlat sample structure definition. Used to store the statistics of
 * a sample run.
 */
struct timerlat_sample {
	u64			timer_latency;	/* timer_latency */
	unsigned int		seqnum;		/* unique sequence */
	int			context;	/* timer context */
};
#endif

/*
 * Protect the interface.
 */
struct mutex interface_lock;
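/*
 * The interface is the set of tracefs files created by init_tracefs()
 * below (assuming the usual mount point, /sys/kernel/tracing/osnoise/):
 *
 *	cpus, period_us, runtime_us, stop_tracing_us, stop_tracing_total_us,
 *	and, for the timerlat tracer, print_stack and timerlat_period_us.
 */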
/*
 * Tracer data.
 */
static struct osnoise_data {
	u64	sample_period;		/* total sampling period */
	u64	sample_runtime;		/* active sampling portion of period */
	u64	stop_tracing;		/* stop trace in the internal operation (loop/irq) */
	u64	stop_tracing_total;	/* stop trace in the final operation (report/thread) */
#ifdef CONFIG_TIMERLAT_TRACER
	u64	timerlat_period;	/* timerlat period */
	u64	print_stack;		/* print IRQ stack if total > */
	int	timerlat_tracer;	/* timerlat tracer */
#endif
	bool	tainted;		/* inform users and developers about a problem */
} osnoise_data = {
	.sample_period			= DEFAULT_SAMPLE_PERIOD,
	.sample_runtime			= DEFAULT_SAMPLE_RUNTIME,
	.stop_tracing			= 0,
	.stop_tracing_total		= 0,
#ifdef CONFIG_TIMERLAT_TRACER
	.print_stack			= 0,
	.timerlat_period		= DEFAULT_TIMERLAT_PERIOD,
	.timerlat_tracer		= 0,
#endif
};

/*
 * Boolean variable used to inform that the tracer is currently sampling.
 */
static bool osnoise_busy;

/*
 * Print the osnoise header info.
 */
static void print_osnoise_headers(struct seq_file *s)
{
	if (osnoise_data.tainted)
		seq_puts(s, "# osnoise is tainted!\n");

	seq_puts(s, "#                                _-----=> irqs-off\n");
	seq_puts(s, "#                               / _----=> need-resched\n");
	seq_puts(s, "#                              | / _---=> hardirq/softirq\n");
	seq_puts(s, "#                              || / _--=> preempt-depth     ");
	seq_puts(s, "                       MAX\n");

	seq_puts(s, "#                              || /                         ");
	seq_puts(s, "                    SINGLE      Interference counters:\n");

	seq_puts(s, "#                              ||||               RUNTIME   ");
	seq_puts(s, "   NOISE  %% OF CPU  NOISE    +-----------------------------+\n");

	seq_puts(s, "#           TASK-PID      CPU# ||||   TIMESTAMP    IN US    ");
	seq_puts(s, "   IN US  AVAILABLE  IN US     HW    NMI    IRQ   SIRQ THREAD\n");

	seq_puts(s, "#              | |         |   ||||      |           |      ");
	seq_puts(s, "       |    |            |      |      |      |      |      |\n");
}

/*
 * osnoise_taint - report an osnoise error.
 */
#define osnoise_taint(msg) ({							\
	struct trace_array *tr = osnoise_trace;					\
										\
	trace_array_printk_buf(tr->array_buffer.buffer, _THIS_IP_, msg);	\
	osnoise_data.tainted = true;						\
})

/*
 * Record an osnoise_sample into the tracer buffer.
 */
static void trace_osnoise_sample(struct osnoise_sample *sample)
{
	struct trace_array *tr = osnoise_trace;
	struct trace_buffer *buffer = tr->array_buffer.buffer;
	struct trace_event_call *call = &event_osnoise;
	struct ring_buffer_event *event;
	struct osnoise_entry *entry;

	event = trace_buffer_lock_reserve(buffer, TRACE_OSNOISE, sizeof(*entry),
					  tracing_gen_ctx());
	if (!event)
		return;
	entry	= ring_buffer_event_data(event);
	entry->runtime		= sample->runtime;
	entry->noise		= sample->noise;
	entry->max_sample	= sample->max_sample;
	entry->hw_count		= sample->hw_count;
	entry->nmi_count	= sample->nmi_count;
	entry->irq_count	= sample->irq_count;
	entry->softirq_count	= sample->softirq_count;
	entry->thread_count	= sample->thread_count;

	if (!call_filter_check_discard(call, entry, buffer, event))
		trace_buffer_unlock_commit_nostack(buffer, event);
}
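/*
 * For reference, a line recorded by trace_osnoise_sample() renders under
 * the headers above roughly as (all values are illustrative only):
 *
 *	osnoise/8-961 [008] .... 5789.857532: 1000000 190 99.98100 9 18 0 1007 18 1
 */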
#ifdef CONFIG_TIMERLAT_TRACER
/*
 * Print the timerlat header info.
 */
static void print_timerlat_headers(struct seq_file *s)
{
	seq_puts(s, "#                                _-----=> irqs-off\n");
	seq_puts(s, "#                               / _----=> need-resched\n");
	seq_puts(s, "#                              | / _---=> hardirq/softirq\n");
	seq_puts(s, "#                              || / _--=> preempt-depth\n");
	seq_puts(s, "#                              || /\n");
	seq_puts(s, "#                              ||||             ACTIVATION\n");
	seq_puts(s, "#           TASK-PID      CPU# ||||   TIMESTAMP    ID     ");
	seq_puts(s, "       CONTEXT                LATENCY\n");
	seq_puts(s, "#              | |         |   ||||      |         |      ");
	seq_puts(s, "            |                       |\n");
}

/*
 * Record a timerlat_sample into the tracer buffer.
 */
static void trace_timerlat_sample(struct timerlat_sample *sample)
{
	struct trace_array *tr = osnoise_trace;
	struct trace_event_call *call = &event_osnoise;
	struct trace_buffer *buffer = tr->array_buffer.buffer;
	struct ring_buffer_event *event;
	struct timerlat_entry *entry;

	event = trace_buffer_lock_reserve(buffer, TRACE_TIMERLAT, sizeof(*entry),
					  tracing_gen_ctx());
	if (!event)
		return;
	entry	= ring_buffer_event_data(event);
	entry->seqnum			= sample->seqnum;
	entry->context			= sample->context;
	entry->timer_latency		= sample->timer_latency;

	if (!call_filter_check_discard(call, entry, buffer, event))
		trace_buffer_unlock_commit_nostack(buffer, event);
}

#ifdef CONFIG_STACKTRACE

#define	MAX_CALLS	256

/*
 * Stack trace will take place only at IRQ level, so, no need
 * to control nesting here.
 */
struct trace_stack {
	int		stack_size;
	int		nr_entries;
	unsigned long	calls[MAX_CALLS];
};

static DEFINE_PER_CPU(struct trace_stack, trace_stack);

/*
 * timerlat_save_stack - save a stack trace without printing
 *
 * Save the current stack trace without printing. The
 * stack will be printed later, after the end of the measurement.
 */
static void timerlat_save_stack(int skip)
{
	unsigned int size, nr_entries;
	struct trace_stack *fstack;

	fstack = this_cpu_ptr(&trace_stack);

	size = ARRAY_SIZE(fstack->calls);

	nr_entries = stack_trace_save(fstack->calls, size, skip);

	fstack->stack_size = nr_entries * sizeof(unsigned long);
	fstack->nr_entries = nr_entries;
}

/*
 * timerlat_dump_stack - dump a stack trace previously saved
 *
 * Dump a saved stack trace into the trace buffer.
 */
static void timerlat_dump_stack(void)
{
	struct trace_event_call *call = &event_osnoise;
	struct trace_array *tr = osnoise_trace;
	struct trace_buffer *buffer = tr->array_buffer.buffer;
	struct ring_buffer_event *event;
	struct trace_stack *fstack;
	struct stack_entry *entry;
	unsigned int size;

	preempt_disable_notrace();
	fstack = this_cpu_ptr(&trace_stack);
	size = fstack->stack_size;

	event = trace_buffer_lock_reserve(buffer, TRACE_STACK, sizeof(*entry) + size,
					  tracing_gen_ctx());
	if (!event)
		goto out;

	entry = ring_buffer_event_data(event);

	memcpy(&entry->caller, fstack->calls, size);
	entry->size = fstack->nr_entries;

	if (!call_filter_check_discard(call, entry, buffer, event))
		trace_buffer_unlock_commit_nostack(buffer, event);

out:
	preempt_enable_notrace();
}
#else
#define timerlat_dump_stack()	do {} while (0)
#define timerlat_save_stack(a)	do {} while (0)
#endif /* CONFIG_STACKTRACE */
#endif /* CONFIG_TIMERLAT_TRACER */
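/*
 * For reference, timerlat renders one line for the IRQ context and one
 * for the thread context of each activation, roughly as (all values are
 * illustrative only):
 *
 *	<idle>-0     [000] d.h.  54.029328: #1 context    irq timer_latency    932 ns
 *	timerlat/0-1 [000] ....  54.029339: #1 context thread timer_latency  11700 ns
 */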
/*
 * Macros to encapsulate the time capturing infrastructure.
 */
#define time_get()	trace_clock_local()
#define time_to_us(x)	div_u64(x, 1000)
#define time_sub(a, b)	((a) - (b))

/*
 * cond_move_irq_delta_start - Forward the delta_start of a running IRQ
 *
 * If an IRQ is preempted by an NMI, its delta_start is pushed forward
 * to discount the NMI interference.
 *
 * See get_int_safe_duration().
 */
static inline void
cond_move_irq_delta_start(struct osnoise_variables *osn_var, u64 duration)
{
	if (osn_var->irq.delta_start)
		osn_var->irq.delta_start += duration;
}

#ifndef CONFIG_PREEMPT_RT
/*
 * cond_move_softirq_delta_start - Forward the delta_start of a running softirq.
 *
 * If a softirq is preempted by an IRQ or NMI, its delta_start is pushed
 * forward to discount the interference.
 *
 * See get_int_safe_duration().
 */
static inline void
cond_move_softirq_delta_start(struct osnoise_variables *osn_var, u64 duration)
{
	if (osn_var->softirq.delta_start)
		osn_var->softirq.delta_start += duration;
}
#else /* CONFIG_PREEMPT_RT */
#define cond_move_softirq_delta_start(osn_var, duration) do {} while (0)
#endif

/*
 * cond_move_thread_delta_start - Forward the delta_start of a running thread
 *
 * If a noisy thread is preempted by a softirq, IRQ or NMI, its delta_start
 * is pushed forward to discount the interference.
 *
 * See get_int_safe_duration().
 */
static inline void
cond_move_thread_delta_start(struct osnoise_variables *osn_var, u64 duration)
{
	if (osn_var->thread.delta_start)
		osn_var->thread.delta_start += duration;
}
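/*
 * A worked example (numbers are illustrative): a thread window starts at
 * t=100us, so thread.delta_start = 100. An IRQ then runs for 10us inside
 * the window, and its exit handler pushes delta_start forward to 110.
 * When the window closes at t=150, now - delta_start = 40us: the thread's
 * own noise, with the IRQ time discounted and reported on its own.
 */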
/*
 * get_int_safe_duration - Get the duration of a window
 *
 * The irq, softirq and thread variables need to have their duration without
 * the interference from higher priority interrupts. Instead of keeping a
 * variable to discount the interrupt interference from these variables, the
 * starting time of these variables are pushed forward with the interrupt's
 * duration. In this way, a single variable is used to:
 *
 *   - Know if a given window is being measured.
 *   - Account its duration.
 *   - Discount the interference.
 *
 * To avoid getting inconsistent values, e.g.:
 *
 *	now = time_get()
 *		--->	interrupt!
 *			delta_start -= int duration;
 *		<---
 *	duration = now - delta_start;
 *
 *	result: negative duration if the variable duration before the
 *	interrupt was smaller than the interrupt execution.
 *
 * A counter of interrupts is used. If the counter increased, try
 * to capture an interference safe duration.
 */
static inline s64
get_int_safe_duration(struct osnoise_variables *osn_var, u64 *delta_start)
{
	u64 int_counter, now;
	s64 duration;

	do {
		int_counter = local_read(&osn_var->int_counter);
		/* synchronize with interrupts */
		barrier();

		now = time_get();
		duration = (now - *delta_start);

		/* synchronize with interrupts */
		barrier();
	} while (int_counter != local_read(&osn_var->int_counter));

	/*
	 * This is evidence of a race condition that causes
	 * a value to be "discounted" too much.
	 */
	if (duration < 0)
		osnoise_taint("Negative duration!\n");

	*delta_start = 0;

	return duration;
}

/*
 * set_int_safe_time - Save the current time on *time, aware of interference
 *
 * Get the time, taking into consideration a possible interference from
 * higher priority interrupts.
 *
 * See get_int_safe_duration() for an explanation.
 */
static u64
set_int_safe_time(struct osnoise_variables *osn_var, u64 *time)
{
	u64 int_counter;

	do {
		int_counter = local_read(&osn_var->int_counter);
		/* synchronize with interrupts */
		barrier();

		*time = time_get();

		/* synchronize with interrupts */
		barrier();
	} while (int_counter != local_read(&osn_var->int_counter));

	return int_counter;
}

#ifdef CONFIG_TIMERLAT_TRACER
/*
 * copy_int_safe_time - Copy *src into *dst, aware of interference
 */
static u64
copy_int_safe_time(struct osnoise_variables *osn_var, u64 *dst, u64 *src)
{
	u64 int_counter;

	do {
		int_counter = local_read(&osn_var->int_counter);
		/* synchronize with interrupts */
		barrier();

		*dst = *src;

		/* synchronize with interrupts */
		barrier();
	} while (int_counter != local_read(&osn_var->int_counter));

	return int_counter;
}
#endif /* CONFIG_TIMERLAT_TRACER */

/*
 * trace_osnoise_callback - NMI entry/exit callback
 *
 * This function is called at the entry and exit of the NMI code. The bool
 * enter distinguishes between either case. This function is used to note
 * an NMI occurrence, compute the noise caused by the NMI, and to remove the
 * noise it is potentially causing on other interference variables.
 */
void trace_osnoise_callback(bool enter)
{
	struct osnoise_variables *osn_var = this_cpu_osn_var();
	u64 duration;

	if (!osn_var->sampling)
		return;

	/*
	 * Currently trace_clock_local() calls sched_clock() and the
	 * generic version is not NMI safe.
	 */
	if (!IS_ENABLED(CONFIG_GENERIC_SCHED_CLOCK)) {
		if (enter) {
			osn_var->nmi.delta_start = time_get();
			local_inc(&osn_var->int_counter);
		} else {
			duration = time_get() - osn_var->nmi.delta_start;

			trace_nmi_noise(osn_var->nmi.delta_start, duration);

			cond_move_irq_delta_start(osn_var, duration);
			cond_move_softirq_delta_start(osn_var, duration);
			cond_move_thread_delta_start(osn_var, duration);
		}
	}

	if (enter)
		osn_var->nmi.count++;
}
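/*
 * A sketch of the call site, for illustration: the hook lives in the NMI
 * entry/exit code (see nmi_enter()/nmi_exit()), guarded by the
 * trace_osnoise_callback_enabled flag that is set when the tracer starts:
 *
 *	if (trace_osnoise_callback_enabled)
 *		trace_osnoise_callback(true);	// on NMI entry
 *	...
 *	if (trace_osnoise_callback_enabled)
 *		trace_osnoise_callback(false);	// on NMI exit
 */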
/*
 * osnoise_trace_irq_entry - Note the starting of an IRQ
 *
 * Save the starting time of an IRQ. As IRQs are non-preemptive to other IRQs,
 * it is safe to use a single variable (osn_var->irq) to save the statistics.
 * The arrival_time is used to report... the arrival time. The delta_start
 * is used to compute the duration at the IRQ exit handler. See
 * cond_move_irq_delta_start().
 */
void osnoise_trace_irq_entry(int id)
{
	struct osnoise_variables *osn_var = this_cpu_osn_var();

	if (!osn_var->sampling)
		return;
	/*
	 * This value will be used in the report, but not to compute
	 * the execution time, so it is safe to get it unsafe.
	 */
	osn_var->irq.arrival_time = time_get();
	set_int_safe_time(osn_var, &osn_var->irq.delta_start);
	osn_var->irq.count++;

	local_inc(&osn_var->int_counter);
}

/*
 * osnoise_trace_irq_exit - Note the end of an IRQ, save data and trace
 *
 * Computes the duration of the IRQ noise, and traces it. Also discounts the
 * interference from other sources of noise that could currently be accounted.
 */
void osnoise_trace_irq_exit(int id, const char *desc)
{
	struct osnoise_variables *osn_var = this_cpu_osn_var();
	int duration;

	if (!osn_var->sampling)
		return;

	duration = get_int_safe_duration(osn_var, &osn_var->irq.delta_start);
	trace_irq_noise(id, desc, osn_var->irq.arrival_time, duration);
	osn_var->irq.arrival_time = 0;
	cond_move_softirq_delta_start(osn_var, duration);
	cond_move_thread_delta_start(osn_var, duration);
}

/*
 * trace_irqentry_callback - Callback to the irq:irq_handler_entry tracepoint
 *
 * Used to note the starting of an IRQ occurrence.
 */
static void trace_irqentry_callback(void *data, int irq,
				    struct irqaction *action)
{
	osnoise_trace_irq_entry(irq);
}

/*
 * trace_irqexit_callback - Callback to the irq:irq_handler_exit tracepoint
 *
 * Used to note the end of an IRQ occurrence.
 */
static void trace_irqexit_callback(void *data, int irq,
				   struct irqaction *action, int ret)
{
	osnoise_trace_irq_exit(irq, action->name);
}

/*
 * arch specific register function.
 */
int __weak osnoise_arch_register(void)
{
	return 0;
}

/*
 * arch specific unregister function.
 */
void __weak osnoise_arch_unregister(void)
{
	return;
}
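/*
 * An arch can override the weak functions above to account IRQs that do
 * not go through the generic handlers; on x86, for instance, this is used
 * to hook the local APIC vector tracepoints (hence the
 * asm/trace/irq_vectors.h include at the top of this file).
 */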
/*
 * hook_irq_events - Hook IRQ handling events
 *
 * This function hooks the IRQ related callbacks to the respective trace
 * events.
 */
static int hook_irq_events(void)
{
	int ret;

	ret = register_trace_irq_handler_entry(trace_irqentry_callback, NULL);
	if (ret)
		goto out_err;

	ret = register_trace_irq_handler_exit(trace_irqexit_callback, NULL);
	if (ret)
		goto out_unregister_entry;

	ret = osnoise_arch_register();
	if (ret)
		goto out_irq_exit;

	return 0;

out_irq_exit:
	unregister_trace_irq_handler_exit(trace_irqexit_callback, NULL);
out_unregister_entry:
	unregister_trace_irq_handler_entry(trace_irqentry_callback, NULL);
out_err:
	return -EINVAL;
}

/*
 * unhook_irq_events - Unhook IRQ handling events
 *
 * This function unhooks the IRQ related callbacks from the respective trace
 * events.
 */
static void unhook_irq_events(void)
{
	osnoise_arch_unregister();
	unregister_trace_irq_handler_exit(trace_irqexit_callback, NULL);
	unregister_trace_irq_handler_entry(trace_irqentry_callback, NULL);
}

#ifndef CONFIG_PREEMPT_RT
/*
 * trace_softirq_entry_callback - Note the starting of a softirq
 *
 * Save the starting time of a softirq. As softirqs are non-preemptive to
 * other softirqs, it is safe to use a single variable (osn_var->softirq)
 * to save the statistics. The arrival_time is used to report... the
 * arrival time. The delta_start is used to compute the duration at the
 * softirq exit handler. See cond_move_softirq_delta_start().
 */
static void trace_softirq_entry_callback(void *data, unsigned int vec_nr)
{
	struct osnoise_variables *osn_var = this_cpu_osn_var();

	if (!osn_var->sampling)
		return;
	/*
	 * This value will be used in the report, but not to compute
	 * the execution time, so it is safe to get it unsafe.
	 */
	osn_var->softirq.arrival_time = time_get();
	set_int_safe_time(osn_var, &osn_var->softirq.delta_start);
	osn_var->softirq.count++;

	local_inc(&osn_var->int_counter);
}

/*
 * trace_softirq_exit_callback - Note the end of a softirq
 *
 * Computes the duration of the softirq noise, and traces it. Also discounts
 * the interference from other sources of noise that could currently be
 * accounted.
 */
static void trace_softirq_exit_callback(void *data, unsigned int vec_nr)
{
	struct osnoise_variables *osn_var = this_cpu_osn_var();
	int duration;

	if (!osn_var->sampling)
		return;

#ifdef CONFIG_TIMERLAT_TRACER
	/*
	 * If timerlat is enabled, but the irq handler did
	 * not run yet enabling timerlat_tracer, do not trace.
	 */
	if (unlikely(osnoise_data.timerlat_tracer)) {
		struct timerlat_variables *tlat_var;
		tlat_var = this_cpu_tmr_var();
		if (!tlat_var->tracing_thread) {
			osn_var->softirq.arrival_time = 0;
			osn_var->softirq.delta_start = 0;
			return;
		}
	}
#endif

	duration = get_int_safe_duration(osn_var, &osn_var->softirq.delta_start);
	trace_softirq_noise(vec_nr, osn_var->softirq.arrival_time, duration);
	cond_move_thread_delta_start(osn_var, duration);
	osn_var->softirq.arrival_time = 0;
}

/*
 * hook_softirq_events - Hook softirq handling events
 *
 * This function hooks the softirq related callbacks to the respective trace
 * events.
 */
static int hook_softirq_events(void)
{
	int ret;

	ret = register_trace_softirq_entry(trace_softirq_entry_callback, NULL);
	if (ret)
		goto out_err;

	ret = register_trace_softirq_exit(trace_softirq_exit_callback, NULL);
	if (ret)
		goto out_unreg_entry;

	return 0;

out_unreg_entry:
	unregister_trace_softirq_entry(trace_softirq_entry_callback, NULL);
out_err:
	return -EINVAL;
}

/*
 * unhook_softirq_events - Unhook softirq handling events
 *
 * This function unhooks the softirq related callbacks from the respective
 * trace events.
 */
static void unhook_softirq_events(void)
{
	unregister_trace_softirq_entry(trace_softirq_entry_callback, NULL);
	unregister_trace_softirq_exit(trace_softirq_exit_callback, NULL);
}
#else /* CONFIG_PREEMPT_RT */
/*
 * softirqs are threads on PREEMPT_RT, so there is no need to hook them.
 */
static int hook_softirq_events(void)
{
	return 0;
}
static void unhook_softirq_events(void)
{
}
#endif
/*
 * thread_entry - Record the starting of a thread noise window
 *
 * It saves the context switch time for a noisy thread, and increments
 * the interference counters.
 */
static void
thread_entry(struct osnoise_variables *osn_var, struct task_struct *t)
{
	if (!osn_var->sampling)
		return;
	/*
	 * The arrival time will be used in the report, but not to compute
	 * the execution time, so it is safe to get it unsafe.
	 */
	osn_var->thread.arrival_time = time_get();

	set_int_safe_time(osn_var, &osn_var->thread.delta_start);

	osn_var->thread.count++;
	local_inc(&osn_var->int_counter);
}

/*
 * thread_exit - Report the end of a thread noise window
 *
 * It computes the total noise from a thread, tracing if needed.
 */
static void
thread_exit(struct osnoise_variables *osn_var, struct task_struct *t)
{
	int duration;

	if (!osn_var->sampling)
		return;

#ifdef CONFIG_TIMERLAT_TRACER
	if (osnoise_data.timerlat_tracer) {
		struct timerlat_variables *tlat_var;
		tlat_var = this_cpu_tmr_var();
		if (!tlat_var->tracing_thread) {
			osn_var->thread.delta_start = 0;
			osn_var->thread.arrival_time = 0;
			return;
		}
	}
#endif

	duration = get_int_safe_duration(osn_var, &osn_var->thread.delta_start);

	trace_thread_noise(t, osn_var->thread.arrival_time, duration);

	osn_var->thread.arrival_time = 0;
}

/*
 * trace_sched_switch_callback - sched:sched_switch trace event handler
 *
 * This function is hooked to the sched:sched_switch trace event, and it is
 * used to record the beginning and to report the end of a thread noise window.
 */
static void
trace_sched_switch_callback(void *data, bool preempt, struct task_struct *p,
			    struct task_struct *n)
{
	struct osnoise_variables *osn_var = this_cpu_osn_var();

	if (p->pid != osn_var->pid)
		thread_exit(osn_var, p);

	if (n->pid != osn_var->pid)
		thread_entry(osn_var, n);
}

/*
 * hook_thread_events - Hook the instrumentation for thread noise
 *
 * Hook the osnoise tracer callbacks to handle the noise from other
 * threads on the necessary kernel events.
 */
static int hook_thread_events(void)
{
	int ret;

	ret = register_trace_sched_switch(trace_sched_switch_callback, NULL);
	if (ret)
		return -EINVAL;

	return 0;
}

/*
 * unhook_thread_events - Unhook the instrumentation for thread noise
 *
 * Unhook the osnoise tracer callbacks that handle the noise from other
 * threads on the necessary kernel events.
 */
static void unhook_thread_events(void)
{
	unregister_trace_sched_switch(trace_sched_switch_callback, NULL);
}

/*
 * save_osn_sample_stats - Save the osnoise_sample statistics
 *
 * Save the osnoise_sample statistics before the sampling phase. These
 * values will be used later to compute the diff between the statistics
 * before and after the osnoise sampling.
 */
static void
save_osn_sample_stats(struct osnoise_variables *osn_var, struct osnoise_sample *s)
{
	s->nmi_count = osn_var->nmi.count;
	s->irq_count = osn_var->irq.count;
	s->softirq_count = osn_var->softirq.count;
	s->thread_count = osn_var->thread.count;
}
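/*
 * This pair is used by run_osnoise() in a bracket pattern (illustrative):
 *
 *	save_osn_sample_stats(osn_var, &s);
 *	// ...sample for the configured runtime...
 *	diff_osn_sample_stats(osn_var, &s);	// s now holds the deltas
 */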
/*
 * diff_osn_sample_stats - Compute the osnoise_sample statistics
 *
 * After a sample period, compute the difference on the osnoise_sample
 * statistics. The struct osnoise_sample *s contains the statistics saved via
 * save_osn_sample_stats() before the osnoise sampling.
 */
static void
diff_osn_sample_stats(struct osnoise_variables *osn_var, struct osnoise_sample *s)
{
	s->nmi_count = osn_var->nmi.count - s->nmi_count;
	s->irq_count = osn_var->irq.count - s->irq_count;
	s->softirq_count = osn_var->softirq.count - s->softirq_count;
	s->thread_count = osn_var->thread.count - s->thread_count;
}

/*
 * osnoise_stop_tracing - Stop tracing and the tracer.
 */
static void osnoise_stop_tracing(void)
{
	struct trace_array *tr = osnoise_trace;
	tracer_tracing_off(tr);
}

/*
 * run_osnoise - Sample the time and look for osnoise
 *
 * Used to capture the time, looking for potential osnoise latency repeatedly.
 * Different from hwlat_detector, it is called with preemption and interrupts
 * enabled. This allows irqs, softirqs and threads to run, interfering with
 * the osnoise sampling thread, as they would do with a regular thread.
 */
static int run_osnoise(void)
{
	struct osnoise_variables *osn_var = this_cpu_osn_var();
	u64 sum_noise = 0, max_noise = 0;
	struct trace_array *tr = osnoise_trace;
	u64 start, sample, last_sample;
	u64 last_int_count, int_count;
	s64 noise = 0, total, last_total = 0;
	struct osnoise_sample s;
	unsigned int threshold;
	int hw_count = 0;
	u64 runtime, stop_in;
	int ret = -1;

	/*
	 * Consider the current thread as the workload.
	 */
	osn_var->pid = current->pid;

	/*
	 * Save the current stats for the diff.
	 */
	save_osn_sample_stats(osn_var, &s);

	/*
	 * If threshold is 0, use the default value of 5 us.
	 */
	threshold = tracing_thresh ? : 5000;

	/*
	 * Make sure NMIs see sampling first.
	 */
	osn_var->sampling = true;
	barrier();

	/*
	 * Transform the *_us config to nanoseconds to avoid the
	 * division on the main loop.
	 */
	runtime = osnoise_data.sample_runtime * NSEC_PER_USEC;
	stop_in = osnoise_data.stop_tracing * NSEC_PER_USEC;

	/*
	 * Start timestamp.
	 */
	start = time_get();

	/*
	 * "previous" loop.
	 */
	last_int_count = set_int_safe_time(osn_var, &last_sample);

	do {
		/*
		 * Get sample!
		 */
		int_count = set_int_safe_time(osn_var, &sample);

		noise = time_sub(sample, last_sample);

		/*
		 * This shouldn't happen.
		 */
		if (noise < 0) {
			osnoise_taint("negative noise!");
			goto out;
		}

		/*
		 * Sample runtime.
		 */
		total = time_sub(sample, start);

		/*
		 * Check for possible overflows.
		 */
		if (total < last_total) {
			osnoise_taint("total overflow!");
			break;
		}

		last_total = total;

		if (noise >= threshold) {
			int interference = int_count - last_int_count;

			if (noise > max_noise)
				max_noise = noise;

			if (!interference)
				hw_count++;

			sum_noise += noise;

			trace_sample_threshold(last_sample, noise, interference);

			if (osnoise_data.stop_tracing)
				if (noise > stop_in)
					osnoise_stop_tracing();
		}

		/*
		 * For the non-preemptive kernel config: let threads run, if
		 * they so wish.
		 */
		cond_resched();

		last_sample = sample;
		last_int_count = int_count;

	} while (total < runtime && !kthread_should_stop());
	/*
	 * Finish the above in the view of interrupts.
	 */
	barrier();

	osn_var->sampling = false;

	/*
	 * Make sure sampling data is no longer updated.
	 */
	barrier();

	/*
	 * Save noise info.
	 */
	s.noise = time_to_us(sum_noise);
	s.runtime = time_to_us(total);
	s.max_sample = time_to_us(max_noise);
	s.hw_count = hw_count;

	/* Save interference stats info */
	diff_osn_sample_stats(osn_var, &s);

	trace_osnoise_sample(&s);

	/* Keep a running maximum ever recorded osnoise "latency" */
	if (max_noise > tr->max_latency) {
		tr->max_latency = max_noise;
		latency_fsnotify(tr);
	}

	if (osnoise_data.stop_tracing_total)
		if (s.noise > osnoise_data.stop_tracing_total)
			osnoise_stop_tracing();

	return 0;
out:
	return ret;
}

static struct cpumask osnoise_cpumask;
static struct cpumask save_cpumask;

/*
 * osnoise_main - The osnoise detection kernel thread
 *
 * Calls run_osnoise() to measure the osnoise for the configured runtime,
 * every period.
 */
static int osnoise_main(void *data)
{
	s64 interval;

	while (!kthread_should_stop()) {

		run_osnoise();

		mutex_lock(&interface_lock);
		interval = osnoise_data.sample_period - osnoise_data.sample_runtime;
		mutex_unlock(&interface_lock);

		do_div(interval, USEC_PER_MSEC);

		/*
		 * Differently from hwlat_detector, the osnoise tracer can run
		 * without a pause because preemption is on.
		 */
		if (interval < 1) {
			/* Let synchronize_rcu_tasks() make progress */
			cond_resched_tasks_rcu_qs();
			continue;
		}

		if (msleep_interruptible(interval))
			break;
	}

	return 0;
}

#ifdef CONFIG_TIMERLAT_TRACER
/*
 * timerlat_irq - hrtimer handler for timerlat.
 */
static enum hrtimer_restart timerlat_irq(struct hrtimer *timer)
{
	struct osnoise_variables *osn_var = this_cpu_osn_var();
	struct trace_array *tr = osnoise_trace;
	struct timerlat_variables *tlat;
	struct timerlat_sample s;
	u64 now;
	u64 diff;

	/*
	 * I am not sure if the timer was armed for this CPU. So, get
	 * the timerlat struct from the timer itself, not from this
	 * CPU.
	 */
	tlat = container_of(timer, struct timerlat_variables, timer);

	now = ktime_to_ns(hrtimer_cb_get_time(&tlat->timer));

	/*
	 * Enable the osnoise events for thread and softirq.
	 */
	tlat->tracing_thread = true;

	osn_var->thread.arrival_time = time_get();

	/*
	 * A hardirq is running: the timer IRQ. It is for sure preempting
	 * a thread, and potentially preempting a softirq.
	 *
	 * At this point, it is not interesting to know the duration of the
	 * preempted thread (and maybe softirq), but how much time they will
	 * delay the beginning of the execution of the timer thread.
	 *
	 * To get the correct (net) delay added by the softirq, its delta_start
	 * is set as the IRQ one. In this way, at the return of the IRQ, the
	 * delta start of the softirq will be zeroed, accounting then only the
	 * time after that.
	 *
	 * The thread follows the same principle. However, if a softirq is
	 * running, the thread needs to receive the softirq delta_start. The
	 * reason is that the softirq will be the last to be unfolded,
	 * resetting the thread delay to zero.
	 */
#ifndef CONFIG_PREEMPT_RT
	if (osn_var->softirq.delta_start) {
		copy_int_safe_time(osn_var, &osn_var->thread.delta_start,
				   &osn_var->softirq.delta_start);

		copy_int_safe_time(osn_var, &osn_var->softirq.delta_start,
				   &osn_var->irq.delta_start);
	} else {
		copy_int_safe_time(osn_var, &osn_var->thread.delta_start,
				   &osn_var->irq.delta_start);
	}
#else /* CONFIG_PREEMPT_RT */
	/*
	 * The softirqs run as threads on RT, so there is no need
	 * to keep track of them.
	 */
	copy_int_safe_time(osn_var, &osn_var->thread.delta_start, &osn_var->irq.delta_start);
#endif /* CONFIG_PREEMPT_RT */

	/*
	 * Compare the current time with the expected time.
	 */
	diff = now - tlat->abs_period;

	tlat->count++;
	s.seqnum = tlat->count;
	s.timer_latency = diff;
	s.context = IRQ_CONTEXT;

	trace_timerlat_sample(&s);

	/* Keep a running maximum ever recorded os noise "latency" */
	if (diff > tr->max_latency) {
		tr->max_latency = diff;
		latency_fsnotify(tr);
	}

	if (osnoise_data.stop_tracing)
		if (time_to_us(diff) >= osnoise_data.stop_tracing)
			osnoise_stop_tracing();

	wake_up_process(tlat->kthread);

	if (osnoise_data.print_stack)
		timerlat_save_stack(0);

	return HRTIMER_NORESTART;
}

/*
 * wait_next_period - Wait for the next period for timerlat
 */
static int wait_next_period(struct timerlat_variables *tlat)
{
	ktime_t next_abs_period, now;
	u64 rel_period = osnoise_data.timerlat_period * 1000;

	now = hrtimer_cb_get_time(&tlat->timer);
	next_abs_period = ns_to_ktime(tlat->abs_period + rel_period);

	/*
	 * Save the next abs_period.
	 */
	tlat->abs_period = (u64) ktime_to_ns(next_abs_period);

	/*
	 * If the new abs_period is in the past, skip the activation.
	 */
	while (ktime_compare(now, next_abs_period) > 0) {
		next_abs_period = ns_to_ktime(tlat->abs_period + rel_period);
		tlat->abs_period = (u64) ktime_to_ns(next_abs_period);
	}

	set_current_state(TASK_INTERRUPTIBLE);

	hrtimer_start(&tlat->timer, next_abs_period, HRTIMER_MODE_ABS_PINNED_HARD);
	schedule();
	return 1;
}
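/*
 * Typical usage of the tracer implemented below (illustrative; assumes
 * tracefs mounted at /sys/kernel/tracing):
 *
 *	# echo 500 > osnoise/timerlat_period_us
 *	# echo timerlat > current_tracer
 *	# cat trace
 */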
/*
 * timerlat_main - Timerlat main
 */
static int timerlat_main(void *data)
{
	struct osnoise_variables *osn_var = this_cpu_osn_var();
	struct timerlat_variables *tlat = this_cpu_tmr_var();
	struct timerlat_sample s;
	struct sched_param sp;
	u64 now, diff;

	/*
	 * Make the thread RT, that is how cyclictest is usually used.
	 */
	sp.sched_priority = DEFAULT_TIMERLAT_PRIO;
	sched_setscheduler_nocheck(current, SCHED_FIFO, &sp);

	tlat->count = 0;
	tlat->tracing_thread = false;

	hrtimer_init(&tlat->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED_HARD);
	tlat->timer.function = timerlat_irq;
	tlat->kthread = current;
	osn_var->pid = current->pid;
	/*
	 * Annotate the arrival time.
	 */
	tlat->abs_period = hrtimer_cb_get_time(&tlat->timer);

	wait_next_period(tlat);

	osn_var->sampling = true;

	while (!kthread_should_stop()) {
		now = ktime_to_ns(hrtimer_cb_get_time(&tlat->timer));
		diff = now - tlat->abs_period;

		s.seqnum = tlat->count;
		s.timer_latency = diff;
		s.context = THREAD_CONTEXT;

		trace_timerlat_sample(&s);

#ifdef CONFIG_STACKTRACE
		if (osnoise_data.print_stack)
			if (osnoise_data.print_stack <= time_to_us(diff))
				timerlat_dump_stack();
#endif /* CONFIG_STACKTRACE */

		tlat->tracing_thread = false;
		if (osnoise_data.stop_tracing_total)
			if (time_to_us(diff) >= osnoise_data.stop_tracing_total)
				osnoise_stop_tracing();

		wait_next_period(tlat);
	}

	hrtimer_cancel(&tlat->timer);
	return 0;
}
#endif /* CONFIG_TIMERLAT_TRACER */

/*
 * stop_kthread - stop a workload thread
 */
static void stop_kthread(unsigned int cpu)
{
	struct task_struct *kthread;

	kthread = per_cpu(per_cpu_osnoise_var, cpu).kthread;
	if (kthread)
		kthread_stop(kthread);
	per_cpu(per_cpu_osnoise_var, cpu).kthread = NULL;
}

/*
 * stop_per_cpu_kthreads - Stop per-cpu threads
 *
 * Stop the osnoise sampling threads. Use this on unload and at system
 * shutdown.
 */
static void stop_per_cpu_kthreads(void)
{
	int cpu;

	get_online_cpus();

	for_each_online_cpu(cpu)
		stop_kthread(cpu);

	put_online_cpus();
}

/*
 * start_kthread - Start a workload thread
 */
static int start_kthread(unsigned int cpu)
{
	struct task_struct *kthread;
	void *main = osnoise_main;
	char comm[24];

#ifdef CONFIG_TIMERLAT_TRACER
	if (osnoise_data.timerlat_tracer) {
		snprintf(comm, 24, "timerlat/%d", cpu);
		main = timerlat_main;
	} else {
		snprintf(comm, 24, "osnoise/%d", cpu);
	}
#else
	snprintf(comm, 24, "osnoise/%d", cpu);
#endif
	kthread = kthread_create_on_cpu(main, NULL, cpu, comm);

	if (IS_ERR(kthread)) {
		pr_err(BANNER "could not start sampling thread\n");
		stop_per_cpu_kthreads();
		return -ENOMEM;
	}

	per_cpu(per_cpu_osnoise_var, cpu).kthread = kthread;
	wake_up_process(kthread);

	return 0;
}

/*
 * start_per_cpu_kthreads - Kick off per-cpu osnoise sampling kthreads
 *
 * This starts the kernel threads that will look for osnoise on many
 * cpus.
 */
static int start_per_cpu_kthreads(struct trace_array *tr)
{
	struct cpumask *current_mask = &save_cpumask;
	int retval;
	int cpu;

	get_online_cpus();
	/*
	 * Run only on CPUs in which trace and osnoise are allowed to run.
	 */
	cpumask_and(current_mask, tr->tracing_cpumask, &osnoise_cpumask);
	/*
	 * And the CPU is online.
	 */
	cpumask_and(current_mask, cpu_online_mask, current_mask);

	for_each_possible_cpu(cpu)
		per_cpu(per_cpu_osnoise_var, cpu).kthread = NULL;

	for_each_cpu(cpu, current_mask) {
		retval = start_kthread(cpu);
		if (retval) {
			stop_per_cpu_kthreads();
			put_online_cpus();
			return retval;
		}
	}

	put_online_cpus();

	return 0;
}

#ifdef CONFIG_HOTPLUG_CPU
static void osnoise_hotplug_workfn(struct work_struct *dummy)
{
	struct trace_array *tr = osnoise_trace;
	unsigned int cpu = smp_processor_id();

	mutex_lock(&trace_types_lock);

	if (!osnoise_busy)
		goto out_unlock_trace;

	mutex_lock(&interface_lock);
	get_online_cpus();

	if (!cpumask_test_cpu(cpu, &osnoise_cpumask))
		goto out_unlock;

	if (!cpumask_test_cpu(cpu, tr->tracing_cpumask))
		goto out_unlock;

	start_kthread(cpu);

out_unlock:
	put_online_cpus();
	mutex_unlock(&interface_lock);
out_unlock_trace:
	mutex_unlock(&trace_types_lock);
}

static DECLARE_WORK(osnoise_hotplug_work, osnoise_hotplug_workfn);

/*
 * osnoise_cpu_init - CPU hotplug online callback function
 */
static int osnoise_cpu_init(unsigned int cpu)
{
	schedule_work_on(cpu, &osnoise_hotplug_work);
	return 0;
}

/*
 * osnoise_cpu_die - CPU hotplug offline callback function
 */
static int osnoise_cpu_die(unsigned int cpu)
{
	stop_kthread(cpu);
	return 0;
}

static void osnoise_init_hotplug_support(void)
{
	int ret;

	ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "trace/osnoise:online",
				osnoise_cpu_init, osnoise_cpu_die);
	if (ret < 0)
		pr_warn(BANNER "Error initializing CPU hotplug support\n");

	return;
}
#else /* CONFIG_HOTPLUG_CPU */
static void osnoise_init_hotplug_support(void)
{
	return;
}
#endif /* CONFIG_HOTPLUG_CPU */

/*
 * osnoise_cpus_read - Read function for reading the "cpus" file
 * @filp: The active open file structure
 * @ubuf: The userspace provided buffer to read value into
 * @count: The maximum number of bytes to read
 * @ppos: The current "file" position
 *
 * Prints the "cpus" output into the user-provided buffer.
 */
static ssize_t
osnoise_cpus_read(struct file *filp, char __user *ubuf, size_t count,
		  loff_t *ppos)
{
	char *mask_str;
	int len;

	mutex_lock(&interface_lock);

	len = snprintf(NULL, 0, "%*pbl\n", cpumask_pr_args(&osnoise_cpumask)) + 1;
	mask_str = kmalloc(len, GFP_KERNEL);
	if (!mask_str) {
		count = -ENOMEM;
		goto out_unlock;
	}

	len = snprintf(mask_str, len, "%*pbl\n", cpumask_pr_args(&osnoise_cpumask));
	if (len >= count) {
		count = -EINVAL;
		goto out_free;
	}

	count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);

out_free:
	kfree(mask_str);
out_unlock:
	mutex_unlock(&interface_lock);

	return count;
}

static void osnoise_tracer_start(struct trace_array *tr);
static void osnoise_tracer_stop(struct trace_array *tr);

/*
 * osnoise_cpus_write - Write function for "cpus" entry
 * @filp: The active open file structure
 * @ubuf: The user buffer that contains the value to write
 * @count: The maximum number of bytes to write to "file"
 * @ppos: The current position in @file
 *
 * This function provides a write implementation for the "cpus"
 * interface to the osnoise tracer. By default, it lists all CPUs,
 * in this way, allowing osnoise threads to run on any online CPU
 * of the system. It serves to restrict the execution of osnoise to the
 * set of CPUs written via this interface. Note that osnoise also
 * respects the "tracing_cpumask." Hence, osnoise threads will run only
 * on the set of CPUs allowed here AND on "tracing_cpumask." Why not
 * have just "tracing_cpumask?" Because the user might be interested
 * in tracing what is running on other CPUs. For instance, one might
 * run osnoise in one HT CPU while observing what is running on the
 * sibling HT CPU.
 */
static ssize_t
osnoise_cpus_write(struct file *filp, const char __user *ubuf, size_t count,
		   loff_t *ppos)
{
	struct trace_array *tr = osnoise_trace;
	cpumask_var_t osnoise_cpumask_new;
	int running, err;
	char buf[256];

	if (count >= 256)
		return -EINVAL;

	if (copy_from_user(buf, ubuf, count))
		return -EFAULT;

	/* NUL-terminate before parsing the cpulist */
	buf[count] = '\0';

	if (!zalloc_cpumask_var(&osnoise_cpumask_new, GFP_KERNEL))
		return -ENOMEM;

	err = cpulist_parse(buf, osnoise_cpumask_new);
	if (err)
		goto err_free;

	/*
	 * trace_types_lock is taken to avoid concurrency on start/stop
	 * and osnoise_busy.
	 */
	mutex_lock(&trace_types_lock);
	running = osnoise_busy;
	if (running)
		osnoise_tracer_stop(tr);

	mutex_lock(&interface_lock);
	/*
	 * osnoise_cpumask is read by CPU hotplug operations.
	 */
	get_online_cpus();

	cpumask_copy(&osnoise_cpumask, osnoise_cpumask_new);

	put_online_cpus();
	mutex_unlock(&interface_lock);

	if (running)
		osnoise_tracer_start(tr);
	mutex_unlock(&trace_types_lock);

	free_cpumask_var(osnoise_cpumask_new);
	return count;

err_free:
	free_cpumask_var(osnoise_cpumask_new);

	return err;
}
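/*
 * Example (illustrative; assumes tracefs at /sys/kernel/tracing): restrict
 * the sampling threads to CPUs 0-3 and 6, in the cpulist format accepted
 * by cpulist_parse():
 *
 *	# echo 0-3,6 > /sys/kernel/tracing/osnoise/cpus
 */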
/*
 * osnoise/runtime_us: cannot be greater than the period.
 */
static struct trace_min_max_param osnoise_runtime = {
	.lock	= &interface_lock,
	.val	= &osnoise_data.sample_runtime,
	.max	= &osnoise_data.sample_period,
	.min	= NULL,
};

/*
 * osnoise/period_us: cannot be smaller than the runtime.
 */
static struct trace_min_max_param osnoise_period = {
	.lock	= &interface_lock,
	.val	= &osnoise_data.sample_period,
	.max	= NULL,
	.min	= &osnoise_data.sample_runtime,
};

/*
 * osnoise/stop_tracing_us: no limit.
 */
static struct trace_min_max_param osnoise_stop_tracing_in = {
	.lock	= &interface_lock,
	.val	= &osnoise_data.stop_tracing,
	.max	= NULL,
	.min	= NULL,
};

/*
 * osnoise/stop_tracing_total_us: no limit.
 */
static struct trace_min_max_param osnoise_stop_tracing_total = {
	.lock	= &interface_lock,
	.val	= &osnoise_data.stop_tracing_total,
	.max	= NULL,
	.min	= NULL,
};

#ifdef CONFIG_TIMERLAT_TRACER
/*
 * osnoise/print_stack: print the stacktrace of the IRQ handler if the total
 * latency is higher than val.
 */
static struct trace_min_max_param osnoise_print_stack = {
	.lock	= &interface_lock,
	.val	= &osnoise_data.print_stack,
	.max	= NULL,
	.min	= NULL,
};

/*
 * osnoise/timerlat_period_us: min 100 us, max 1 s.
 */
u64 timerlat_min_period = 100;
u64 timerlat_max_period = 1000000;
static struct trace_min_max_param timerlat_period = {
	.lock	= &interface_lock,
	.val	= &osnoise_data.timerlat_period,
	.max	= &timerlat_max_period,
	.min	= &timerlat_min_period,
};
#endif

static const struct file_operations cpus_fops = {
	.open		= tracing_open_generic,
	.read		= osnoise_cpus_read,
	.write		= osnoise_cpus_write,
	.llseek		= generic_file_llseek,
};

/*
 * init_tracefs - A function to initialize the tracefs interface files
 *
 * This function creates entries in tracefs for "osnoise" and "timerlat".
 * It creates these directories in the tracing directory, and within that
 * directory the user can change and view the configs.
 */
static int init_tracefs(void)
{
	struct dentry *top_dir;
	struct dentry *tmp;
	int ret;

	ret = tracing_init_dentry();
	if (ret)
		return -ENOMEM;

	top_dir = tracefs_create_dir("osnoise", NULL);
	if (!top_dir)
		return 0;

	tmp = tracefs_create_file("period_us", 0640, top_dir,
				  &osnoise_period, &trace_min_max_fops);
	if (!tmp)
		goto err;

	tmp = tracefs_create_file("runtime_us", 0644, top_dir,
				  &osnoise_runtime, &trace_min_max_fops);
	if (!tmp)
		goto err;

	tmp = tracefs_create_file("stop_tracing_us", 0640, top_dir,
				  &osnoise_stop_tracing_in, &trace_min_max_fops);
	if (!tmp)
		goto err;

	tmp = tracefs_create_file("stop_tracing_total_us", 0640, top_dir,
				  &osnoise_stop_tracing_total, &trace_min_max_fops);
	if (!tmp)
		goto err;

	tmp = trace_create_file("cpus", 0644, top_dir, NULL, &cpus_fops);
	if (!tmp)
		goto err;
#ifdef CONFIG_TIMERLAT_TRACER
#ifdef CONFIG_STACKTRACE
	tmp = tracefs_create_file("print_stack", 0640, top_dir,
				  &osnoise_print_stack, &trace_min_max_fops);
	if (!tmp)
		goto err;
#endif

	tmp = tracefs_create_file("timerlat_period_us", 0640, top_dir,
				  &timerlat_period, &trace_min_max_fops);
	if (!tmp)
		goto err;
#endif

	return 0;

err:
	tracefs_remove(top_dir);
	return -ENOMEM;
}

static int osnoise_hook_events(void)
{
	int retval;

	/*
	 * Trace is already hooked, we are re-enabling from
	 * a stop_tracing_*.
	 */
	if (trace_osnoise_callback_enabled)
		return 0;

	retval = hook_irq_events();
	if (retval)
		return -EINVAL;

	retval = hook_softirq_events();
	if (retval)
		goto out_unhook_irq;

	retval = hook_thread_events();
	/*
	 * All fine!
	 */
	if (!retval)
		return 0;

	unhook_softirq_events();
out_unhook_irq:
	unhook_irq_events();
	return -EINVAL;
}
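/*
 * Enabling flow, for orientation (illustrative): osnoise_tracer_init() or
 * timerlat_tracer_init() -> *_tracer_start() -> __osnoise_tracer_start(),
 * which resets the per-cpu data, hooks the events above, and then spawns
 * the per-cpu sampling kthreads.
 */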
static int __osnoise_tracer_start(struct trace_array *tr)
{
	int retval;

	osn_var_reset_all();

	retval = osnoise_hook_events();
	if (retval)
		return retval;
	/*
	 * Make sure NMIs see reset values.
	 */
	barrier();
	trace_osnoise_callback_enabled = true;

	retval = start_per_cpu_kthreads(tr);
	if (retval) {
		unhook_irq_events();
		return retval;
	}

	osnoise_busy = true;

	return 0;
}

static void osnoise_tracer_start(struct trace_array *tr)
{
	int retval;

	if (osnoise_busy)
		return;

	retval = __osnoise_tracer_start(tr);
	if (retval)
		pr_err(BANNER "Error starting osnoise tracer\n");
}

static void osnoise_tracer_stop(struct trace_array *tr)
{
	if (!osnoise_busy)
		return;

	trace_osnoise_callback_enabled = false;
	barrier();

	stop_per_cpu_kthreads();

	unhook_irq_events();
	unhook_softirq_events();
	unhook_thread_events();

	osnoise_busy = false;
}

static int osnoise_tracer_init(struct trace_array *tr)
{
	/* Only allow one instance to enable this */
	if (osnoise_busy)
		return -EBUSY;

	osnoise_trace = tr;
	tr->max_latency = 0;

	osnoise_tracer_start(tr);

	return 0;
}

static void osnoise_tracer_reset(struct trace_array *tr)
{
	osnoise_tracer_stop(tr);
}

static struct tracer osnoise_tracer __read_mostly = {
	.name		= "osnoise",
	.init		= osnoise_tracer_init,
	.reset		= osnoise_tracer_reset,
	.start		= osnoise_tracer_start,
	.stop		= osnoise_tracer_stop,
	.print_header	= print_osnoise_headers,
	.allow_instances = true,
};

#ifdef CONFIG_TIMERLAT_TRACER
static void timerlat_tracer_start(struct trace_array *tr)
{
	int retval;

	if (osnoise_busy)
		return;

	osnoise_data.timerlat_tracer = 1;

	retval = __osnoise_tracer_start(tr);
	if (retval)
		goto out_err;

	return;
out_err:
	pr_err(BANNER "Error starting timerlat tracer\n");
}

static void timerlat_tracer_stop(struct trace_array *tr)
{
	int cpu;

	if (!osnoise_busy)
		return;

	for_each_online_cpu(cpu)
		per_cpu(per_cpu_osnoise_var, cpu).sampling = false;

	osnoise_tracer_stop(tr);

	osnoise_data.timerlat_tracer = 0;
}

static int timerlat_tracer_init(struct trace_array *tr)
{
	/* Only allow one instance to enable this */
	if (osnoise_busy)
		return -EBUSY;

	osnoise_trace = tr;

	tr->max_latency = 0;

	timerlat_tracer_start(tr);

	return 0;
}

static void timerlat_tracer_reset(struct trace_array *tr)
{
	timerlat_tracer_stop(tr);
}

static struct tracer timerlat_tracer __read_mostly = {
	.name		= "timerlat",
	.init		= timerlat_tracer_init,
	.reset		= timerlat_tracer_reset,
	.start		= timerlat_tracer_start,
	.stop		= timerlat_tracer_stop,
	.print_header	= print_timerlat_headers,
	.allow_instances = true,
};
#endif /* CONFIG_TIMERLAT_TRACER */
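/*
 * Quick start for the osnoise tracer (illustrative; assumes tracefs at
 * /sys/kernel/tracing):
 *
 *	# echo osnoise > current_tracer
 *	# cat trace
 */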
__init static int init_osnoise_tracer(void)
{
	int ret;

	mutex_init(&interface_lock);

	cpumask_copy(&osnoise_cpumask, cpu_all_mask);

	ret = register_tracer(&osnoise_tracer);
	if (ret) {
		pr_err(BANNER "Error registering osnoise!\n");
		return ret;
	}

#ifdef CONFIG_TIMERLAT_TRACER
	ret = register_tracer(&timerlat_tracer);
	if (ret) {
		pr_err(BANNER "Error registering timerlat\n");
		return ret;
	}
#endif
	osnoise_init_hotplug_support();

	init_tracefs();

	return 0;
}
late_initcall(init_osnoise_tracer);