1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * OS Noise Tracer: computes the OS Noise suffered by a running thread. 4 * Timerlat Tracer: measures the wakeup latency of a timer triggered IRQ and thread. 5 * 6 * Based on "hwlat_detector" tracer by: 7 * Copyright (C) 2008-2009 Jon Masters, Red Hat, Inc. <jcm@redhat.com> 8 * Copyright (C) 2013-2016 Steven Rostedt, Red Hat, Inc. <srostedt@redhat.com> 9 * With feedback from Clark Williams <williams@redhat.com> 10 * 11 * And also based on the rtsl tracer presented on: 12 * DE OLIVEIRA, Daniel Bristot, et al. Demystifying the real-time linux 13 * scheduling latency. In: 32nd Euromicro Conference on Real-Time Systems 14 * (ECRTS 2020). Schloss Dagstuhl-Leibniz-Zentrum fur Informatik, 2020. 15 * 16 * Copyright (C) 2021 Daniel Bristot de Oliveira, Red Hat, Inc. <bristot@redhat.com> 17 */ 18 19 #include <linux/kthread.h> 20 #include <linux/tracefs.h> 21 #include <linux/uaccess.h> 22 #include <linux/cpumask.h> 23 #include <linux/delay.h> 24 #include <linux/sched/clock.h> 25 #include <uapi/linux/sched/types.h> 26 #include <linux/sched.h> 27 #include "trace.h" 28 29 #ifdef CONFIG_X86_LOCAL_APIC 30 #include <asm/trace/irq_vectors.h> 31 #undef TRACE_INCLUDE_PATH 32 #undef TRACE_INCLUDE_FILE 33 #endif /* CONFIG_X86_LOCAL_APIC */ 34 35 #include <trace/events/irq.h> 36 #include <trace/events/sched.h> 37 38 #define CREATE_TRACE_POINTS 39 #include <trace/events/osnoise.h> 40 41 static struct trace_array *osnoise_trace; 42 43 /* 44 * Default values. 45 */ 46 #define BANNER "osnoise: " 47 #define DEFAULT_SAMPLE_PERIOD 1000000 /* 1s */ 48 #define DEFAULT_SAMPLE_RUNTIME 1000000 /* 1s */ 49 50 #define DEFAULT_TIMERLAT_PERIOD 1000 /* 1ms */ 51 #define DEFAULT_TIMERLAT_PRIO 95 /* FIFO 95 */ 52 53 /* 54 * NMI runtime info. 55 */ 56 struct osn_nmi { 57 u64 count; 58 u64 delta_start; 59 }; 60 61 /* 62 * IRQ runtime info. 63 */ 64 struct osn_irq { 65 u64 count; 66 u64 arrival_time; 67 u64 delta_start; 68 }; 69 70 #define IRQ_CONTEXT 0 71 #define THREAD_CONTEXT 1 72 /* 73 * sofirq runtime info. 74 */ 75 struct osn_softirq { 76 u64 count; 77 u64 arrival_time; 78 u64 delta_start; 79 }; 80 81 /* 82 * thread runtime info. 83 */ 84 struct osn_thread { 85 u64 count; 86 u64 arrival_time; 87 u64 delta_start; 88 }; 89 90 /* 91 * Runtime information: this structure saves the runtime information used by 92 * one sampling thread. 93 */ 94 struct osnoise_variables { 95 struct task_struct *kthread; 96 bool sampling; 97 pid_t pid; 98 struct osn_nmi nmi; 99 struct osn_irq irq; 100 struct osn_softirq softirq; 101 struct osn_thread thread; 102 local_t int_counter; 103 }; 104 105 /* 106 * Per-cpu runtime information. 107 */ 108 DEFINE_PER_CPU(struct osnoise_variables, per_cpu_osnoise_var); 109 110 /* 111 * this_cpu_osn_var - Return the per-cpu osnoise_variables on its relative CPU 112 */ 113 static inline struct osnoise_variables *this_cpu_osn_var(void) 114 { 115 return this_cpu_ptr(&per_cpu_osnoise_var); 116 } 117 118 #ifdef CONFIG_TIMERLAT_TRACER 119 /* 120 * Runtime information for the timer mode. 121 */ 122 struct timerlat_variables { 123 struct task_struct *kthread; 124 struct hrtimer timer; 125 u64 rel_period; 126 u64 abs_period; 127 bool tracing_thread; 128 u64 count; 129 }; 130 131 DEFINE_PER_CPU(struct timerlat_variables, per_cpu_timerlat_var); 132 133 /* 134 * this_cpu_tmr_var - Return the per-cpu timerlat_variables on its relative CPU 135 */ 136 static inline struct timerlat_variables *this_cpu_tmr_var(void) 137 { 138 return this_cpu_ptr(&per_cpu_timerlat_var); 139 } 140 141 /* 142 * tlat_var_reset - Reset the values of the given timerlat_variables 143 */ 144 static inline void tlat_var_reset(void) 145 { 146 struct timerlat_variables *tlat_var; 147 int cpu; 148 /* 149 * So far, all the values are initialized as 0, so 150 * zeroing the structure is perfect. 151 */ 152 for_each_cpu(cpu, cpu_online_mask) { 153 tlat_var = per_cpu_ptr(&per_cpu_timerlat_var, cpu); 154 memset(tlat_var, 0, sizeof(*tlat_var)); 155 } 156 } 157 #else /* CONFIG_TIMERLAT_TRACER */ 158 #define tlat_var_reset() do {} while (0) 159 #endif /* CONFIG_TIMERLAT_TRACER */ 160 161 /* 162 * osn_var_reset - Reset the values of the given osnoise_variables 163 */ 164 static inline void osn_var_reset(void) 165 { 166 struct osnoise_variables *osn_var; 167 int cpu; 168 169 /* 170 * So far, all the values are initialized as 0, so 171 * zeroing the structure is perfect. 172 */ 173 for_each_cpu(cpu, cpu_online_mask) { 174 osn_var = per_cpu_ptr(&per_cpu_osnoise_var, cpu); 175 memset(osn_var, 0, sizeof(*osn_var)); 176 } 177 } 178 179 /* 180 * osn_var_reset_all - Reset the value of all per-cpu osnoise_variables 181 */ 182 static inline void osn_var_reset_all(void) 183 { 184 osn_var_reset(); 185 tlat_var_reset(); 186 } 187 188 /* 189 * Tells NMIs to call back to the osnoise tracer to record timestamps. 190 */ 191 bool trace_osnoise_callback_enabled; 192 193 /* 194 * osnoise sample structure definition. Used to store the statistics of a 195 * sample run. 196 */ 197 struct osnoise_sample { 198 u64 runtime; /* runtime */ 199 u64 noise; /* noise */ 200 u64 max_sample; /* max single noise sample */ 201 int hw_count; /* # HW (incl. hypervisor) interference */ 202 int nmi_count; /* # NMIs during this sample */ 203 int irq_count; /* # IRQs during this sample */ 204 int softirq_count; /* # softirqs during this sample */ 205 int thread_count; /* # threads during this sample */ 206 }; 207 208 #ifdef CONFIG_TIMERLAT_TRACER 209 /* 210 * timerlat sample structure definition. Used to store the statistics of 211 * a sample run. 212 */ 213 struct timerlat_sample { 214 u64 timer_latency; /* timer_latency */ 215 unsigned int seqnum; /* unique sequence */ 216 int context; /* timer context */ 217 }; 218 #endif 219 220 /* 221 * Protect the interface. 222 */ 223 struct mutex interface_lock; 224 225 /* 226 * Tracer data. 227 */ 228 static struct osnoise_data { 229 u64 sample_period; /* total sampling period */ 230 u64 sample_runtime; /* active sampling portion of period */ 231 u64 stop_tracing; /* stop trace in the internal operation (loop/irq) */ 232 u64 stop_tracing_total; /* stop trace in the final operation (report/thread) */ 233 #ifdef CONFIG_TIMERLAT_TRACER 234 u64 timerlat_period; /* timerlat period */ 235 u64 print_stack; /* print IRQ stack if total > */ 236 int timerlat_tracer; /* timerlat tracer */ 237 #endif 238 bool tainted; /* infor users and developers about a problem */ 239 } osnoise_data = { 240 .sample_period = DEFAULT_SAMPLE_PERIOD, 241 .sample_runtime = DEFAULT_SAMPLE_RUNTIME, 242 .stop_tracing = 0, 243 .stop_tracing_total = 0, 244 #ifdef CONFIG_TIMERLAT_TRACER 245 .print_stack = 0, 246 .timerlat_period = DEFAULT_TIMERLAT_PERIOD, 247 .timerlat_tracer = 0, 248 #endif 249 }; 250 251 /* 252 * Boolean variable used to inform that the tracer is currently sampling. 253 */ 254 static bool osnoise_busy; 255 256 /* 257 * Print the osnoise header info. 258 */ 259 static void print_osnoise_headers(struct seq_file *s) 260 { 261 if (osnoise_data.tainted) 262 seq_puts(s, "# osnoise is tainted!\n"); 263 264 seq_puts(s, "# _-----=> irqs-off\n"); 265 seq_puts(s, "# / _----=> need-resched\n"); 266 seq_puts(s, "# | / _---=> hardirq/softirq\n"); 267 seq_puts(s, "# || / _--=> preempt-depth "); 268 seq_puts(s, " MAX\n"); 269 270 seq_puts(s, "# || / "); 271 seq_puts(s, " SINGLE Interference counters:\n"); 272 273 seq_puts(s, "# |||| RUNTIME "); 274 seq_puts(s, " NOISE %% OF CPU NOISE +-----------------------------+\n"); 275 276 seq_puts(s, "# TASK-PID CPU# |||| TIMESTAMP IN US "); 277 seq_puts(s, " IN US AVAILABLE IN US HW NMI IRQ SIRQ THREAD\n"); 278 279 seq_puts(s, "# | | | |||| | | "); 280 seq_puts(s, " | | | | | | | |\n"); 281 } 282 283 /* 284 * osnoise_taint - report an osnoise error. 285 */ 286 #define osnoise_taint(msg) ({ \ 287 struct trace_array *tr = osnoise_trace; \ 288 \ 289 trace_array_printk_buf(tr->array_buffer.buffer, _THIS_IP_, msg); \ 290 osnoise_data.tainted = true; \ 291 }) 292 293 /* 294 * Record an osnoise_sample into the tracer buffer. 295 */ 296 static void trace_osnoise_sample(struct osnoise_sample *sample) 297 { 298 struct trace_array *tr = osnoise_trace; 299 struct trace_buffer *buffer = tr->array_buffer.buffer; 300 struct trace_event_call *call = &event_osnoise; 301 struct ring_buffer_event *event; 302 struct osnoise_entry *entry; 303 304 event = trace_buffer_lock_reserve(buffer, TRACE_OSNOISE, sizeof(*entry), 305 tracing_gen_ctx()); 306 if (!event) 307 return; 308 entry = ring_buffer_event_data(event); 309 entry->runtime = sample->runtime; 310 entry->noise = sample->noise; 311 entry->max_sample = sample->max_sample; 312 entry->hw_count = sample->hw_count; 313 entry->nmi_count = sample->nmi_count; 314 entry->irq_count = sample->irq_count; 315 entry->softirq_count = sample->softirq_count; 316 entry->thread_count = sample->thread_count; 317 318 if (!call_filter_check_discard(call, entry, buffer, event)) 319 trace_buffer_unlock_commit_nostack(buffer, event); 320 } 321 322 #ifdef CONFIG_TIMERLAT_TRACER 323 /* 324 * Print the timerlat header info. 325 */ 326 static void print_timerlat_headers(struct seq_file *s) 327 { 328 seq_puts(s, "# _-----=> irqs-off\n"); 329 seq_puts(s, "# / _----=> need-resched\n"); 330 seq_puts(s, "# | / _---=> hardirq/softirq\n"); 331 seq_puts(s, "# || / _--=> preempt-depth\n"); 332 seq_puts(s, "# || /\n"); 333 seq_puts(s, "# |||| ACTIVATION\n"); 334 seq_puts(s, "# TASK-PID CPU# |||| TIMESTAMP ID "); 335 seq_puts(s, " CONTEXT LATENCY\n"); 336 seq_puts(s, "# | | | |||| | | "); 337 seq_puts(s, " | |\n"); 338 } 339 340 /* 341 * Record an timerlat_sample into the tracer buffer. 342 */ 343 static void trace_timerlat_sample(struct timerlat_sample *sample) 344 { 345 struct trace_array *tr = osnoise_trace; 346 struct trace_event_call *call = &event_osnoise; 347 struct trace_buffer *buffer = tr->array_buffer.buffer; 348 struct ring_buffer_event *event; 349 struct timerlat_entry *entry; 350 351 event = trace_buffer_lock_reserve(buffer, TRACE_TIMERLAT, sizeof(*entry), 352 tracing_gen_ctx()); 353 if (!event) 354 return; 355 entry = ring_buffer_event_data(event); 356 entry->seqnum = sample->seqnum; 357 entry->context = sample->context; 358 entry->timer_latency = sample->timer_latency; 359 360 if (!call_filter_check_discard(call, entry, buffer, event)) 361 trace_buffer_unlock_commit_nostack(buffer, event); 362 } 363 364 #ifdef CONFIG_STACKTRACE 365 366 #define MAX_CALLS 256 367 368 /* 369 * Stack trace will take place only at IRQ level, so, no need 370 * to control nesting here. 371 */ 372 struct trace_stack { 373 int stack_size; 374 int nr_entries; 375 unsigned long calls[MAX_CALLS]; 376 }; 377 378 static DEFINE_PER_CPU(struct trace_stack, trace_stack); 379 380 /* 381 * timerlat_save_stack - save a stack trace without printing 382 * 383 * Save the current stack trace without printing. The 384 * stack will be printed later, after the end of the measurement. 385 */ 386 static void timerlat_save_stack(int skip) 387 { 388 unsigned int size, nr_entries; 389 struct trace_stack *fstack; 390 391 fstack = this_cpu_ptr(&trace_stack); 392 393 size = ARRAY_SIZE(fstack->calls); 394 395 nr_entries = stack_trace_save(fstack->calls, size, skip); 396 397 fstack->stack_size = nr_entries * sizeof(unsigned long); 398 fstack->nr_entries = nr_entries; 399 400 return; 401 402 } 403 /* 404 * timerlat_dump_stack - dump a stack trace previously saved 405 * 406 * Dump a saved stack trace into the trace buffer. 407 */ 408 static void timerlat_dump_stack(void) 409 { 410 struct trace_event_call *call = &event_osnoise; 411 struct trace_array *tr = osnoise_trace; 412 struct trace_buffer *buffer = tr->array_buffer.buffer; 413 struct ring_buffer_event *event; 414 struct trace_stack *fstack; 415 struct stack_entry *entry; 416 unsigned int size; 417 418 preempt_disable_notrace(); 419 fstack = this_cpu_ptr(&trace_stack); 420 size = fstack->stack_size; 421 422 event = trace_buffer_lock_reserve(buffer, TRACE_STACK, sizeof(*entry) + size, 423 tracing_gen_ctx()); 424 if (!event) 425 goto out; 426 427 entry = ring_buffer_event_data(event); 428 429 memcpy(&entry->caller, fstack->calls, size); 430 entry->size = fstack->nr_entries; 431 432 if (!call_filter_check_discard(call, entry, buffer, event)) 433 trace_buffer_unlock_commit_nostack(buffer, event); 434 435 out: 436 preempt_enable_notrace(); 437 } 438 #else 439 #define timerlat_dump_stack() do {} while (0) 440 #define timerlat_save_stack(a) do {} while (0) 441 #endif /* CONFIG_STACKTRACE */ 442 #endif /* CONFIG_TIMERLAT_TRACER */ 443 444 /* 445 * Macros to encapsulate the time capturing infrastructure. 446 */ 447 #define time_get() trace_clock_local() 448 #define time_to_us(x) div_u64(x, 1000) 449 #define time_sub(a, b) ((a) - (b)) 450 451 /* 452 * cond_move_irq_delta_start - Forward the delta_start of a running IRQ 453 * 454 * If an IRQ is preempted by an NMI, its delta_start is pushed forward 455 * to discount the NMI interference. 456 * 457 * See get_int_safe_duration(). 458 */ 459 static inline void 460 cond_move_irq_delta_start(struct osnoise_variables *osn_var, u64 duration) 461 { 462 if (osn_var->irq.delta_start) 463 osn_var->irq.delta_start += duration; 464 } 465 466 #ifndef CONFIG_PREEMPT_RT 467 /* 468 * cond_move_softirq_delta_start - Forward the delta_start of a running softirq. 469 * 470 * If a softirq is preempted by an IRQ or NMI, its delta_start is pushed 471 * forward to discount the interference. 472 * 473 * See get_int_safe_duration(). 474 */ 475 static inline void 476 cond_move_softirq_delta_start(struct osnoise_variables *osn_var, u64 duration) 477 { 478 if (osn_var->softirq.delta_start) 479 osn_var->softirq.delta_start += duration; 480 } 481 #else /* CONFIG_PREEMPT_RT */ 482 #define cond_move_softirq_delta_start(osn_var, duration) do {} while (0) 483 #endif 484 485 /* 486 * cond_move_thread_delta_start - Forward the delta_start of a running thread 487 * 488 * If a noisy thread is preempted by an softirq, IRQ or NMI, its delta_start 489 * is pushed forward to discount the interference. 490 * 491 * See get_int_safe_duration(). 492 */ 493 static inline void 494 cond_move_thread_delta_start(struct osnoise_variables *osn_var, u64 duration) 495 { 496 if (osn_var->thread.delta_start) 497 osn_var->thread.delta_start += duration; 498 } 499 500 /* 501 * get_int_safe_duration - Get the duration of a window 502 * 503 * The irq, softirq and thread varaibles need to have its duration without 504 * the interference from higher priority interrupts. Instead of keeping a 505 * variable to discount the interrupt interference from these variables, the 506 * starting time of these variables are pushed forward with the interrupt's 507 * duration. In this way, a single variable is used to: 508 * 509 * - Know if a given window is being measured. 510 * - Account its duration. 511 * - Discount the interference. 512 * 513 * To avoid getting inconsistent values, e.g.,: 514 * 515 * now = time_get() 516 * ---> interrupt! 517 * delta_start -= int duration; 518 * <--- 519 * duration = now - delta_start; 520 * 521 * result: negative duration if the variable duration before the 522 * interrupt was smaller than the interrupt execution. 523 * 524 * A counter of interrupts is used. If the counter increased, try 525 * to capture an interference safe duration. 526 */ 527 static inline s64 528 get_int_safe_duration(struct osnoise_variables *osn_var, u64 *delta_start) 529 { 530 u64 int_counter, now; 531 s64 duration; 532 533 do { 534 int_counter = local_read(&osn_var->int_counter); 535 /* synchronize with interrupts */ 536 barrier(); 537 538 now = time_get(); 539 duration = (now - *delta_start); 540 541 /* synchronize with interrupts */ 542 barrier(); 543 } while (int_counter != local_read(&osn_var->int_counter)); 544 545 /* 546 * This is an evidence of race conditions that cause 547 * a value to be "discounted" too much. 548 */ 549 if (duration < 0) 550 osnoise_taint("Negative duration!\n"); 551 552 *delta_start = 0; 553 554 return duration; 555 } 556 557 /* 558 * 559 * set_int_safe_time - Save the current time on *time, aware of interference 560 * 561 * Get the time, taking into consideration a possible interference from 562 * higher priority interrupts. 563 * 564 * See get_int_safe_duration() for an explanation. 565 */ 566 static u64 567 set_int_safe_time(struct osnoise_variables *osn_var, u64 *time) 568 { 569 u64 int_counter; 570 571 do { 572 int_counter = local_read(&osn_var->int_counter); 573 /* synchronize with interrupts */ 574 barrier(); 575 576 *time = time_get(); 577 578 /* synchronize with interrupts */ 579 barrier(); 580 } while (int_counter != local_read(&osn_var->int_counter)); 581 582 return int_counter; 583 } 584 585 #ifdef CONFIG_TIMERLAT_TRACER 586 /* 587 * copy_int_safe_time - Copy *src into *desc aware of interference 588 */ 589 static u64 590 copy_int_safe_time(struct osnoise_variables *osn_var, u64 *dst, u64 *src) 591 { 592 u64 int_counter; 593 594 do { 595 int_counter = local_read(&osn_var->int_counter); 596 /* synchronize with interrupts */ 597 barrier(); 598 599 *dst = *src; 600 601 /* synchronize with interrupts */ 602 barrier(); 603 } while (int_counter != local_read(&osn_var->int_counter)); 604 605 return int_counter; 606 } 607 #endif /* CONFIG_TIMERLAT_TRACER */ 608 609 /* 610 * trace_osnoise_callback - NMI entry/exit callback 611 * 612 * This function is called at the entry and exit NMI code. The bool enter 613 * distinguishes between either case. This function is used to note a NMI 614 * occurrence, compute the noise caused by the NMI, and to remove the noise 615 * it is potentially causing on other interference variables. 616 */ 617 void trace_osnoise_callback(bool enter) 618 { 619 struct osnoise_variables *osn_var = this_cpu_osn_var(); 620 u64 duration; 621 622 if (!osn_var->sampling) 623 return; 624 625 /* 626 * Currently trace_clock_local() calls sched_clock() and the 627 * generic version is not NMI safe. 628 */ 629 if (!IS_ENABLED(CONFIG_GENERIC_SCHED_CLOCK)) { 630 if (enter) { 631 osn_var->nmi.delta_start = time_get(); 632 local_inc(&osn_var->int_counter); 633 } else { 634 duration = time_get() - osn_var->nmi.delta_start; 635 636 trace_nmi_noise(osn_var->nmi.delta_start, duration); 637 638 cond_move_irq_delta_start(osn_var, duration); 639 cond_move_softirq_delta_start(osn_var, duration); 640 cond_move_thread_delta_start(osn_var, duration); 641 } 642 } 643 644 if (enter) 645 osn_var->nmi.count++; 646 } 647 648 /* 649 * osnoise_trace_irq_entry - Note the starting of an IRQ 650 * 651 * Save the starting time of an IRQ. As IRQs are non-preemptive to other IRQs, 652 * it is safe to use a single variable (ons_var->irq) to save the statistics. 653 * The arrival_time is used to report... the arrival time. The delta_start 654 * is used to compute the duration at the IRQ exit handler. See 655 * cond_move_irq_delta_start(). 656 */ 657 void osnoise_trace_irq_entry(int id) 658 { 659 struct osnoise_variables *osn_var = this_cpu_osn_var(); 660 661 if (!osn_var->sampling) 662 return; 663 /* 664 * This value will be used in the report, but not to compute 665 * the execution time, so it is safe to get it unsafe. 666 */ 667 osn_var->irq.arrival_time = time_get(); 668 set_int_safe_time(osn_var, &osn_var->irq.delta_start); 669 osn_var->irq.count++; 670 671 local_inc(&osn_var->int_counter); 672 } 673 674 /* 675 * osnoise_irq_exit - Note the end of an IRQ, sava data and trace 676 * 677 * Computes the duration of the IRQ noise, and trace it. Also discounts the 678 * interference from other sources of noise could be currently being accounted. 679 */ 680 void osnoise_trace_irq_exit(int id, const char *desc) 681 { 682 struct osnoise_variables *osn_var = this_cpu_osn_var(); 683 int duration; 684 685 if (!osn_var->sampling) 686 return; 687 688 duration = get_int_safe_duration(osn_var, &osn_var->irq.delta_start); 689 trace_irq_noise(id, desc, osn_var->irq.arrival_time, duration); 690 osn_var->irq.arrival_time = 0; 691 cond_move_softirq_delta_start(osn_var, duration); 692 cond_move_thread_delta_start(osn_var, duration); 693 } 694 695 /* 696 * trace_irqentry_callback - Callback to the irq:irq_entry traceevent 697 * 698 * Used to note the starting of an IRQ occurece. 699 */ 700 static void trace_irqentry_callback(void *data, int irq, 701 struct irqaction *action) 702 { 703 osnoise_trace_irq_entry(irq); 704 } 705 706 /* 707 * trace_irqexit_callback - Callback to the irq:irq_exit traceevent 708 * 709 * Used to note the end of an IRQ occurece. 710 */ 711 static void trace_irqexit_callback(void *data, int irq, 712 struct irqaction *action, int ret) 713 { 714 osnoise_trace_irq_exit(irq, action->name); 715 } 716 717 /* 718 * arch specific register function. 719 */ 720 int __weak osnoise_arch_register(void) 721 { 722 return 0; 723 } 724 725 /* 726 * arch specific unregister function. 727 */ 728 void __weak osnoise_arch_unregister(void) 729 { 730 return; 731 } 732 733 /* 734 * hook_irq_events - Hook IRQ handling events 735 * 736 * This function hooks the IRQ related callbacks to the respective trace 737 * events. 738 */ 739 static int hook_irq_events(void) 740 { 741 int ret; 742 743 ret = register_trace_irq_handler_entry(trace_irqentry_callback, NULL); 744 if (ret) 745 goto out_err; 746 747 ret = register_trace_irq_handler_exit(trace_irqexit_callback, NULL); 748 if (ret) 749 goto out_unregister_entry; 750 751 ret = osnoise_arch_register(); 752 if (ret) 753 goto out_irq_exit; 754 755 return 0; 756 757 out_irq_exit: 758 unregister_trace_irq_handler_exit(trace_irqexit_callback, NULL); 759 out_unregister_entry: 760 unregister_trace_irq_handler_entry(trace_irqentry_callback, NULL); 761 out_err: 762 return -EINVAL; 763 } 764 765 /* 766 * unhook_irq_events - Unhook IRQ handling events 767 * 768 * This function unhooks the IRQ related callbacks to the respective trace 769 * events. 770 */ 771 static void unhook_irq_events(void) 772 { 773 osnoise_arch_unregister(); 774 unregister_trace_irq_handler_exit(trace_irqexit_callback, NULL); 775 unregister_trace_irq_handler_entry(trace_irqentry_callback, NULL); 776 } 777 778 #ifndef CONFIG_PREEMPT_RT 779 /* 780 * trace_softirq_entry_callback - Note the starting of a softirq 781 * 782 * Save the starting time of a softirq. As softirqs are non-preemptive to 783 * other softirqs, it is safe to use a single variable (ons_var->softirq) 784 * to save the statistics. The arrival_time is used to report... the 785 * arrival time. The delta_start is used to compute the duration at the 786 * softirq exit handler. See cond_move_softirq_delta_start(). 787 */ 788 static void trace_softirq_entry_callback(void *data, unsigned int vec_nr) 789 { 790 struct osnoise_variables *osn_var = this_cpu_osn_var(); 791 792 if (!osn_var->sampling) 793 return; 794 /* 795 * This value will be used in the report, but not to compute 796 * the execution time, so it is safe to get it unsafe. 797 */ 798 osn_var->softirq.arrival_time = time_get(); 799 set_int_safe_time(osn_var, &osn_var->softirq.delta_start); 800 osn_var->softirq.count++; 801 802 local_inc(&osn_var->int_counter); 803 } 804 805 /* 806 * trace_softirq_exit_callback - Note the end of an softirq 807 * 808 * Computes the duration of the softirq noise, and trace it. Also discounts the 809 * interference from other sources of noise could be currently being accounted. 810 */ 811 static void trace_softirq_exit_callback(void *data, unsigned int vec_nr) 812 { 813 struct osnoise_variables *osn_var = this_cpu_osn_var(); 814 int duration; 815 816 if (!osn_var->sampling) 817 return; 818 819 #ifdef CONFIG_TIMERLAT_TRACER 820 /* 821 * If the timerlat is enabled, but the irq handler did 822 * not run yet enabling timerlat_tracer, do not trace. 823 */ 824 if (unlikely(osnoise_data.timerlat_tracer)) { 825 struct timerlat_variables *tlat_var; 826 tlat_var = this_cpu_tmr_var(); 827 if (!tlat_var->tracing_thread) { 828 osn_var->softirq.arrival_time = 0; 829 osn_var->softirq.delta_start = 0; 830 return; 831 } 832 } 833 #endif 834 835 duration = get_int_safe_duration(osn_var, &osn_var->softirq.delta_start); 836 trace_softirq_noise(vec_nr, osn_var->softirq.arrival_time, duration); 837 cond_move_thread_delta_start(osn_var, duration); 838 osn_var->softirq.arrival_time = 0; 839 } 840 841 /* 842 * hook_softirq_events - Hook softirq handling events 843 * 844 * This function hooks the softirq related callbacks to the respective trace 845 * events. 846 */ 847 static int hook_softirq_events(void) 848 { 849 int ret; 850 851 ret = register_trace_softirq_entry(trace_softirq_entry_callback, NULL); 852 if (ret) 853 goto out_err; 854 855 ret = register_trace_softirq_exit(trace_softirq_exit_callback, NULL); 856 if (ret) 857 goto out_unreg_entry; 858 859 return 0; 860 861 out_unreg_entry: 862 unregister_trace_softirq_entry(trace_softirq_entry_callback, NULL); 863 out_err: 864 return -EINVAL; 865 } 866 867 /* 868 * unhook_softirq_events - Unhook softirq handling events 869 * 870 * This function hooks the softirq related callbacks to the respective trace 871 * events. 872 */ 873 static void unhook_softirq_events(void) 874 { 875 unregister_trace_softirq_entry(trace_softirq_entry_callback, NULL); 876 unregister_trace_softirq_exit(trace_softirq_exit_callback, NULL); 877 } 878 #else /* CONFIG_PREEMPT_RT */ 879 /* 880 * softirq are threads on the PREEMPT_RT mode. 881 */ 882 static int hook_softirq_events(void) 883 { 884 return 0; 885 } 886 static void unhook_softirq_events(void) 887 { 888 } 889 #endif 890 891 /* 892 * thread_entry - Record the starting of a thread noise window 893 * 894 * It saves the context switch time for a noisy thread, and increments 895 * the interference counters. 896 */ 897 static void 898 thread_entry(struct osnoise_variables *osn_var, struct task_struct *t) 899 { 900 if (!osn_var->sampling) 901 return; 902 /* 903 * The arrival time will be used in the report, but not to compute 904 * the execution time, so it is safe to get it unsafe. 905 */ 906 osn_var->thread.arrival_time = time_get(); 907 908 set_int_safe_time(osn_var, &osn_var->thread.delta_start); 909 910 osn_var->thread.count++; 911 local_inc(&osn_var->int_counter); 912 } 913 914 /* 915 * thread_exit - Report the end of a thread noise window 916 * 917 * It computes the total noise from a thread, tracing if needed. 918 */ 919 static void 920 thread_exit(struct osnoise_variables *osn_var, struct task_struct *t) 921 { 922 int duration; 923 924 if (!osn_var->sampling) 925 return; 926 927 #ifdef CONFIG_TIMERLAT_TRACER 928 if (osnoise_data.timerlat_tracer) { 929 struct timerlat_variables *tlat_var; 930 tlat_var = this_cpu_tmr_var(); 931 if (!tlat_var->tracing_thread) { 932 osn_var->thread.delta_start = 0; 933 osn_var->thread.arrival_time = 0; 934 return; 935 } 936 } 937 #endif 938 939 duration = get_int_safe_duration(osn_var, &osn_var->thread.delta_start); 940 941 trace_thread_noise(t, osn_var->thread.arrival_time, duration); 942 943 osn_var->thread.arrival_time = 0; 944 } 945 946 /* 947 * trace_sched_switch - sched:sched_switch trace event handler 948 * 949 * This function is hooked to the sched:sched_switch trace event, and it is 950 * used to record the beginning and to report the end of a thread noise window. 951 */ 952 static void 953 trace_sched_switch_callback(void *data, bool preempt, struct task_struct *p, 954 struct task_struct *n) 955 { 956 struct osnoise_variables *osn_var = this_cpu_osn_var(); 957 958 if (p->pid != osn_var->pid) 959 thread_exit(osn_var, p); 960 961 if (n->pid != osn_var->pid) 962 thread_entry(osn_var, n); 963 } 964 965 /* 966 * hook_thread_events - Hook the insturmentation for thread noise 967 * 968 * Hook the osnoise tracer callbacks to handle the noise from other 969 * threads on the necessary kernel events. 970 */ 971 static int hook_thread_events(void) 972 { 973 int ret; 974 975 ret = register_trace_sched_switch(trace_sched_switch_callback, NULL); 976 if (ret) 977 return -EINVAL; 978 979 return 0; 980 } 981 982 /* 983 * unhook_thread_events - *nhook the insturmentation for thread noise 984 * 985 * Unook the osnoise tracer callbacks to handle the noise from other 986 * threads on the necessary kernel events. 987 */ 988 static void unhook_thread_events(void) 989 { 990 unregister_trace_sched_switch(trace_sched_switch_callback, NULL); 991 } 992 993 /* 994 * save_osn_sample_stats - Save the osnoise_sample statistics 995 * 996 * Save the osnoise_sample statistics before the sampling phase. These 997 * values will be used later to compute the diff betwneen the statistics 998 * before and after the osnoise sampling. 999 */ 1000 static void 1001 save_osn_sample_stats(struct osnoise_variables *osn_var, struct osnoise_sample *s) 1002 { 1003 s->nmi_count = osn_var->nmi.count; 1004 s->irq_count = osn_var->irq.count; 1005 s->softirq_count = osn_var->softirq.count; 1006 s->thread_count = osn_var->thread.count; 1007 } 1008 1009 /* 1010 * diff_osn_sample_stats - Compute the osnoise_sample statistics 1011 * 1012 * After a sample period, compute the difference on the osnoise_sample 1013 * statistics. The struct osnoise_sample *s contains the statistics saved via 1014 * save_osn_sample_stats() before the osnoise sampling. 1015 */ 1016 static void 1017 diff_osn_sample_stats(struct osnoise_variables *osn_var, struct osnoise_sample *s) 1018 { 1019 s->nmi_count = osn_var->nmi.count - s->nmi_count; 1020 s->irq_count = osn_var->irq.count - s->irq_count; 1021 s->softirq_count = osn_var->softirq.count - s->softirq_count; 1022 s->thread_count = osn_var->thread.count - s->thread_count; 1023 } 1024 1025 /* 1026 * osnoise_stop_tracing - Stop tracing and the tracer. 1027 */ 1028 static void osnoise_stop_tracing(void) 1029 { 1030 struct trace_array *tr = osnoise_trace; 1031 tracer_tracing_off(tr); 1032 } 1033 1034 /* 1035 * run_osnoise - Sample the time and look for osnoise 1036 * 1037 * Used to capture the time, looking for potential osnoise latency repeatedly. 1038 * Different from hwlat_detector, it is called with preemption and interrupts 1039 * enabled. This allows irqs, softirqs and threads to run, interfering on the 1040 * osnoise sampling thread, as they would do with a regular thread. 1041 */ 1042 static int run_osnoise(void) 1043 { 1044 struct osnoise_variables *osn_var = this_cpu_osn_var(); 1045 struct trace_array *tr = osnoise_trace; 1046 u64 start, sample, last_sample; 1047 u64 last_int_count, int_count; 1048 s64 noise = 0, max_noise = 0; 1049 s64 total, last_total = 0; 1050 struct osnoise_sample s; 1051 unsigned int threshold; 1052 u64 runtime, stop_in; 1053 u64 sum_noise = 0; 1054 int hw_count = 0; 1055 int ret = -1; 1056 1057 /* 1058 * Considers the current thread as the workload. 1059 */ 1060 osn_var->pid = current->pid; 1061 1062 /* 1063 * Save the current stats for the diff 1064 */ 1065 save_osn_sample_stats(osn_var, &s); 1066 1067 /* 1068 * if threshold is 0, use the default value of 5 us. 1069 */ 1070 threshold = tracing_thresh ? : 5000; 1071 1072 /* 1073 * Make sure NMIs see sampling first 1074 */ 1075 osn_var->sampling = true; 1076 barrier(); 1077 1078 /* 1079 * Transform the *_us config to nanoseconds to avoid the 1080 * division on the main loop. 1081 */ 1082 runtime = osnoise_data.sample_runtime * NSEC_PER_USEC; 1083 stop_in = osnoise_data.stop_tracing * NSEC_PER_USEC; 1084 1085 /* 1086 * Start timestemp 1087 */ 1088 start = time_get(); 1089 1090 /* 1091 * "previous" loop. 1092 */ 1093 last_int_count = set_int_safe_time(osn_var, &last_sample); 1094 1095 do { 1096 /* 1097 * Get sample! 1098 */ 1099 int_count = set_int_safe_time(osn_var, &sample); 1100 1101 noise = time_sub(sample, last_sample); 1102 1103 /* 1104 * This shouldn't happen. 1105 */ 1106 if (noise < 0) { 1107 osnoise_taint("negative noise!"); 1108 goto out; 1109 } 1110 1111 /* 1112 * Sample runtime. 1113 */ 1114 total = time_sub(sample, start); 1115 1116 /* 1117 * Check for possible overflows. 1118 */ 1119 if (total < last_total) { 1120 osnoise_taint("total overflow!"); 1121 break; 1122 } 1123 1124 last_total = total; 1125 1126 if (noise >= threshold) { 1127 int interference = int_count - last_int_count; 1128 1129 if (noise > max_noise) 1130 max_noise = noise; 1131 1132 if (!interference) 1133 hw_count++; 1134 1135 sum_noise += noise; 1136 1137 trace_sample_threshold(last_sample, noise, interference); 1138 1139 if (osnoise_data.stop_tracing) 1140 if (noise > stop_in) 1141 osnoise_stop_tracing(); 1142 } 1143 1144 /* 1145 * For the non-preemptive kernel config: let threads runs, if 1146 * they so wish. 1147 */ 1148 cond_resched(); 1149 1150 last_sample = sample; 1151 last_int_count = int_count; 1152 1153 } while (total < runtime && !kthread_should_stop()); 1154 1155 /* 1156 * Finish the above in the view for interrupts. 1157 */ 1158 barrier(); 1159 1160 osn_var->sampling = false; 1161 1162 /* 1163 * Make sure sampling data is no longer updated. 1164 */ 1165 barrier(); 1166 1167 /* 1168 * Save noise info. 1169 */ 1170 s.noise = time_to_us(sum_noise); 1171 s.runtime = time_to_us(total); 1172 s.max_sample = time_to_us(max_noise); 1173 s.hw_count = hw_count; 1174 1175 /* Save interference stats info */ 1176 diff_osn_sample_stats(osn_var, &s); 1177 1178 trace_osnoise_sample(&s); 1179 1180 /* Keep a running maximum ever recorded osnoise "latency" */ 1181 if (max_noise > tr->max_latency) { 1182 tr->max_latency = max_noise; 1183 latency_fsnotify(tr); 1184 } 1185 1186 if (osnoise_data.stop_tracing_total) 1187 if (s.noise > osnoise_data.stop_tracing_total) 1188 osnoise_stop_tracing(); 1189 1190 return 0; 1191 out: 1192 return ret; 1193 } 1194 1195 static struct cpumask osnoise_cpumask; 1196 static struct cpumask save_cpumask; 1197 1198 /* 1199 * osnoise_main - The osnoise detection kernel thread 1200 * 1201 * Calls run_osnoise() function to measure the osnoise for the configured runtime, 1202 * every period. 1203 */ 1204 static int osnoise_main(void *data) 1205 { 1206 u64 interval; 1207 1208 while (!kthread_should_stop()) { 1209 1210 run_osnoise(); 1211 1212 mutex_lock(&interface_lock); 1213 interval = osnoise_data.sample_period - osnoise_data.sample_runtime; 1214 mutex_unlock(&interface_lock); 1215 1216 do_div(interval, USEC_PER_MSEC); 1217 1218 /* 1219 * differently from hwlat_detector, the osnoise tracer can run 1220 * without a pause because preemption is on. 1221 */ 1222 if (interval < 1) { 1223 /* Let synchronize_rcu_tasks() make progress */ 1224 cond_resched_tasks_rcu_qs(); 1225 continue; 1226 } 1227 1228 if (msleep_interruptible(interval)) 1229 break; 1230 } 1231 1232 return 0; 1233 } 1234 1235 #ifdef CONFIG_TIMERLAT_TRACER 1236 /* 1237 * timerlat_irq - hrtimer handler for timerlat. 1238 */ 1239 static enum hrtimer_restart timerlat_irq(struct hrtimer *timer) 1240 { 1241 struct osnoise_variables *osn_var = this_cpu_osn_var(); 1242 struct trace_array *tr = osnoise_trace; 1243 struct timerlat_variables *tlat; 1244 struct timerlat_sample s; 1245 u64 now; 1246 u64 diff; 1247 1248 /* 1249 * I am not sure if the timer was armed for this CPU. So, get 1250 * the timerlat struct from the timer itself, not from this 1251 * CPU. 1252 */ 1253 tlat = container_of(timer, struct timerlat_variables, timer); 1254 1255 now = ktime_to_ns(hrtimer_cb_get_time(&tlat->timer)); 1256 1257 /* 1258 * Enable the osnoise: events for thread an softirq. 1259 */ 1260 tlat->tracing_thread = true; 1261 1262 osn_var->thread.arrival_time = time_get(); 1263 1264 /* 1265 * A hardirq is running: the timer IRQ. It is for sure preempting 1266 * a thread, and potentially preempting a softirq. 1267 * 1268 * At this point, it is not interesting to know the duration of the 1269 * preempted thread (and maybe softirq), but how much time they will 1270 * delay the beginning of the execution of the timer thread. 1271 * 1272 * To get the correct (net) delay added by the softirq, its delta_start 1273 * is set as the IRQ one. In this way, at the return of the IRQ, the delta 1274 * start of the sofitrq will be zeroed, accounting then only the time 1275 * after that. 1276 * 1277 * The thread follows the same principle. However, if a softirq is 1278 * running, the thread needs to receive the softirq delta_start. The 1279 * reason being is that the softirq will be the last to be unfolded, 1280 * resseting the thread delay to zero. 1281 */ 1282 #ifndef CONFIG_PREEMPT_RT 1283 if (osn_var->softirq.delta_start) { 1284 copy_int_safe_time(osn_var, &osn_var->thread.delta_start, 1285 &osn_var->softirq.delta_start); 1286 1287 copy_int_safe_time(osn_var, &osn_var->softirq.delta_start, 1288 &osn_var->irq.delta_start); 1289 } else { 1290 copy_int_safe_time(osn_var, &osn_var->thread.delta_start, 1291 &osn_var->irq.delta_start); 1292 } 1293 #else /* CONFIG_PREEMPT_RT */ 1294 /* 1295 * The sofirqs run as threads on RT, so there is not need 1296 * to keep track of it. 1297 */ 1298 copy_int_safe_time(osn_var, &osn_var->thread.delta_start, &osn_var->irq.delta_start); 1299 #endif /* CONFIG_PREEMPT_RT */ 1300 1301 /* 1302 * Compute the current time with the expected time. 1303 */ 1304 diff = now - tlat->abs_period; 1305 1306 tlat->count++; 1307 s.seqnum = tlat->count; 1308 s.timer_latency = diff; 1309 s.context = IRQ_CONTEXT; 1310 1311 trace_timerlat_sample(&s); 1312 1313 /* Keep a running maximum ever recorded os noise "latency" */ 1314 if (diff > tr->max_latency) { 1315 tr->max_latency = diff; 1316 latency_fsnotify(tr); 1317 } 1318 1319 if (osnoise_data.stop_tracing) 1320 if (time_to_us(diff) >= osnoise_data.stop_tracing) 1321 osnoise_stop_tracing(); 1322 1323 wake_up_process(tlat->kthread); 1324 1325 if (osnoise_data.print_stack) 1326 timerlat_save_stack(0); 1327 1328 return HRTIMER_NORESTART; 1329 } 1330 1331 /* 1332 * wait_next_period - Wait for the next period for timerlat 1333 */ 1334 static int wait_next_period(struct timerlat_variables *tlat) 1335 { 1336 ktime_t next_abs_period, now; 1337 u64 rel_period = osnoise_data.timerlat_period * 1000; 1338 1339 now = hrtimer_cb_get_time(&tlat->timer); 1340 next_abs_period = ns_to_ktime(tlat->abs_period + rel_period); 1341 1342 /* 1343 * Save the next abs_period. 1344 */ 1345 tlat->abs_period = (u64) ktime_to_ns(next_abs_period); 1346 1347 /* 1348 * If the new abs_period is in the past, skip the activation. 1349 */ 1350 while (ktime_compare(now, next_abs_period) > 0) { 1351 next_abs_period = ns_to_ktime(tlat->abs_period + rel_period); 1352 tlat->abs_period = (u64) ktime_to_ns(next_abs_period); 1353 } 1354 1355 set_current_state(TASK_INTERRUPTIBLE); 1356 1357 hrtimer_start(&tlat->timer, next_abs_period, HRTIMER_MODE_ABS_PINNED_HARD); 1358 schedule(); 1359 return 1; 1360 } 1361 1362 /* 1363 * timerlat_main- Timerlat main 1364 */ 1365 static int timerlat_main(void *data) 1366 { 1367 struct osnoise_variables *osn_var = this_cpu_osn_var(); 1368 struct timerlat_variables *tlat = this_cpu_tmr_var(); 1369 struct timerlat_sample s; 1370 struct sched_param sp; 1371 u64 now, diff; 1372 1373 /* 1374 * Make the thread RT, that is how cyclictest is usually used. 1375 */ 1376 sp.sched_priority = DEFAULT_TIMERLAT_PRIO; 1377 sched_setscheduler_nocheck(current, SCHED_FIFO, &sp); 1378 1379 tlat->count = 0; 1380 tlat->tracing_thread = false; 1381 1382 hrtimer_init(&tlat->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED_HARD); 1383 tlat->timer.function = timerlat_irq; 1384 tlat->kthread = current; 1385 osn_var->pid = current->pid; 1386 /* 1387 * Anotate the arrival time. 1388 */ 1389 tlat->abs_period = hrtimer_cb_get_time(&tlat->timer); 1390 1391 wait_next_period(tlat); 1392 1393 osn_var->sampling = 1; 1394 1395 while (!kthread_should_stop()) { 1396 now = ktime_to_ns(hrtimer_cb_get_time(&tlat->timer)); 1397 diff = now - tlat->abs_period; 1398 1399 s.seqnum = tlat->count; 1400 s.timer_latency = diff; 1401 s.context = THREAD_CONTEXT; 1402 1403 trace_timerlat_sample(&s); 1404 1405 #ifdef CONFIG_STACKTRACE 1406 if (osnoise_data.print_stack) 1407 if (osnoise_data.print_stack <= time_to_us(diff)) 1408 timerlat_dump_stack(); 1409 #endif /* CONFIG_STACKTRACE */ 1410 1411 tlat->tracing_thread = false; 1412 if (osnoise_data.stop_tracing_total) 1413 if (time_to_us(diff) >= osnoise_data.stop_tracing_total) 1414 osnoise_stop_tracing(); 1415 1416 wait_next_period(tlat); 1417 } 1418 1419 hrtimer_cancel(&tlat->timer); 1420 return 0; 1421 } 1422 #endif /* CONFIG_TIMERLAT_TRACER */ 1423 1424 /* 1425 * stop_kthread - stop a workload thread 1426 */ 1427 static void stop_kthread(unsigned int cpu) 1428 { 1429 struct task_struct *kthread; 1430 1431 kthread = per_cpu(per_cpu_osnoise_var, cpu).kthread; 1432 if (kthread) 1433 kthread_stop(kthread); 1434 per_cpu(per_cpu_osnoise_var, cpu).kthread = NULL; 1435 } 1436 1437 /* 1438 * stop_per_cpu_kthread - Stop per-cpu threads 1439 * 1440 * Stop the osnoise sampling htread. Use this on unload and at system 1441 * shutdown. 1442 */ 1443 static void stop_per_cpu_kthreads(void) 1444 { 1445 int cpu; 1446 1447 get_online_cpus(); 1448 1449 for_each_online_cpu(cpu) 1450 stop_kthread(cpu); 1451 1452 put_online_cpus(); 1453 } 1454 1455 /* 1456 * start_kthread - Start a workload tread 1457 */ 1458 static int start_kthread(unsigned int cpu) 1459 { 1460 struct task_struct *kthread; 1461 void *main = osnoise_main; 1462 char comm[24]; 1463 1464 #ifdef CONFIG_TIMERLAT_TRACER 1465 if (osnoise_data.timerlat_tracer) { 1466 snprintf(comm, 24, "timerlat/%d", cpu); 1467 main = timerlat_main; 1468 } else { 1469 snprintf(comm, 24, "osnoise/%d", cpu); 1470 } 1471 #else 1472 snprintf(comm, 24, "osnoise/%d", cpu); 1473 #endif 1474 kthread = kthread_create_on_cpu(main, NULL, cpu, comm); 1475 1476 if (IS_ERR(kthread)) { 1477 pr_err(BANNER "could not start sampling thread\n"); 1478 stop_per_cpu_kthreads(); 1479 return -ENOMEM; 1480 } 1481 1482 per_cpu(per_cpu_osnoise_var, cpu).kthread = kthread; 1483 wake_up_process(kthread); 1484 1485 return 0; 1486 } 1487 1488 /* 1489 * start_per_cpu_kthread - Kick off per-cpu osnoise sampling kthreads 1490 * 1491 * This starts the kernel thread that will look for osnoise on many 1492 * cpus. 1493 */ 1494 static int start_per_cpu_kthreads(struct trace_array *tr) 1495 { 1496 struct cpumask *current_mask = &save_cpumask; 1497 int retval; 1498 int cpu; 1499 1500 get_online_cpus(); 1501 /* 1502 * Run only on CPUs in which trace and osnoise are allowed to run. 1503 */ 1504 cpumask_and(current_mask, tr->tracing_cpumask, &osnoise_cpumask); 1505 /* 1506 * And the CPU is online. 1507 */ 1508 cpumask_and(current_mask, cpu_online_mask, current_mask); 1509 1510 for_each_possible_cpu(cpu) 1511 per_cpu(per_cpu_osnoise_var, cpu).kthread = NULL; 1512 1513 for_each_cpu(cpu, current_mask) { 1514 retval = start_kthread(cpu); 1515 if (retval) { 1516 stop_per_cpu_kthreads(); 1517 return retval; 1518 } 1519 } 1520 1521 put_online_cpus(); 1522 1523 return 0; 1524 } 1525 1526 #ifdef CONFIG_HOTPLUG_CPU 1527 static void osnoise_hotplug_workfn(struct work_struct *dummy) 1528 { 1529 struct trace_array *tr = osnoise_trace; 1530 unsigned int cpu = smp_processor_id(); 1531 1532 1533 mutex_lock(&trace_types_lock); 1534 1535 if (!osnoise_busy) 1536 goto out_unlock_trace; 1537 1538 mutex_lock(&interface_lock); 1539 get_online_cpus(); 1540 1541 if (!cpumask_test_cpu(cpu, &osnoise_cpumask)) 1542 goto out_unlock; 1543 1544 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask)) 1545 goto out_unlock; 1546 1547 start_kthread(cpu); 1548 1549 out_unlock: 1550 put_online_cpus(); 1551 mutex_unlock(&interface_lock); 1552 out_unlock_trace: 1553 mutex_unlock(&trace_types_lock); 1554 } 1555 1556 static DECLARE_WORK(osnoise_hotplug_work, osnoise_hotplug_workfn); 1557 1558 /* 1559 * osnoise_cpu_init - CPU hotplug online callback function 1560 */ 1561 static int osnoise_cpu_init(unsigned int cpu) 1562 { 1563 schedule_work_on(cpu, &osnoise_hotplug_work); 1564 return 0; 1565 } 1566 1567 /* 1568 * osnoise_cpu_die - CPU hotplug offline callback function 1569 */ 1570 static int osnoise_cpu_die(unsigned int cpu) 1571 { 1572 stop_kthread(cpu); 1573 return 0; 1574 } 1575 1576 static void osnoise_init_hotplug_support(void) 1577 { 1578 int ret; 1579 1580 ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "trace/osnoise:online", 1581 osnoise_cpu_init, osnoise_cpu_die); 1582 if (ret < 0) 1583 pr_warn(BANNER "Error to init cpu hotplug support\n"); 1584 1585 return; 1586 } 1587 #else /* CONFIG_HOTPLUG_CPU */ 1588 static void osnoise_init_hotplug_support(void) 1589 { 1590 return; 1591 } 1592 #endif /* CONFIG_HOTPLUG_CPU */ 1593 1594 /* 1595 * osnoise_cpus_read - Read function for reading the "cpus" file 1596 * @filp: The active open file structure 1597 * @ubuf: The userspace provided buffer to read value into 1598 * @cnt: The maximum number of bytes to read 1599 * @ppos: The current "file" position 1600 * 1601 * Prints the "cpus" output into the user-provided buffer. 1602 */ 1603 static ssize_t 1604 osnoise_cpus_read(struct file *filp, char __user *ubuf, size_t count, 1605 loff_t *ppos) 1606 { 1607 char *mask_str; 1608 int len; 1609 1610 mutex_lock(&interface_lock); 1611 1612 len = snprintf(NULL, 0, "%*pbl\n", cpumask_pr_args(&osnoise_cpumask)) + 1; 1613 mask_str = kmalloc(len, GFP_KERNEL); 1614 if (!mask_str) { 1615 count = -ENOMEM; 1616 goto out_unlock; 1617 } 1618 1619 len = snprintf(mask_str, len, "%*pbl\n", cpumask_pr_args(&osnoise_cpumask)); 1620 if (len >= count) { 1621 count = -EINVAL; 1622 goto out_free; 1623 } 1624 1625 count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len); 1626 1627 out_free: 1628 kfree(mask_str); 1629 out_unlock: 1630 mutex_unlock(&interface_lock); 1631 1632 return count; 1633 } 1634 1635 static void osnoise_tracer_start(struct trace_array *tr); 1636 static void osnoise_tracer_stop(struct trace_array *tr); 1637 1638 /* 1639 * osnoise_cpus_write - Write function for "cpus" entry 1640 * @filp: The active open file structure 1641 * @ubuf: The user buffer that contains the value to write 1642 * @cnt: The maximum number of bytes to write to "file" 1643 * @ppos: The current position in @file 1644 * 1645 * This function provides a write implementation for the "cpus" 1646 * interface to the osnoise trace. By default, it lists all CPUs, 1647 * in this way, allowing osnoise threads to run on any online CPU 1648 * of the system. It serves to restrict the execution of osnoise to the 1649 * set of CPUs writing via this interface. Note that osnoise also 1650 * respects the "tracing_cpumask." Hence, osnoise threads will run only 1651 * on the set of CPUs allowed here AND on "tracing_cpumask." Why not 1652 * have just "tracing_cpumask?" Because the user might be interested 1653 * in tracing what is running on other CPUs. For instance, one might 1654 * run osnoise in one HT CPU while observing what is running on the 1655 * sibling HT CPU. 1656 */ 1657 static ssize_t 1658 osnoise_cpus_write(struct file *filp, const char __user *ubuf, size_t count, 1659 loff_t *ppos) 1660 { 1661 struct trace_array *tr = osnoise_trace; 1662 cpumask_var_t osnoise_cpumask_new; 1663 int running, err; 1664 char buf[256]; 1665 1666 if (count >= 256) 1667 return -EINVAL; 1668 1669 if (copy_from_user(buf, ubuf, count)) 1670 return -EFAULT; 1671 1672 if (!zalloc_cpumask_var(&osnoise_cpumask_new, GFP_KERNEL)) 1673 return -ENOMEM; 1674 1675 err = cpulist_parse(buf, osnoise_cpumask_new); 1676 if (err) 1677 goto err_free; 1678 1679 /* 1680 * trace_types_lock is taken to avoid concurrency on start/stop 1681 * and osnoise_busy. 1682 */ 1683 mutex_lock(&trace_types_lock); 1684 running = osnoise_busy; 1685 if (running) 1686 osnoise_tracer_stop(tr); 1687 1688 mutex_lock(&interface_lock); 1689 /* 1690 * osnoise_cpumask is read by CPU hotplug operations. 1691 */ 1692 get_online_cpus(); 1693 1694 cpumask_copy(&osnoise_cpumask, osnoise_cpumask_new); 1695 1696 put_online_cpus(); 1697 mutex_unlock(&interface_lock); 1698 1699 if (running) 1700 osnoise_tracer_start(tr); 1701 mutex_unlock(&trace_types_lock); 1702 1703 free_cpumask_var(osnoise_cpumask_new); 1704 return count; 1705 1706 err_free: 1707 free_cpumask_var(osnoise_cpumask_new); 1708 1709 return err; 1710 } 1711 1712 /* 1713 * osnoise/runtime_us: cannot be greater than the period. 1714 */ 1715 static struct trace_min_max_param osnoise_runtime = { 1716 .lock = &interface_lock, 1717 .val = &osnoise_data.sample_runtime, 1718 .max = &osnoise_data.sample_period, 1719 .min = NULL, 1720 }; 1721 1722 /* 1723 * osnoise/period_us: cannot be smaller than the runtime. 1724 */ 1725 static struct trace_min_max_param osnoise_period = { 1726 .lock = &interface_lock, 1727 .val = &osnoise_data.sample_period, 1728 .max = NULL, 1729 .min = &osnoise_data.sample_runtime, 1730 }; 1731 1732 /* 1733 * osnoise/stop_tracing_us: no limit. 1734 */ 1735 static struct trace_min_max_param osnoise_stop_tracing_in = { 1736 .lock = &interface_lock, 1737 .val = &osnoise_data.stop_tracing, 1738 .max = NULL, 1739 .min = NULL, 1740 }; 1741 1742 /* 1743 * osnoise/stop_tracing_total_us: no limit. 1744 */ 1745 static struct trace_min_max_param osnoise_stop_tracing_total = { 1746 .lock = &interface_lock, 1747 .val = &osnoise_data.stop_tracing_total, 1748 .max = NULL, 1749 .min = NULL, 1750 }; 1751 1752 #ifdef CONFIG_TIMERLAT_TRACER 1753 /* 1754 * osnoise/print_stack: print the stacktrace of the IRQ handler if the total 1755 * latency is higher than val. 1756 */ 1757 static struct trace_min_max_param osnoise_print_stack = { 1758 .lock = &interface_lock, 1759 .val = &osnoise_data.print_stack, 1760 .max = NULL, 1761 .min = NULL, 1762 }; 1763 1764 /* 1765 * osnoise/timerlat_period: min 100 us, max 1 s 1766 */ 1767 u64 timerlat_min_period = 100; 1768 u64 timerlat_max_period = 1000000; 1769 static struct trace_min_max_param timerlat_period = { 1770 .lock = &interface_lock, 1771 .val = &osnoise_data.timerlat_period, 1772 .max = &timerlat_max_period, 1773 .min = &timerlat_min_period, 1774 }; 1775 #endif 1776 1777 static const struct file_operations cpus_fops = { 1778 .open = tracing_open_generic, 1779 .read = osnoise_cpus_read, 1780 .write = osnoise_cpus_write, 1781 .llseek = generic_file_llseek, 1782 }; 1783 1784 /* 1785 * init_tracefs - A function to initialize the tracefs interface files 1786 * 1787 * This function creates entries in tracefs for "osnoise" and "timerlat". 1788 * It creates these directories in the tracing directory, and within that 1789 * directory the use can change and view the configs. 1790 */ 1791 static int init_tracefs(void) 1792 { 1793 struct dentry *top_dir; 1794 struct dentry *tmp; 1795 int ret; 1796 1797 ret = tracing_init_dentry(); 1798 if (ret) 1799 return -ENOMEM; 1800 1801 top_dir = tracefs_create_dir("osnoise", NULL); 1802 if (!top_dir) 1803 return 0; 1804 1805 tmp = tracefs_create_file("period_us", 0640, top_dir, 1806 &osnoise_period, &trace_min_max_fops); 1807 if (!tmp) 1808 goto err; 1809 1810 tmp = tracefs_create_file("runtime_us", 0644, top_dir, 1811 &osnoise_runtime, &trace_min_max_fops); 1812 if (!tmp) 1813 goto err; 1814 1815 tmp = tracefs_create_file("stop_tracing_us", 0640, top_dir, 1816 &osnoise_stop_tracing_in, &trace_min_max_fops); 1817 if (!tmp) 1818 goto err; 1819 1820 tmp = tracefs_create_file("stop_tracing_total_us", 0640, top_dir, 1821 &osnoise_stop_tracing_total, &trace_min_max_fops); 1822 if (!tmp) 1823 goto err; 1824 1825 tmp = trace_create_file("cpus", 0644, top_dir, NULL, &cpus_fops); 1826 if (!tmp) 1827 goto err; 1828 #ifdef CONFIG_TIMERLAT_TRACER 1829 #ifdef CONFIG_STACKTRACE 1830 tmp = tracefs_create_file("print_stack", 0640, top_dir, 1831 &osnoise_print_stack, &trace_min_max_fops); 1832 if (!tmp) 1833 goto err; 1834 #endif 1835 1836 tmp = tracefs_create_file("timerlat_period_us", 0640, top_dir, 1837 &timerlat_period, &trace_min_max_fops); 1838 if (!tmp) 1839 goto err; 1840 #endif 1841 1842 return 0; 1843 1844 err: 1845 tracefs_remove(top_dir); 1846 return -ENOMEM; 1847 } 1848 1849 static int osnoise_hook_events(void) 1850 { 1851 int retval; 1852 1853 /* 1854 * Trace is already hooked, we are re-enabling from 1855 * a stop_tracing_*. 1856 */ 1857 if (trace_osnoise_callback_enabled) 1858 return 0; 1859 1860 retval = hook_irq_events(); 1861 if (retval) 1862 return -EINVAL; 1863 1864 retval = hook_softirq_events(); 1865 if (retval) 1866 goto out_unhook_irq; 1867 1868 retval = hook_thread_events(); 1869 /* 1870 * All fine! 1871 */ 1872 if (!retval) 1873 return 0; 1874 1875 unhook_softirq_events(); 1876 out_unhook_irq: 1877 unhook_irq_events(); 1878 return -EINVAL; 1879 } 1880 1881 static int __osnoise_tracer_start(struct trace_array *tr) 1882 { 1883 int retval; 1884 1885 osn_var_reset_all(); 1886 1887 retval = osnoise_hook_events(); 1888 if (retval) 1889 return retval; 1890 /* 1891 * Make sure NMIs see reseted values. 1892 */ 1893 barrier(); 1894 trace_osnoise_callback_enabled = true; 1895 1896 retval = start_per_cpu_kthreads(tr); 1897 if (retval) { 1898 unhook_irq_events(); 1899 return retval; 1900 } 1901 1902 osnoise_busy = true; 1903 1904 return 0; 1905 } 1906 1907 static void osnoise_tracer_start(struct trace_array *tr) 1908 { 1909 int retval; 1910 1911 if (osnoise_busy) 1912 return; 1913 1914 retval = __osnoise_tracer_start(tr); 1915 if (retval) 1916 pr_err(BANNER "Error starting osnoise tracer\n"); 1917 1918 } 1919 1920 static void osnoise_tracer_stop(struct trace_array *tr) 1921 { 1922 if (!osnoise_busy) 1923 return; 1924 1925 trace_osnoise_callback_enabled = false; 1926 barrier(); 1927 1928 stop_per_cpu_kthreads(); 1929 1930 unhook_irq_events(); 1931 unhook_softirq_events(); 1932 unhook_thread_events(); 1933 1934 osnoise_busy = false; 1935 } 1936 1937 static int osnoise_tracer_init(struct trace_array *tr) 1938 { 1939 1940 /* Only allow one instance to enable this */ 1941 if (osnoise_busy) 1942 return -EBUSY; 1943 1944 osnoise_trace = tr; 1945 tr->max_latency = 0; 1946 1947 osnoise_tracer_start(tr); 1948 1949 return 0; 1950 } 1951 1952 static void osnoise_tracer_reset(struct trace_array *tr) 1953 { 1954 osnoise_tracer_stop(tr); 1955 } 1956 1957 static struct tracer osnoise_tracer __read_mostly = { 1958 .name = "osnoise", 1959 .init = osnoise_tracer_init, 1960 .reset = osnoise_tracer_reset, 1961 .start = osnoise_tracer_start, 1962 .stop = osnoise_tracer_stop, 1963 .print_header = print_osnoise_headers, 1964 .allow_instances = true, 1965 }; 1966 1967 #ifdef CONFIG_TIMERLAT_TRACER 1968 static void timerlat_tracer_start(struct trace_array *tr) 1969 { 1970 int retval; 1971 1972 if (osnoise_busy) 1973 return; 1974 1975 osnoise_data.timerlat_tracer = 1; 1976 1977 retval = __osnoise_tracer_start(tr); 1978 if (retval) 1979 goto out_err; 1980 1981 return; 1982 out_err: 1983 pr_err(BANNER "Error starting timerlat tracer\n"); 1984 } 1985 1986 static void timerlat_tracer_stop(struct trace_array *tr) 1987 { 1988 int cpu; 1989 1990 if (!osnoise_busy) 1991 return; 1992 1993 for_each_online_cpu(cpu) 1994 per_cpu(per_cpu_osnoise_var, cpu).sampling = 0; 1995 1996 osnoise_tracer_stop(tr); 1997 1998 osnoise_data.timerlat_tracer = 0; 1999 } 2000 2001 static int timerlat_tracer_init(struct trace_array *tr) 2002 { 2003 /* Only allow one instance to enable this */ 2004 if (osnoise_busy) 2005 return -EBUSY; 2006 2007 osnoise_trace = tr; 2008 2009 tr->max_latency = 0; 2010 2011 timerlat_tracer_start(tr); 2012 2013 return 0; 2014 } 2015 2016 static void timerlat_tracer_reset(struct trace_array *tr) 2017 { 2018 timerlat_tracer_stop(tr); 2019 } 2020 2021 static struct tracer timerlat_tracer __read_mostly = { 2022 .name = "timerlat", 2023 .init = timerlat_tracer_init, 2024 .reset = timerlat_tracer_reset, 2025 .start = timerlat_tracer_start, 2026 .stop = timerlat_tracer_stop, 2027 .print_header = print_timerlat_headers, 2028 .allow_instances = true, 2029 }; 2030 #endif /* CONFIG_TIMERLAT_TRACER */ 2031 2032 __init static int init_osnoise_tracer(void) 2033 { 2034 int ret; 2035 2036 mutex_init(&interface_lock); 2037 2038 cpumask_copy(&osnoise_cpumask, cpu_all_mask); 2039 2040 ret = register_tracer(&osnoise_tracer); 2041 if (ret) { 2042 pr_err(BANNER "Error registering osnoise!\n"); 2043 return ret; 2044 } 2045 2046 #ifdef CONFIG_TIMERLAT_TRACER 2047 ret = register_tracer(&timerlat_tracer); 2048 if (ret) { 2049 pr_err(BANNER "Error registering timerlat\n"); 2050 return ret; 2051 } 2052 #endif 2053 osnoise_init_hotplug_support(); 2054 2055 init_tracefs(); 2056 2057 return 0; 2058 } 2059 late_initcall(init_osnoise_tracer); 2060