1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * OS Noise Tracer: computes the OS Noise suffered by a running thread. 4 * Timerlat Tracer: measures the wakeup latency of a timer triggered IRQ and thread. 5 * 6 * Based on "hwlat_detector" tracer by: 7 * Copyright (C) 2008-2009 Jon Masters, Red Hat, Inc. <jcm@redhat.com> 8 * Copyright (C) 2013-2016 Steven Rostedt, Red Hat, Inc. <srostedt@redhat.com> 9 * With feedback from Clark Williams <williams@redhat.com> 10 * 11 * And also based on the rtsl tracer presented on: 12 * DE OLIVEIRA, Daniel Bristot, et al. Demystifying the real-time linux 13 * scheduling latency. In: 32nd Euromicro Conference on Real-Time Systems 14 * (ECRTS 2020). Schloss Dagstuhl-Leibniz-Zentrum fur Informatik, 2020. 15 * 16 * Copyright (C) 2021 Daniel Bristot de Oliveira, Red Hat, Inc. <bristot@redhat.com> 17 */ 18 19 #include <linux/kthread.h> 20 #include <linux/tracefs.h> 21 #include <linux/uaccess.h> 22 #include <linux/cpumask.h> 23 #include <linux/delay.h> 24 #include <linux/sched/clock.h> 25 #include <uapi/linux/sched/types.h> 26 #include <linux/sched.h> 27 #include "trace.h" 28 29 #ifdef CONFIG_X86_LOCAL_APIC 30 #include <asm/trace/irq_vectors.h> 31 #undef TRACE_INCLUDE_PATH 32 #undef TRACE_INCLUDE_FILE 33 #endif /* CONFIG_X86_LOCAL_APIC */ 34 35 #include <trace/events/irq.h> 36 #include <trace/events/sched.h> 37 38 #define CREATE_TRACE_POINTS 39 #include <trace/events/osnoise.h> 40 41 /* 42 * Default values. 43 */ 44 #define BANNER "osnoise: " 45 #define DEFAULT_SAMPLE_PERIOD 1000000 /* 1s */ 46 #define DEFAULT_SAMPLE_RUNTIME 1000000 /* 1s */ 47 48 #define DEFAULT_TIMERLAT_PERIOD 1000 /* 1ms */ 49 #define DEFAULT_TIMERLAT_PRIO 95 /* FIFO 95 */ 50 51 /* 52 * osnoise/options entries. 
53 */ 54 enum osnoise_options_index { 55 OSN_DEFAULTS = 0, 56 OSN_WORKLOAD, 57 OSN_PANIC_ON_STOP, 58 OSN_PREEMPT_DISABLE, 59 OSN_IRQ_DISABLE, 60 OSN_MAX 61 }; 62 63 static const char * const osnoise_options_str[OSN_MAX] = { 64 "DEFAULTS", 65 "OSNOISE_WORKLOAD", 66 "PANIC_ON_STOP", 67 "OSNOISE_PREEMPT_DISABLE", 68 "OSNOISE_IRQ_DISABLE" }; 69 70 #define OSN_DEFAULT_OPTIONS 0x2 71 static unsigned long osnoise_options = OSN_DEFAULT_OPTIONS; 72 73 /* 74 * trace_array of the enabled osnoise/timerlat instances. 75 */ 76 struct osnoise_instance { 77 struct list_head list; 78 struct trace_array *tr; 79 }; 80 81 static struct list_head osnoise_instances; 82 83 static bool osnoise_has_registered_instances(void) 84 { 85 return !!list_first_or_null_rcu(&osnoise_instances, 86 struct osnoise_instance, 87 list); 88 } 89 90 /* 91 * osnoise_instance_registered - check if a tr is already registered 92 */ 93 static int osnoise_instance_registered(struct trace_array *tr) 94 { 95 struct osnoise_instance *inst; 96 int found = 0; 97 98 rcu_read_lock(); 99 list_for_each_entry_rcu(inst, &osnoise_instances, list) { 100 if (inst->tr == tr) 101 found = 1; 102 } 103 rcu_read_unlock(); 104 105 return found; 106 } 107 108 /* 109 * osnoise_register_instance - register a new trace instance 110 * 111 * Register a trace_array *tr in the list of instances running 112 * osnoise/timerlat tracers. 113 */ 114 static int osnoise_register_instance(struct trace_array *tr) 115 { 116 struct osnoise_instance *inst; 117 118 /* 119 * register/unregister serialization is provided by trace's 120 * trace_types_lock. 
121 */ 122 lockdep_assert_held(&trace_types_lock); 123 124 inst = kmalloc(sizeof(*inst), GFP_KERNEL); 125 if (!inst) 126 return -ENOMEM; 127 128 INIT_LIST_HEAD_RCU(&inst->list); 129 inst->tr = tr; 130 list_add_tail_rcu(&inst->list, &osnoise_instances); 131 132 return 0; 133 } 134 135 /* 136 * osnoise_unregister_instance - unregister a registered trace instance 137 * 138 * Remove the trace_array *tr from the list of instances running 139 * osnoise/timerlat tracers. 140 */ 141 static void osnoise_unregister_instance(struct trace_array *tr) 142 { 143 struct osnoise_instance *inst; 144 int found = 0; 145 146 /* 147 * register/unregister serialization is provided by trace's 148 * trace_types_lock. 149 */ 150 lockdep_assert_held(&trace_types_lock); 151 152 list_for_each_entry_rcu(inst, &osnoise_instances, list) { 153 if (inst->tr == tr) { 154 list_del_rcu(&inst->list); 155 found = 1; 156 break; 157 } 158 } 159 160 if (!found) 161 return; 162 163 kvfree_rcu(inst); 164 } 165 166 /* 167 * NMI runtime info. 168 */ 169 struct osn_nmi { 170 u64 count; 171 u64 delta_start; 172 }; 173 174 /* 175 * IRQ runtime info. 176 */ 177 struct osn_irq { 178 u64 count; 179 u64 arrival_time; 180 u64 delta_start; 181 }; 182 183 #define IRQ_CONTEXT 0 184 #define THREAD_CONTEXT 1 185 /* 186 * sofirq runtime info. 187 */ 188 struct osn_softirq { 189 u64 count; 190 u64 arrival_time; 191 u64 delta_start; 192 }; 193 194 /* 195 * thread runtime info. 196 */ 197 struct osn_thread { 198 u64 count; 199 u64 arrival_time; 200 u64 delta_start; 201 }; 202 203 /* 204 * Runtime information: this structure saves the runtime information used by 205 * one sampling thread. 206 */ 207 struct osnoise_variables { 208 struct task_struct *kthread; 209 bool sampling; 210 pid_t pid; 211 struct osn_nmi nmi; 212 struct osn_irq irq; 213 struct osn_softirq softirq; 214 struct osn_thread thread; 215 local_t int_counter; 216 }; 217 218 /* 219 * Per-cpu runtime information. 
220 */ 221 DEFINE_PER_CPU(struct osnoise_variables, per_cpu_osnoise_var); 222 223 /* 224 * this_cpu_osn_var - Return the per-cpu osnoise_variables on its relative CPU 225 */ 226 static inline struct osnoise_variables *this_cpu_osn_var(void) 227 { 228 return this_cpu_ptr(&per_cpu_osnoise_var); 229 } 230 231 #ifdef CONFIG_TIMERLAT_TRACER 232 /* 233 * Runtime information for the timer mode. 234 */ 235 struct timerlat_variables { 236 struct task_struct *kthread; 237 struct hrtimer timer; 238 u64 rel_period; 239 u64 abs_period; 240 bool tracing_thread; 241 u64 count; 242 }; 243 244 DEFINE_PER_CPU(struct timerlat_variables, per_cpu_timerlat_var); 245 246 /* 247 * this_cpu_tmr_var - Return the per-cpu timerlat_variables on its relative CPU 248 */ 249 static inline struct timerlat_variables *this_cpu_tmr_var(void) 250 { 251 return this_cpu_ptr(&per_cpu_timerlat_var); 252 } 253 254 /* 255 * tlat_var_reset - Reset the values of the given timerlat_variables 256 */ 257 static inline void tlat_var_reset(void) 258 { 259 struct timerlat_variables *tlat_var; 260 int cpu; 261 /* 262 * So far, all the values are initialized as 0, so 263 * zeroing the structure is perfect. 264 */ 265 for_each_cpu(cpu, cpu_online_mask) { 266 tlat_var = per_cpu_ptr(&per_cpu_timerlat_var, cpu); 267 memset(tlat_var, 0, sizeof(*tlat_var)); 268 } 269 } 270 #else /* CONFIG_TIMERLAT_TRACER */ 271 #define tlat_var_reset() do {} while (0) 272 #endif /* CONFIG_TIMERLAT_TRACER */ 273 274 /* 275 * osn_var_reset - Reset the values of the given osnoise_variables 276 */ 277 static inline void osn_var_reset(void) 278 { 279 struct osnoise_variables *osn_var; 280 int cpu; 281 282 /* 283 * So far, all the values are initialized as 0, so 284 * zeroing the structure is perfect. 
285 */ 286 for_each_cpu(cpu, cpu_online_mask) { 287 osn_var = per_cpu_ptr(&per_cpu_osnoise_var, cpu); 288 memset(osn_var, 0, sizeof(*osn_var)); 289 } 290 } 291 292 /* 293 * osn_var_reset_all - Reset the value of all per-cpu osnoise_variables 294 */ 295 static inline void osn_var_reset_all(void) 296 { 297 osn_var_reset(); 298 tlat_var_reset(); 299 } 300 301 /* 302 * Tells NMIs to call back to the osnoise tracer to record timestamps. 303 */ 304 bool trace_osnoise_callback_enabled; 305 306 /* 307 * osnoise sample structure definition. Used to store the statistics of a 308 * sample run. 309 */ 310 struct osnoise_sample { 311 u64 runtime; /* runtime */ 312 u64 noise; /* noise */ 313 u64 max_sample; /* max single noise sample */ 314 int hw_count; /* # HW (incl. hypervisor) interference */ 315 int nmi_count; /* # NMIs during this sample */ 316 int irq_count; /* # IRQs during this sample */ 317 int softirq_count; /* # softirqs during this sample */ 318 int thread_count; /* # threads during this sample */ 319 }; 320 321 #ifdef CONFIG_TIMERLAT_TRACER 322 /* 323 * timerlat sample structure definition. Used to store the statistics of 324 * a sample run. 325 */ 326 struct timerlat_sample { 327 u64 timer_latency; /* timer_latency */ 328 unsigned int seqnum; /* unique sequence */ 329 int context; /* timer context */ 330 }; 331 #endif 332 333 /* 334 * Protect the interface. 335 */ 336 struct mutex interface_lock; 337 338 /* 339 * Tracer data. 
340 */ 341 static struct osnoise_data { 342 u64 sample_period; /* total sampling period */ 343 u64 sample_runtime; /* active sampling portion of period */ 344 u64 stop_tracing; /* stop trace in the internal operation (loop/irq) */ 345 u64 stop_tracing_total; /* stop trace in the final operation (report/thread) */ 346 #ifdef CONFIG_TIMERLAT_TRACER 347 u64 timerlat_period; /* timerlat period */ 348 u64 print_stack; /* print IRQ stack if total > */ 349 int timerlat_tracer; /* timerlat tracer */ 350 #endif 351 bool tainted; /* infor users and developers about a problem */ 352 } osnoise_data = { 353 .sample_period = DEFAULT_SAMPLE_PERIOD, 354 .sample_runtime = DEFAULT_SAMPLE_RUNTIME, 355 .stop_tracing = 0, 356 .stop_tracing_total = 0, 357 #ifdef CONFIG_TIMERLAT_TRACER 358 .print_stack = 0, 359 .timerlat_period = DEFAULT_TIMERLAT_PERIOD, 360 .timerlat_tracer = 0, 361 #endif 362 }; 363 364 #ifdef CONFIG_TIMERLAT_TRACER 365 static inline bool timerlat_enabled(void) 366 { 367 return osnoise_data.timerlat_tracer; 368 } 369 370 static inline int timerlat_softirq_exit(struct osnoise_variables *osn_var) 371 { 372 struct timerlat_variables *tlat_var = this_cpu_tmr_var(); 373 /* 374 * If the timerlat is enabled, but the irq handler did 375 * not run yet enabling timerlat_tracer, do not trace. 376 */ 377 if (!tlat_var->tracing_thread) { 378 osn_var->softirq.arrival_time = 0; 379 osn_var->softirq.delta_start = 0; 380 return 0; 381 } 382 return 1; 383 } 384 385 static inline int timerlat_thread_exit(struct osnoise_variables *osn_var) 386 { 387 struct timerlat_variables *tlat_var = this_cpu_tmr_var(); 388 /* 389 * If the timerlat is enabled, but the irq handler did 390 * not run yet enabling timerlat_tracer, do not trace. 
391 */ 392 if (!tlat_var->tracing_thread) { 393 osn_var->thread.delta_start = 0; 394 osn_var->thread.arrival_time = 0; 395 return 0; 396 } 397 return 1; 398 } 399 #else /* CONFIG_TIMERLAT_TRACER */ 400 static inline bool timerlat_enabled(void) 401 { 402 return false; 403 } 404 405 static inline int timerlat_softirq_exit(struct osnoise_variables *osn_var) 406 { 407 return 1; 408 } 409 static inline int timerlat_thread_exit(struct osnoise_variables *osn_var) 410 { 411 return 1; 412 } 413 #endif 414 415 #ifdef CONFIG_PREEMPT_RT 416 /* 417 * Print the osnoise header info. 418 */ 419 static void print_osnoise_headers(struct seq_file *s) 420 { 421 if (osnoise_data.tainted) 422 seq_puts(s, "# osnoise is tainted!\n"); 423 424 seq_puts(s, "# _-------=> irqs-off\n"); 425 seq_puts(s, "# / _------=> need-resched\n"); 426 seq_puts(s, "# | / _-----=> need-resched-lazy\n"); 427 seq_puts(s, "# || / _----=> hardirq/softirq\n"); 428 seq_puts(s, "# ||| / _---=> preempt-depth\n"); 429 seq_puts(s, "# |||| / _--=> preempt-lazy-depth\n"); 430 seq_puts(s, "# ||||| / _-=> migrate-disable\n"); 431 432 seq_puts(s, "# |||||| / "); 433 seq_puts(s, " MAX\n"); 434 435 seq_puts(s, "# ||||| / "); 436 seq_puts(s, " SINGLE Interference counters:\n"); 437 438 seq_puts(s, "# ||||||| RUNTIME "); 439 seq_puts(s, " NOISE %% OF CPU NOISE +-----------------------------+\n"); 440 441 seq_puts(s, "# TASK-PID CPU# ||||||| TIMESTAMP IN US "); 442 seq_puts(s, " IN US AVAILABLE IN US HW NMI IRQ SIRQ THREAD\n"); 443 444 seq_puts(s, "# | | | ||||||| | | "); 445 seq_puts(s, " | | | | | | | |\n"); 446 } 447 #else /* CONFIG_PREEMPT_RT */ 448 static void print_osnoise_headers(struct seq_file *s) 449 { 450 if (osnoise_data.tainted) 451 seq_puts(s, "# osnoise is tainted!\n"); 452 453 seq_puts(s, "# _-----=> irqs-off\n"); 454 seq_puts(s, "# / _----=> need-resched\n"); 455 seq_puts(s, "# | / _---=> hardirq/softirq\n"); 456 seq_puts(s, "# || / _--=> preempt-depth\n"); 457 seq_puts(s, "# ||| / _-=> migrate-disable "); 458 
seq_puts(s, " MAX\n"); 459 seq_puts(s, "# |||| / delay "); 460 seq_puts(s, " SINGLE Interference counters:\n"); 461 462 seq_puts(s, "# ||||| RUNTIME "); 463 seq_puts(s, " NOISE %% OF CPU NOISE +-----------------------------+\n"); 464 465 seq_puts(s, "# TASK-PID CPU# ||||| TIMESTAMP IN US "); 466 seq_puts(s, " IN US AVAILABLE IN US HW NMI IRQ SIRQ THREAD\n"); 467 468 seq_puts(s, "# | | | ||||| | | "); 469 seq_puts(s, " | | | | | | | |\n"); 470 } 471 #endif /* CONFIG_PREEMPT_RT */ 472 473 /* 474 * osnoise_taint - report an osnoise error. 475 */ 476 #define osnoise_taint(msg) ({ \ 477 struct osnoise_instance *inst; \ 478 struct trace_buffer *buffer; \ 479 \ 480 rcu_read_lock(); \ 481 list_for_each_entry_rcu(inst, &osnoise_instances, list) { \ 482 buffer = inst->tr->array_buffer.buffer; \ 483 trace_array_printk_buf(buffer, _THIS_IP_, msg); \ 484 } \ 485 rcu_read_unlock(); \ 486 osnoise_data.tainted = true; \ 487 }) 488 489 /* 490 * Record an osnoise_sample into the tracer buffer. 491 */ 492 static void 493 __trace_osnoise_sample(struct osnoise_sample *sample, struct trace_buffer *buffer) 494 { 495 struct trace_event_call *call = &event_osnoise; 496 struct ring_buffer_event *event; 497 struct osnoise_entry *entry; 498 499 event = trace_buffer_lock_reserve(buffer, TRACE_OSNOISE, sizeof(*entry), 500 tracing_gen_ctx()); 501 if (!event) 502 return; 503 entry = ring_buffer_event_data(event); 504 entry->runtime = sample->runtime; 505 entry->noise = sample->noise; 506 entry->max_sample = sample->max_sample; 507 entry->hw_count = sample->hw_count; 508 entry->nmi_count = sample->nmi_count; 509 entry->irq_count = sample->irq_count; 510 entry->softirq_count = sample->softirq_count; 511 entry->thread_count = sample->thread_count; 512 513 if (!call_filter_check_discard(call, entry, buffer, event)) 514 trace_buffer_unlock_commit_nostack(buffer, event); 515 } 516 517 /* 518 * Record an osnoise_sample on all osnoise instances. 
519 */ 520 static void trace_osnoise_sample(struct osnoise_sample *sample) 521 { 522 struct osnoise_instance *inst; 523 struct trace_buffer *buffer; 524 525 rcu_read_lock(); 526 list_for_each_entry_rcu(inst, &osnoise_instances, list) { 527 buffer = inst->tr->array_buffer.buffer; 528 __trace_osnoise_sample(sample, buffer); 529 } 530 rcu_read_unlock(); 531 } 532 533 #ifdef CONFIG_TIMERLAT_TRACER 534 /* 535 * Print the timerlat header info. 536 */ 537 #ifdef CONFIG_PREEMPT_RT 538 static void print_timerlat_headers(struct seq_file *s) 539 { 540 seq_puts(s, "# _-------=> irqs-off\n"); 541 seq_puts(s, "# / _------=> need-resched\n"); 542 seq_puts(s, "# | / _-----=> need-resched-lazy\n"); 543 seq_puts(s, "# || / _----=> hardirq/softirq\n"); 544 seq_puts(s, "# ||| / _---=> preempt-depth\n"); 545 seq_puts(s, "# |||| / _--=> preempt-lazy-depth\n"); 546 seq_puts(s, "# ||||| / _-=> migrate-disable\n"); 547 seq_puts(s, "# |||||| /\n"); 548 seq_puts(s, "# ||||||| ACTIVATION\n"); 549 seq_puts(s, "# TASK-PID CPU# ||||||| TIMESTAMP ID "); 550 seq_puts(s, " CONTEXT LATENCY\n"); 551 seq_puts(s, "# | | | ||||||| | | "); 552 seq_puts(s, " | |\n"); 553 } 554 #else /* CONFIG_PREEMPT_RT */ 555 static void print_timerlat_headers(struct seq_file *s) 556 { 557 seq_puts(s, "# _-----=> irqs-off\n"); 558 seq_puts(s, "# / _----=> need-resched\n"); 559 seq_puts(s, "# | / _---=> hardirq/softirq\n"); 560 seq_puts(s, "# || / _--=> preempt-depth\n"); 561 seq_puts(s, "# ||| / _-=> migrate-disable\n"); 562 seq_puts(s, "# |||| / delay\n"); 563 seq_puts(s, "# ||||| ACTIVATION\n"); 564 seq_puts(s, "# TASK-PID CPU# ||||| TIMESTAMP ID "); 565 seq_puts(s, " CONTEXT LATENCY\n"); 566 seq_puts(s, "# | | | ||||| | | "); 567 seq_puts(s, " | |\n"); 568 } 569 #endif /* CONFIG_PREEMPT_RT */ 570 571 static void 572 __trace_timerlat_sample(struct timerlat_sample *sample, struct trace_buffer *buffer) 573 { 574 struct trace_event_call *call = &event_osnoise; 575 struct ring_buffer_event *event; 576 struct timerlat_entry 
*entry; 577 578 event = trace_buffer_lock_reserve(buffer, TRACE_TIMERLAT, sizeof(*entry), 579 tracing_gen_ctx()); 580 if (!event) 581 return; 582 entry = ring_buffer_event_data(event); 583 entry->seqnum = sample->seqnum; 584 entry->context = sample->context; 585 entry->timer_latency = sample->timer_latency; 586 587 if (!call_filter_check_discard(call, entry, buffer, event)) 588 trace_buffer_unlock_commit_nostack(buffer, event); 589 } 590 591 /* 592 * Record an timerlat_sample into the tracer buffer. 593 */ 594 static void trace_timerlat_sample(struct timerlat_sample *sample) 595 { 596 struct osnoise_instance *inst; 597 struct trace_buffer *buffer; 598 599 rcu_read_lock(); 600 list_for_each_entry_rcu(inst, &osnoise_instances, list) { 601 buffer = inst->tr->array_buffer.buffer; 602 __trace_timerlat_sample(sample, buffer); 603 } 604 rcu_read_unlock(); 605 } 606 607 #ifdef CONFIG_STACKTRACE 608 609 #define MAX_CALLS 256 610 611 /* 612 * Stack trace will take place only at IRQ level, so, no need 613 * to control nesting here. 614 */ 615 struct trace_stack { 616 int stack_size; 617 int nr_entries; 618 unsigned long calls[MAX_CALLS]; 619 }; 620 621 static DEFINE_PER_CPU(struct trace_stack, trace_stack); 622 623 /* 624 * timerlat_save_stack - save a stack trace without printing 625 * 626 * Save the current stack trace without printing. The 627 * stack will be printed later, after the end of the measurement. 
628 */ 629 static void timerlat_save_stack(int skip) 630 { 631 unsigned int size, nr_entries; 632 struct trace_stack *fstack; 633 634 fstack = this_cpu_ptr(&trace_stack); 635 636 size = ARRAY_SIZE(fstack->calls); 637 638 nr_entries = stack_trace_save(fstack->calls, size, skip); 639 640 fstack->stack_size = nr_entries * sizeof(unsigned long); 641 fstack->nr_entries = nr_entries; 642 643 return; 644 645 } 646 647 static void 648 __timerlat_dump_stack(struct trace_buffer *buffer, struct trace_stack *fstack, unsigned int size) 649 { 650 struct trace_event_call *call = &event_osnoise; 651 struct ring_buffer_event *event; 652 struct stack_entry *entry; 653 654 event = trace_buffer_lock_reserve(buffer, TRACE_STACK, sizeof(*entry) + size, 655 tracing_gen_ctx()); 656 if (!event) 657 return; 658 659 entry = ring_buffer_event_data(event); 660 661 memcpy(&entry->caller, fstack->calls, size); 662 entry->size = fstack->nr_entries; 663 664 if (!call_filter_check_discard(call, entry, buffer, event)) 665 trace_buffer_unlock_commit_nostack(buffer, event); 666 } 667 668 /* 669 * timerlat_dump_stack - dump a stack trace previously saved 670 */ 671 static void timerlat_dump_stack(u64 latency) 672 { 673 struct osnoise_instance *inst; 674 struct trace_buffer *buffer; 675 struct trace_stack *fstack; 676 unsigned int size; 677 678 /* 679 * trace only if latency > print_stack config, if enabled. 
680 */ 681 if (!osnoise_data.print_stack || osnoise_data.print_stack > latency) 682 return; 683 684 preempt_disable_notrace(); 685 fstack = this_cpu_ptr(&trace_stack); 686 size = fstack->stack_size; 687 688 rcu_read_lock(); 689 list_for_each_entry_rcu(inst, &osnoise_instances, list) { 690 buffer = inst->tr->array_buffer.buffer; 691 __timerlat_dump_stack(buffer, fstack, size); 692 693 } 694 rcu_read_unlock(); 695 preempt_enable_notrace(); 696 } 697 #else /* CONFIG_STACKTRACE */ 698 #define timerlat_dump_stack(u64 latency) do {} while (0) 699 #define timerlat_save_stack(a) do {} while (0) 700 #endif /* CONFIG_STACKTRACE */ 701 #endif /* CONFIG_TIMERLAT_TRACER */ 702 703 /* 704 * Macros to encapsulate the time capturing infrastructure. 705 */ 706 #define time_get() trace_clock_local() 707 #define time_to_us(x) div_u64(x, 1000) 708 #define time_sub(a, b) ((a) - (b)) 709 710 /* 711 * cond_move_irq_delta_start - Forward the delta_start of a running IRQ 712 * 713 * If an IRQ is preempted by an NMI, its delta_start is pushed forward 714 * to discount the NMI interference. 715 * 716 * See get_int_safe_duration(). 717 */ 718 static inline void 719 cond_move_irq_delta_start(struct osnoise_variables *osn_var, u64 duration) 720 { 721 if (osn_var->irq.delta_start) 722 osn_var->irq.delta_start += duration; 723 } 724 725 #ifndef CONFIG_PREEMPT_RT 726 /* 727 * cond_move_softirq_delta_start - Forward the delta_start of a running softirq. 728 * 729 * If a softirq is preempted by an IRQ or NMI, its delta_start is pushed 730 * forward to discount the interference. 731 * 732 * See get_int_safe_duration(). 
733 */ 734 static inline void 735 cond_move_softirq_delta_start(struct osnoise_variables *osn_var, u64 duration) 736 { 737 if (osn_var->softirq.delta_start) 738 osn_var->softirq.delta_start += duration; 739 } 740 #else /* CONFIG_PREEMPT_RT */ 741 #define cond_move_softirq_delta_start(osn_var, duration) do {} while (0) 742 #endif 743 744 /* 745 * cond_move_thread_delta_start - Forward the delta_start of a running thread 746 * 747 * If a noisy thread is preempted by an softirq, IRQ or NMI, its delta_start 748 * is pushed forward to discount the interference. 749 * 750 * See get_int_safe_duration(). 751 */ 752 static inline void 753 cond_move_thread_delta_start(struct osnoise_variables *osn_var, u64 duration) 754 { 755 if (osn_var->thread.delta_start) 756 osn_var->thread.delta_start += duration; 757 } 758 759 /* 760 * get_int_safe_duration - Get the duration of a window 761 * 762 * The irq, softirq and thread varaibles need to have its duration without 763 * the interference from higher priority interrupts. Instead of keeping a 764 * variable to discount the interrupt interference from these variables, the 765 * starting time of these variables are pushed forward with the interrupt's 766 * duration. In this way, a single variable is used to: 767 * 768 * - Know if a given window is being measured. 769 * - Account its duration. 770 * - Discount the interference. 771 * 772 * To avoid getting inconsistent values, e.g.,: 773 * 774 * now = time_get() 775 * ---> interrupt! 776 * delta_start -= int duration; 777 * <--- 778 * duration = now - delta_start; 779 * 780 * result: negative duration if the variable duration before the 781 * interrupt was smaller than the interrupt execution. 782 * 783 * A counter of interrupts is used. If the counter increased, try 784 * to capture an interference safe duration. 
785 */ 786 static inline s64 787 get_int_safe_duration(struct osnoise_variables *osn_var, u64 *delta_start) 788 { 789 u64 int_counter, now; 790 s64 duration; 791 792 do { 793 int_counter = local_read(&osn_var->int_counter); 794 /* synchronize with interrupts */ 795 barrier(); 796 797 now = time_get(); 798 duration = (now - *delta_start); 799 800 /* synchronize with interrupts */ 801 barrier(); 802 } while (int_counter != local_read(&osn_var->int_counter)); 803 804 /* 805 * This is an evidence of race conditions that cause 806 * a value to be "discounted" too much. 807 */ 808 if (duration < 0) 809 osnoise_taint("Negative duration!\n"); 810 811 *delta_start = 0; 812 813 return duration; 814 } 815 816 /* 817 * 818 * set_int_safe_time - Save the current time on *time, aware of interference 819 * 820 * Get the time, taking into consideration a possible interference from 821 * higher priority interrupts. 822 * 823 * See get_int_safe_duration() for an explanation. 824 */ 825 static u64 826 set_int_safe_time(struct osnoise_variables *osn_var, u64 *time) 827 { 828 u64 int_counter; 829 830 do { 831 int_counter = local_read(&osn_var->int_counter); 832 /* synchronize with interrupts */ 833 barrier(); 834 835 *time = time_get(); 836 837 /* synchronize with interrupts */ 838 barrier(); 839 } while (int_counter != local_read(&osn_var->int_counter)); 840 841 return int_counter; 842 } 843 844 #ifdef CONFIG_TIMERLAT_TRACER 845 /* 846 * copy_int_safe_time - Copy *src into *desc aware of interference 847 */ 848 static u64 849 copy_int_safe_time(struct osnoise_variables *osn_var, u64 *dst, u64 *src) 850 { 851 u64 int_counter; 852 853 do { 854 int_counter = local_read(&osn_var->int_counter); 855 /* synchronize with interrupts */ 856 barrier(); 857 858 *dst = *src; 859 860 /* synchronize with interrupts */ 861 barrier(); 862 } while (int_counter != local_read(&osn_var->int_counter)); 863 864 return int_counter; 865 } 866 #endif /* CONFIG_TIMERLAT_TRACER */ 867 868 /* 869 * 
trace_osnoise_callback - NMI entry/exit callback 870 * 871 * This function is called at the entry and exit NMI code. The bool enter 872 * distinguishes between either case. This function is used to note a NMI 873 * occurrence, compute the noise caused by the NMI, and to remove the noise 874 * it is potentially causing on other interference variables. 875 */ 876 void trace_osnoise_callback(bool enter) 877 { 878 struct osnoise_variables *osn_var = this_cpu_osn_var(); 879 u64 duration; 880 881 if (!osn_var->sampling) 882 return; 883 884 /* 885 * Currently trace_clock_local() calls sched_clock() and the 886 * generic version is not NMI safe. 887 */ 888 if (!IS_ENABLED(CONFIG_GENERIC_SCHED_CLOCK)) { 889 if (enter) { 890 osn_var->nmi.delta_start = time_get(); 891 local_inc(&osn_var->int_counter); 892 } else { 893 duration = time_get() - osn_var->nmi.delta_start; 894 895 trace_nmi_noise(osn_var->nmi.delta_start, duration); 896 897 cond_move_irq_delta_start(osn_var, duration); 898 cond_move_softirq_delta_start(osn_var, duration); 899 cond_move_thread_delta_start(osn_var, duration); 900 } 901 } 902 903 if (enter) 904 osn_var->nmi.count++; 905 } 906 907 /* 908 * osnoise_trace_irq_entry - Note the starting of an IRQ 909 * 910 * Save the starting time of an IRQ. As IRQs are non-preemptive to other IRQs, 911 * it is safe to use a single variable (ons_var->irq) to save the statistics. 912 * The arrival_time is used to report... the arrival time. The delta_start 913 * is used to compute the duration at the IRQ exit handler. See 914 * cond_move_irq_delta_start(). 915 */ 916 void osnoise_trace_irq_entry(int id) 917 { 918 struct osnoise_variables *osn_var = this_cpu_osn_var(); 919 920 if (!osn_var->sampling) 921 return; 922 /* 923 * This value will be used in the report, but not to compute 924 * the execution time, so it is safe to get it unsafe. 
925 */ 926 osn_var->irq.arrival_time = time_get(); 927 set_int_safe_time(osn_var, &osn_var->irq.delta_start); 928 osn_var->irq.count++; 929 930 local_inc(&osn_var->int_counter); 931 } 932 933 /* 934 * osnoise_irq_exit - Note the end of an IRQ, sava data and trace 935 * 936 * Computes the duration of the IRQ noise, and trace it. Also discounts the 937 * interference from other sources of noise could be currently being accounted. 938 */ 939 void osnoise_trace_irq_exit(int id, const char *desc) 940 { 941 struct osnoise_variables *osn_var = this_cpu_osn_var(); 942 s64 duration; 943 944 if (!osn_var->sampling) 945 return; 946 947 duration = get_int_safe_duration(osn_var, &osn_var->irq.delta_start); 948 trace_irq_noise(id, desc, osn_var->irq.arrival_time, duration); 949 osn_var->irq.arrival_time = 0; 950 cond_move_softirq_delta_start(osn_var, duration); 951 cond_move_thread_delta_start(osn_var, duration); 952 } 953 954 /* 955 * trace_irqentry_callback - Callback to the irq:irq_entry traceevent 956 * 957 * Used to note the starting of an IRQ occurece. 958 */ 959 static void trace_irqentry_callback(void *data, int irq, 960 struct irqaction *action) 961 { 962 osnoise_trace_irq_entry(irq); 963 } 964 965 /* 966 * trace_irqexit_callback - Callback to the irq:irq_exit traceevent 967 * 968 * Used to note the end of an IRQ occurece. 969 */ 970 static void trace_irqexit_callback(void *data, int irq, 971 struct irqaction *action, int ret) 972 { 973 osnoise_trace_irq_exit(irq, action->name); 974 } 975 976 /* 977 * arch specific register function. 978 */ 979 int __weak osnoise_arch_register(void) 980 { 981 return 0; 982 } 983 984 /* 985 * arch specific unregister function. 986 */ 987 void __weak osnoise_arch_unregister(void) 988 { 989 return; 990 } 991 992 /* 993 * hook_irq_events - Hook IRQ handling events 994 * 995 * This function hooks the IRQ related callbacks to the respective trace 996 * events. 
997 */ 998 static int hook_irq_events(void) 999 { 1000 int ret; 1001 1002 ret = register_trace_irq_handler_entry(trace_irqentry_callback, NULL); 1003 if (ret) 1004 goto out_err; 1005 1006 ret = register_trace_irq_handler_exit(trace_irqexit_callback, NULL); 1007 if (ret) 1008 goto out_unregister_entry; 1009 1010 ret = osnoise_arch_register(); 1011 if (ret) 1012 goto out_irq_exit; 1013 1014 return 0; 1015 1016 out_irq_exit: 1017 unregister_trace_irq_handler_exit(trace_irqexit_callback, NULL); 1018 out_unregister_entry: 1019 unregister_trace_irq_handler_entry(trace_irqentry_callback, NULL); 1020 out_err: 1021 return -EINVAL; 1022 } 1023 1024 /* 1025 * unhook_irq_events - Unhook IRQ handling events 1026 * 1027 * This function unhooks the IRQ related callbacks to the respective trace 1028 * events. 1029 */ 1030 static void unhook_irq_events(void) 1031 { 1032 osnoise_arch_unregister(); 1033 unregister_trace_irq_handler_exit(trace_irqexit_callback, NULL); 1034 unregister_trace_irq_handler_entry(trace_irqentry_callback, NULL); 1035 } 1036 1037 #ifndef CONFIG_PREEMPT_RT 1038 /* 1039 * trace_softirq_entry_callback - Note the starting of a softirq 1040 * 1041 * Save the starting time of a softirq. As softirqs are non-preemptive to 1042 * other softirqs, it is safe to use a single variable (ons_var->softirq) 1043 * to save the statistics. The arrival_time is used to report... the 1044 * arrival time. The delta_start is used to compute the duration at the 1045 * softirq exit handler. See cond_move_softirq_delta_start(). 1046 */ 1047 static void trace_softirq_entry_callback(void *data, unsigned int vec_nr) 1048 { 1049 struct osnoise_variables *osn_var = this_cpu_osn_var(); 1050 1051 if (!osn_var->sampling) 1052 return; 1053 /* 1054 * This value will be used in the report, but not to compute 1055 * the execution time, so it is safe to get it unsafe. 
1056 */ 1057 osn_var->softirq.arrival_time = time_get(); 1058 set_int_safe_time(osn_var, &osn_var->softirq.delta_start); 1059 osn_var->softirq.count++; 1060 1061 local_inc(&osn_var->int_counter); 1062 } 1063 1064 /* 1065 * trace_softirq_exit_callback - Note the end of an softirq 1066 * 1067 * Computes the duration of the softirq noise, and trace it. Also discounts the 1068 * interference from other sources of noise could be currently being accounted. 1069 */ 1070 static void trace_softirq_exit_callback(void *data, unsigned int vec_nr) 1071 { 1072 struct osnoise_variables *osn_var = this_cpu_osn_var(); 1073 s64 duration; 1074 1075 if (!osn_var->sampling) 1076 return; 1077 1078 if (unlikely(timerlat_enabled())) 1079 if (!timerlat_softirq_exit(osn_var)) 1080 return; 1081 1082 duration = get_int_safe_duration(osn_var, &osn_var->softirq.delta_start); 1083 trace_softirq_noise(vec_nr, osn_var->softirq.arrival_time, duration); 1084 cond_move_thread_delta_start(osn_var, duration); 1085 osn_var->softirq.arrival_time = 0; 1086 } 1087 1088 /* 1089 * hook_softirq_events - Hook softirq handling events 1090 * 1091 * This function hooks the softirq related callbacks to the respective trace 1092 * events. 1093 */ 1094 static int hook_softirq_events(void) 1095 { 1096 int ret; 1097 1098 ret = register_trace_softirq_entry(trace_softirq_entry_callback, NULL); 1099 if (ret) 1100 goto out_err; 1101 1102 ret = register_trace_softirq_exit(trace_softirq_exit_callback, NULL); 1103 if (ret) 1104 goto out_unreg_entry; 1105 1106 return 0; 1107 1108 out_unreg_entry: 1109 unregister_trace_softirq_entry(trace_softirq_entry_callback, NULL); 1110 out_err: 1111 return -EINVAL; 1112 } 1113 1114 /* 1115 * unhook_softirq_events - Unhook softirq handling events 1116 * 1117 * This function hooks the softirq related callbacks to the respective trace 1118 * events. 
1119 */ 1120 static void unhook_softirq_events(void) 1121 { 1122 unregister_trace_softirq_entry(trace_softirq_entry_callback, NULL); 1123 unregister_trace_softirq_exit(trace_softirq_exit_callback, NULL); 1124 } 1125 #else /* CONFIG_PREEMPT_RT */ 1126 /* 1127 * softirq are threads on the PREEMPT_RT mode. 1128 */ 1129 static int hook_softirq_events(void) 1130 { 1131 return 0; 1132 } 1133 static void unhook_softirq_events(void) 1134 { 1135 } 1136 #endif 1137 1138 /* 1139 * thread_entry - Record the starting of a thread noise window 1140 * 1141 * It saves the context switch time for a noisy thread, and increments 1142 * the interference counters. 1143 */ 1144 static void 1145 thread_entry(struct osnoise_variables *osn_var, struct task_struct *t) 1146 { 1147 if (!osn_var->sampling) 1148 return; 1149 /* 1150 * The arrival time will be used in the report, but not to compute 1151 * the execution time, so it is safe to get it unsafe. 1152 */ 1153 osn_var->thread.arrival_time = time_get(); 1154 1155 set_int_safe_time(osn_var, &osn_var->thread.delta_start); 1156 1157 osn_var->thread.count++; 1158 local_inc(&osn_var->int_counter); 1159 } 1160 1161 /* 1162 * thread_exit - Report the end of a thread noise window 1163 * 1164 * It computes the total noise from a thread, tracing if needed. 
1165 */ 1166 static void 1167 thread_exit(struct osnoise_variables *osn_var, struct task_struct *t) 1168 { 1169 s64 duration; 1170 1171 if (!osn_var->sampling) 1172 return; 1173 1174 if (unlikely(timerlat_enabled())) 1175 if (!timerlat_thread_exit(osn_var)) 1176 return; 1177 1178 duration = get_int_safe_duration(osn_var, &osn_var->thread.delta_start); 1179 1180 trace_thread_noise(t, osn_var->thread.arrival_time, duration); 1181 1182 osn_var->thread.arrival_time = 0; 1183 } 1184 1185 /* 1186 * trace_sched_switch - sched:sched_switch trace event handler 1187 * 1188 * This function is hooked to the sched:sched_switch trace event, and it is 1189 * used to record the beginning and to report the end of a thread noise window. 1190 */ 1191 static void 1192 trace_sched_switch_callback(void *data, bool preempt, 1193 struct task_struct *p, 1194 struct task_struct *n, 1195 unsigned int prev_state) 1196 { 1197 struct osnoise_variables *osn_var = this_cpu_osn_var(); 1198 int workload = test_bit(OSN_WORKLOAD, &osnoise_options); 1199 1200 if ((p->pid != osn_var->pid) || !workload) 1201 thread_exit(osn_var, p); 1202 1203 if ((n->pid != osn_var->pid) || !workload) 1204 thread_entry(osn_var, n); 1205 } 1206 1207 /* 1208 * hook_thread_events - Hook the insturmentation for thread noise 1209 * 1210 * Hook the osnoise tracer callbacks to handle the noise from other 1211 * threads on the necessary kernel events. 1212 */ 1213 static int hook_thread_events(void) 1214 { 1215 int ret; 1216 1217 ret = register_trace_sched_switch(trace_sched_switch_callback, NULL); 1218 if (ret) 1219 return -EINVAL; 1220 1221 return 0; 1222 } 1223 1224 /* 1225 * unhook_thread_events - *nhook the insturmentation for thread noise 1226 * 1227 * Unook the osnoise tracer callbacks to handle the noise from other 1228 * threads on the necessary kernel events. 
1229 */ 1230 static void unhook_thread_events(void) 1231 { 1232 unregister_trace_sched_switch(trace_sched_switch_callback, NULL); 1233 } 1234 1235 /* 1236 * save_osn_sample_stats - Save the osnoise_sample statistics 1237 * 1238 * Save the osnoise_sample statistics before the sampling phase. These 1239 * values will be used later to compute the diff betwneen the statistics 1240 * before and after the osnoise sampling. 1241 */ 1242 static void 1243 save_osn_sample_stats(struct osnoise_variables *osn_var, struct osnoise_sample *s) 1244 { 1245 s->nmi_count = osn_var->nmi.count; 1246 s->irq_count = osn_var->irq.count; 1247 s->softirq_count = osn_var->softirq.count; 1248 s->thread_count = osn_var->thread.count; 1249 } 1250 1251 /* 1252 * diff_osn_sample_stats - Compute the osnoise_sample statistics 1253 * 1254 * After a sample period, compute the difference on the osnoise_sample 1255 * statistics. The struct osnoise_sample *s contains the statistics saved via 1256 * save_osn_sample_stats() before the osnoise sampling. 1257 */ 1258 static void 1259 diff_osn_sample_stats(struct osnoise_variables *osn_var, struct osnoise_sample *s) 1260 { 1261 s->nmi_count = osn_var->nmi.count - s->nmi_count; 1262 s->irq_count = osn_var->irq.count - s->irq_count; 1263 s->softirq_count = osn_var->softirq.count - s->softirq_count; 1264 s->thread_count = osn_var->thread.count - s->thread_count; 1265 } 1266 1267 /* 1268 * osnoise_stop_tracing - Stop tracing and the tracer. 
1269 */ 1270 static __always_inline void osnoise_stop_tracing(void) 1271 { 1272 struct osnoise_instance *inst; 1273 struct trace_array *tr; 1274 1275 rcu_read_lock(); 1276 list_for_each_entry_rcu(inst, &osnoise_instances, list) { 1277 tr = inst->tr; 1278 trace_array_printk_buf(tr->array_buffer.buffer, _THIS_IP_, 1279 "stop tracing hit on cpu %d\n", smp_processor_id()); 1280 1281 if (test_bit(OSN_PANIC_ON_STOP, &osnoise_options)) 1282 panic("tracer hit stop condition on CPU %d\n", smp_processor_id()); 1283 1284 tracer_tracing_off(tr); 1285 } 1286 rcu_read_unlock(); 1287 } 1288 1289 /* 1290 * notify_new_max_latency - Notify a new max latency via fsnotify interface. 1291 */ 1292 static void notify_new_max_latency(u64 latency) 1293 { 1294 struct osnoise_instance *inst; 1295 struct trace_array *tr; 1296 1297 rcu_read_lock(); 1298 list_for_each_entry_rcu(inst, &osnoise_instances, list) { 1299 tr = inst->tr; 1300 if (tr->max_latency < latency) { 1301 tr->max_latency = latency; 1302 latency_fsnotify(tr); 1303 } 1304 } 1305 rcu_read_unlock(); 1306 } 1307 1308 /* 1309 * run_osnoise - Sample the time and look for osnoise 1310 * 1311 * Used to capture the time, looking for potential osnoise latency repeatedly. 1312 * Different from hwlat_detector, it is called with preemption and interrupts 1313 * enabled. This allows irqs, softirqs and threads to run, interfering on the 1314 * osnoise sampling thread, as they would do with a regular thread. 
1315 */ 1316 static int run_osnoise(void) 1317 { 1318 bool disable_irq = test_bit(OSN_IRQ_DISABLE, &osnoise_options); 1319 struct osnoise_variables *osn_var = this_cpu_osn_var(); 1320 u64 start, sample, last_sample; 1321 u64 last_int_count, int_count; 1322 s64 noise = 0, max_noise = 0; 1323 s64 total, last_total = 0; 1324 struct osnoise_sample s; 1325 bool disable_preemption; 1326 unsigned int threshold; 1327 u64 runtime, stop_in; 1328 u64 sum_noise = 0; 1329 int hw_count = 0; 1330 int ret = -1; 1331 1332 /* 1333 * Disabling preemption is only required if IRQs are enabled, 1334 * and the options is set on. 1335 */ 1336 disable_preemption = !disable_irq && test_bit(OSN_PREEMPT_DISABLE, &osnoise_options); 1337 1338 /* 1339 * Considers the current thread as the workload. 1340 */ 1341 osn_var->pid = current->pid; 1342 1343 /* 1344 * Save the current stats for the diff 1345 */ 1346 save_osn_sample_stats(osn_var, &s); 1347 1348 /* 1349 * if threshold is 0, use the default value of 5 us. 1350 */ 1351 threshold = tracing_thresh ? : 5000; 1352 1353 /* 1354 * Apply PREEMPT and IRQ disabled options. 1355 */ 1356 if (disable_irq) 1357 local_irq_disable(); 1358 1359 if (disable_preemption) 1360 preempt_disable(); 1361 1362 /* 1363 * Make sure NMIs see sampling first 1364 */ 1365 osn_var->sampling = true; 1366 barrier(); 1367 1368 /* 1369 * Transform the *_us config to nanoseconds to avoid the 1370 * division on the main loop. 1371 */ 1372 runtime = osnoise_data.sample_runtime * NSEC_PER_USEC; 1373 stop_in = osnoise_data.stop_tracing * NSEC_PER_USEC; 1374 1375 /* 1376 * Start timestemp 1377 */ 1378 start = time_get(); 1379 1380 /* 1381 * "previous" loop. 1382 */ 1383 last_int_count = set_int_safe_time(osn_var, &last_sample); 1384 1385 do { 1386 /* 1387 * Get sample! 1388 */ 1389 int_count = set_int_safe_time(osn_var, &sample); 1390 1391 noise = time_sub(sample, last_sample); 1392 1393 /* 1394 * This shouldn't happen. 
1395 */ 1396 if (noise < 0) { 1397 osnoise_taint("negative noise!"); 1398 goto out; 1399 } 1400 1401 /* 1402 * Sample runtime. 1403 */ 1404 total = time_sub(sample, start); 1405 1406 /* 1407 * Check for possible overflows. 1408 */ 1409 if (total < last_total) { 1410 osnoise_taint("total overflow!"); 1411 break; 1412 } 1413 1414 last_total = total; 1415 1416 if (noise >= threshold) { 1417 int interference = int_count - last_int_count; 1418 1419 if (noise > max_noise) 1420 max_noise = noise; 1421 1422 if (!interference) 1423 hw_count++; 1424 1425 sum_noise += noise; 1426 1427 trace_sample_threshold(last_sample, noise, interference); 1428 1429 if (osnoise_data.stop_tracing) 1430 if (noise > stop_in) 1431 osnoise_stop_tracing(); 1432 } 1433 1434 /* 1435 * In some cases, notably when running on a nohz_full CPU with 1436 * a stopped tick PREEMPT_RCU has no way to account for QSs. 1437 * This will eventually cause unwarranted noise as PREEMPT_RCU 1438 * will force preemption as the means of ending the current 1439 * grace period. We avoid this problem by calling 1440 * rcu_momentary_dyntick_idle(), which performs a zero duration 1441 * EQS allowing PREEMPT_RCU to end the current grace period. 1442 * This call shouldn't be wrapped inside an RCU critical 1443 * section. 1444 * 1445 * Note that in non PREEMPT_RCU kernels QSs are handled through 1446 * cond_resched() 1447 */ 1448 if (IS_ENABLED(CONFIG_PREEMPT_RCU)) { 1449 if (!disable_irq) 1450 local_irq_disable(); 1451 1452 rcu_momentary_dyntick_idle(); 1453 1454 if (!disable_irq) 1455 local_irq_enable(); 1456 } 1457 1458 /* 1459 * For the non-preemptive kernel config: let threads runs, if 1460 * they so wish, unless set not do to so. 1461 */ 1462 if (!disable_irq && !disable_preemption) 1463 cond_resched(); 1464 1465 last_sample = sample; 1466 last_int_count = int_count; 1467 1468 } while (total < runtime && !kthread_should_stop()); 1469 1470 /* 1471 * Finish the above in the view for interrupts. 
1472 */ 1473 barrier(); 1474 1475 osn_var->sampling = false; 1476 1477 /* 1478 * Make sure sampling data is no longer updated. 1479 */ 1480 barrier(); 1481 1482 /* 1483 * Return to the preemptive state. 1484 */ 1485 if (disable_preemption) 1486 preempt_enable(); 1487 1488 if (disable_irq) 1489 local_irq_enable(); 1490 1491 /* 1492 * Save noise info. 1493 */ 1494 s.noise = time_to_us(sum_noise); 1495 s.runtime = time_to_us(total); 1496 s.max_sample = time_to_us(max_noise); 1497 s.hw_count = hw_count; 1498 1499 /* Save interference stats info */ 1500 diff_osn_sample_stats(osn_var, &s); 1501 1502 trace_osnoise_sample(&s); 1503 1504 notify_new_max_latency(max_noise); 1505 1506 if (osnoise_data.stop_tracing_total) 1507 if (s.noise > osnoise_data.stop_tracing_total) 1508 osnoise_stop_tracing(); 1509 1510 return 0; 1511 out: 1512 return ret; 1513 } 1514 1515 static struct cpumask osnoise_cpumask; 1516 static struct cpumask save_cpumask; 1517 1518 /* 1519 * osnoise_sleep - sleep until the next period 1520 */ 1521 static void osnoise_sleep(void) 1522 { 1523 u64 interval; 1524 ktime_t wake_time; 1525 1526 mutex_lock(&interface_lock); 1527 interval = osnoise_data.sample_period - osnoise_data.sample_runtime; 1528 mutex_unlock(&interface_lock); 1529 1530 /* 1531 * differently from hwlat_detector, the osnoise tracer can run 1532 * without a pause because preemption is on. 1533 */ 1534 if (!interval) { 1535 /* Let synchronize_rcu_tasks() make progress */ 1536 cond_resched_tasks_rcu_qs(); 1537 return; 1538 } 1539 1540 wake_time = ktime_add_us(ktime_get(), interval); 1541 __set_current_state(TASK_INTERRUPTIBLE); 1542 1543 while (schedule_hrtimeout_range(&wake_time, 0, HRTIMER_MODE_ABS)) { 1544 if (kthread_should_stop()) 1545 break; 1546 } 1547 } 1548 1549 /* 1550 * osnoise_main - The osnoise detection kernel thread 1551 * 1552 * Calls run_osnoise() function to measure the osnoise for the configured runtime, 1553 * every period. 
 *
 * Loops until kthread_should_stop(). Always returns 0.
 */
static int osnoise_main(void *data)
{

	while (!kthread_should_stop()) {
		run_osnoise();
		osnoise_sleep();
	}

	return 0;
}

#ifdef CONFIG_TIMERLAT_TRACER
/*
 * timerlat_irq - hrtimer handler for timerlat.
 *
 * Emits the IRQ_CONTEXT timerlat sample and, unless the stop_tracing
 * condition is hit, wakes the timerlat thread. The timer is never
 * restarted from here (HRTIMER_NORESTART); wait_next_period() re-arms it.
 */
static enum hrtimer_restart timerlat_irq(struct hrtimer *timer)
{
	struct osnoise_variables *osn_var = this_cpu_osn_var();
	struct timerlat_variables *tlat;
	struct timerlat_sample s;
	u64 now;
	u64 diff;

	/*
	 * I am not sure if the timer was armed for this CPU. So, get
	 * the timerlat struct from the timer itself, not from this
	 * CPU.
	 */
	tlat = container_of(timer, struct timerlat_variables, timer);

	now = ktime_to_ns(hrtimer_cb_get_time(&tlat->timer));

	/*
	 * Enable the osnoise: events for thread and softirq.
	 */
	tlat->tracing_thread = true;

	osn_var->thread.arrival_time = time_get();

	/*
	 * A hardirq is running: the timer IRQ. It is for sure preempting
	 * a thread, and potentially preempting a softirq.
	 *
	 * At this point, it is not interesting to know the duration of the
	 * preempted thread (and maybe softirq), but how much time they will
	 * delay the beginning of the execution of the timer thread.
	 *
	 * To get the correct (net) delay added by the softirq, its delta_start
	 * is set as the IRQ one. In this way, at the return of the IRQ, the delta
	 * start of the softirq will be zeroed, accounting then only the time
	 * after that.
	 *
	 * The thread follows the same principle. However, if a softirq is
	 * running, the thread needs to receive the softirq delta_start. The
	 * reason being is that the softirq will be the last to be unfolded,
	 * resetting the thread delay to zero.
	 *
	 * The PREEMPT_RT is a special case, though. As softirqs run as threads
	 * on RT, moving the thread is enough.
	 */
	if (!IS_ENABLED(CONFIG_PREEMPT_RT) && osn_var->softirq.delta_start) {
		copy_int_safe_time(osn_var, &osn_var->thread.delta_start,
				   &osn_var->softirq.delta_start);

		copy_int_safe_time(osn_var, &osn_var->softirq.delta_start,
				    &osn_var->irq.delta_start);
	} else {
		copy_int_safe_time(osn_var, &osn_var->thread.delta_start,
				    &osn_var->irq.delta_start);
	}

	/*
	 * Compare the current time with the expected time.
	 */
	diff = now - tlat->abs_period;

	tlat->count++;
	s.seqnum = tlat->count;
	s.timer_latency = diff;
	s.context = IRQ_CONTEXT;

	trace_timerlat_sample(&s);

	if (osnoise_data.stop_tracing) {
		if (time_to_us(diff) >= osnoise_data.stop_tracing) {

			/*
			 * At this point, if stop_tracing is set and <= print_stack,
			 * print_stack is set and would be printed in the thread handler.
			 *
			 * Thus, print the stack trace as it is helpful to define the
			 * root cause of an IRQ latency.
			 */
			if (osnoise_data.stop_tracing <= osnoise_data.print_stack) {
				timerlat_save_stack(0);
				timerlat_dump_stack(time_to_us(diff));
			}

			osnoise_stop_tracing();
			notify_new_max_latency(diff);

			/* Tracing was stopped: the timerlat thread is not woken. */
			return HRTIMER_NORESTART;
		}
	}

	wake_up_process(tlat->kthread);

	if (osnoise_data.print_stack)
		timerlat_save_stack(0);

	return HRTIMER_NORESTART;
}

/*
 * wait_next_period - Wait for the next period for timerlat
 *
 * Advances tlat->abs_period by timerlat_period (converted from us to ns),
 * skipping periods that are already in the past, arms the hrtimer for that
 * absolute time and calls schedule(). Always returns 1.
 */
static int wait_next_period(struct timerlat_variables *tlat)
{
	ktime_t next_abs_period, now;
	u64 rel_period = osnoise_data.timerlat_period * 1000;

	now = hrtimer_cb_get_time(&tlat->timer);
	next_abs_period = ns_to_ktime(tlat->abs_period + rel_period);

	/*
	 * Save the next abs_period.
	 */
	tlat->abs_period = (u64) ktime_to_ns(next_abs_period);

	/*
	 * If the new abs_period is in the past, skip the activation.
	 */
	while (ktime_compare(now, next_abs_period) > 0) {
		next_abs_period = ns_to_ktime(tlat->abs_period + rel_period);
		tlat->abs_period = (u64) ktime_to_ns(next_abs_period);
	}

	set_current_state(TASK_INTERRUPTIBLE);

	hrtimer_start(&tlat->timer, next_abs_period, HRTIMER_MODE_ABS_PINNED_HARD);
	schedule();
	return 1;
}

/*
 * timerlat_main - Timerlat main
 *
 * Per-CPU timerlat thread: switches itself to SCHED_FIFO, initializes the
 * per-CPU hrtimer, and then, for each activation, emits the THREAD_CONTEXT
 * sample before waiting for the next period. Always returns 0.
 */
static int timerlat_main(void *data)
{
	struct osnoise_variables *osn_var = this_cpu_osn_var();
	struct timerlat_variables *tlat = this_cpu_tmr_var();
	struct timerlat_sample s;
	struct sched_param sp;
	u64 now, diff;

	/*
	 * Make the thread RT, that is how cyclictest is usually used.
	 */
	sp.sched_priority = DEFAULT_TIMERLAT_PRIO;
	sched_setscheduler_nocheck(current, SCHED_FIFO, &sp);

	tlat->count = 0;
	tlat->tracing_thread = false;

	hrtimer_init(&tlat->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED_HARD);
	tlat->timer.function = timerlat_irq;
	tlat->kthread = current;
	osn_var->pid = current->pid;
	/*
	 * Annotate the arrival time.
	 */
	tlat->abs_period = hrtimer_cb_get_time(&tlat->timer);

	wait_next_period(tlat);

	osn_var->sampling = 1;

	while (!kthread_should_stop()) {
		now = ktime_to_ns(hrtimer_cb_get_time(&tlat->timer));
		diff = now - tlat->abs_period;

		/* Same sequence number that timerlat_irq() used for this activation. */
		s.seqnum = tlat->count;
		s.timer_latency = diff;
		s.context = THREAD_CONTEXT;

		trace_timerlat_sample(&s);

		timerlat_dump_stack(time_to_us(diff));

		tlat->tracing_thread = false;
		if (osnoise_data.stop_tracing_total)
			if (time_to_us(diff) >= osnoise_data.stop_tracing_total)
				osnoise_stop_tracing();

		wait_next_period(tlat);
	}

	hrtimer_cancel(&tlat->timer);
	return 0;
}
#else /* CONFIG_TIMERLAT_TRACER */
/* Without CONFIG_TIMERLAT_TRACER the timerlat thread body is a no-op. */
static int timerlat_main(void *data)
{
	return 0;
}
#endif /* CONFIG_TIMERLAT_TRACER */

/*
 * stop_kthread - stop a workload thread
 *
 * Stops the kthread recorded for @cpu, if any, and clears the per-CPU
 * pointer. When no kthread is recorded and the OSNOISE_WORKLOAD option is
 * off, only the per-CPU sampling flag is cleared.
 */
static void stop_kthread(unsigned int cpu)
{
	struct task_struct *kthread;

	kthread = per_cpu(per_cpu_osnoise_var, cpu).kthread;
	if (kthread) {
		kthread_stop(kthread);
		per_cpu(per_cpu_osnoise_var, cpu).kthread = NULL;
	} else {
		if (!test_bit(OSN_WORKLOAD, &osnoise_options)) {
			per_cpu(per_cpu_osnoise_var, cpu).sampling = false;
			barrier();
			return;
		}
	}
}

/*
 * stop_per_cpu_kthread - Stop per-cpu threads
 *
 * Stop the osnoise sampling thread. Use this on unload and at system
 * shutdown.
1787 */ 1788 static void stop_per_cpu_kthreads(void) 1789 { 1790 int cpu; 1791 1792 cpus_read_lock(); 1793 1794 for_each_online_cpu(cpu) 1795 stop_kthread(cpu); 1796 1797 cpus_read_unlock(); 1798 } 1799 1800 /* 1801 * start_kthread - Start a workload tread 1802 */ 1803 static int start_kthread(unsigned int cpu) 1804 { 1805 struct task_struct *kthread; 1806 void *main = osnoise_main; 1807 char comm[24]; 1808 1809 if (timerlat_enabled()) { 1810 snprintf(comm, 24, "timerlat/%d", cpu); 1811 main = timerlat_main; 1812 } else { 1813 /* if no workload, just return */ 1814 if (!test_bit(OSN_WORKLOAD, &osnoise_options)) { 1815 per_cpu(per_cpu_osnoise_var, cpu).sampling = true; 1816 barrier(); 1817 return 0; 1818 } 1819 1820 snprintf(comm, 24, "osnoise/%d", cpu); 1821 } 1822 1823 kthread = kthread_run_on_cpu(main, NULL, cpu, comm); 1824 1825 if (IS_ERR(kthread)) { 1826 pr_err(BANNER "could not start sampling thread\n"); 1827 stop_per_cpu_kthreads(); 1828 return -ENOMEM; 1829 } 1830 1831 per_cpu(per_cpu_osnoise_var, cpu).kthread = kthread; 1832 1833 return 0; 1834 } 1835 1836 /* 1837 * start_per_cpu_kthread - Kick off per-cpu osnoise sampling kthreads 1838 * 1839 * This starts the kernel thread that will look for osnoise on many 1840 * cpus. 1841 */ 1842 static int start_per_cpu_kthreads(void) 1843 { 1844 struct cpumask *current_mask = &save_cpumask; 1845 int retval = 0; 1846 int cpu; 1847 1848 cpus_read_lock(); 1849 /* 1850 * Run only on online CPUs in which osnoise is allowed to run. 
1851 */ 1852 cpumask_and(current_mask, cpu_online_mask, &osnoise_cpumask); 1853 1854 for_each_possible_cpu(cpu) 1855 per_cpu(per_cpu_osnoise_var, cpu).kthread = NULL; 1856 1857 for_each_cpu(cpu, current_mask) { 1858 retval = start_kthread(cpu); 1859 if (retval) { 1860 cpus_read_unlock(); 1861 stop_per_cpu_kthreads(); 1862 return retval; 1863 } 1864 } 1865 1866 cpus_read_unlock(); 1867 1868 return retval; 1869 } 1870 1871 #ifdef CONFIG_HOTPLUG_CPU 1872 static void osnoise_hotplug_workfn(struct work_struct *dummy) 1873 { 1874 unsigned int cpu = smp_processor_id(); 1875 1876 mutex_lock(&trace_types_lock); 1877 1878 if (!osnoise_has_registered_instances()) 1879 goto out_unlock_trace; 1880 1881 mutex_lock(&interface_lock); 1882 cpus_read_lock(); 1883 1884 if (!cpumask_test_cpu(cpu, &osnoise_cpumask)) 1885 goto out_unlock; 1886 1887 start_kthread(cpu); 1888 1889 out_unlock: 1890 cpus_read_unlock(); 1891 mutex_unlock(&interface_lock); 1892 out_unlock_trace: 1893 mutex_unlock(&trace_types_lock); 1894 } 1895 1896 static DECLARE_WORK(osnoise_hotplug_work, osnoise_hotplug_workfn); 1897 1898 /* 1899 * osnoise_cpu_init - CPU hotplug online callback function 1900 */ 1901 static int osnoise_cpu_init(unsigned int cpu) 1902 { 1903 schedule_work_on(cpu, &osnoise_hotplug_work); 1904 return 0; 1905 } 1906 1907 /* 1908 * osnoise_cpu_die - CPU hotplug offline callback function 1909 */ 1910 static int osnoise_cpu_die(unsigned int cpu) 1911 { 1912 stop_kthread(cpu); 1913 return 0; 1914 } 1915 1916 static void osnoise_init_hotplug_support(void) 1917 { 1918 int ret; 1919 1920 ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "trace/osnoise:online", 1921 osnoise_cpu_init, osnoise_cpu_die); 1922 if (ret < 0) 1923 pr_warn(BANNER "Error to init cpu hotplug support\n"); 1924 1925 return; 1926 } 1927 #else /* CONFIG_HOTPLUG_CPU */ 1928 static void osnoise_init_hotplug_support(void) 1929 { 1930 return; 1931 } 1932 #endif /* CONFIG_HOTPLUG_CPU */ 1933 1934 /* 1935 * seq file functions for the 
osnoise/options file. 1936 */ 1937 static void *s_options_start(struct seq_file *s, loff_t *pos) 1938 { 1939 int option = *pos; 1940 1941 mutex_lock(&interface_lock); 1942 1943 if (option >= OSN_MAX) 1944 return NULL; 1945 1946 return pos; 1947 } 1948 1949 static void *s_options_next(struct seq_file *s, void *v, loff_t *pos) 1950 { 1951 int option = ++(*pos); 1952 1953 if (option >= OSN_MAX) 1954 return NULL; 1955 1956 return pos; 1957 } 1958 1959 static int s_options_show(struct seq_file *s, void *v) 1960 { 1961 loff_t *pos = v; 1962 int option = *pos; 1963 1964 if (option == OSN_DEFAULTS) { 1965 if (osnoise_options == OSN_DEFAULT_OPTIONS) 1966 seq_printf(s, "%s", osnoise_options_str[option]); 1967 else 1968 seq_printf(s, "NO_%s", osnoise_options_str[option]); 1969 goto out; 1970 } 1971 1972 if (test_bit(option, &osnoise_options)) 1973 seq_printf(s, "%s", osnoise_options_str[option]); 1974 else 1975 seq_printf(s, "NO_%s", osnoise_options_str[option]); 1976 1977 out: 1978 if (option != OSN_MAX) 1979 seq_puts(s, " "); 1980 1981 return 0; 1982 } 1983 1984 static void s_options_stop(struct seq_file *s, void *v) 1985 { 1986 seq_puts(s, "\n"); 1987 mutex_unlock(&interface_lock); 1988 } 1989 1990 static const struct seq_operations osnoise_options_seq_ops = { 1991 .start = s_options_start, 1992 .next = s_options_next, 1993 .show = s_options_show, 1994 .stop = s_options_stop 1995 }; 1996 1997 static int osnoise_options_open(struct inode *inode, struct file *file) 1998 { 1999 return seq_open(file, &osnoise_options_seq_ops); 2000 }; 2001 2002 /** 2003 * osnoise_options_write - Write function for "options" entry 2004 * @filp: The active open file structure 2005 * @ubuf: The user buffer that contains the value to write 2006 * @cnt: The maximum number of bytes to write to "file" 2007 * @ppos: The current position in @file 2008 * 2009 * Writing the option name sets the option, writing the "NO_" 2010 * prefix in front of the option name disables it. 
2011 * 2012 * Writing "DEFAULTS" resets the option values to the default ones. 2013 */ 2014 static ssize_t osnoise_options_write(struct file *filp, const char __user *ubuf, 2015 size_t cnt, loff_t *ppos) 2016 { 2017 int running, option, enable, retval; 2018 char buf[256], *option_str; 2019 2020 if (cnt >= 256) 2021 return -EINVAL; 2022 2023 if (copy_from_user(buf, ubuf, cnt)) 2024 return -EFAULT; 2025 2026 buf[cnt] = 0; 2027 2028 if (strncmp(buf, "NO_", 3)) { 2029 option_str = strstrip(buf); 2030 enable = true; 2031 } else { 2032 option_str = strstrip(&buf[3]); 2033 enable = false; 2034 } 2035 2036 option = match_string(osnoise_options_str, OSN_MAX, option_str); 2037 if (option < 0) 2038 return -EINVAL; 2039 2040 /* 2041 * trace_types_lock is taken to avoid concurrency on start/stop. 2042 */ 2043 mutex_lock(&trace_types_lock); 2044 running = osnoise_has_registered_instances(); 2045 if (running) 2046 stop_per_cpu_kthreads(); 2047 2048 mutex_lock(&interface_lock); 2049 /* 2050 * avoid CPU hotplug operations that might read options. 2051 */ 2052 cpus_read_lock(); 2053 2054 retval = cnt; 2055 2056 if (enable) { 2057 if (option == OSN_DEFAULTS) 2058 osnoise_options = OSN_DEFAULT_OPTIONS; 2059 else 2060 set_bit(option, &osnoise_options); 2061 } else { 2062 if (option == OSN_DEFAULTS) 2063 retval = -EINVAL; 2064 else 2065 clear_bit(option, &osnoise_options); 2066 } 2067 2068 cpus_read_unlock(); 2069 mutex_unlock(&interface_lock); 2070 2071 if (running) 2072 start_per_cpu_kthreads(); 2073 mutex_unlock(&trace_types_lock); 2074 2075 return retval; 2076 } 2077 2078 /* 2079 * osnoise_cpus_read - Read function for reading the "cpus" file 2080 * @filp: The active open file structure 2081 * @ubuf: The userspace provided buffer to read value into 2082 * @cnt: The maximum number of bytes to read 2083 * @ppos: The current "file" position 2084 * 2085 * Prints the "cpus" output into the user-provided buffer. 
2086 */ 2087 static ssize_t 2088 osnoise_cpus_read(struct file *filp, char __user *ubuf, size_t count, 2089 loff_t *ppos) 2090 { 2091 char *mask_str; 2092 int len; 2093 2094 mutex_lock(&interface_lock); 2095 2096 len = snprintf(NULL, 0, "%*pbl\n", cpumask_pr_args(&osnoise_cpumask)) + 1; 2097 mask_str = kmalloc(len, GFP_KERNEL); 2098 if (!mask_str) { 2099 count = -ENOMEM; 2100 goto out_unlock; 2101 } 2102 2103 len = snprintf(mask_str, len, "%*pbl\n", cpumask_pr_args(&osnoise_cpumask)); 2104 if (len >= count) { 2105 count = -EINVAL; 2106 goto out_free; 2107 } 2108 2109 count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len); 2110 2111 out_free: 2112 kfree(mask_str); 2113 out_unlock: 2114 mutex_unlock(&interface_lock); 2115 2116 return count; 2117 } 2118 2119 /* 2120 * osnoise_cpus_write - Write function for "cpus" entry 2121 * @filp: The active open file structure 2122 * @ubuf: The user buffer that contains the value to write 2123 * @cnt: The maximum number of bytes to write to "file" 2124 * @ppos: The current position in @file 2125 * 2126 * This function provides a write implementation for the "cpus" 2127 * interface to the osnoise trace. By default, it lists all CPUs, 2128 * in this way, allowing osnoise threads to run on any online CPU 2129 * of the system. It serves to restrict the execution of osnoise to the 2130 * set of CPUs writing via this interface. Why not use "tracing_cpumask"? 2131 * Because the user might be interested in tracing what is running on 2132 * other CPUs. For instance, one might run osnoise in one HT CPU 2133 * while observing what is running on the sibling HT CPU. 
2134 */ 2135 static ssize_t 2136 osnoise_cpus_write(struct file *filp, const char __user *ubuf, size_t count, 2137 loff_t *ppos) 2138 { 2139 cpumask_var_t osnoise_cpumask_new; 2140 int running, err; 2141 char buf[256]; 2142 2143 if (count >= 256) 2144 return -EINVAL; 2145 2146 if (copy_from_user(buf, ubuf, count)) 2147 return -EFAULT; 2148 2149 if (!zalloc_cpumask_var(&osnoise_cpumask_new, GFP_KERNEL)) 2150 return -ENOMEM; 2151 2152 err = cpulist_parse(buf, osnoise_cpumask_new); 2153 if (err) 2154 goto err_free; 2155 2156 /* 2157 * trace_types_lock is taken to avoid concurrency on start/stop. 2158 */ 2159 mutex_lock(&trace_types_lock); 2160 running = osnoise_has_registered_instances(); 2161 if (running) 2162 stop_per_cpu_kthreads(); 2163 2164 mutex_lock(&interface_lock); 2165 /* 2166 * osnoise_cpumask is read by CPU hotplug operations. 2167 */ 2168 cpus_read_lock(); 2169 2170 cpumask_copy(&osnoise_cpumask, osnoise_cpumask_new); 2171 2172 cpus_read_unlock(); 2173 mutex_unlock(&interface_lock); 2174 2175 if (running) 2176 start_per_cpu_kthreads(); 2177 mutex_unlock(&trace_types_lock); 2178 2179 free_cpumask_var(osnoise_cpumask_new); 2180 return count; 2181 2182 err_free: 2183 free_cpumask_var(osnoise_cpumask_new); 2184 2185 return err; 2186 } 2187 2188 /* 2189 * osnoise/runtime_us: cannot be greater than the period. 2190 */ 2191 static struct trace_min_max_param osnoise_runtime = { 2192 .lock = &interface_lock, 2193 .val = &osnoise_data.sample_runtime, 2194 .max = &osnoise_data.sample_period, 2195 .min = NULL, 2196 }; 2197 2198 /* 2199 * osnoise/period_us: cannot be smaller than the runtime. 2200 */ 2201 static struct trace_min_max_param osnoise_period = { 2202 .lock = &interface_lock, 2203 .val = &osnoise_data.sample_period, 2204 .max = NULL, 2205 .min = &osnoise_data.sample_runtime, 2206 }; 2207 2208 /* 2209 * osnoise/stop_tracing_us: no limit. 
2210 */ 2211 static struct trace_min_max_param osnoise_stop_tracing_in = { 2212 .lock = &interface_lock, 2213 .val = &osnoise_data.stop_tracing, 2214 .max = NULL, 2215 .min = NULL, 2216 }; 2217 2218 /* 2219 * osnoise/stop_tracing_total_us: no limit. 2220 */ 2221 static struct trace_min_max_param osnoise_stop_tracing_total = { 2222 .lock = &interface_lock, 2223 .val = &osnoise_data.stop_tracing_total, 2224 .max = NULL, 2225 .min = NULL, 2226 }; 2227 2228 #ifdef CONFIG_TIMERLAT_TRACER 2229 /* 2230 * osnoise/print_stack: print the stacktrace of the IRQ handler if the total 2231 * latency is higher than val. 2232 */ 2233 static struct trace_min_max_param osnoise_print_stack = { 2234 .lock = &interface_lock, 2235 .val = &osnoise_data.print_stack, 2236 .max = NULL, 2237 .min = NULL, 2238 }; 2239 2240 /* 2241 * osnoise/timerlat_period: min 100 us, max 1 s 2242 */ 2243 u64 timerlat_min_period = 100; 2244 u64 timerlat_max_period = 1000000; 2245 static struct trace_min_max_param timerlat_period = { 2246 .lock = &interface_lock, 2247 .val = &osnoise_data.timerlat_period, 2248 .max = &timerlat_max_period, 2249 .min = &timerlat_min_period, 2250 }; 2251 #endif 2252 2253 static const struct file_operations cpus_fops = { 2254 .open = tracing_open_generic, 2255 .read = osnoise_cpus_read, 2256 .write = osnoise_cpus_write, 2257 .llseek = generic_file_llseek, 2258 }; 2259 2260 static const struct file_operations osnoise_options_fops = { 2261 .open = osnoise_options_open, 2262 .read = seq_read, 2263 .llseek = seq_lseek, 2264 .release = seq_release, 2265 .write = osnoise_options_write 2266 }; 2267 2268 #ifdef CONFIG_TIMERLAT_TRACER 2269 #ifdef CONFIG_STACKTRACE 2270 static int init_timerlat_stack_tracefs(struct dentry *top_dir) 2271 { 2272 struct dentry *tmp; 2273 2274 tmp = tracefs_create_file("print_stack", TRACE_MODE_WRITE, top_dir, 2275 &osnoise_print_stack, &trace_min_max_fops); 2276 if (!tmp) 2277 return -ENOMEM; 2278 2279 return 0; 2280 } 2281 #else /* CONFIG_STACKTRACE */ 2282 
static int init_timerlat_stack_tracefs(struct dentry *top_dir) 2283 { 2284 return 0; 2285 } 2286 #endif /* CONFIG_STACKTRACE */ 2287 2288 /* 2289 * init_timerlat_tracefs - A function to initialize the timerlat interface files 2290 */ 2291 static int init_timerlat_tracefs(struct dentry *top_dir) 2292 { 2293 struct dentry *tmp; 2294 2295 tmp = tracefs_create_file("timerlat_period_us", TRACE_MODE_WRITE, top_dir, 2296 &timerlat_period, &trace_min_max_fops); 2297 if (!tmp) 2298 return -ENOMEM; 2299 2300 return init_timerlat_stack_tracefs(top_dir); 2301 } 2302 #else /* CONFIG_TIMERLAT_TRACER */ 2303 static int init_timerlat_tracefs(struct dentry *top_dir) 2304 { 2305 return 0; 2306 } 2307 #endif /* CONFIG_TIMERLAT_TRACER */ 2308 2309 /* 2310 * init_tracefs - A function to initialize the tracefs interface files 2311 * 2312 * This function creates entries in tracefs for "osnoise" and "timerlat". 2313 * It creates these directories in the tracing directory, and within that 2314 * directory the use can change and view the configs. 
2315 */ 2316 static int init_tracefs(void) 2317 { 2318 struct dentry *top_dir; 2319 struct dentry *tmp; 2320 int ret; 2321 2322 ret = tracing_init_dentry(); 2323 if (ret) 2324 return -ENOMEM; 2325 2326 top_dir = tracefs_create_dir("osnoise", NULL); 2327 if (!top_dir) 2328 return 0; 2329 2330 tmp = tracefs_create_file("period_us", TRACE_MODE_WRITE, top_dir, 2331 &osnoise_period, &trace_min_max_fops); 2332 if (!tmp) 2333 goto err; 2334 2335 tmp = tracefs_create_file("runtime_us", TRACE_MODE_WRITE, top_dir, 2336 &osnoise_runtime, &trace_min_max_fops); 2337 if (!tmp) 2338 goto err; 2339 2340 tmp = tracefs_create_file("stop_tracing_us", TRACE_MODE_WRITE, top_dir, 2341 &osnoise_stop_tracing_in, &trace_min_max_fops); 2342 if (!tmp) 2343 goto err; 2344 2345 tmp = tracefs_create_file("stop_tracing_total_us", TRACE_MODE_WRITE, top_dir, 2346 &osnoise_stop_tracing_total, &trace_min_max_fops); 2347 if (!tmp) 2348 goto err; 2349 2350 tmp = trace_create_file("cpus", TRACE_MODE_WRITE, top_dir, NULL, &cpus_fops); 2351 if (!tmp) 2352 goto err; 2353 2354 tmp = trace_create_file("options", TRACE_MODE_WRITE, top_dir, NULL, 2355 &osnoise_options_fops); 2356 if (!tmp) 2357 goto err; 2358 2359 ret = init_timerlat_tracefs(top_dir); 2360 if (ret) 2361 goto err; 2362 2363 return 0; 2364 2365 err: 2366 tracefs_remove(top_dir); 2367 return -ENOMEM; 2368 } 2369 2370 static int osnoise_hook_events(void) 2371 { 2372 int retval; 2373 2374 /* 2375 * Trace is already hooked, we are re-enabling from 2376 * a stop_tracing_*. 2377 */ 2378 if (trace_osnoise_callback_enabled) 2379 return 0; 2380 2381 retval = hook_irq_events(); 2382 if (retval) 2383 return -EINVAL; 2384 2385 retval = hook_softirq_events(); 2386 if (retval) 2387 goto out_unhook_irq; 2388 2389 retval = hook_thread_events(); 2390 /* 2391 * All fine! 
2392 */ 2393 if (!retval) 2394 return 0; 2395 2396 unhook_softirq_events(); 2397 out_unhook_irq: 2398 unhook_irq_events(); 2399 return -EINVAL; 2400 } 2401 2402 static void osnoise_unhook_events(void) 2403 { 2404 unhook_thread_events(); 2405 unhook_softirq_events(); 2406 unhook_irq_events(); 2407 } 2408 2409 /* 2410 * osnoise_workload_start - start the workload and hook to events 2411 */ 2412 static int osnoise_workload_start(void) 2413 { 2414 int retval; 2415 2416 /* 2417 * Instances need to be registered after calling workload 2418 * start. Hence, if there is already an instance, the 2419 * workload was already registered. Otherwise, this 2420 * code is on the way to register the first instance, 2421 * and the workload will start. 2422 */ 2423 if (osnoise_has_registered_instances()) 2424 return 0; 2425 2426 osn_var_reset_all(); 2427 2428 retval = osnoise_hook_events(); 2429 if (retval) 2430 return retval; 2431 2432 /* 2433 * Make sure that ftrace_nmi_enter/exit() see reset values 2434 * before enabling trace_osnoise_callback_enabled. 2435 */ 2436 barrier(); 2437 trace_osnoise_callback_enabled = true; 2438 2439 retval = start_per_cpu_kthreads(); 2440 if (retval) { 2441 trace_osnoise_callback_enabled = false; 2442 /* 2443 * Make sure that ftrace_nmi_enter/exit() see 2444 * trace_osnoise_callback_enabled as false before continuing. 2445 */ 2446 barrier(); 2447 2448 osnoise_unhook_events(); 2449 return retval; 2450 } 2451 2452 return 0; 2453 } 2454 2455 /* 2456 * osnoise_workload_stop - stop the workload and unhook the events 2457 */ 2458 static void osnoise_workload_stop(void) 2459 { 2460 /* 2461 * Instances need to be unregistered before calling 2462 * stop. Hence, if there is a registered instance, more 2463 * than one instance is running, and the workload will not 2464 * yet stop. Otherwise, this code is on the way to disable 2465 * the last instance, and the workload can stop. 
2466 */ 2467 if (osnoise_has_registered_instances()) 2468 return; 2469 2470 /* 2471 * If callbacks were already disabled in a previous stop 2472 * call, there is no need to disable then again. 2473 * 2474 * For instance, this happens when tracing is stopped via: 2475 * echo 0 > tracing_on 2476 * echo nop > current_tracer. 2477 */ 2478 if (!trace_osnoise_callback_enabled) 2479 return; 2480 2481 trace_osnoise_callback_enabled = false; 2482 /* 2483 * Make sure that ftrace_nmi_enter/exit() see 2484 * trace_osnoise_callback_enabled as false before continuing. 2485 */ 2486 barrier(); 2487 2488 stop_per_cpu_kthreads(); 2489 2490 osnoise_unhook_events(); 2491 } 2492 2493 static void osnoise_tracer_start(struct trace_array *tr) 2494 { 2495 int retval; 2496 2497 /* 2498 * If the instance is already registered, there is no need to 2499 * register it again. 2500 */ 2501 if (osnoise_instance_registered(tr)) 2502 return; 2503 2504 retval = osnoise_workload_start(); 2505 if (retval) 2506 pr_err(BANNER "Error starting osnoise tracer\n"); 2507 2508 osnoise_register_instance(tr); 2509 } 2510 2511 static void osnoise_tracer_stop(struct trace_array *tr) 2512 { 2513 osnoise_unregister_instance(tr); 2514 osnoise_workload_stop(); 2515 } 2516 2517 static int osnoise_tracer_init(struct trace_array *tr) 2518 { 2519 /* 2520 * Only allow osnoise tracer if timerlat tracer is not running 2521 * already. 
2522 */ 2523 if (timerlat_enabled()) 2524 return -EBUSY; 2525 2526 tr->max_latency = 0; 2527 2528 osnoise_tracer_start(tr); 2529 return 0; 2530 } 2531 2532 static void osnoise_tracer_reset(struct trace_array *tr) 2533 { 2534 osnoise_tracer_stop(tr); 2535 } 2536 2537 static struct tracer osnoise_tracer __read_mostly = { 2538 .name = "osnoise", 2539 .init = osnoise_tracer_init, 2540 .reset = osnoise_tracer_reset, 2541 .start = osnoise_tracer_start, 2542 .stop = osnoise_tracer_stop, 2543 .print_header = print_osnoise_headers, 2544 .allow_instances = true, 2545 }; 2546 2547 #ifdef CONFIG_TIMERLAT_TRACER 2548 static void timerlat_tracer_start(struct trace_array *tr) 2549 { 2550 int retval; 2551 2552 /* 2553 * If the instance is already registered, there is no need to 2554 * register it again. 2555 */ 2556 if (osnoise_instance_registered(tr)) 2557 return; 2558 2559 retval = osnoise_workload_start(); 2560 if (retval) 2561 pr_err(BANNER "Error starting timerlat tracer\n"); 2562 2563 osnoise_register_instance(tr); 2564 2565 return; 2566 } 2567 2568 static void timerlat_tracer_stop(struct trace_array *tr) 2569 { 2570 int cpu; 2571 2572 osnoise_unregister_instance(tr); 2573 2574 /* 2575 * Instruct the threads to stop only if this is the last instance. 2576 */ 2577 if (!osnoise_has_registered_instances()) { 2578 for_each_online_cpu(cpu) 2579 per_cpu(per_cpu_osnoise_var, cpu).sampling = 0; 2580 } 2581 2582 osnoise_workload_stop(); 2583 } 2584 2585 static int timerlat_tracer_init(struct trace_array *tr) 2586 { 2587 /* 2588 * Only allow timerlat tracer if osnoise tracer is not running already. 2589 */ 2590 if (osnoise_has_registered_instances() && !osnoise_data.timerlat_tracer) 2591 return -EBUSY; 2592 2593 /* 2594 * If this is the first instance, set timerlat_tracer to block 2595 * osnoise tracer start. 
2596 */ 2597 if (!osnoise_has_registered_instances()) 2598 osnoise_data.timerlat_tracer = 1; 2599 2600 tr->max_latency = 0; 2601 timerlat_tracer_start(tr); 2602 2603 return 0; 2604 } 2605 2606 static void timerlat_tracer_reset(struct trace_array *tr) 2607 { 2608 timerlat_tracer_stop(tr); 2609 2610 /* 2611 * If this is the last instance, reset timerlat_tracer allowing 2612 * osnoise to be started. 2613 */ 2614 if (!osnoise_has_registered_instances()) 2615 osnoise_data.timerlat_tracer = 0; 2616 } 2617 2618 static struct tracer timerlat_tracer __read_mostly = { 2619 .name = "timerlat", 2620 .init = timerlat_tracer_init, 2621 .reset = timerlat_tracer_reset, 2622 .start = timerlat_tracer_start, 2623 .stop = timerlat_tracer_stop, 2624 .print_header = print_timerlat_headers, 2625 .allow_instances = true, 2626 }; 2627 2628 __init static int init_timerlat_tracer(void) 2629 { 2630 return register_tracer(&timerlat_tracer); 2631 } 2632 #else /* CONFIG_TIMERLAT_TRACER */ 2633 __init static int init_timerlat_tracer(void) 2634 { 2635 return 0; 2636 } 2637 #endif /* CONFIG_TIMERLAT_TRACER */ 2638 2639 __init static int init_osnoise_tracer(void) 2640 { 2641 int ret; 2642 2643 mutex_init(&interface_lock); 2644 2645 cpumask_copy(&osnoise_cpumask, cpu_all_mask); 2646 2647 ret = register_tracer(&osnoise_tracer); 2648 if (ret) { 2649 pr_err(BANNER "Error registering osnoise!\n"); 2650 return ret; 2651 } 2652 2653 ret = init_timerlat_tracer(); 2654 if (ret) { 2655 pr_err(BANNER "Error registering timerlat!\n"); 2656 return ret; 2657 } 2658 2659 osnoise_init_hotplug_support(); 2660 2661 INIT_LIST_HEAD_RCU(&osnoise_instances); 2662 2663 init_tracefs(); 2664 2665 return 0; 2666 } 2667 late_initcall(init_osnoise_tracer); 2668