1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * OS Noise Tracer: computes the OS Noise suffered by a running thread. 4 * Timerlat Tracer: measures the wakeup latency of a timer triggered IRQ and thread. 5 * 6 * Based on "hwlat_detector" tracer by: 7 * Copyright (C) 2008-2009 Jon Masters, Red Hat, Inc. <jcm@redhat.com> 8 * Copyright (C) 2013-2016 Steven Rostedt, Red Hat, Inc. <srostedt@redhat.com> 9 * With feedback from Clark Williams <williams@redhat.com> 10 * 11 * And also based on the rtsl tracer presented on: 12 * DE OLIVEIRA, Daniel Bristot, et al. Demystifying the real-time linux 13 * scheduling latency. In: 32nd Euromicro Conference on Real-Time Systems 14 * (ECRTS 2020). Schloss Dagstuhl-Leibniz-Zentrum fur Informatik, 2020. 15 * 16 * Copyright (C) 2021 Daniel Bristot de Oliveira, Red Hat, Inc. <bristot@redhat.com> 17 */ 18 19 #include <linux/kthread.h> 20 #include <linux/tracefs.h> 21 #include <linux/uaccess.h> 22 #include <linux/cpumask.h> 23 #include <linux/delay.h> 24 #include <linux/sched/clock.h> 25 #include <uapi/linux/sched/types.h> 26 #include <linux/sched.h> 27 #include "trace.h" 28 29 #ifdef CONFIG_X86_LOCAL_APIC 30 #include <asm/trace/irq_vectors.h> 31 #undef TRACE_INCLUDE_PATH 32 #undef TRACE_INCLUDE_FILE 33 #endif /* CONFIG_X86_LOCAL_APIC */ 34 35 #include <trace/events/irq.h> 36 #include <trace/events/sched.h> 37 38 #define CREATE_TRACE_POINTS 39 #include <trace/events/osnoise.h> 40 41 /* 42 * Default values. 43 */ 44 #define BANNER "osnoise: " 45 #define DEFAULT_SAMPLE_PERIOD 1000000 /* 1s */ 46 #define DEFAULT_SAMPLE_RUNTIME 1000000 /* 1s */ 47 48 #define DEFAULT_TIMERLAT_PERIOD 1000 /* 1ms */ 49 #define DEFAULT_TIMERLAT_PRIO 95 /* FIFO 95 */ 50 51 /* 52 * osnoise/options entries. 
53 */ 54 enum osnoise_options_index { 55 OSN_DEFAULTS = 0, 56 OSN_WORKLOAD, 57 OSN_PANIC_ON_STOP, 58 OSN_PREEMPT_DISABLE, 59 OSN_IRQ_DISABLE, 60 OSN_MAX 61 }; 62 63 static const char * const osnoise_options_str[OSN_MAX] = { 64 "DEFAULTS", 65 "OSNOISE_WORKLOAD", 66 "PANIC_ON_STOP", 67 "OSNOISE_PREEMPT_DISABLE", 68 "OSNOISE_IRQ_DISABLE" }; 69 70 #define OSN_DEFAULT_OPTIONS 0x2 71 static unsigned long osnoise_options = OSN_DEFAULT_OPTIONS; 72 73 /* 74 * trace_array of the enabled osnoise/timerlat instances. 75 */ 76 struct osnoise_instance { 77 struct list_head list; 78 struct trace_array *tr; 79 }; 80 81 static struct list_head osnoise_instances; 82 83 static bool osnoise_has_registered_instances(void) 84 { 85 return !!list_first_or_null_rcu(&osnoise_instances, 86 struct osnoise_instance, 87 list); 88 } 89 90 /* 91 * osnoise_instance_registered - check if a tr is already registered 92 */ 93 static int osnoise_instance_registered(struct trace_array *tr) 94 { 95 struct osnoise_instance *inst; 96 int found = 0; 97 98 rcu_read_lock(); 99 list_for_each_entry_rcu(inst, &osnoise_instances, list) { 100 if (inst->tr == tr) 101 found = 1; 102 } 103 rcu_read_unlock(); 104 105 return found; 106 } 107 108 /* 109 * osnoise_register_instance - register a new trace instance 110 * 111 * Register a trace_array *tr in the list of instances running 112 * osnoise/timerlat tracers. 113 */ 114 static int osnoise_register_instance(struct trace_array *tr) 115 { 116 struct osnoise_instance *inst; 117 118 /* 119 * register/unregister serialization is provided by trace's 120 * trace_types_lock. 
121 */ 122 lockdep_assert_held(&trace_types_lock); 123 124 inst = kmalloc(sizeof(*inst), GFP_KERNEL); 125 if (!inst) 126 return -ENOMEM; 127 128 INIT_LIST_HEAD_RCU(&inst->list); 129 inst->tr = tr; 130 list_add_tail_rcu(&inst->list, &osnoise_instances); 131 132 return 0; 133 } 134 135 /* 136 * osnoise_unregister_instance - unregister a registered trace instance 137 * 138 * Remove the trace_array *tr from the list of instances running 139 * osnoise/timerlat tracers. 140 */ 141 static void osnoise_unregister_instance(struct trace_array *tr) 142 { 143 struct osnoise_instance *inst; 144 int found = 0; 145 146 /* 147 * register/unregister serialization is provided by trace's 148 * trace_types_lock. 149 */ 150 list_for_each_entry_rcu(inst, &osnoise_instances, list, 151 lockdep_is_held(&trace_types_lock)) { 152 if (inst->tr == tr) { 153 list_del_rcu(&inst->list); 154 found = 1; 155 break; 156 } 157 } 158 159 if (!found) 160 return; 161 162 kvfree_rcu(inst); 163 } 164 165 /* 166 * NMI runtime info. 167 */ 168 struct osn_nmi { 169 u64 count; 170 u64 delta_start; 171 }; 172 173 /* 174 * IRQ runtime info. 175 */ 176 struct osn_irq { 177 u64 count; 178 u64 arrival_time; 179 u64 delta_start; 180 }; 181 182 #define IRQ_CONTEXT 0 183 #define THREAD_CONTEXT 1 184 /* 185 * sofirq runtime info. 186 */ 187 struct osn_softirq { 188 u64 count; 189 u64 arrival_time; 190 u64 delta_start; 191 }; 192 193 /* 194 * thread runtime info. 195 */ 196 struct osn_thread { 197 u64 count; 198 u64 arrival_time; 199 u64 delta_start; 200 }; 201 202 /* 203 * Runtime information: this structure saves the runtime information used by 204 * one sampling thread. 205 */ 206 struct osnoise_variables { 207 struct task_struct *kthread; 208 bool sampling; 209 pid_t pid; 210 struct osn_nmi nmi; 211 struct osn_irq irq; 212 struct osn_softirq softirq; 213 struct osn_thread thread; 214 local_t int_counter; 215 }; 216 217 /* 218 * Per-cpu runtime information. 
219 */ 220 DEFINE_PER_CPU(struct osnoise_variables, per_cpu_osnoise_var); 221 222 /* 223 * this_cpu_osn_var - Return the per-cpu osnoise_variables on its relative CPU 224 */ 225 static inline struct osnoise_variables *this_cpu_osn_var(void) 226 { 227 return this_cpu_ptr(&per_cpu_osnoise_var); 228 } 229 230 #ifdef CONFIG_TIMERLAT_TRACER 231 /* 232 * Runtime information for the timer mode. 233 */ 234 struct timerlat_variables { 235 struct task_struct *kthread; 236 struct hrtimer timer; 237 u64 rel_period; 238 u64 abs_period; 239 bool tracing_thread; 240 u64 count; 241 }; 242 243 DEFINE_PER_CPU(struct timerlat_variables, per_cpu_timerlat_var); 244 245 /* 246 * this_cpu_tmr_var - Return the per-cpu timerlat_variables on its relative CPU 247 */ 248 static inline struct timerlat_variables *this_cpu_tmr_var(void) 249 { 250 return this_cpu_ptr(&per_cpu_timerlat_var); 251 } 252 253 /* 254 * tlat_var_reset - Reset the values of the given timerlat_variables 255 */ 256 static inline void tlat_var_reset(void) 257 { 258 struct timerlat_variables *tlat_var; 259 int cpu; 260 /* 261 * So far, all the values are initialized as 0, so 262 * zeroing the structure is perfect. 263 */ 264 for_each_cpu(cpu, cpu_online_mask) { 265 tlat_var = per_cpu_ptr(&per_cpu_timerlat_var, cpu); 266 memset(tlat_var, 0, sizeof(*tlat_var)); 267 } 268 } 269 #else /* CONFIG_TIMERLAT_TRACER */ 270 #define tlat_var_reset() do {} while (0) 271 #endif /* CONFIG_TIMERLAT_TRACER */ 272 273 /* 274 * osn_var_reset - Reset the values of the given osnoise_variables 275 */ 276 static inline void osn_var_reset(void) 277 { 278 struct osnoise_variables *osn_var; 279 int cpu; 280 281 /* 282 * So far, all the values are initialized as 0, so 283 * zeroing the structure is perfect. 
284 */ 285 for_each_cpu(cpu, cpu_online_mask) { 286 osn_var = per_cpu_ptr(&per_cpu_osnoise_var, cpu); 287 memset(osn_var, 0, sizeof(*osn_var)); 288 } 289 } 290 291 /* 292 * osn_var_reset_all - Reset the value of all per-cpu osnoise_variables 293 */ 294 static inline void osn_var_reset_all(void) 295 { 296 osn_var_reset(); 297 tlat_var_reset(); 298 } 299 300 /* 301 * Tells NMIs to call back to the osnoise tracer to record timestamps. 302 */ 303 bool trace_osnoise_callback_enabled; 304 305 /* 306 * osnoise sample structure definition. Used to store the statistics of a 307 * sample run. 308 */ 309 struct osnoise_sample { 310 u64 runtime; /* runtime */ 311 u64 noise; /* noise */ 312 u64 max_sample; /* max single noise sample */ 313 int hw_count; /* # HW (incl. hypervisor) interference */ 314 int nmi_count; /* # NMIs during this sample */ 315 int irq_count; /* # IRQs during this sample */ 316 int softirq_count; /* # softirqs during this sample */ 317 int thread_count; /* # threads during this sample */ 318 }; 319 320 #ifdef CONFIG_TIMERLAT_TRACER 321 /* 322 * timerlat sample structure definition. Used to store the statistics of 323 * a sample run. 324 */ 325 struct timerlat_sample { 326 u64 timer_latency; /* timer_latency */ 327 unsigned int seqnum; /* unique sequence */ 328 int context; /* timer context */ 329 }; 330 #endif 331 332 /* 333 * Protect the interface. 334 */ 335 struct mutex interface_lock; 336 337 /* 338 * Tracer data. 
339 */ 340 static struct osnoise_data { 341 u64 sample_period; /* total sampling period */ 342 u64 sample_runtime; /* active sampling portion of period */ 343 u64 stop_tracing; /* stop trace in the internal operation (loop/irq) */ 344 u64 stop_tracing_total; /* stop trace in the final operation (report/thread) */ 345 #ifdef CONFIG_TIMERLAT_TRACER 346 u64 timerlat_period; /* timerlat period */ 347 u64 print_stack; /* print IRQ stack if total > */ 348 int timerlat_tracer; /* timerlat tracer */ 349 #endif 350 bool tainted; /* infor users and developers about a problem */ 351 } osnoise_data = { 352 .sample_period = DEFAULT_SAMPLE_PERIOD, 353 .sample_runtime = DEFAULT_SAMPLE_RUNTIME, 354 .stop_tracing = 0, 355 .stop_tracing_total = 0, 356 #ifdef CONFIG_TIMERLAT_TRACER 357 .print_stack = 0, 358 .timerlat_period = DEFAULT_TIMERLAT_PERIOD, 359 .timerlat_tracer = 0, 360 #endif 361 }; 362 363 #ifdef CONFIG_TIMERLAT_TRACER 364 static inline bool timerlat_enabled(void) 365 { 366 return osnoise_data.timerlat_tracer; 367 } 368 369 static inline int timerlat_softirq_exit(struct osnoise_variables *osn_var) 370 { 371 struct timerlat_variables *tlat_var = this_cpu_tmr_var(); 372 /* 373 * If the timerlat is enabled, but the irq handler did 374 * not run yet enabling timerlat_tracer, do not trace. 375 */ 376 if (!tlat_var->tracing_thread) { 377 osn_var->softirq.arrival_time = 0; 378 osn_var->softirq.delta_start = 0; 379 return 0; 380 } 381 return 1; 382 } 383 384 static inline int timerlat_thread_exit(struct osnoise_variables *osn_var) 385 { 386 struct timerlat_variables *tlat_var = this_cpu_tmr_var(); 387 /* 388 * If the timerlat is enabled, but the irq handler did 389 * not run yet enabling timerlat_tracer, do not trace. 
390 */ 391 if (!tlat_var->tracing_thread) { 392 osn_var->thread.delta_start = 0; 393 osn_var->thread.arrival_time = 0; 394 return 0; 395 } 396 return 1; 397 } 398 #else /* CONFIG_TIMERLAT_TRACER */ 399 static inline bool timerlat_enabled(void) 400 { 401 return false; 402 } 403 404 static inline int timerlat_softirq_exit(struct osnoise_variables *osn_var) 405 { 406 return 1; 407 } 408 static inline int timerlat_thread_exit(struct osnoise_variables *osn_var) 409 { 410 return 1; 411 } 412 #endif 413 414 #ifdef CONFIG_PREEMPT_RT 415 /* 416 * Print the osnoise header info. 417 */ 418 static void print_osnoise_headers(struct seq_file *s) 419 { 420 if (osnoise_data.tainted) 421 seq_puts(s, "# osnoise is tainted!\n"); 422 423 seq_puts(s, "# _-------=> irqs-off\n"); 424 seq_puts(s, "# / _------=> need-resched\n"); 425 seq_puts(s, "# | / _-----=> need-resched-lazy\n"); 426 seq_puts(s, "# || / _----=> hardirq/softirq\n"); 427 seq_puts(s, "# ||| / _---=> preempt-depth\n"); 428 seq_puts(s, "# |||| / _--=> preempt-lazy-depth\n"); 429 seq_puts(s, "# ||||| / _-=> migrate-disable\n"); 430 431 seq_puts(s, "# |||||| / "); 432 seq_puts(s, " MAX\n"); 433 434 seq_puts(s, "# ||||| / "); 435 seq_puts(s, " SINGLE Interference counters:\n"); 436 437 seq_puts(s, "# ||||||| RUNTIME "); 438 seq_puts(s, " NOISE %% OF CPU NOISE +-----------------------------+\n"); 439 440 seq_puts(s, "# TASK-PID CPU# ||||||| TIMESTAMP IN US "); 441 seq_puts(s, " IN US AVAILABLE IN US HW NMI IRQ SIRQ THREAD\n"); 442 443 seq_puts(s, "# | | | ||||||| | | "); 444 seq_puts(s, " | | | | | | | |\n"); 445 } 446 #else /* CONFIG_PREEMPT_RT */ 447 static void print_osnoise_headers(struct seq_file *s) 448 { 449 if (osnoise_data.tainted) 450 seq_puts(s, "# osnoise is tainted!\n"); 451 452 seq_puts(s, "# _-----=> irqs-off\n"); 453 seq_puts(s, "# / _----=> need-resched\n"); 454 seq_puts(s, "# | / _---=> hardirq/softirq\n"); 455 seq_puts(s, "# || / _--=> preempt-depth\n"); 456 seq_puts(s, "# ||| / _-=> migrate-disable "); 457 
seq_puts(s, " MAX\n"); 458 seq_puts(s, "# |||| / delay "); 459 seq_puts(s, " SINGLE Interference counters:\n"); 460 461 seq_puts(s, "# ||||| RUNTIME "); 462 seq_puts(s, " NOISE %% OF CPU NOISE +-----------------------------+\n"); 463 464 seq_puts(s, "# TASK-PID CPU# ||||| TIMESTAMP IN US "); 465 seq_puts(s, " IN US AVAILABLE IN US HW NMI IRQ SIRQ THREAD\n"); 466 467 seq_puts(s, "# | | | ||||| | | "); 468 seq_puts(s, " | | | | | | | |\n"); 469 } 470 #endif /* CONFIG_PREEMPT_RT */ 471 472 /* 473 * osnoise_taint - report an osnoise error. 474 */ 475 #define osnoise_taint(msg) ({ \ 476 struct osnoise_instance *inst; \ 477 struct trace_buffer *buffer; \ 478 \ 479 rcu_read_lock(); \ 480 list_for_each_entry_rcu(inst, &osnoise_instances, list) { \ 481 buffer = inst->tr->array_buffer.buffer; \ 482 trace_array_printk_buf(buffer, _THIS_IP_, msg); \ 483 } \ 484 rcu_read_unlock(); \ 485 osnoise_data.tainted = true; \ 486 }) 487 488 /* 489 * Record an osnoise_sample into the tracer buffer. 490 */ 491 static void 492 __trace_osnoise_sample(struct osnoise_sample *sample, struct trace_buffer *buffer) 493 { 494 struct trace_event_call *call = &event_osnoise; 495 struct ring_buffer_event *event; 496 struct osnoise_entry *entry; 497 498 event = trace_buffer_lock_reserve(buffer, TRACE_OSNOISE, sizeof(*entry), 499 tracing_gen_ctx()); 500 if (!event) 501 return; 502 entry = ring_buffer_event_data(event); 503 entry->runtime = sample->runtime; 504 entry->noise = sample->noise; 505 entry->max_sample = sample->max_sample; 506 entry->hw_count = sample->hw_count; 507 entry->nmi_count = sample->nmi_count; 508 entry->irq_count = sample->irq_count; 509 entry->softirq_count = sample->softirq_count; 510 entry->thread_count = sample->thread_count; 511 512 if (!call_filter_check_discard(call, entry, buffer, event)) 513 trace_buffer_unlock_commit_nostack(buffer, event); 514 } 515 516 /* 517 * Record an osnoise_sample on all osnoise instances. 
518 */ 519 static void trace_osnoise_sample(struct osnoise_sample *sample) 520 { 521 struct osnoise_instance *inst; 522 struct trace_buffer *buffer; 523 524 rcu_read_lock(); 525 list_for_each_entry_rcu(inst, &osnoise_instances, list) { 526 buffer = inst->tr->array_buffer.buffer; 527 __trace_osnoise_sample(sample, buffer); 528 } 529 rcu_read_unlock(); 530 } 531 532 #ifdef CONFIG_TIMERLAT_TRACER 533 /* 534 * Print the timerlat header info. 535 */ 536 #ifdef CONFIG_PREEMPT_RT 537 static void print_timerlat_headers(struct seq_file *s) 538 { 539 seq_puts(s, "# _-------=> irqs-off\n"); 540 seq_puts(s, "# / _------=> need-resched\n"); 541 seq_puts(s, "# | / _-----=> need-resched-lazy\n"); 542 seq_puts(s, "# || / _----=> hardirq/softirq\n"); 543 seq_puts(s, "# ||| / _---=> preempt-depth\n"); 544 seq_puts(s, "# |||| / _--=> preempt-lazy-depth\n"); 545 seq_puts(s, "# ||||| / _-=> migrate-disable\n"); 546 seq_puts(s, "# |||||| /\n"); 547 seq_puts(s, "# ||||||| ACTIVATION\n"); 548 seq_puts(s, "# TASK-PID CPU# ||||||| TIMESTAMP ID "); 549 seq_puts(s, " CONTEXT LATENCY\n"); 550 seq_puts(s, "# | | | ||||||| | | "); 551 seq_puts(s, " | |\n"); 552 } 553 #else /* CONFIG_PREEMPT_RT */ 554 static void print_timerlat_headers(struct seq_file *s) 555 { 556 seq_puts(s, "# _-----=> irqs-off\n"); 557 seq_puts(s, "# / _----=> need-resched\n"); 558 seq_puts(s, "# | / _---=> hardirq/softirq\n"); 559 seq_puts(s, "# || / _--=> preempt-depth\n"); 560 seq_puts(s, "# ||| / _-=> migrate-disable\n"); 561 seq_puts(s, "# |||| / delay\n"); 562 seq_puts(s, "# ||||| ACTIVATION\n"); 563 seq_puts(s, "# TASK-PID CPU# ||||| TIMESTAMP ID "); 564 seq_puts(s, " CONTEXT LATENCY\n"); 565 seq_puts(s, "# | | | ||||| | | "); 566 seq_puts(s, " | |\n"); 567 } 568 #endif /* CONFIG_PREEMPT_RT */ 569 570 static void 571 __trace_timerlat_sample(struct timerlat_sample *sample, struct trace_buffer *buffer) 572 { 573 struct trace_event_call *call = &event_osnoise; 574 struct ring_buffer_event *event; 575 struct timerlat_entry 
*entry; 576 577 event = trace_buffer_lock_reserve(buffer, TRACE_TIMERLAT, sizeof(*entry), 578 tracing_gen_ctx()); 579 if (!event) 580 return; 581 entry = ring_buffer_event_data(event); 582 entry->seqnum = sample->seqnum; 583 entry->context = sample->context; 584 entry->timer_latency = sample->timer_latency; 585 586 if (!call_filter_check_discard(call, entry, buffer, event)) 587 trace_buffer_unlock_commit_nostack(buffer, event); 588 } 589 590 /* 591 * Record an timerlat_sample into the tracer buffer. 592 */ 593 static void trace_timerlat_sample(struct timerlat_sample *sample) 594 { 595 struct osnoise_instance *inst; 596 struct trace_buffer *buffer; 597 598 rcu_read_lock(); 599 list_for_each_entry_rcu(inst, &osnoise_instances, list) { 600 buffer = inst->tr->array_buffer.buffer; 601 __trace_timerlat_sample(sample, buffer); 602 } 603 rcu_read_unlock(); 604 } 605 606 #ifdef CONFIG_STACKTRACE 607 608 #define MAX_CALLS 256 609 610 /* 611 * Stack trace will take place only at IRQ level, so, no need 612 * to control nesting here. 613 */ 614 struct trace_stack { 615 int stack_size; 616 int nr_entries; 617 unsigned long calls[MAX_CALLS]; 618 }; 619 620 static DEFINE_PER_CPU(struct trace_stack, trace_stack); 621 622 /* 623 * timerlat_save_stack - save a stack trace without printing 624 * 625 * Save the current stack trace without printing. The 626 * stack will be printed later, after the end of the measurement. 
627 */ 628 static void timerlat_save_stack(int skip) 629 { 630 unsigned int size, nr_entries; 631 struct trace_stack *fstack; 632 633 fstack = this_cpu_ptr(&trace_stack); 634 635 size = ARRAY_SIZE(fstack->calls); 636 637 nr_entries = stack_trace_save(fstack->calls, size, skip); 638 639 fstack->stack_size = nr_entries * sizeof(unsigned long); 640 fstack->nr_entries = nr_entries; 641 642 return; 643 644 } 645 646 static void 647 __timerlat_dump_stack(struct trace_buffer *buffer, struct trace_stack *fstack, unsigned int size) 648 { 649 struct trace_event_call *call = &event_osnoise; 650 struct ring_buffer_event *event; 651 struct stack_entry *entry; 652 653 event = trace_buffer_lock_reserve(buffer, TRACE_STACK, sizeof(*entry) + size, 654 tracing_gen_ctx()); 655 if (!event) 656 return; 657 658 entry = ring_buffer_event_data(event); 659 660 memcpy(&entry->caller, fstack->calls, size); 661 entry->size = fstack->nr_entries; 662 663 if (!call_filter_check_discard(call, entry, buffer, event)) 664 trace_buffer_unlock_commit_nostack(buffer, event); 665 } 666 667 /* 668 * timerlat_dump_stack - dump a stack trace previously saved 669 */ 670 static void timerlat_dump_stack(u64 latency) 671 { 672 struct osnoise_instance *inst; 673 struct trace_buffer *buffer; 674 struct trace_stack *fstack; 675 unsigned int size; 676 677 /* 678 * trace only if latency > print_stack config, if enabled. 
679 */ 680 if (!osnoise_data.print_stack || osnoise_data.print_stack > latency) 681 return; 682 683 preempt_disable_notrace(); 684 fstack = this_cpu_ptr(&trace_stack); 685 size = fstack->stack_size; 686 687 rcu_read_lock(); 688 list_for_each_entry_rcu(inst, &osnoise_instances, list) { 689 buffer = inst->tr->array_buffer.buffer; 690 __timerlat_dump_stack(buffer, fstack, size); 691 692 } 693 rcu_read_unlock(); 694 preempt_enable_notrace(); 695 } 696 #else /* CONFIG_STACKTRACE */ 697 #define timerlat_dump_stack(u64 latency) do {} while (0) 698 #define timerlat_save_stack(a) do {} while (0) 699 #endif /* CONFIG_STACKTRACE */ 700 #endif /* CONFIG_TIMERLAT_TRACER */ 701 702 /* 703 * Macros to encapsulate the time capturing infrastructure. 704 */ 705 #define time_get() trace_clock_local() 706 #define time_to_us(x) div_u64(x, 1000) 707 #define time_sub(a, b) ((a) - (b)) 708 709 /* 710 * cond_move_irq_delta_start - Forward the delta_start of a running IRQ 711 * 712 * If an IRQ is preempted by an NMI, its delta_start is pushed forward 713 * to discount the NMI interference. 714 * 715 * See get_int_safe_duration(). 716 */ 717 static inline void 718 cond_move_irq_delta_start(struct osnoise_variables *osn_var, u64 duration) 719 { 720 if (osn_var->irq.delta_start) 721 osn_var->irq.delta_start += duration; 722 } 723 724 #ifndef CONFIG_PREEMPT_RT 725 /* 726 * cond_move_softirq_delta_start - Forward the delta_start of a running softirq. 727 * 728 * If a softirq is preempted by an IRQ or NMI, its delta_start is pushed 729 * forward to discount the interference. 730 * 731 * See get_int_safe_duration(). 
732 */ 733 static inline void 734 cond_move_softirq_delta_start(struct osnoise_variables *osn_var, u64 duration) 735 { 736 if (osn_var->softirq.delta_start) 737 osn_var->softirq.delta_start += duration; 738 } 739 #else /* CONFIG_PREEMPT_RT */ 740 #define cond_move_softirq_delta_start(osn_var, duration) do {} while (0) 741 #endif 742 743 /* 744 * cond_move_thread_delta_start - Forward the delta_start of a running thread 745 * 746 * If a noisy thread is preempted by an softirq, IRQ or NMI, its delta_start 747 * is pushed forward to discount the interference. 748 * 749 * See get_int_safe_duration(). 750 */ 751 static inline void 752 cond_move_thread_delta_start(struct osnoise_variables *osn_var, u64 duration) 753 { 754 if (osn_var->thread.delta_start) 755 osn_var->thread.delta_start += duration; 756 } 757 758 /* 759 * get_int_safe_duration - Get the duration of a window 760 * 761 * The irq, softirq and thread varaibles need to have its duration without 762 * the interference from higher priority interrupts. Instead of keeping a 763 * variable to discount the interrupt interference from these variables, the 764 * starting time of these variables are pushed forward with the interrupt's 765 * duration. In this way, a single variable is used to: 766 * 767 * - Know if a given window is being measured. 768 * - Account its duration. 769 * - Discount the interference. 770 * 771 * To avoid getting inconsistent values, e.g.,: 772 * 773 * now = time_get() 774 * ---> interrupt! 775 * delta_start -= int duration; 776 * <--- 777 * duration = now - delta_start; 778 * 779 * result: negative duration if the variable duration before the 780 * interrupt was smaller than the interrupt execution. 781 * 782 * A counter of interrupts is used. If the counter increased, try 783 * to capture an interference safe duration. 
784 */ 785 static inline s64 786 get_int_safe_duration(struct osnoise_variables *osn_var, u64 *delta_start) 787 { 788 u64 int_counter, now; 789 s64 duration; 790 791 do { 792 int_counter = local_read(&osn_var->int_counter); 793 /* synchronize with interrupts */ 794 barrier(); 795 796 now = time_get(); 797 duration = (now - *delta_start); 798 799 /* synchronize with interrupts */ 800 barrier(); 801 } while (int_counter != local_read(&osn_var->int_counter)); 802 803 /* 804 * This is an evidence of race conditions that cause 805 * a value to be "discounted" too much. 806 */ 807 if (duration < 0) 808 osnoise_taint("Negative duration!\n"); 809 810 *delta_start = 0; 811 812 return duration; 813 } 814 815 /* 816 * 817 * set_int_safe_time - Save the current time on *time, aware of interference 818 * 819 * Get the time, taking into consideration a possible interference from 820 * higher priority interrupts. 821 * 822 * See get_int_safe_duration() for an explanation. 823 */ 824 static u64 825 set_int_safe_time(struct osnoise_variables *osn_var, u64 *time) 826 { 827 u64 int_counter; 828 829 do { 830 int_counter = local_read(&osn_var->int_counter); 831 /* synchronize with interrupts */ 832 barrier(); 833 834 *time = time_get(); 835 836 /* synchronize with interrupts */ 837 barrier(); 838 } while (int_counter != local_read(&osn_var->int_counter)); 839 840 return int_counter; 841 } 842 843 #ifdef CONFIG_TIMERLAT_TRACER 844 /* 845 * copy_int_safe_time - Copy *src into *desc aware of interference 846 */ 847 static u64 848 copy_int_safe_time(struct osnoise_variables *osn_var, u64 *dst, u64 *src) 849 { 850 u64 int_counter; 851 852 do { 853 int_counter = local_read(&osn_var->int_counter); 854 /* synchronize with interrupts */ 855 barrier(); 856 857 *dst = *src; 858 859 /* synchronize with interrupts */ 860 barrier(); 861 } while (int_counter != local_read(&osn_var->int_counter)); 862 863 return int_counter; 864 } 865 #endif /* CONFIG_TIMERLAT_TRACER */ 866 867 /* 868 * 
trace_osnoise_callback - NMI entry/exit callback 869 * 870 * This function is called at the entry and exit NMI code. The bool enter 871 * distinguishes between either case. This function is used to note a NMI 872 * occurrence, compute the noise caused by the NMI, and to remove the noise 873 * it is potentially causing on other interference variables. 874 */ 875 void trace_osnoise_callback(bool enter) 876 { 877 struct osnoise_variables *osn_var = this_cpu_osn_var(); 878 u64 duration; 879 880 if (!osn_var->sampling) 881 return; 882 883 /* 884 * Currently trace_clock_local() calls sched_clock() and the 885 * generic version is not NMI safe. 886 */ 887 if (!IS_ENABLED(CONFIG_GENERIC_SCHED_CLOCK)) { 888 if (enter) { 889 osn_var->nmi.delta_start = time_get(); 890 local_inc(&osn_var->int_counter); 891 } else { 892 duration = time_get() - osn_var->nmi.delta_start; 893 894 trace_nmi_noise(osn_var->nmi.delta_start, duration); 895 896 cond_move_irq_delta_start(osn_var, duration); 897 cond_move_softirq_delta_start(osn_var, duration); 898 cond_move_thread_delta_start(osn_var, duration); 899 } 900 } 901 902 if (enter) 903 osn_var->nmi.count++; 904 } 905 906 /* 907 * osnoise_trace_irq_entry - Note the starting of an IRQ 908 * 909 * Save the starting time of an IRQ. As IRQs are non-preemptive to other IRQs, 910 * it is safe to use a single variable (ons_var->irq) to save the statistics. 911 * The arrival_time is used to report... the arrival time. The delta_start 912 * is used to compute the duration at the IRQ exit handler. See 913 * cond_move_irq_delta_start(). 914 */ 915 void osnoise_trace_irq_entry(int id) 916 { 917 struct osnoise_variables *osn_var = this_cpu_osn_var(); 918 919 if (!osn_var->sampling) 920 return; 921 /* 922 * This value will be used in the report, but not to compute 923 * the execution time, so it is safe to get it unsafe. 
924 */ 925 osn_var->irq.arrival_time = time_get(); 926 set_int_safe_time(osn_var, &osn_var->irq.delta_start); 927 osn_var->irq.count++; 928 929 local_inc(&osn_var->int_counter); 930 } 931 932 /* 933 * osnoise_irq_exit - Note the end of an IRQ, sava data and trace 934 * 935 * Computes the duration of the IRQ noise, and trace it. Also discounts the 936 * interference from other sources of noise could be currently being accounted. 937 */ 938 void osnoise_trace_irq_exit(int id, const char *desc) 939 { 940 struct osnoise_variables *osn_var = this_cpu_osn_var(); 941 s64 duration; 942 943 if (!osn_var->sampling) 944 return; 945 946 duration = get_int_safe_duration(osn_var, &osn_var->irq.delta_start); 947 trace_irq_noise(id, desc, osn_var->irq.arrival_time, duration); 948 osn_var->irq.arrival_time = 0; 949 cond_move_softirq_delta_start(osn_var, duration); 950 cond_move_thread_delta_start(osn_var, duration); 951 } 952 953 /* 954 * trace_irqentry_callback - Callback to the irq:irq_entry traceevent 955 * 956 * Used to note the starting of an IRQ occurece. 957 */ 958 static void trace_irqentry_callback(void *data, int irq, 959 struct irqaction *action) 960 { 961 osnoise_trace_irq_entry(irq); 962 } 963 964 /* 965 * trace_irqexit_callback - Callback to the irq:irq_exit traceevent 966 * 967 * Used to note the end of an IRQ occurece. 968 */ 969 static void trace_irqexit_callback(void *data, int irq, 970 struct irqaction *action, int ret) 971 { 972 osnoise_trace_irq_exit(irq, action->name); 973 } 974 975 /* 976 * arch specific register function. 977 */ 978 int __weak osnoise_arch_register(void) 979 { 980 return 0; 981 } 982 983 /* 984 * arch specific unregister function. 985 */ 986 void __weak osnoise_arch_unregister(void) 987 { 988 return; 989 } 990 991 /* 992 * hook_irq_events - Hook IRQ handling events 993 * 994 * This function hooks the IRQ related callbacks to the respective trace 995 * events. 
996 */ 997 static int hook_irq_events(void) 998 { 999 int ret; 1000 1001 ret = register_trace_irq_handler_entry(trace_irqentry_callback, NULL); 1002 if (ret) 1003 goto out_err; 1004 1005 ret = register_trace_irq_handler_exit(trace_irqexit_callback, NULL); 1006 if (ret) 1007 goto out_unregister_entry; 1008 1009 ret = osnoise_arch_register(); 1010 if (ret) 1011 goto out_irq_exit; 1012 1013 return 0; 1014 1015 out_irq_exit: 1016 unregister_trace_irq_handler_exit(trace_irqexit_callback, NULL); 1017 out_unregister_entry: 1018 unregister_trace_irq_handler_entry(trace_irqentry_callback, NULL); 1019 out_err: 1020 return -EINVAL; 1021 } 1022 1023 /* 1024 * unhook_irq_events - Unhook IRQ handling events 1025 * 1026 * This function unhooks the IRQ related callbacks to the respective trace 1027 * events. 1028 */ 1029 static void unhook_irq_events(void) 1030 { 1031 osnoise_arch_unregister(); 1032 unregister_trace_irq_handler_exit(trace_irqexit_callback, NULL); 1033 unregister_trace_irq_handler_entry(trace_irqentry_callback, NULL); 1034 } 1035 1036 #ifndef CONFIG_PREEMPT_RT 1037 /* 1038 * trace_softirq_entry_callback - Note the starting of a softirq 1039 * 1040 * Save the starting time of a softirq. As softirqs are non-preemptive to 1041 * other softirqs, it is safe to use a single variable (ons_var->softirq) 1042 * to save the statistics. The arrival_time is used to report... the 1043 * arrival time. The delta_start is used to compute the duration at the 1044 * softirq exit handler. See cond_move_softirq_delta_start(). 1045 */ 1046 static void trace_softirq_entry_callback(void *data, unsigned int vec_nr) 1047 { 1048 struct osnoise_variables *osn_var = this_cpu_osn_var(); 1049 1050 if (!osn_var->sampling) 1051 return; 1052 /* 1053 * This value will be used in the report, but not to compute 1054 * the execution time, so it is safe to get it unsafe. 
1055 */ 1056 osn_var->softirq.arrival_time = time_get(); 1057 set_int_safe_time(osn_var, &osn_var->softirq.delta_start); 1058 osn_var->softirq.count++; 1059 1060 local_inc(&osn_var->int_counter); 1061 } 1062 1063 /* 1064 * trace_softirq_exit_callback - Note the end of an softirq 1065 * 1066 * Computes the duration of the softirq noise, and trace it. Also discounts the 1067 * interference from other sources of noise could be currently being accounted. 1068 */ 1069 static void trace_softirq_exit_callback(void *data, unsigned int vec_nr) 1070 { 1071 struct osnoise_variables *osn_var = this_cpu_osn_var(); 1072 s64 duration; 1073 1074 if (!osn_var->sampling) 1075 return; 1076 1077 if (unlikely(timerlat_enabled())) 1078 if (!timerlat_softirq_exit(osn_var)) 1079 return; 1080 1081 duration = get_int_safe_duration(osn_var, &osn_var->softirq.delta_start); 1082 trace_softirq_noise(vec_nr, osn_var->softirq.arrival_time, duration); 1083 cond_move_thread_delta_start(osn_var, duration); 1084 osn_var->softirq.arrival_time = 0; 1085 } 1086 1087 /* 1088 * hook_softirq_events - Hook softirq handling events 1089 * 1090 * This function hooks the softirq related callbacks to the respective trace 1091 * events. 1092 */ 1093 static int hook_softirq_events(void) 1094 { 1095 int ret; 1096 1097 ret = register_trace_softirq_entry(trace_softirq_entry_callback, NULL); 1098 if (ret) 1099 goto out_err; 1100 1101 ret = register_trace_softirq_exit(trace_softirq_exit_callback, NULL); 1102 if (ret) 1103 goto out_unreg_entry; 1104 1105 return 0; 1106 1107 out_unreg_entry: 1108 unregister_trace_softirq_entry(trace_softirq_entry_callback, NULL); 1109 out_err: 1110 return -EINVAL; 1111 } 1112 1113 /* 1114 * unhook_softirq_events - Unhook softirq handling events 1115 * 1116 * This function hooks the softirq related callbacks to the respective trace 1117 * events. 
1118 */ 1119 static void unhook_softirq_events(void) 1120 { 1121 unregister_trace_softirq_entry(trace_softirq_entry_callback, NULL); 1122 unregister_trace_softirq_exit(trace_softirq_exit_callback, NULL); 1123 } 1124 #else /* CONFIG_PREEMPT_RT */ 1125 /* 1126 * softirq are threads on the PREEMPT_RT mode. 1127 */ 1128 static int hook_softirq_events(void) 1129 { 1130 return 0; 1131 } 1132 static void unhook_softirq_events(void) 1133 { 1134 } 1135 #endif 1136 1137 /* 1138 * thread_entry - Record the starting of a thread noise window 1139 * 1140 * It saves the context switch time for a noisy thread, and increments 1141 * the interference counters. 1142 */ 1143 static void 1144 thread_entry(struct osnoise_variables *osn_var, struct task_struct *t) 1145 { 1146 if (!osn_var->sampling) 1147 return; 1148 /* 1149 * The arrival time will be used in the report, but not to compute 1150 * the execution time, so it is safe to get it unsafe. 1151 */ 1152 osn_var->thread.arrival_time = time_get(); 1153 1154 set_int_safe_time(osn_var, &osn_var->thread.delta_start); 1155 1156 osn_var->thread.count++; 1157 local_inc(&osn_var->int_counter); 1158 } 1159 1160 /* 1161 * thread_exit - Report the end of a thread noise window 1162 * 1163 * It computes the total noise from a thread, tracing if needed. 
1164 */ 1165 static void 1166 thread_exit(struct osnoise_variables *osn_var, struct task_struct *t) 1167 { 1168 s64 duration; 1169 1170 if (!osn_var->sampling) 1171 return; 1172 1173 if (unlikely(timerlat_enabled())) 1174 if (!timerlat_thread_exit(osn_var)) 1175 return; 1176 1177 duration = get_int_safe_duration(osn_var, &osn_var->thread.delta_start); 1178 1179 trace_thread_noise(t, osn_var->thread.arrival_time, duration); 1180 1181 osn_var->thread.arrival_time = 0; 1182 } 1183 1184 /* 1185 * trace_sched_switch - sched:sched_switch trace event handler 1186 * 1187 * This function is hooked to the sched:sched_switch trace event, and it is 1188 * used to record the beginning and to report the end of a thread noise window. 1189 */ 1190 static void 1191 trace_sched_switch_callback(void *data, bool preempt, 1192 struct task_struct *p, 1193 struct task_struct *n, 1194 unsigned int prev_state) 1195 { 1196 struct osnoise_variables *osn_var = this_cpu_osn_var(); 1197 int workload = test_bit(OSN_WORKLOAD, &osnoise_options); 1198 1199 if ((p->pid != osn_var->pid) || !workload) 1200 thread_exit(osn_var, p); 1201 1202 if ((n->pid != osn_var->pid) || !workload) 1203 thread_entry(osn_var, n); 1204 } 1205 1206 /* 1207 * hook_thread_events - Hook the insturmentation for thread noise 1208 * 1209 * Hook the osnoise tracer callbacks to handle the noise from other 1210 * threads on the necessary kernel events. 1211 */ 1212 static int hook_thread_events(void) 1213 { 1214 int ret; 1215 1216 ret = register_trace_sched_switch(trace_sched_switch_callback, NULL); 1217 if (ret) 1218 return -EINVAL; 1219 1220 return 0; 1221 } 1222 1223 /* 1224 * unhook_thread_events - *nhook the insturmentation for thread noise 1225 * 1226 * Unook the osnoise tracer callbacks to handle the noise from other 1227 * threads on the necessary kernel events. 
 */
static void unhook_thread_events(void)
{
	unregister_trace_sched_switch(trace_sched_switch_callback, NULL);
}

/*
 * save_osn_sample_stats - Save the osnoise_sample statistics
 *
 * Save the osnoise_sample statistics before the sampling phase. These
 * values will be used later to compute the diff between the statistics
 * before and after the osnoise sampling.
 *
 * Copies the nmi, irq, softirq and thread counters of @osn_var into @s.
 */
static void
save_osn_sample_stats(struct osnoise_variables *osn_var, struct osnoise_sample *s)
{
	s->nmi_count = osn_var->nmi.count;
	s->irq_count = osn_var->irq.count;
	s->softirq_count = osn_var->softirq.count;
	s->thread_count = osn_var->thread.count;
}

/*
 * diff_osn_sample_stats - Compute the osnoise_sample statistics
 *
 * After a sample period, compute the difference on the osnoise_sample
 * statistics. The struct osnoise_sample *s contains the statistics saved via
 * save_osn_sample_stats() before the osnoise sampling.
 *
 * On return each counter in @s holds "current value minus saved value",
 * i.e. the number of events seen since the snapshot.
 */
static void
diff_osn_sample_stats(struct osnoise_variables *osn_var, struct osnoise_sample *s)
{
	s->nmi_count = osn_var->nmi.count - s->nmi_count;
	s->irq_count = osn_var->irq.count - s->irq_count;
	s->softirq_count = osn_var->softirq.count - s->softirq_count;
	s->thread_count = osn_var->thread.count - s->thread_count;
}

/*
 * osnoise_stop_tracing - Stop tracing and the tracer.
1268 */ 1269 static __always_inline void osnoise_stop_tracing(void) 1270 { 1271 struct osnoise_instance *inst; 1272 struct trace_array *tr; 1273 1274 rcu_read_lock(); 1275 list_for_each_entry_rcu(inst, &osnoise_instances, list) { 1276 tr = inst->tr; 1277 trace_array_printk_buf(tr->array_buffer.buffer, _THIS_IP_, 1278 "stop tracing hit on cpu %d\n", smp_processor_id()); 1279 1280 if (test_bit(OSN_PANIC_ON_STOP, &osnoise_options)) 1281 panic("tracer hit stop condition on CPU %d\n", smp_processor_id()); 1282 1283 tracer_tracing_off(tr); 1284 } 1285 rcu_read_unlock(); 1286 } 1287 1288 /* 1289 * notify_new_max_latency - Notify a new max latency via fsnotify interface. 1290 */ 1291 static void notify_new_max_latency(u64 latency) 1292 { 1293 struct osnoise_instance *inst; 1294 struct trace_array *tr; 1295 1296 rcu_read_lock(); 1297 list_for_each_entry_rcu(inst, &osnoise_instances, list) { 1298 tr = inst->tr; 1299 if (tr->max_latency < latency) { 1300 tr->max_latency = latency; 1301 latency_fsnotify(tr); 1302 } 1303 } 1304 rcu_read_unlock(); 1305 } 1306 1307 /* 1308 * run_osnoise - Sample the time and look for osnoise 1309 * 1310 * Used to capture the time, looking for potential osnoise latency repeatedly. 1311 * Different from hwlat_detector, it is called with preemption and interrupts 1312 * enabled. This allows irqs, softirqs and threads to run, interfering on the 1313 * osnoise sampling thread, as they would do with a regular thread. 
1314 */ 1315 static int run_osnoise(void) 1316 { 1317 bool disable_irq = test_bit(OSN_IRQ_DISABLE, &osnoise_options); 1318 struct osnoise_variables *osn_var = this_cpu_osn_var(); 1319 u64 start, sample, last_sample; 1320 u64 last_int_count, int_count; 1321 s64 noise = 0, max_noise = 0; 1322 s64 total, last_total = 0; 1323 struct osnoise_sample s; 1324 bool disable_preemption; 1325 unsigned int threshold; 1326 u64 runtime, stop_in; 1327 u64 sum_noise = 0; 1328 int hw_count = 0; 1329 int ret = -1; 1330 1331 /* 1332 * Disabling preemption is only required if IRQs are enabled, 1333 * and the options is set on. 1334 */ 1335 disable_preemption = !disable_irq && test_bit(OSN_PREEMPT_DISABLE, &osnoise_options); 1336 1337 /* 1338 * Considers the current thread as the workload. 1339 */ 1340 osn_var->pid = current->pid; 1341 1342 /* 1343 * Save the current stats for the diff 1344 */ 1345 save_osn_sample_stats(osn_var, &s); 1346 1347 /* 1348 * if threshold is 0, use the default value of 5 us. 1349 */ 1350 threshold = tracing_thresh ? : 5000; 1351 1352 /* 1353 * Apply PREEMPT and IRQ disabled options. 1354 */ 1355 if (disable_irq) 1356 local_irq_disable(); 1357 1358 if (disable_preemption) 1359 preempt_disable(); 1360 1361 /* 1362 * Make sure NMIs see sampling first 1363 */ 1364 osn_var->sampling = true; 1365 barrier(); 1366 1367 /* 1368 * Transform the *_us config to nanoseconds to avoid the 1369 * division on the main loop. 1370 */ 1371 runtime = osnoise_data.sample_runtime * NSEC_PER_USEC; 1372 stop_in = osnoise_data.stop_tracing * NSEC_PER_USEC; 1373 1374 /* 1375 * Start timestemp 1376 */ 1377 start = time_get(); 1378 1379 /* 1380 * "previous" loop. 1381 */ 1382 last_int_count = set_int_safe_time(osn_var, &last_sample); 1383 1384 do { 1385 /* 1386 * Get sample! 1387 */ 1388 int_count = set_int_safe_time(osn_var, &sample); 1389 1390 noise = time_sub(sample, last_sample); 1391 1392 /* 1393 * This shouldn't happen. 
1394 */ 1395 if (noise < 0) { 1396 osnoise_taint("negative noise!"); 1397 goto out; 1398 } 1399 1400 /* 1401 * Sample runtime. 1402 */ 1403 total = time_sub(sample, start); 1404 1405 /* 1406 * Check for possible overflows. 1407 */ 1408 if (total < last_total) { 1409 osnoise_taint("total overflow!"); 1410 break; 1411 } 1412 1413 last_total = total; 1414 1415 if (noise >= threshold) { 1416 int interference = int_count - last_int_count; 1417 1418 if (noise > max_noise) 1419 max_noise = noise; 1420 1421 if (!interference) 1422 hw_count++; 1423 1424 sum_noise += noise; 1425 1426 trace_sample_threshold(last_sample, noise, interference); 1427 1428 if (osnoise_data.stop_tracing) 1429 if (noise > stop_in) 1430 osnoise_stop_tracing(); 1431 } 1432 1433 /* 1434 * In some cases, notably when running on a nohz_full CPU with 1435 * a stopped tick PREEMPT_RCU has no way to account for QSs. 1436 * This will eventually cause unwarranted noise as PREEMPT_RCU 1437 * will force preemption as the means of ending the current 1438 * grace period. We avoid this problem by calling 1439 * rcu_momentary_dyntick_idle(), which performs a zero duration 1440 * EQS allowing PREEMPT_RCU to end the current grace period. 1441 * This call shouldn't be wrapped inside an RCU critical 1442 * section. 1443 * 1444 * Note that in non PREEMPT_RCU kernels QSs are handled through 1445 * cond_resched() 1446 */ 1447 if (IS_ENABLED(CONFIG_PREEMPT_RCU)) { 1448 if (!disable_irq) 1449 local_irq_disable(); 1450 1451 rcu_momentary_dyntick_idle(); 1452 1453 if (!disable_irq) 1454 local_irq_enable(); 1455 } 1456 1457 /* 1458 * For the non-preemptive kernel config: let threads runs, if 1459 * they so wish, unless set not do to so. 1460 */ 1461 if (!disable_irq && !disable_preemption) 1462 cond_resched(); 1463 1464 last_sample = sample; 1465 last_int_count = int_count; 1466 1467 } while (total < runtime && !kthread_should_stop()); 1468 1469 /* 1470 * Finish the above in the view for interrupts. 
1471 */ 1472 barrier(); 1473 1474 osn_var->sampling = false; 1475 1476 /* 1477 * Make sure sampling data is no longer updated. 1478 */ 1479 barrier(); 1480 1481 /* 1482 * Return to the preemptive state. 1483 */ 1484 if (disable_preemption) 1485 preempt_enable(); 1486 1487 if (disable_irq) 1488 local_irq_enable(); 1489 1490 /* 1491 * Save noise info. 1492 */ 1493 s.noise = time_to_us(sum_noise); 1494 s.runtime = time_to_us(total); 1495 s.max_sample = time_to_us(max_noise); 1496 s.hw_count = hw_count; 1497 1498 /* Save interference stats info */ 1499 diff_osn_sample_stats(osn_var, &s); 1500 1501 trace_osnoise_sample(&s); 1502 1503 notify_new_max_latency(max_noise); 1504 1505 if (osnoise_data.stop_tracing_total) 1506 if (s.noise > osnoise_data.stop_tracing_total) 1507 osnoise_stop_tracing(); 1508 1509 return 0; 1510 out: 1511 return ret; 1512 } 1513 1514 static struct cpumask osnoise_cpumask; 1515 static struct cpumask save_cpumask; 1516 1517 /* 1518 * osnoise_sleep - sleep until the next period 1519 */ 1520 static void osnoise_sleep(void) 1521 { 1522 u64 interval; 1523 ktime_t wake_time; 1524 1525 mutex_lock(&interface_lock); 1526 interval = osnoise_data.sample_period - osnoise_data.sample_runtime; 1527 mutex_unlock(&interface_lock); 1528 1529 /* 1530 * differently from hwlat_detector, the osnoise tracer can run 1531 * without a pause because preemption is on. 1532 */ 1533 if (!interval) { 1534 /* Let synchronize_rcu_tasks() make progress */ 1535 cond_resched_tasks_rcu_qs(); 1536 return; 1537 } 1538 1539 wake_time = ktime_add_us(ktime_get(), interval); 1540 __set_current_state(TASK_INTERRUPTIBLE); 1541 1542 while (schedule_hrtimeout_range(&wake_time, 0, HRTIMER_MODE_ABS)) { 1543 if (kthread_should_stop()) 1544 break; 1545 } 1546 } 1547 1548 /* 1549 * osnoise_main - The osnoise detection kernel thread 1550 * 1551 * Calls run_osnoise() function to measure the osnoise for the configured runtime, 1552 * every period. 
 *
 * Loops until the kthread is asked to stop. The return value of
 * run_osnoise() is not checked.
 */
static int osnoise_main(void *data)
{

	while (!kthread_should_stop()) {
		run_osnoise();
		osnoise_sleep();
	}

	return 0;
}

#ifdef CONFIG_TIMERLAT_TRACER
/*
 * timerlat_irq - hrtimer handler for timerlat.
 *
 * Emits the IRQ-context timerlat sample (latency = time the handler runs
 * minus the expected expiration stored in abs_period) and wakes the timerlat
 * thread. If the stop_tracing threshold is hit, tracing is stopped and the
 * thread is not woken. The timer is never restarted from here; it is re-armed
 * by wait_next_period().
 */
static enum hrtimer_restart timerlat_irq(struct hrtimer *timer)
{
	struct osnoise_variables *osn_var = this_cpu_osn_var();
	struct timerlat_variables *tlat;
	struct timerlat_sample s;
	u64 now;
	u64 diff;

	/*
	 * I am not sure if the timer was armed for this CPU. So, get
	 * the timerlat struct from the timer itself, not from this
	 * CPU.
	 */
	tlat = container_of(timer, struct timerlat_variables, timer);

	now = ktime_to_ns(hrtimer_cb_get_time(&tlat->timer));

	/*
	 * Enable the osnoise: events for thread and softirq.
	 */
	tlat->tracing_thread = true;

	osn_var->thread.arrival_time = time_get();

	/*
	 * A hardirq is running: the timer IRQ. It is for sure preempting
	 * a thread, and potentially preempting a softirq.
	 *
	 * At this point, it is not interesting to know the duration of the
	 * preempted thread (and maybe softirq), but how much time they will
	 * delay the beginning of the execution of the timer thread.
	 *
	 * To get the correct (net) delay added by the softirq, its delta_start
	 * is set as the IRQ one. In this way, at the return of the IRQ, the delta
	 * start of the softirq will be zeroed, accounting then only the time
	 * after that.
	 *
	 * The thread follows the same principle. However, if a softirq is
	 * running, the thread needs to receive the softirq delta_start. The
	 * reason being is that the softirq will be the last to be unfolded,
	 * resetting the thread delay to zero.
	 *
	 * The PREEMPT_RT is a special case, though. As softirqs run as threads
	 * on RT, moving the thread is enough.
	 */
	if (!IS_ENABLED(CONFIG_PREEMPT_RT) && osn_var->softirq.delta_start) {
		copy_int_safe_time(osn_var, &osn_var->thread.delta_start,
				   &osn_var->softirq.delta_start);

		copy_int_safe_time(osn_var, &osn_var->softirq.delta_start,
				    &osn_var->irq.delta_start);
	} else {
		copy_int_safe_time(osn_var, &osn_var->thread.delta_start,
				    &osn_var->irq.delta_start);
	}

	/*
	 * Compare the current time with the expected time.
	 */
	diff = now - tlat->abs_period;

	tlat->count++;
	s.seqnum = tlat->count;
	s.timer_latency = diff;
	s.context = IRQ_CONTEXT;

	trace_timerlat_sample(&s);

	if (osnoise_data.stop_tracing) {
		if (time_to_us(diff) >= osnoise_data.stop_tracing) {

			/*
			 * At this point, if stop_tracing is set and <= print_stack,
			 * print_stack is set and would be printed in the thread handler.
			 *
			 * Thus, print the stack trace as it is helpful to define the
			 * root cause of an IRQ latency.
			 */
			if (osnoise_data.stop_tracing <= osnoise_data.print_stack) {
				timerlat_save_stack(0);
				timerlat_dump_stack(time_to_us(diff));
			}

			osnoise_stop_tracing();
			notify_new_max_latency(diff);

			/* Stop condition hit: return without waking the thread. */
			return HRTIMER_NORESTART;
		}
	}

	wake_up_process(tlat->kthread);

	if (osnoise_data.print_stack)
		timerlat_save_stack(0);

	return HRTIMER_NORESTART;
}

/*
 * wait_next_period - Wait for the next period for timerlat
 *
 * Advances abs_period by one timerlat period (timerlat_period is in us,
 * hence the * 1000 to get ns), arms the timer for that absolute time and
 * sleeps until woken. Always returns 1.
 */
static int wait_next_period(struct timerlat_variables *tlat)
{
	ktime_t next_abs_period, now;
	u64 rel_period = osnoise_data.timerlat_period * 1000;

	now = hrtimer_cb_get_time(&tlat->timer);
	next_abs_period = ns_to_ktime(tlat->abs_period + rel_period);

	/*
	 * Save the next abs_period.
	 */
	tlat->abs_period = (u64) ktime_to_ns(next_abs_period);

	/*
	 * If the new abs_period is in the past, skip the activation.
	 */
	while (ktime_compare(now, next_abs_period) > 0) {
		next_abs_period = ns_to_ktime(tlat->abs_period + rel_period);
		tlat->abs_period = (u64) ktime_to_ns(next_abs_period);
	}

	/* Set the state before arming the timer, then sleep. */
	set_current_state(TASK_INTERRUPTIBLE);

	hrtimer_start(&tlat->timer, next_abs_period, HRTIMER_MODE_ABS_PINNED_HARD);
	schedule();
	return 1;
}

/*
 * timerlat_main - Timerlat main
 *
 * Per-CPU timerlat thread: sets itself to SCHED_FIFO at
 * DEFAULT_TIMERLAT_PRIO, initializes the per-CPU timer, and then, after each
 * wakeup, emits the thread-context timerlat sample before waiting for the
 * next period. The timer is cancelled when the thread is asked to stop.
 */
static int timerlat_main(void *data)
{
	struct osnoise_variables *osn_var = this_cpu_osn_var();
	struct timerlat_variables *tlat = this_cpu_tmr_var();
	struct timerlat_sample s;
	struct sched_param sp;
	u64 now, diff;

	/*
	 * Make the thread RT, that is how cyclictest is usually used.
	 */
	sp.sched_priority = DEFAULT_TIMERLAT_PRIO;
	sched_setscheduler_nocheck(current, SCHED_FIFO, &sp);

	tlat->count = 0;
	tlat->tracing_thread = false;

	hrtimer_init(&tlat->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED_HARD);
	tlat->timer.function = timerlat_irq;
	tlat->kthread = current;
	osn_var->pid = current->pid;
	/*
	 * Annotate the arrival time.
	 */
	tlat->abs_period = hrtimer_cb_get_time(&tlat->timer);

	wait_next_period(tlat);

	osn_var->sampling = 1;

	while (!kthread_should_stop()) {
		now = ktime_to_ns(hrtimer_cb_get_time(&tlat->timer));
		diff = now - tlat->abs_period;

		/* Same seqnum as the IRQ sample taken by timerlat_irq(). */
		s.seqnum = tlat->count;
		s.timer_latency = diff;
		s.context = THREAD_CONTEXT;

		trace_timerlat_sample(&s);

		timerlat_dump_stack(time_to_us(diff));

		tlat->tracing_thread = false;
		if (osnoise_data.stop_tracing_total)
			if (time_to_us(diff) >= osnoise_data.stop_tracing_total)
				osnoise_stop_tracing();

		wait_next_period(tlat);
	}

	hrtimer_cancel(&tlat->timer);
	return 0;
}
#else /* CONFIG_TIMERLAT_TRACER */
/* Without the timerlat tracer the thread function is an empty stub. */
static int timerlat_main(void *data)
{
	return 0;
}
#endif /* CONFIG_TIMERLAT_TRACER */

/*
 * stop_kthread - stop a workload thread
 *
 * Stops the kthread recorded for @cpu, if any, and clears the pointer.
 * When there is no kthread and the OSNOISE_WORKLOAD option is off, it
 * turns sampling off for that CPU instead.
 */
static void stop_kthread(unsigned int cpu)
{
	struct task_struct *kthread;

	kthread = per_cpu(per_cpu_osnoise_var, cpu).kthread;
	if (kthread) {
		kthread_stop(kthread);
		per_cpu(per_cpu_osnoise_var, cpu).kthread = NULL;
	} else {
		/* No workload thread was created: only sampling to switch off. */
		if (!test_bit(OSN_WORKLOAD, &osnoise_options)) {
			per_cpu(per_cpu_osnoise_var, cpu).sampling = false;
			barrier();
			return;
		}
	}
}

/*
 * stop_per_cpu_kthreads - Stop per-cpu threads
 *
 * Stop the osnoise sampling thread. Use this on unload and at system
 * shutdown.
1786 */ 1787 static void stop_per_cpu_kthreads(void) 1788 { 1789 int cpu; 1790 1791 cpus_read_lock(); 1792 1793 for_each_online_cpu(cpu) 1794 stop_kthread(cpu); 1795 1796 cpus_read_unlock(); 1797 } 1798 1799 /* 1800 * start_kthread - Start a workload tread 1801 */ 1802 static int start_kthread(unsigned int cpu) 1803 { 1804 struct task_struct *kthread; 1805 void *main = osnoise_main; 1806 char comm[24]; 1807 1808 if (timerlat_enabled()) { 1809 snprintf(comm, 24, "timerlat/%d", cpu); 1810 main = timerlat_main; 1811 } else { 1812 /* if no workload, just return */ 1813 if (!test_bit(OSN_WORKLOAD, &osnoise_options)) { 1814 per_cpu(per_cpu_osnoise_var, cpu).sampling = true; 1815 barrier(); 1816 return 0; 1817 } 1818 1819 snprintf(comm, 24, "osnoise/%d", cpu); 1820 } 1821 1822 kthread = kthread_run_on_cpu(main, NULL, cpu, comm); 1823 1824 if (IS_ERR(kthread)) { 1825 pr_err(BANNER "could not start sampling thread\n"); 1826 stop_per_cpu_kthreads(); 1827 return -ENOMEM; 1828 } 1829 1830 per_cpu(per_cpu_osnoise_var, cpu).kthread = kthread; 1831 1832 return 0; 1833 } 1834 1835 /* 1836 * start_per_cpu_kthread - Kick off per-cpu osnoise sampling kthreads 1837 * 1838 * This starts the kernel thread that will look for osnoise on many 1839 * cpus. 1840 */ 1841 static int start_per_cpu_kthreads(void) 1842 { 1843 struct cpumask *current_mask = &save_cpumask; 1844 int retval = 0; 1845 int cpu; 1846 1847 cpus_read_lock(); 1848 /* 1849 * Run only on online CPUs in which osnoise is allowed to run. 
1850 */ 1851 cpumask_and(current_mask, cpu_online_mask, &osnoise_cpumask); 1852 1853 for_each_possible_cpu(cpu) 1854 per_cpu(per_cpu_osnoise_var, cpu).kthread = NULL; 1855 1856 for_each_cpu(cpu, current_mask) { 1857 retval = start_kthread(cpu); 1858 if (retval) { 1859 cpus_read_unlock(); 1860 stop_per_cpu_kthreads(); 1861 return retval; 1862 } 1863 } 1864 1865 cpus_read_unlock(); 1866 1867 return retval; 1868 } 1869 1870 #ifdef CONFIG_HOTPLUG_CPU 1871 static void osnoise_hotplug_workfn(struct work_struct *dummy) 1872 { 1873 unsigned int cpu = smp_processor_id(); 1874 1875 mutex_lock(&trace_types_lock); 1876 1877 if (!osnoise_has_registered_instances()) 1878 goto out_unlock_trace; 1879 1880 mutex_lock(&interface_lock); 1881 cpus_read_lock(); 1882 1883 if (!cpumask_test_cpu(cpu, &osnoise_cpumask)) 1884 goto out_unlock; 1885 1886 start_kthread(cpu); 1887 1888 out_unlock: 1889 cpus_read_unlock(); 1890 mutex_unlock(&interface_lock); 1891 out_unlock_trace: 1892 mutex_unlock(&trace_types_lock); 1893 } 1894 1895 static DECLARE_WORK(osnoise_hotplug_work, osnoise_hotplug_workfn); 1896 1897 /* 1898 * osnoise_cpu_init - CPU hotplug online callback function 1899 */ 1900 static int osnoise_cpu_init(unsigned int cpu) 1901 { 1902 schedule_work_on(cpu, &osnoise_hotplug_work); 1903 return 0; 1904 } 1905 1906 /* 1907 * osnoise_cpu_die - CPU hotplug offline callback function 1908 */ 1909 static int osnoise_cpu_die(unsigned int cpu) 1910 { 1911 stop_kthread(cpu); 1912 return 0; 1913 } 1914 1915 static void osnoise_init_hotplug_support(void) 1916 { 1917 int ret; 1918 1919 ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "trace/osnoise:online", 1920 osnoise_cpu_init, osnoise_cpu_die); 1921 if (ret < 0) 1922 pr_warn(BANNER "Error to init cpu hotplug support\n"); 1923 1924 return; 1925 } 1926 #else /* CONFIG_HOTPLUG_CPU */ 1927 static void osnoise_init_hotplug_support(void) 1928 { 1929 return; 1930 } 1931 #endif /* CONFIG_HOTPLUG_CPU */ 1932 1933 /* 1934 * seq file functions for the 
osnoise/options file. 1935 */ 1936 static void *s_options_start(struct seq_file *s, loff_t *pos) 1937 { 1938 int option = *pos; 1939 1940 mutex_lock(&interface_lock); 1941 1942 if (option >= OSN_MAX) 1943 return NULL; 1944 1945 return pos; 1946 } 1947 1948 static void *s_options_next(struct seq_file *s, void *v, loff_t *pos) 1949 { 1950 int option = ++(*pos); 1951 1952 if (option >= OSN_MAX) 1953 return NULL; 1954 1955 return pos; 1956 } 1957 1958 static int s_options_show(struct seq_file *s, void *v) 1959 { 1960 loff_t *pos = v; 1961 int option = *pos; 1962 1963 if (option == OSN_DEFAULTS) { 1964 if (osnoise_options == OSN_DEFAULT_OPTIONS) 1965 seq_printf(s, "%s", osnoise_options_str[option]); 1966 else 1967 seq_printf(s, "NO_%s", osnoise_options_str[option]); 1968 goto out; 1969 } 1970 1971 if (test_bit(option, &osnoise_options)) 1972 seq_printf(s, "%s", osnoise_options_str[option]); 1973 else 1974 seq_printf(s, "NO_%s", osnoise_options_str[option]); 1975 1976 out: 1977 if (option != OSN_MAX) 1978 seq_puts(s, " "); 1979 1980 return 0; 1981 } 1982 1983 static void s_options_stop(struct seq_file *s, void *v) 1984 { 1985 seq_puts(s, "\n"); 1986 mutex_unlock(&interface_lock); 1987 } 1988 1989 static const struct seq_operations osnoise_options_seq_ops = { 1990 .start = s_options_start, 1991 .next = s_options_next, 1992 .show = s_options_show, 1993 .stop = s_options_stop 1994 }; 1995 1996 static int osnoise_options_open(struct inode *inode, struct file *file) 1997 { 1998 return seq_open(file, &osnoise_options_seq_ops); 1999 }; 2000 2001 /** 2002 * osnoise_options_write - Write function for "options" entry 2003 * @filp: The active open file structure 2004 * @ubuf: The user buffer that contains the value to write 2005 * @cnt: The maximum number of bytes to write to "file" 2006 * @ppos: The current position in @file 2007 * 2008 * Writing the option name sets the option, writing the "NO_" 2009 * prefix in front of the option name disables it. 
2010 * 2011 * Writing "DEFAULTS" resets the option values to the default ones. 2012 */ 2013 static ssize_t osnoise_options_write(struct file *filp, const char __user *ubuf, 2014 size_t cnt, loff_t *ppos) 2015 { 2016 int running, option, enable, retval; 2017 char buf[256], *option_str; 2018 2019 if (cnt >= 256) 2020 return -EINVAL; 2021 2022 if (copy_from_user(buf, ubuf, cnt)) 2023 return -EFAULT; 2024 2025 buf[cnt] = 0; 2026 2027 if (strncmp(buf, "NO_", 3)) { 2028 option_str = strstrip(buf); 2029 enable = true; 2030 } else { 2031 option_str = strstrip(&buf[3]); 2032 enable = false; 2033 } 2034 2035 option = match_string(osnoise_options_str, OSN_MAX, option_str); 2036 if (option < 0) 2037 return -EINVAL; 2038 2039 /* 2040 * trace_types_lock is taken to avoid concurrency on start/stop. 2041 */ 2042 mutex_lock(&trace_types_lock); 2043 running = osnoise_has_registered_instances(); 2044 if (running) 2045 stop_per_cpu_kthreads(); 2046 2047 mutex_lock(&interface_lock); 2048 /* 2049 * avoid CPU hotplug operations that might read options. 2050 */ 2051 cpus_read_lock(); 2052 2053 retval = cnt; 2054 2055 if (enable) { 2056 if (option == OSN_DEFAULTS) 2057 osnoise_options = OSN_DEFAULT_OPTIONS; 2058 else 2059 set_bit(option, &osnoise_options); 2060 } else { 2061 if (option == OSN_DEFAULTS) 2062 retval = -EINVAL; 2063 else 2064 clear_bit(option, &osnoise_options); 2065 } 2066 2067 cpus_read_unlock(); 2068 mutex_unlock(&interface_lock); 2069 2070 if (running) 2071 start_per_cpu_kthreads(); 2072 mutex_unlock(&trace_types_lock); 2073 2074 return retval; 2075 } 2076 2077 /* 2078 * osnoise_cpus_read - Read function for reading the "cpus" file 2079 * @filp: The active open file structure 2080 * @ubuf: The userspace provided buffer to read value into 2081 * @cnt: The maximum number of bytes to read 2082 * @ppos: The current "file" position 2083 * 2084 * Prints the "cpus" output into the user-provided buffer. 
2085 */ 2086 static ssize_t 2087 osnoise_cpus_read(struct file *filp, char __user *ubuf, size_t count, 2088 loff_t *ppos) 2089 { 2090 char *mask_str; 2091 int len; 2092 2093 mutex_lock(&interface_lock); 2094 2095 len = snprintf(NULL, 0, "%*pbl\n", cpumask_pr_args(&osnoise_cpumask)) + 1; 2096 mask_str = kmalloc(len, GFP_KERNEL); 2097 if (!mask_str) { 2098 count = -ENOMEM; 2099 goto out_unlock; 2100 } 2101 2102 len = snprintf(mask_str, len, "%*pbl\n", cpumask_pr_args(&osnoise_cpumask)); 2103 if (len >= count) { 2104 count = -EINVAL; 2105 goto out_free; 2106 } 2107 2108 count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len); 2109 2110 out_free: 2111 kfree(mask_str); 2112 out_unlock: 2113 mutex_unlock(&interface_lock); 2114 2115 return count; 2116 } 2117 2118 /* 2119 * osnoise_cpus_write - Write function for "cpus" entry 2120 * @filp: The active open file structure 2121 * @ubuf: The user buffer that contains the value to write 2122 * @cnt: The maximum number of bytes to write to "file" 2123 * @ppos: The current position in @file 2124 * 2125 * This function provides a write implementation for the "cpus" 2126 * interface to the osnoise trace. By default, it lists all CPUs, 2127 * in this way, allowing osnoise threads to run on any online CPU 2128 * of the system. It serves to restrict the execution of osnoise to the 2129 * set of CPUs writing via this interface. Why not use "tracing_cpumask"? 2130 * Because the user might be interested in tracing what is running on 2131 * other CPUs. For instance, one might run osnoise in one HT CPU 2132 * while observing what is running on the sibling HT CPU. 
2133 */ 2134 static ssize_t 2135 osnoise_cpus_write(struct file *filp, const char __user *ubuf, size_t count, 2136 loff_t *ppos) 2137 { 2138 cpumask_var_t osnoise_cpumask_new; 2139 int running, err; 2140 char buf[256]; 2141 2142 if (count >= 256) 2143 return -EINVAL; 2144 2145 if (copy_from_user(buf, ubuf, count)) 2146 return -EFAULT; 2147 2148 if (!zalloc_cpumask_var(&osnoise_cpumask_new, GFP_KERNEL)) 2149 return -ENOMEM; 2150 2151 err = cpulist_parse(buf, osnoise_cpumask_new); 2152 if (err) 2153 goto err_free; 2154 2155 /* 2156 * trace_types_lock is taken to avoid concurrency on start/stop. 2157 */ 2158 mutex_lock(&trace_types_lock); 2159 running = osnoise_has_registered_instances(); 2160 if (running) 2161 stop_per_cpu_kthreads(); 2162 2163 mutex_lock(&interface_lock); 2164 /* 2165 * osnoise_cpumask is read by CPU hotplug operations. 2166 */ 2167 cpus_read_lock(); 2168 2169 cpumask_copy(&osnoise_cpumask, osnoise_cpumask_new); 2170 2171 cpus_read_unlock(); 2172 mutex_unlock(&interface_lock); 2173 2174 if (running) 2175 start_per_cpu_kthreads(); 2176 mutex_unlock(&trace_types_lock); 2177 2178 free_cpumask_var(osnoise_cpumask_new); 2179 return count; 2180 2181 err_free: 2182 free_cpumask_var(osnoise_cpumask_new); 2183 2184 return err; 2185 } 2186 2187 /* 2188 * osnoise/runtime_us: cannot be greater than the period. 2189 */ 2190 static struct trace_min_max_param osnoise_runtime = { 2191 .lock = &interface_lock, 2192 .val = &osnoise_data.sample_runtime, 2193 .max = &osnoise_data.sample_period, 2194 .min = NULL, 2195 }; 2196 2197 /* 2198 * osnoise/period_us: cannot be smaller than the runtime. 2199 */ 2200 static struct trace_min_max_param osnoise_period = { 2201 .lock = &interface_lock, 2202 .val = &osnoise_data.sample_period, 2203 .max = NULL, 2204 .min = &osnoise_data.sample_runtime, 2205 }; 2206 2207 /* 2208 * osnoise/stop_tracing_us: no limit. 
2209 */ 2210 static struct trace_min_max_param osnoise_stop_tracing_in = { 2211 .lock = &interface_lock, 2212 .val = &osnoise_data.stop_tracing, 2213 .max = NULL, 2214 .min = NULL, 2215 }; 2216 2217 /* 2218 * osnoise/stop_tracing_total_us: no limit. 2219 */ 2220 static struct trace_min_max_param osnoise_stop_tracing_total = { 2221 .lock = &interface_lock, 2222 .val = &osnoise_data.stop_tracing_total, 2223 .max = NULL, 2224 .min = NULL, 2225 }; 2226 2227 #ifdef CONFIG_TIMERLAT_TRACER 2228 /* 2229 * osnoise/print_stack: print the stacktrace of the IRQ handler if the total 2230 * latency is higher than val. 2231 */ 2232 static struct trace_min_max_param osnoise_print_stack = { 2233 .lock = &interface_lock, 2234 .val = &osnoise_data.print_stack, 2235 .max = NULL, 2236 .min = NULL, 2237 }; 2238 2239 /* 2240 * osnoise/timerlat_period: min 100 us, max 1 s 2241 */ 2242 u64 timerlat_min_period = 100; 2243 u64 timerlat_max_period = 1000000; 2244 static struct trace_min_max_param timerlat_period = { 2245 .lock = &interface_lock, 2246 .val = &osnoise_data.timerlat_period, 2247 .max = &timerlat_max_period, 2248 .min = &timerlat_min_period, 2249 }; 2250 #endif 2251 2252 static const struct file_operations cpus_fops = { 2253 .open = tracing_open_generic, 2254 .read = osnoise_cpus_read, 2255 .write = osnoise_cpus_write, 2256 .llseek = generic_file_llseek, 2257 }; 2258 2259 static const struct file_operations osnoise_options_fops = { 2260 .open = osnoise_options_open, 2261 .read = seq_read, 2262 .llseek = seq_lseek, 2263 .release = seq_release, 2264 .write = osnoise_options_write 2265 }; 2266 2267 #ifdef CONFIG_TIMERLAT_TRACER 2268 #ifdef CONFIG_STACKTRACE 2269 static int init_timerlat_stack_tracefs(struct dentry *top_dir) 2270 { 2271 struct dentry *tmp; 2272 2273 tmp = tracefs_create_file("print_stack", TRACE_MODE_WRITE, top_dir, 2274 &osnoise_print_stack, &trace_min_max_fops); 2275 if (!tmp) 2276 return -ENOMEM; 2277 2278 return 0; 2279 } 2280 #else /* CONFIG_STACKTRACE */ 2281 
static int init_timerlat_stack_tracefs(struct dentry *top_dir) 2282 { 2283 return 0; 2284 } 2285 #endif /* CONFIG_STACKTRACE */ 2286 2287 /* 2288 * init_timerlat_tracefs - A function to initialize the timerlat interface files 2289 */ 2290 static int init_timerlat_tracefs(struct dentry *top_dir) 2291 { 2292 struct dentry *tmp; 2293 2294 tmp = tracefs_create_file("timerlat_period_us", TRACE_MODE_WRITE, top_dir, 2295 &timerlat_period, &trace_min_max_fops); 2296 if (!tmp) 2297 return -ENOMEM; 2298 2299 return init_timerlat_stack_tracefs(top_dir); 2300 } 2301 #else /* CONFIG_TIMERLAT_TRACER */ 2302 static int init_timerlat_tracefs(struct dentry *top_dir) 2303 { 2304 return 0; 2305 } 2306 #endif /* CONFIG_TIMERLAT_TRACER */ 2307 2308 /* 2309 * init_tracefs - A function to initialize the tracefs interface files 2310 * 2311 * This function creates entries in tracefs for "osnoise" and "timerlat". 2312 * It creates these directories in the tracing directory, and within that 2313 * directory the use can change and view the configs. 
2314 */ 2315 static int init_tracefs(void) 2316 { 2317 struct dentry *top_dir; 2318 struct dentry *tmp; 2319 int ret; 2320 2321 ret = tracing_init_dentry(); 2322 if (ret) 2323 return -ENOMEM; 2324 2325 top_dir = tracefs_create_dir("osnoise", NULL); 2326 if (!top_dir) 2327 return 0; 2328 2329 tmp = tracefs_create_file("period_us", TRACE_MODE_WRITE, top_dir, 2330 &osnoise_period, &trace_min_max_fops); 2331 if (!tmp) 2332 goto err; 2333 2334 tmp = tracefs_create_file("runtime_us", TRACE_MODE_WRITE, top_dir, 2335 &osnoise_runtime, &trace_min_max_fops); 2336 if (!tmp) 2337 goto err; 2338 2339 tmp = tracefs_create_file("stop_tracing_us", TRACE_MODE_WRITE, top_dir, 2340 &osnoise_stop_tracing_in, &trace_min_max_fops); 2341 if (!tmp) 2342 goto err; 2343 2344 tmp = tracefs_create_file("stop_tracing_total_us", TRACE_MODE_WRITE, top_dir, 2345 &osnoise_stop_tracing_total, &trace_min_max_fops); 2346 if (!tmp) 2347 goto err; 2348 2349 tmp = trace_create_file("cpus", TRACE_MODE_WRITE, top_dir, NULL, &cpus_fops); 2350 if (!tmp) 2351 goto err; 2352 2353 tmp = trace_create_file("options", TRACE_MODE_WRITE, top_dir, NULL, 2354 &osnoise_options_fops); 2355 if (!tmp) 2356 goto err; 2357 2358 ret = init_timerlat_tracefs(top_dir); 2359 if (ret) 2360 goto err; 2361 2362 return 0; 2363 2364 err: 2365 tracefs_remove(top_dir); 2366 return -ENOMEM; 2367 } 2368 2369 static int osnoise_hook_events(void) 2370 { 2371 int retval; 2372 2373 /* 2374 * Trace is already hooked, we are re-enabling from 2375 * a stop_tracing_*. 2376 */ 2377 if (trace_osnoise_callback_enabled) 2378 return 0; 2379 2380 retval = hook_irq_events(); 2381 if (retval) 2382 return -EINVAL; 2383 2384 retval = hook_softirq_events(); 2385 if (retval) 2386 goto out_unhook_irq; 2387 2388 retval = hook_thread_events(); 2389 /* 2390 * All fine! 
2391 */ 2392 if (!retval) 2393 return 0; 2394 2395 unhook_softirq_events(); 2396 out_unhook_irq: 2397 unhook_irq_events(); 2398 return -EINVAL; 2399 } 2400 2401 static void osnoise_unhook_events(void) 2402 { 2403 unhook_thread_events(); 2404 unhook_softirq_events(); 2405 unhook_irq_events(); 2406 } 2407 2408 /* 2409 * osnoise_workload_start - start the workload and hook to events 2410 */ 2411 static int osnoise_workload_start(void) 2412 { 2413 int retval; 2414 2415 /* 2416 * Instances need to be registered after calling workload 2417 * start. Hence, if there is already an instance, the 2418 * workload was already registered. Otherwise, this 2419 * code is on the way to register the first instance, 2420 * and the workload will start. 2421 */ 2422 if (osnoise_has_registered_instances()) 2423 return 0; 2424 2425 osn_var_reset_all(); 2426 2427 retval = osnoise_hook_events(); 2428 if (retval) 2429 return retval; 2430 2431 /* 2432 * Make sure that ftrace_nmi_enter/exit() see reset values 2433 * before enabling trace_osnoise_callback_enabled. 2434 */ 2435 barrier(); 2436 trace_osnoise_callback_enabled = true; 2437 2438 retval = start_per_cpu_kthreads(); 2439 if (retval) { 2440 trace_osnoise_callback_enabled = false; 2441 /* 2442 * Make sure that ftrace_nmi_enter/exit() see 2443 * trace_osnoise_callback_enabled as false before continuing. 2444 */ 2445 barrier(); 2446 2447 osnoise_unhook_events(); 2448 return retval; 2449 } 2450 2451 return 0; 2452 } 2453 2454 /* 2455 * osnoise_workload_stop - stop the workload and unhook the events 2456 */ 2457 static void osnoise_workload_stop(void) 2458 { 2459 /* 2460 * Instances need to be unregistered before calling 2461 * stop. Hence, if there is a registered instance, more 2462 * than one instance is running, and the workload will not 2463 * yet stop. Otherwise, this code is on the way to disable 2464 * the last instance, and the workload can stop. 
2465 */ 2466 if (osnoise_has_registered_instances()) 2467 return; 2468 2469 /* 2470 * If callbacks were already disabled in a previous stop 2471 * call, there is no need to disable then again. 2472 * 2473 * For instance, this happens when tracing is stopped via: 2474 * echo 0 > tracing_on 2475 * echo nop > current_tracer. 2476 */ 2477 if (!trace_osnoise_callback_enabled) 2478 return; 2479 2480 trace_osnoise_callback_enabled = false; 2481 /* 2482 * Make sure that ftrace_nmi_enter/exit() see 2483 * trace_osnoise_callback_enabled as false before continuing. 2484 */ 2485 barrier(); 2486 2487 stop_per_cpu_kthreads(); 2488 2489 osnoise_unhook_events(); 2490 } 2491 2492 static void osnoise_tracer_start(struct trace_array *tr) 2493 { 2494 int retval; 2495 2496 /* 2497 * If the instance is already registered, there is no need to 2498 * register it again. 2499 */ 2500 if (osnoise_instance_registered(tr)) 2501 return; 2502 2503 retval = osnoise_workload_start(); 2504 if (retval) 2505 pr_err(BANNER "Error starting osnoise tracer\n"); 2506 2507 osnoise_register_instance(tr); 2508 } 2509 2510 static void osnoise_tracer_stop(struct trace_array *tr) 2511 { 2512 osnoise_unregister_instance(tr); 2513 osnoise_workload_stop(); 2514 } 2515 2516 static int osnoise_tracer_init(struct trace_array *tr) 2517 { 2518 /* 2519 * Only allow osnoise tracer if timerlat tracer is not running 2520 * already. 
2521 */ 2522 if (timerlat_enabled()) 2523 return -EBUSY; 2524 2525 tr->max_latency = 0; 2526 2527 osnoise_tracer_start(tr); 2528 return 0; 2529 } 2530 2531 static void osnoise_tracer_reset(struct trace_array *tr) 2532 { 2533 osnoise_tracer_stop(tr); 2534 } 2535 2536 static struct tracer osnoise_tracer __read_mostly = { 2537 .name = "osnoise", 2538 .init = osnoise_tracer_init, 2539 .reset = osnoise_tracer_reset, 2540 .start = osnoise_tracer_start, 2541 .stop = osnoise_tracer_stop, 2542 .print_header = print_osnoise_headers, 2543 .allow_instances = true, 2544 }; 2545 2546 #ifdef CONFIG_TIMERLAT_TRACER 2547 static void timerlat_tracer_start(struct trace_array *tr) 2548 { 2549 int retval; 2550 2551 /* 2552 * If the instance is already registered, there is no need to 2553 * register it again. 2554 */ 2555 if (osnoise_instance_registered(tr)) 2556 return; 2557 2558 retval = osnoise_workload_start(); 2559 if (retval) 2560 pr_err(BANNER "Error starting timerlat tracer\n"); 2561 2562 osnoise_register_instance(tr); 2563 2564 return; 2565 } 2566 2567 static void timerlat_tracer_stop(struct trace_array *tr) 2568 { 2569 int cpu; 2570 2571 osnoise_unregister_instance(tr); 2572 2573 /* 2574 * Instruct the threads to stop only if this is the last instance. 2575 */ 2576 if (!osnoise_has_registered_instances()) { 2577 for_each_online_cpu(cpu) 2578 per_cpu(per_cpu_osnoise_var, cpu).sampling = 0; 2579 } 2580 2581 osnoise_workload_stop(); 2582 } 2583 2584 static int timerlat_tracer_init(struct trace_array *tr) 2585 { 2586 /* 2587 * Only allow timerlat tracer if osnoise tracer is not running already. 2588 */ 2589 if (osnoise_has_registered_instances() && !osnoise_data.timerlat_tracer) 2590 return -EBUSY; 2591 2592 /* 2593 * If this is the first instance, set timerlat_tracer to block 2594 * osnoise tracer start. 
2595 */ 2596 if (!osnoise_has_registered_instances()) 2597 osnoise_data.timerlat_tracer = 1; 2598 2599 tr->max_latency = 0; 2600 timerlat_tracer_start(tr); 2601 2602 return 0; 2603 } 2604 2605 static void timerlat_tracer_reset(struct trace_array *tr) 2606 { 2607 timerlat_tracer_stop(tr); 2608 2609 /* 2610 * If this is the last instance, reset timerlat_tracer allowing 2611 * osnoise to be started. 2612 */ 2613 if (!osnoise_has_registered_instances()) 2614 osnoise_data.timerlat_tracer = 0; 2615 } 2616 2617 static struct tracer timerlat_tracer __read_mostly = { 2618 .name = "timerlat", 2619 .init = timerlat_tracer_init, 2620 .reset = timerlat_tracer_reset, 2621 .start = timerlat_tracer_start, 2622 .stop = timerlat_tracer_stop, 2623 .print_header = print_timerlat_headers, 2624 .allow_instances = true, 2625 }; 2626 2627 __init static int init_timerlat_tracer(void) 2628 { 2629 return register_tracer(&timerlat_tracer); 2630 } 2631 #else /* CONFIG_TIMERLAT_TRACER */ 2632 __init static int init_timerlat_tracer(void) 2633 { 2634 return 0; 2635 } 2636 #endif /* CONFIG_TIMERLAT_TRACER */ 2637 2638 __init static int init_osnoise_tracer(void) 2639 { 2640 int ret; 2641 2642 mutex_init(&interface_lock); 2643 2644 cpumask_copy(&osnoise_cpumask, cpu_all_mask); 2645 2646 ret = register_tracer(&osnoise_tracer); 2647 if (ret) { 2648 pr_err(BANNER "Error registering osnoise!\n"); 2649 return ret; 2650 } 2651 2652 ret = init_timerlat_tracer(); 2653 if (ret) { 2654 pr_err(BANNER "Error registering timerlat!\n"); 2655 return ret; 2656 } 2657 2658 osnoise_init_hotplug_support(); 2659 2660 INIT_LIST_HEAD_RCU(&osnoise_instances); 2661 2662 init_tracefs(); 2663 2664 return 0; 2665 } 2666 late_initcall(init_osnoise_tracer); 2667