1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * OS Noise Tracer: computes the OS Noise suffered by a running thread. 4 * Timerlat Tracer: measures the wakeup latency of a timer triggered IRQ and thread. 5 * 6 * Based on "hwlat_detector" tracer by: 7 * Copyright (C) 2008-2009 Jon Masters, Red Hat, Inc. <jcm@redhat.com> 8 * Copyright (C) 2013-2016 Steven Rostedt, Red Hat, Inc. <srostedt@redhat.com> 9 * With feedback from Clark Williams <williams@redhat.com> 10 * 11 * And also based on the rtsl tracer presented on: 12 * DE OLIVEIRA, Daniel Bristot, et al. Demystifying the real-time linux 13 * scheduling latency. In: 32nd Euromicro Conference on Real-Time Systems 14 * (ECRTS 2020). Schloss Dagstuhl-Leibniz-Zentrum fur Informatik, 2020. 15 * 16 * Copyright (C) 2021 Daniel Bristot de Oliveira, Red Hat, Inc. <bristot@redhat.com> 17 */ 18 19 #include <linux/kthread.h> 20 #include <linux/tracefs.h> 21 #include <linux/uaccess.h> 22 #include <linux/cpumask.h> 23 #include <linux/delay.h> 24 #include <linux/sched/clock.h> 25 #include <uapi/linux/sched/types.h> 26 #include <linux/sched.h> 27 #include "trace.h" 28 29 #ifdef CONFIG_X86_LOCAL_APIC 30 #include <asm/trace/irq_vectors.h> 31 #undef TRACE_INCLUDE_PATH 32 #undef TRACE_INCLUDE_FILE 33 #endif /* CONFIG_X86_LOCAL_APIC */ 34 35 #include <trace/events/irq.h> 36 #include <trace/events/sched.h> 37 38 #define CREATE_TRACE_POINTS 39 #include <trace/events/osnoise.h> 40 41 /* 42 * Default values. 43 */ 44 #define BANNER "osnoise: " 45 #define DEFAULT_SAMPLE_PERIOD 1000000 /* 1s */ 46 #define DEFAULT_SAMPLE_RUNTIME 1000000 /* 1s */ 47 48 #define DEFAULT_TIMERLAT_PERIOD 1000 /* 1ms */ 49 #define DEFAULT_TIMERLAT_PRIO 95 /* FIFO 95 */ 50 51 /* 52 * osnoise/options entries. 53 */ 54 enum osnoise_options_index { 55 OSN_DEFAULTS = 0, 56 OSN_WORKLOAD, 57 OSN_PANIC_ON_STOP, 58 OSN_PREEMPT_DISABLE, 59 OSN_IRQ_DISABLE, 60 OSN_MAX 61 }; 62 63 static const char * const osnoise_options_str[OSN_MAX] = { 64 "DEFAULTS", 65 "OSNOISE_WORKLOAD", 66 "PANIC_ON_STOP", 67 "OSNOISE_PREEMPT_DISABLE", 68 "OSNOISE_IRQ_DISABLE" }; 69 70 #define OSN_DEFAULT_OPTIONS 0x2 71 static unsigned long osnoise_options = OSN_DEFAULT_OPTIONS; 72 73 /* 74 * trace_array of the enabled osnoise/timerlat instances. 75 */ 76 struct osnoise_instance { 77 struct list_head list; 78 struct trace_array *tr; 79 }; 80 81 static struct list_head osnoise_instances; 82 83 static bool osnoise_has_registered_instances(void) 84 { 85 return !!list_first_or_null_rcu(&osnoise_instances, 86 struct osnoise_instance, 87 list); 88 } 89 90 /* 91 * osnoise_instance_registered - check if a tr is already registered 92 */ 93 static int osnoise_instance_registered(struct trace_array *tr) 94 { 95 struct osnoise_instance *inst; 96 int found = 0; 97 98 rcu_read_lock(); 99 list_for_each_entry_rcu(inst, &osnoise_instances, list) { 100 if (inst->tr == tr) 101 found = 1; 102 } 103 rcu_read_unlock(); 104 105 return found; 106 } 107 108 /* 109 * osnoise_register_instance - register a new trace instance 110 * 111 * Register a trace_array *tr in the list of instances running 112 * osnoise/timerlat tracers. 113 */ 114 static int osnoise_register_instance(struct trace_array *tr) 115 { 116 struct osnoise_instance *inst; 117 118 /* 119 * register/unregister serialization is provided by trace's 120 * trace_types_lock. 121 */ 122 lockdep_assert_held(&trace_types_lock); 123 124 inst = kmalloc(sizeof(*inst), GFP_KERNEL); 125 if (!inst) 126 return -ENOMEM; 127 128 INIT_LIST_HEAD_RCU(&inst->list); 129 inst->tr = tr; 130 list_add_tail_rcu(&inst->list, &osnoise_instances); 131 132 return 0; 133 } 134 135 /* 136 * osnoise_unregister_instance - unregister a registered trace instance 137 * 138 * Remove the trace_array *tr from the list of instances running 139 * osnoise/timerlat tracers. 140 */ 141 static void osnoise_unregister_instance(struct trace_array *tr) 142 { 143 struct osnoise_instance *inst; 144 int found = 0; 145 146 /* 147 * register/unregister serialization is provided by trace's 148 * trace_types_lock. 149 */ 150 list_for_each_entry_rcu(inst, &osnoise_instances, list, 151 lockdep_is_held(&trace_types_lock)) { 152 if (inst->tr == tr) { 153 list_del_rcu(&inst->list); 154 found = 1; 155 break; 156 } 157 } 158 159 if (!found) 160 return; 161 162 kvfree_rcu_mightsleep(inst); 163 } 164 165 /* 166 * NMI runtime info. 167 */ 168 struct osn_nmi { 169 u64 count; 170 u64 delta_start; 171 }; 172 173 /* 174 * IRQ runtime info. 175 */ 176 struct osn_irq { 177 u64 count; 178 u64 arrival_time; 179 u64 delta_start; 180 }; 181 182 #define IRQ_CONTEXT 0 183 #define THREAD_CONTEXT 1 184 /* 185 * sofirq runtime info. 186 */ 187 struct osn_softirq { 188 u64 count; 189 u64 arrival_time; 190 u64 delta_start; 191 }; 192 193 /* 194 * thread runtime info. 195 */ 196 struct osn_thread { 197 u64 count; 198 u64 arrival_time; 199 u64 delta_start; 200 }; 201 202 /* 203 * Runtime information: this structure saves the runtime information used by 204 * one sampling thread. 205 */ 206 struct osnoise_variables { 207 struct task_struct *kthread; 208 bool sampling; 209 pid_t pid; 210 struct osn_nmi nmi; 211 struct osn_irq irq; 212 struct osn_softirq softirq; 213 struct osn_thread thread; 214 local_t int_counter; 215 }; 216 217 /* 218 * Per-cpu runtime information. 219 */ 220 static DEFINE_PER_CPU(struct osnoise_variables, per_cpu_osnoise_var); 221 222 /* 223 * this_cpu_osn_var - Return the per-cpu osnoise_variables on its relative CPU 224 */ 225 static inline struct osnoise_variables *this_cpu_osn_var(void) 226 { 227 return this_cpu_ptr(&per_cpu_osnoise_var); 228 } 229 230 #ifdef CONFIG_TIMERLAT_TRACER 231 /* 232 * Runtime information for the timer mode. 233 */ 234 struct timerlat_variables { 235 struct task_struct *kthread; 236 struct hrtimer timer; 237 u64 rel_period; 238 u64 abs_period; 239 bool tracing_thread; 240 u64 count; 241 }; 242 243 static DEFINE_PER_CPU(struct timerlat_variables, per_cpu_timerlat_var); 244 245 /* 246 * this_cpu_tmr_var - Return the per-cpu timerlat_variables on its relative CPU 247 */ 248 static inline struct timerlat_variables *this_cpu_tmr_var(void) 249 { 250 return this_cpu_ptr(&per_cpu_timerlat_var); 251 } 252 253 /* 254 * tlat_var_reset - Reset the values of the given timerlat_variables 255 */ 256 static inline void tlat_var_reset(void) 257 { 258 struct timerlat_variables *tlat_var; 259 int cpu; 260 /* 261 * So far, all the values are initialized as 0, so 262 * zeroing the structure is perfect. 263 */ 264 for_each_cpu(cpu, cpu_online_mask) { 265 tlat_var = per_cpu_ptr(&per_cpu_timerlat_var, cpu); 266 memset(tlat_var, 0, sizeof(*tlat_var)); 267 } 268 } 269 #else /* CONFIG_TIMERLAT_TRACER */ 270 #define tlat_var_reset() do {} while (0) 271 #endif /* CONFIG_TIMERLAT_TRACER */ 272 273 /* 274 * osn_var_reset - Reset the values of the given osnoise_variables 275 */ 276 static inline void osn_var_reset(void) 277 { 278 struct osnoise_variables *osn_var; 279 int cpu; 280 281 /* 282 * So far, all the values are initialized as 0, so 283 * zeroing the structure is perfect. 284 */ 285 for_each_cpu(cpu, cpu_online_mask) { 286 osn_var = per_cpu_ptr(&per_cpu_osnoise_var, cpu); 287 memset(osn_var, 0, sizeof(*osn_var)); 288 } 289 } 290 291 /* 292 * osn_var_reset_all - Reset the value of all per-cpu osnoise_variables 293 */ 294 static inline void osn_var_reset_all(void) 295 { 296 osn_var_reset(); 297 tlat_var_reset(); 298 } 299 300 /* 301 * Tells NMIs to call back to the osnoise tracer to record timestamps. 302 */ 303 bool trace_osnoise_callback_enabled; 304 305 /* 306 * osnoise sample structure definition. Used to store the statistics of a 307 * sample run. 308 */ 309 struct osnoise_sample { 310 u64 runtime; /* runtime */ 311 u64 noise; /* noise */ 312 u64 max_sample; /* max single noise sample */ 313 int hw_count; /* # HW (incl. hypervisor) interference */ 314 int nmi_count; /* # NMIs during this sample */ 315 int irq_count; /* # IRQs during this sample */ 316 int softirq_count; /* # softirqs during this sample */ 317 int thread_count; /* # threads during this sample */ 318 }; 319 320 #ifdef CONFIG_TIMERLAT_TRACER 321 /* 322 * timerlat sample structure definition. Used to store the statistics of 323 * a sample run. 324 */ 325 struct timerlat_sample { 326 u64 timer_latency; /* timer_latency */ 327 unsigned int seqnum; /* unique sequence */ 328 int context; /* timer context */ 329 }; 330 #endif 331 332 /* 333 * Protect the interface. 334 */ 335 static struct mutex interface_lock; 336 337 /* 338 * Tracer data. 339 */ 340 static struct osnoise_data { 341 u64 sample_period; /* total sampling period */ 342 u64 sample_runtime; /* active sampling portion of period */ 343 u64 stop_tracing; /* stop trace in the internal operation (loop/irq) */ 344 u64 stop_tracing_total; /* stop trace in the final operation (report/thread) */ 345 #ifdef CONFIG_TIMERLAT_TRACER 346 u64 timerlat_period; /* timerlat period */ 347 u64 print_stack; /* print IRQ stack if total > */ 348 int timerlat_tracer; /* timerlat tracer */ 349 #endif 350 bool tainted; /* infor users and developers about a problem */ 351 } osnoise_data = { 352 .sample_period = DEFAULT_SAMPLE_PERIOD, 353 .sample_runtime = DEFAULT_SAMPLE_RUNTIME, 354 .stop_tracing = 0, 355 .stop_tracing_total = 0, 356 #ifdef CONFIG_TIMERLAT_TRACER 357 .print_stack = 0, 358 .timerlat_period = DEFAULT_TIMERLAT_PERIOD, 359 .timerlat_tracer = 0, 360 #endif 361 }; 362 363 #ifdef CONFIG_TIMERLAT_TRACER 364 static inline bool timerlat_enabled(void) 365 { 366 return osnoise_data.timerlat_tracer; 367 } 368 369 static inline int timerlat_softirq_exit(struct osnoise_variables *osn_var) 370 { 371 struct timerlat_variables *tlat_var = this_cpu_tmr_var(); 372 /* 373 * If the timerlat is enabled, but the irq handler did 374 * not run yet enabling timerlat_tracer, do not trace. 375 */ 376 if (!tlat_var->tracing_thread) { 377 osn_var->softirq.arrival_time = 0; 378 osn_var->softirq.delta_start = 0; 379 return 0; 380 } 381 return 1; 382 } 383 384 static inline int timerlat_thread_exit(struct osnoise_variables *osn_var) 385 { 386 struct timerlat_variables *tlat_var = this_cpu_tmr_var(); 387 /* 388 * If the timerlat is enabled, but the irq handler did 389 * not run yet enabling timerlat_tracer, do not trace. 390 */ 391 if (!tlat_var->tracing_thread) { 392 osn_var->thread.delta_start = 0; 393 osn_var->thread.arrival_time = 0; 394 return 0; 395 } 396 return 1; 397 } 398 #else /* CONFIG_TIMERLAT_TRACER */ 399 static inline bool timerlat_enabled(void) 400 { 401 return false; 402 } 403 404 static inline int timerlat_softirq_exit(struct osnoise_variables *osn_var) 405 { 406 return 1; 407 } 408 static inline int timerlat_thread_exit(struct osnoise_variables *osn_var) 409 { 410 return 1; 411 } 412 #endif 413 414 #ifdef CONFIG_PREEMPT_RT 415 /* 416 * Print the osnoise header info. 417 */ 418 static void print_osnoise_headers(struct seq_file *s) 419 { 420 if (osnoise_data.tainted) 421 seq_puts(s, "# osnoise is tainted!\n"); 422 423 seq_puts(s, "# _-------=> irqs-off\n"); 424 seq_puts(s, "# / _------=> need-resched\n"); 425 seq_puts(s, "# | / _-----=> need-resched-lazy\n"); 426 seq_puts(s, "# || / _----=> hardirq/softirq\n"); 427 seq_puts(s, "# ||| / _---=> preempt-depth\n"); 428 seq_puts(s, "# |||| / _--=> preempt-lazy-depth\n"); 429 seq_puts(s, "# ||||| / _-=> migrate-disable\n"); 430 431 seq_puts(s, "# |||||| / "); 432 seq_puts(s, " MAX\n"); 433 434 seq_puts(s, "# ||||| / "); 435 seq_puts(s, " SINGLE Interference counters:\n"); 436 437 seq_puts(s, "# ||||||| RUNTIME "); 438 seq_puts(s, " NOISE %% OF CPU NOISE +-----------------------------+\n"); 439 440 seq_puts(s, "# TASK-PID CPU# ||||||| TIMESTAMP IN US "); 441 seq_puts(s, " IN US AVAILABLE IN US HW NMI IRQ SIRQ THREAD\n"); 442 443 seq_puts(s, "# | | | ||||||| | | "); 444 seq_puts(s, " | | | | | | | |\n"); 445 } 446 #else /* CONFIG_PREEMPT_RT */ 447 static void print_osnoise_headers(struct seq_file *s) 448 { 449 if (osnoise_data.tainted) 450 seq_puts(s, "# osnoise is tainted!\n"); 451 452 seq_puts(s, "# _-----=> irqs-off\n"); 453 seq_puts(s, "# / _----=> need-resched\n"); 454 seq_puts(s, "# | / _---=> hardirq/softirq\n"); 455 seq_puts(s, "# || / _--=> preempt-depth\n"); 456 seq_puts(s, "# ||| / _-=> migrate-disable "); 457 seq_puts(s, " MAX\n"); 458 seq_puts(s, "# |||| / delay "); 459 seq_puts(s, " SINGLE Interference counters:\n"); 460 461 seq_puts(s, "# ||||| RUNTIME "); 462 seq_puts(s, " NOISE %% OF CPU NOISE +-----------------------------+\n"); 463 464 seq_puts(s, "# TASK-PID CPU# ||||| TIMESTAMP IN US "); 465 seq_puts(s, " IN US AVAILABLE IN US HW NMI IRQ SIRQ THREAD\n"); 466 467 seq_puts(s, "# | | | ||||| | | "); 468 seq_puts(s, " | | | | | | | |\n"); 469 } 470 #endif /* CONFIG_PREEMPT_RT */ 471 472 /* 473 * osnoise_taint - report an osnoise error. 474 */ 475 #define osnoise_taint(msg) ({ \ 476 struct osnoise_instance *inst; \ 477 struct trace_buffer *buffer; \ 478 \ 479 rcu_read_lock(); \ 480 list_for_each_entry_rcu(inst, &osnoise_instances, list) { \ 481 buffer = inst->tr->array_buffer.buffer; \ 482 trace_array_printk_buf(buffer, _THIS_IP_, msg); \ 483 } \ 484 rcu_read_unlock(); \ 485 osnoise_data.tainted = true; \ 486 }) 487 488 /* 489 * Record an osnoise_sample into the tracer buffer. 490 */ 491 static void 492 __trace_osnoise_sample(struct osnoise_sample *sample, struct trace_buffer *buffer) 493 { 494 struct trace_event_call *call = &event_osnoise; 495 struct ring_buffer_event *event; 496 struct osnoise_entry *entry; 497 498 event = trace_buffer_lock_reserve(buffer, TRACE_OSNOISE, sizeof(*entry), 499 tracing_gen_ctx()); 500 if (!event) 501 return; 502 entry = ring_buffer_event_data(event); 503 entry->runtime = sample->runtime; 504 entry->noise = sample->noise; 505 entry->max_sample = sample->max_sample; 506 entry->hw_count = sample->hw_count; 507 entry->nmi_count = sample->nmi_count; 508 entry->irq_count = sample->irq_count; 509 entry->softirq_count = sample->softirq_count; 510 entry->thread_count = sample->thread_count; 511 512 if (!call_filter_check_discard(call, entry, buffer, event)) 513 trace_buffer_unlock_commit_nostack(buffer, event); 514 } 515 516 /* 517 * Record an osnoise_sample on all osnoise instances. 518 */ 519 static void trace_osnoise_sample(struct osnoise_sample *sample) 520 { 521 struct osnoise_instance *inst; 522 struct trace_buffer *buffer; 523 524 rcu_read_lock(); 525 list_for_each_entry_rcu(inst, &osnoise_instances, list) { 526 buffer = inst->tr->array_buffer.buffer; 527 __trace_osnoise_sample(sample, buffer); 528 } 529 rcu_read_unlock(); 530 } 531 532 #ifdef CONFIG_TIMERLAT_TRACER 533 /* 534 * Print the timerlat header info. 535 */ 536 #ifdef CONFIG_PREEMPT_RT 537 static void print_timerlat_headers(struct seq_file *s) 538 { 539 seq_puts(s, "# _-------=> irqs-off\n"); 540 seq_puts(s, "# / _------=> need-resched\n"); 541 seq_puts(s, "# | / _-----=> need-resched-lazy\n"); 542 seq_puts(s, "# || / _----=> hardirq/softirq\n"); 543 seq_puts(s, "# ||| / _---=> preempt-depth\n"); 544 seq_puts(s, "# |||| / _--=> preempt-lazy-depth\n"); 545 seq_puts(s, "# ||||| / _-=> migrate-disable\n"); 546 seq_puts(s, "# |||||| /\n"); 547 seq_puts(s, "# ||||||| ACTIVATION\n"); 548 seq_puts(s, "# TASK-PID CPU# ||||||| TIMESTAMP ID "); 549 seq_puts(s, " CONTEXT LATENCY\n"); 550 seq_puts(s, "# | | | ||||||| | | "); 551 seq_puts(s, " | |\n"); 552 } 553 #else /* CONFIG_PREEMPT_RT */ 554 static void print_timerlat_headers(struct seq_file *s) 555 { 556 seq_puts(s, "# _-----=> irqs-off\n"); 557 seq_puts(s, "# / _----=> need-resched\n"); 558 seq_puts(s, "# | / _---=> hardirq/softirq\n"); 559 seq_puts(s, "# || / _--=> preempt-depth\n"); 560 seq_puts(s, "# ||| / _-=> migrate-disable\n"); 561 seq_puts(s, "# |||| / delay\n"); 562 seq_puts(s, "# ||||| ACTIVATION\n"); 563 seq_puts(s, "# TASK-PID CPU# ||||| TIMESTAMP ID "); 564 seq_puts(s, " CONTEXT LATENCY\n"); 565 seq_puts(s, "# | | | ||||| | | "); 566 seq_puts(s, " | |\n"); 567 } 568 #endif /* CONFIG_PREEMPT_RT */ 569 570 static void 571 __trace_timerlat_sample(struct timerlat_sample *sample, struct trace_buffer *buffer) 572 { 573 struct trace_event_call *call = &event_osnoise; 574 struct ring_buffer_event *event; 575 struct timerlat_entry *entry; 576 577 event = trace_buffer_lock_reserve(buffer, TRACE_TIMERLAT, sizeof(*entry), 578 tracing_gen_ctx()); 579 if (!event) 580 return; 581 entry = ring_buffer_event_data(event); 582 entry->seqnum = sample->seqnum; 583 entry->context = sample->context; 584 entry->timer_latency = sample->timer_latency; 585 586 if (!call_filter_check_discard(call, entry, buffer, event)) 587 trace_buffer_unlock_commit_nostack(buffer, event); 588 } 589 590 /* 591 * Record an timerlat_sample into the tracer buffer. 592 */ 593 static void trace_timerlat_sample(struct timerlat_sample *sample) 594 { 595 struct osnoise_instance *inst; 596 struct trace_buffer *buffer; 597 598 rcu_read_lock(); 599 list_for_each_entry_rcu(inst, &osnoise_instances, list) { 600 buffer = inst->tr->array_buffer.buffer; 601 __trace_timerlat_sample(sample, buffer); 602 } 603 rcu_read_unlock(); 604 } 605 606 #ifdef CONFIG_STACKTRACE 607 608 #define MAX_CALLS 256 609 610 /* 611 * Stack trace will take place only at IRQ level, so, no need 612 * to control nesting here. 613 */ 614 struct trace_stack { 615 int stack_size; 616 int nr_entries; 617 unsigned long calls[MAX_CALLS]; 618 }; 619 620 static DEFINE_PER_CPU(struct trace_stack, trace_stack); 621 622 /* 623 * timerlat_save_stack - save a stack trace without printing 624 * 625 * Save the current stack trace without printing. The 626 * stack will be printed later, after the end of the measurement. 627 */ 628 static void timerlat_save_stack(int skip) 629 { 630 unsigned int size, nr_entries; 631 struct trace_stack *fstack; 632 633 fstack = this_cpu_ptr(&trace_stack); 634 635 size = ARRAY_SIZE(fstack->calls); 636 637 nr_entries = stack_trace_save(fstack->calls, size, skip); 638 639 fstack->stack_size = nr_entries * sizeof(unsigned long); 640 fstack->nr_entries = nr_entries; 641 642 return; 643 644 } 645 646 static void 647 __timerlat_dump_stack(struct trace_buffer *buffer, struct trace_stack *fstack, unsigned int size) 648 { 649 struct trace_event_call *call = &event_osnoise; 650 struct ring_buffer_event *event; 651 struct stack_entry *entry; 652 653 event = trace_buffer_lock_reserve(buffer, TRACE_STACK, sizeof(*entry) + size, 654 tracing_gen_ctx()); 655 if (!event) 656 return; 657 658 entry = ring_buffer_event_data(event); 659 660 memcpy(&entry->caller, fstack->calls, size); 661 entry->size = fstack->nr_entries; 662 663 if (!call_filter_check_discard(call, entry, buffer, event)) 664 trace_buffer_unlock_commit_nostack(buffer, event); 665 } 666 667 /* 668 * timerlat_dump_stack - dump a stack trace previously saved 669 */ 670 static void timerlat_dump_stack(u64 latency) 671 { 672 struct osnoise_instance *inst; 673 struct trace_buffer *buffer; 674 struct trace_stack *fstack; 675 unsigned int size; 676 677 /* 678 * trace only if latency > print_stack config, if enabled. 679 */ 680 if (!osnoise_data.print_stack || osnoise_data.print_stack > latency) 681 return; 682 683 preempt_disable_notrace(); 684 fstack = this_cpu_ptr(&trace_stack); 685 size = fstack->stack_size; 686 687 rcu_read_lock(); 688 list_for_each_entry_rcu(inst, &osnoise_instances, list) { 689 buffer = inst->tr->array_buffer.buffer; 690 __timerlat_dump_stack(buffer, fstack, size); 691 692 } 693 rcu_read_unlock(); 694 preempt_enable_notrace(); 695 } 696 #else /* CONFIG_STACKTRACE */ 697 #define timerlat_dump_stack(u64 latency) do {} while (0) 698 #define timerlat_save_stack(a) do {} while (0) 699 #endif /* CONFIG_STACKTRACE */ 700 #endif /* CONFIG_TIMERLAT_TRACER */ 701 702 /* 703 * Macros to encapsulate the time capturing infrastructure. 704 */ 705 #define time_get() trace_clock_local() 706 #define time_to_us(x) div_u64(x, 1000) 707 #define time_sub(a, b) ((a) - (b)) 708 709 /* 710 * cond_move_irq_delta_start - Forward the delta_start of a running IRQ 711 * 712 * If an IRQ is preempted by an NMI, its delta_start is pushed forward 713 * to discount the NMI interference. 714 * 715 * See get_int_safe_duration(). 716 */ 717 static inline void 718 cond_move_irq_delta_start(struct osnoise_variables *osn_var, u64 duration) 719 { 720 if (osn_var->irq.delta_start) 721 osn_var->irq.delta_start += duration; 722 } 723 724 #ifndef CONFIG_PREEMPT_RT 725 /* 726 * cond_move_softirq_delta_start - Forward the delta_start of a running softirq. 727 * 728 * If a softirq is preempted by an IRQ or NMI, its delta_start is pushed 729 * forward to discount the interference. 730 * 731 * See get_int_safe_duration(). 732 */ 733 static inline void 734 cond_move_softirq_delta_start(struct osnoise_variables *osn_var, u64 duration) 735 { 736 if (osn_var->softirq.delta_start) 737 osn_var->softirq.delta_start += duration; 738 } 739 #else /* CONFIG_PREEMPT_RT */ 740 #define cond_move_softirq_delta_start(osn_var, duration) do {} while (0) 741 #endif 742 743 /* 744 * cond_move_thread_delta_start - Forward the delta_start of a running thread 745 * 746 * If a noisy thread is preempted by an softirq, IRQ or NMI, its delta_start 747 * is pushed forward to discount the interference. 748 * 749 * See get_int_safe_duration(). 750 */ 751 static inline void 752 cond_move_thread_delta_start(struct osnoise_variables *osn_var, u64 duration) 753 { 754 if (osn_var->thread.delta_start) 755 osn_var->thread.delta_start += duration; 756 } 757 758 /* 759 * get_int_safe_duration - Get the duration of a window 760 * 761 * The irq, softirq and thread varaibles need to have its duration without 762 * the interference from higher priority interrupts. Instead of keeping a 763 * variable to discount the interrupt interference from these variables, the 764 * starting time of these variables are pushed forward with the interrupt's 765 * duration. In this way, a single variable is used to: 766 * 767 * - Know if a given window is being measured. 768 * - Account its duration. 769 * - Discount the interference. 770 * 771 * To avoid getting inconsistent values, e.g.,: 772 * 773 * now = time_get() 774 * ---> interrupt! 775 * delta_start -= int duration; 776 * <--- 777 * duration = now - delta_start; 778 * 779 * result: negative duration if the variable duration before the 780 * interrupt was smaller than the interrupt execution. 781 * 782 * A counter of interrupts is used. If the counter increased, try 783 * to capture an interference safe duration. 784 */ 785 static inline s64 786 get_int_safe_duration(struct osnoise_variables *osn_var, u64 *delta_start) 787 { 788 u64 int_counter, now; 789 s64 duration; 790 791 do { 792 int_counter = local_read(&osn_var->int_counter); 793 /* synchronize with interrupts */ 794 barrier(); 795 796 now = time_get(); 797 duration = (now - *delta_start); 798 799 /* synchronize with interrupts */ 800 barrier(); 801 } while (int_counter != local_read(&osn_var->int_counter)); 802 803 /* 804 * This is an evidence of race conditions that cause 805 * a value to be "discounted" too much. 806 */ 807 if (duration < 0) 808 osnoise_taint("Negative duration!\n"); 809 810 *delta_start = 0; 811 812 return duration; 813 } 814 815 /* 816 * 817 * set_int_safe_time - Save the current time on *time, aware of interference 818 * 819 * Get the time, taking into consideration a possible interference from 820 * higher priority interrupts. 821 * 822 * See get_int_safe_duration() for an explanation. 823 */ 824 static u64 825 set_int_safe_time(struct osnoise_variables *osn_var, u64 *time) 826 { 827 u64 int_counter; 828 829 do { 830 int_counter = local_read(&osn_var->int_counter); 831 /* synchronize with interrupts */ 832 barrier(); 833 834 *time = time_get(); 835 836 /* synchronize with interrupts */ 837 barrier(); 838 } while (int_counter != local_read(&osn_var->int_counter)); 839 840 return int_counter; 841 } 842 843 #ifdef CONFIG_TIMERLAT_TRACER 844 /* 845 * copy_int_safe_time - Copy *src into *desc aware of interference 846 */ 847 static u64 848 copy_int_safe_time(struct osnoise_variables *osn_var, u64 *dst, u64 *src) 849 { 850 u64 int_counter; 851 852 do { 853 int_counter = local_read(&osn_var->int_counter); 854 /* synchronize with interrupts */ 855 barrier(); 856 857 *dst = *src; 858 859 /* synchronize with interrupts */ 860 barrier(); 861 } while (int_counter != local_read(&osn_var->int_counter)); 862 863 return int_counter; 864 } 865 #endif /* CONFIG_TIMERLAT_TRACER */ 866 867 /* 868 * trace_osnoise_callback - NMI entry/exit callback 869 * 870 * This function is called at the entry and exit NMI code. The bool enter 871 * distinguishes between either case. This function is used to note a NMI 872 * occurrence, compute the noise caused by the NMI, and to remove the noise 873 * it is potentially causing on other interference variables. 874 */ 875 void trace_osnoise_callback(bool enter) 876 { 877 struct osnoise_variables *osn_var = this_cpu_osn_var(); 878 u64 duration; 879 880 if (!osn_var->sampling) 881 return; 882 883 /* 884 * Currently trace_clock_local() calls sched_clock() and the 885 * generic version is not NMI safe. 886 */ 887 if (!IS_ENABLED(CONFIG_GENERIC_SCHED_CLOCK)) { 888 if (enter) { 889 osn_var->nmi.delta_start = time_get(); 890 local_inc(&osn_var->int_counter); 891 } else { 892 duration = time_get() - osn_var->nmi.delta_start; 893 894 trace_nmi_noise(osn_var->nmi.delta_start, duration); 895 896 cond_move_irq_delta_start(osn_var, duration); 897 cond_move_softirq_delta_start(osn_var, duration); 898 cond_move_thread_delta_start(osn_var, duration); 899 } 900 } 901 902 if (enter) 903 osn_var->nmi.count++; 904 } 905 906 /* 907 * osnoise_trace_irq_entry - Note the starting of an IRQ 908 * 909 * Save the starting time of an IRQ. As IRQs are non-preemptive to other IRQs, 910 * it is safe to use a single variable (ons_var->irq) to save the statistics. 911 * The arrival_time is used to report... the arrival time. The delta_start 912 * is used to compute the duration at the IRQ exit handler. See 913 * cond_move_irq_delta_start(). 914 */ 915 void osnoise_trace_irq_entry(int id) 916 { 917 struct osnoise_variables *osn_var = this_cpu_osn_var(); 918 919 if (!osn_var->sampling) 920 return; 921 /* 922 * This value will be used in the report, but not to compute 923 * the execution time, so it is safe to get it unsafe. 924 */ 925 osn_var->irq.arrival_time = time_get(); 926 set_int_safe_time(osn_var, &osn_var->irq.delta_start); 927 osn_var->irq.count++; 928 929 local_inc(&osn_var->int_counter); 930 } 931 932 /* 933 * osnoise_irq_exit - Note the end of an IRQ, sava data and trace 934 * 935 * Computes the duration of the IRQ noise, and trace it. Also discounts the 936 * interference from other sources of noise could be currently being accounted. 937 */ 938 void osnoise_trace_irq_exit(int id, const char *desc) 939 { 940 struct osnoise_variables *osn_var = this_cpu_osn_var(); 941 s64 duration; 942 943 if (!osn_var->sampling) 944 return; 945 946 duration = get_int_safe_duration(osn_var, &osn_var->irq.delta_start); 947 trace_irq_noise(id, desc, osn_var->irq.arrival_time, duration); 948 osn_var->irq.arrival_time = 0; 949 cond_move_softirq_delta_start(osn_var, duration); 950 cond_move_thread_delta_start(osn_var, duration); 951 } 952 953 /* 954 * trace_irqentry_callback - Callback to the irq:irq_entry traceevent 955 * 956 * Used to note the starting of an IRQ occurece. 957 */ 958 static void trace_irqentry_callback(void *data, int irq, 959 struct irqaction *action) 960 { 961 osnoise_trace_irq_entry(irq); 962 } 963 964 /* 965 * trace_irqexit_callback - Callback to the irq:irq_exit traceevent 966 * 967 * Used to note the end of an IRQ occurece. 968 */ 969 static void trace_irqexit_callback(void *data, int irq, 970 struct irqaction *action, int ret) 971 { 972 osnoise_trace_irq_exit(irq, action->name); 973 } 974 975 /* 976 * arch specific register function. 977 */ 978 int __weak osnoise_arch_register(void) 979 { 980 return 0; 981 } 982 983 /* 984 * arch specific unregister function. 985 */ 986 void __weak osnoise_arch_unregister(void) 987 { 988 return; 989 } 990 991 /* 992 * hook_irq_events - Hook IRQ handling events 993 * 994 * This function hooks the IRQ related callbacks to the respective trace 995 * events. 996 */ 997 static int hook_irq_events(void) 998 { 999 int ret; 1000 1001 ret = register_trace_irq_handler_entry(trace_irqentry_callback, NULL); 1002 if (ret) 1003 goto out_err; 1004 1005 ret = register_trace_irq_handler_exit(trace_irqexit_callback, NULL); 1006 if (ret) 1007 goto out_unregister_entry; 1008 1009 ret = osnoise_arch_register(); 1010 if (ret) 1011 goto out_irq_exit; 1012 1013 return 0; 1014 1015 out_irq_exit: 1016 unregister_trace_irq_handler_exit(trace_irqexit_callback, NULL); 1017 out_unregister_entry: 1018 unregister_trace_irq_handler_entry(trace_irqentry_callback, NULL); 1019 out_err: 1020 return -EINVAL; 1021 } 1022 1023 /* 1024 * unhook_irq_events - Unhook IRQ handling events 1025 * 1026 * This function unhooks the IRQ related callbacks to the respective trace 1027 * events. 1028 */ 1029 static void unhook_irq_events(void) 1030 { 1031 osnoise_arch_unregister(); 1032 unregister_trace_irq_handler_exit(trace_irqexit_callback, NULL); 1033 unregister_trace_irq_handler_entry(trace_irqentry_callback, NULL); 1034 } 1035 1036 #ifndef CONFIG_PREEMPT_RT 1037 /* 1038 * trace_softirq_entry_callback - Note the starting of a softirq 1039 * 1040 * Save the starting time of a softirq. As softirqs are non-preemptive to 1041 * other softirqs, it is safe to use a single variable (ons_var->softirq) 1042 * to save the statistics. The arrival_time is used to report... the 1043 * arrival time. The delta_start is used to compute the duration at the 1044 * softirq exit handler. See cond_move_softirq_delta_start(). 1045 */ 1046 static void trace_softirq_entry_callback(void *data, unsigned int vec_nr) 1047 { 1048 struct osnoise_variables *osn_var = this_cpu_osn_var(); 1049 1050 if (!osn_var->sampling) 1051 return; 1052 /* 1053 * This value will be used in the report, but not to compute 1054 * the execution time, so it is safe to get it unsafe. 1055 */ 1056 osn_var->softirq.arrival_time = time_get(); 1057 set_int_safe_time(osn_var, &osn_var->softirq.delta_start); 1058 osn_var->softirq.count++; 1059 1060 local_inc(&osn_var->int_counter); 1061 } 1062 1063 /* 1064 * trace_softirq_exit_callback - Note the end of an softirq 1065 * 1066 * Computes the duration of the softirq noise, and trace it. Also discounts the 1067 * interference from other sources of noise could be currently being accounted. 1068 */ 1069 static void trace_softirq_exit_callback(void *data, unsigned int vec_nr) 1070 { 1071 struct osnoise_variables *osn_var = this_cpu_osn_var(); 1072 s64 duration; 1073 1074 if (!osn_var->sampling) 1075 return; 1076 1077 if (unlikely(timerlat_enabled())) 1078 if (!timerlat_softirq_exit(osn_var)) 1079 return; 1080 1081 duration = get_int_safe_duration(osn_var, &osn_var->softirq.delta_start); 1082 trace_softirq_noise(vec_nr, osn_var->softirq.arrival_time, duration); 1083 cond_move_thread_delta_start(osn_var, duration); 1084 osn_var->softirq.arrival_time = 0; 1085 } 1086 1087 /* 1088 * hook_softirq_events - Hook softirq handling events 1089 * 1090 * This function hooks the softirq related callbacks to the respective trace 1091 * events. 1092 */ 1093 static int hook_softirq_events(void) 1094 { 1095 int ret; 1096 1097 ret = register_trace_softirq_entry(trace_softirq_entry_callback, NULL); 1098 if (ret) 1099 goto out_err; 1100 1101 ret = register_trace_softirq_exit(trace_softirq_exit_callback, NULL); 1102 if (ret) 1103 goto out_unreg_entry; 1104 1105 return 0; 1106 1107 out_unreg_entry: 1108 unregister_trace_softirq_entry(trace_softirq_entry_callback, NULL); 1109 out_err: 1110 return -EINVAL; 1111 } 1112 1113 /* 1114 * unhook_softirq_events - Unhook softirq handling events 1115 * 1116 * This function hooks the softirq related callbacks to the respective trace 1117 * events. 1118 */ 1119 static void unhook_softirq_events(void) 1120 { 1121 unregister_trace_softirq_entry(trace_softirq_entry_callback, NULL); 1122 unregister_trace_softirq_exit(trace_softirq_exit_callback, NULL); 1123 } 1124 #else /* CONFIG_PREEMPT_RT */ 1125 /* 1126 * softirq are threads on the PREEMPT_RT mode. 1127 */ 1128 static int hook_softirq_events(void) 1129 { 1130 return 0; 1131 } 1132 static void unhook_softirq_events(void) 1133 { 1134 } 1135 #endif 1136 1137 /* 1138 * thread_entry - Record the starting of a thread noise window 1139 * 1140 * It saves the context switch time for a noisy thread, and increments 1141 * the interference counters. 1142 */ 1143 static void 1144 thread_entry(struct osnoise_variables *osn_var, struct task_struct *t) 1145 { 1146 if (!osn_var->sampling) 1147 return; 1148 /* 1149 * The arrival time will be used in the report, but not to compute 1150 * the execution time, so it is safe to get it unsafe. 1151 */ 1152 osn_var->thread.arrival_time = time_get(); 1153 1154 set_int_safe_time(osn_var, &osn_var->thread.delta_start); 1155 1156 osn_var->thread.count++; 1157 local_inc(&osn_var->int_counter); 1158 } 1159 1160 /* 1161 * thread_exit - Report the end of a thread noise window 1162 * 1163 * It computes the total noise from a thread, tracing if needed. 1164 */ 1165 static void 1166 thread_exit(struct osnoise_variables *osn_var, struct task_struct *t) 1167 { 1168 s64 duration; 1169 1170 if (!osn_var->sampling) 1171 return; 1172 1173 if (unlikely(timerlat_enabled())) 1174 if (!timerlat_thread_exit(osn_var)) 1175 return; 1176 1177 duration = get_int_safe_duration(osn_var, &osn_var->thread.delta_start); 1178 1179 trace_thread_noise(t, osn_var->thread.arrival_time, duration); 1180 1181 osn_var->thread.arrival_time = 0; 1182 } 1183 1184 /* 1185 * trace_sched_switch - sched:sched_switch trace event handler 1186 * 1187 * This function is hooked to the sched:sched_switch trace event, and it is 1188 * used to record the beginning and to report the end of a thread noise window. 1189 */ 1190 static void 1191 trace_sched_switch_callback(void *data, bool preempt, 1192 struct task_struct *p, 1193 struct task_struct *n, 1194 unsigned int prev_state) 1195 { 1196 struct osnoise_variables *osn_var = this_cpu_osn_var(); 1197 int workload = test_bit(OSN_WORKLOAD, &osnoise_options); 1198 1199 if ((p->pid != osn_var->pid) || !workload) 1200 thread_exit(osn_var, p); 1201 1202 if ((n->pid != osn_var->pid) || !workload) 1203 thread_entry(osn_var, n); 1204 } 1205 1206 /* 1207 * hook_thread_events - Hook the insturmentation for thread noise 1208 * 1209 * Hook the osnoise tracer callbacks to handle the noise from other 1210 * threads on the necessary kernel events. 1211 */ 1212 static int hook_thread_events(void) 1213 { 1214 int ret; 1215 1216 ret = register_trace_sched_switch(trace_sched_switch_callback, NULL); 1217 if (ret) 1218 return -EINVAL; 1219 1220 return 0; 1221 } 1222 1223 /* 1224 * unhook_thread_events - *nhook the insturmentation for thread noise 1225 * 1226 * Unook the osnoise tracer callbacks to handle the noise from other 1227 * threads on the necessary kernel events. 1228 */ 1229 static void unhook_thread_events(void) 1230 { 1231 unregister_trace_sched_switch(trace_sched_switch_callback, NULL); 1232 } 1233 1234 /* 1235 * save_osn_sample_stats - Save the osnoise_sample statistics 1236 * 1237 * Save the osnoise_sample statistics before the sampling phase. These 1238 * values will be used later to compute the diff betwneen the statistics 1239 * before and after the osnoise sampling. 1240 */ 1241 static void 1242 save_osn_sample_stats(struct osnoise_variables *osn_var, struct osnoise_sample *s) 1243 { 1244 s->nmi_count = osn_var->nmi.count; 1245 s->irq_count = osn_var->irq.count; 1246 s->softirq_count = osn_var->softirq.count; 1247 s->thread_count = osn_var->thread.count; 1248 } 1249 1250 /* 1251 * diff_osn_sample_stats - Compute the osnoise_sample statistics 1252 * 1253 * After a sample period, compute the difference on the osnoise_sample 1254 * statistics. The struct osnoise_sample *s contains the statistics saved via 1255 * save_osn_sample_stats() before the osnoise sampling. 1256 */ 1257 static void 1258 diff_osn_sample_stats(struct osnoise_variables *osn_var, struct osnoise_sample *s) 1259 { 1260 s->nmi_count = osn_var->nmi.count - s->nmi_count; 1261 s->irq_count = osn_var->irq.count - s->irq_count; 1262 s->softirq_count = osn_var->softirq.count - s->softirq_count; 1263 s->thread_count = osn_var->thread.count - s->thread_count; 1264 } 1265 1266 /* 1267 * osnoise_stop_tracing - Stop tracing and the tracer. 1268 */ 1269 static __always_inline void osnoise_stop_tracing(void) 1270 { 1271 struct osnoise_instance *inst; 1272 struct trace_array *tr; 1273 1274 rcu_read_lock(); 1275 list_for_each_entry_rcu(inst, &osnoise_instances, list) { 1276 tr = inst->tr; 1277 trace_array_printk_buf(tr->array_buffer.buffer, _THIS_IP_, 1278 "stop tracing hit on cpu %d\n", smp_processor_id()); 1279 1280 if (test_bit(OSN_PANIC_ON_STOP, &osnoise_options)) 1281 panic("tracer hit stop condition on CPU %d\n", smp_processor_id()); 1282 1283 tracer_tracing_off(tr); 1284 } 1285 rcu_read_unlock(); 1286 } 1287 1288 /* 1289 * notify_new_max_latency - Notify a new max latency via fsnotify interface. 1290 */ 1291 static void notify_new_max_latency(u64 latency) 1292 { 1293 struct osnoise_instance *inst; 1294 struct trace_array *tr; 1295 1296 rcu_read_lock(); 1297 list_for_each_entry_rcu(inst, &osnoise_instances, list) { 1298 tr = inst->tr; 1299 if (tracer_tracing_is_on(tr) && tr->max_latency < latency) { 1300 tr->max_latency = latency; 1301 latency_fsnotify(tr); 1302 } 1303 } 1304 rcu_read_unlock(); 1305 } 1306 1307 /* 1308 * run_osnoise - Sample the time and look for osnoise 1309 * 1310 * Used to capture the time, looking for potential osnoise latency repeatedly. 1311 * Different from hwlat_detector, it is called with preemption and interrupts 1312 * enabled. This allows irqs, softirqs and threads to run, interfering on the 1313 * osnoise sampling thread, as they would do with a regular thread. 1314 */ 1315 static int run_osnoise(void) 1316 { 1317 bool disable_irq = test_bit(OSN_IRQ_DISABLE, &osnoise_options); 1318 struct osnoise_variables *osn_var = this_cpu_osn_var(); 1319 u64 start, sample, last_sample; 1320 u64 last_int_count, int_count; 1321 s64 noise = 0, max_noise = 0; 1322 s64 total, last_total = 0; 1323 struct osnoise_sample s; 1324 bool disable_preemption; 1325 unsigned int threshold; 1326 u64 runtime, stop_in; 1327 u64 sum_noise = 0; 1328 int hw_count = 0; 1329 int ret = -1; 1330 1331 /* 1332 * Disabling preemption is only required if IRQs are enabled, 1333 * and the options is set on. 1334 */ 1335 disable_preemption = !disable_irq && test_bit(OSN_PREEMPT_DISABLE, &osnoise_options); 1336 1337 /* 1338 * Considers the current thread as the workload. 1339 */ 1340 osn_var->pid = current->pid; 1341 1342 /* 1343 * Save the current stats for the diff 1344 */ 1345 save_osn_sample_stats(osn_var, &s); 1346 1347 /* 1348 * if threshold is 0, use the default value of 5 us. 1349 */ 1350 threshold = tracing_thresh ? : 5000; 1351 1352 /* 1353 * Apply PREEMPT and IRQ disabled options. 1354 */ 1355 if (disable_irq) 1356 local_irq_disable(); 1357 1358 if (disable_preemption) 1359 preempt_disable(); 1360 1361 /* 1362 * Make sure NMIs see sampling first 1363 */ 1364 osn_var->sampling = true; 1365 barrier(); 1366 1367 /* 1368 * Transform the *_us config to nanoseconds to avoid the 1369 * division on the main loop. 1370 */ 1371 runtime = osnoise_data.sample_runtime * NSEC_PER_USEC; 1372 stop_in = osnoise_data.stop_tracing * NSEC_PER_USEC; 1373 1374 /* 1375 * Start timestemp 1376 */ 1377 start = time_get(); 1378 1379 /* 1380 * "previous" loop. 1381 */ 1382 last_int_count = set_int_safe_time(osn_var, &last_sample); 1383 1384 do { 1385 /* 1386 * Get sample! 1387 */ 1388 int_count = set_int_safe_time(osn_var, &sample); 1389 1390 noise = time_sub(sample, last_sample); 1391 1392 /* 1393 * This shouldn't happen. 1394 */ 1395 if (noise < 0) { 1396 osnoise_taint("negative noise!"); 1397 goto out; 1398 } 1399 1400 /* 1401 * Sample runtime. 1402 */ 1403 total = time_sub(sample, start); 1404 1405 /* 1406 * Check for possible overflows. 1407 */ 1408 if (total < last_total) { 1409 osnoise_taint("total overflow!"); 1410 break; 1411 } 1412 1413 last_total = total; 1414 1415 if (noise >= threshold) { 1416 int interference = int_count - last_int_count; 1417 1418 if (noise > max_noise) 1419 max_noise = noise; 1420 1421 if (!interference) 1422 hw_count++; 1423 1424 sum_noise += noise; 1425 1426 trace_sample_threshold(last_sample, noise, interference); 1427 1428 if (osnoise_data.stop_tracing) 1429 if (noise > stop_in) 1430 osnoise_stop_tracing(); 1431 } 1432 1433 /* 1434 * In some cases, notably when running on a nohz_full CPU with 1435 * a stopped tick PREEMPT_RCU has no way to account for QSs. 1436 * This will eventually cause unwarranted noise as PREEMPT_RCU 1437 * will force preemption as the means of ending the current 1438 * grace period. We avoid this problem by calling 1439 * rcu_momentary_dyntick_idle(), which performs a zero duration 1440 * EQS allowing PREEMPT_RCU to end the current grace period. 1441 * This call shouldn't be wrapped inside an RCU critical 1442 * section. 1443 * 1444 * Note that in non PREEMPT_RCU kernels QSs are handled through 1445 * cond_resched() 1446 */ 1447 if (IS_ENABLED(CONFIG_PREEMPT_RCU)) { 1448 if (!disable_irq) 1449 local_irq_disable(); 1450 1451 rcu_momentary_dyntick_idle(); 1452 1453 if (!disable_irq) 1454 local_irq_enable(); 1455 } 1456 1457 /* 1458 * For the non-preemptive kernel config: let threads runs, if 1459 * they so wish, unless set not do to so. 1460 */ 1461 if (!disable_irq && !disable_preemption) 1462 cond_resched(); 1463 1464 last_sample = sample; 1465 last_int_count = int_count; 1466 1467 } while (total < runtime && !kthread_should_stop()); 1468 1469 /* 1470 * Finish the above in the view for interrupts. 1471 */ 1472 barrier(); 1473 1474 osn_var->sampling = false; 1475 1476 /* 1477 * Make sure sampling data is no longer updated. 1478 */ 1479 barrier(); 1480 1481 /* 1482 * Return to the preemptive state. 1483 */ 1484 if (disable_preemption) 1485 preempt_enable(); 1486 1487 if (disable_irq) 1488 local_irq_enable(); 1489 1490 /* 1491 * Save noise info. 1492 */ 1493 s.noise = time_to_us(sum_noise); 1494 s.runtime = time_to_us(total); 1495 s.max_sample = time_to_us(max_noise); 1496 s.hw_count = hw_count; 1497 1498 /* Save interference stats info */ 1499 diff_osn_sample_stats(osn_var, &s); 1500 1501 trace_osnoise_sample(&s); 1502 1503 notify_new_max_latency(max_noise); 1504 1505 if (osnoise_data.stop_tracing_total) 1506 if (s.noise > osnoise_data.stop_tracing_total) 1507 osnoise_stop_tracing(); 1508 1509 return 0; 1510 out: 1511 return ret; 1512 } 1513 1514 static struct cpumask osnoise_cpumask; 1515 static struct cpumask save_cpumask; 1516 1517 /* 1518 * osnoise_sleep - sleep until the next period 1519 */ 1520 static void osnoise_sleep(void) 1521 { 1522 u64 interval; 1523 ktime_t wake_time; 1524 1525 mutex_lock(&interface_lock); 1526 interval = osnoise_data.sample_period - osnoise_data.sample_runtime; 1527 mutex_unlock(&interface_lock); 1528 1529 /* 1530 * differently from hwlat_detector, the osnoise tracer can run 1531 * without a pause because preemption is on. 1532 */ 1533 if (!interval) { 1534 /* Let synchronize_rcu_tasks() make progress */ 1535 cond_resched_tasks_rcu_qs(); 1536 return; 1537 } 1538 1539 wake_time = ktime_add_us(ktime_get(), interval); 1540 __set_current_state(TASK_INTERRUPTIBLE); 1541 1542 while (schedule_hrtimeout(&wake_time, HRTIMER_MODE_ABS)) { 1543 if (kthread_should_stop()) 1544 break; 1545 } 1546 } 1547 1548 /* 1549 * osnoise_main - The osnoise detection kernel thread 1550 * 1551 * Calls run_osnoise() function to measure the osnoise for the configured runtime, 1552 * every period. 1553 */ 1554 static int osnoise_main(void *data) 1555 { 1556 1557 while (!kthread_should_stop()) { 1558 run_osnoise(); 1559 osnoise_sleep(); 1560 } 1561 1562 return 0; 1563 } 1564 1565 #ifdef CONFIG_TIMERLAT_TRACER 1566 /* 1567 * timerlat_irq - hrtimer handler for timerlat. 1568 */ 1569 static enum hrtimer_restart timerlat_irq(struct hrtimer *timer) 1570 { 1571 struct osnoise_variables *osn_var = this_cpu_osn_var(); 1572 struct timerlat_variables *tlat; 1573 struct timerlat_sample s; 1574 u64 now; 1575 u64 diff; 1576 1577 /* 1578 * I am not sure if the timer was armed for this CPU. So, get 1579 * the timerlat struct from the timer itself, not from this 1580 * CPU. 1581 */ 1582 tlat = container_of(timer, struct timerlat_variables, timer); 1583 1584 now = ktime_to_ns(hrtimer_cb_get_time(&tlat->timer)); 1585 1586 /* 1587 * Enable the osnoise: events for thread an softirq. 1588 */ 1589 tlat->tracing_thread = true; 1590 1591 osn_var->thread.arrival_time = time_get(); 1592 1593 /* 1594 * A hardirq is running: the timer IRQ. It is for sure preempting 1595 * a thread, and potentially preempting a softirq. 1596 * 1597 * At this point, it is not interesting to know the duration of the 1598 * preempted thread (and maybe softirq), but how much time they will 1599 * delay the beginning of the execution of the timer thread. 1600 * 1601 * To get the correct (net) delay added by the softirq, its delta_start 1602 * is set as the IRQ one. In this way, at the return of the IRQ, the delta 1603 * start of the sofitrq will be zeroed, accounting then only the time 1604 * after that. 1605 * 1606 * The thread follows the same principle. However, if a softirq is 1607 * running, the thread needs to receive the softirq delta_start. The 1608 * reason being is that the softirq will be the last to be unfolded, 1609 * resseting the thread delay to zero. 1610 * 1611 * The PREEMPT_RT is a special case, though. As softirqs run as threads 1612 * on RT, moving the thread is enough. 1613 */ 1614 if (!IS_ENABLED(CONFIG_PREEMPT_RT) && osn_var->softirq.delta_start) { 1615 copy_int_safe_time(osn_var, &osn_var->thread.delta_start, 1616 &osn_var->softirq.delta_start); 1617 1618 copy_int_safe_time(osn_var, &osn_var->softirq.delta_start, 1619 &osn_var->irq.delta_start); 1620 } else { 1621 copy_int_safe_time(osn_var, &osn_var->thread.delta_start, 1622 &osn_var->irq.delta_start); 1623 } 1624 1625 /* 1626 * Compute the current time with the expected time. 1627 */ 1628 diff = now - tlat->abs_period; 1629 1630 tlat->count++; 1631 s.seqnum = tlat->count; 1632 s.timer_latency = diff; 1633 s.context = IRQ_CONTEXT; 1634 1635 trace_timerlat_sample(&s); 1636 1637 if (osnoise_data.stop_tracing) { 1638 if (time_to_us(diff) >= osnoise_data.stop_tracing) { 1639 1640 /* 1641 * At this point, if stop_tracing is set and <= print_stack, 1642 * print_stack is set and would be printed in the thread handler. 1643 * 1644 * Thus, print the stack trace as it is helpful to define the 1645 * root cause of an IRQ latency. 1646 */ 1647 if (osnoise_data.stop_tracing <= osnoise_data.print_stack) { 1648 timerlat_save_stack(0); 1649 timerlat_dump_stack(time_to_us(diff)); 1650 } 1651 1652 osnoise_stop_tracing(); 1653 notify_new_max_latency(diff); 1654 1655 return HRTIMER_NORESTART; 1656 } 1657 } 1658 1659 wake_up_process(tlat->kthread); 1660 1661 if (osnoise_data.print_stack) 1662 timerlat_save_stack(0); 1663 1664 return HRTIMER_NORESTART; 1665 } 1666 1667 /* 1668 * wait_next_period - Wait for the next period for timerlat 1669 */ 1670 static int wait_next_period(struct timerlat_variables *tlat) 1671 { 1672 ktime_t next_abs_period, now; 1673 u64 rel_period = osnoise_data.timerlat_period * 1000; 1674 1675 now = hrtimer_cb_get_time(&tlat->timer); 1676 next_abs_period = ns_to_ktime(tlat->abs_period + rel_period); 1677 1678 /* 1679 * Save the next abs_period. 1680 */ 1681 tlat->abs_period = (u64) ktime_to_ns(next_abs_period); 1682 1683 /* 1684 * If the new abs_period is in the past, skip the activation. 1685 */ 1686 while (ktime_compare(now, next_abs_period) > 0) { 1687 next_abs_period = ns_to_ktime(tlat->abs_period + rel_period); 1688 tlat->abs_period = (u64) ktime_to_ns(next_abs_period); 1689 } 1690 1691 set_current_state(TASK_INTERRUPTIBLE); 1692 1693 hrtimer_start(&tlat->timer, next_abs_period, HRTIMER_MODE_ABS_PINNED_HARD); 1694 schedule(); 1695 return 1; 1696 } 1697 1698 /* 1699 * timerlat_main- Timerlat main 1700 */ 1701 static int timerlat_main(void *data) 1702 { 1703 struct osnoise_variables *osn_var = this_cpu_osn_var(); 1704 struct timerlat_variables *tlat = this_cpu_tmr_var(); 1705 struct timerlat_sample s; 1706 struct sched_param sp; 1707 u64 now, diff; 1708 1709 /* 1710 * Make the thread RT, that is how cyclictest is usually used. 1711 */ 1712 sp.sched_priority = DEFAULT_TIMERLAT_PRIO; 1713 sched_setscheduler_nocheck(current, SCHED_FIFO, &sp); 1714 1715 tlat->count = 0; 1716 tlat->tracing_thread = false; 1717 1718 hrtimer_init(&tlat->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED_HARD); 1719 tlat->timer.function = timerlat_irq; 1720 tlat->kthread = current; 1721 osn_var->pid = current->pid; 1722 /* 1723 * Anotate the arrival time. 1724 */ 1725 tlat->abs_period = hrtimer_cb_get_time(&tlat->timer); 1726 1727 wait_next_period(tlat); 1728 1729 osn_var->sampling = 1; 1730 1731 while (!kthread_should_stop()) { 1732 now = ktime_to_ns(hrtimer_cb_get_time(&tlat->timer)); 1733 diff = now - tlat->abs_period; 1734 1735 s.seqnum = tlat->count; 1736 s.timer_latency = diff; 1737 s.context = THREAD_CONTEXT; 1738 1739 trace_timerlat_sample(&s); 1740 1741 notify_new_max_latency(diff); 1742 1743 timerlat_dump_stack(time_to_us(diff)); 1744 1745 tlat->tracing_thread = false; 1746 if (osnoise_data.stop_tracing_total) 1747 if (time_to_us(diff) >= osnoise_data.stop_tracing_total) 1748 osnoise_stop_tracing(); 1749 1750 wait_next_period(tlat); 1751 } 1752 1753 hrtimer_cancel(&tlat->timer); 1754 return 0; 1755 } 1756 #else /* CONFIG_TIMERLAT_TRACER */ 1757 static int timerlat_main(void *data) 1758 { 1759 return 0; 1760 } 1761 #endif /* CONFIG_TIMERLAT_TRACER */ 1762 1763 /* 1764 * stop_kthread - stop a workload thread 1765 */ 1766 static void stop_kthread(unsigned int cpu) 1767 { 1768 struct task_struct *kthread; 1769 1770 kthread = per_cpu(per_cpu_osnoise_var, cpu).kthread; 1771 if (kthread) { 1772 kthread_stop(kthread); 1773 per_cpu(per_cpu_osnoise_var, cpu).kthread = NULL; 1774 } else { 1775 if (!test_bit(OSN_WORKLOAD, &osnoise_options)) { 1776 per_cpu(per_cpu_osnoise_var, cpu).sampling = false; 1777 barrier(); 1778 return; 1779 } 1780 } 1781 } 1782 1783 /* 1784 * stop_per_cpu_kthread - Stop per-cpu threads 1785 * 1786 * Stop the osnoise sampling htread. Use this on unload and at system 1787 * shutdown. 1788 */ 1789 static void stop_per_cpu_kthreads(void) 1790 { 1791 int cpu; 1792 1793 cpus_read_lock(); 1794 1795 for_each_online_cpu(cpu) 1796 stop_kthread(cpu); 1797 1798 cpus_read_unlock(); 1799 } 1800 1801 /* 1802 * start_kthread - Start a workload tread 1803 */ 1804 static int start_kthread(unsigned int cpu) 1805 { 1806 struct task_struct *kthread; 1807 void *main = osnoise_main; 1808 char comm[24]; 1809 1810 if (timerlat_enabled()) { 1811 snprintf(comm, 24, "timerlat/%d", cpu); 1812 main = timerlat_main; 1813 } else { 1814 /* if no workload, just return */ 1815 if (!test_bit(OSN_WORKLOAD, &osnoise_options)) { 1816 per_cpu(per_cpu_osnoise_var, cpu).sampling = true; 1817 barrier(); 1818 return 0; 1819 } 1820 1821 snprintf(comm, 24, "osnoise/%d", cpu); 1822 } 1823 1824 kthread = kthread_run_on_cpu(main, NULL, cpu, comm); 1825 1826 if (IS_ERR(kthread)) { 1827 pr_err(BANNER "could not start sampling thread\n"); 1828 stop_per_cpu_kthreads(); 1829 return -ENOMEM; 1830 } 1831 1832 per_cpu(per_cpu_osnoise_var, cpu).kthread = kthread; 1833 1834 return 0; 1835 } 1836 1837 /* 1838 * start_per_cpu_kthread - Kick off per-cpu osnoise sampling kthreads 1839 * 1840 * This starts the kernel thread that will look for osnoise on many 1841 * cpus. 1842 */ 1843 static int start_per_cpu_kthreads(void) 1844 { 1845 struct cpumask *current_mask = &save_cpumask; 1846 int retval = 0; 1847 int cpu; 1848 1849 cpus_read_lock(); 1850 /* 1851 * Run only on online CPUs in which osnoise is allowed to run. 1852 */ 1853 cpumask_and(current_mask, cpu_online_mask, &osnoise_cpumask); 1854 1855 for_each_possible_cpu(cpu) 1856 per_cpu(per_cpu_osnoise_var, cpu).kthread = NULL; 1857 1858 for_each_cpu(cpu, current_mask) { 1859 retval = start_kthread(cpu); 1860 if (retval) { 1861 cpus_read_unlock(); 1862 stop_per_cpu_kthreads(); 1863 return retval; 1864 } 1865 } 1866 1867 cpus_read_unlock(); 1868 1869 return retval; 1870 } 1871 1872 #ifdef CONFIG_HOTPLUG_CPU 1873 static void osnoise_hotplug_workfn(struct work_struct *dummy) 1874 { 1875 unsigned int cpu = smp_processor_id(); 1876 1877 mutex_lock(&trace_types_lock); 1878 1879 if (!osnoise_has_registered_instances()) 1880 goto out_unlock_trace; 1881 1882 mutex_lock(&interface_lock); 1883 cpus_read_lock(); 1884 1885 if (!cpumask_test_cpu(cpu, &osnoise_cpumask)) 1886 goto out_unlock; 1887 1888 start_kthread(cpu); 1889 1890 out_unlock: 1891 cpus_read_unlock(); 1892 mutex_unlock(&interface_lock); 1893 out_unlock_trace: 1894 mutex_unlock(&trace_types_lock); 1895 } 1896 1897 static DECLARE_WORK(osnoise_hotplug_work, osnoise_hotplug_workfn); 1898 1899 /* 1900 * osnoise_cpu_init - CPU hotplug online callback function 1901 */ 1902 static int osnoise_cpu_init(unsigned int cpu) 1903 { 1904 schedule_work_on(cpu, &osnoise_hotplug_work); 1905 return 0; 1906 } 1907 1908 /* 1909 * osnoise_cpu_die - CPU hotplug offline callback function 1910 */ 1911 static int osnoise_cpu_die(unsigned int cpu) 1912 { 1913 stop_kthread(cpu); 1914 return 0; 1915 } 1916 1917 static void osnoise_init_hotplug_support(void) 1918 { 1919 int ret; 1920 1921 ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "trace/osnoise:online", 1922 osnoise_cpu_init, osnoise_cpu_die); 1923 if (ret < 0) 1924 pr_warn(BANNER "Error to init cpu hotplug support\n"); 1925 1926 return; 1927 } 1928 #else /* CONFIG_HOTPLUG_CPU */ 1929 static void osnoise_init_hotplug_support(void) 1930 { 1931 return; 1932 } 1933 #endif /* CONFIG_HOTPLUG_CPU */ 1934 1935 /* 1936 * seq file functions for the osnoise/options file. 1937 */ 1938 static void *s_options_start(struct seq_file *s, loff_t *pos) 1939 { 1940 int option = *pos; 1941 1942 mutex_lock(&interface_lock); 1943 1944 if (option >= OSN_MAX) 1945 return NULL; 1946 1947 return pos; 1948 } 1949 1950 static void *s_options_next(struct seq_file *s, void *v, loff_t *pos) 1951 { 1952 int option = ++(*pos); 1953 1954 if (option >= OSN_MAX) 1955 return NULL; 1956 1957 return pos; 1958 } 1959 1960 static int s_options_show(struct seq_file *s, void *v) 1961 { 1962 loff_t *pos = v; 1963 int option = *pos; 1964 1965 if (option == OSN_DEFAULTS) { 1966 if (osnoise_options == OSN_DEFAULT_OPTIONS) 1967 seq_printf(s, "%s", osnoise_options_str[option]); 1968 else 1969 seq_printf(s, "NO_%s", osnoise_options_str[option]); 1970 goto out; 1971 } 1972 1973 if (test_bit(option, &osnoise_options)) 1974 seq_printf(s, "%s", osnoise_options_str[option]); 1975 else 1976 seq_printf(s, "NO_%s", osnoise_options_str[option]); 1977 1978 out: 1979 if (option != OSN_MAX) 1980 seq_puts(s, " "); 1981 1982 return 0; 1983 } 1984 1985 static void s_options_stop(struct seq_file *s, void *v) 1986 { 1987 seq_puts(s, "\n"); 1988 mutex_unlock(&interface_lock); 1989 } 1990 1991 static const struct seq_operations osnoise_options_seq_ops = { 1992 .start = s_options_start, 1993 .next = s_options_next, 1994 .show = s_options_show, 1995 .stop = s_options_stop 1996 }; 1997 1998 static int osnoise_options_open(struct inode *inode, struct file *file) 1999 { 2000 return seq_open(file, &osnoise_options_seq_ops); 2001 }; 2002 2003 /** 2004 * osnoise_options_write - Write function for "options" entry 2005 * @filp: The active open file structure 2006 * @ubuf: The user buffer that contains the value to write 2007 * @cnt: The maximum number of bytes to write to "file" 2008 * @ppos: The current position in @file 2009 * 2010 * Writing the option name sets the option, writing the "NO_" 2011 * prefix in front of the option name disables it. 2012 * 2013 * Writing "DEFAULTS" resets the option values to the default ones. 2014 */ 2015 static ssize_t osnoise_options_write(struct file *filp, const char __user *ubuf, 2016 size_t cnt, loff_t *ppos) 2017 { 2018 int running, option, enable, retval; 2019 char buf[256], *option_str; 2020 2021 if (cnt >= 256) 2022 return -EINVAL; 2023 2024 if (copy_from_user(buf, ubuf, cnt)) 2025 return -EFAULT; 2026 2027 buf[cnt] = 0; 2028 2029 if (strncmp(buf, "NO_", 3)) { 2030 option_str = strstrip(buf); 2031 enable = true; 2032 } else { 2033 option_str = strstrip(&buf[3]); 2034 enable = false; 2035 } 2036 2037 option = match_string(osnoise_options_str, OSN_MAX, option_str); 2038 if (option < 0) 2039 return -EINVAL; 2040 2041 /* 2042 * trace_types_lock is taken to avoid concurrency on start/stop. 2043 */ 2044 mutex_lock(&trace_types_lock); 2045 running = osnoise_has_registered_instances(); 2046 if (running) 2047 stop_per_cpu_kthreads(); 2048 2049 mutex_lock(&interface_lock); 2050 /* 2051 * avoid CPU hotplug operations that might read options. 2052 */ 2053 cpus_read_lock(); 2054 2055 retval = cnt; 2056 2057 if (enable) { 2058 if (option == OSN_DEFAULTS) 2059 osnoise_options = OSN_DEFAULT_OPTIONS; 2060 else 2061 set_bit(option, &osnoise_options); 2062 } else { 2063 if (option == OSN_DEFAULTS) 2064 retval = -EINVAL; 2065 else 2066 clear_bit(option, &osnoise_options); 2067 } 2068 2069 cpus_read_unlock(); 2070 mutex_unlock(&interface_lock); 2071 2072 if (running) 2073 start_per_cpu_kthreads(); 2074 mutex_unlock(&trace_types_lock); 2075 2076 return retval; 2077 } 2078 2079 /* 2080 * osnoise_cpus_read - Read function for reading the "cpus" file 2081 * @filp: The active open file structure 2082 * @ubuf: The userspace provided buffer to read value into 2083 * @cnt: The maximum number of bytes to read 2084 * @ppos: The current "file" position 2085 * 2086 * Prints the "cpus" output into the user-provided buffer. 2087 */ 2088 static ssize_t 2089 osnoise_cpus_read(struct file *filp, char __user *ubuf, size_t count, 2090 loff_t *ppos) 2091 { 2092 char *mask_str; 2093 int len; 2094 2095 mutex_lock(&interface_lock); 2096 2097 len = snprintf(NULL, 0, "%*pbl\n", cpumask_pr_args(&osnoise_cpumask)) + 1; 2098 mask_str = kmalloc(len, GFP_KERNEL); 2099 if (!mask_str) { 2100 count = -ENOMEM; 2101 goto out_unlock; 2102 } 2103 2104 len = snprintf(mask_str, len, "%*pbl\n", cpumask_pr_args(&osnoise_cpumask)); 2105 if (len >= count) { 2106 count = -EINVAL; 2107 goto out_free; 2108 } 2109 2110 count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len); 2111 2112 out_free: 2113 kfree(mask_str); 2114 out_unlock: 2115 mutex_unlock(&interface_lock); 2116 2117 return count; 2118 } 2119 2120 /* 2121 * osnoise_cpus_write - Write function for "cpus" entry 2122 * @filp: The active open file structure 2123 * @ubuf: The user buffer that contains the value to write 2124 * @cnt: The maximum number of bytes to write to "file" 2125 * @ppos: The current position in @file 2126 * 2127 * This function provides a write implementation for the "cpus" 2128 * interface to the osnoise trace. By default, it lists all CPUs, 2129 * in this way, allowing osnoise threads to run on any online CPU 2130 * of the system. It serves to restrict the execution of osnoise to the 2131 * set of CPUs writing via this interface. Why not use "tracing_cpumask"? 2132 * Because the user might be interested in tracing what is running on 2133 * other CPUs. For instance, one might run osnoise in one HT CPU 2134 * while observing what is running on the sibling HT CPU. 2135 */ 2136 static ssize_t 2137 osnoise_cpus_write(struct file *filp, const char __user *ubuf, size_t count, 2138 loff_t *ppos) 2139 { 2140 cpumask_var_t osnoise_cpumask_new; 2141 int running, err; 2142 char buf[256]; 2143 2144 if (count >= 256) 2145 return -EINVAL; 2146 2147 if (copy_from_user(buf, ubuf, count)) 2148 return -EFAULT; 2149 2150 if (!zalloc_cpumask_var(&osnoise_cpumask_new, GFP_KERNEL)) 2151 return -ENOMEM; 2152 2153 err = cpulist_parse(buf, osnoise_cpumask_new); 2154 if (err) 2155 goto err_free; 2156 2157 /* 2158 * trace_types_lock is taken to avoid concurrency on start/stop. 2159 */ 2160 mutex_lock(&trace_types_lock); 2161 running = osnoise_has_registered_instances(); 2162 if (running) 2163 stop_per_cpu_kthreads(); 2164 2165 mutex_lock(&interface_lock); 2166 /* 2167 * osnoise_cpumask is read by CPU hotplug operations. 2168 */ 2169 cpus_read_lock(); 2170 2171 cpumask_copy(&osnoise_cpumask, osnoise_cpumask_new); 2172 2173 cpus_read_unlock(); 2174 mutex_unlock(&interface_lock); 2175 2176 if (running) 2177 start_per_cpu_kthreads(); 2178 mutex_unlock(&trace_types_lock); 2179 2180 free_cpumask_var(osnoise_cpumask_new); 2181 return count; 2182 2183 err_free: 2184 free_cpumask_var(osnoise_cpumask_new); 2185 2186 return err; 2187 } 2188 2189 /* 2190 * osnoise/runtime_us: cannot be greater than the period. 2191 */ 2192 static struct trace_min_max_param osnoise_runtime = { 2193 .lock = &interface_lock, 2194 .val = &osnoise_data.sample_runtime, 2195 .max = &osnoise_data.sample_period, 2196 .min = NULL, 2197 }; 2198 2199 /* 2200 * osnoise/period_us: cannot be smaller than the runtime. 2201 */ 2202 static struct trace_min_max_param osnoise_period = { 2203 .lock = &interface_lock, 2204 .val = &osnoise_data.sample_period, 2205 .max = NULL, 2206 .min = &osnoise_data.sample_runtime, 2207 }; 2208 2209 /* 2210 * osnoise/stop_tracing_us: no limit. 2211 */ 2212 static struct trace_min_max_param osnoise_stop_tracing_in = { 2213 .lock = &interface_lock, 2214 .val = &osnoise_data.stop_tracing, 2215 .max = NULL, 2216 .min = NULL, 2217 }; 2218 2219 /* 2220 * osnoise/stop_tracing_total_us: no limit. 2221 */ 2222 static struct trace_min_max_param osnoise_stop_tracing_total = { 2223 .lock = &interface_lock, 2224 .val = &osnoise_data.stop_tracing_total, 2225 .max = NULL, 2226 .min = NULL, 2227 }; 2228 2229 #ifdef CONFIG_TIMERLAT_TRACER 2230 /* 2231 * osnoise/print_stack: print the stacktrace of the IRQ handler if the total 2232 * latency is higher than val. 2233 */ 2234 static struct trace_min_max_param osnoise_print_stack = { 2235 .lock = &interface_lock, 2236 .val = &osnoise_data.print_stack, 2237 .max = NULL, 2238 .min = NULL, 2239 }; 2240 2241 /* 2242 * osnoise/timerlat_period: min 100 us, max 1 s 2243 */ 2244 static u64 timerlat_min_period = 100; 2245 static u64 timerlat_max_period = 1000000; 2246 static struct trace_min_max_param timerlat_period = { 2247 .lock = &interface_lock, 2248 .val = &osnoise_data.timerlat_period, 2249 .max = &timerlat_max_period, 2250 .min = &timerlat_min_period, 2251 }; 2252 #endif 2253 2254 static const struct file_operations cpus_fops = { 2255 .open = tracing_open_generic, 2256 .read = osnoise_cpus_read, 2257 .write = osnoise_cpus_write, 2258 .llseek = generic_file_llseek, 2259 }; 2260 2261 static const struct file_operations osnoise_options_fops = { 2262 .open = osnoise_options_open, 2263 .read = seq_read, 2264 .llseek = seq_lseek, 2265 .release = seq_release, 2266 .write = osnoise_options_write 2267 }; 2268 2269 #ifdef CONFIG_TIMERLAT_TRACER 2270 #ifdef CONFIG_STACKTRACE 2271 static int init_timerlat_stack_tracefs(struct dentry *top_dir) 2272 { 2273 struct dentry *tmp; 2274 2275 tmp = tracefs_create_file("print_stack", TRACE_MODE_WRITE, top_dir, 2276 &osnoise_print_stack, &trace_min_max_fops); 2277 if (!tmp) 2278 return -ENOMEM; 2279 2280 return 0; 2281 } 2282 #else /* CONFIG_STACKTRACE */ 2283 static int init_timerlat_stack_tracefs(struct dentry *top_dir) 2284 { 2285 return 0; 2286 } 2287 #endif /* CONFIG_STACKTRACE */ 2288 2289 /* 2290 * init_timerlat_tracefs - A function to initialize the timerlat interface files 2291 */ 2292 static int init_timerlat_tracefs(struct dentry *top_dir) 2293 { 2294 struct dentry *tmp; 2295 2296 tmp = tracefs_create_file("timerlat_period_us", TRACE_MODE_WRITE, top_dir, 2297 &timerlat_period, &trace_min_max_fops); 2298 if (!tmp) 2299 return -ENOMEM; 2300 2301 return init_timerlat_stack_tracefs(top_dir); 2302 } 2303 #else /* CONFIG_TIMERLAT_TRACER */ 2304 static int init_timerlat_tracefs(struct dentry *top_dir) 2305 { 2306 return 0; 2307 } 2308 #endif /* CONFIG_TIMERLAT_TRACER */ 2309 2310 /* 2311 * init_tracefs - A function to initialize the tracefs interface files 2312 * 2313 * This function creates entries in tracefs for "osnoise" and "timerlat". 2314 * It creates these directories in the tracing directory, and within that 2315 * directory the use can change and view the configs. 2316 */ 2317 static int init_tracefs(void) 2318 { 2319 struct dentry *top_dir; 2320 struct dentry *tmp; 2321 int ret; 2322 2323 ret = tracing_init_dentry(); 2324 if (ret) 2325 return -ENOMEM; 2326 2327 top_dir = tracefs_create_dir("osnoise", NULL); 2328 if (!top_dir) 2329 return 0; 2330 2331 tmp = tracefs_create_file("period_us", TRACE_MODE_WRITE, top_dir, 2332 &osnoise_period, &trace_min_max_fops); 2333 if (!tmp) 2334 goto err; 2335 2336 tmp = tracefs_create_file("runtime_us", TRACE_MODE_WRITE, top_dir, 2337 &osnoise_runtime, &trace_min_max_fops); 2338 if (!tmp) 2339 goto err; 2340 2341 tmp = tracefs_create_file("stop_tracing_us", TRACE_MODE_WRITE, top_dir, 2342 &osnoise_stop_tracing_in, &trace_min_max_fops); 2343 if (!tmp) 2344 goto err; 2345 2346 tmp = tracefs_create_file("stop_tracing_total_us", TRACE_MODE_WRITE, top_dir, 2347 &osnoise_stop_tracing_total, &trace_min_max_fops); 2348 if (!tmp) 2349 goto err; 2350 2351 tmp = trace_create_file("cpus", TRACE_MODE_WRITE, top_dir, NULL, &cpus_fops); 2352 if (!tmp) 2353 goto err; 2354 2355 tmp = trace_create_file("options", TRACE_MODE_WRITE, top_dir, NULL, 2356 &osnoise_options_fops); 2357 if (!tmp) 2358 goto err; 2359 2360 ret = init_timerlat_tracefs(top_dir); 2361 if (ret) 2362 goto err; 2363 2364 return 0; 2365 2366 err: 2367 tracefs_remove(top_dir); 2368 return -ENOMEM; 2369 } 2370 2371 static int osnoise_hook_events(void) 2372 { 2373 int retval; 2374 2375 /* 2376 * Trace is already hooked, we are re-enabling from 2377 * a stop_tracing_*. 2378 */ 2379 if (trace_osnoise_callback_enabled) 2380 return 0; 2381 2382 retval = hook_irq_events(); 2383 if (retval) 2384 return -EINVAL; 2385 2386 retval = hook_softirq_events(); 2387 if (retval) 2388 goto out_unhook_irq; 2389 2390 retval = hook_thread_events(); 2391 /* 2392 * All fine! 2393 */ 2394 if (!retval) 2395 return 0; 2396 2397 unhook_softirq_events(); 2398 out_unhook_irq: 2399 unhook_irq_events(); 2400 return -EINVAL; 2401 } 2402 2403 static void osnoise_unhook_events(void) 2404 { 2405 unhook_thread_events(); 2406 unhook_softirq_events(); 2407 unhook_irq_events(); 2408 } 2409 2410 /* 2411 * osnoise_workload_start - start the workload and hook to events 2412 */ 2413 static int osnoise_workload_start(void) 2414 { 2415 int retval; 2416 2417 /* 2418 * Instances need to be registered after calling workload 2419 * start. Hence, if there is already an instance, the 2420 * workload was already registered. Otherwise, this 2421 * code is on the way to register the first instance, 2422 * and the workload will start. 2423 */ 2424 if (osnoise_has_registered_instances()) 2425 return 0; 2426 2427 osn_var_reset_all(); 2428 2429 retval = osnoise_hook_events(); 2430 if (retval) 2431 return retval; 2432 2433 /* 2434 * Make sure that ftrace_nmi_enter/exit() see reset values 2435 * before enabling trace_osnoise_callback_enabled. 2436 */ 2437 barrier(); 2438 trace_osnoise_callback_enabled = true; 2439 2440 retval = start_per_cpu_kthreads(); 2441 if (retval) { 2442 trace_osnoise_callback_enabled = false; 2443 /* 2444 * Make sure that ftrace_nmi_enter/exit() see 2445 * trace_osnoise_callback_enabled as false before continuing. 2446 */ 2447 barrier(); 2448 2449 osnoise_unhook_events(); 2450 return retval; 2451 } 2452 2453 return 0; 2454 } 2455 2456 /* 2457 * osnoise_workload_stop - stop the workload and unhook the events 2458 */ 2459 static void osnoise_workload_stop(void) 2460 { 2461 /* 2462 * Instances need to be unregistered before calling 2463 * stop. Hence, if there is a registered instance, more 2464 * than one instance is running, and the workload will not 2465 * yet stop. Otherwise, this code is on the way to disable 2466 * the last instance, and the workload can stop. 2467 */ 2468 if (osnoise_has_registered_instances()) 2469 return; 2470 2471 /* 2472 * If callbacks were already disabled in a previous stop 2473 * call, there is no need to disable then again. 2474 * 2475 * For instance, this happens when tracing is stopped via: 2476 * echo 0 > tracing_on 2477 * echo nop > current_tracer. 2478 */ 2479 if (!trace_osnoise_callback_enabled) 2480 return; 2481 2482 trace_osnoise_callback_enabled = false; 2483 /* 2484 * Make sure that ftrace_nmi_enter/exit() see 2485 * trace_osnoise_callback_enabled as false before continuing. 2486 */ 2487 barrier(); 2488 2489 stop_per_cpu_kthreads(); 2490 2491 osnoise_unhook_events(); 2492 } 2493 2494 static void osnoise_tracer_start(struct trace_array *tr) 2495 { 2496 int retval; 2497 2498 /* 2499 * If the instance is already registered, there is no need to 2500 * register it again. 2501 */ 2502 if (osnoise_instance_registered(tr)) 2503 return; 2504 2505 retval = osnoise_workload_start(); 2506 if (retval) 2507 pr_err(BANNER "Error starting osnoise tracer\n"); 2508 2509 osnoise_register_instance(tr); 2510 } 2511 2512 static void osnoise_tracer_stop(struct trace_array *tr) 2513 { 2514 osnoise_unregister_instance(tr); 2515 osnoise_workload_stop(); 2516 } 2517 2518 static int osnoise_tracer_init(struct trace_array *tr) 2519 { 2520 /* 2521 * Only allow osnoise tracer if timerlat tracer is not running 2522 * already. 2523 */ 2524 if (timerlat_enabled()) 2525 return -EBUSY; 2526 2527 tr->max_latency = 0; 2528 2529 osnoise_tracer_start(tr); 2530 return 0; 2531 } 2532 2533 static void osnoise_tracer_reset(struct trace_array *tr) 2534 { 2535 osnoise_tracer_stop(tr); 2536 } 2537 2538 static struct tracer osnoise_tracer __read_mostly = { 2539 .name = "osnoise", 2540 .init = osnoise_tracer_init, 2541 .reset = osnoise_tracer_reset, 2542 .start = osnoise_tracer_start, 2543 .stop = osnoise_tracer_stop, 2544 .print_header = print_osnoise_headers, 2545 .allow_instances = true, 2546 }; 2547 2548 #ifdef CONFIG_TIMERLAT_TRACER 2549 static void timerlat_tracer_start(struct trace_array *tr) 2550 { 2551 int retval; 2552 2553 /* 2554 * If the instance is already registered, there is no need to 2555 * register it again. 2556 */ 2557 if (osnoise_instance_registered(tr)) 2558 return; 2559 2560 retval = osnoise_workload_start(); 2561 if (retval) 2562 pr_err(BANNER "Error starting timerlat tracer\n"); 2563 2564 osnoise_register_instance(tr); 2565 2566 return; 2567 } 2568 2569 static void timerlat_tracer_stop(struct trace_array *tr) 2570 { 2571 int cpu; 2572 2573 osnoise_unregister_instance(tr); 2574 2575 /* 2576 * Instruct the threads to stop only if this is the last instance. 2577 */ 2578 if (!osnoise_has_registered_instances()) { 2579 for_each_online_cpu(cpu) 2580 per_cpu(per_cpu_osnoise_var, cpu).sampling = 0; 2581 } 2582 2583 osnoise_workload_stop(); 2584 } 2585 2586 static int timerlat_tracer_init(struct trace_array *tr) 2587 { 2588 /* 2589 * Only allow timerlat tracer if osnoise tracer is not running already. 2590 */ 2591 if (osnoise_has_registered_instances() && !osnoise_data.timerlat_tracer) 2592 return -EBUSY; 2593 2594 /* 2595 * If this is the first instance, set timerlat_tracer to block 2596 * osnoise tracer start. 2597 */ 2598 if (!osnoise_has_registered_instances()) 2599 osnoise_data.timerlat_tracer = 1; 2600 2601 tr->max_latency = 0; 2602 timerlat_tracer_start(tr); 2603 2604 return 0; 2605 } 2606 2607 static void timerlat_tracer_reset(struct trace_array *tr) 2608 { 2609 timerlat_tracer_stop(tr); 2610 2611 /* 2612 * If this is the last instance, reset timerlat_tracer allowing 2613 * osnoise to be started. 2614 */ 2615 if (!osnoise_has_registered_instances()) 2616 osnoise_data.timerlat_tracer = 0; 2617 } 2618 2619 static struct tracer timerlat_tracer __read_mostly = { 2620 .name = "timerlat", 2621 .init = timerlat_tracer_init, 2622 .reset = timerlat_tracer_reset, 2623 .start = timerlat_tracer_start, 2624 .stop = timerlat_tracer_stop, 2625 .print_header = print_timerlat_headers, 2626 .allow_instances = true, 2627 }; 2628 2629 __init static int init_timerlat_tracer(void) 2630 { 2631 return register_tracer(&timerlat_tracer); 2632 } 2633 #else /* CONFIG_TIMERLAT_TRACER */ 2634 __init static int init_timerlat_tracer(void) 2635 { 2636 return 0; 2637 } 2638 #endif /* CONFIG_TIMERLAT_TRACER */ 2639 2640 __init static int init_osnoise_tracer(void) 2641 { 2642 int ret; 2643 2644 mutex_init(&interface_lock); 2645 2646 cpumask_copy(&osnoise_cpumask, cpu_all_mask); 2647 2648 ret = register_tracer(&osnoise_tracer); 2649 if (ret) { 2650 pr_err(BANNER "Error registering osnoise!\n"); 2651 return ret; 2652 } 2653 2654 ret = init_timerlat_tracer(); 2655 if (ret) { 2656 pr_err(BANNER "Error registering timerlat!\n"); 2657 return ret; 2658 } 2659 2660 osnoise_init_hotplug_support(); 2661 2662 INIT_LIST_HEAD_RCU(&osnoise_instances); 2663 2664 init_tracefs(); 2665 2666 return 0; 2667 } 2668 late_initcall(init_osnoise_tracer); 2669