1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * OS Noise Tracer: computes the OS Noise suffered by a running thread. 4 * Timerlat Tracer: measures the wakeup latency of a timer triggered IRQ and thread. 5 * 6 * Based on "hwlat_detector" tracer by: 7 * Copyright (C) 2008-2009 Jon Masters, Red Hat, Inc. <jcm@redhat.com> 8 * Copyright (C) 2013-2016 Steven Rostedt, Red Hat, Inc. <srostedt@redhat.com> 9 * With feedback from Clark Williams <williams@redhat.com> 10 * 11 * And also based on the rtsl tracer presented on: 12 * DE OLIVEIRA, Daniel Bristot, et al. Demystifying the real-time linux 13 * scheduling latency. In: 32nd Euromicro Conference on Real-Time Systems 14 * (ECRTS 2020). Schloss Dagstuhl-Leibniz-Zentrum fur Informatik, 2020. 15 * 16 * Copyright (C) 2021 Daniel Bristot de Oliveira, Red Hat, Inc. <bristot@redhat.com> 17 */ 18 19 #include <linux/kthread.h> 20 #include <linux/tracefs.h> 21 #include <linux/uaccess.h> 22 #include <linux/cpumask.h> 23 #include <linux/delay.h> 24 #include <linux/sched/clock.h> 25 #include <uapi/linux/sched/types.h> 26 #include <linux/sched.h> 27 #include "trace.h" 28 29 #ifdef CONFIG_X86_LOCAL_APIC 30 #include <asm/trace/irq_vectors.h> 31 #undef TRACE_INCLUDE_PATH 32 #undef TRACE_INCLUDE_FILE 33 #endif /* CONFIG_X86_LOCAL_APIC */ 34 35 #include <trace/events/irq.h> 36 #include <trace/events/sched.h> 37 38 #define CREATE_TRACE_POINTS 39 #include <trace/events/osnoise.h> 40 41 /* 42 * Default values. 43 */ 44 #define BANNER "osnoise: " 45 #define DEFAULT_SAMPLE_PERIOD 1000000 /* 1s */ 46 #define DEFAULT_SAMPLE_RUNTIME 1000000 /* 1s */ 47 48 #define DEFAULT_TIMERLAT_PERIOD 1000 /* 1ms */ 49 #define DEFAULT_TIMERLAT_PRIO 95 /* FIFO 95 */ 50 51 /* 52 * osnoise/options entries. 53 */ 54 enum osnoise_options_index { 55 OSN_DEFAULTS = 0, 56 OSN_WORKLOAD, 57 OSN_PANIC_ON_STOP, 58 OSN_PREEMPT_DISABLE, 59 OSN_IRQ_DISABLE, 60 OSN_MAX 61 }; 62 63 static const char * const osnoise_options_str[OSN_MAX] = { 64 "DEFAULTS", 65 "OSNOISE_WORKLOAD", 66 "PANIC_ON_STOP", 67 "OSNOISE_PREEMPT_DISABLE", 68 "OSNOISE_IRQ_DISABLE" }; 69 70 #define OSN_DEFAULT_OPTIONS 0x2 71 static unsigned long osnoise_options = OSN_DEFAULT_OPTIONS; 72 73 /* 74 * trace_array of the enabled osnoise/timerlat instances. 75 */ 76 struct osnoise_instance { 77 struct list_head list; 78 struct trace_array *tr; 79 }; 80 81 static struct list_head osnoise_instances; 82 83 static bool osnoise_has_registered_instances(void) 84 { 85 return !!list_first_or_null_rcu(&osnoise_instances, 86 struct osnoise_instance, 87 list); 88 } 89 90 /* 91 * osnoise_instance_registered - check if a tr is already registered 92 */ 93 static int osnoise_instance_registered(struct trace_array *tr) 94 { 95 struct osnoise_instance *inst; 96 int found = 0; 97 98 rcu_read_lock(); 99 list_for_each_entry_rcu(inst, &osnoise_instances, list) { 100 if (inst->tr == tr) 101 found = 1; 102 } 103 rcu_read_unlock(); 104 105 return found; 106 } 107 108 /* 109 * osnoise_register_instance - register a new trace instance 110 * 111 * Register a trace_array *tr in the list of instances running 112 * osnoise/timerlat tracers. 113 */ 114 static int osnoise_register_instance(struct trace_array *tr) 115 { 116 struct osnoise_instance *inst; 117 118 /* 119 * register/unregister serialization is provided by trace's 120 * trace_types_lock. 121 */ 122 lockdep_assert_held(&trace_types_lock); 123 124 inst = kmalloc(sizeof(*inst), GFP_KERNEL); 125 if (!inst) 126 return -ENOMEM; 127 128 INIT_LIST_HEAD_RCU(&inst->list); 129 inst->tr = tr; 130 list_add_tail_rcu(&inst->list, &osnoise_instances); 131 132 return 0; 133 } 134 135 /* 136 * osnoise_unregister_instance - unregister a registered trace instance 137 * 138 * Remove the trace_array *tr from the list of instances running 139 * osnoise/timerlat tracers. 140 */ 141 static void osnoise_unregister_instance(struct trace_array *tr) 142 { 143 struct osnoise_instance *inst; 144 int found = 0; 145 146 /* 147 * register/unregister serialization is provided by trace's 148 * trace_types_lock. 149 */ 150 list_for_each_entry_rcu(inst, &osnoise_instances, list, 151 lockdep_is_held(&trace_types_lock)) { 152 if (inst->tr == tr) { 153 list_del_rcu(&inst->list); 154 found = 1; 155 break; 156 } 157 } 158 159 if (!found) 160 return; 161 162 kvfree_rcu_mightsleep(inst); 163 } 164 165 /* 166 * NMI runtime info. 167 */ 168 struct osn_nmi { 169 u64 count; 170 u64 delta_start; 171 }; 172 173 /* 174 * IRQ runtime info. 175 */ 176 struct osn_irq { 177 u64 count; 178 u64 arrival_time; 179 u64 delta_start; 180 }; 181 182 #define IRQ_CONTEXT 0 183 #define THREAD_CONTEXT 1 184 /* 185 * sofirq runtime info. 186 */ 187 struct osn_softirq { 188 u64 count; 189 u64 arrival_time; 190 u64 delta_start; 191 }; 192 193 /* 194 * thread runtime info. 195 */ 196 struct osn_thread { 197 u64 count; 198 u64 arrival_time; 199 u64 delta_start; 200 }; 201 202 /* 203 * Runtime information: this structure saves the runtime information used by 204 * one sampling thread. 205 */ 206 struct osnoise_variables { 207 struct task_struct *kthread; 208 bool sampling; 209 pid_t pid; 210 struct osn_nmi nmi; 211 struct osn_irq irq; 212 struct osn_softirq softirq; 213 struct osn_thread thread; 214 local_t int_counter; 215 }; 216 217 /* 218 * Per-cpu runtime information. 219 */ 220 static DEFINE_PER_CPU(struct osnoise_variables, per_cpu_osnoise_var); 221 222 /* 223 * this_cpu_osn_var - Return the per-cpu osnoise_variables on its relative CPU 224 */ 225 static inline struct osnoise_variables *this_cpu_osn_var(void) 226 { 227 return this_cpu_ptr(&per_cpu_osnoise_var); 228 } 229 230 #ifdef CONFIG_TIMERLAT_TRACER 231 /* 232 * Runtime information for the timer mode. 233 */ 234 struct timerlat_variables { 235 struct task_struct *kthread; 236 struct hrtimer timer; 237 u64 rel_period; 238 u64 abs_period; 239 bool tracing_thread; 240 u64 count; 241 }; 242 243 static DEFINE_PER_CPU(struct timerlat_variables, per_cpu_timerlat_var); 244 245 /* 246 * this_cpu_tmr_var - Return the per-cpu timerlat_variables on its relative CPU 247 */ 248 static inline struct timerlat_variables *this_cpu_tmr_var(void) 249 { 250 return this_cpu_ptr(&per_cpu_timerlat_var); 251 } 252 253 /* 254 * tlat_var_reset - Reset the values of the given timerlat_variables 255 */ 256 static inline void tlat_var_reset(void) 257 { 258 struct timerlat_variables *tlat_var; 259 int cpu; 260 /* 261 * So far, all the values are initialized as 0, so 262 * zeroing the structure is perfect. 263 */ 264 for_each_cpu(cpu, cpu_online_mask) { 265 tlat_var = per_cpu_ptr(&per_cpu_timerlat_var, cpu); 266 memset(tlat_var, 0, sizeof(*tlat_var)); 267 } 268 } 269 #else /* CONFIG_TIMERLAT_TRACER */ 270 #define tlat_var_reset() do {} while (0) 271 #endif /* CONFIG_TIMERLAT_TRACER */ 272 273 /* 274 * osn_var_reset - Reset the values of the given osnoise_variables 275 */ 276 static inline void osn_var_reset(void) 277 { 278 struct osnoise_variables *osn_var; 279 int cpu; 280 281 /* 282 * So far, all the values are initialized as 0, so 283 * zeroing the structure is perfect. 284 */ 285 for_each_cpu(cpu, cpu_online_mask) { 286 osn_var = per_cpu_ptr(&per_cpu_osnoise_var, cpu); 287 memset(osn_var, 0, sizeof(*osn_var)); 288 } 289 } 290 291 /* 292 * osn_var_reset_all - Reset the value of all per-cpu osnoise_variables 293 */ 294 static inline void osn_var_reset_all(void) 295 { 296 osn_var_reset(); 297 tlat_var_reset(); 298 } 299 300 /* 301 * Tells NMIs to call back to the osnoise tracer to record timestamps. 302 */ 303 bool trace_osnoise_callback_enabled; 304 305 /* 306 * osnoise sample structure definition. Used to store the statistics of a 307 * sample run. 308 */ 309 struct osnoise_sample { 310 u64 runtime; /* runtime */ 311 u64 noise; /* noise */ 312 u64 max_sample; /* max single noise sample */ 313 int hw_count; /* # HW (incl. hypervisor) interference */ 314 int nmi_count; /* # NMIs during this sample */ 315 int irq_count; /* # IRQs during this sample */ 316 int softirq_count; /* # softirqs during this sample */ 317 int thread_count; /* # threads during this sample */ 318 }; 319 320 #ifdef CONFIG_TIMERLAT_TRACER 321 /* 322 * timerlat sample structure definition. Used to store the statistics of 323 * a sample run. 324 */ 325 struct timerlat_sample { 326 u64 timer_latency; /* timer_latency */ 327 unsigned int seqnum; /* unique sequence */ 328 int context; /* timer context */ 329 }; 330 #endif 331 332 /* 333 * Protect the interface. 334 */ 335 static struct mutex interface_lock; 336 337 /* 338 * Tracer data. 339 */ 340 static struct osnoise_data { 341 u64 sample_period; /* total sampling period */ 342 u64 sample_runtime; /* active sampling portion of period */ 343 u64 stop_tracing; /* stop trace in the internal operation (loop/irq) */ 344 u64 stop_tracing_total; /* stop trace in the final operation (report/thread) */ 345 #ifdef CONFIG_TIMERLAT_TRACER 346 u64 timerlat_period; /* timerlat period */ 347 u64 print_stack; /* print IRQ stack if total > */ 348 int timerlat_tracer; /* timerlat tracer */ 349 #endif 350 bool tainted; /* infor users and developers about a problem */ 351 } osnoise_data = { 352 .sample_period = DEFAULT_SAMPLE_PERIOD, 353 .sample_runtime = DEFAULT_SAMPLE_RUNTIME, 354 .stop_tracing = 0, 355 .stop_tracing_total = 0, 356 #ifdef CONFIG_TIMERLAT_TRACER 357 .print_stack = 0, 358 .timerlat_period = DEFAULT_TIMERLAT_PERIOD, 359 .timerlat_tracer = 0, 360 #endif 361 }; 362 363 #ifdef CONFIG_TIMERLAT_TRACER 364 static inline bool timerlat_enabled(void) 365 { 366 return osnoise_data.timerlat_tracer; 367 } 368 369 static inline int timerlat_softirq_exit(struct osnoise_variables *osn_var) 370 { 371 struct timerlat_variables *tlat_var = this_cpu_tmr_var(); 372 /* 373 * If the timerlat is enabled, but the irq handler did 374 * not run yet enabling timerlat_tracer, do not trace. 375 */ 376 if (!tlat_var->tracing_thread) { 377 osn_var->softirq.arrival_time = 0; 378 osn_var->softirq.delta_start = 0; 379 return 0; 380 } 381 return 1; 382 } 383 384 static inline int timerlat_thread_exit(struct osnoise_variables *osn_var) 385 { 386 struct timerlat_variables *tlat_var = this_cpu_tmr_var(); 387 /* 388 * If the timerlat is enabled, but the irq handler did 389 * not run yet enabling timerlat_tracer, do not trace. 390 */ 391 if (!tlat_var->tracing_thread) { 392 osn_var->thread.delta_start = 0; 393 osn_var->thread.arrival_time = 0; 394 return 0; 395 } 396 return 1; 397 } 398 #else /* CONFIG_TIMERLAT_TRACER */ 399 static inline bool timerlat_enabled(void) 400 { 401 return false; 402 } 403 404 static inline int timerlat_softirq_exit(struct osnoise_variables *osn_var) 405 { 406 return 1; 407 } 408 static inline int timerlat_thread_exit(struct osnoise_variables *osn_var) 409 { 410 return 1; 411 } 412 #endif 413 414 #ifdef CONFIG_PREEMPT_RT 415 /* 416 * Print the osnoise header info. 417 */ 418 static void print_osnoise_headers(struct seq_file *s) 419 { 420 if (osnoise_data.tainted) 421 seq_puts(s, "# osnoise is tainted!\n"); 422 423 seq_puts(s, "# _-------=> irqs-off\n"); 424 seq_puts(s, "# / _------=> need-resched\n"); 425 seq_puts(s, "# | / _-----=> need-resched-lazy\n"); 426 seq_puts(s, "# || / _----=> hardirq/softirq\n"); 427 seq_puts(s, "# ||| / _---=> preempt-depth\n"); 428 seq_puts(s, "# |||| / _--=> preempt-lazy-depth\n"); 429 seq_puts(s, "# ||||| / _-=> migrate-disable\n"); 430 431 seq_puts(s, "# |||||| / "); 432 seq_puts(s, " MAX\n"); 433 434 seq_puts(s, "# ||||| / "); 435 seq_puts(s, " SINGLE Interference counters:\n"); 436 437 seq_puts(s, "# ||||||| RUNTIME "); 438 seq_puts(s, " NOISE %% OF CPU NOISE +-----------------------------+\n"); 439 440 seq_puts(s, "# TASK-PID CPU# ||||||| TIMESTAMP IN US "); 441 seq_puts(s, " IN US AVAILABLE IN US HW NMI IRQ SIRQ THREAD\n"); 442 443 seq_puts(s, "# | | | ||||||| | | "); 444 seq_puts(s, " | | | | | | | |\n"); 445 } 446 #else /* CONFIG_PREEMPT_RT */ 447 static void print_osnoise_headers(struct seq_file *s) 448 { 449 if (osnoise_data.tainted) 450 seq_puts(s, "# osnoise is tainted!\n"); 451 452 seq_puts(s, "# _-----=> irqs-off\n"); 453 seq_puts(s, "# / _----=> need-resched\n"); 454 seq_puts(s, "# | / _---=> hardirq/softirq\n"); 455 seq_puts(s, "# || / _--=> preempt-depth\n"); 456 seq_puts(s, "# ||| / _-=> migrate-disable "); 457 seq_puts(s, " MAX\n"); 458 seq_puts(s, "# |||| / delay "); 459 seq_puts(s, " SINGLE Interference counters:\n"); 460 461 seq_puts(s, "# ||||| RUNTIME "); 462 seq_puts(s, " NOISE %% OF CPU NOISE +-----------------------------+\n"); 463 464 seq_puts(s, "# TASK-PID CPU# ||||| TIMESTAMP IN US "); 465 seq_puts(s, " IN US AVAILABLE IN US HW NMI IRQ SIRQ THREAD\n"); 466 467 seq_puts(s, "# | | | ||||| | | "); 468 seq_puts(s, " | | | | | | | |\n"); 469 } 470 #endif /* CONFIG_PREEMPT_RT */ 471 472 /* 473 * osnoise_taint - report an osnoise error. 474 */ 475 #define osnoise_taint(msg) ({ \ 476 struct osnoise_instance *inst; \ 477 struct trace_buffer *buffer; \ 478 \ 479 rcu_read_lock(); \ 480 list_for_each_entry_rcu(inst, &osnoise_instances, list) { \ 481 buffer = inst->tr->array_buffer.buffer; \ 482 trace_array_printk_buf(buffer, _THIS_IP_, msg); \ 483 } \ 484 rcu_read_unlock(); \ 485 osnoise_data.tainted = true; \ 486 }) 487 488 /* 489 * Record an osnoise_sample into the tracer buffer. 490 */ 491 static void 492 __trace_osnoise_sample(struct osnoise_sample *sample, struct trace_buffer *buffer) 493 { 494 struct trace_event_call *call = &event_osnoise; 495 struct ring_buffer_event *event; 496 struct osnoise_entry *entry; 497 498 event = trace_buffer_lock_reserve(buffer, TRACE_OSNOISE, sizeof(*entry), 499 tracing_gen_ctx()); 500 if (!event) 501 return; 502 entry = ring_buffer_event_data(event); 503 entry->runtime = sample->runtime; 504 entry->noise = sample->noise; 505 entry->max_sample = sample->max_sample; 506 entry->hw_count = sample->hw_count; 507 entry->nmi_count = sample->nmi_count; 508 entry->irq_count = sample->irq_count; 509 entry->softirq_count = sample->softirq_count; 510 entry->thread_count = sample->thread_count; 511 512 if (!call_filter_check_discard(call, entry, buffer, event)) 513 trace_buffer_unlock_commit_nostack(buffer, event); 514 } 515 516 /* 517 * Record an osnoise_sample on all osnoise instances. 518 */ 519 static void trace_osnoise_sample(struct osnoise_sample *sample) 520 { 521 struct osnoise_instance *inst; 522 struct trace_buffer *buffer; 523 524 rcu_read_lock(); 525 list_for_each_entry_rcu(inst, &osnoise_instances, list) { 526 buffer = inst->tr->array_buffer.buffer; 527 __trace_osnoise_sample(sample, buffer); 528 } 529 rcu_read_unlock(); 530 } 531 532 #ifdef CONFIG_TIMERLAT_TRACER 533 /* 534 * Print the timerlat header info. 535 */ 536 #ifdef CONFIG_PREEMPT_RT 537 static void print_timerlat_headers(struct seq_file *s) 538 { 539 seq_puts(s, "# _-------=> irqs-off\n"); 540 seq_puts(s, "# / _------=> need-resched\n"); 541 seq_puts(s, "# | / _-----=> need-resched-lazy\n"); 542 seq_puts(s, "# || / _----=> hardirq/softirq\n"); 543 seq_puts(s, "# ||| / _---=> preempt-depth\n"); 544 seq_puts(s, "# |||| / _--=> preempt-lazy-depth\n"); 545 seq_puts(s, "# ||||| / _-=> migrate-disable\n"); 546 seq_puts(s, "# |||||| /\n"); 547 seq_puts(s, "# ||||||| ACTIVATION\n"); 548 seq_puts(s, "# TASK-PID CPU# ||||||| TIMESTAMP ID "); 549 seq_puts(s, " CONTEXT LATENCY\n"); 550 seq_puts(s, "# | | | ||||||| | | "); 551 seq_puts(s, " | |\n"); 552 } 553 #else /* CONFIG_PREEMPT_RT */ 554 static void print_timerlat_headers(struct seq_file *s) 555 { 556 seq_puts(s, "# _-----=> irqs-off\n"); 557 seq_puts(s, "# / _----=> need-resched\n"); 558 seq_puts(s, "# | / _---=> hardirq/softirq\n"); 559 seq_puts(s, "# || / _--=> preempt-depth\n"); 560 seq_puts(s, "# ||| / _-=> migrate-disable\n"); 561 seq_puts(s, "# |||| / delay\n"); 562 seq_puts(s, "# ||||| ACTIVATION\n"); 563 seq_puts(s, "# TASK-PID CPU# ||||| TIMESTAMP ID "); 564 seq_puts(s, " CONTEXT LATENCY\n"); 565 seq_puts(s, "# | | | ||||| | | "); 566 seq_puts(s, " | |\n"); 567 } 568 #endif /* CONFIG_PREEMPT_RT */ 569 570 static void 571 __trace_timerlat_sample(struct timerlat_sample *sample, struct trace_buffer *buffer) 572 { 573 struct trace_event_call *call = &event_osnoise; 574 struct ring_buffer_event *event; 575 struct timerlat_entry *entry; 576 577 event = trace_buffer_lock_reserve(buffer, TRACE_TIMERLAT, sizeof(*entry), 578 tracing_gen_ctx()); 579 if (!event) 580 return; 581 entry = ring_buffer_event_data(event); 582 entry->seqnum = sample->seqnum; 583 entry->context = sample->context; 584 entry->timer_latency = sample->timer_latency; 585 586 if (!call_filter_check_discard(call, entry, buffer, event)) 587 trace_buffer_unlock_commit_nostack(buffer, event); 588 } 589 590 /* 591 * Record an timerlat_sample into the tracer buffer. 592 */ 593 static void trace_timerlat_sample(struct timerlat_sample *sample) 594 { 595 struct osnoise_instance *inst; 596 struct trace_buffer *buffer; 597 598 rcu_read_lock(); 599 list_for_each_entry_rcu(inst, &osnoise_instances, list) { 600 buffer = inst->tr->array_buffer.buffer; 601 __trace_timerlat_sample(sample, buffer); 602 } 603 rcu_read_unlock(); 604 } 605 606 #ifdef CONFIG_STACKTRACE 607 608 #define MAX_CALLS 256 609 610 /* 611 * Stack trace will take place only at IRQ level, so, no need 612 * to control nesting here. 613 */ 614 struct trace_stack { 615 int stack_size; 616 int nr_entries; 617 unsigned long calls[MAX_CALLS]; 618 }; 619 620 static DEFINE_PER_CPU(struct trace_stack, trace_stack); 621 622 /* 623 * timerlat_save_stack - save a stack trace without printing 624 * 625 * Save the current stack trace without printing. The 626 * stack will be printed later, after the end of the measurement. 627 */ 628 static void timerlat_save_stack(int skip) 629 { 630 unsigned int size, nr_entries; 631 struct trace_stack *fstack; 632 633 fstack = this_cpu_ptr(&trace_stack); 634 635 size = ARRAY_SIZE(fstack->calls); 636 637 nr_entries = stack_trace_save(fstack->calls, size, skip); 638 639 fstack->stack_size = nr_entries * sizeof(unsigned long); 640 fstack->nr_entries = nr_entries; 641 642 return; 643 644 } 645 646 static void 647 __timerlat_dump_stack(struct trace_buffer *buffer, struct trace_stack *fstack, unsigned int size) 648 { 649 struct trace_event_call *call = &event_osnoise; 650 struct ring_buffer_event *event; 651 struct stack_entry *entry; 652 653 event = trace_buffer_lock_reserve(buffer, TRACE_STACK, sizeof(*entry) + size, 654 tracing_gen_ctx()); 655 if (!event) 656 return; 657 658 entry = ring_buffer_event_data(event); 659 660 memcpy(&entry->caller, fstack->calls, size); 661 entry->size = fstack->nr_entries; 662 663 if (!call_filter_check_discard(call, entry, buffer, event)) 664 trace_buffer_unlock_commit_nostack(buffer, event); 665 } 666 667 /* 668 * timerlat_dump_stack - dump a stack trace previously saved 669 */ 670 static void timerlat_dump_stack(u64 latency) 671 { 672 struct osnoise_instance *inst; 673 struct trace_buffer *buffer; 674 struct trace_stack *fstack; 675 unsigned int size; 676 677 /* 678 * trace only if latency > print_stack config, if enabled. 679 */ 680 if (!osnoise_data.print_stack || osnoise_data.print_stack > latency) 681 return; 682 683 preempt_disable_notrace(); 684 fstack = this_cpu_ptr(&trace_stack); 685 size = fstack->stack_size; 686 687 rcu_read_lock(); 688 list_for_each_entry_rcu(inst, &osnoise_instances, list) { 689 buffer = inst->tr->array_buffer.buffer; 690 __timerlat_dump_stack(buffer, fstack, size); 691 692 } 693 rcu_read_unlock(); 694 preempt_enable_notrace(); 695 } 696 #else /* CONFIG_STACKTRACE */ 697 #define timerlat_dump_stack(u64 latency) do {} while (0) 698 #define timerlat_save_stack(a) do {} while (0) 699 #endif /* CONFIG_STACKTRACE */ 700 #endif /* CONFIG_TIMERLAT_TRACER */ 701 702 /* 703 * Macros to encapsulate the time capturing infrastructure. 704 */ 705 #define time_get() trace_clock_local() 706 #define time_to_us(x) div_u64(x, 1000) 707 #define time_sub(a, b) ((a) - (b)) 708 709 /* 710 * cond_move_irq_delta_start - Forward the delta_start of a running IRQ 711 * 712 * If an IRQ is preempted by an NMI, its delta_start is pushed forward 713 * to discount the NMI interference. 714 * 715 * See get_int_safe_duration(). 716 */ 717 static inline void 718 cond_move_irq_delta_start(struct osnoise_variables *osn_var, u64 duration) 719 { 720 if (osn_var->irq.delta_start) 721 osn_var->irq.delta_start += duration; 722 } 723 724 #ifndef CONFIG_PREEMPT_RT 725 /* 726 * cond_move_softirq_delta_start - Forward the delta_start of a running softirq. 727 * 728 * If a softirq is preempted by an IRQ or NMI, its delta_start is pushed 729 * forward to discount the interference. 730 * 731 * See get_int_safe_duration(). 732 */ 733 static inline void 734 cond_move_softirq_delta_start(struct osnoise_variables *osn_var, u64 duration) 735 { 736 if (osn_var->softirq.delta_start) 737 osn_var->softirq.delta_start += duration; 738 } 739 #else /* CONFIG_PREEMPT_RT */ 740 #define cond_move_softirq_delta_start(osn_var, duration) do {} while (0) 741 #endif 742 743 /* 744 * cond_move_thread_delta_start - Forward the delta_start of a running thread 745 * 746 * If a noisy thread is preempted by an softirq, IRQ or NMI, its delta_start 747 * is pushed forward to discount the interference. 748 * 749 * See get_int_safe_duration(). 750 */ 751 static inline void 752 cond_move_thread_delta_start(struct osnoise_variables *osn_var, u64 duration) 753 { 754 if (osn_var->thread.delta_start) 755 osn_var->thread.delta_start += duration; 756 } 757 758 /* 759 * get_int_safe_duration - Get the duration of a window 760 * 761 * The irq, softirq and thread varaibles need to have its duration without 762 * the interference from higher priority interrupts. Instead of keeping a 763 * variable to discount the interrupt interference from these variables, the 764 * starting time of these variables are pushed forward with the interrupt's 765 * duration. In this way, a single variable is used to: 766 * 767 * - Know if a given window is being measured. 768 * - Account its duration. 769 * - Discount the interference. 770 * 771 * To avoid getting inconsistent values, e.g.,: 772 * 773 * now = time_get() 774 * ---> interrupt! 775 * delta_start -= int duration; 776 * <--- 777 * duration = now - delta_start; 778 * 779 * result: negative duration if the variable duration before the 780 * interrupt was smaller than the interrupt execution. 781 * 782 * A counter of interrupts is used. If the counter increased, try 783 * to capture an interference safe duration. 784 */ 785 static inline s64 786 get_int_safe_duration(struct osnoise_variables *osn_var, u64 *delta_start) 787 { 788 u64 int_counter, now; 789 s64 duration; 790 791 do { 792 int_counter = local_read(&osn_var->int_counter); 793 /* synchronize with interrupts */ 794 barrier(); 795 796 now = time_get(); 797 duration = (now - *delta_start); 798 799 /* synchronize with interrupts */ 800 barrier(); 801 } while (int_counter != local_read(&osn_var->int_counter)); 802 803 /* 804 * This is an evidence of race conditions that cause 805 * a value to be "discounted" too much. 806 */ 807 if (duration < 0) 808 osnoise_taint("Negative duration!\n"); 809 810 *delta_start = 0; 811 812 return duration; 813 } 814 815 /* 816 * 817 * set_int_safe_time - Save the current time on *time, aware of interference 818 * 819 * Get the time, taking into consideration a possible interference from 820 * higher priority interrupts. 821 * 822 * See get_int_safe_duration() for an explanation. 823 */ 824 static u64 825 set_int_safe_time(struct osnoise_variables *osn_var, u64 *time) 826 { 827 u64 int_counter; 828 829 do { 830 int_counter = local_read(&osn_var->int_counter); 831 /* synchronize with interrupts */ 832 barrier(); 833 834 *time = time_get(); 835 836 /* synchronize with interrupts */ 837 barrier(); 838 } while (int_counter != local_read(&osn_var->int_counter)); 839 840 return int_counter; 841 } 842 843 #ifdef CONFIG_TIMERLAT_TRACER 844 /* 845 * copy_int_safe_time - Copy *src into *desc aware of interference 846 */ 847 static u64 848 copy_int_safe_time(struct osnoise_variables *osn_var, u64 *dst, u64 *src) 849 { 850 u64 int_counter; 851 852 do { 853 int_counter = local_read(&osn_var->int_counter); 854 /* synchronize with interrupts */ 855 barrier(); 856 857 *dst = *src; 858 859 /* synchronize with interrupts */ 860 barrier(); 861 } while (int_counter != local_read(&osn_var->int_counter)); 862 863 return int_counter; 864 } 865 #endif /* CONFIG_TIMERLAT_TRACER */ 866 867 /* 868 * trace_osnoise_callback - NMI entry/exit callback 869 * 870 * This function is called at the entry and exit NMI code. The bool enter 871 * distinguishes between either case. This function is used to note a NMI 872 * occurrence, compute the noise caused by the NMI, and to remove the noise 873 * it is potentially causing on other interference variables. 874 */ 875 void trace_osnoise_callback(bool enter) 876 { 877 struct osnoise_variables *osn_var = this_cpu_osn_var(); 878 u64 duration; 879 880 if (!osn_var->sampling) 881 return; 882 883 /* 884 * Currently trace_clock_local() calls sched_clock() and the 885 * generic version is not NMI safe. 886 */ 887 if (!IS_ENABLED(CONFIG_GENERIC_SCHED_CLOCK)) { 888 if (enter) { 889 osn_var->nmi.delta_start = time_get(); 890 local_inc(&osn_var->int_counter); 891 } else { 892 duration = time_get() - osn_var->nmi.delta_start; 893 894 trace_nmi_noise(osn_var->nmi.delta_start, duration); 895 896 cond_move_irq_delta_start(osn_var, duration); 897 cond_move_softirq_delta_start(osn_var, duration); 898 cond_move_thread_delta_start(osn_var, duration); 899 } 900 } 901 902 if (enter) 903 osn_var->nmi.count++; 904 } 905 906 /* 907 * osnoise_trace_irq_entry - Note the starting of an IRQ 908 * 909 * Save the starting time of an IRQ. As IRQs are non-preemptive to other IRQs, 910 * it is safe to use a single variable (ons_var->irq) to save the statistics. 911 * The arrival_time is used to report... the arrival time. The delta_start 912 * is used to compute the duration at the IRQ exit handler. See 913 * cond_move_irq_delta_start(). 914 */ 915 void osnoise_trace_irq_entry(int id) 916 { 917 struct osnoise_variables *osn_var = this_cpu_osn_var(); 918 919 if (!osn_var->sampling) 920 return; 921 /* 922 * This value will be used in the report, but not to compute 923 * the execution time, so it is safe to get it unsafe. 924 */ 925 osn_var->irq.arrival_time = time_get(); 926 set_int_safe_time(osn_var, &osn_var->irq.delta_start); 927 osn_var->irq.count++; 928 929 local_inc(&osn_var->int_counter); 930 } 931 932 /* 933 * osnoise_irq_exit - Note the end of an IRQ, sava data and trace 934 * 935 * Computes the duration of the IRQ noise, and trace it. Also discounts the 936 * interference from other sources of noise could be currently being accounted. 937 */ 938 void osnoise_trace_irq_exit(int id, const char *desc) 939 { 940 struct osnoise_variables *osn_var = this_cpu_osn_var(); 941 s64 duration; 942 943 if (!osn_var->sampling) 944 return; 945 946 duration = get_int_safe_duration(osn_var, &osn_var->irq.delta_start); 947 trace_irq_noise(id, desc, osn_var->irq.arrival_time, duration); 948 osn_var->irq.arrival_time = 0; 949 cond_move_softirq_delta_start(osn_var, duration); 950 cond_move_thread_delta_start(osn_var, duration); 951 } 952 953 /* 954 * trace_irqentry_callback - Callback to the irq:irq_entry traceevent 955 * 956 * Used to note the starting of an IRQ occurece. 957 */ 958 static void trace_irqentry_callback(void *data, int irq, 959 struct irqaction *action) 960 { 961 osnoise_trace_irq_entry(irq); 962 } 963 964 /* 965 * trace_irqexit_callback - Callback to the irq:irq_exit traceevent 966 * 967 * Used to note the end of an IRQ occurece. 968 */ 969 static void trace_irqexit_callback(void *data, int irq, 970 struct irqaction *action, int ret) 971 { 972 osnoise_trace_irq_exit(irq, action->name); 973 } 974 975 /* 976 * arch specific register function. 977 */ 978 int __weak osnoise_arch_register(void) 979 { 980 return 0; 981 } 982 983 /* 984 * arch specific unregister function. 985 */ 986 void __weak osnoise_arch_unregister(void) 987 { 988 return; 989 } 990 991 /* 992 * hook_irq_events - Hook IRQ handling events 993 * 994 * This function hooks the IRQ related callbacks to the respective trace 995 * events. 996 */ 997 static int hook_irq_events(void) 998 { 999 int ret; 1000 1001 ret = register_trace_irq_handler_entry(trace_irqentry_callback, NULL); 1002 if (ret) 1003 goto out_err; 1004 1005 ret = register_trace_irq_handler_exit(trace_irqexit_callback, NULL); 1006 if (ret) 1007 goto out_unregister_entry; 1008 1009 ret = osnoise_arch_register(); 1010 if (ret) 1011 goto out_irq_exit; 1012 1013 return 0; 1014 1015 out_irq_exit: 1016 unregister_trace_irq_handler_exit(trace_irqexit_callback, NULL); 1017 out_unregister_entry: 1018 unregister_trace_irq_handler_entry(trace_irqentry_callback, NULL); 1019 out_err: 1020 return -EINVAL; 1021 } 1022 1023 /* 1024 * unhook_irq_events - Unhook IRQ handling events 1025 * 1026 * This function unhooks the IRQ related callbacks to the respective trace 1027 * events. 1028 */ 1029 static void unhook_irq_events(void) 1030 { 1031 osnoise_arch_unregister(); 1032 unregister_trace_irq_handler_exit(trace_irqexit_callback, NULL); 1033 unregister_trace_irq_handler_entry(trace_irqentry_callback, NULL); 1034 } 1035 1036 #ifndef CONFIG_PREEMPT_RT 1037 /* 1038 * trace_softirq_entry_callback - Note the starting of a softirq 1039 * 1040 * Save the starting time of a softirq. As softirqs are non-preemptive to 1041 * other softirqs, it is safe to use a single variable (ons_var->softirq) 1042 * to save the statistics. The arrival_time is used to report... the 1043 * arrival time. The delta_start is used to compute the duration at the 1044 * softirq exit handler. See cond_move_softirq_delta_start(). 1045 */ 1046 static void trace_softirq_entry_callback(void *data, unsigned int vec_nr) 1047 { 1048 struct osnoise_variables *osn_var = this_cpu_osn_var(); 1049 1050 if (!osn_var->sampling) 1051 return; 1052 /* 1053 * This value will be used in the report, but not to compute 1054 * the execution time, so it is safe to get it unsafe. 1055 */ 1056 osn_var->softirq.arrival_time = time_get(); 1057 set_int_safe_time(osn_var, &osn_var->softirq.delta_start); 1058 osn_var->softirq.count++; 1059 1060 local_inc(&osn_var->int_counter); 1061 } 1062 1063 /* 1064 * trace_softirq_exit_callback - Note the end of an softirq 1065 * 1066 * Computes the duration of the softirq noise, and trace it. Also discounts the 1067 * interference from other sources of noise could be currently being accounted. 1068 */ 1069 static void trace_softirq_exit_callback(void *data, unsigned int vec_nr) 1070 { 1071 struct osnoise_variables *osn_var = this_cpu_osn_var(); 1072 s64 duration; 1073 1074 if (!osn_var->sampling) 1075 return; 1076 1077 if (unlikely(timerlat_enabled())) 1078 if (!timerlat_softirq_exit(osn_var)) 1079 return; 1080 1081 duration = get_int_safe_duration(osn_var, &osn_var->softirq.delta_start); 1082 trace_softirq_noise(vec_nr, osn_var->softirq.arrival_time, duration); 1083 cond_move_thread_delta_start(osn_var, duration); 1084 osn_var->softirq.arrival_time = 0; 1085 } 1086 1087 /* 1088 * hook_softirq_events - Hook softirq handling events 1089 * 1090 * This function hooks the softirq related callbacks to the respective trace 1091 * events. 1092 */ 1093 static int hook_softirq_events(void) 1094 { 1095 int ret; 1096 1097 ret = register_trace_softirq_entry(trace_softirq_entry_callback, NULL); 1098 if (ret) 1099 goto out_err; 1100 1101 ret = register_trace_softirq_exit(trace_softirq_exit_callback, NULL); 1102 if (ret) 1103 goto out_unreg_entry; 1104 1105 return 0; 1106 1107 out_unreg_entry: 1108 unregister_trace_softirq_entry(trace_softirq_entry_callback, NULL); 1109 out_err: 1110 return -EINVAL; 1111 } 1112 1113 /* 1114 * unhook_softirq_events - Unhook softirq handling events 1115 * 1116 * This function hooks the softirq related callbacks to the respective trace 1117 * events. 1118 */ 1119 static void unhook_softirq_events(void) 1120 { 1121 unregister_trace_softirq_entry(trace_softirq_entry_callback, NULL); 1122 unregister_trace_softirq_exit(trace_softirq_exit_callback, NULL); 1123 } 1124 #else /* CONFIG_PREEMPT_RT */ 1125 /* 1126 * softirq are threads on the PREEMPT_RT mode. 1127 */ 1128 static int hook_softirq_events(void) 1129 { 1130 return 0; 1131 } 1132 static void unhook_softirq_events(void) 1133 { 1134 } 1135 #endif 1136 1137 /* 1138 * thread_entry - Record the starting of a thread noise window 1139 * 1140 * It saves the context switch time for a noisy thread, and increments 1141 * the interference counters. 1142 */ 1143 static void 1144 thread_entry(struct osnoise_variables *osn_var, struct task_struct *t) 1145 { 1146 if (!osn_var->sampling) 1147 return; 1148 /* 1149 * The arrival time will be used in the report, but not to compute 1150 * the execution time, so it is safe to get it unsafe. 1151 */ 1152 osn_var->thread.arrival_time = time_get(); 1153 1154 set_int_safe_time(osn_var, &osn_var->thread.delta_start); 1155 1156 osn_var->thread.count++; 1157 local_inc(&osn_var->int_counter); 1158 } 1159 1160 /* 1161 * thread_exit - Report the end of a thread noise window 1162 * 1163 * It computes the total noise from a thread, tracing if needed. 1164 */ 1165 static void 1166 thread_exit(struct osnoise_variables *osn_var, struct task_struct *t) 1167 { 1168 s64 duration; 1169 1170 if (!osn_var->sampling) 1171 return; 1172 1173 if (unlikely(timerlat_enabled())) 1174 if (!timerlat_thread_exit(osn_var)) 1175 return; 1176 1177 duration = get_int_safe_duration(osn_var, &osn_var->thread.delta_start); 1178 1179 trace_thread_noise(t, osn_var->thread.arrival_time, duration); 1180 1181 osn_var->thread.arrival_time = 0; 1182 } 1183 1184 /* 1185 * trace_sched_switch - sched:sched_switch trace event handler 1186 * 1187 * This function is hooked to the sched:sched_switch trace event, and it is 1188 * used to record the beginning and to report the end of a thread noise window. 1189 */ 1190 static void 1191 trace_sched_switch_callback(void *data, bool preempt, 1192 struct task_struct *p, 1193 struct task_struct *n, 1194 unsigned int prev_state) 1195 { 1196 struct osnoise_variables *osn_var = this_cpu_osn_var(); 1197 int workload = test_bit(OSN_WORKLOAD, &osnoise_options); 1198 1199 if ((p->pid != osn_var->pid) || !workload) 1200 thread_exit(osn_var, p); 1201 1202 if ((n->pid != osn_var->pid) || !workload) 1203 thread_entry(osn_var, n); 1204 } 1205 1206 /* 1207 * hook_thread_events - Hook the insturmentation for thread noise 1208 * 1209 * Hook the osnoise tracer callbacks to handle the noise from other 1210 * threads on the necessary kernel events. 1211 */ 1212 static int hook_thread_events(void) 1213 { 1214 int ret; 1215 1216 ret = register_trace_sched_switch(trace_sched_switch_callback, NULL); 1217 if (ret) 1218 return -EINVAL; 1219 1220 return 0; 1221 } 1222 1223 /* 1224 * unhook_thread_events - *nhook the insturmentation for thread noise 1225 * 1226 * Unook the osnoise tracer callbacks to handle the noise from other 1227 * threads on the necessary kernel events. 1228 */ 1229 static void unhook_thread_events(void) 1230 { 1231 unregister_trace_sched_switch(trace_sched_switch_callback, NULL); 1232 } 1233 1234 /* 1235 * save_osn_sample_stats - Save the osnoise_sample statistics 1236 * 1237 * Save the osnoise_sample statistics before the sampling phase. These 1238 * values will be used later to compute the diff betwneen the statistics 1239 * before and after the osnoise sampling. 1240 */ 1241 static void 1242 save_osn_sample_stats(struct osnoise_variables *osn_var, struct osnoise_sample *s) 1243 { 1244 s->nmi_count = osn_var->nmi.count; 1245 s->irq_count = osn_var->irq.count; 1246 s->softirq_count = osn_var->softirq.count; 1247 s->thread_count = osn_var->thread.count; 1248 } 1249 1250 /* 1251 * diff_osn_sample_stats - Compute the osnoise_sample statistics 1252 * 1253 * After a sample period, compute the difference on the osnoise_sample 1254 * statistics. The struct osnoise_sample *s contains the statistics saved via 1255 * save_osn_sample_stats() before the osnoise sampling. 1256 */ 1257 static void 1258 diff_osn_sample_stats(struct osnoise_variables *osn_var, struct osnoise_sample *s) 1259 { 1260 s->nmi_count = osn_var->nmi.count - s->nmi_count; 1261 s->irq_count = osn_var->irq.count - s->irq_count; 1262 s->softirq_count = osn_var->softirq.count - s->softirq_count; 1263 s->thread_count = osn_var->thread.count - s->thread_count; 1264 } 1265 1266 /* 1267 * osnoise_stop_tracing - Stop tracing and the tracer. 1268 */ 1269 static __always_inline void osnoise_stop_tracing(void) 1270 { 1271 struct osnoise_instance *inst; 1272 struct trace_array *tr; 1273 1274 rcu_read_lock(); 1275 list_for_each_entry_rcu(inst, &osnoise_instances, list) { 1276 tr = inst->tr; 1277 trace_array_printk_buf(tr->array_buffer.buffer, _THIS_IP_, 1278 "stop tracing hit on cpu %d\n", smp_processor_id()); 1279 1280 if (test_bit(OSN_PANIC_ON_STOP, &osnoise_options)) 1281 panic("tracer hit stop condition on CPU %d\n", smp_processor_id()); 1282 1283 tracer_tracing_off(tr); 1284 } 1285 rcu_read_unlock(); 1286 } 1287 1288 /* 1289 * notify_new_max_latency - Notify a new max latency via fsnotify interface. 1290 */ 1291 static void notify_new_max_latency(u64 latency) 1292 { 1293 struct osnoise_instance *inst; 1294 struct trace_array *tr; 1295 1296 rcu_read_lock(); 1297 list_for_each_entry_rcu(inst, &osnoise_instances, list) { 1298 tr = inst->tr; 1299 if (tracer_tracing_is_on(tr) && tr->max_latency < latency) { 1300 tr->max_latency = latency; 1301 latency_fsnotify(tr); 1302 } 1303 } 1304 rcu_read_unlock(); 1305 } 1306 1307 /* 1308 * run_osnoise - Sample the time and look for osnoise 1309 * 1310 * Used to capture the time, looking for potential osnoise latency repeatedly. 1311 * Different from hwlat_detector, it is called with preemption and interrupts 1312 * enabled. This allows irqs, softirqs and threads to run, interfering on the 1313 * osnoise sampling thread, as they would do with a regular thread. 1314 */ 1315 static int run_osnoise(void) 1316 { 1317 bool disable_irq = test_bit(OSN_IRQ_DISABLE, &osnoise_options); 1318 struct osnoise_variables *osn_var = this_cpu_osn_var(); 1319 u64 start, sample, last_sample; 1320 u64 last_int_count, int_count; 1321 s64 noise = 0, max_noise = 0; 1322 s64 total, last_total = 0; 1323 struct osnoise_sample s; 1324 bool disable_preemption; 1325 unsigned int threshold; 1326 u64 runtime, stop_in; 1327 u64 sum_noise = 0; 1328 int hw_count = 0; 1329 int ret = -1; 1330 1331 /* 1332 * Disabling preemption is only required if IRQs are enabled, 1333 * and the options is set on. 1334 */ 1335 disable_preemption = !disable_irq && test_bit(OSN_PREEMPT_DISABLE, &osnoise_options); 1336 1337 /* 1338 * Considers the current thread as the workload. 1339 */ 1340 osn_var->pid = current->pid; 1341 1342 /* 1343 * Save the current stats for the diff 1344 */ 1345 save_osn_sample_stats(osn_var, &s); 1346 1347 /* 1348 * if threshold is 0, use the default value of 5 us. 1349 */ 1350 threshold = tracing_thresh ? : 5000; 1351 1352 /* 1353 * Apply PREEMPT and IRQ disabled options. 1354 */ 1355 if (disable_irq) 1356 local_irq_disable(); 1357 1358 if (disable_preemption) 1359 preempt_disable(); 1360 1361 /* 1362 * Make sure NMIs see sampling first 1363 */ 1364 osn_var->sampling = true; 1365 barrier(); 1366 1367 /* 1368 * Transform the *_us config to nanoseconds to avoid the 1369 * division on the main loop. 1370 */ 1371 runtime = osnoise_data.sample_runtime * NSEC_PER_USEC; 1372 stop_in = osnoise_data.stop_tracing * NSEC_PER_USEC; 1373 1374 /* 1375 * Start timestemp 1376 */ 1377 start = time_get(); 1378 1379 /* 1380 * "previous" loop. 1381 */ 1382 last_int_count = set_int_safe_time(osn_var, &last_sample); 1383 1384 do { 1385 /* 1386 * Get sample! 1387 */ 1388 int_count = set_int_safe_time(osn_var, &sample); 1389 1390 noise = time_sub(sample, last_sample); 1391 1392 /* 1393 * This shouldn't happen. 1394 */ 1395 if (noise < 0) { 1396 osnoise_taint("negative noise!"); 1397 goto out; 1398 } 1399 1400 /* 1401 * Sample runtime. 1402 */ 1403 total = time_sub(sample, start); 1404 1405 /* 1406 * Check for possible overflows. 1407 */ 1408 if (total < last_total) { 1409 osnoise_taint("total overflow!"); 1410 break; 1411 } 1412 1413 last_total = total; 1414 1415 if (noise >= threshold) { 1416 int interference = int_count - last_int_count; 1417 1418 if (noise > max_noise) 1419 max_noise = noise; 1420 1421 if (!interference) 1422 hw_count++; 1423 1424 sum_noise += noise; 1425 1426 trace_sample_threshold(last_sample, noise, interference); 1427 1428 if (osnoise_data.stop_tracing) 1429 if (noise > stop_in) 1430 osnoise_stop_tracing(); 1431 } 1432 1433 /* 1434 * In some cases, notably when running on a nohz_full CPU with 1435 * a stopped tick PREEMPT_RCU has no way to account for QSs. 1436 * This will eventually cause unwarranted noise as PREEMPT_RCU 1437 * will force preemption as the means of ending the current 1438 * grace period. We avoid this problem by calling 1439 * rcu_momentary_dyntick_idle(), which performs a zero duration 1440 * EQS allowing PREEMPT_RCU to end the current grace period. 1441 * This call shouldn't be wrapped inside an RCU critical 1442 * section. 1443 * 1444 * Note that in non PREEMPT_RCU kernels QSs are handled through 1445 * cond_resched() 1446 */ 1447 if (IS_ENABLED(CONFIG_PREEMPT_RCU)) { 1448 if (!disable_irq) 1449 local_irq_disable(); 1450 1451 rcu_momentary_dyntick_idle(); 1452 1453 if (!disable_irq) 1454 local_irq_enable(); 1455 } 1456 1457 /* 1458 * For the non-preemptive kernel config: let threads runs, if 1459 * they so wish, unless set not do to so. 1460 */ 1461 if (!disable_irq && !disable_preemption) 1462 cond_resched(); 1463 1464 last_sample = sample; 1465 last_int_count = int_count; 1466 1467 } while (total < runtime && !kthread_should_stop()); 1468 1469 /* 1470 * Finish the above in the view for interrupts. 1471 */ 1472 barrier(); 1473 1474 osn_var->sampling = false; 1475 1476 /* 1477 * Make sure sampling data is no longer updated. 1478 */ 1479 barrier(); 1480 1481 /* 1482 * Return to the preemptive state. 1483 */ 1484 if (disable_preemption) 1485 preempt_enable(); 1486 1487 if (disable_irq) 1488 local_irq_enable(); 1489 1490 /* 1491 * Save noise info. 1492 */ 1493 s.noise = time_to_us(sum_noise); 1494 s.runtime = time_to_us(total); 1495 s.max_sample = time_to_us(max_noise); 1496 s.hw_count = hw_count; 1497 1498 /* Save interference stats info */ 1499 diff_osn_sample_stats(osn_var, &s); 1500 1501 trace_osnoise_sample(&s); 1502 1503 notify_new_max_latency(max_noise); 1504 1505 if (osnoise_data.stop_tracing_total) 1506 if (s.noise > osnoise_data.stop_tracing_total) 1507 osnoise_stop_tracing(); 1508 1509 return 0; 1510 out: 1511 return ret; 1512 } 1513 1514 static struct cpumask osnoise_cpumask; 1515 static struct cpumask save_cpumask; 1516 1517 /* 1518 * osnoise_sleep - sleep until the next period 1519 */ 1520 static void osnoise_sleep(void) 1521 { 1522 u64 interval; 1523 ktime_t wake_time; 1524 1525 mutex_lock(&interface_lock); 1526 interval = osnoise_data.sample_period - osnoise_data.sample_runtime; 1527 mutex_unlock(&interface_lock); 1528 1529 /* 1530 * differently from hwlat_detector, the osnoise tracer can run 1531 * without a pause because preemption is on. 1532 */ 1533 if (!interval) { 1534 /* Let synchronize_rcu_tasks() make progress */ 1535 cond_resched_tasks_rcu_qs(); 1536 return; 1537 } 1538 1539 wake_time = ktime_add_us(ktime_get(), interval); 1540 __set_current_state(TASK_INTERRUPTIBLE); 1541 1542 while (schedule_hrtimeout(&wake_time, HRTIMER_MODE_ABS)) { 1543 if (kthread_should_stop()) 1544 break; 1545 } 1546 } 1547 1548 /* 1549 * osnoise_main - The osnoise detection kernel thread 1550 * 1551 * Calls run_osnoise() function to measure the osnoise for the configured runtime, 1552 * every period. 1553 */ 1554 static int osnoise_main(void *data) 1555 { 1556 1557 while (!kthread_should_stop()) { 1558 run_osnoise(); 1559 osnoise_sleep(); 1560 } 1561 1562 return 0; 1563 } 1564 1565 #ifdef CONFIG_TIMERLAT_TRACER 1566 /* 1567 * timerlat_irq - hrtimer handler for timerlat. 1568 */ 1569 static enum hrtimer_restart timerlat_irq(struct hrtimer *timer) 1570 { 1571 struct osnoise_variables *osn_var = this_cpu_osn_var(); 1572 struct timerlat_variables *tlat; 1573 struct timerlat_sample s; 1574 u64 now; 1575 u64 diff; 1576 1577 /* 1578 * I am not sure if the timer was armed for this CPU. So, get 1579 * the timerlat struct from the timer itself, not from this 1580 * CPU. 1581 */ 1582 tlat = container_of(timer, struct timerlat_variables, timer); 1583 1584 now = ktime_to_ns(hrtimer_cb_get_time(&tlat->timer)); 1585 1586 /* 1587 * Enable the osnoise: events for thread an softirq. 1588 */ 1589 tlat->tracing_thread = true; 1590 1591 osn_var->thread.arrival_time = time_get(); 1592 1593 /* 1594 * A hardirq is running: the timer IRQ. It is for sure preempting 1595 * a thread, and potentially preempting a softirq. 1596 * 1597 * At this point, it is not interesting to know the duration of the 1598 * preempted thread (and maybe softirq), but how much time they will 1599 * delay the beginning of the execution of the timer thread. 1600 * 1601 * To get the correct (net) delay added by the softirq, its delta_start 1602 * is set as the IRQ one. In this way, at the return of the IRQ, the delta 1603 * start of the sofitrq will be zeroed, accounting then only the time 1604 * after that. 1605 * 1606 * The thread follows the same principle. However, if a softirq is 1607 * running, the thread needs to receive the softirq delta_start. The 1608 * reason being is that the softirq will be the last to be unfolded, 1609 * resseting the thread delay to zero. 1610 * 1611 * The PREEMPT_RT is a special case, though. As softirqs run as threads 1612 * on RT, moving the thread is enough. 1613 */ 1614 if (!IS_ENABLED(CONFIG_PREEMPT_RT) && osn_var->softirq.delta_start) { 1615 copy_int_safe_time(osn_var, &osn_var->thread.delta_start, 1616 &osn_var->softirq.delta_start); 1617 1618 copy_int_safe_time(osn_var, &osn_var->softirq.delta_start, 1619 &osn_var->irq.delta_start); 1620 } else { 1621 copy_int_safe_time(osn_var, &osn_var->thread.delta_start, 1622 &osn_var->irq.delta_start); 1623 } 1624 1625 /* 1626 * Compute the current time with the expected time. 1627 */ 1628 diff = now - tlat->abs_period; 1629 1630 tlat->count++; 1631 s.seqnum = tlat->count; 1632 s.timer_latency = diff; 1633 s.context = IRQ_CONTEXT; 1634 1635 trace_timerlat_sample(&s); 1636 1637 if (osnoise_data.stop_tracing) { 1638 if (time_to_us(diff) >= osnoise_data.stop_tracing) { 1639 1640 /* 1641 * At this point, if stop_tracing is set and <= print_stack, 1642 * print_stack is set and would be printed in the thread handler. 1643 * 1644 * Thus, print the stack trace as it is helpful to define the 1645 * root cause of an IRQ latency. 1646 */ 1647 if (osnoise_data.stop_tracing <= osnoise_data.print_stack) { 1648 timerlat_save_stack(0); 1649 timerlat_dump_stack(time_to_us(diff)); 1650 } 1651 1652 osnoise_stop_tracing(); 1653 notify_new_max_latency(diff); 1654 1655 wake_up_process(tlat->kthread); 1656 1657 return HRTIMER_NORESTART; 1658 } 1659 } 1660 1661 wake_up_process(tlat->kthread); 1662 1663 if (osnoise_data.print_stack) 1664 timerlat_save_stack(0); 1665 1666 return HRTIMER_NORESTART; 1667 } 1668 1669 /* 1670 * wait_next_period - Wait for the next period for timerlat 1671 */ 1672 static int wait_next_period(struct timerlat_variables *tlat) 1673 { 1674 ktime_t next_abs_period, now; 1675 u64 rel_period = osnoise_data.timerlat_period * 1000; 1676 1677 now = hrtimer_cb_get_time(&tlat->timer); 1678 next_abs_period = ns_to_ktime(tlat->abs_period + rel_period); 1679 1680 /* 1681 * Save the next abs_period. 1682 */ 1683 tlat->abs_period = (u64) ktime_to_ns(next_abs_period); 1684 1685 /* 1686 * If the new abs_period is in the past, skip the activation. 1687 */ 1688 while (ktime_compare(now, next_abs_period) > 0) { 1689 next_abs_period = ns_to_ktime(tlat->abs_period + rel_period); 1690 tlat->abs_period = (u64) ktime_to_ns(next_abs_period); 1691 } 1692 1693 set_current_state(TASK_INTERRUPTIBLE); 1694 1695 hrtimer_start(&tlat->timer, next_abs_period, HRTIMER_MODE_ABS_PINNED_HARD); 1696 schedule(); 1697 return 1; 1698 } 1699 1700 /* 1701 * timerlat_main- Timerlat main 1702 */ 1703 static int timerlat_main(void *data) 1704 { 1705 struct osnoise_variables *osn_var = this_cpu_osn_var(); 1706 struct timerlat_variables *tlat = this_cpu_tmr_var(); 1707 struct timerlat_sample s; 1708 struct sched_param sp; 1709 u64 now, diff; 1710 1711 /* 1712 * Make the thread RT, that is how cyclictest is usually used. 1713 */ 1714 sp.sched_priority = DEFAULT_TIMERLAT_PRIO; 1715 sched_setscheduler_nocheck(current, SCHED_FIFO, &sp); 1716 1717 tlat->count = 0; 1718 tlat->tracing_thread = false; 1719 1720 hrtimer_init(&tlat->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED_HARD); 1721 tlat->timer.function = timerlat_irq; 1722 tlat->kthread = current; 1723 osn_var->pid = current->pid; 1724 /* 1725 * Anotate the arrival time. 1726 */ 1727 tlat->abs_period = hrtimer_cb_get_time(&tlat->timer); 1728 1729 wait_next_period(tlat); 1730 1731 osn_var->sampling = 1; 1732 1733 while (!kthread_should_stop()) { 1734 now = ktime_to_ns(hrtimer_cb_get_time(&tlat->timer)); 1735 diff = now - tlat->abs_period; 1736 1737 s.seqnum = tlat->count; 1738 s.timer_latency = diff; 1739 s.context = THREAD_CONTEXT; 1740 1741 trace_timerlat_sample(&s); 1742 1743 notify_new_max_latency(diff); 1744 1745 timerlat_dump_stack(time_to_us(diff)); 1746 1747 tlat->tracing_thread = false; 1748 if (osnoise_data.stop_tracing_total) 1749 if (time_to_us(diff) >= osnoise_data.stop_tracing_total) 1750 osnoise_stop_tracing(); 1751 1752 wait_next_period(tlat); 1753 } 1754 1755 hrtimer_cancel(&tlat->timer); 1756 return 0; 1757 } 1758 #else /* CONFIG_TIMERLAT_TRACER */ 1759 static int timerlat_main(void *data) 1760 { 1761 return 0; 1762 } 1763 #endif /* CONFIG_TIMERLAT_TRACER */ 1764 1765 /* 1766 * stop_kthread - stop a workload thread 1767 */ 1768 static void stop_kthread(unsigned int cpu) 1769 { 1770 struct task_struct *kthread; 1771 1772 kthread = per_cpu(per_cpu_osnoise_var, cpu).kthread; 1773 if (kthread) { 1774 kthread_stop(kthread); 1775 per_cpu(per_cpu_osnoise_var, cpu).kthread = NULL; 1776 } else { 1777 if (!test_bit(OSN_WORKLOAD, &osnoise_options)) { 1778 per_cpu(per_cpu_osnoise_var, cpu).sampling = false; 1779 barrier(); 1780 return; 1781 } 1782 } 1783 } 1784 1785 /* 1786 * stop_per_cpu_kthread - Stop per-cpu threads 1787 * 1788 * Stop the osnoise sampling htread. Use this on unload and at system 1789 * shutdown. 1790 */ 1791 static void stop_per_cpu_kthreads(void) 1792 { 1793 int cpu; 1794 1795 cpus_read_lock(); 1796 1797 for_each_online_cpu(cpu) 1798 stop_kthread(cpu); 1799 1800 cpus_read_unlock(); 1801 } 1802 1803 /* 1804 * start_kthread - Start a workload tread 1805 */ 1806 static int start_kthread(unsigned int cpu) 1807 { 1808 struct task_struct *kthread; 1809 void *main = osnoise_main; 1810 char comm[24]; 1811 1812 if (timerlat_enabled()) { 1813 snprintf(comm, 24, "timerlat/%d", cpu); 1814 main = timerlat_main; 1815 } else { 1816 /* if no workload, just return */ 1817 if (!test_bit(OSN_WORKLOAD, &osnoise_options)) { 1818 per_cpu(per_cpu_osnoise_var, cpu).sampling = true; 1819 barrier(); 1820 return 0; 1821 } 1822 1823 snprintf(comm, 24, "osnoise/%d", cpu); 1824 } 1825 1826 kthread = kthread_run_on_cpu(main, NULL, cpu, comm); 1827 1828 if (IS_ERR(kthread)) { 1829 pr_err(BANNER "could not start sampling thread\n"); 1830 stop_per_cpu_kthreads(); 1831 return -ENOMEM; 1832 } 1833 1834 per_cpu(per_cpu_osnoise_var, cpu).kthread = kthread; 1835 1836 return 0; 1837 } 1838 1839 /* 1840 * start_per_cpu_kthread - Kick off per-cpu osnoise sampling kthreads 1841 * 1842 * This starts the kernel thread that will look for osnoise on many 1843 * cpus. 1844 */ 1845 static int start_per_cpu_kthreads(void) 1846 { 1847 struct cpumask *current_mask = &save_cpumask; 1848 int retval = 0; 1849 int cpu; 1850 1851 cpus_read_lock(); 1852 /* 1853 * Run only on online CPUs in which osnoise is allowed to run. 1854 */ 1855 cpumask_and(current_mask, cpu_online_mask, &osnoise_cpumask); 1856 1857 for_each_possible_cpu(cpu) 1858 per_cpu(per_cpu_osnoise_var, cpu).kthread = NULL; 1859 1860 for_each_cpu(cpu, current_mask) { 1861 retval = start_kthread(cpu); 1862 if (retval) { 1863 cpus_read_unlock(); 1864 stop_per_cpu_kthreads(); 1865 return retval; 1866 } 1867 } 1868 1869 cpus_read_unlock(); 1870 1871 return retval; 1872 } 1873 1874 #ifdef CONFIG_HOTPLUG_CPU 1875 static void osnoise_hotplug_workfn(struct work_struct *dummy) 1876 { 1877 unsigned int cpu = smp_processor_id(); 1878 1879 mutex_lock(&trace_types_lock); 1880 1881 if (!osnoise_has_registered_instances()) 1882 goto out_unlock_trace; 1883 1884 mutex_lock(&interface_lock); 1885 cpus_read_lock(); 1886 1887 if (!cpumask_test_cpu(cpu, &osnoise_cpumask)) 1888 goto out_unlock; 1889 1890 start_kthread(cpu); 1891 1892 out_unlock: 1893 cpus_read_unlock(); 1894 mutex_unlock(&interface_lock); 1895 out_unlock_trace: 1896 mutex_unlock(&trace_types_lock); 1897 } 1898 1899 static DECLARE_WORK(osnoise_hotplug_work, osnoise_hotplug_workfn); 1900 1901 /* 1902 * osnoise_cpu_init - CPU hotplug online callback function 1903 */ 1904 static int osnoise_cpu_init(unsigned int cpu) 1905 { 1906 schedule_work_on(cpu, &osnoise_hotplug_work); 1907 return 0; 1908 } 1909 1910 /* 1911 * osnoise_cpu_die - CPU hotplug offline callback function 1912 */ 1913 static int osnoise_cpu_die(unsigned int cpu) 1914 { 1915 stop_kthread(cpu); 1916 return 0; 1917 } 1918 1919 static void osnoise_init_hotplug_support(void) 1920 { 1921 int ret; 1922 1923 ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "trace/osnoise:online", 1924 osnoise_cpu_init, osnoise_cpu_die); 1925 if (ret < 0) 1926 pr_warn(BANNER "Error to init cpu hotplug support\n"); 1927 1928 return; 1929 } 1930 #else /* CONFIG_HOTPLUG_CPU */ 1931 static void osnoise_init_hotplug_support(void) 1932 { 1933 return; 1934 } 1935 #endif /* CONFIG_HOTPLUG_CPU */ 1936 1937 /* 1938 * seq file functions for the osnoise/options file. 1939 */ 1940 static void *s_options_start(struct seq_file *s, loff_t *pos) 1941 { 1942 int option = *pos; 1943 1944 mutex_lock(&interface_lock); 1945 1946 if (option >= OSN_MAX) 1947 return NULL; 1948 1949 return pos; 1950 } 1951 1952 static void *s_options_next(struct seq_file *s, void *v, loff_t *pos) 1953 { 1954 int option = ++(*pos); 1955 1956 if (option >= OSN_MAX) 1957 return NULL; 1958 1959 return pos; 1960 } 1961 1962 static int s_options_show(struct seq_file *s, void *v) 1963 { 1964 loff_t *pos = v; 1965 int option = *pos; 1966 1967 if (option == OSN_DEFAULTS) { 1968 if (osnoise_options == OSN_DEFAULT_OPTIONS) 1969 seq_printf(s, "%s", osnoise_options_str[option]); 1970 else 1971 seq_printf(s, "NO_%s", osnoise_options_str[option]); 1972 goto out; 1973 } 1974 1975 if (test_bit(option, &osnoise_options)) 1976 seq_printf(s, "%s", osnoise_options_str[option]); 1977 else 1978 seq_printf(s, "NO_%s", osnoise_options_str[option]); 1979 1980 out: 1981 if (option != OSN_MAX) 1982 seq_puts(s, " "); 1983 1984 return 0; 1985 } 1986 1987 static void s_options_stop(struct seq_file *s, void *v) 1988 { 1989 seq_puts(s, "\n"); 1990 mutex_unlock(&interface_lock); 1991 } 1992 1993 static const struct seq_operations osnoise_options_seq_ops = { 1994 .start = s_options_start, 1995 .next = s_options_next, 1996 .show = s_options_show, 1997 .stop = s_options_stop 1998 }; 1999 2000 static int osnoise_options_open(struct inode *inode, struct file *file) 2001 { 2002 return seq_open(file, &osnoise_options_seq_ops); 2003 }; 2004 2005 /** 2006 * osnoise_options_write - Write function for "options" entry 2007 * @filp: The active open file structure 2008 * @ubuf: The user buffer that contains the value to write 2009 * @cnt: The maximum number of bytes to write to "file" 2010 * @ppos: The current position in @file 2011 * 2012 * Writing the option name sets the option, writing the "NO_" 2013 * prefix in front of the option name disables it. 2014 * 2015 * Writing "DEFAULTS" resets the option values to the default ones. 2016 */ 2017 static ssize_t osnoise_options_write(struct file *filp, const char __user *ubuf, 2018 size_t cnt, loff_t *ppos) 2019 { 2020 int running, option, enable, retval; 2021 char buf[256], *option_str; 2022 2023 if (cnt >= 256) 2024 return -EINVAL; 2025 2026 if (copy_from_user(buf, ubuf, cnt)) 2027 return -EFAULT; 2028 2029 buf[cnt] = 0; 2030 2031 if (strncmp(buf, "NO_", 3)) { 2032 option_str = strstrip(buf); 2033 enable = true; 2034 } else { 2035 option_str = strstrip(&buf[3]); 2036 enable = false; 2037 } 2038 2039 option = match_string(osnoise_options_str, OSN_MAX, option_str); 2040 if (option < 0) 2041 return -EINVAL; 2042 2043 /* 2044 * trace_types_lock is taken to avoid concurrency on start/stop. 2045 */ 2046 mutex_lock(&trace_types_lock); 2047 running = osnoise_has_registered_instances(); 2048 if (running) 2049 stop_per_cpu_kthreads(); 2050 2051 mutex_lock(&interface_lock); 2052 /* 2053 * avoid CPU hotplug operations that might read options. 2054 */ 2055 cpus_read_lock(); 2056 2057 retval = cnt; 2058 2059 if (enable) { 2060 if (option == OSN_DEFAULTS) 2061 osnoise_options = OSN_DEFAULT_OPTIONS; 2062 else 2063 set_bit(option, &osnoise_options); 2064 } else { 2065 if (option == OSN_DEFAULTS) 2066 retval = -EINVAL; 2067 else 2068 clear_bit(option, &osnoise_options); 2069 } 2070 2071 cpus_read_unlock(); 2072 mutex_unlock(&interface_lock); 2073 2074 if (running) 2075 start_per_cpu_kthreads(); 2076 mutex_unlock(&trace_types_lock); 2077 2078 return retval; 2079 } 2080 2081 /* 2082 * osnoise_cpus_read - Read function for reading the "cpus" file 2083 * @filp: The active open file structure 2084 * @ubuf: The userspace provided buffer to read value into 2085 * @cnt: The maximum number of bytes to read 2086 * @ppos: The current "file" position 2087 * 2088 * Prints the "cpus" output into the user-provided buffer. 2089 */ 2090 static ssize_t 2091 osnoise_cpus_read(struct file *filp, char __user *ubuf, size_t count, 2092 loff_t *ppos) 2093 { 2094 char *mask_str; 2095 int len; 2096 2097 mutex_lock(&interface_lock); 2098 2099 len = snprintf(NULL, 0, "%*pbl\n", cpumask_pr_args(&osnoise_cpumask)) + 1; 2100 mask_str = kmalloc(len, GFP_KERNEL); 2101 if (!mask_str) { 2102 count = -ENOMEM; 2103 goto out_unlock; 2104 } 2105 2106 len = snprintf(mask_str, len, "%*pbl\n", cpumask_pr_args(&osnoise_cpumask)); 2107 if (len >= count) { 2108 count = -EINVAL; 2109 goto out_free; 2110 } 2111 2112 count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len); 2113 2114 out_free: 2115 kfree(mask_str); 2116 out_unlock: 2117 mutex_unlock(&interface_lock); 2118 2119 return count; 2120 } 2121 2122 /* 2123 * osnoise_cpus_write - Write function for "cpus" entry 2124 * @filp: The active open file structure 2125 * @ubuf: The user buffer that contains the value to write 2126 * @cnt: The maximum number of bytes to write to "file" 2127 * @ppos: The current position in @file 2128 * 2129 * This function provides a write implementation for the "cpus" 2130 * interface to the osnoise trace. By default, it lists all CPUs, 2131 * in this way, allowing osnoise threads to run on any online CPU 2132 * of the system. It serves to restrict the execution of osnoise to the 2133 * set of CPUs writing via this interface. Why not use "tracing_cpumask"? 2134 * Because the user might be interested in tracing what is running on 2135 * other CPUs. For instance, one might run osnoise in one HT CPU 2136 * while observing what is running on the sibling HT CPU. 2137 */ 2138 static ssize_t 2139 osnoise_cpus_write(struct file *filp, const char __user *ubuf, size_t count, 2140 loff_t *ppos) 2141 { 2142 cpumask_var_t osnoise_cpumask_new; 2143 int running, err; 2144 char buf[256]; 2145 2146 if (count >= 256) 2147 return -EINVAL; 2148 2149 if (copy_from_user(buf, ubuf, count)) 2150 return -EFAULT; 2151 2152 if (!zalloc_cpumask_var(&osnoise_cpumask_new, GFP_KERNEL)) 2153 return -ENOMEM; 2154 2155 err = cpulist_parse(buf, osnoise_cpumask_new); 2156 if (err) 2157 goto err_free; 2158 2159 /* 2160 * trace_types_lock is taken to avoid concurrency on start/stop. 2161 */ 2162 mutex_lock(&trace_types_lock); 2163 running = osnoise_has_registered_instances(); 2164 if (running) 2165 stop_per_cpu_kthreads(); 2166 2167 mutex_lock(&interface_lock); 2168 /* 2169 * osnoise_cpumask is read by CPU hotplug operations. 2170 */ 2171 cpus_read_lock(); 2172 2173 cpumask_copy(&osnoise_cpumask, osnoise_cpumask_new); 2174 2175 cpus_read_unlock(); 2176 mutex_unlock(&interface_lock); 2177 2178 if (running) 2179 start_per_cpu_kthreads(); 2180 mutex_unlock(&trace_types_lock); 2181 2182 free_cpumask_var(osnoise_cpumask_new); 2183 return count; 2184 2185 err_free: 2186 free_cpumask_var(osnoise_cpumask_new); 2187 2188 return err; 2189 } 2190 2191 /* 2192 * osnoise/runtime_us: cannot be greater than the period. 2193 */ 2194 static struct trace_min_max_param osnoise_runtime = { 2195 .lock = &interface_lock, 2196 .val = &osnoise_data.sample_runtime, 2197 .max = &osnoise_data.sample_period, 2198 .min = NULL, 2199 }; 2200 2201 /* 2202 * osnoise/period_us: cannot be smaller than the runtime. 2203 */ 2204 static struct trace_min_max_param osnoise_period = { 2205 .lock = &interface_lock, 2206 .val = &osnoise_data.sample_period, 2207 .max = NULL, 2208 .min = &osnoise_data.sample_runtime, 2209 }; 2210 2211 /* 2212 * osnoise/stop_tracing_us: no limit. 2213 */ 2214 static struct trace_min_max_param osnoise_stop_tracing_in = { 2215 .lock = &interface_lock, 2216 .val = &osnoise_data.stop_tracing, 2217 .max = NULL, 2218 .min = NULL, 2219 }; 2220 2221 /* 2222 * osnoise/stop_tracing_total_us: no limit. 2223 */ 2224 static struct trace_min_max_param osnoise_stop_tracing_total = { 2225 .lock = &interface_lock, 2226 .val = &osnoise_data.stop_tracing_total, 2227 .max = NULL, 2228 .min = NULL, 2229 }; 2230 2231 #ifdef CONFIG_TIMERLAT_TRACER 2232 /* 2233 * osnoise/print_stack: print the stacktrace of the IRQ handler if the total 2234 * latency is higher than val. 2235 */ 2236 static struct trace_min_max_param osnoise_print_stack = { 2237 .lock = &interface_lock, 2238 .val = &osnoise_data.print_stack, 2239 .max = NULL, 2240 .min = NULL, 2241 }; 2242 2243 /* 2244 * osnoise/timerlat_period: min 100 us, max 1 s 2245 */ 2246 static u64 timerlat_min_period = 100; 2247 static u64 timerlat_max_period = 1000000; 2248 static struct trace_min_max_param timerlat_period = { 2249 .lock = &interface_lock, 2250 .val = &osnoise_data.timerlat_period, 2251 .max = &timerlat_max_period, 2252 .min = &timerlat_min_period, 2253 }; 2254 #endif 2255 2256 static const struct file_operations cpus_fops = { 2257 .open = tracing_open_generic, 2258 .read = osnoise_cpus_read, 2259 .write = osnoise_cpus_write, 2260 .llseek = generic_file_llseek, 2261 }; 2262 2263 static const struct file_operations osnoise_options_fops = { 2264 .open = osnoise_options_open, 2265 .read = seq_read, 2266 .llseek = seq_lseek, 2267 .release = seq_release, 2268 .write = osnoise_options_write 2269 }; 2270 2271 #ifdef CONFIG_TIMERLAT_TRACER 2272 #ifdef CONFIG_STACKTRACE 2273 static int init_timerlat_stack_tracefs(struct dentry *top_dir) 2274 { 2275 struct dentry *tmp; 2276 2277 tmp = tracefs_create_file("print_stack", TRACE_MODE_WRITE, top_dir, 2278 &osnoise_print_stack, &trace_min_max_fops); 2279 if (!tmp) 2280 return -ENOMEM; 2281 2282 return 0; 2283 } 2284 #else /* CONFIG_STACKTRACE */ 2285 static int init_timerlat_stack_tracefs(struct dentry *top_dir) 2286 { 2287 return 0; 2288 } 2289 #endif /* CONFIG_STACKTRACE */ 2290 2291 /* 2292 * init_timerlat_tracefs - A function to initialize the timerlat interface files 2293 */ 2294 static int init_timerlat_tracefs(struct dentry *top_dir) 2295 { 2296 struct dentry *tmp; 2297 2298 tmp = tracefs_create_file("timerlat_period_us", TRACE_MODE_WRITE, top_dir, 2299 &timerlat_period, &trace_min_max_fops); 2300 if (!tmp) 2301 return -ENOMEM; 2302 2303 return init_timerlat_stack_tracefs(top_dir); 2304 } 2305 #else /* CONFIG_TIMERLAT_TRACER */ 2306 static int init_timerlat_tracefs(struct dentry *top_dir) 2307 { 2308 return 0; 2309 } 2310 #endif /* CONFIG_TIMERLAT_TRACER */ 2311 2312 /* 2313 * init_tracefs - A function to initialize the tracefs interface files 2314 * 2315 * This function creates entries in tracefs for "osnoise" and "timerlat". 2316 * It creates these directories in the tracing directory, and within that 2317 * directory the use can change and view the configs. 2318 */ 2319 static int init_tracefs(void) 2320 { 2321 struct dentry *top_dir; 2322 struct dentry *tmp; 2323 int ret; 2324 2325 ret = tracing_init_dentry(); 2326 if (ret) 2327 return -ENOMEM; 2328 2329 top_dir = tracefs_create_dir("osnoise", NULL); 2330 if (!top_dir) 2331 return 0; 2332 2333 tmp = tracefs_create_file("period_us", TRACE_MODE_WRITE, top_dir, 2334 &osnoise_period, &trace_min_max_fops); 2335 if (!tmp) 2336 goto err; 2337 2338 tmp = tracefs_create_file("runtime_us", TRACE_MODE_WRITE, top_dir, 2339 &osnoise_runtime, &trace_min_max_fops); 2340 if (!tmp) 2341 goto err; 2342 2343 tmp = tracefs_create_file("stop_tracing_us", TRACE_MODE_WRITE, top_dir, 2344 &osnoise_stop_tracing_in, &trace_min_max_fops); 2345 if (!tmp) 2346 goto err; 2347 2348 tmp = tracefs_create_file("stop_tracing_total_us", TRACE_MODE_WRITE, top_dir, 2349 &osnoise_stop_tracing_total, &trace_min_max_fops); 2350 if (!tmp) 2351 goto err; 2352 2353 tmp = trace_create_file("cpus", TRACE_MODE_WRITE, top_dir, NULL, &cpus_fops); 2354 if (!tmp) 2355 goto err; 2356 2357 tmp = trace_create_file("options", TRACE_MODE_WRITE, top_dir, NULL, 2358 &osnoise_options_fops); 2359 if (!tmp) 2360 goto err; 2361 2362 ret = init_timerlat_tracefs(top_dir); 2363 if (ret) 2364 goto err; 2365 2366 return 0; 2367 2368 err: 2369 tracefs_remove(top_dir); 2370 return -ENOMEM; 2371 } 2372 2373 static int osnoise_hook_events(void) 2374 { 2375 int retval; 2376 2377 /* 2378 * Trace is already hooked, we are re-enabling from 2379 * a stop_tracing_*. 2380 */ 2381 if (trace_osnoise_callback_enabled) 2382 return 0; 2383 2384 retval = hook_irq_events(); 2385 if (retval) 2386 return -EINVAL; 2387 2388 retval = hook_softirq_events(); 2389 if (retval) 2390 goto out_unhook_irq; 2391 2392 retval = hook_thread_events(); 2393 /* 2394 * All fine! 2395 */ 2396 if (!retval) 2397 return 0; 2398 2399 unhook_softirq_events(); 2400 out_unhook_irq: 2401 unhook_irq_events(); 2402 return -EINVAL; 2403 } 2404 2405 static void osnoise_unhook_events(void) 2406 { 2407 unhook_thread_events(); 2408 unhook_softirq_events(); 2409 unhook_irq_events(); 2410 } 2411 2412 /* 2413 * osnoise_workload_start - start the workload and hook to events 2414 */ 2415 static int osnoise_workload_start(void) 2416 { 2417 int retval; 2418 2419 /* 2420 * Instances need to be registered after calling workload 2421 * start. Hence, if there is already an instance, the 2422 * workload was already registered. Otherwise, this 2423 * code is on the way to register the first instance, 2424 * and the workload will start. 2425 */ 2426 if (osnoise_has_registered_instances()) 2427 return 0; 2428 2429 osn_var_reset_all(); 2430 2431 retval = osnoise_hook_events(); 2432 if (retval) 2433 return retval; 2434 2435 /* 2436 * Make sure that ftrace_nmi_enter/exit() see reset values 2437 * before enabling trace_osnoise_callback_enabled. 2438 */ 2439 barrier(); 2440 trace_osnoise_callback_enabled = true; 2441 2442 retval = start_per_cpu_kthreads(); 2443 if (retval) { 2444 trace_osnoise_callback_enabled = false; 2445 /* 2446 * Make sure that ftrace_nmi_enter/exit() see 2447 * trace_osnoise_callback_enabled as false before continuing. 2448 */ 2449 barrier(); 2450 2451 osnoise_unhook_events(); 2452 return retval; 2453 } 2454 2455 return 0; 2456 } 2457 2458 /* 2459 * osnoise_workload_stop - stop the workload and unhook the events 2460 */ 2461 static void osnoise_workload_stop(void) 2462 { 2463 /* 2464 * Instances need to be unregistered before calling 2465 * stop. Hence, if there is a registered instance, more 2466 * than one instance is running, and the workload will not 2467 * yet stop. Otherwise, this code is on the way to disable 2468 * the last instance, and the workload can stop. 2469 */ 2470 if (osnoise_has_registered_instances()) 2471 return; 2472 2473 /* 2474 * If callbacks were already disabled in a previous stop 2475 * call, there is no need to disable then again. 2476 * 2477 * For instance, this happens when tracing is stopped via: 2478 * echo 0 > tracing_on 2479 * echo nop > current_tracer. 2480 */ 2481 if (!trace_osnoise_callback_enabled) 2482 return; 2483 2484 trace_osnoise_callback_enabled = false; 2485 /* 2486 * Make sure that ftrace_nmi_enter/exit() see 2487 * trace_osnoise_callback_enabled as false before continuing. 2488 */ 2489 barrier(); 2490 2491 stop_per_cpu_kthreads(); 2492 2493 osnoise_unhook_events(); 2494 } 2495 2496 static void osnoise_tracer_start(struct trace_array *tr) 2497 { 2498 int retval; 2499 2500 /* 2501 * If the instance is already registered, there is no need to 2502 * register it again. 2503 */ 2504 if (osnoise_instance_registered(tr)) 2505 return; 2506 2507 retval = osnoise_workload_start(); 2508 if (retval) 2509 pr_err(BANNER "Error starting osnoise tracer\n"); 2510 2511 osnoise_register_instance(tr); 2512 } 2513 2514 static void osnoise_tracer_stop(struct trace_array *tr) 2515 { 2516 osnoise_unregister_instance(tr); 2517 osnoise_workload_stop(); 2518 } 2519 2520 static int osnoise_tracer_init(struct trace_array *tr) 2521 { 2522 /* 2523 * Only allow osnoise tracer if timerlat tracer is not running 2524 * already. 2525 */ 2526 if (timerlat_enabled()) 2527 return -EBUSY; 2528 2529 tr->max_latency = 0; 2530 2531 osnoise_tracer_start(tr); 2532 return 0; 2533 } 2534 2535 static void osnoise_tracer_reset(struct trace_array *tr) 2536 { 2537 osnoise_tracer_stop(tr); 2538 } 2539 2540 static struct tracer osnoise_tracer __read_mostly = { 2541 .name = "osnoise", 2542 .init = osnoise_tracer_init, 2543 .reset = osnoise_tracer_reset, 2544 .start = osnoise_tracer_start, 2545 .stop = osnoise_tracer_stop, 2546 .print_header = print_osnoise_headers, 2547 .allow_instances = true, 2548 }; 2549 2550 #ifdef CONFIG_TIMERLAT_TRACER 2551 static void timerlat_tracer_start(struct trace_array *tr) 2552 { 2553 int retval; 2554 2555 /* 2556 * If the instance is already registered, there is no need to 2557 * register it again. 2558 */ 2559 if (osnoise_instance_registered(tr)) 2560 return; 2561 2562 retval = osnoise_workload_start(); 2563 if (retval) 2564 pr_err(BANNER "Error starting timerlat tracer\n"); 2565 2566 osnoise_register_instance(tr); 2567 2568 return; 2569 } 2570 2571 static void timerlat_tracer_stop(struct trace_array *tr) 2572 { 2573 int cpu; 2574 2575 osnoise_unregister_instance(tr); 2576 2577 /* 2578 * Instruct the threads to stop only if this is the last instance. 2579 */ 2580 if (!osnoise_has_registered_instances()) { 2581 for_each_online_cpu(cpu) 2582 per_cpu(per_cpu_osnoise_var, cpu).sampling = 0; 2583 } 2584 2585 osnoise_workload_stop(); 2586 } 2587 2588 static int timerlat_tracer_init(struct trace_array *tr) 2589 { 2590 /* 2591 * Only allow timerlat tracer if osnoise tracer is not running already. 2592 */ 2593 if (osnoise_has_registered_instances() && !osnoise_data.timerlat_tracer) 2594 return -EBUSY; 2595 2596 /* 2597 * If this is the first instance, set timerlat_tracer to block 2598 * osnoise tracer start. 2599 */ 2600 if (!osnoise_has_registered_instances()) 2601 osnoise_data.timerlat_tracer = 1; 2602 2603 tr->max_latency = 0; 2604 timerlat_tracer_start(tr); 2605 2606 return 0; 2607 } 2608 2609 static void timerlat_tracer_reset(struct trace_array *tr) 2610 { 2611 timerlat_tracer_stop(tr); 2612 2613 /* 2614 * If this is the last instance, reset timerlat_tracer allowing 2615 * osnoise to be started. 2616 */ 2617 if (!osnoise_has_registered_instances()) 2618 osnoise_data.timerlat_tracer = 0; 2619 } 2620 2621 static struct tracer timerlat_tracer __read_mostly = { 2622 .name = "timerlat", 2623 .init = timerlat_tracer_init, 2624 .reset = timerlat_tracer_reset, 2625 .start = timerlat_tracer_start, 2626 .stop = timerlat_tracer_stop, 2627 .print_header = print_timerlat_headers, 2628 .allow_instances = true, 2629 }; 2630 2631 __init static int init_timerlat_tracer(void) 2632 { 2633 return register_tracer(&timerlat_tracer); 2634 } 2635 #else /* CONFIG_TIMERLAT_TRACER */ 2636 __init static int init_timerlat_tracer(void) 2637 { 2638 return 0; 2639 } 2640 #endif /* CONFIG_TIMERLAT_TRACER */ 2641 2642 __init static int init_osnoise_tracer(void) 2643 { 2644 int ret; 2645 2646 mutex_init(&interface_lock); 2647 2648 cpumask_copy(&osnoise_cpumask, cpu_all_mask); 2649 2650 ret = register_tracer(&osnoise_tracer); 2651 if (ret) { 2652 pr_err(BANNER "Error registering osnoise!\n"); 2653 return ret; 2654 } 2655 2656 ret = init_timerlat_tracer(); 2657 if (ret) { 2658 pr_err(BANNER "Error registering timerlat!\n"); 2659 return ret; 2660 } 2661 2662 osnoise_init_hotplug_support(); 2663 2664 INIT_LIST_HEAD_RCU(&osnoise_instances); 2665 2666 init_tracefs(); 2667 2668 return 0; 2669 } 2670 late_initcall(init_osnoise_tracer); 2671