1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * OS Noise Tracer: computes the OS Noise suffered by a running thread. 4 * Timerlat Tracer: measures the wakeup latency of a timer triggered IRQ and thread. 5 * 6 * Based on "hwlat_detector" tracer by: 7 * Copyright (C) 2008-2009 Jon Masters, Red Hat, Inc. <jcm@redhat.com> 8 * Copyright (C) 2013-2016 Steven Rostedt, Red Hat, Inc. <srostedt@redhat.com> 9 * With feedback from Clark Williams <williams@redhat.com> 10 * 11 * And also based on the rtsl tracer presented on: 12 * DE OLIVEIRA, Daniel Bristot, et al. Demystifying the real-time linux 13 * scheduling latency. In: 32nd Euromicro Conference on Real-Time Systems 14 * (ECRTS 2020). Schloss Dagstuhl-Leibniz-Zentrum fur Informatik, 2020. 15 * 16 * Copyright (C) 2021 Daniel Bristot de Oliveira, Red Hat, Inc. <bristot@redhat.com> 17 */ 18 19 #include <linux/kthread.h> 20 #include <linux/tracefs.h> 21 #include <linux/uaccess.h> 22 #include <linux/cpumask.h> 23 #include <linux/delay.h> 24 #include <linux/sched/clock.h> 25 #include <uapi/linux/sched/types.h> 26 #include <linux/sched.h> 27 #include "trace.h" 28 29 #ifdef CONFIG_X86_LOCAL_APIC 30 #include <asm/trace/irq_vectors.h> 31 #undef TRACE_INCLUDE_PATH 32 #undef TRACE_INCLUDE_FILE 33 #endif /* CONFIG_X86_LOCAL_APIC */ 34 35 #include <trace/events/irq.h> 36 #include <trace/events/sched.h> 37 38 #define CREATE_TRACE_POINTS 39 #include <trace/events/osnoise.h> 40 41 /* 42 * Default values. 43 */ 44 #define BANNER "osnoise: " 45 #define DEFAULT_SAMPLE_PERIOD 1000000 /* 1s */ 46 #define DEFAULT_SAMPLE_RUNTIME 1000000 /* 1s */ 47 48 #define DEFAULT_TIMERLAT_PERIOD 1000 /* 1ms */ 49 #define DEFAULT_TIMERLAT_PRIO 95 /* FIFO 95 */ 50 51 /* 52 * trace_array of the enabled osnoise/timerlat instances. 53 */ 54 struct osnoise_instance { 55 struct list_head list; 56 struct trace_array *tr; 57 }; 58 59 static struct list_head osnoise_instances; 60 61 static bool osnoise_has_registered_instances(void) 62 { 63 return !!list_first_or_null_rcu(&osnoise_instances, 64 struct osnoise_instance, 65 list); 66 } 67 68 /* 69 * osnoise_instance_registered - check if a tr is already registered 70 */ 71 static int osnoise_instance_registered(struct trace_array *tr) 72 { 73 struct osnoise_instance *inst; 74 int found = 0; 75 76 rcu_read_lock(); 77 list_for_each_entry_rcu(inst, &osnoise_instances, list) { 78 if (inst->tr == tr) 79 found = 1; 80 } 81 rcu_read_unlock(); 82 83 return found; 84 } 85 86 /* 87 * osnoise_register_instance - register a new trace instance 88 * 89 * Register a trace_array *tr in the list of instances running 90 * osnoise/timerlat tracers. 91 */ 92 static int osnoise_register_instance(struct trace_array *tr) 93 { 94 struct osnoise_instance *inst; 95 96 /* 97 * register/unregister serialization is provided by trace's 98 * trace_types_lock. 99 */ 100 lockdep_assert_held(&trace_types_lock); 101 102 inst = kmalloc(sizeof(*inst), GFP_KERNEL); 103 if (!inst) 104 return -ENOMEM; 105 106 INIT_LIST_HEAD_RCU(&inst->list); 107 inst->tr = tr; 108 list_add_tail_rcu(&inst->list, &osnoise_instances); 109 110 return 0; 111 } 112 113 /* 114 * osnoise_unregister_instance - unregister a registered trace instance 115 * 116 * Remove the trace_array *tr from the list of instances running 117 * osnoise/timerlat tracers. 118 */ 119 static void osnoise_unregister_instance(struct trace_array *tr) 120 { 121 struct osnoise_instance *inst; 122 int found = 0; 123 124 /* 125 * register/unregister serialization is provided by trace's 126 * trace_types_lock. 127 */ 128 lockdep_assert_held(&trace_types_lock); 129 130 list_for_each_entry_rcu(inst, &osnoise_instances, list) { 131 if (inst->tr == tr) { 132 list_del_rcu(&inst->list); 133 found = 1; 134 break; 135 } 136 } 137 138 if (!found) 139 return; 140 141 kvfree_rcu(inst); 142 } 143 144 /* 145 * NMI runtime info. 146 */ 147 struct osn_nmi { 148 u64 count; 149 u64 delta_start; 150 }; 151 152 /* 153 * IRQ runtime info. 154 */ 155 struct osn_irq { 156 u64 count; 157 u64 arrival_time; 158 u64 delta_start; 159 }; 160 161 #define IRQ_CONTEXT 0 162 #define THREAD_CONTEXT 1 163 /* 164 * sofirq runtime info. 165 */ 166 struct osn_softirq { 167 u64 count; 168 u64 arrival_time; 169 u64 delta_start; 170 }; 171 172 /* 173 * thread runtime info. 174 */ 175 struct osn_thread { 176 u64 count; 177 u64 arrival_time; 178 u64 delta_start; 179 }; 180 181 /* 182 * Runtime information: this structure saves the runtime information used by 183 * one sampling thread. 184 */ 185 struct osnoise_variables { 186 struct task_struct *kthread; 187 bool sampling; 188 pid_t pid; 189 struct osn_nmi nmi; 190 struct osn_irq irq; 191 struct osn_softirq softirq; 192 struct osn_thread thread; 193 local_t int_counter; 194 }; 195 196 /* 197 * Per-cpu runtime information. 198 */ 199 DEFINE_PER_CPU(struct osnoise_variables, per_cpu_osnoise_var); 200 201 /* 202 * this_cpu_osn_var - Return the per-cpu osnoise_variables on its relative CPU 203 */ 204 static inline struct osnoise_variables *this_cpu_osn_var(void) 205 { 206 return this_cpu_ptr(&per_cpu_osnoise_var); 207 } 208 209 #ifdef CONFIG_TIMERLAT_TRACER 210 /* 211 * Runtime information for the timer mode. 212 */ 213 struct timerlat_variables { 214 struct task_struct *kthread; 215 struct hrtimer timer; 216 u64 rel_period; 217 u64 abs_period; 218 bool tracing_thread; 219 u64 count; 220 }; 221 222 DEFINE_PER_CPU(struct timerlat_variables, per_cpu_timerlat_var); 223 224 /* 225 * this_cpu_tmr_var - Return the per-cpu timerlat_variables on its relative CPU 226 */ 227 static inline struct timerlat_variables *this_cpu_tmr_var(void) 228 { 229 return this_cpu_ptr(&per_cpu_timerlat_var); 230 } 231 232 /* 233 * tlat_var_reset - Reset the values of the given timerlat_variables 234 */ 235 static inline void tlat_var_reset(void) 236 { 237 struct timerlat_variables *tlat_var; 238 int cpu; 239 /* 240 * So far, all the values are initialized as 0, so 241 * zeroing the structure is perfect. 242 */ 243 for_each_cpu(cpu, cpu_online_mask) { 244 tlat_var = per_cpu_ptr(&per_cpu_timerlat_var, cpu); 245 memset(tlat_var, 0, sizeof(*tlat_var)); 246 } 247 } 248 #else /* CONFIG_TIMERLAT_TRACER */ 249 #define tlat_var_reset() do {} while (0) 250 #endif /* CONFIG_TIMERLAT_TRACER */ 251 252 /* 253 * osn_var_reset - Reset the values of the given osnoise_variables 254 */ 255 static inline void osn_var_reset(void) 256 { 257 struct osnoise_variables *osn_var; 258 int cpu; 259 260 /* 261 * So far, all the values are initialized as 0, so 262 * zeroing the structure is perfect. 263 */ 264 for_each_cpu(cpu, cpu_online_mask) { 265 osn_var = per_cpu_ptr(&per_cpu_osnoise_var, cpu); 266 memset(osn_var, 0, sizeof(*osn_var)); 267 } 268 } 269 270 /* 271 * osn_var_reset_all - Reset the value of all per-cpu osnoise_variables 272 */ 273 static inline void osn_var_reset_all(void) 274 { 275 osn_var_reset(); 276 tlat_var_reset(); 277 } 278 279 /* 280 * Tells NMIs to call back to the osnoise tracer to record timestamps. 281 */ 282 bool trace_osnoise_callback_enabled; 283 284 /* 285 * osnoise sample structure definition. Used to store the statistics of a 286 * sample run. 287 */ 288 struct osnoise_sample { 289 u64 runtime; /* runtime */ 290 u64 noise; /* noise */ 291 u64 max_sample; /* max single noise sample */ 292 int hw_count; /* # HW (incl. hypervisor) interference */ 293 int nmi_count; /* # NMIs during this sample */ 294 int irq_count; /* # IRQs during this sample */ 295 int softirq_count; /* # softirqs during this sample */ 296 int thread_count; /* # threads during this sample */ 297 }; 298 299 #ifdef CONFIG_TIMERLAT_TRACER 300 /* 301 * timerlat sample structure definition. Used to store the statistics of 302 * a sample run. 303 */ 304 struct timerlat_sample { 305 u64 timer_latency; /* timer_latency */ 306 unsigned int seqnum; /* unique sequence */ 307 int context; /* timer context */ 308 }; 309 #endif 310 311 /* 312 * Protect the interface. 313 */ 314 struct mutex interface_lock; 315 316 /* 317 * Tracer data. 318 */ 319 static struct osnoise_data { 320 u64 sample_period; /* total sampling period */ 321 u64 sample_runtime; /* active sampling portion of period */ 322 u64 stop_tracing; /* stop trace in the internal operation (loop/irq) */ 323 u64 stop_tracing_total; /* stop trace in the final operation (report/thread) */ 324 #ifdef CONFIG_TIMERLAT_TRACER 325 u64 timerlat_period; /* timerlat period */ 326 u64 print_stack; /* print IRQ stack if total > */ 327 int timerlat_tracer; /* timerlat tracer */ 328 #endif 329 bool tainted; /* infor users and developers about a problem */ 330 } osnoise_data = { 331 .sample_period = DEFAULT_SAMPLE_PERIOD, 332 .sample_runtime = DEFAULT_SAMPLE_RUNTIME, 333 .stop_tracing = 0, 334 .stop_tracing_total = 0, 335 #ifdef CONFIG_TIMERLAT_TRACER 336 .print_stack = 0, 337 .timerlat_period = DEFAULT_TIMERLAT_PERIOD, 338 .timerlat_tracer = 0, 339 #endif 340 }; 341 342 #ifdef CONFIG_TIMERLAT_TRACER 343 static inline bool timerlat_enabled(void) 344 { 345 return osnoise_data.timerlat_tracer; 346 } 347 348 static inline int timerlat_softirq_exit(struct osnoise_variables *osn_var) 349 { 350 struct timerlat_variables *tlat_var = this_cpu_tmr_var(); 351 /* 352 * If the timerlat is enabled, but the irq handler did 353 * not run yet enabling timerlat_tracer, do not trace. 354 */ 355 if (!tlat_var->tracing_thread) { 356 osn_var->softirq.arrival_time = 0; 357 osn_var->softirq.delta_start = 0; 358 return 0; 359 } 360 return 1; 361 } 362 363 static inline int timerlat_thread_exit(struct osnoise_variables *osn_var) 364 { 365 struct timerlat_variables *tlat_var = this_cpu_tmr_var(); 366 /* 367 * If the timerlat is enabled, but the irq handler did 368 * not run yet enabling timerlat_tracer, do not trace. 369 */ 370 if (!tlat_var->tracing_thread) { 371 osn_var->thread.delta_start = 0; 372 osn_var->thread.arrival_time = 0; 373 return 0; 374 } 375 return 1; 376 } 377 #else /* CONFIG_TIMERLAT_TRACER */ 378 static inline bool timerlat_enabled(void) 379 { 380 return false; 381 } 382 383 static inline int timerlat_softirq_exit(struct osnoise_variables *osn_var) 384 { 385 return 1; 386 } 387 static inline int timerlat_thread_exit(struct osnoise_variables *osn_var) 388 { 389 return 1; 390 } 391 #endif 392 393 #ifdef CONFIG_PREEMPT_RT 394 /* 395 * Print the osnoise header info. 396 */ 397 static void print_osnoise_headers(struct seq_file *s) 398 { 399 if (osnoise_data.tainted) 400 seq_puts(s, "# osnoise is tainted!\n"); 401 402 seq_puts(s, "# _-------=> irqs-off\n"); 403 seq_puts(s, "# / _------=> need-resched\n"); 404 seq_puts(s, "# | / _-----=> need-resched-lazy\n"); 405 seq_puts(s, "# || / _----=> hardirq/softirq\n"); 406 seq_puts(s, "# ||| / _---=> preempt-depth\n"); 407 seq_puts(s, "# |||| / _--=> preempt-lazy-depth\n"); 408 seq_puts(s, "# ||||| / _-=> migrate-disable\n"); 409 410 seq_puts(s, "# |||||| / "); 411 seq_puts(s, " MAX\n"); 412 413 seq_puts(s, "# ||||| / "); 414 seq_puts(s, " SINGLE Interference counters:\n"); 415 416 seq_puts(s, "# ||||||| RUNTIME "); 417 seq_puts(s, " NOISE %% OF CPU NOISE +-----------------------------+\n"); 418 419 seq_puts(s, "# TASK-PID CPU# ||||||| TIMESTAMP IN US "); 420 seq_puts(s, " IN US AVAILABLE IN US HW NMI IRQ SIRQ THREAD\n"); 421 422 seq_puts(s, "# | | | ||||||| | | "); 423 seq_puts(s, " | | | | | | | |\n"); 424 } 425 #else /* CONFIG_PREEMPT_RT */ 426 static void print_osnoise_headers(struct seq_file *s) 427 { 428 if (osnoise_data.tainted) 429 seq_puts(s, "# osnoise is tainted!\n"); 430 431 seq_puts(s, "# _-----=> irqs-off\n"); 432 seq_puts(s, "# / _----=> need-resched\n"); 433 seq_puts(s, "# | / _---=> hardirq/softirq\n"); 434 seq_puts(s, "# || / _--=> preempt-depth\n"); 435 seq_puts(s, "# ||| / _-=> migrate-disable "); 436 seq_puts(s, " MAX\n"); 437 seq_puts(s, "# |||| / delay "); 438 seq_puts(s, " SINGLE Interference counters:\n"); 439 440 seq_puts(s, "# ||||| RUNTIME "); 441 seq_puts(s, " NOISE %% OF CPU NOISE +-----------------------------+\n"); 442 443 seq_puts(s, "# TASK-PID CPU# ||||| TIMESTAMP IN US "); 444 seq_puts(s, " IN US AVAILABLE IN US HW NMI IRQ SIRQ THREAD\n"); 445 446 seq_puts(s, "# | | | ||||| | | "); 447 seq_puts(s, " | | | | | | | |\n"); 448 } 449 #endif /* CONFIG_PREEMPT_RT */ 450 451 /* 452 * osnoise_taint - report an osnoise error. 453 */ 454 #define osnoise_taint(msg) ({ \ 455 struct osnoise_instance *inst; \ 456 struct trace_buffer *buffer; \ 457 \ 458 rcu_read_lock(); \ 459 list_for_each_entry_rcu(inst, &osnoise_instances, list) { \ 460 buffer = inst->tr->array_buffer.buffer; \ 461 trace_array_printk_buf(buffer, _THIS_IP_, msg); \ 462 } \ 463 rcu_read_unlock(); \ 464 osnoise_data.tainted = true; \ 465 }) 466 467 /* 468 * Record an osnoise_sample into the tracer buffer. 469 */ 470 static void 471 __trace_osnoise_sample(struct osnoise_sample *sample, struct trace_buffer *buffer) 472 { 473 struct trace_event_call *call = &event_osnoise; 474 struct ring_buffer_event *event; 475 struct osnoise_entry *entry; 476 477 event = trace_buffer_lock_reserve(buffer, TRACE_OSNOISE, sizeof(*entry), 478 tracing_gen_ctx()); 479 if (!event) 480 return; 481 entry = ring_buffer_event_data(event); 482 entry->runtime = sample->runtime; 483 entry->noise = sample->noise; 484 entry->max_sample = sample->max_sample; 485 entry->hw_count = sample->hw_count; 486 entry->nmi_count = sample->nmi_count; 487 entry->irq_count = sample->irq_count; 488 entry->softirq_count = sample->softirq_count; 489 entry->thread_count = sample->thread_count; 490 491 if (!call_filter_check_discard(call, entry, buffer, event)) 492 trace_buffer_unlock_commit_nostack(buffer, event); 493 } 494 495 /* 496 * Record an osnoise_sample on all osnoise instances. 497 */ 498 static void trace_osnoise_sample(struct osnoise_sample *sample) 499 { 500 struct osnoise_instance *inst; 501 struct trace_buffer *buffer; 502 503 rcu_read_lock(); 504 list_for_each_entry_rcu(inst, &osnoise_instances, list) { 505 buffer = inst->tr->array_buffer.buffer; 506 __trace_osnoise_sample(sample, buffer); 507 } 508 rcu_read_unlock(); 509 } 510 511 #ifdef CONFIG_TIMERLAT_TRACER 512 /* 513 * Print the timerlat header info. 514 */ 515 #ifdef CONFIG_PREEMPT_RT 516 static void print_timerlat_headers(struct seq_file *s) 517 { 518 seq_puts(s, "# _-------=> irqs-off\n"); 519 seq_puts(s, "# / _------=> need-resched\n"); 520 seq_puts(s, "# | / _-----=> need-resched-lazy\n"); 521 seq_puts(s, "# || / _----=> hardirq/softirq\n"); 522 seq_puts(s, "# ||| / _---=> preempt-depth\n"); 523 seq_puts(s, "# |||| / _--=> preempt-lazy-depth\n"); 524 seq_puts(s, "# ||||| / _-=> migrate-disable\n"); 525 seq_puts(s, "# |||||| /\n"); 526 seq_puts(s, "# ||||||| ACTIVATION\n"); 527 seq_puts(s, "# TASK-PID CPU# ||||||| TIMESTAMP ID "); 528 seq_puts(s, " CONTEXT LATENCY\n"); 529 seq_puts(s, "# | | | ||||||| | | "); 530 seq_puts(s, " | |\n"); 531 } 532 #else /* CONFIG_PREEMPT_RT */ 533 static void print_timerlat_headers(struct seq_file *s) 534 { 535 seq_puts(s, "# _-----=> irqs-off\n"); 536 seq_puts(s, "# / _----=> need-resched\n"); 537 seq_puts(s, "# | / _---=> hardirq/softirq\n"); 538 seq_puts(s, "# || / _--=> preempt-depth\n"); 539 seq_puts(s, "# ||| / _-=> migrate-disable\n"); 540 seq_puts(s, "# |||| / delay\n"); 541 seq_puts(s, "# ||||| ACTIVATION\n"); 542 seq_puts(s, "# TASK-PID CPU# ||||| TIMESTAMP ID "); 543 seq_puts(s, " CONTEXT LATENCY\n"); 544 seq_puts(s, "# | | | ||||| | | "); 545 seq_puts(s, " | |\n"); 546 } 547 #endif /* CONFIG_PREEMPT_RT */ 548 549 static void 550 __trace_timerlat_sample(struct timerlat_sample *sample, struct trace_buffer *buffer) 551 { 552 struct trace_event_call *call = &event_osnoise; 553 struct ring_buffer_event *event; 554 struct timerlat_entry *entry; 555 556 event = trace_buffer_lock_reserve(buffer, TRACE_TIMERLAT, sizeof(*entry), 557 tracing_gen_ctx()); 558 if (!event) 559 return; 560 entry = ring_buffer_event_data(event); 561 entry->seqnum = sample->seqnum; 562 entry->context = sample->context; 563 entry->timer_latency = sample->timer_latency; 564 565 if (!call_filter_check_discard(call, entry, buffer, event)) 566 trace_buffer_unlock_commit_nostack(buffer, event); 567 } 568 569 /* 570 * Record an timerlat_sample into the tracer buffer. 571 */ 572 static void trace_timerlat_sample(struct timerlat_sample *sample) 573 { 574 struct osnoise_instance *inst; 575 struct trace_buffer *buffer; 576 577 rcu_read_lock(); 578 list_for_each_entry_rcu(inst, &osnoise_instances, list) { 579 buffer = inst->tr->array_buffer.buffer; 580 __trace_timerlat_sample(sample, buffer); 581 } 582 rcu_read_unlock(); 583 } 584 585 #ifdef CONFIG_STACKTRACE 586 587 #define MAX_CALLS 256 588 589 /* 590 * Stack trace will take place only at IRQ level, so, no need 591 * to control nesting here. 592 */ 593 struct trace_stack { 594 int stack_size; 595 int nr_entries; 596 unsigned long calls[MAX_CALLS]; 597 }; 598 599 static DEFINE_PER_CPU(struct trace_stack, trace_stack); 600 601 /* 602 * timerlat_save_stack - save a stack trace without printing 603 * 604 * Save the current stack trace without printing. The 605 * stack will be printed later, after the end of the measurement. 606 */ 607 static void timerlat_save_stack(int skip) 608 { 609 unsigned int size, nr_entries; 610 struct trace_stack *fstack; 611 612 fstack = this_cpu_ptr(&trace_stack); 613 614 size = ARRAY_SIZE(fstack->calls); 615 616 nr_entries = stack_trace_save(fstack->calls, size, skip); 617 618 fstack->stack_size = nr_entries * sizeof(unsigned long); 619 fstack->nr_entries = nr_entries; 620 621 return; 622 623 } 624 625 static void 626 __timerlat_dump_stack(struct trace_buffer *buffer, struct trace_stack *fstack, unsigned int size) 627 { 628 struct trace_event_call *call = &event_osnoise; 629 struct ring_buffer_event *event; 630 struct stack_entry *entry; 631 632 event = trace_buffer_lock_reserve(buffer, TRACE_STACK, sizeof(*entry) + size, 633 tracing_gen_ctx()); 634 if (!event) 635 return; 636 637 entry = ring_buffer_event_data(event); 638 639 memcpy(&entry->caller, fstack->calls, size); 640 entry->size = fstack->nr_entries; 641 642 if (!call_filter_check_discard(call, entry, buffer, event)) 643 trace_buffer_unlock_commit_nostack(buffer, event); 644 } 645 646 /* 647 * timerlat_dump_stack - dump a stack trace previously saved 648 */ 649 static void timerlat_dump_stack(u64 latency) 650 { 651 struct osnoise_instance *inst; 652 struct trace_buffer *buffer; 653 struct trace_stack *fstack; 654 unsigned int size; 655 656 /* 657 * trace only if latency > print_stack config, if enabled. 658 */ 659 if (!osnoise_data.print_stack || osnoise_data.print_stack > latency) 660 return; 661 662 preempt_disable_notrace(); 663 fstack = this_cpu_ptr(&trace_stack); 664 size = fstack->stack_size; 665 666 rcu_read_lock(); 667 list_for_each_entry_rcu(inst, &osnoise_instances, list) { 668 buffer = inst->tr->array_buffer.buffer; 669 __timerlat_dump_stack(buffer, fstack, size); 670 671 } 672 rcu_read_unlock(); 673 preempt_enable_notrace(); 674 } 675 #else /* CONFIG_STACKTRACE */ 676 #define timerlat_dump_stack(u64 latency) do {} while (0) 677 #define timerlat_save_stack(a) do {} while (0) 678 #endif /* CONFIG_STACKTRACE */ 679 #endif /* CONFIG_TIMERLAT_TRACER */ 680 681 /* 682 * Macros to encapsulate the time capturing infrastructure. 683 */ 684 #define time_get() trace_clock_local() 685 #define time_to_us(x) div_u64(x, 1000) 686 #define time_sub(a, b) ((a) - (b)) 687 688 /* 689 * cond_move_irq_delta_start - Forward the delta_start of a running IRQ 690 * 691 * If an IRQ is preempted by an NMI, its delta_start is pushed forward 692 * to discount the NMI interference. 693 * 694 * See get_int_safe_duration(). 695 */ 696 static inline void 697 cond_move_irq_delta_start(struct osnoise_variables *osn_var, u64 duration) 698 { 699 if (osn_var->irq.delta_start) 700 osn_var->irq.delta_start += duration; 701 } 702 703 #ifndef CONFIG_PREEMPT_RT 704 /* 705 * cond_move_softirq_delta_start - Forward the delta_start of a running softirq. 706 * 707 * If a softirq is preempted by an IRQ or NMI, its delta_start is pushed 708 * forward to discount the interference. 709 * 710 * See get_int_safe_duration(). 711 */ 712 static inline void 713 cond_move_softirq_delta_start(struct osnoise_variables *osn_var, u64 duration) 714 { 715 if (osn_var->softirq.delta_start) 716 osn_var->softirq.delta_start += duration; 717 } 718 #else /* CONFIG_PREEMPT_RT */ 719 #define cond_move_softirq_delta_start(osn_var, duration) do {} while (0) 720 #endif 721 722 /* 723 * cond_move_thread_delta_start - Forward the delta_start of a running thread 724 * 725 * If a noisy thread is preempted by an softirq, IRQ or NMI, its delta_start 726 * is pushed forward to discount the interference. 727 * 728 * See get_int_safe_duration(). 729 */ 730 static inline void 731 cond_move_thread_delta_start(struct osnoise_variables *osn_var, u64 duration) 732 { 733 if (osn_var->thread.delta_start) 734 osn_var->thread.delta_start += duration; 735 } 736 737 /* 738 * get_int_safe_duration - Get the duration of a window 739 * 740 * The irq, softirq and thread varaibles need to have its duration without 741 * the interference from higher priority interrupts. Instead of keeping a 742 * variable to discount the interrupt interference from these variables, the 743 * starting time of these variables are pushed forward with the interrupt's 744 * duration. In this way, a single variable is used to: 745 * 746 * - Know if a given window is being measured. 747 * - Account its duration. 748 * - Discount the interference. 749 * 750 * To avoid getting inconsistent values, e.g.,: 751 * 752 * now = time_get() 753 * ---> interrupt! 754 * delta_start -= int duration; 755 * <--- 756 * duration = now - delta_start; 757 * 758 * result: negative duration if the variable duration before the 759 * interrupt was smaller than the interrupt execution. 760 * 761 * A counter of interrupts is used. If the counter increased, try 762 * to capture an interference safe duration. 763 */ 764 static inline s64 765 get_int_safe_duration(struct osnoise_variables *osn_var, u64 *delta_start) 766 { 767 u64 int_counter, now; 768 s64 duration; 769 770 do { 771 int_counter = local_read(&osn_var->int_counter); 772 /* synchronize with interrupts */ 773 barrier(); 774 775 now = time_get(); 776 duration = (now - *delta_start); 777 778 /* synchronize with interrupts */ 779 barrier(); 780 } while (int_counter != local_read(&osn_var->int_counter)); 781 782 /* 783 * This is an evidence of race conditions that cause 784 * a value to be "discounted" too much. 785 */ 786 if (duration < 0) 787 osnoise_taint("Negative duration!\n"); 788 789 *delta_start = 0; 790 791 return duration; 792 } 793 794 /* 795 * 796 * set_int_safe_time - Save the current time on *time, aware of interference 797 * 798 * Get the time, taking into consideration a possible interference from 799 * higher priority interrupts. 800 * 801 * See get_int_safe_duration() for an explanation. 802 */ 803 static u64 804 set_int_safe_time(struct osnoise_variables *osn_var, u64 *time) 805 { 806 u64 int_counter; 807 808 do { 809 int_counter = local_read(&osn_var->int_counter); 810 /* synchronize with interrupts */ 811 barrier(); 812 813 *time = time_get(); 814 815 /* synchronize with interrupts */ 816 barrier(); 817 } while (int_counter != local_read(&osn_var->int_counter)); 818 819 return int_counter; 820 } 821 822 #ifdef CONFIG_TIMERLAT_TRACER 823 /* 824 * copy_int_safe_time - Copy *src into *desc aware of interference 825 */ 826 static u64 827 copy_int_safe_time(struct osnoise_variables *osn_var, u64 *dst, u64 *src) 828 { 829 u64 int_counter; 830 831 do { 832 int_counter = local_read(&osn_var->int_counter); 833 /* synchronize with interrupts */ 834 barrier(); 835 836 *dst = *src; 837 838 /* synchronize with interrupts */ 839 barrier(); 840 } while (int_counter != local_read(&osn_var->int_counter)); 841 842 return int_counter; 843 } 844 #endif /* CONFIG_TIMERLAT_TRACER */ 845 846 /* 847 * trace_osnoise_callback - NMI entry/exit callback 848 * 849 * This function is called at the entry and exit NMI code. The bool enter 850 * distinguishes between either case. This function is used to note a NMI 851 * occurrence, compute the noise caused by the NMI, and to remove the noise 852 * it is potentially causing on other interference variables. 853 */ 854 void trace_osnoise_callback(bool enter) 855 { 856 struct osnoise_variables *osn_var = this_cpu_osn_var(); 857 u64 duration; 858 859 if (!osn_var->sampling) 860 return; 861 862 /* 863 * Currently trace_clock_local() calls sched_clock() and the 864 * generic version is not NMI safe. 865 */ 866 if (!IS_ENABLED(CONFIG_GENERIC_SCHED_CLOCK)) { 867 if (enter) { 868 osn_var->nmi.delta_start = time_get(); 869 local_inc(&osn_var->int_counter); 870 } else { 871 duration = time_get() - osn_var->nmi.delta_start; 872 873 trace_nmi_noise(osn_var->nmi.delta_start, duration); 874 875 cond_move_irq_delta_start(osn_var, duration); 876 cond_move_softirq_delta_start(osn_var, duration); 877 cond_move_thread_delta_start(osn_var, duration); 878 } 879 } 880 881 if (enter) 882 osn_var->nmi.count++; 883 } 884 885 /* 886 * osnoise_trace_irq_entry - Note the starting of an IRQ 887 * 888 * Save the starting time of an IRQ. As IRQs are non-preemptive to other IRQs, 889 * it is safe to use a single variable (ons_var->irq) to save the statistics. 890 * The arrival_time is used to report... the arrival time. The delta_start 891 * is used to compute the duration at the IRQ exit handler. See 892 * cond_move_irq_delta_start(). 893 */ 894 void osnoise_trace_irq_entry(int id) 895 { 896 struct osnoise_variables *osn_var = this_cpu_osn_var(); 897 898 if (!osn_var->sampling) 899 return; 900 /* 901 * This value will be used in the report, but not to compute 902 * the execution time, so it is safe to get it unsafe. 903 */ 904 osn_var->irq.arrival_time = time_get(); 905 set_int_safe_time(osn_var, &osn_var->irq.delta_start); 906 osn_var->irq.count++; 907 908 local_inc(&osn_var->int_counter); 909 } 910 911 /* 912 * osnoise_irq_exit - Note the end of an IRQ, sava data and trace 913 * 914 * Computes the duration of the IRQ noise, and trace it. Also discounts the 915 * interference from other sources of noise could be currently being accounted. 916 */ 917 void osnoise_trace_irq_exit(int id, const char *desc) 918 { 919 struct osnoise_variables *osn_var = this_cpu_osn_var(); 920 int duration; 921 922 if (!osn_var->sampling) 923 return; 924 925 duration = get_int_safe_duration(osn_var, &osn_var->irq.delta_start); 926 trace_irq_noise(id, desc, osn_var->irq.arrival_time, duration); 927 osn_var->irq.arrival_time = 0; 928 cond_move_softirq_delta_start(osn_var, duration); 929 cond_move_thread_delta_start(osn_var, duration); 930 } 931 932 /* 933 * trace_irqentry_callback - Callback to the irq:irq_entry traceevent 934 * 935 * Used to note the starting of an IRQ occurece. 936 */ 937 static void trace_irqentry_callback(void *data, int irq, 938 struct irqaction *action) 939 { 940 osnoise_trace_irq_entry(irq); 941 } 942 943 /* 944 * trace_irqexit_callback - Callback to the irq:irq_exit traceevent 945 * 946 * Used to note the end of an IRQ occurece. 947 */ 948 static void trace_irqexit_callback(void *data, int irq, 949 struct irqaction *action, int ret) 950 { 951 osnoise_trace_irq_exit(irq, action->name); 952 } 953 954 /* 955 * arch specific register function. 956 */ 957 int __weak osnoise_arch_register(void) 958 { 959 return 0; 960 } 961 962 /* 963 * arch specific unregister function. 964 */ 965 void __weak osnoise_arch_unregister(void) 966 { 967 return; 968 } 969 970 /* 971 * hook_irq_events - Hook IRQ handling events 972 * 973 * This function hooks the IRQ related callbacks to the respective trace 974 * events. 975 */ 976 static int hook_irq_events(void) 977 { 978 int ret; 979 980 ret = register_trace_irq_handler_entry(trace_irqentry_callback, NULL); 981 if (ret) 982 goto out_err; 983 984 ret = register_trace_irq_handler_exit(trace_irqexit_callback, NULL); 985 if (ret) 986 goto out_unregister_entry; 987 988 ret = osnoise_arch_register(); 989 if (ret) 990 goto out_irq_exit; 991 992 return 0; 993 994 out_irq_exit: 995 unregister_trace_irq_handler_exit(trace_irqexit_callback, NULL); 996 out_unregister_entry: 997 unregister_trace_irq_handler_entry(trace_irqentry_callback, NULL); 998 out_err: 999 return -EINVAL; 1000 } 1001 1002 /* 1003 * unhook_irq_events - Unhook IRQ handling events 1004 * 1005 * This function unhooks the IRQ related callbacks to the respective trace 1006 * events. 1007 */ 1008 static void unhook_irq_events(void) 1009 { 1010 osnoise_arch_unregister(); 1011 unregister_trace_irq_handler_exit(trace_irqexit_callback, NULL); 1012 unregister_trace_irq_handler_entry(trace_irqentry_callback, NULL); 1013 } 1014 1015 #ifndef CONFIG_PREEMPT_RT 1016 /* 1017 * trace_softirq_entry_callback - Note the starting of a softirq 1018 * 1019 * Save the starting time of a softirq. As softirqs are non-preemptive to 1020 * other softirqs, it is safe to use a single variable (ons_var->softirq) 1021 * to save the statistics. The arrival_time is used to report... the 1022 * arrival time. The delta_start is used to compute the duration at the 1023 * softirq exit handler. See cond_move_softirq_delta_start(). 1024 */ 1025 static void trace_softirq_entry_callback(void *data, unsigned int vec_nr) 1026 { 1027 struct osnoise_variables *osn_var = this_cpu_osn_var(); 1028 1029 if (!osn_var->sampling) 1030 return; 1031 /* 1032 * This value will be used in the report, but not to compute 1033 * the execution time, so it is safe to get it unsafe. 1034 */ 1035 osn_var->softirq.arrival_time = time_get(); 1036 set_int_safe_time(osn_var, &osn_var->softirq.delta_start); 1037 osn_var->softirq.count++; 1038 1039 local_inc(&osn_var->int_counter); 1040 } 1041 1042 /* 1043 * trace_softirq_exit_callback - Note the end of an softirq 1044 * 1045 * Computes the duration of the softirq noise, and trace it. Also discounts the 1046 * interference from other sources of noise could be currently being accounted. 1047 */ 1048 static void trace_softirq_exit_callback(void *data, unsigned int vec_nr) 1049 { 1050 struct osnoise_variables *osn_var = this_cpu_osn_var(); 1051 int duration; 1052 1053 if (!osn_var->sampling) 1054 return; 1055 1056 if (unlikely(timerlat_enabled())) 1057 if (!timerlat_softirq_exit(osn_var)) 1058 return; 1059 1060 duration = get_int_safe_duration(osn_var, &osn_var->softirq.delta_start); 1061 trace_softirq_noise(vec_nr, osn_var->softirq.arrival_time, duration); 1062 cond_move_thread_delta_start(osn_var, duration); 1063 osn_var->softirq.arrival_time = 0; 1064 } 1065 1066 /* 1067 * hook_softirq_events - Hook softirq handling events 1068 * 1069 * This function hooks the softirq related callbacks to the respective trace 1070 * events. 1071 */ 1072 static int hook_softirq_events(void) 1073 { 1074 int ret; 1075 1076 ret = register_trace_softirq_entry(trace_softirq_entry_callback, NULL); 1077 if (ret) 1078 goto out_err; 1079 1080 ret = register_trace_softirq_exit(trace_softirq_exit_callback, NULL); 1081 if (ret) 1082 goto out_unreg_entry; 1083 1084 return 0; 1085 1086 out_unreg_entry: 1087 unregister_trace_softirq_entry(trace_softirq_entry_callback, NULL); 1088 out_err: 1089 return -EINVAL; 1090 } 1091 1092 /* 1093 * unhook_softirq_events - Unhook softirq handling events 1094 * 1095 * This function hooks the softirq related callbacks to the respective trace 1096 * events. 1097 */ 1098 static void unhook_softirq_events(void) 1099 { 1100 unregister_trace_softirq_entry(trace_softirq_entry_callback, NULL); 1101 unregister_trace_softirq_exit(trace_softirq_exit_callback, NULL); 1102 } 1103 #else /* CONFIG_PREEMPT_RT */ 1104 /* 1105 * softirq are threads on the PREEMPT_RT mode. 1106 */ 1107 static int hook_softirq_events(void) 1108 { 1109 return 0; 1110 } 1111 static void unhook_softirq_events(void) 1112 { 1113 } 1114 #endif 1115 1116 /* 1117 * thread_entry - Record the starting of a thread noise window 1118 * 1119 * It saves the context switch time for a noisy thread, and increments 1120 * the interference counters. 1121 */ 1122 static void 1123 thread_entry(struct osnoise_variables *osn_var, struct task_struct *t) 1124 { 1125 if (!osn_var->sampling) 1126 return; 1127 /* 1128 * The arrival time will be used in the report, but not to compute 1129 * the execution time, so it is safe to get it unsafe. 1130 */ 1131 osn_var->thread.arrival_time = time_get(); 1132 1133 set_int_safe_time(osn_var, &osn_var->thread.delta_start); 1134 1135 osn_var->thread.count++; 1136 local_inc(&osn_var->int_counter); 1137 } 1138 1139 /* 1140 * thread_exit - Report the end of a thread noise window 1141 * 1142 * It computes the total noise from a thread, tracing if needed. 1143 */ 1144 static void 1145 thread_exit(struct osnoise_variables *osn_var, struct task_struct *t) 1146 { 1147 int duration; 1148 1149 if (!osn_var->sampling) 1150 return; 1151 1152 if (unlikely(timerlat_enabled())) 1153 if (!timerlat_thread_exit(osn_var)) 1154 return; 1155 1156 duration = get_int_safe_duration(osn_var, &osn_var->thread.delta_start); 1157 1158 trace_thread_noise(t, osn_var->thread.arrival_time, duration); 1159 1160 osn_var->thread.arrival_time = 0; 1161 } 1162 1163 /* 1164 * trace_sched_switch - sched:sched_switch trace event handler 1165 * 1166 * This function is hooked to the sched:sched_switch trace event, and it is 1167 * used to record the beginning and to report the end of a thread noise window. 1168 */ 1169 static void 1170 trace_sched_switch_callback(void *data, bool preempt, 1171 struct task_struct *p, 1172 struct task_struct *n, 1173 unsigned int prev_state) 1174 { 1175 struct osnoise_variables *osn_var = this_cpu_osn_var(); 1176 1177 if (p->pid != osn_var->pid) 1178 thread_exit(osn_var, p); 1179 1180 if (n->pid != osn_var->pid) 1181 thread_entry(osn_var, n); 1182 } 1183 1184 /* 1185 * hook_thread_events - Hook the insturmentation for thread noise 1186 * 1187 * Hook the osnoise tracer callbacks to handle the noise from other 1188 * threads on the necessary kernel events. 1189 */ 1190 static int hook_thread_events(void) 1191 { 1192 int ret; 1193 1194 ret = register_trace_sched_switch(trace_sched_switch_callback, NULL); 1195 if (ret) 1196 return -EINVAL; 1197 1198 return 0; 1199 } 1200 1201 /* 1202 * unhook_thread_events - *nhook the insturmentation for thread noise 1203 * 1204 * Unook the osnoise tracer callbacks to handle the noise from other 1205 * threads on the necessary kernel events. 1206 */ 1207 static void unhook_thread_events(void) 1208 { 1209 unregister_trace_sched_switch(trace_sched_switch_callback, NULL); 1210 } 1211 1212 /* 1213 * save_osn_sample_stats - Save the osnoise_sample statistics 1214 * 1215 * Save the osnoise_sample statistics before the sampling phase. These 1216 * values will be used later to compute the diff betwneen the statistics 1217 * before and after the osnoise sampling. 1218 */ 1219 static void 1220 save_osn_sample_stats(struct osnoise_variables *osn_var, struct osnoise_sample *s) 1221 { 1222 s->nmi_count = osn_var->nmi.count; 1223 s->irq_count = osn_var->irq.count; 1224 s->softirq_count = osn_var->softirq.count; 1225 s->thread_count = osn_var->thread.count; 1226 } 1227 1228 /* 1229 * diff_osn_sample_stats - Compute the osnoise_sample statistics 1230 * 1231 * After a sample period, compute the difference on the osnoise_sample 1232 * statistics. The struct osnoise_sample *s contains the statistics saved via 1233 * save_osn_sample_stats() before the osnoise sampling. 1234 */ 1235 static void 1236 diff_osn_sample_stats(struct osnoise_variables *osn_var, struct osnoise_sample *s) 1237 { 1238 s->nmi_count = osn_var->nmi.count - s->nmi_count; 1239 s->irq_count = osn_var->irq.count - s->irq_count; 1240 s->softirq_count = osn_var->softirq.count - s->softirq_count; 1241 s->thread_count = osn_var->thread.count - s->thread_count; 1242 } 1243 1244 /* 1245 * osnoise_stop_tracing - Stop tracing and the tracer. 1246 */ 1247 static __always_inline void osnoise_stop_tracing(void) 1248 { 1249 struct osnoise_instance *inst; 1250 struct trace_array *tr; 1251 1252 rcu_read_lock(); 1253 list_for_each_entry_rcu(inst, &osnoise_instances, list) { 1254 tr = inst->tr; 1255 trace_array_printk_buf(tr->array_buffer.buffer, _THIS_IP_, 1256 "stop tracing hit on cpu %d\n", smp_processor_id()); 1257 1258 tracer_tracing_off(tr); 1259 } 1260 rcu_read_unlock(); 1261 } 1262 1263 /* 1264 * notify_new_max_latency - Notify a new max latency via fsnotify interface. 1265 */ 1266 static void notify_new_max_latency(u64 latency) 1267 { 1268 struct osnoise_instance *inst; 1269 struct trace_array *tr; 1270 1271 rcu_read_lock(); 1272 list_for_each_entry_rcu(inst, &osnoise_instances, list) { 1273 tr = inst->tr; 1274 if (tr->max_latency < latency) { 1275 tr->max_latency = latency; 1276 latency_fsnotify(tr); 1277 } 1278 } 1279 rcu_read_unlock(); 1280 } 1281 1282 /* 1283 * run_osnoise - Sample the time and look for osnoise 1284 * 1285 * Used to capture the time, looking for potential osnoise latency repeatedly. 1286 * Different from hwlat_detector, it is called with preemption and interrupts 1287 * enabled. This allows irqs, softirqs and threads to run, interfering on the 1288 * osnoise sampling thread, as they would do with a regular thread. 1289 */ 1290 static int run_osnoise(void) 1291 { 1292 struct osnoise_variables *osn_var = this_cpu_osn_var(); 1293 u64 start, sample, last_sample; 1294 u64 last_int_count, int_count; 1295 s64 noise = 0, max_noise = 0; 1296 s64 total, last_total = 0; 1297 struct osnoise_sample s; 1298 unsigned int threshold; 1299 u64 runtime, stop_in; 1300 u64 sum_noise = 0; 1301 int hw_count = 0; 1302 int ret = -1; 1303 1304 /* 1305 * Considers the current thread as the workload. 1306 */ 1307 osn_var->pid = current->pid; 1308 1309 /* 1310 * Save the current stats for the diff 1311 */ 1312 save_osn_sample_stats(osn_var, &s); 1313 1314 /* 1315 * if threshold is 0, use the default value of 5 us. 1316 */ 1317 threshold = tracing_thresh ? : 5000; 1318 1319 /* 1320 * Make sure NMIs see sampling first 1321 */ 1322 osn_var->sampling = true; 1323 barrier(); 1324 1325 /* 1326 * Transform the *_us config to nanoseconds to avoid the 1327 * division on the main loop. 1328 */ 1329 runtime = osnoise_data.sample_runtime * NSEC_PER_USEC; 1330 stop_in = osnoise_data.stop_tracing * NSEC_PER_USEC; 1331 1332 /* 1333 * Start timestemp 1334 */ 1335 start = time_get(); 1336 1337 /* 1338 * "previous" loop. 1339 */ 1340 last_int_count = set_int_safe_time(osn_var, &last_sample); 1341 1342 do { 1343 /* 1344 * Get sample! 1345 */ 1346 int_count = set_int_safe_time(osn_var, &sample); 1347 1348 noise = time_sub(sample, last_sample); 1349 1350 /* 1351 * This shouldn't happen. 1352 */ 1353 if (noise < 0) { 1354 osnoise_taint("negative noise!"); 1355 goto out; 1356 } 1357 1358 /* 1359 * Sample runtime. 1360 */ 1361 total = time_sub(sample, start); 1362 1363 /* 1364 * Check for possible overflows. 1365 */ 1366 if (total < last_total) { 1367 osnoise_taint("total overflow!"); 1368 break; 1369 } 1370 1371 last_total = total; 1372 1373 if (noise >= threshold) { 1374 int interference = int_count - last_int_count; 1375 1376 if (noise > max_noise) 1377 max_noise = noise; 1378 1379 if (!interference) 1380 hw_count++; 1381 1382 sum_noise += noise; 1383 1384 trace_sample_threshold(last_sample, noise, interference); 1385 1386 if (osnoise_data.stop_tracing) 1387 if (noise > stop_in) 1388 osnoise_stop_tracing(); 1389 } 1390 1391 /* 1392 * In some cases, notably when running on a nohz_full CPU with 1393 * a stopped tick PREEMPT_RCU has no way to account for QSs. 1394 * This will eventually cause unwarranted noise as PREEMPT_RCU 1395 * will force preemption as the means of ending the current 1396 * grace period. We avoid this problem by calling 1397 * rcu_momentary_dyntick_idle(), which performs a zero duration 1398 * EQS allowing PREEMPT_RCU to end the current grace period. 1399 * This call shouldn't be wrapped inside an RCU critical 1400 * section. 1401 * 1402 * Note that in non PREEMPT_RCU kernels QSs are handled through 1403 * cond_resched() 1404 */ 1405 if (IS_ENABLED(CONFIG_PREEMPT_RCU)) { 1406 local_irq_disable(); 1407 rcu_momentary_dyntick_idle(); 1408 local_irq_enable(); 1409 } 1410 1411 /* 1412 * For the non-preemptive kernel config: let threads runs, if 1413 * they so wish. 1414 */ 1415 cond_resched(); 1416 1417 last_sample = sample; 1418 last_int_count = int_count; 1419 1420 } while (total < runtime && !kthread_should_stop()); 1421 1422 /* 1423 * Finish the above in the view for interrupts. 1424 */ 1425 barrier(); 1426 1427 osn_var->sampling = false; 1428 1429 /* 1430 * Make sure sampling data is no longer updated. 1431 */ 1432 barrier(); 1433 1434 /* 1435 * Save noise info. 1436 */ 1437 s.noise = time_to_us(sum_noise); 1438 s.runtime = time_to_us(total); 1439 s.max_sample = time_to_us(max_noise); 1440 s.hw_count = hw_count; 1441 1442 /* Save interference stats info */ 1443 diff_osn_sample_stats(osn_var, &s); 1444 1445 trace_osnoise_sample(&s); 1446 1447 notify_new_max_latency(max_noise); 1448 1449 if (osnoise_data.stop_tracing_total) 1450 if (s.noise > osnoise_data.stop_tracing_total) 1451 osnoise_stop_tracing(); 1452 1453 return 0; 1454 out: 1455 return ret; 1456 } 1457 1458 static struct cpumask osnoise_cpumask; 1459 static struct cpumask save_cpumask; 1460 1461 /* 1462 * osnoise_sleep - sleep until the next period 1463 */ 1464 static void osnoise_sleep(void) 1465 { 1466 u64 interval; 1467 ktime_t wake_time; 1468 1469 mutex_lock(&interface_lock); 1470 interval = osnoise_data.sample_period - osnoise_data.sample_runtime; 1471 mutex_unlock(&interface_lock); 1472 1473 /* 1474 * differently from hwlat_detector, the osnoise tracer can run 1475 * without a pause because preemption is on. 1476 */ 1477 if (!interval) { 1478 /* Let synchronize_rcu_tasks() make progress */ 1479 cond_resched_tasks_rcu_qs(); 1480 return; 1481 } 1482 1483 wake_time = ktime_add_us(ktime_get(), interval); 1484 __set_current_state(TASK_INTERRUPTIBLE); 1485 1486 while (schedule_hrtimeout_range(&wake_time, 0, HRTIMER_MODE_ABS)) { 1487 if (kthread_should_stop()) 1488 break; 1489 } 1490 } 1491 1492 /* 1493 * osnoise_main - The osnoise detection kernel thread 1494 * 1495 * Calls run_osnoise() function to measure the osnoise for the configured runtime, 1496 * every period. 1497 */ 1498 static int osnoise_main(void *data) 1499 { 1500 1501 while (!kthread_should_stop()) { 1502 run_osnoise(); 1503 osnoise_sleep(); 1504 } 1505 1506 return 0; 1507 } 1508 1509 #ifdef CONFIG_TIMERLAT_TRACER 1510 /* 1511 * timerlat_irq - hrtimer handler for timerlat. 1512 */ 1513 static enum hrtimer_restart timerlat_irq(struct hrtimer *timer) 1514 { 1515 struct osnoise_variables *osn_var = this_cpu_osn_var(); 1516 struct timerlat_variables *tlat; 1517 struct timerlat_sample s; 1518 u64 now; 1519 u64 diff; 1520 1521 /* 1522 * I am not sure if the timer was armed for this CPU. So, get 1523 * the timerlat struct from the timer itself, not from this 1524 * CPU. 1525 */ 1526 tlat = container_of(timer, struct timerlat_variables, timer); 1527 1528 now = ktime_to_ns(hrtimer_cb_get_time(&tlat->timer)); 1529 1530 /* 1531 * Enable the osnoise: events for thread an softirq. 1532 */ 1533 tlat->tracing_thread = true; 1534 1535 osn_var->thread.arrival_time = time_get(); 1536 1537 /* 1538 * A hardirq is running: the timer IRQ. It is for sure preempting 1539 * a thread, and potentially preempting a softirq. 1540 * 1541 * At this point, it is not interesting to know the duration of the 1542 * preempted thread (and maybe softirq), but how much time they will 1543 * delay the beginning of the execution of the timer thread. 1544 * 1545 * To get the correct (net) delay added by the softirq, its delta_start 1546 * is set as the IRQ one. In this way, at the return of the IRQ, the delta 1547 * start of the sofitrq will be zeroed, accounting then only the time 1548 * after that. 1549 * 1550 * The thread follows the same principle. However, if a softirq is 1551 * running, the thread needs to receive the softirq delta_start. The 1552 * reason being is that the softirq will be the last to be unfolded, 1553 * resseting the thread delay to zero. 1554 * 1555 * The PREEMPT_RT is a special case, though. As softirqs run as threads 1556 * on RT, moving the thread is enough. 1557 */ 1558 if (!IS_ENABLED(CONFIG_PREEMPT_RT) && osn_var->softirq.delta_start) { 1559 copy_int_safe_time(osn_var, &osn_var->thread.delta_start, 1560 &osn_var->softirq.delta_start); 1561 1562 copy_int_safe_time(osn_var, &osn_var->softirq.delta_start, 1563 &osn_var->irq.delta_start); 1564 } else { 1565 copy_int_safe_time(osn_var, &osn_var->thread.delta_start, 1566 &osn_var->irq.delta_start); 1567 } 1568 1569 /* 1570 * Compute the current time with the expected time. 1571 */ 1572 diff = now - tlat->abs_period; 1573 1574 tlat->count++; 1575 s.seqnum = tlat->count; 1576 s.timer_latency = diff; 1577 s.context = IRQ_CONTEXT; 1578 1579 trace_timerlat_sample(&s); 1580 1581 if (osnoise_data.stop_tracing) { 1582 if (time_to_us(diff) >= osnoise_data.stop_tracing) { 1583 1584 /* 1585 * At this point, if stop_tracing is set and <= print_stack, 1586 * print_stack is set and would be printed in the thread handler. 1587 * 1588 * Thus, print the stack trace as it is helpful to define the 1589 * root cause of an IRQ latency. 1590 */ 1591 if (osnoise_data.stop_tracing <= osnoise_data.print_stack) { 1592 timerlat_save_stack(0); 1593 timerlat_dump_stack(time_to_us(diff)); 1594 } 1595 1596 osnoise_stop_tracing(); 1597 notify_new_max_latency(diff); 1598 1599 return HRTIMER_NORESTART; 1600 } 1601 } 1602 1603 wake_up_process(tlat->kthread); 1604 1605 if (osnoise_data.print_stack) 1606 timerlat_save_stack(0); 1607 1608 return HRTIMER_NORESTART; 1609 } 1610 1611 /* 1612 * wait_next_period - Wait for the next period for timerlat 1613 */ 1614 static int wait_next_period(struct timerlat_variables *tlat) 1615 { 1616 ktime_t next_abs_period, now; 1617 u64 rel_period = osnoise_data.timerlat_period * 1000; 1618 1619 now = hrtimer_cb_get_time(&tlat->timer); 1620 next_abs_period = ns_to_ktime(tlat->abs_period + rel_period); 1621 1622 /* 1623 * Save the next abs_period. 1624 */ 1625 tlat->abs_period = (u64) ktime_to_ns(next_abs_period); 1626 1627 /* 1628 * If the new abs_period is in the past, skip the activation. 1629 */ 1630 while (ktime_compare(now, next_abs_period) > 0) { 1631 next_abs_period = ns_to_ktime(tlat->abs_period + rel_period); 1632 tlat->abs_period = (u64) ktime_to_ns(next_abs_period); 1633 } 1634 1635 set_current_state(TASK_INTERRUPTIBLE); 1636 1637 hrtimer_start(&tlat->timer, next_abs_period, HRTIMER_MODE_ABS_PINNED_HARD); 1638 schedule(); 1639 return 1; 1640 } 1641 1642 /* 1643 * timerlat_main- Timerlat main 1644 */ 1645 static int timerlat_main(void *data) 1646 { 1647 struct osnoise_variables *osn_var = this_cpu_osn_var(); 1648 struct timerlat_variables *tlat = this_cpu_tmr_var(); 1649 struct timerlat_sample s; 1650 struct sched_param sp; 1651 u64 now, diff; 1652 1653 /* 1654 * Make the thread RT, that is how cyclictest is usually used. 1655 */ 1656 sp.sched_priority = DEFAULT_TIMERLAT_PRIO; 1657 sched_setscheduler_nocheck(current, SCHED_FIFO, &sp); 1658 1659 tlat->count = 0; 1660 tlat->tracing_thread = false; 1661 1662 hrtimer_init(&tlat->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED_HARD); 1663 tlat->timer.function = timerlat_irq; 1664 tlat->kthread = current; 1665 osn_var->pid = current->pid; 1666 /* 1667 * Anotate the arrival time. 1668 */ 1669 tlat->abs_period = hrtimer_cb_get_time(&tlat->timer); 1670 1671 wait_next_period(tlat); 1672 1673 osn_var->sampling = 1; 1674 1675 while (!kthread_should_stop()) { 1676 now = ktime_to_ns(hrtimer_cb_get_time(&tlat->timer)); 1677 diff = now - tlat->abs_period; 1678 1679 s.seqnum = tlat->count; 1680 s.timer_latency = diff; 1681 s.context = THREAD_CONTEXT; 1682 1683 trace_timerlat_sample(&s); 1684 1685 timerlat_dump_stack(time_to_us(diff)); 1686 1687 tlat->tracing_thread = false; 1688 if (osnoise_data.stop_tracing_total) 1689 if (time_to_us(diff) >= osnoise_data.stop_tracing_total) 1690 osnoise_stop_tracing(); 1691 1692 wait_next_period(tlat); 1693 } 1694 1695 hrtimer_cancel(&tlat->timer); 1696 return 0; 1697 } 1698 #else /* CONFIG_TIMERLAT_TRACER */ 1699 static int timerlat_main(void *data) 1700 { 1701 return 0; 1702 } 1703 #endif /* CONFIG_TIMERLAT_TRACER */ 1704 1705 /* 1706 * stop_kthread - stop a workload thread 1707 */ 1708 static void stop_kthread(unsigned int cpu) 1709 { 1710 struct task_struct *kthread; 1711 1712 kthread = per_cpu(per_cpu_osnoise_var, cpu).kthread; 1713 if (kthread) 1714 kthread_stop(kthread); 1715 per_cpu(per_cpu_osnoise_var, cpu).kthread = NULL; 1716 } 1717 1718 /* 1719 * stop_per_cpu_kthread - Stop per-cpu threads 1720 * 1721 * Stop the osnoise sampling htread. Use this on unload and at system 1722 * shutdown. 1723 */ 1724 static void stop_per_cpu_kthreads(void) 1725 { 1726 int cpu; 1727 1728 cpus_read_lock(); 1729 1730 for_each_online_cpu(cpu) 1731 stop_kthread(cpu); 1732 1733 cpus_read_unlock(); 1734 } 1735 1736 /* 1737 * start_kthread - Start a workload tread 1738 */ 1739 static int start_kthread(unsigned int cpu) 1740 { 1741 struct task_struct *kthread; 1742 void *main = osnoise_main; 1743 char comm[24]; 1744 1745 if (timerlat_enabled()) { 1746 snprintf(comm, 24, "timerlat/%d", cpu); 1747 main = timerlat_main; 1748 } else { 1749 snprintf(comm, 24, "osnoise/%d", cpu); 1750 } 1751 1752 kthread = kthread_run_on_cpu(main, NULL, cpu, comm); 1753 1754 if (IS_ERR(kthread)) { 1755 pr_err(BANNER "could not start sampling thread\n"); 1756 stop_per_cpu_kthreads(); 1757 return -ENOMEM; 1758 } 1759 1760 per_cpu(per_cpu_osnoise_var, cpu).kthread = kthread; 1761 1762 return 0; 1763 } 1764 1765 /* 1766 * start_per_cpu_kthread - Kick off per-cpu osnoise sampling kthreads 1767 * 1768 * This starts the kernel thread that will look for osnoise on many 1769 * cpus. 1770 */ 1771 static int start_per_cpu_kthreads(void) 1772 { 1773 struct cpumask *current_mask = &save_cpumask; 1774 int retval = 0; 1775 int cpu; 1776 1777 cpus_read_lock(); 1778 /* 1779 * Run only on online CPUs in which osnoise is allowed to run. 1780 */ 1781 cpumask_and(current_mask, cpu_online_mask, &osnoise_cpumask); 1782 1783 for_each_possible_cpu(cpu) 1784 per_cpu(per_cpu_osnoise_var, cpu).kthread = NULL; 1785 1786 for_each_cpu(cpu, current_mask) { 1787 retval = start_kthread(cpu); 1788 if (retval) { 1789 stop_per_cpu_kthreads(); 1790 break; 1791 } 1792 } 1793 1794 cpus_read_unlock(); 1795 1796 return retval; 1797 } 1798 1799 #ifdef CONFIG_HOTPLUG_CPU 1800 static void osnoise_hotplug_workfn(struct work_struct *dummy) 1801 { 1802 unsigned int cpu = smp_processor_id(); 1803 1804 mutex_lock(&trace_types_lock); 1805 1806 if (!osnoise_has_registered_instances()) 1807 goto out_unlock_trace; 1808 1809 mutex_lock(&interface_lock); 1810 cpus_read_lock(); 1811 1812 if (!cpumask_test_cpu(cpu, &osnoise_cpumask)) 1813 goto out_unlock; 1814 1815 start_kthread(cpu); 1816 1817 out_unlock: 1818 cpus_read_unlock(); 1819 mutex_unlock(&interface_lock); 1820 out_unlock_trace: 1821 mutex_unlock(&trace_types_lock); 1822 } 1823 1824 static DECLARE_WORK(osnoise_hotplug_work, osnoise_hotplug_workfn); 1825 1826 /* 1827 * osnoise_cpu_init - CPU hotplug online callback function 1828 */ 1829 static int osnoise_cpu_init(unsigned int cpu) 1830 { 1831 schedule_work_on(cpu, &osnoise_hotplug_work); 1832 return 0; 1833 } 1834 1835 /* 1836 * osnoise_cpu_die - CPU hotplug offline callback function 1837 */ 1838 static int osnoise_cpu_die(unsigned int cpu) 1839 { 1840 stop_kthread(cpu); 1841 return 0; 1842 } 1843 1844 static void osnoise_init_hotplug_support(void) 1845 { 1846 int ret; 1847 1848 ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "trace/osnoise:online", 1849 osnoise_cpu_init, osnoise_cpu_die); 1850 if (ret < 0) 1851 pr_warn(BANNER "Error to init cpu hotplug support\n"); 1852 1853 return; 1854 } 1855 #else /* CONFIG_HOTPLUG_CPU */ 1856 static void osnoise_init_hotplug_support(void) 1857 { 1858 return; 1859 } 1860 #endif /* CONFIG_HOTPLUG_CPU */ 1861 1862 /* 1863 * osnoise_cpus_read - Read function for reading the "cpus" file 1864 * @filp: The active open file structure 1865 * @ubuf: The userspace provided buffer to read value into 1866 * @cnt: The maximum number of bytes to read 1867 * @ppos: The current "file" position 1868 * 1869 * Prints the "cpus" output into the user-provided buffer. 1870 */ 1871 static ssize_t 1872 osnoise_cpus_read(struct file *filp, char __user *ubuf, size_t count, 1873 loff_t *ppos) 1874 { 1875 char *mask_str; 1876 int len; 1877 1878 mutex_lock(&interface_lock); 1879 1880 len = snprintf(NULL, 0, "%*pbl\n", cpumask_pr_args(&osnoise_cpumask)) + 1; 1881 mask_str = kmalloc(len, GFP_KERNEL); 1882 if (!mask_str) { 1883 count = -ENOMEM; 1884 goto out_unlock; 1885 } 1886 1887 len = snprintf(mask_str, len, "%*pbl\n", cpumask_pr_args(&osnoise_cpumask)); 1888 if (len >= count) { 1889 count = -EINVAL; 1890 goto out_free; 1891 } 1892 1893 count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len); 1894 1895 out_free: 1896 kfree(mask_str); 1897 out_unlock: 1898 mutex_unlock(&interface_lock); 1899 1900 return count; 1901 } 1902 1903 /* 1904 * osnoise_cpus_write - Write function for "cpus" entry 1905 * @filp: The active open file structure 1906 * @ubuf: The user buffer that contains the value to write 1907 * @cnt: The maximum number of bytes to write to "file" 1908 * @ppos: The current position in @file 1909 * 1910 * This function provides a write implementation for the "cpus" 1911 * interface to the osnoise trace. By default, it lists all CPUs, 1912 * in this way, allowing osnoise threads to run on any online CPU 1913 * of the system. It serves to restrict the execution of osnoise to the 1914 * set of CPUs writing via this interface. Why not use "tracing_cpumask"? 1915 * Because the user might be interested in tracing what is running on 1916 * other CPUs. For instance, one might run osnoise in one HT CPU 1917 * while observing what is running on the sibling HT CPU. 1918 */ 1919 static ssize_t 1920 osnoise_cpus_write(struct file *filp, const char __user *ubuf, size_t count, 1921 loff_t *ppos) 1922 { 1923 cpumask_var_t osnoise_cpumask_new; 1924 int running, err; 1925 char buf[256]; 1926 1927 if (count >= 256) 1928 return -EINVAL; 1929 1930 if (copy_from_user(buf, ubuf, count)) 1931 return -EFAULT; 1932 1933 if (!zalloc_cpumask_var(&osnoise_cpumask_new, GFP_KERNEL)) 1934 return -ENOMEM; 1935 1936 err = cpulist_parse(buf, osnoise_cpumask_new); 1937 if (err) 1938 goto err_free; 1939 1940 /* 1941 * trace_types_lock is taken to avoid concurrency on start/stop. 1942 */ 1943 mutex_lock(&trace_types_lock); 1944 running = osnoise_has_registered_instances(); 1945 if (running) 1946 stop_per_cpu_kthreads(); 1947 1948 mutex_lock(&interface_lock); 1949 /* 1950 * osnoise_cpumask is read by CPU hotplug operations. 1951 */ 1952 cpus_read_lock(); 1953 1954 cpumask_copy(&osnoise_cpumask, osnoise_cpumask_new); 1955 1956 cpus_read_unlock(); 1957 mutex_unlock(&interface_lock); 1958 1959 if (running) 1960 start_per_cpu_kthreads(); 1961 mutex_unlock(&trace_types_lock); 1962 1963 free_cpumask_var(osnoise_cpumask_new); 1964 return count; 1965 1966 err_free: 1967 free_cpumask_var(osnoise_cpumask_new); 1968 1969 return err; 1970 } 1971 1972 /* 1973 * osnoise/runtime_us: cannot be greater than the period. 1974 */ 1975 static struct trace_min_max_param osnoise_runtime = { 1976 .lock = &interface_lock, 1977 .val = &osnoise_data.sample_runtime, 1978 .max = &osnoise_data.sample_period, 1979 .min = NULL, 1980 }; 1981 1982 /* 1983 * osnoise/period_us: cannot be smaller than the runtime. 1984 */ 1985 static struct trace_min_max_param osnoise_period = { 1986 .lock = &interface_lock, 1987 .val = &osnoise_data.sample_period, 1988 .max = NULL, 1989 .min = &osnoise_data.sample_runtime, 1990 }; 1991 1992 /* 1993 * osnoise/stop_tracing_us: no limit. 1994 */ 1995 static struct trace_min_max_param osnoise_stop_tracing_in = { 1996 .lock = &interface_lock, 1997 .val = &osnoise_data.stop_tracing, 1998 .max = NULL, 1999 .min = NULL, 2000 }; 2001 2002 /* 2003 * osnoise/stop_tracing_total_us: no limit. 2004 */ 2005 static struct trace_min_max_param osnoise_stop_tracing_total = { 2006 .lock = &interface_lock, 2007 .val = &osnoise_data.stop_tracing_total, 2008 .max = NULL, 2009 .min = NULL, 2010 }; 2011 2012 #ifdef CONFIG_TIMERLAT_TRACER 2013 /* 2014 * osnoise/print_stack: print the stacktrace of the IRQ handler if the total 2015 * latency is higher than val. 2016 */ 2017 static struct trace_min_max_param osnoise_print_stack = { 2018 .lock = &interface_lock, 2019 .val = &osnoise_data.print_stack, 2020 .max = NULL, 2021 .min = NULL, 2022 }; 2023 2024 /* 2025 * osnoise/timerlat_period: min 100 us, max 1 s 2026 */ 2027 u64 timerlat_min_period = 100; 2028 u64 timerlat_max_period = 1000000; 2029 static struct trace_min_max_param timerlat_period = { 2030 .lock = &interface_lock, 2031 .val = &osnoise_data.timerlat_period, 2032 .max = &timerlat_max_period, 2033 .min = &timerlat_min_period, 2034 }; 2035 #endif 2036 2037 static const struct file_operations cpus_fops = { 2038 .open = tracing_open_generic, 2039 .read = osnoise_cpus_read, 2040 .write = osnoise_cpus_write, 2041 .llseek = generic_file_llseek, 2042 }; 2043 2044 #ifdef CONFIG_TIMERLAT_TRACER 2045 #ifdef CONFIG_STACKTRACE 2046 static int init_timerlat_stack_tracefs(struct dentry *top_dir) 2047 { 2048 struct dentry *tmp; 2049 2050 tmp = tracefs_create_file("print_stack", TRACE_MODE_WRITE, top_dir, 2051 &osnoise_print_stack, &trace_min_max_fops); 2052 if (!tmp) 2053 return -ENOMEM; 2054 2055 return 0; 2056 } 2057 #else /* CONFIG_STACKTRACE */ 2058 static int init_timerlat_stack_tracefs(struct dentry *top_dir) 2059 { 2060 return 0; 2061 } 2062 #endif /* CONFIG_STACKTRACE */ 2063 2064 /* 2065 * init_timerlat_tracefs - A function to initialize the timerlat interface files 2066 */ 2067 static int init_timerlat_tracefs(struct dentry *top_dir) 2068 { 2069 struct dentry *tmp; 2070 2071 tmp = tracefs_create_file("timerlat_period_us", TRACE_MODE_WRITE, top_dir, 2072 &timerlat_period, &trace_min_max_fops); 2073 if (!tmp) 2074 return -ENOMEM; 2075 2076 return init_timerlat_stack_tracefs(top_dir); 2077 } 2078 #else /* CONFIG_TIMERLAT_TRACER */ 2079 static int init_timerlat_tracefs(struct dentry *top_dir) 2080 { 2081 return 0; 2082 } 2083 #endif /* CONFIG_TIMERLAT_TRACER */ 2084 2085 /* 2086 * init_tracefs - A function to initialize the tracefs interface files 2087 * 2088 * This function creates entries in tracefs for "osnoise" and "timerlat". 2089 * It creates these directories in the tracing directory, and within that 2090 * directory the use can change and view the configs. 2091 */ 2092 static int init_tracefs(void) 2093 { 2094 struct dentry *top_dir; 2095 struct dentry *tmp; 2096 int ret; 2097 2098 ret = tracing_init_dentry(); 2099 if (ret) 2100 return -ENOMEM; 2101 2102 top_dir = tracefs_create_dir("osnoise", NULL); 2103 if (!top_dir) 2104 return 0; 2105 2106 tmp = tracefs_create_file("period_us", TRACE_MODE_WRITE, top_dir, 2107 &osnoise_period, &trace_min_max_fops); 2108 if (!tmp) 2109 goto err; 2110 2111 tmp = tracefs_create_file("runtime_us", TRACE_MODE_WRITE, top_dir, 2112 &osnoise_runtime, &trace_min_max_fops); 2113 if (!tmp) 2114 goto err; 2115 2116 tmp = tracefs_create_file("stop_tracing_us", TRACE_MODE_WRITE, top_dir, 2117 &osnoise_stop_tracing_in, &trace_min_max_fops); 2118 if (!tmp) 2119 goto err; 2120 2121 tmp = tracefs_create_file("stop_tracing_total_us", TRACE_MODE_WRITE, top_dir, 2122 &osnoise_stop_tracing_total, &trace_min_max_fops); 2123 if (!tmp) 2124 goto err; 2125 2126 tmp = trace_create_file("cpus", TRACE_MODE_WRITE, top_dir, NULL, &cpus_fops); 2127 if (!tmp) 2128 goto err; 2129 2130 ret = init_timerlat_tracefs(top_dir); 2131 if (ret) 2132 goto err; 2133 2134 return 0; 2135 2136 err: 2137 tracefs_remove(top_dir); 2138 return -ENOMEM; 2139 } 2140 2141 static int osnoise_hook_events(void) 2142 { 2143 int retval; 2144 2145 /* 2146 * Trace is already hooked, we are re-enabling from 2147 * a stop_tracing_*. 2148 */ 2149 if (trace_osnoise_callback_enabled) 2150 return 0; 2151 2152 retval = hook_irq_events(); 2153 if (retval) 2154 return -EINVAL; 2155 2156 retval = hook_softirq_events(); 2157 if (retval) 2158 goto out_unhook_irq; 2159 2160 retval = hook_thread_events(); 2161 /* 2162 * All fine! 2163 */ 2164 if (!retval) 2165 return 0; 2166 2167 unhook_softirq_events(); 2168 out_unhook_irq: 2169 unhook_irq_events(); 2170 return -EINVAL; 2171 } 2172 2173 static void osnoise_unhook_events(void) 2174 { 2175 unhook_thread_events(); 2176 unhook_softirq_events(); 2177 unhook_irq_events(); 2178 } 2179 2180 /* 2181 * osnoise_workload_start - start the workload and hook to events 2182 */ 2183 static int osnoise_workload_start(void) 2184 { 2185 int retval; 2186 2187 /* 2188 * Instances need to be registered after calling workload 2189 * start. Hence, if there is already an instance, the 2190 * workload was already registered. Otherwise, this 2191 * code is on the way to register the first instance, 2192 * and the workload will start. 2193 */ 2194 if (osnoise_has_registered_instances()) 2195 return 0; 2196 2197 osn_var_reset_all(); 2198 2199 retval = osnoise_hook_events(); 2200 if (retval) 2201 return retval; 2202 2203 /* 2204 * Make sure that ftrace_nmi_enter/exit() see reset values 2205 * before enabling trace_osnoise_callback_enabled. 2206 */ 2207 barrier(); 2208 trace_osnoise_callback_enabled = true; 2209 2210 retval = start_per_cpu_kthreads(); 2211 if (retval) { 2212 trace_osnoise_callback_enabled = false; 2213 /* 2214 * Make sure that ftrace_nmi_enter/exit() see 2215 * trace_osnoise_callback_enabled as false before continuing. 2216 */ 2217 barrier(); 2218 2219 osnoise_unhook_events(); 2220 return retval; 2221 } 2222 2223 return 0; 2224 } 2225 2226 /* 2227 * osnoise_workload_stop - stop the workload and unhook the events 2228 */ 2229 static void osnoise_workload_stop(void) 2230 { 2231 /* 2232 * Instances need to be unregistered before calling 2233 * stop. Hence, if there is a registered instance, more 2234 * than one instance is running, and the workload will not 2235 * yet stop. Otherwise, this code is on the way to disable 2236 * the last instance, and the workload can stop. 2237 */ 2238 if (osnoise_has_registered_instances()) 2239 return; 2240 2241 /* 2242 * If callbacks were already disabled in a previous stop 2243 * call, there is no need to disable then again. 2244 * 2245 * For instance, this happens when tracing is stopped via: 2246 * echo 0 > tracing_on 2247 * echo nop > current_tracer. 2248 */ 2249 if (!trace_osnoise_callback_enabled) 2250 return; 2251 2252 trace_osnoise_callback_enabled = false; 2253 /* 2254 * Make sure that ftrace_nmi_enter/exit() see 2255 * trace_osnoise_callback_enabled as false before continuing. 2256 */ 2257 barrier(); 2258 2259 stop_per_cpu_kthreads(); 2260 2261 osnoise_unhook_events(); 2262 } 2263 2264 static void osnoise_tracer_start(struct trace_array *tr) 2265 { 2266 int retval; 2267 2268 /* 2269 * If the instance is already registered, there is no need to 2270 * register it again. 2271 */ 2272 if (osnoise_instance_registered(tr)) 2273 return; 2274 2275 retval = osnoise_workload_start(); 2276 if (retval) 2277 pr_err(BANNER "Error starting osnoise tracer\n"); 2278 2279 osnoise_register_instance(tr); 2280 } 2281 2282 static void osnoise_tracer_stop(struct trace_array *tr) 2283 { 2284 osnoise_unregister_instance(tr); 2285 osnoise_workload_stop(); 2286 } 2287 2288 static int osnoise_tracer_init(struct trace_array *tr) 2289 { 2290 /* 2291 * Only allow osnoise tracer if timerlat tracer is not running 2292 * already. 2293 */ 2294 if (timerlat_enabled()) 2295 return -EBUSY; 2296 2297 tr->max_latency = 0; 2298 2299 osnoise_tracer_start(tr); 2300 return 0; 2301 } 2302 2303 static void osnoise_tracer_reset(struct trace_array *tr) 2304 { 2305 osnoise_tracer_stop(tr); 2306 } 2307 2308 static struct tracer osnoise_tracer __read_mostly = { 2309 .name = "osnoise", 2310 .init = osnoise_tracer_init, 2311 .reset = osnoise_tracer_reset, 2312 .start = osnoise_tracer_start, 2313 .stop = osnoise_tracer_stop, 2314 .print_header = print_osnoise_headers, 2315 .allow_instances = true, 2316 }; 2317 2318 #ifdef CONFIG_TIMERLAT_TRACER 2319 static void timerlat_tracer_start(struct trace_array *tr) 2320 { 2321 int retval; 2322 2323 /* 2324 * If the instance is already registered, there is no need to 2325 * register it again. 2326 */ 2327 if (osnoise_instance_registered(tr)) 2328 return; 2329 2330 retval = osnoise_workload_start(); 2331 if (retval) 2332 pr_err(BANNER "Error starting timerlat tracer\n"); 2333 2334 osnoise_register_instance(tr); 2335 2336 return; 2337 } 2338 2339 static void timerlat_tracer_stop(struct trace_array *tr) 2340 { 2341 int cpu; 2342 2343 osnoise_unregister_instance(tr); 2344 2345 /* 2346 * Instruct the threads to stop only if this is the last instance. 2347 */ 2348 if (!osnoise_has_registered_instances()) { 2349 for_each_online_cpu(cpu) 2350 per_cpu(per_cpu_osnoise_var, cpu).sampling = 0; 2351 } 2352 2353 osnoise_workload_stop(); 2354 } 2355 2356 static int timerlat_tracer_init(struct trace_array *tr) 2357 { 2358 /* 2359 * Only allow timerlat tracer if osnoise tracer is not running already. 2360 */ 2361 if (osnoise_has_registered_instances() && !osnoise_data.timerlat_tracer) 2362 return -EBUSY; 2363 2364 /* 2365 * If this is the first instance, set timerlat_tracer to block 2366 * osnoise tracer start. 2367 */ 2368 if (!osnoise_has_registered_instances()) 2369 osnoise_data.timerlat_tracer = 1; 2370 2371 tr->max_latency = 0; 2372 timerlat_tracer_start(tr); 2373 2374 return 0; 2375 } 2376 2377 static void timerlat_tracer_reset(struct trace_array *tr) 2378 { 2379 timerlat_tracer_stop(tr); 2380 2381 /* 2382 * If this is the last instance, reset timerlat_tracer allowing 2383 * osnoise to be started. 2384 */ 2385 if (!osnoise_has_registered_instances()) 2386 osnoise_data.timerlat_tracer = 0; 2387 } 2388 2389 static struct tracer timerlat_tracer __read_mostly = { 2390 .name = "timerlat", 2391 .init = timerlat_tracer_init, 2392 .reset = timerlat_tracer_reset, 2393 .start = timerlat_tracer_start, 2394 .stop = timerlat_tracer_stop, 2395 .print_header = print_timerlat_headers, 2396 .allow_instances = true, 2397 }; 2398 2399 __init static int init_timerlat_tracer(void) 2400 { 2401 return register_tracer(&timerlat_tracer); 2402 } 2403 #else /* CONFIG_TIMERLAT_TRACER */ 2404 __init static int init_timerlat_tracer(void) 2405 { 2406 return 0; 2407 } 2408 #endif /* CONFIG_TIMERLAT_TRACER */ 2409 2410 __init static int init_osnoise_tracer(void) 2411 { 2412 int ret; 2413 2414 mutex_init(&interface_lock); 2415 2416 cpumask_copy(&osnoise_cpumask, cpu_all_mask); 2417 2418 ret = register_tracer(&osnoise_tracer); 2419 if (ret) { 2420 pr_err(BANNER "Error registering osnoise!\n"); 2421 return ret; 2422 } 2423 2424 ret = init_timerlat_tracer(); 2425 if (ret) { 2426 pr_err(BANNER "Error registering timerlat!\n"); 2427 return ret; 2428 } 2429 2430 osnoise_init_hotplug_support(); 2431 2432 INIT_LIST_HEAD_RCU(&osnoise_instances); 2433 2434 init_tracefs(); 2435 2436 return 0; 2437 } 2438 late_initcall(init_osnoise_tracer); 2439