// SPDX-License-Identifier: GPL-2.0
/*
 * trace_hwlat.c - A simple Hardware Latency detector.
 *
 * Use this tracer to detect large system latencies induced by the behavior of
 * certain underlying system hardware or firmware, independent of Linux itself.
 * The code was developed originally to detect the presence of SMIs on Intel
 * and AMD systems, although there is no dependency upon x86 herein.
 *
 * The classical example usage of this tracer is in detecting the presence of
 * SMIs or System Management Interrupts on Intel and AMD systems. An SMI is a
 * somewhat special form of hardware interrupt spawned from earlier CPU debug
 * modes in which the (BIOS/EFI/etc.) firmware arranges for the South Bridge
 * LPC (or other device) to generate a special interrupt under certain
 * circumstances, for example, upon expiration of a special SMI timer device,
 * due to certain external thermal readings, on certain I/O address accesses,
 * and other situations. An SMI hits a special CPU pin, triggers a special
 * SMI mode (complete with special memory map), and the OS is unaware.
 *
 * Although certain hardware-induced latencies are necessary (for example,
 * a modern system often requires an SMI handler for correct thermal control
 * and remote management), they can wreak havoc upon any OS-level performance
 * guarantees toward low latency, especially when the OS is not even made
 * aware of the presence of these interrupts. For this reason, we need a
 * somewhat brute force mechanism to detect these interrupts. In this case,
 * we do it by hogging all of the CPU(s) for configurable timer intervals,
 * sampling the built-in CPU timer, looking for discontiguous readings.
 *
 * WARNING: This implementation necessarily introduces latencies. Therefore,
 *          you should NEVER use this tracer while running in a production
 *          environment requiring any kind of low-latency performance
 *          guarantee(s).
 *
 * Copyright (C) 2008-2009 Jon Masters, Red Hat, Inc. <jcm@redhat.com>
 * Copyright (C) 2013-2016 Steven Rostedt, Red Hat, Inc. <srostedt@redhat.com>
 *
 * Includes useful feedback from Clark Williams <williams@redhat.com>
 *
 */
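
/*
 * Example usage from user space (a sketch, not normative documentation;
 * paths assume the default tracefs mount at /sys/kernel/tracing and
 * reflect the "window", "width" and "mode" files created by init_tracefs()
 * below, plus the generic tracing_thresh file, all in microseconds):
 *
 *	# echo hwlat > /sys/kernel/tracing/current_tracer
 *	# echo 10 > /sys/kernel/tracing/tracing_thresh
 *	# echo 500000 > /sys/kernel/tracing/hwlat_detector/width
 *	# echo 1000000 > /sys/kernel/tracing/hwlat_detector/window
 *	# echo round-robin > /sys/kernel/tracing/hwlat_detector/mode
 *	# cat /sys/kernel/tracing/trace
 */
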
#include <linux/kthread.h>
#include <linux/tracefs.h>
#include <linux/uaccess.h>
#include <linux/cpumask.h>
#include <linux/delay.h>
#include <linux/sched/clock.h>
#include "trace.h"

static struct trace_array *hwlat_trace;

#define U64STR_SIZE		22			/* 20 digits max */

#define BANNER			"hwlat_detector: "
#define DEFAULT_SAMPLE_WINDOW	1000000			/* 1s */
#define DEFAULT_SAMPLE_WIDTH	500000			/* 0.5s */
#define DEFAULT_LAT_THRESHOLD	10			/* 10us */

static struct dentry *hwlat_sample_width;	/* sample width us */
static struct dentry *hwlat_sample_window;	/* sample window us */
static struct dentry *hwlat_thread_mode;	/* hwlat thread mode */

enum {
	MODE_NONE = 0,
	MODE_ROUND_ROBIN,
	MODE_PER_CPU,
	MODE_MAX
};
static char *thread_mode_str[] = { "none", "round-robin", "per-cpu" };

/* Save the previous tracing_thresh value */
static unsigned long save_tracing_thresh;

/* runtime kthread data */
struct hwlat_kthread_data {
	struct task_struct	*kthread;
	/* NMI timestamp counters */
	u64			nmi_ts_start;
	u64			nmi_total_ts;
	int			nmi_count;
	int			nmi_cpu;
};

static struct hwlat_kthread_data hwlat_single_cpu_data;
static DEFINE_PER_CPU(struct hwlat_kthread_data, hwlat_per_cpu_data);

/* Tells NMIs to call back to the hwlat tracer to record timestamps */
bool trace_hwlat_callback_enabled;

/* If the user changed threshold, remember it */
static u64 last_tracing_thresh = DEFAULT_LAT_THRESHOLD * NSEC_PER_USEC;

/* Individual latency samples are stored here when detected. */
struct hwlat_sample {
	u64			seqnum;		/* unique sequence */
	u64			duration;	/* delta */
	u64			outer_duration;	/* delta (outer loop) */
	u64			nmi_total_ts;	/* Total time spent in NMIs */
	struct timespec64	timestamp;	/* wall time */
	int			nmi_count;	/* # NMIs during this sample */
	int			count;		/* # of iterations over thresh */
};

/* keep the global state somewhere. */
static struct hwlat_data {

	struct mutex lock;		/* protect changes */

	u64	count;			/* total since reset */

	u64	sample_window;		/* total sampling window (on+off) */
	u64	sample_width;		/* active sampling portion of window */

	int	thread_mode;		/* thread mode */

} hwlat_data = {
	.sample_window		= DEFAULT_SAMPLE_WINDOW,
	.sample_width		= DEFAULT_SAMPLE_WIDTH,
	.thread_mode		= MODE_ROUND_ROBIN
};

static struct hwlat_kthread_data *get_cpu_data(void)
{
	if (hwlat_data.thread_mode == MODE_PER_CPU)
		return this_cpu_ptr(&hwlat_per_cpu_data);
	else
		return &hwlat_single_cpu_data;
}

static bool hwlat_busy;

static void trace_hwlat_sample(struct hwlat_sample *sample)
{
	struct trace_array *tr = hwlat_trace;
	struct trace_event_call *call = &event_hwlat;
	struct trace_buffer *buffer = tr->array_buffer.buffer;
	struct ring_buffer_event *event;
	struct hwlat_entry *entry;

	event = trace_buffer_lock_reserve(buffer, TRACE_HWLAT, sizeof(*entry),
					  tracing_gen_ctx());
	if (!event)
		return;
	entry = ring_buffer_event_data(event);
	entry->seqnum = sample->seqnum;
	entry->duration = sample->duration;
	entry->outer_duration = sample->outer_duration;
	entry->timestamp = sample->timestamp;
	entry->nmi_total_ts = sample->nmi_total_ts;
	entry->nmi_count = sample->nmi_count;
	entry->count = sample->count;

	if (!call_filter_check_discard(call, entry, buffer, event))
		trace_buffer_unlock_commit_nostack(buffer, event);
}

/* Macros to encapsulate the time capturing infrastructure */
#define time_type	u64
#define time_get()	trace_clock_local()
#define time_to_us(x)	div_u64(x, 1000)
#define time_sub(a, b)	((a) - (b))
#define init_time(a, b)	(a = b)
#define time_u64(a)	a

void trace_hwlat_callback(bool enter)
{
	struct hwlat_kthread_data *kdata = get_cpu_data();

	if (!kdata->kthread)
		return;

	/*
	 * Currently trace_clock_local() calls sched_clock() and the
	 * generic version is not NMI safe.
	 */
	if (!IS_ENABLED(CONFIG_GENERIC_SCHED_CLOCK)) {
		if (enter)
			kdata->nmi_ts_start = time_get();
		else
			kdata->nmi_total_ts += time_get() - kdata->nmi_ts_start;
	}

	if (enter)
		kdata->nmi_count++;
}

/*
 * hwlat_err - report a hwlat error.
 */
#define hwlat_err(msg) ({							\
	struct trace_array *tr = hwlat_trace;					\
										\
	trace_array_printk_buf(tr->array_buffer.buffer, _THIS_IP_, msg);	\
})

/**
 * get_sample - sample the CPU TSC and look for likely hardware latencies
 *
 * Used to repeatedly capture the CPU TSC (or similar), looking for potential
 * hardware-induced latency. Called with interrupts disabled and with
 * hwlat_data.lock held.
 */
static int get_sample(void)
{
	struct hwlat_kthread_data *kdata = get_cpu_data();
	struct trace_array *tr = hwlat_trace;
	struct hwlat_sample s;
	time_type start, t1, t2, last_t2;
	s64 diff, outer_diff, total, last_total = 0;
	u64 sample = 0;
	u64 thresh = tracing_thresh;
	u64 outer_sample = 0;
	int ret = -1;
	unsigned int count = 0;

	do_div(thresh, NSEC_PER_USEC);	/* modifies thresh value */

	kdata->nmi_total_ts = 0;
	kdata->nmi_count = 0;
	/* Make sure NMIs see this first */
	barrier();

	trace_hwlat_callback_enabled = true;

	init_time(last_t2, 0);
	start = time_get(); /* start timestamp */
	outer_diff = 0;

	do {

		t1 = time_get();	/* we'll look for a discontinuity */
		t2 = time_get();

		if (time_u64(last_t2)) {
			/* Check the delta from outer loop (t2 to next t1) */
			outer_diff = time_to_us(time_sub(t1, last_t2));
			/* This shouldn't happen */
			if (outer_diff < 0) {
				hwlat_err(BANNER "time running backwards\n");
				goto out;
			}
			if (outer_diff > outer_sample)
				outer_sample = outer_diff;
		}
		last_t2 = t2;

		total = time_to_us(time_sub(t2, start)); /* sample width */

		/* Check for possible overflows */
		if (total < last_total) {
			hwlat_err("Time total overflowed\n");
			break;
		}
		last_total = total;

		/* This checks the inner loop (t1 to t2) */
		diff = time_to_us(time_sub(t2, t1)); /* current diff */

		if (diff > thresh || outer_diff > thresh) {
			if (!count)
				ktime_get_real_ts64(&s.timestamp);
			count++;
		}

		/* This shouldn't happen */
		if (diff < 0) {
			hwlat_err(BANNER "time running backwards\n");
			goto out;
		}

		if (diff > sample)
			sample = diff; /* only want highest value */

	} while (total <= hwlat_data.sample_width);

	barrier(); /* finish the above in the view for NMIs */
	trace_hwlat_callback_enabled = false;
	barrier(); /* Make sure nmi_total_ts is no longer updated */

	ret = 0;

	/* If we exceed the threshold value, we have found a hardware latency */
	if (sample > thresh || outer_sample > thresh) {
		u64 latency;

		ret = 1;

		/* We read in microseconds */
		if (kdata->nmi_total_ts)
			do_div(kdata->nmi_total_ts, NSEC_PER_USEC);

		hwlat_data.count++;
		s.seqnum = hwlat_data.count;
		s.duration = sample;
		s.outer_duration = outer_sample;
		s.nmi_total_ts = kdata->nmi_total_ts;
		s.nmi_count = kdata->nmi_count;
		s.count = count;
		trace_hwlat_sample(&s);

		latency = max(sample, outer_sample);

		/* Keep a running maximum ever recorded hardware latency */
		if (latency > tr->max_latency) {
			tr->max_latency = latency;
			latency_fsnotify(tr);
		}
	}

out:
	return ret;
}

static struct cpumask save_cpumask;

static void move_to_next_cpu(void)
{
	struct cpumask *current_mask = &save_cpumask;
	struct trace_array *tr = hwlat_trace;
	int next_cpu;

	/*
	 * If for some reason the user modifies the CPU affinity
	 * of this thread, then stop migrating for the duration
	 * of the current test.
	 */
	if (!cpumask_equal(current_mask, current->cpus_ptr))
		goto change_mode;

	cpus_read_lock();
	cpumask_and(current_mask, cpu_online_mask, tr->tracing_cpumask);
	next_cpu = cpumask_next(raw_smp_processor_id(), current_mask);
	cpus_read_unlock();

	if (next_cpu >= nr_cpu_ids)
		next_cpu = cpumask_first(current_mask);

	if (next_cpu >= nr_cpu_ids) /* Shouldn't happen! */
		goto change_mode;

	cpumask_clear(current_mask);
	cpumask_set_cpu(next_cpu, current_mask);

	sched_setaffinity(0, current_mask);
	return;

change_mode:
	hwlat_data.thread_mode = MODE_NONE;
	pr_info(BANNER "cpumask changed while in round-robin mode, switching to mode none\n");
}

/*
 * kthread_fn - The CPU time sampling/hardware latency detection kernel thread
 *
 * Used to periodically sample the CPU TSC via a call to get_sample. We
 * disable interrupts, which does (intentionally) introduce latency since we
 * need to ensure nothing else might be running (and thus preempting).
 * Obviously this should never be used in production environments.
 *
 * Executes one loop iteration on each CPU in the tracing_cpumask sysfs file.
 */
static int kthread_fn(void *data)
{
	u64 interval;

	while (!kthread_should_stop()) {

		if (hwlat_data.thread_mode == MODE_ROUND_ROBIN)
			move_to_next_cpu();

		local_irq_disable();
		get_sample();
		local_irq_enable();

		mutex_lock(&hwlat_data.lock);
		interval = hwlat_data.sample_window - hwlat_data.sample_width;
		mutex_unlock(&hwlat_data.lock);

		do_div(interval, USEC_PER_MSEC); /* modifies interval value */

		/* Always sleep for at least 1ms */
		if (interval < 1)
			interval = 1;

		if (msleep_interruptible(interval))
			break;
	}

	return 0;
}

/*
 * stop_single_kthread - Inform the hardware latency sampling/detector kthread to stop
 *
 * This kicks the running hardware latency sampling/detector kernel thread and
 * tells it to stop sampling now. Use this on unload and at system shutdown.
 */
static void stop_single_kthread(void)
{
	struct hwlat_kthread_data *kdata = get_cpu_data();
	struct task_struct *kthread;

	cpus_read_lock();
	kthread = kdata->kthread;

	if (!kthread)
		goto out_put_cpus;

	kthread_stop(kthread);
	kdata->kthread = NULL;

out_put_cpus:
	cpus_read_unlock();
}

/*
 * start_single_kthread - Kick off the hardware latency sampling/detector kthread
 *
 * This starts the kernel thread that will sit and sample the CPU timestamp
 * counter (TSC or similar) and look for potential hardware latencies.
 */
static int start_single_kthread(struct trace_array *tr)
{
	struct hwlat_kthread_data *kdata = get_cpu_data();
	struct cpumask *current_mask = &save_cpumask;
	struct task_struct *kthread;
	int next_cpu;

	cpus_read_lock();
	if (kdata->kthread)
		goto out_put_cpus;

	kthread = kthread_create(kthread_fn, NULL, "hwlatd");
	if (IS_ERR(kthread)) {
		pr_err(BANNER "could not start sampling thread\n");
		cpus_read_unlock();
		return -ENOMEM;
	}

	/* Just pick the first CPU on first iteration */
	cpumask_and(current_mask, cpu_online_mask, tr->tracing_cpumask);

	if (hwlat_data.thread_mode == MODE_ROUND_ROBIN) {
		next_cpu = cpumask_first(current_mask);
		cpumask_clear(current_mask);
		cpumask_set_cpu(next_cpu, current_mask);
	}

	sched_setaffinity(kthread->pid, current_mask);

	kdata->kthread = kthread;
	wake_up_process(kthread);

out_put_cpus:
	cpus_read_unlock();
	return 0;
}

/*
 * stop_cpu_kthread - Stop a hwlat cpu kthread
 */
static void stop_cpu_kthread(unsigned int cpu)
{
	struct task_struct *kthread;

	kthread = per_cpu(hwlat_per_cpu_data, cpu).kthread;
	if (kthread)
		kthread_stop(kthread);
	per_cpu(hwlat_per_cpu_data, cpu).kthread = NULL;
}

/*
 * stop_per_cpu_kthreads - Inform the hardware latency sampling/detector kthreads to stop
 *
 * This kicks the running hardware latency sampling/detector kernel threads and
 * tells them to stop sampling now. Use this on unload and at system shutdown.
 */
static void stop_per_cpu_kthreads(void)
{
	unsigned int cpu;

	cpus_read_lock();
	for_each_online_cpu(cpu)
		stop_cpu_kthread(cpu);
	cpus_read_unlock();
}

/*
 * start_cpu_kthread - Start a hwlat cpu kthread
 */
static int start_cpu_kthread(unsigned int cpu)
{
	struct task_struct *kthread;
	char comm[24];

	snprintf(comm, 24, "hwlatd/%d", cpu);

	kthread = kthread_create_on_cpu(kthread_fn, NULL, cpu, comm);
	if (IS_ERR(kthread)) {
		pr_err(BANNER "could not start sampling thread\n");
		return -ENOMEM;
	}

	per_cpu(hwlat_per_cpu_data, cpu).kthread = kthread;
	wake_up_process(kthread);

	return 0;
}

#ifdef CONFIG_HOTPLUG_CPU
static void hwlat_hotplug_workfn(struct work_struct *dummy)
{
	struct trace_array *tr = hwlat_trace;
	unsigned int cpu = smp_processor_id();

	mutex_lock(&trace_types_lock);
	mutex_lock(&hwlat_data.lock);
	cpus_read_lock();

	if (!hwlat_busy || hwlat_data.thread_mode != MODE_PER_CPU)
		goto out_unlock;

	if (!cpumask_test_cpu(cpu, tr->tracing_cpumask))
		goto out_unlock;

	start_cpu_kthread(cpu);

out_unlock:
	cpus_read_unlock();
	mutex_unlock(&hwlat_data.lock);
	mutex_unlock(&trace_types_lock);
}

static DECLARE_WORK(hwlat_hotplug_work, hwlat_hotplug_workfn);

/*
 * hwlat_cpu_init - CPU hotplug online callback function
 */
static int hwlat_cpu_init(unsigned int cpu)
{
	schedule_work_on(cpu, &hwlat_hotplug_work);
	return 0;
}

/*
 * hwlat_cpu_die - CPU hotplug offline callback function
 */
static int hwlat_cpu_die(unsigned int cpu)
{
	stop_cpu_kthread(cpu);
	return 0;
}

static void hwlat_init_hotplug_support(void)
{
	int ret;

	ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "trace/hwlat:online",
				hwlat_cpu_init, hwlat_cpu_die);
	if (ret < 0)
		pr_warn(BANNER "Failed to initialize cpu hotplug support\n");

	return;
}
#else /* CONFIG_HOTPLUG_CPU */
static void hwlat_init_hotplug_support(void)
{
	return;
}
#endif /* CONFIG_HOTPLUG_CPU */

/*
 * start_per_cpu_kthreads - Kick off the hardware latency sampling/detector kthreads
 *
 * This starts the kernel threads that will sit on potentially all cpus and
 * sample the CPU timestamp counter (TSC or similar) and look for potential
 * hardware latencies.
 */
static int start_per_cpu_kthreads(struct trace_array *tr)
{
	struct cpumask *current_mask = &save_cpumask;
	unsigned int cpu;
	int retval;

	cpus_read_lock();
	/*
	 * Run only on CPUs in which hwlat is allowed to run.
	 */
	cpumask_and(current_mask, cpu_online_mask, tr->tracing_cpumask);

	for_each_online_cpu(cpu)
		per_cpu(hwlat_per_cpu_data, cpu).kthread = NULL;

	for_each_cpu(cpu, current_mask) {
		retval = start_cpu_kthread(cpu);
		if (retval)
			goto out_error;
	}
	cpus_read_unlock();

	return 0;

out_error:
	cpus_read_unlock();
	stop_per_cpu_kthreads();
	return retval;
}

static void *s_mode_start(struct seq_file *s, loff_t *pos)
{
	int mode = *pos;

	mutex_lock(&hwlat_data.lock);

	if (mode >= MODE_MAX)
		return NULL;

	return pos;
}

static void *s_mode_next(struct seq_file *s, void *v, loff_t *pos)
{
	int mode = ++(*pos);

	if (mode >= MODE_MAX)
		return NULL;

	return pos;
}

static int s_mode_show(struct seq_file *s, void *v)
{
	loff_t *pos = v;
	int mode = *pos;

	if (mode == hwlat_data.thread_mode)
		seq_printf(s, "[%s]", thread_mode_str[mode]);
	else
		seq_printf(s, "%s", thread_mode_str[mode]);

	if (mode != MODE_MAX)
		seq_puts(s, " ");

	return 0;
}

static void s_mode_stop(struct seq_file *s, void *v)
{
	seq_puts(s, "\n");
	mutex_unlock(&hwlat_data.lock);
}

static const struct seq_operations thread_mode_seq_ops = {
	.start		= s_mode_start,
	.next		= s_mode_next,
	.show		= s_mode_show,
	.stop		= s_mode_stop
};

static int hwlat_mode_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &thread_mode_seq_ops);
}

static void hwlat_tracer_start(struct trace_array *tr);
static void hwlat_tracer_stop(struct trace_array *tr);

/**
 * hwlat_mode_write - Write function for "mode" entry
 * @filp: The active open file structure
 * @ubuf: The user buffer that contains the value to write
 * @cnt: The maximum number of bytes to write to "file"
 * @ppos: The current position in @file
 *
 * This function provides a write implementation for the "mode" interface
 * to the hardware latency detector. hwlatd has different operation modes.
 * The "none" mode sets the allowed cpumask for a single hwlatd thread at
 * startup and lets the scheduler handle the migration. The default mode is
 * the "round-robin" one, in which a single hwlatd thread runs, migrating
 * among the allowed CPUs in a round-robin fashion. The "per-cpu" mode
 * creates one hwlatd thread per allowed CPU.
 */
static ssize_t hwlat_mode_write(struct file *filp, const char __user *ubuf,
				size_t cnt, loff_t *ppos)
{
	struct trace_array *tr = hwlat_trace;
	const char *mode;
	char buf[64];
	int ret, i;

	if (cnt >= sizeof(buf))
		return -EINVAL;

	if (copy_from_user(buf, ubuf, cnt))
		return -EFAULT;

	buf[cnt] = 0;

	mode = strstrip(buf);

	ret = -EINVAL;

	/*
	 * trace_types_lock is taken to avoid concurrency on start/stop
	 * and hwlat_busy.
	 */
	mutex_lock(&trace_types_lock);
	if (hwlat_busy)
		hwlat_tracer_stop(tr);

	mutex_lock(&hwlat_data.lock);

	for (i = 0; i < MODE_MAX; i++) {
		if (strcmp(mode, thread_mode_str[i]) == 0) {
			hwlat_data.thread_mode = i;
			ret = cnt;
		}
	}

	mutex_unlock(&hwlat_data.lock);

	if (hwlat_busy)
		hwlat_tracer_start(tr);
	mutex_unlock(&trace_types_lock);

	*ppos += cnt;

	return ret;
}
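
/*
 * For example (a sketch, assuming the default tracefs mount point and that
 * hwlat is already the current_tracer):
 *
 *	# echo per-cpu > /sys/kernel/tracing/hwlat_detector/mode
 *
 * should result in one "hwlatd/N" kernel thread per CPU allowed by
 * tracing_cpumask, instead of the single migrating "hwlatd" thread.
 */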

/*
 * The width parameter is read/write using the generic trace_min_max_param
 * method. The *val is protected by the hwlat_data lock and is upper
 * bounded by the window parameter.
 */
static struct trace_min_max_param hwlat_width = {
	.lock		= &hwlat_data.lock,
	.val		= &hwlat_data.sample_width,
	.max		= &hwlat_data.sample_window,
	.min		= NULL,
};

/*
 * The window parameter is read/write using the generic trace_min_max_param
 * method. The *val is protected by the hwlat_data lock and is lower
 * bounded by the width parameter.
 */
static struct trace_min_max_param hwlat_window = {
	.lock		= &hwlat_data.lock,
	.val		= &hwlat_data.sample_window,
	.max		= NULL,
	.min		= &hwlat_data.sample_width,
};

static const struct file_operations thread_mode_fops = {
	.open		= hwlat_mode_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release,
	.write		= hwlat_mode_write
};

/**
 * init_tracefs - A function to initialize the tracefs interface files
 *
 * This function creates entries in tracefs for "hwlat_detector".
 * It creates the hwlat_detector directory in the tracing directory,
 * and within that directory are the width, window and mode files used
 * to change and view those values.
 */
static int init_tracefs(void)
{
	int ret;
	struct dentry *top_dir;

	ret = tracing_init_dentry();
	if (ret)
		return -ENOMEM;

	top_dir = tracefs_create_dir("hwlat_detector", NULL);
	if (!top_dir)
		return -ENOMEM;

	hwlat_sample_window = tracefs_create_file("window", TRACE_MODE_WRITE,
						  top_dir,
						  &hwlat_window,
						  &trace_min_max_fops);
	if (!hwlat_sample_window)
		goto err;

	hwlat_sample_width = tracefs_create_file("width", TRACE_MODE_WRITE,
						 top_dir,
						 &hwlat_width,
						 &trace_min_max_fops);
	if (!hwlat_sample_width)
		goto err;

	hwlat_thread_mode = trace_create_file("mode", TRACE_MODE_WRITE,
					      top_dir,
					      NULL,
					      &thread_mode_fops);
	if (!hwlat_thread_mode)
		goto err;

	return 0;

err:
	tracefs_remove(top_dir);
	return -ENOMEM;
}

static void hwlat_tracer_start(struct trace_array *tr)
{
	int err;

	if (hwlat_data.thread_mode == MODE_PER_CPU)
		err = start_per_cpu_kthreads(tr);
	else
		err = start_single_kthread(tr);
	if (err)
		pr_err(BANNER "Cannot start hwlat kthread\n");
}

static void hwlat_tracer_stop(struct trace_array *tr)
{
	if (hwlat_data.thread_mode == MODE_PER_CPU)
		stop_per_cpu_kthreads();
	else
		stop_single_kthread();
}

static int hwlat_tracer_init(struct trace_array *tr)
{
	/* Only allow one instance to enable this */
	if (hwlat_busy)
		return -EBUSY;

	hwlat_trace = tr;

	hwlat_data.count = 0;
	tr->max_latency = 0;
	save_tracing_thresh = tracing_thresh;

	/* tracing_thresh is in nsecs, we speak in usecs */
	if (!tracing_thresh)
		tracing_thresh = last_tracing_thresh;

	if (tracer_tracing_is_on(tr))
		hwlat_tracer_start(tr);

	hwlat_busy = true;

	return 0;
}

static void hwlat_tracer_reset(struct trace_array *tr)
{
	hwlat_tracer_stop(tr);

	/* the tracing threshold is static between runs */
	last_tracing_thresh = tracing_thresh;

	tracing_thresh = save_tracing_thresh;
	hwlat_busy = false;
}

static struct tracer hwlat_tracer __read_mostly =
{
	.name		= "hwlat",
	.init		= hwlat_tracer_init,
	.reset		= hwlat_tracer_reset,
	.start		= hwlat_tracer_start,
	.stop		= hwlat_tracer_stop,
	.allow_instances = true,
};

__init static int init_hwlat_tracer(void)
{
	int ret;

	mutex_init(&hwlat_data.lock);

	ret = register_tracer(&hwlat_tracer);
	if (ret)
		return ret;

	hwlat_init_hotplug_support();

	init_tracefs();

	return 0;
}
late_initcall(init_hwlat_tracer);