1bcea3f96SSteven Rostedt (VMware) // SPDX-License-Identifier: GPL-2.0 2e7c15cd8SSteven Rostedt (Red Hat) /* 30c3c86bdSSrivatsa S. Bhat (VMware) * trace_hwlat.c - A simple Hardware Latency detector. 4e7c15cd8SSteven Rostedt (Red Hat) * 5e7c15cd8SSteven Rostedt (Red Hat) * Use this tracer to detect large system latencies induced by the behavior of 6e7c15cd8SSteven Rostedt (Red Hat) * certain underlying system hardware or firmware, independent of Linux itself. 7e7c15cd8SSteven Rostedt (Red Hat) * The code was developed originally to detect the presence of SMIs on Intel 8e7c15cd8SSteven Rostedt (Red Hat) * and AMD systems, although there is no dependency upon x86 herein. 9e7c15cd8SSteven Rostedt (Red Hat) * 10e7c15cd8SSteven Rostedt (Red Hat) * The classical example usage of this tracer is in detecting the presence of 11e7c15cd8SSteven Rostedt (Red Hat) * SMIs or System Management Interrupts on Intel and AMD systems. An SMI is a 12e7c15cd8SSteven Rostedt (Red Hat) * somewhat special form of hardware interrupt spawned from earlier CPU debug 13e7c15cd8SSteven Rostedt (Red Hat) * modes in which the (BIOS/EFI/etc.) firmware arranges for the South Bridge 14e7c15cd8SSteven Rostedt (Red Hat) * LPC (or other device) to generate a special interrupt under certain 15e7c15cd8SSteven Rostedt (Red Hat) * circumstances, for example, upon expiration of a special SMI timer device, 16e7c15cd8SSteven Rostedt (Red Hat) * due to certain external thermal readings, on certain I/O address accesses, 17e7c15cd8SSteven Rostedt (Red Hat) * and other situations. An SMI hits a special CPU pin, triggers a special 18e7c15cd8SSteven Rostedt (Red Hat) * SMI mode (complete with special memory map), and the OS is unaware. 19e7c15cd8SSteven Rostedt (Red Hat) * 20e7c15cd8SSteven Rostedt (Red Hat) * Although certain hardware-inducing latencies are necessary (for example, 21e7c15cd8SSteven Rostedt (Red Hat) * a modern system often requires an SMI handler for correct thermal control 22e7c15cd8SSteven Rostedt (Red Hat) * and remote management) they can wreak havoc upon any OS-level performance 23e7c15cd8SSteven Rostedt (Red Hat) * guarantees toward low-latency, especially when the OS is not even made 24e7c15cd8SSteven Rostedt (Red Hat) * aware of the presence of these interrupts. For this reason, we need a 25e7c15cd8SSteven Rostedt (Red Hat) * somewhat brute force mechanism to detect these interrupts. In this case, 26e7c15cd8SSteven Rostedt (Red Hat) * we do it by hogging all of the CPU(s) for configurable timer intervals, 27e7c15cd8SSteven Rostedt (Red Hat) * sampling the built-in CPU timer, looking for discontiguous readings. 28e7c15cd8SSteven Rostedt (Red Hat) * 29e7c15cd8SSteven Rostedt (Red Hat) * WARNING: This implementation necessarily introduces latencies. Therefore, 30e7c15cd8SSteven Rostedt (Red Hat) * you should NEVER use this tracer while running in a production 31e7c15cd8SSteven Rostedt (Red Hat) * environment requiring any kind of low-latency performance 32e7c15cd8SSteven Rostedt (Red Hat) * guarantee(s). 33e7c15cd8SSteven Rostedt (Red Hat) * 34e7c15cd8SSteven Rostedt (Red Hat) * Copyright (C) 2008-2009 Jon Masters, Red Hat, Inc. <jcm@redhat.com> 35e7c15cd8SSteven Rostedt (Red Hat) * Copyright (C) 2013-2016 Steven Rostedt, Red Hat, Inc. <srostedt@redhat.com> 36e7c15cd8SSteven Rostedt (Red Hat) * 37bb1b24cfSDaniel Bristot de Oliveira * Includes useful feedback from Clark Williams <williams@redhat.com> 38e7c15cd8SSteven Rostedt (Red Hat) * 39e7c15cd8SSteven Rostedt (Red Hat) */ 40e7c15cd8SSteven Rostedt (Red Hat) #include <linux/kthread.h> 41e7c15cd8SSteven Rostedt (Red Hat) #include <linux/tracefs.h> 42e7c15cd8SSteven Rostedt (Red Hat) #include <linux/uaccess.h> 430330f7aaSSteven Rostedt (Red Hat) #include <linux/cpumask.h> 44e7c15cd8SSteven Rostedt (Red Hat) #include <linux/delay.h> 45e6017571SIngo Molnar #include <linux/sched/clock.h> 46e7c15cd8SSteven Rostedt (Red Hat) #include "trace.h" 47e7c15cd8SSteven Rostedt (Red Hat) 48e7c15cd8SSteven Rostedt (Red Hat) static struct trace_array *hwlat_trace; 49e7c15cd8SSteven Rostedt (Red Hat) 50e7c15cd8SSteven Rostedt (Red Hat) #define U64STR_SIZE 22 /* 20 digits max */ 51e7c15cd8SSteven Rostedt (Red Hat) 52e7c15cd8SSteven Rostedt (Red Hat) #define BANNER "hwlat_detector: " 53e7c15cd8SSteven Rostedt (Red Hat) #define DEFAULT_SAMPLE_WINDOW 1000000 /* 1s */ 54e7c15cd8SSteven Rostedt (Red Hat) #define DEFAULT_SAMPLE_WIDTH 500000 /* 0.5s */ 55e7c15cd8SSteven Rostedt (Red Hat) #define DEFAULT_LAT_THRESHOLD 10 /* 10us */ 56e7c15cd8SSteven Rostedt (Red Hat) 57e7c15cd8SSteven Rostedt (Red Hat) /* sampling thread*/ 58e7c15cd8SSteven Rostedt (Red Hat) static struct task_struct *hwlat_kthread; 59e7c15cd8SSteven Rostedt (Red Hat) 60e7c15cd8SSteven Rostedt (Red Hat) static struct dentry *hwlat_sample_width; /* sample width us */ 61e7c15cd8SSteven Rostedt (Red Hat) static struct dentry *hwlat_sample_window; /* sample window us */ 62*8fa826b7SDaniel Bristot de Oliveira static struct dentry *hwlat_thread_mode; /* hwlat thread mode */ 63*8fa826b7SDaniel Bristot de Oliveira 64*8fa826b7SDaniel Bristot de Oliveira enum { 65*8fa826b7SDaniel Bristot de Oliveira MODE_NONE = 0, 66*8fa826b7SDaniel Bristot de Oliveira MODE_ROUND_ROBIN, 67*8fa826b7SDaniel Bristot de Oliveira MODE_MAX 68*8fa826b7SDaniel Bristot de Oliveira }; 69*8fa826b7SDaniel Bristot de Oliveira static char *thread_mode_str[] = { "none", "round-robin" }; 70e7c15cd8SSteven Rostedt (Red Hat) 71e7c15cd8SSteven Rostedt (Red Hat) /* Save the previous tracing_thresh value */ 72e7c15cd8SSteven Rostedt (Red Hat) static unsigned long save_tracing_thresh; 73e7c15cd8SSteven Rostedt (Red Hat) 747b2c8625SSteven Rostedt (Red Hat) /* NMI timestamp counters */ 757b2c8625SSteven Rostedt (Red Hat) static u64 nmi_ts_start; 767b2c8625SSteven Rostedt (Red Hat) static u64 nmi_total_ts; 777b2c8625SSteven Rostedt (Red Hat) static int nmi_count; 787b2c8625SSteven Rostedt (Red Hat) static int nmi_cpu; 797b2c8625SSteven Rostedt (Red Hat) 807b2c8625SSteven Rostedt (Red Hat) /* Tells NMIs to call back to the hwlat tracer to record timestamps */ 817b2c8625SSteven Rostedt (Red Hat) bool trace_hwlat_callback_enabled; 827b2c8625SSteven Rostedt (Red Hat) 83e7c15cd8SSteven Rostedt (Red Hat) /* If the user changed threshold, remember it */ 84e7c15cd8SSteven Rostedt (Red Hat) static u64 last_tracing_thresh = DEFAULT_LAT_THRESHOLD * NSEC_PER_USEC; 85e7c15cd8SSteven Rostedt (Red Hat) 86e7c15cd8SSteven Rostedt (Red Hat) /* Individual latency samples are stored here when detected. */ 87e7c15cd8SSteven Rostedt (Red Hat) struct hwlat_sample { 88e7c15cd8SSteven Rostedt (Red Hat) u64 seqnum; /* unique sequence */ 89e7c15cd8SSteven Rostedt (Red Hat) u64 duration; /* delta */ 90e7c15cd8SSteven Rostedt (Red Hat) u64 outer_duration; /* delta (outer loop) */ 917b2c8625SSteven Rostedt (Red Hat) u64 nmi_total_ts; /* Total time spent in NMIs */ 9251aad0aeSDeepa Dinamani struct timespec64 timestamp; /* wall time */ 937b2c8625SSteven Rostedt (Red Hat) int nmi_count; /* # NMIs during this sample */ 94f2cc020dSIngo Molnar int count; /* # of iterations over thresh */ 95e7c15cd8SSteven Rostedt (Red Hat) }; 96e7c15cd8SSteven Rostedt (Red Hat) 97e7c15cd8SSteven Rostedt (Red Hat) /* keep the global state somewhere. */ 98e7c15cd8SSteven Rostedt (Red Hat) static struct hwlat_data { 99e7c15cd8SSteven Rostedt (Red Hat) 100e7c15cd8SSteven Rostedt (Red Hat) struct mutex lock; /* protect changes */ 101e7c15cd8SSteven Rostedt (Red Hat) 102e7c15cd8SSteven Rostedt (Red Hat) u64 count; /* total since reset */ 103e7c15cd8SSteven Rostedt (Red Hat) 104e7c15cd8SSteven Rostedt (Red Hat) u64 sample_window; /* total sampling window (on+off) */ 105e7c15cd8SSteven Rostedt (Red Hat) u64 sample_width; /* active sampling portion of window */ 106e7c15cd8SSteven Rostedt (Red Hat) 107*8fa826b7SDaniel Bristot de Oliveira int thread_mode; /* thread mode */ 108*8fa826b7SDaniel Bristot de Oliveira 109e7c15cd8SSteven Rostedt (Red Hat) } hwlat_data = { 110e7c15cd8SSteven Rostedt (Red Hat) .sample_window = DEFAULT_SAMPLE_WINDOW, 111e7c15cd8SSteven Rostedt (Red Hat) .sample_width = DEFAULT_SAMPLE_WIDTH, 112*8fa826b7SDaniel Bristot de Oliveira .thread_mode = MODE_ROUND_ROBIN 113e7c15cd8SSteven Rostedt (Red Hat) }; 114e7c15cd8SSteven Rostedt (Red Hat) 115*8fa826b7SDaniel Bristot de Oliveira static bool hwlat_busy; 116*8fa826b7SDaniel Bristot de Oliveira 117e7c15cd8SSteven Rostedt (Red Hat) static void trace_hwlat_sample(struct hwlat_sample *sample) 118e7c15cd8SSteven Rostedt (Red Hat) { 119e7c15cd8SSteven Rostedt (Red Hat) struct trace_array *tr = hwlat_trace; 120e7c15cd8SSteven Rostedt (Red Hat) struct trace_event_call *call = &event_hwlat; 12113292494SSteven Rostedt (VMware) struct trace_buffer *buffer = tr->array_buffer.buffer; 122e7c15cd8SSteven Rostedt (Red Hat) struct ring_buffer_event *event; 123e7c15cd8SSteven Rostedt (Red Hat) struct hwlat_entry *entry; 124e7c15cd8SSteven Rostedt (Red Hat) 125e7c15cd8SSteven Rostedt (Red Hat) event = trace_buffer_lock_reserve(buffer, TRACE_HWLAT, sizeof(*entry), 12636590c50SSebastian Andrzej Siewior tracing_gen_ctx()); 127e7c15cd8SSteven Rostedt (Red Hat) if (!event) 128e7c15cd8SSteven Rostedt (Red Hat) return; 129e7c15cd8SSteven Rostedt (Red Hat) entry = ring_buffer_event_data(event); 130e7c15cd8SSteven Rostedt (Red Hat) entry->seqnum = sample->seqnum; 131e7c15cd8SSteven Rostedt (Red Hat) entry->duration = sample->duration; 132e7c15cd8SSteven Rostedt (Red Hat) entry->outer_duration = sample->outer_duration; 133e7c15cd8SSteven Rostedt (Red Hat) entry->timestamp = sample->timestamp; 1347b2c8625SSteven Rostedt (Red Hat) entry->nmi_total_ts = sample->nmi_total_ts; 1357b2c8625SSteven Rostedt (Red Hat) entry->nmi_count = sample->nmi_count; 136b396bfdeSSteven Rostedt (VMware) entry->count = sample->count; 137e7c15cd8SSteven Rostedt (Red Hat) 138e7c15cd8SSteven Rostedt (Red Hat) if (!call_filter_check_discard(call, entry, buffer, event)) 13952ffabe3SSteven Rostedt (Red Hat) trace_buffer_unlock_commit_nostack(buffer, event); 140e7c15cd8SSteven Rostedt (Red Hat) } 141e7c15cd8SSteven Rostedt (Red Hat) 142e7c15cd8SSteven Rostedt (Red Hat) /* Macros to encapsulate the time capturing infrastructure */ 143e7c15cd8SSteven Rostedt (Red Hat) #define time_type u64 144e7c15cd8SSteven Rostedt (Red Hat) #define time_get() trace_clock_local() 145e7c15cd8SSteven Rostedt (Red Hat) #define time_to_us(x) div_u64(x, 1000) 146e7c15cd8SSteven Rostedt (Red Hat) #define time_sub(a, b) ((a) - (b)) 147e7c15cd8SSteven Rostedt (Red Hat) #define init_time(a, b) (a = b) 148e7c15cd8SSteven Rostedt (Red Hat) #define time_u64(a) a 149e7c15cd8SSteven Rostedt (Red Hat) 1507b2c8625SSteven Rostedt (Red Hat) void trace_hwlat_callback(bool enter) 1517b2c8625SSteven Rostedt (Red Hat) { 1527b2c8625SSteven Rostedt (Red Hat) if (smp_processor_id() != nmi_cpu) 1537b2c8625SSteven Rostedt (Red Hat) return; 1547b2c8625SSteven Rostedt (Red Hat) 1557b2c8625SSteven Rostedt (Red Hat) /* 1567b2c8625SSteven Rostedt (Red Hat) * Currently trace_clock_local() calls sched_clock() and the 1577b2c8625SSteven Rostedt (Red Hat) * generic version is not NMI safe. 1587b2c8625SSteven Rostedt (Red Hat) */ 1597b2c8625SSteven Rostedt (Red Hat) if (!IS_ENABLED(CONFIG_GENERIC_SCHED_CLOCK)) { 1607b2c8625SSteven Rostedt (Red Hat) if (enter) 1617b2c8625SSteven Rostedt (Red Hat) nmi_ts_start = time_get(); 1627b2c8625SSteven Rostedt (Red Hat) else 16398dc19c1SSrivatsa S. Bhat (VMware) nmi_total_ts += time_get() - nmi_ts_start; 1647b2c8625SSteven Rostedt (Red Hat) } 1657b2c8625SSteven Rostedt (Red Hat) 1667b2c8625SSteven Rostedt (Red Hat) if (enter) 1677b2c8625SSteven Rostedt (Red Hat) nmi_count++; 1687b2c8625SSteven Rostedt (Red Hat) } 1697b2c8625SSteven Rostedt (Red Hat) 170e7c15cd8SSteven Rostedt (Red Hat) /** 171e7c15cd8SSteven Rostedt (Red Hat) * get_sample - sample the CPU TSC and look for likely hardware latencies 172e7c15cd8SSteven Rostedt (Red Hat) * 173e7c15cd8SSteven Rostedt (Red Hat) * Used to repeatedly capture the CPU TSC (or similar), looking for potential 174e7c15cd8SSteven Rostedt (Red Hat) * hardware-induced latency. Called with interrupts disabled and with 175e7c15cd8SSteven Rostedt (Red Hat) * hwlat_data.lock held. 176e7c15cd8SSteven Rostedt (Red Hat) */ 177e7c15cd8SSteven Rostedt (Red Hat) static int get_sample(void) 178e7c15cd8SSteven Rostedt (Red Hat) { 179e7c15cd8SSteven Rostedt (Red Hat) struct trace_array *tr = hwlat_trace; 180b396bfdeSSteven Rostedt (VMware) struct hwlat_sample s; 181e7c15cd8SSteven Rostedt (Red Hat) time_type start, t1, t2, last_t2; 182b396bfdeSSteven Rostedt (VMware) s64 diff, outer_diff, total, last_total = 0; 183e7c15cd8SSteven Rostedt (Red Hat) u64 sample = 0; 184e7c15cd8SSteven Rostedt (Red Hat) u64 thresh = tracing_thresh; 185e7c15cd8SSteven Rostedt (Red Hat) u64 outer_sample = 0; 186e7c15cd8SSteven Rostedt (Red Hat) int ret = -1; 187b396bfdeSSteven Rostedt (VMware) unsigned int count = 0; 188e7c15cd8SSteven Rostedt (Red Hat) 189e7c15cd8SSteven Rostedt (Red Hat) do_div(thresh, NSEC_PER_USEC); /* modifies interval value */ 190e7c15cd8SSteven Rostedt (Red Hat) 1917b2c8625SSteven Rostedt (Red Hat) nmi_cpu = smp_processor_id(); 1927b2c8625SSteven Rostedt (Red Hat) nmi_total_ts = 0; 1937b2c8625SSteven Rostedt (Red Hat) nmi_count = 0; 1947b2c8625SSteven Rostedt (Red Hat) /* Make sure NMIs see this first */ 1957b2c8625SSteven Rostedt (Red Hat) barrier(); 1967b2c8625SSteven Rostedt (Red Hat) 1977b2c8625SSteven Rostedt (Red Hat) trace_hwlat_callback_enabled = true; 1987b2c8625SSteven Rostedt (Red Hat) 199e7c15cd8SSteven Rostedt (Red Hat) init_time(last_t2, 0); 200e7c15cd8SSteven Rostedt (Red Hat) start = time_get(); /* start timestamp */ 201b396bfdeSSteven Rostedt (VMware) outer_diff = 0; 202e7c15cd8SSteven Rostedt (Red Hat) 203e7c15cd8SSteven Rostedt (Red Hat) do { 204e7c15cd8SSteven Rostedt (Red Hat) 205e7c15cd8SSteven Rostedt (Red Hat) t1 = time_get(); /* we'll look for a discontinuity */ 206e7c15cd8SSteven Rostedt (Red Hat) t2 = time_get(); 207e7c15cd8SSteven Rostedt (Red Hat) 208e7c15cd8SSteven Rostedt (Red Hat) if (time_u64(last_t2)) { 209e7c15cd8SSteven Rostedt (Red Hat) /* Check the delta from outer loop (t2 to next t1) */ 210b396bfdeSSteven Rostedt (VMware) outer_diff = time_to_us(time_sub(t1, last_t2)); 211e7c15cd8SSteven Rostedt (Red Hat) /* This shouldn't happen */ 212b396bfdeSSteven Rostedt (VMware) if (outer_diff < 0) { 213e7c15cd8SSteven Rostedt (Red Hat) pr_err(BANNER "time running backwards\n"); 214e7c15cd8SSteven Rostedt (Red Hat) goto out; 215e7c15cd8SSteven Rostedt (Red Hat) } 216b396bfdeSSteven Rostedt (VMware) if (outer_diff > outer_sample) 217b396bfdeSSteven Rostedt (VMware) outer_sample = outer_diff; 218e7c15cd8SSteven Rostedt (Red Hat) } 219e7c15cd8SSteven Rostedt (Red Hat) last_t2 = t2; 220e7c15cd8SSteven Rostedt (Red Hat) 221e7c15cd8SSteven Rostedt (Red Hat) total = time_to_us(time_sub(t2, start)); /* sample width */ 222e7c15cd8SSteven Rostedt (Red Hat) 223e7c15cd8SSteven Rostedt (Red Hat) /* Check for possible overflows */ 224e7c15cd8SSteven Rostedt (Red Hat) if (total < last_total) { 225e7c15cd8SSteven Rostedt (Red Hat) pr_err("Time total overflowed\n"); 226e7c15cd8SSteven Rostedt (Red Hat) break; 227e7c15cd8SSteven Rostedt (Red Hat) } 228e7c15cd8SSteven Rostedt (Red Hat) last_total = total; 229e7c15cd8SSteven Rostedt (Red Hat) 230e7c15cd8SSteven Rostedt (Red Hat) /* This checks the inner loop (t1 to t2) */ 231e7c15cd8SSteven Rostedt (Red Hat) diff = time_to_us(time_sub(t2, t1)); /* current diff */ 232e7c15cd8SSteven Rostedt (Red Hat) 233b396bfdeSSteven Rostedt (VMware) if (diff > thresh || outer_diff > thresh) { 234b396bfdeSSteven Rostedt (VMware) if (!count) 235b396bfdeSSteven Rostedt (VMware) ktime_get_real_ts64(&s.timestamp); 236b396bfdeSSteven Rostedt (VMware) count++; 237b396bfdeSSteven Rostedt (VMware) } 238b396bfdeSSteven Rostedt (VMware) 239e7c15cd8SSteven Rostedt (Red Hat) /* This shouldn't happen */ 240e7c15cd8SSteven Rostedt (Red Hat) if (diff < 0) { 241e7c15cd8SSteven Rostedt (Red Hat) pr_err(BANNER "time running backwards\n"); 242e7c15cd8SSteven Rostedt (Red Hat) goto out; 243e7c15cd8SSteven Rostedt (Red Hat) } 244e7c15cd8SSteven Rostedt (Red Hat) 245e7c15cd8SSteven Rostedt (Red Hat) if (diff > sample) 246e7c15cd8SSteven Rostedt (Red Hat) sample = diff; /* only want highest value */ 247e7c15cd8SSteven Rostedt (Red Hat) 248e7c15cd8SSteven Rostedt (Red Hat) } while (total <= hwlat_data.sample_width); 249e7c15cd8SSteven Rostedt (Red Hat) 2507b2c8625SSteven Rostedt (Red Hat) barrier(); /* finish the above in the view for NMIs */ 2517b2c8625SSteven Rostedt (Red Hat) trace_hwlat_callback_enabled = false; 2527b2c8625SSteven Rostedt (Red Hat) barrier(); /* Make sure nmi_total_ts is no longer updated */ 2537b2c8625SSteven Rostedt (Red Hat) 254e7c15cd8SSteven Rostedt (Red Hat) ret = 0; 255e7c15cd8SSteven Rostedt (Red Hat) 256e7c15cd8SSteven Rostedt (Red Hat) /* If we exceed the threshold value, we have found a hardware latency */ 257e7c15cd8SSteven Rostedt (Red Hat) if (sample > thresh || outer_sample > thresh) { 25891edde2eSViktor Rosendahl (BMW) u64 latency; 259e7c15cd8SSteven Rostedt (Red Hat) 260e7c15cd8SSteven Rostedt (Red Hat) ret = 1; 261e7c15cd8SSteven Rostedt (Red Hat) 2627b2c8625SSteven Rostedt (Red Hat) /* We read in microseconds */ 2637b2c8625SSteven Rostedt (Red Hat) if (nmi_total_ts) 2647b2c8625SSteven Rostedt (Red Hat) do_div(nmi_total_ts, NSEC_PER_USEC); 2657b2c8625SSteven Rostedt (Red Hat) 266e7c15cd8SSteven Rostedt (Red Hat) hwlat_data.count++; 267e7c15cd8SSteven Rostedt (Red Hat) s.seqnum = hwlat_data.count; 268e7c15cd8SSteven Rostedt (Red Hat) s.duration = sample; 269e7c15cd8SSteven Rostedt (Red Hat) s.outer_duration = outer_sample; 2707b2c8625SSteven Rostedt (Red Hat) s.nmi_total_ts = nmi_total_ts; 2717b2c8625SSteven Rostedt (Red Hat) s.nmi_count = nmi_count; 272b396bfdeSSteven Rostedt (VMware) s.count = count; 273e7c15cd8SSteven Rostedt (Red Hat) trace_hwlat_sample(&s); 274e7c15cd8SSteven Rostedt (Red Hat) 27591edde2eSViktor Rosendahl (BMW) latency = max(sample, outer_sample); 27691edde2eSViktor Rosendahl (BMW) 277e7c15cd8SSteven Rostedt (Red Hat) /* Keep a running maximum ever recorded hardware latency */ 27891edde2eSViktor Rosendahl (BMW) if (latency > tr->max_latency) { 27991edde2eSViktor Rosendahl (BMW) tr->max_latency = latency; 28091edde2eSViktor Rosendahl (BMW) latency_fsnotify(tr); 28191edde2eSViktor Rosendahl (BMW) } 282e7c15cd8SSteven Rostedt (Red Hat) } 283e7c15cd8SSteven Rostedt (Red Hat) 284e7c15cd8SSteven Rostedt (Red Hat) out: 285e7c15cd8SSteven Rostedt (Red Hat) return ret; 286e7c15cd8SSteven Rostedt (Red Hat) } 287e7c15cd8SSteven Rostedt (Red Hat) 2880330f7aaSSteven Rostedt (Red Hat) static struct cpumask save_cpumask; 2890330f7aaSSteven Rostedt (Red Hat) static bool disable_migrate; 2900330f7aaSSteven Rostedt (Red Hat) 291f447c196SSteven Rostedt (VMware) static void move_to_next_cpu(void) 2920330f7aaSSteven Rostedt (Red Hat) { 293f447c196SSteven Rostedt (VMware) struct cpumask *current_mask = &save_cpumask; 29496b4833bSKevin Hao struct trace_array *tr = hwlat_trace; 2950330f7aaSSteven Rostedt (Red Hat) int next_cpu; 2960330f7aaSSteven Rostedt (Red Hat) 2970330f7aaSSteven Rostedt (Red Hat) if (disable_migrate) 2980330f7aaSSteven Rostedt (Red Hat) return; 2990330f7aaSSteven Rostedt (Red Hat) /* 3000330f7aaSSteven Rostedt (Red Hat) * If for some reason the user modifies the CPU affinity 3010c3c86bdSSrivatsa S. Bhat (VMware) * of this thread, then stop migrating for the duration 3020330f7aaSSteven Rostedt (Red Hat) * of the current test. 3030330f7aaSSteven Rostedt (Red Hat) */ 3043bd37062SSebastian Andrzej Siewior if (!cpumask_equal(current_mask, current->cpus_ptr)) 3050330f7aaSSteven Rostedt (Red Hat) goto disable; 3060330f7aaSSteven Rostedt (Red Hat) 3070330f7aaSSteven Rostedt (Red Hat) get_online_cpus(); 30896b4833bSKevin Hao cpumask_and(current_mask, cpu_online_mask, tr->tracing_cpumask); 3090330f7aaSSteven Rostedt (Red Hat) next_cpu = cpumask_next(smp_processor_id(), current_mask); 3100330f7aaSSteven Rostedt (Red Hat) put_online_cpus(); 3110330f7aaSSteven Rostedt (Red Hat) 3120330f7aaSSteven Rostedt (Red Hat) if (next_cpu >= nr_cpu_ids) 3130330f7aaSSteven Rostedt (Red Hat) next_cpu = cpumask_first(current_mask); 3140330f7aaSSteven Rostedt (Red Hat) 3150330f7aaSSteven Rostedt (Red Hat) if (next_cpu >= nr_cpu_ids) /* Shouldn't happen! */ 3160330f7aaSSteven Rostedt (Red Hat) goto disable; 3170330f7aaSSteven Rostedt (Red Hat) 3180330f7aaSSteven Rostedt (Red Hat) cpumask_clear(current_mask); 3190330f7aaSSteven Rostedt (Red Hat) cpumask_set_cpu(next_cpu, current_mask); 3200330f7aaSSteven Rostedt (Red Hat) 3210330f7aaSSteven Rostedt (Red Hat) sched_setaffinity(0, current_mask); 3220330f7aaSSteven Rostedt (Red Hat) return; 3230330f7aaSSteven Rostedt (Red Hat) 3240330f7aaSSteven Rostedt (Red Hat) disable: 3250330f7aaSSteven Rostedt (Red Hat) disable_migrate = true; 3260330f7aaSSteven Rostedt (Red Hat) } 3270330f7aaSSteven Rostedt (Red Hat) 328e7c15cd8SSteven Rostedt (Red Hat) /* 329e7c15cd8SSteven Rostedt (Red Hat) * kthread_fn - The CPU time sampling/hardware latency detection kernel thread 330e7c15cd8SSteven Rostedt (Red Hat) * 331e7c15cd8SSteven Rostedt (Red Hat) * Used to periodically sample the CPU TSC via a call to get_sample. We 332e7c15cd8SSteven Rostedt (Red Hat) * disable interrupts, which does (intentionally) introduce latency since we 333e7c15cd8SSteven Rostedt (Red Hat) * need to ensure nothing else might be running (and thus preempting). 334e7c15cd8SSteven Rostedt (Red Hat) * Obviously this should never be used in production environments. 335e7c15cd8SSteven Rostedt (Red Hat) * 3368e0f1142SLuiz Capitulino * Executes one loop interaction on each CPU in tracing_cpumask sysfs file. 337e7c15cd8SSteven Rostedt (Red Hat) */ 338e7c15cd8SSteven Rostedt (Red Hat) static int kthread_fn(void *data) 339e7c15cd8SSteven Rostedt (Red Hat) { 340e7c15cd8SSteven Rostedt (Red Hat) u64 interval; 341e7c15cd8SSteven Rostedt (Red Hat) 342e7c15cd8SSteven Rostedt (Red Hat) while (!kthread_should_stop()) { 343e7c15cd8SSteven Rostedt (Red Hat) 344*8fa826b7SDaniel Bristot de Oliveira if (hwlat_data.thread_mode == MODE_ROUND_ROBIN) 345f447c196SSteven Rostedt (VMware) move_to_next_cpu(); 3460330f7aaSSteven Rostedt (Red Hat) 347e7c15cd8SSteven Rostedt (Red Hat) local_irq_disable(); 348e7c15cd8SSteven Rostedt (Red Hat) get_sample(); 349e7c15cd8SSteven Rostedt (Red Hat) local_irq_enable(); 350e7c15cd8SSteven Rostedt (Red Hat) 351e7c15cd8SSteven Rostedt (Red Hat) mutex_lock(&hwlat_data.lock); 352e7c15cd8SSteven Rostedt (Red Hat) interval = hwlat_data.sample_window - hwlat_data.sample_width; 353e7c15cd8SSteven Rostedt (Red Hat) mutex_unlock(&hwlat_data.lock); 354e7c15cd8SSteven Rostedt (Red Hat) 355e7c15cd8SSteven Rostedt (Red Hat) do_div(interval, USEC_PER_MSEC); /* modifies interval value */ 356e7c15cd8SSteven Rostedt (Red Hat) 357e7c15cd8SSteven Rostedt (Red Hat) /* Always sleep for at least 1ms */ 358e7c15cd8SSteven Rostedt (Red Hat) if (interval < 1) 359e7c15cd8SSteven Rostedt (Red Hat) interval = 1; 360e7c15cd8SSteven Rostedt (Red Hat) 361e7c15cd8SSteven Rostedt (Red Hat) if (msleep_interruptible(interval)) 362e7c15cd8SSteven Rostedt (Red Hat) break; 363e7c15cd8SSteven Rostedt (Red Hat) } 364e7c15cd8SSteven Rostedt (Red Hat) 365e7c15cd8SSteven Rostedt (Red Hat) return 0; 366e7c15cd8SSteven Rostedt (Red Hat) } 367e7c15cd8SSteven Rostedt (Red Hat) 368*8fa826b7SDaniel Bristot de Oliveira /* 369e7c15cd8SSteven Rostedt (Red Hat) * start_kthread - Kick off the hardware latency sampling/detector kthread 370e7c15cd8SSteven Rostedt (Red Hat) * 371e7c15cd8SSteven Rostedt (Red Hat) * This starts the kernel thread that will sit and sample the CPU timestamp 372e7c15cd8SSteven Rostedt (Red Hat) * counter (TSC or similar) and look for potential hardware latencies. 373e7c15cd8SSteven Rostedt (Red Hat) */ 374e7c15cd8SSteven Rostedt (Red Hat) static int start_kthread(struct trace_array *tr) 375e7c15cd8SSteven Rostedt (Red Hat) { 376f447c196SSteven Rostedt (VMware) struct cpumask *current_mask = &save_cpumask; 377e7c15cd8SSteven Rostedt (Red Hat) struct task_struct *kthread; 378f447c196SSteven Rostedt (VMware) int next_cpu; 379f447c196SSteven Rostedt (VMware) 380310e3a4bSVasily Averin if (hwlat_kthread) 38182fbc8c4SErica Bugden return 0; 38282fbc8c4SErica Bugden 383e7c15cd8SSteven Rostedt (Red Hat) 384e7c15cd8SSteven Rostedt (Red Hat) kthread = kthread_create(kthread_fn, NULL, "hwlatd"); 385e7c15cd8SSteven Rostedt (Red Hat) if (IS_ERR(kthread)) { 386e7c15cd8SSteven Rostedt (Red Hat) pr_err(BANNER "could not start sampling thread\n"); 387e7c15cd8SSteven Rostedt (Red Hat) return -ENOMEM; 388e7c15cd8SSteven Rostedt (Red Hat) } 389f447c196SSteven Rostedt (VMware) 390*8fa826b7SDaniel Bristot de Oliveira 391*8fa826b7SDaniel Bristot de Oliveira /* Just pick the first CPU on first iteration */ 392*8fa826b7SDaniel Bristot de Oliveira get_online_cpus(); 393*8fa826b7SDaniel Bristot de Oliveira cpumask_and(current_mask, cpu_online_mask, tr->tracing_cpumask); 394*8fa826b7SDaniel Bristot de Oliveira put_online_cpus(); 395*8fa826b7SDaniel Bristot de Oliveira 396*8fa826b7SDaniel Bristot de Oliveira if (hwlat_data.thread_mode == MODE_ROUND_ROBIN) { 397*8fa826b7SDaniel Bristot de Oliveira next_cpu = cpumask_first(current_mask); 398f447c196SSteven Rostedt (VMware) cpumask_clear(current_mask); 399f447c196SSteven Rostedt (VMware) cpumask_set_cpu(next_cpu, current_mask); 400*8fa826b7SDaniel Bristot de Oliveira 401*8fa826b7SDaniel Bristot de Oliveira } 402*8fa826b7SDaniel Bristot de Oliveira 403f447c196SSteven Rostedt (VMware) sched_setaffinity(kthread->pid, current_mask); 404f447c196SSteven Rostedt (VMware) 405e7c15cd8SSteven Rostedt (Red Hat) hwlat_kthread = kthread; 406e7c15cd8SSteven Rostedt (Red Hat) wake_up_process(kthread); 407e7c15cd8SSteven Rostedt (Red Hat) 408e7c15cd8SSteven Rostedt (Red Hat) return 0; 409e7c15cd8SSteven Rostedt (Red Hat) } 410e7c15cd8SSteven Rostedt (Red Hat) 411*8fa826b7SDaniel Bristot de Oliveira /* 412f2cc020dSIngo Molnar * stop_kthread - Inform the hardware latency sampling/detector kthread to stop 413e7c15cd8SSteven Rostedt (Red Hat) * 414e7c15cd8SSteven Rostedt (Red Hat) * This kicks the running hardware latency sampling/detector kernel thread and 415e7c15cd8SSteven Rostedt (Red Hat) * tells it to stop sampling now. Use this on unload and at system shutdown. 416e7c15cd8SSteven Rostedt (Red Hat) */ 417e7c15cd8SSteven Rostedt (Red Hat) static void stop_kthread(void) 418e7c15cd8SSteven Rostedt (Red Hat) { 419e7c15cd8SSteven Rostedt (Red Hat) if (!hwlat_kthread) 420e7c15cd8SSteven Rostedt (Red Hat) return; 421e7c15cd8SSteven Rostedt (Red Hat) kthread_stop(hwlat_kthread); 422e7c15cd8SSteven Rostedt (Red Hat) hwlat_kthread = NULL; 423e7c15cd8SSteven Rostedt (Red Hat) } 424e7c15cd8SSteven Rostedt (Red Hat) 425e7c15cd8SSteven Rostedt (Red Hat) /* 426e7c15cd8SSteven Rostedt (Red Hat) * hwlat_read - Wrapper read function for reading both window and width 427e7c15cd8SSteven Rostedt (Red Hat) * @filp: The active open file structure 428e7c15cd8SSteven Rostedt (Red Hat) * @ubuf: The userspace provided buffer to read value into 429e7c15cd8SSteven Rostedt (Red Hat) * @cnt: The maximum number of bytes to read 430e7c15cd8SSteven Rostedt (Red Hat) * @ppos: The current "file" position 431e7c15cd8SSteven Rostedt (Red Hat) * 432e7c15cd8SSteven Rostedt (Red Hat) * This function provides a generic read implementation for the global state 433e7c15cd8SSteven Rostedt (Red Hat) * "hwlat_data" structure filesystem entries. 434e7c15cd8SSteven Rostedt (Red Hat) */ 435e7c15cd8SSteven Rostedt (Red Hat) static ssize_t hwlat_read(struct file *filp, char __user *ubuf, 436e7c15cd8SSteven Rostedt (Red Hat) size_t cnt, loff_t *ppos) 437e7c15cd8SSteven Rostedt (Red Hat) { 438e7c15cd8SSteven Rostedt (Red Hat) char buf[U64STR_SIZE]; 439e7c15cd8SSteven Rostedt (Red Hat) u64 *entry = filp->private_data; 440e7c15cd8SSteven Rostedt (Red Hat) u64 val; 441e7c15cd8SSteven Rostedt (Red Hat) int len; 442e7c15cd8SSteven Rostedt (Red Hat) 443e7c15cd8SSteven Rostedt (Red Hat) if (!entry) 444e7c15cd8SSteven Rostedt (Red Hat) return -EFAULT; 445e7c15cd8SSteven Rostedt (Red Hat) 446e7c15cd8SSteven Rostedt (Red Hat) if (cnt > sizeof(buf)) 447e7c15cd8SSteven Rostedt (Red Hat) cnt = sizeof(buf); 448e7c15cd8SSteven Rostedt (Red Hat) 449e7c15cd8SSteven Rostedt (Red Hat) val = *entry; 450e7c15cd8SSteven Rostedt (Red Hat) 451e7c15cd8SSteven Rostedt (Red Hat) len = snprintf(buf, sizeof(buf), "%llu\n", val); 452e7c15cd8SSteven Rostedt (Red Hat) 453e7c15cd8SSteven Rostedt (Red Hat) return simple_read_from_buffer(ubuf, cnt, ppos, buf, len); 454e7c15cd8SSteven Rostedt (Red Hat) } 455e7c15cd8SSteven Rostedt (Red Hat) 456e7c15cd8SSteven Rostedt (Red Hat) /** 457e7c15cd8SSteven Rostedt (Red Hat) * hwlat_width_write - Write function for "width" entry 458e7c15cd8SSteven Rostedt (Red Hat) * @filp: The active open file structure 459e7c15cd8SSteven Rostedt (Red Hat) * @ubuf: The user buffer that contains the value to write 460e7c15cd8SSteven Rostedt (Red Hat) * @cnt: The maximum number of bytes to write to "file" 461e7c15cd8SSteven Rostedt (Red Hat) * @ppos: The current position in @file 462e7c15cd8SSteven Rostedt (Red Hat) * 463e7c15cd8SSteven Rostedt (Red Hat) * This function provides a write implementation for the "width" interface 464e7c15cd8SSteven Rostedt (Red Hat) * to the hardware latency detector. It can be used to configure 465e7c15cd8SSteven Rostedt (Red Hat) * for how many us of the total window us we will actively sample for any 466e7c15cd8SSteven Rostedt (Red Hat) * hardware-induced latency periods. Obviously, it is not possible to 467e7c15cd8SSteven Rostedt (Red Hat) * sample constantly and have the system respond to a sample reader, or, 468e7c15cd8SSteven Rostedt (Red Hat) * worse, without having the system appear to have gone out to lunch. It 469e7c15cd8SSteven Rostedt (Red Hat) * is enforced that width is less that the total window size. 470e7c15cd8SSteven Rostedt (Red Hat) */ 471e7c15cd8SSteven Rostedt (Red Hat) static ssize_t 472e7c15cd8SSteven Rostedt (Red Hat) hwlat_width_write(struct file *filp, const char __user *ubuf, 473e7c15cd8SSteven Rostedt (Red Hat) size_t cnt, loff_t *ppos) 474e7c15cd8SSteven Rostedt (Red Hat) { 475e7c15cd8SSteven Rostedt (Red Hat) u64 val; 476e7c15cd8SSteven Rostedt (Red Hat) int err; 477e7c15cd8SSteven Rostedt (Red Hat) 478e7c15cd8SSteven Rostedt (Red Hat) err = kstrtoull_from_user(ubuf, cnt, 10, &val); 479e7c15cd8SSteven Rostedt (Red Hat) if (err) 480e7c15cd8SSteven Rostedt (Red Hat) return err; 481e7c15cd8SSteven Rostedt (Red Hat) 482e7c15cd8SSteven Rostedt (Red Hat) mutex_lock(&hwlat_data.lock); 483e7c15cd8SSteven Rostedt (Red Hat) if (val < hwlat_data.sample_window) 484e7c15cd8SSteven Rostedt (Red Hat) hwlat_data.sample_width = val; 485e7c15cd8SSteven Rostedt (Red Hat) else 486e7c15cd8SSteven Rostedt (Red Hat) err = -EINVAL; 487e7c15cd8SSteven Rostedt (Red Hat) mutex_unlock(&hwlat_data.lock); 488e7c15cd8SSteven Rostedt (Red Hat) 489e7c15cd8SSteven Rostedt (Red Hat) if (err) 490e7c15cd8SSteven Rostedt (Red Hat) return err; 491e7c15cd8SSteven Rostedt (Red Hat) 492e7c15cd8SSteven Rostedt (Red Hat) return cnt; 493e7c15cd8SSteven Rostedt (Red Hat) } 494e7c15cd8SSteven Rostedt (Red Hat) 495e7c15cd8SSteven Rostedt (Red Hat) /** 496e7c15cd8SSteven Rostedt (Red Hat) * hwlat_window_write - Write function for "window" entry 497e7c15cd8SSteven Rostedt (Red Hat) * @filp: The active open file structure 498e7c15cd8SSteven Rostedt (Red Hat) * @ubuf: The user buffer that contains the value to write 499e7c15cd8SSteven Rostedt (Red Hat) * @cnt: The maximum number of bytes to write to "file" 500e7c15cd8SSteven Rostedt (Red Hat) * @ppos: The current position in @file 501e7c15cd8SSteven Rostedt (Red Hat) * 502e7c15cd8SSteven Rostedt (Red Hat) * This function provides a write implementation for the "window" interface 5032b5894ccSQiujun Huang * to the hardware latency detector. The window is the total time 504e7c15cd8SSteven Rostedt (Red Hat) * in us that will be considered one sample period. Conceptually, windows 505e7c15cd8SSteven Rostedt (Red Hat) * occur back-to-back and contain a sample width period during which 506e7c15cd8SSteven Rostedt (Red Hat) * actual sampling occurs. Can be used to write a new total window size. It 5072b5894ccSQiujun Huang * is enforced that any value written must be greater than the sample width 508e7c15cd8SSteven Rostedt (Red Hat) * size, or an error results. 509e7c15cd8SSteven Rostedt (Red Hat) */ 510e7c15cd8SSteven Rostedt (Red Hat) static ssize_t 511e7c15cd8SSteven Rostedt (Red Hat) hwlat_window_write(struct file *filp, const char __user *ubuf, 512e7c15cd8SSteven Rostedt (Red Hat) size_t cnt, loff_t *ppos) 513e7c15cd8SSteven Rostedt (Red Hat) { 514e7c15cd8SSteven Rostedt (Red Hat) u64 val; 515e7c15cd8SSteven Rostedt (Red Hat) int err; 516e7c15cd8SSteven Rostedt (Red Hat) 517e7c15cd8SSteven Rostedt (Red Hat) err = kstrtoull_from_user(ubuf, cnt, 10, &val); 518e7c15cd8SSteven Rostedt (Red Hat) if (err) 519e7c15cd8SSteven Rostedt (Red Hat) return err; 520e7c15cd8SSteven Rostedt (Red Hat) 521e7c15cd8SSteven Rostedt (Red Hat) mutex_lock(&hwlat_data.lock); 522e7c15cd8SSteven Rostedt (Red Hat) if (hwlat_data.sample_width < val) 523e7c15cd8SSteven Rostedt (Red Hat) hwlat_data.sample_window = val; 524e7c15cd8SSteven Rostedt (Red Hat) else 525e7c15cd8SSteven Rostedt (Red Hat) err = -EINVAL; 526e7c15cd8SSteven Rostedt (Red Hat) mutex_unlock(&hwlat_data.lock); 527e7c15cd8SSteven Rostedt (Red Hat) 528e7c15cd8SSteven Rostedt (Red Hat) if (err) 529e7c15cd8SSteven Rostedt (Red Hat) return err; 530e7c15cd8SSteven Rostedt (Red Hat) 531e7c15cd8SSteven Rostedt (Red Hat) return cnt; 532e7c15cd8SSteven Rostedt (Red Hat) } 533e7c15cd8SSteven Rostedt (Red Hat) 534*8fa826b7SDaniel Bristot de Oliveira static void *s_mode_start(struct seq_file *s, loff_t *pos) 535*8fa826b7SDaniel Bristot de Oliveira { 536*8fa826b7SDaniel Bristot de Oliveira int mode = *pos; 537*8fa826b7SDaniel Bristot de Oliveira 538*8fa826b7SDaniel Bristot de Oliveira mutex_lock(&hwlat_data.lock); 539*8fa826b7SDaniel Bristot de Oliveira 540*8fa826b7SDaniel Bristot de Oliveira if (mode >= MODE_MAX) 541*8fa826b7SDaniel Bristot de Oliveira return NULL; 542*8fa826b7SDaniel Bristot de Oliveira 543*8fa826b7SDaniel Bristot de Oliveira return pos; 544*8fa826b7SDaniel Bristot de Oliveira } 545*8fa826b7SDaniel Bristot de Oliveira 546*8fa826b7SDaniel Bristot de Oliveira static void *s_mode_next(struct seq_file *s, void *v, loff_t *pos) 547*8fa826b7SDaniel Bristot de Oliveira { 548*8fa826b7SDaniel Bristot de Oliveira int mode = ++(*pos); 549*8fa826b7SDaniel Bristot de Oliveira 550*8fa826b7SDaniel Bristot de Oliveira if (mode >= MODE_MAX) 551*8fa826b7SDaniel Bristot de Oliveira return NULL; 552*8fa826b7SDaniel Bristot de Oliveira 553*8fa826b7SDaniel Bristot de Oliveira return pos; 554*8fa826b7SDaniel Bristot de Oliveira } 555*8fa826b7SDaniel Bristot de Oliveira 556*8fa826b7SDaniel Bristot de Oliveira static int s_mode_show(struct seq_file *s, void *v) 557*8fa826b7SDaniel Bristot de Oliveira { 558*8fa826b7SDaniel Bristot de Oliveira loff_t *pos = v; 559*8fa826b7SDaniel Bristot de Oliveira int mode = *pos; 560*8fa826b7SDaniel Bristot de Oliveira 561*8fa826b7SDaniel Bristot de Oliveira if (mode == hwlat_data.thread_mode) 562*8fa826b7SDaniel Bristot de Oliveira seq_printf(s, "[%s]", thread_mode_str[mode]); 563*8fa826b7SDaniel Bristot de Oliveira else 564*8fa826b7SDaniel Bristot de Oliveira seq_printf(s, "%s", thread_mode_str[mode]); 565*8fa826b7SDaniel Bristot de Oliveira 566*8fa826b7SDaniel Bristot de Oliveira if (mode != MODE_MAX) 567*8fa826b7SDaniel Bristot de Oliveira seq_puts(s, " "); 568*8fa826b7SDaniel Bristot de Oliveira 569*8fa826b7SDaniel Bristot de Oliveira return 0; 570*8fa826b7SDaniel Bristot de Oliveira } 571*8fa826b7SDaniel Bristot de Oliveira 572*8fa826b7SDaniel Bristot de Oliveira static void s_mode_stop(struct seq_file *s, void *v) 573*8fa826b7SDaniel Bristot de Oliveira { 574*8fa826b7SDaniel Bristot de Oliveira seq_puts(s, "\n"); 575*8fa826b7SDaniel Bristot de Oliveira mutex_unlock(&hwlat_data.lock); 576*8fa826b7SDaniel Bristot de Oliveira } 577*8fa826b7SDaniel Bristot de Oliveira 578*8fa826b7SDaniel Bristot de Oliveira static const struct seq_operations thread_mode_seq_ops = { 579*8fa826b7SDaniel Bristot de Oliveira .start = s_mode_start, 580*8fa826b7SDaniel Bristot de Oliveira .next = s_mode_next, 581*8fa826b7SDaniel Bristot de Oliveira .show = s_mode_show, 582*8fa826b7SDaniel Bristot de Oliveira .stop = s_mode_stop 583*8fa826b7SDaniel Bristot de Oliveira }; 584*8fa826b7SDaniel Bristot de Oliveira 585*8fa826b7SDaniel Bristot de Oliveira static int hwlat_mode_open(struct inode *inode, struct file *file) 586*8fa826b7SDaniel Bristot de Oliveira { 587*8fa826b7SDaniel Bristot de Oliveira return seq_open(file, &thread_mode_seq_ops); 588*8fa826b7SDaniel Bristot de Oliveira }; 589*8fa826b7SDaniel Bristot de Oliveira 590*8fa826b7SDaniel Bristot de Oliveira static void hwlat_tracer_start(struct trace_array *tr); 591*8fa826b7SDaniel Bristot de Oliveira static void hwlat_tracer_stop(struct trace_array *tr); 592*8fa826b7SDaniel Bristot de Oliveira 593*8fa826b7SDaniel Bristot de Oliveira /** 594*8fa826b7SDaniel Bristot de Oliveira * hwlat_mode_write - Write function for "mode" entry 595*8fa826b7SDaniel Bristot de Oliveira * @filp: The active open file structure 596*8fa826b7SDaniel Bristot de Oliveira * @ubuf: The user buffer that contains the value to write 597*8fa826b7SDaniel Bristot de Oliveira * @cnt: The maximum number of bytes to write to "file" 598*8fa826b7SDaniel Bristot de Oliveira * @ppos: The current position in @file 599*8fa826b7SDaniel Bristot de Oliveira * 600*8fa826b7SDaniel Bristot de Oliveira * This function provides a write implementation for the "mode" interface 601*8fa826b7SDaniel Bristot de Oliveira * to the hardware latency detector. hwlatd has different operation modes. 602*8fa826b7SDaniel Bristot de Oliveira * The "none" sets the allowed cpumask for a single hwlatd thread at the 603*8fa826b7SDaniel Bristot de Oliveira * startup and lets the scheduler handle the migration. The default mode is 604*8fa826b7SDaniel Bristot de Oliveira * the "round-robin" one, in which a single hwlatd thread runs, migrating 605*8fa826b7SDaniel Bristot de Oliveira * among the allowed CPUs in a round-robin fashion. 606*8fa826b7SDaniel Bristot de Oliveira */ 607*8fa826b7SDaniel Bristot de Oliveira static ssize_t hwlat_mode_write(struct file *filp, const char __user *ubuf, 608*8fa826b7SDaniel Bristot de Oliveira size_t cnt, loff_t *ppos) 609*8fa826b7SDaniel Bristot de Oliveira { 610*8fa826b7SDaniel Bristot de Oliveira struct trace_array *tr = hwlat_trace; 611*8fa826b7SDaniel Bristot de Oliveira const char *mode; 612*8fa826b7SDaniel Bristot de Oliveira char buf[64]; 613*8fa826b7SDaniel Bristot de Oliveira int ret, i; 614*8fa826b7SDaniel Bristot de Oliveira 615*8fa826b7SDaniel Bristot de Oliveira if (cnt >= sizeof(buf)) 616*8fa826b7SDaniel Bristot de Oliveira return -EINVAL; 617*8fa826b7SDaniel Bristot de Oliveira 618*8fa826b7SDaniel Bristot de Oliveira if (copy_from_user(buf, ubuf, cnt)) 619*8fa826b7SDaniel Bristot de Oliveira return -EFAULT; 620*8fa826b7SDaniel Bristot de Oliveira 621*8fa826b7SDaniel Bristot de Oliveira buf[cnt] = 0; 622*8fa826b7SDaniel Bristot de Oliveira 623*8fa826b7SDaniel Bristot de Oliveira mode = strstrip(buf); 624*8fa826b7SDaniel Bristot de Oliveira 625*8fa826b7SDaniel Bristot de Oliveira ret = -EINVAL; 626*8fa826b7SDaniel Bristot de Oliveira 627*8fa826b7SDaniel Bristot de Oliveira /* 628*8fa826b7SDaniel Bristot de Oliveira * trace_types_lock is taken to avoid concurrency on start/stop 629*8fa826b7SDaniel Bristot de Oliveira * and hwlat_busy. 630*8fa826b7SDaniel Bristot de Oliveira */ 631*8fa826b7SDaniel Bristot de Oliveira mutex_lock(&trace_types_lock); 632*8fa826b7SDaniel Bristot de Oliveira if (hwlat_busy) 633*8fa826b7SDaniel Bristot de Oliveira hwlat_tracer_stop(tr); 634*8fa826b7SDaniel Bristot de Oliveira 635*8fa826b7SDaniel Bristot de Oliveira mutex_lock(&hwlat_data.lock); 636*8fa826b7SDaniel Bristot de Oliveira 637*8fa826b7SDaniel Bristot de Oliveira for (i = 0; i < MODE_MAX; i++) { 638*8fa826b7SDaniel Bristot de Oliveira if (strcmp(mode, thread_mode_str[i]) == 0) { 639*8fa826b7SDaniel Bristot de Oliveira hwlat_data.thread_mode = i; 640*8fa826b7SDaniel Bristot de Oliveira ret = cnt; 641*8fa826b7SDaniel Bristot de Oliveira } 642*8fa826b7SDaniel Bristot de Oliveira } 643*8fa826b7SDaniel Bristot de Oliveira 644*8fa826b7SDaniel Bristot de Oliveira mutex_unlock(&hwlat_data.lock); 645*8fa826b7SDaniel Bristot de Oliveira 646*8fa826b7SDaniel Bristot de Oliveira if (hwlat_busy) 647*8fa826b7SDaniel Bristot de Oliveira hwlat_tracer_start(tr); 648*8fa826b7SDaniel Bristot de Oliveira mutex_unlock(&trace_types_lock); 649*8fa826b7SDaniel Bristot de Oliveira 650*8fa826b7SDaniel Bristot de Oliveira *ppos += cnt; 651*8fa826b7SDaniel Bristot de Oliveira 652*8fa826b7SDaniel Bristot de Oliveira 653*8fa826b7SDaniel Bristot de Oliveira 654*8fa826b7SDaniel Bristot de Oliveira return ret; 655*8fa826b7SDaniel Bristot de Oliveira } 656*8fa826b7SDaniel Bristot de Oliveira 657e7c15cd8SSteven Rostedt (Red Hat) static const struct file_operations width_fops = { 658e7c15cd8SSteven Rostedt (Red Hat) .open = tracing_open_generic, 659e7c15cd8SSteven Rostedt (Red Hat) .read = hwlat_read, 660e7c15cd8SSteven Rostedt (Red Hat) .write = hwlat_width_write, 661e7c15cd8SSteven Rostedt (Red Hat) }; 662e7c15cd8SSteven Rostedt (Red Hat) 663e7c15cd8SSteven Rostedt (Red Hat) static const struct file_operations window_fops = { 664e7c15cd8SSteven Rostedt (Red Hat) .open = tracing_open_generic, 665e7c15cd8SSteven Rostedt (Red Hat) .read = hwlat_read, 666e7c15cd8SSteven Rostedt (Red Hat) .write = hwlat_window_write, 667e7c15cd8SSteven Rostedt (Red Hat) }; 668e7c15cd8SSteven Rostedt (Red Hat) 669*8fa826b7SDaniel Bristot de Oliveira static const struct file_operations thread_mode_fops = { 670*8fa826b7SDaniel Bristot de Oliveira .open = hwlat_mode_open, 671*8fa826b7SDaniel Bristot de Oliveira .read = seq_read, 672*8fa826b7SDaniel Bristot de Oliveira .llseek = seq_lseek, 673*8fa826b7SDaniel Bristot de Oliveira .release = seq_release, 674*8fa826b7SDaniel Bristot de Oliveira .write = hwlat_mode_write 675*8fa826b7SDaniel Bristot de Oliveira }; 676e7c15cd8SSteven Rostedt (Red Hat) /** 677e7c15cd8SSteven Rostedt (Red Hat) * init_tracefs - A function to initialize the tracefs interface files 678e7c15cd8SSteven Rostedt (Red Hat) * 679e7c15cd8SSteven Rostedt (Red Hat) * This function creates entries in tracefs for "hwlat_detector". 680e7c15cd8SSteven Rostedt (Red Hat) * It creates the hwlat_detector directory in the tracing directory, 681e7c15cd8SSteven Rostedt (Red Hat) * and within that directory is the count, width and window files to 682e7c15cd8SSteven Rostedt (Red Hat) * change and view those values. 683e7c15cd8SSteven Rostedt (Red Hat) */ 684e7c15cd8SSteven Rostedt (Red Hat) static int init_tracefs(void) 685e7c15cd8SSteven Rostedt (Red Hat) { 68622c36b18SWei Yang int ret; 687e7c15cd8SSteven Rostedt (Red Hat) struct dentry *top_dir; 688e7c15cd8SSteven Rostedt (Red Hat) 68922c36b18SWei Yang ret = tracing_init_dentry(); 69022c36b18SWei Yang if (ret) 691e7c15cd8SSteven Rostedt (Red Hat) return -ENOMEM; 692e7c15cd8SSteven Rostedt (Red Hat) 69322c36b18SWei Yang top_dir = tracefs_create_dir("hwlat_detector", NULL); 694e7c15cd8SSteven Rostedt (Red Hat) if (!top_dir) 695e7c15cd8SSteven Rostedt (Red Hat) return -ENOMEM; 696e7c15cd8SSteven Rostedt (Red Hat) 697e7c15cd8SSteven Rostedt (Red Hat) hwlat_sample_window = tracefs_create_file("window", 0640, 698e7c15cd8SSteven Rostedt (Red Hat) top_dir, 699e7c15cd8SSteven Rostedt (Red Hat) &hwlat_data.sample_window, 700e7c15cd8SSteven Rostedt (Red Hat) &window_fops); 701e7c15cd8SSteven Rostedt (Red Hat) if (!hwlat_sample_window) 702e7c15cd8SSteven Rostedt (Red Hat) goto err; 703e7c15cd8SSteven Rostedt (Red Hat) 704e7c15cd8SSteven Rostedt (Red Hat) hwlat_sample_width = tracefs_create_file("width", 0644, 705e7c15cd8SSteven Rostedt (Red Hat) top_dir, 706e7c15cd8SSteven Rostedt (Red Hat) &hwlat_data.sample_width, 707e7c15cd8SSteven Rostedt (Red Hat) &width_fops); 708e7c15cd8SSteven Rostedt (Red Hat) if (!hwlat_sample_width) 709e7c15cd8SSteven Rostedt (Red Hat) goto err; 710e7c15cd8SSteven Rostedt (Red Hat) 711*8fa826b7SDaniel Bristot de Oliveira hwlat_thread_mode = trace_create_file("mode", 0644, 712*8fa826b7SDaniel Bristot de Oliveira top_dir, 713*8fa826b7SDaniel Bristot de Oliveira NULL, 714*8fa826b7SDaniel Bristot de Oliveira &thread_mode_fops); 715*8fa826b7SDaniel Bristot de Oliveira if (!hwlat_thread_mode) 716*8fa826b7SDaniel Bristot de Oliveira goto err; 717*8fa826b7SDaniel Bristot de Oliveira 718e7c15cd8SSteven Rostedt (Red Hat) return 0; 719e7c15cd8SSteven Rostedt (Red Hat) 720e7c15cd8SSteven Rostedt (Red Hat) err: 721a3d1e7ebSAl Viro tracefs_remove(top_dir); 722e7c15cd8SSteven Rostedt (Red Hat) return -ENOMEM; 723e7c15cd8SSteven Rostedt (Red Hat) } 724e7c15cd8SSteven Rostedt (Red Hat) 725e7c15cd8SSteven Rostedt (Red Hat) static void hwlat_tracer_start(struct trace_array *tr) 726e7c15cd8SSteven Rostedt (Red Hat) { 727e7c15cd8SSteven Rostedt (Red Hat) int err; 728e7c15cd8SSteven Rostedt (Red Hat) 729e7c15cd8SSteven Rostedt (Red Hat) err = start_kthread(tr); 730e7c15cd8SSteven Rostedt (Red Hat) if (err) 731e7c15cd8SSteven Rostedt (Red Hat) pr_err(BANNER "Cannot start hwlat kthread\n"); 732e7c15cd8SSteven Rostedt (Red Hat) } 733e7c15cd8SSteven Rostedt (Red Hat) 734e7c15cd8SSteven Rostedt (Red Hat) static void hwlat_tracer_stop(struct trace_array *tr) 735e7c15cd8SSteven Rostedt (Red Hat) { 736e7c15cd8SSteven Rostedt (Red Hat) stop_kthread(); 737e7c15cd8SSteven Rostedt (Red Hat) } 738e7c15cd8SSteven Rostedt (Red Hat) 739e7c15cd8SSteven Rostedt (Red Hat) static int hwlat_tracer_init(struct trace_array *tr) 740e7c15cd8SSteven Rostedt (Red Hat) { 741e7c15cd8SSteven Rostedt (Red Hat) /* Only allow one instance to enable this */ 742e7c15cd8SSteven Rostedt (Red Hat) if (hwlat_busy) 743e7c15cd8SSteven Rostedt (Red Hat) return -EBUSY; 744e7c15cd8SSteven Rostedt (Red Hat) 745e7c15cd8SSteven Rostedt (Red Hat) hwlat_trace = tr; 746e7c15cd8SSteven Rostedt (Red Hat) 7470330f7aaSSteven Rostedt (Red Hat) disable_migrate = false; 748e7c15cd8SSteven Rostedt (Red Hat) hwlat_data.count = 0; 749e7c15cd8SSteven Rostedt (Red Hat) tr->max_latency = 0; 750e7c15cd8SSteven Rostedt (Red Hat) save_tracing_thresh = tracing_thresh; 751e7c15cd8SSteven Rostedt (Red Hat) 752e7c15cd8SSteven Rostedt (Red Hat) /* tracing_thresh is in nsecs, we speak in usecs */ 753e7c15cd8SSteven Rostedt (Red Hat) if (!tracing_thresh) 754e7c15cd8SSteven Rostedt (Red Hat) tracing_thresh = last_tracing_thresh; 755e7c15cd8SSteven Rostedt (Red Hat) 756e7c15cd8SSteven Rostedt (Red Hat) if (tracer_tracing_is_on(tr)) 757e7c15cd8SSteven Rostedt (Red Hat) hwlat_tracer_start(tr); 758e7c15cd8SSteven Rostedt (Red Hat) 759e7c15cd8SSteven Rostedt (Red Hat) hwlat_busy = true; 760e7c15cd8SSteven Rostedt (Red Hat) 761e7c15cd8SSteven Rostedt (Red Hat) return 0; 762e7c15cd8SSteven Rostedt (Red Hat) } 763e7c15cd8SSteven Rostedt (Red Hat) 764e7c15cd8SSteven Rostedt (Red Hat) static void hwlat_tracer_reset(struct trace_array *tr) 765e7c15cd8SSteven Rostedt (Red Hat) { 766e7c15cd8SSteven Rostedt (Red Hat) stop_kthread(); 767e7c15cd8SSteven Rostedt (Red Hat) 768e7c15cd8SSteven Rostedt (Red Hat) /* the tracing threshold is static between runs */ 769e7c15cd8SSteven Rostedt (Red Hat) last_tracing_thresh = tracing_thresh; 770e7c15cd8SSteven Rostedt (Red Hat) 771e7c15cd8SSteven Rostedt (Red Hat) tracing_thresh = save_tracing_thresh; 772e7c15cd8SSteven Rostedt (Red Hat) hwlat_busy = false; 773e7c15cd8SSteven Rostedt (Red Hat) } 774e7c15cd8SSteven Rostedt (Red Hat) 775e7c15cd8SSteven Rostedt (Red Hat) static struct tracer hwlat_tracer __read_mostly = 776e7c15cd8SSteven Rostedt (Red Hat) { 777e7c15cd8SSteven Rostedt (Red Hat) .name = "hwlat", 778e7c15cd8SSteven Rostedt (Red Hat) .init = hwlat_tracer_init, 779e7c15cd8SSteven Rostedt (Red Hat) .reset = hwlat_tracer_reset, 780e7c15cd8SSteven Rostedt (Red Hat) .start = hwlat_tracer_start, 781e7c15cd8SSteven Rostedt (Red Hat) .stop = hwlat_tracer_stop, 782e7c15cd8SSteven Rostedt (Red Hat) .allow_instances = true, 783e7c15cd8SSteven Rostedt (Red Hat) }; 784e7c15cd8SSteven Rostedt (Red Hat) 785e7c15cd8SSteven Rostedt (Red Hat) __init static int init_hwlat_tracer(void) 786e7c15cd8SSteven Rostedt (Red Hat) { 787e7c15cd8SSteven Rostedt (Red Hat) int ret; 788e7c15cd8SSteven Rostedt (Red Hat) 789e7c15cd8SSteven Rostedt (Red Hat) mutex_init(&hwlat_data.lock); 790e7c15cd8SSteven Rostedt (Red Hat) 791e7c15cd8SSteven Rostedt (Red Hat) ret = register_tracer(&hwlat_tracer); 792e7c15cd8SSteven Rostedt (Red Hat) if (ret) 793e7c15cd8SSteven Rostedt (Red Hat) return ret; 794e7c15cd8SSteven Rostedt (Red Hat) 795e7c15cd8SSteven Rostedt (Red Hat) init_tracefs(); 796e7c15cd8SSteven Rostedt (Red Hat) 797e7c15cd8SSteven Rostedt (Red Hat) return 0; 798e7c15cd8SSteven Rostedt (Red Hat) } 799e7c15cd8SSteven Rostedt (Red Hat) late_initcall(init_hwlat_tracer); 800