1b886d83cSThomas Gleixner // SPDX-License-Identifier: GPL-2.0-only
29745512cSArjan van de Ven /*
39745512cSArjan van de Ven * latencytop.c: Latency display infrastructure
49745512cSArjan van de Ven *
59745512cSArjan van de Ven * (C) Copyright 2008 Intel Corporation
69745512cSArjan van de Ven * Author: Arjan van de Ven <arjan@linux.intel.com>
79745512cSArjan van de Ven */
8ad0b0fd5SArjan van de Ven
9ad0b0fd5SArjan van de Ven /*
10ad0b0fd5SArjan van de Ven * CONFIG_LATENCYTOP enables a kernel latency tracking infrastructure that is
11ad0b0fd5SArjan van de Ven * used by the "latencytop" userspace tool. The latency that is tracked is not
12ad0b0fd5SArjan van de Ven * the 'traditional' interrupt latency (which is primarily caused by something
13ad0b0fd5SArjan van de Ven * else consuming CPU), but instead, it is the latency an application encounters
14ad0b0fd5SArjan van de Ven * because the kernel sleeps on its behalf for various reasons.
15ad0b0fd5SArjan van de Ven *
16ad0b0fd5SArjan van de Ven * This code tracks 2 levels of statistics:
17ad0b0fd5SArjan van de Ven * 1) System level latency
18ad0b0fd5SArjan van de Ven * 2) Per process latency
19ad0b0fd5SArjan van de Ven *
20ad0b0fd5SArjan van de Ven * The latency is stored in fixed sized data structures in an accumulated form;
21ad0b0fd5SArjan van de Ven * if the "same" latency cause is hit twice, this will be tracked as one entry
22ad0b0fd5SArjan van de Ven * in the data structure. Both the count, total accumulated latency and maximum
23ad0b0fd5SArjan van de Ven * latency are tracked in this data structure. When the fixed size structure is
24ad0b0fd5SArjan van de Ven * full, no new causes are tracked until the buffer is flushed by writing to
25ad0b0fd5SArjan van de Ven * the /proc file; the userspace tool does this on a regular basis.
26ad0b0fd5SArjan van de Ven *
27ad0b0fd5SArjan van de Ven * A latency cause is identified by a stringified backtrace at the point that
28ad0b0fd5SArjan van de Ven * the scheduler gets invoked. The userland tool will use this string to
29ad0b0fd5SArjan van de Ven * identify the cause of the latency in human readable form.
30ad0b0fd5SArjan van de Ven *
31ad0b0fd5SArjan van de Ven * The information is exported via /proc/latency_stats and /proc/<pid>/latency.
32ad0b0fd5SArjan van de Ven * These files look like this:
33ad0b0fd5SArjan van de Ven *
34ad0b0fd5SArjan van de Ven * Latency Top version : v0.1
35ad0b0fd5SArjan van de Ven * 70 59433 4897 i915_irq_wait drm_ioctl vfs_ioctl do_vfs_ioctl sys_ioctl
36ad0b0fd5SArjan van de Ven * | | | |
37ad0b0fd5SArjan van de Ven * | | | +----> the stringified backtrace
38ad0b0fd5SArjan van de Ven * | | +---------> The maximum latency for this entry in microseconds
39ad0b0fd5SArjan van de Ven * | +--------------> The accumulated latency for this entry (microseconds)
40ad0b0fd5SArjan van de Ven * +-------------------> The number of times this entry is hit
41ad0b0fd5SArjan van de Ven *
42ad0b0fd5SArjan van de Ven * (note: the average latency is the accumulated latency divided by the number
43ad0b0fd5SArjan van de Ven * of times)
44ad0b0fd5SArjan van de Ven */
45ad0b0fd5SArjan van de Ven
469745512cSArjan van de Ven #include <linux/kallsyms.h>
479745512cSArjan van de Ven #include <linux/seq_file.h>
489745512cSArjan van de Ven #include <linux/notifier.h>
499745512cSArjan van de Ven #include <linux/spinlock.h>
509745512cSArjan van de Ven #include <linux/proc_fs.h>
51cb251765SMel Gorman #include <linux/latencytop.h>
529984de1aSPaul Gortmaker #include <linux/export.h>
539745512cSArjan van de Ven #include <linux/sched.h>
54b17b0153SIngo Molnar #include <linux/sched/debug.h>
553905f9adSIngo Molnar #include <linux/sched/stat.h>
569745512cSArjan van de Ven #include <linux/list.h>
579745512cSArjan van de Ven #include <linux/stacktrace.h>
58988f11e0Sliaohua #include <linux/sysctl.h>
599745512cSArjan van de Ven
60757455d4SThomas Gleixner static DEFINE_RAW_SPINLOCK(latency_lock);
619745512cSArjan van de Ven
629745512cSArjan van de Ven #define MAXLR 128
639745512cSArjan van de Ven static struct latency_record latency_record[MAXLR];
649745512cSArjan van de Ven
659745512cSArjan van de Ven int latencytop_enabled;
669745512cSArjan van de Ven
67988f11e0Sliaohua #ifdef CONFIG_SYSCTL
/*
 * sysctl handler for kernel.latencytop: read or update the enable flag.
 *
 * Latency accounting relies on the schedstat machinery, so whenever the
 * feature is enabled after a write we force schedstats on as well.
 */
static int sysctl_latencytop(struct ctl_table *table, int write, void *buffer,
			     size_t *lenp, loff_t *ppos)
{
	int ret = proc_dointvec(table, write, buffer, lenp, ppos);

	if (latencytop_enabled)
		force_schedstat_enabled();

	return ret;
}
79988f11e0Sliaohua
80988f11e0Sliaohua static struct ctl_table latencytop_sysctl[] = {
81988f11e0Sliaohua {
82988f11e0Sliaohua .procname = "latencytop",
83988f11e0Sliaohua .data = &latencytop_enabled,
84988f11e0Sliaohua .maxlen = sizeof(int),
85988f11e0Sliaohua .mode = 0644,
86988f11e0Sliaohua .proc_handler = sysctl_latencytop,
87988f11e0Sliaohua },
88988f11e0Sliaohua {}
89988f11e0Sliaohua };
90988f11e0Sliaohua #endif
91988f11e0Sliaohua
clear_tsk_latency_tracing(struct task_struct * p)92e02c9b0dSLin Feng void clear_tsk_latency_tracing(struct task_struct *p)
939745512cSArjan van de Ven {
949745512cSArjan van de Ven unsigned long flags;
959745512cSArjan van de Ven
96757455d4SThomas Gleixner raw_spin_lock_irqsave(&latency_lock, flags);
979745512cSArjan van de Ven memset(&p->latency_record, 0, sizeof(p->latency_record));
989745512cSArjan van de Ven p->latency_record_count = 0;
99757455d4SThomas Gleixner raw_spin_unlock_irqrestore(&latency_lock, flags);
1009745512cSArjan van de Ven }
1019745512cSArjan van de Ven
clear_global_latency_tracing(void)1029745512cSArjan van de Ven static void clear_global_latency_tracing(void)
1039745512cSArjan van de Ven {
1049745512cSArjan van de Ven unsigned long flags;
1059745512cSArjan van de Ven
106757455d4SThomas Gleixner raw_spin_lock_irqsave(&latency_lock, flags);
1079745512cSArjan van de Ven memset(&latency_record, 0, sizeof(latency_record));
108757455d4SThomas Gleixner raw_spin_unlock_irqrestore(&latency_lock, flags);
1099745512cSArjan van de Ven }
1109745512cSArjan van de Ven
1119745512cSArjan van de Ven static void __sched
account_global_scheduler_latency(struct task_struct * tsk,struct latency_record * lat)112eaa1809bSFabian Frederick account_global_scheduler_latency(struct task_struct *tsk,
113eaa1809bSFabian Frederick struct latency_record *lat)
1149745512cSArjan van de Ven {
115*b8147511Swuchi int firstnonnull = MAXLR;
1169745512cSArjan van de Ven int i;
1179745512cSArjan van de Ven
1189745512cSArjan van de Ven /* skip kernel threads for now */
1199745512cSArjan van de Ven if (!tsk->mm)
1209745512cSArjan van de Ven return;
1219745512cSArjan van de Ven
1229745512cSArjan van de Ven for (i = 0; i < MAXLR; i++) {
12319fb518cSDmitry Adamushko int q, same = 1;
12419fb518cSDmitry Adamushko
1259745512cSArjan van de Ven /* Nothing stored: */
1269745512cSArjan van de Ven if (!latency_record[i].backtrace[0]) {
1279745512cSArjan van de Ven if (firstnonnull > i)
1289745512cSArjan van de Ven firstnonnull = i;
1299745512cSArjan van de Ven continue;
1309745512cSArjan van de Ven }
1319745512cSArjan van de Ven for (q = 0; q < LT_BACKTRACEDEPTH; q++) {
13219fb518cSDmitry Adamushko unsigned long record = lat->backtrace[q];
13319fb518cSDmitry Adamushko
13419fb518cSDmitry Adamushko if (latency_record[i].backtrace[q] != record) {
1359745512cSArjan van de Ven same = 0;
1369745512cSArjan van de Ven break;
13719fb518cSDmitry Adamushko }
13819fb518cSDmitry Adamushko
139accddc41SThomas Gleixner /* 0 entry marks end of backtrace: */
140accddc41SThomas Gleixner if (!record)
1419745512cSArjan van de Ven break;
1429745512cSArjan van de Ven }
1439745512cSArjan van de Ven if (same) {
1449745512cSArjan van de Ven latency_record[i].count++;
1459745512cSArjan van de Ven latency_record[i].time += lat->time;
1469745512cSArjan van de Ven if (lat->time > latency_record[i].max)
1479745512cSArjan van de Ven latency_record[i].max = lat->time;
1489745512cSArjan van de Ven return;
1499745512cSArjan van de Ven }
1509745512cSArjan van de Ven }
1519745512cSArjan van de Ven
1529745512cSArjan van de Ven i = firstnonnull;
153*b8147511Swuchi if (i >= MAXLR)
1549745512cSArjan van de Ven return;
1559745512cSArjan van de Ven
1569745512cSArjan van de Ven /* Allocted a new one: */
1579745512cSArjan van de Ven memcpy(&latency_record[i], lat, sizeof(struct latency_record));
1589745512cSArjan van de Ven }
1599745512cSArjan van de Ven
160ad0b0fd5SArjan van de Ven /**
16125985edcSLucas De Marchi * __account_scheduler_latency - record an occurred latency
162ad0b0fd5SArjan van de Ven * @tsk - the task struct of the task hitting the latency
163ad0b0fd5SArjan van de Ven * @usecs - the duration of the latency in microseconds
164ad0b0fd5SArjan van de Ven * @inter - 1 if the sleep was interruptible, 0 if uninterruptible
165ad0b0fd5SArjan van de Ven *
166ad0b0fd5SArjan van de Ven * This function is the main entry point for recording latency entries
167ad0b0fd5SArjan van de Ven * as called by the scheduler.
168ad0b0fd5SArjan van de Ven *
169ad0b0fd5SArjan van de Ven * This function has a few special cases to deal with normal 'non-latency'
170ad0b0fd5SArjan van de Ven * sleeps: specifically, interruptible sleep longer than 5 msec is skipped
171ad0b0fd5SArjan van de Ven * since this usually is caused by waiting for events via select() and co.
172ad0b0fd5SArjan van de Ven *
173ad0b0fd5SArjan van de Ven * Negative latencies (caused by time going backwards) are also explicitly
174ad0b0fd5SArjan van de Ven * skipped.
175ad0b0fd5SArjan van de Ven */
1769745512cSArjan van de Ven void __sched
__account_scheduler_latency(struct task_struct * tsk,int usecs,int inter)177ad0b0fd5SArjan van de Ven __account_scheduler_latency(struct task_struct *tsk, int usecs, int inter)
1789745512cSArjan van de Ven {
1799745512cSArjan van de Ven unsigned long flags;
1809745512cSArjan van de Ven int i, q;
1819745512cSArjan van de Ven struct latency_record lat;
1829745512cSArjan van de Ven
1839745512cSArjan van de Ven /* Long interruptible waits are generally user requested... */
1849745512cSArjan van de Ven if (inter && usecs > 5000)
1859745512cSArjan van de Ven return;
1869745512cSArjan van de Ven
187ad0b0fd5SArjan van de Ven /* Negative sleeps are time going backwards */
188ad0b0fd5SArjan van de Ven /* Zero-time sleeps are non-interesting */
189ad0b0fd5SArjan van de Ven if (usecs <= 0)
190ad0b0fd5SArjan van de Ven return;
191ad0b0fd5SArjan van de Ven
1929745512cSArjan van de Ven memset(&lat, 0, sizeof(lat));
1939745512cSArjan van de Ven lat.count = 1;
1949745512cSArjan van de Ven lat.time = usecs;
1959745512cSArjan van de Ven lat.max = usecs;
196f9387721SThomas Gleixner
197f9387721SThomas Gleixner stack_trace_save_tsk(tsk, lat.backtrace, LT_BACKTRACEDEPTH, 0);
1989745512cSArjan van de Ven
199757455d4SThomas Gleixner raw_spin_lock_irqsave(&latency_lock, flags);
2009745512cSArjan van de Ven
2019745512cSArjan van de Ven account_global_scheduler_latency(tsk, &lat);
2029745512cSArjan van de Ven
20338715258SKen Chen for (i = 0; i < tsk->latency_record_count; i++) {
2049745512cSArjan van de Ven struct latency_record *mylat;
2059745512cSArjan van de Ven int same = 1;
20619fb518cSDmitry Adamushko
2079745512cSArjan van de Ven mylat = &tsk->latency_record[i];
2089745512cSArjan van de Ven for (q = 0; q < LT_BACKTRACEDEPTH; q++) {
20919fb518cSDmitry Adamushko unsigned long record = lat.backtrace[q];
21019fb518cSDmitry Adamushko
21119fb518cSDmitry Adamushko if (mylat->backtrace[q] != record) {
2129745512cSArjan van de Ven same = 0;
2139745512cSArjan van de Ven break;
21419fb518cSDmitry Adamushko }
21519fb518cSDmitry Adamushko
216accddc41SThomas Gleixner /* 0 entry is end of backtrace */
217accddc41SThomas Gleixner if (!record)
2189745512cSArjan van de Ven break;
2199745512cSArjan van de Ven }
2209745512cSArjan van de Ven if (same) {
2219745512cSArjan van de Ven mylat->count++;
2229745512cSArjan van de Ven mylat->time += lat.time;
2239745512cSArjan van de Ven if (lat.time > mylat->max)
2249745512cSArjan van de Ven mylat->max = lat.time;
2259745512cSArjan van de Ven goto out_unlock;
2269745512cSArjan van de Ven }
2279745512cSArjan van de Ven }
2289745512cSArjan van de Ven
22938715258SKen Chen /*
23038715258SKen Chen * short term hack; if we're > 32 we stop; future we recycle:
23138715258SKen Chen */
23238715258SKen Chen if (tsk->latency_record_count >= LT_SAVECOUNT)
23338715258SKen Chen goto out_unlock;
23438715258SKen Chen
2359745512cSArjan van de Ven /* Allocated a new one: */
23638715258SKen Chen i = tsk->latency_record_count++;
2379745512cSArjan van de Ven memcpy(&tsk->latency_record[i], &lat, sizeof(struct latency_record));
2389745512cSArjan van de Ven
2399745512cSArjan van de Ven out_unlock:
240757455d4SThomas Gleixner raw_spin_unlock_irqrestore(&latency_lock, flags);
2419745512cSArjan van de Ven }
2429745512cSArjan van de Ven
lstats_show(struct seq_file * m,void * v)2439745512cSArjan van de Ven static int lstats_show(struct seq_file *m, void *v)
2449745512cSArjan van de Ven {
2459745512cSArjan van de Ven int i;
2469745512cSArjan van de Ven
2479745512cSArjan van de Ven seq_puts(m, "Latency Top version : v0.1\n");
2489745512cSArjan van de Ven
2499745512cSArjan van de Ven for (i = 0; i < MAXLR; i++) {
25034e49d4fSJoe Perches struct latency_record *lr = &latency_record[i];
25134e49d4fSJoe Perches
25234e49d4fSJoe Perches if (lr->backtrace[0]) {
2539745512cSArjan van de Ven int q;
254ad0b0fd5SArjan van de Ven seq_printf(m, "%i %lu %lu",
25534e49d4fSJoe Perches lr->count, lr->time, lr->max);
2569745512cSArjan van de Ven for (q = 0; q < LT_BACKTRACEDEPTH; q++) {
25734e49d4fSJoe Perches unsigned long bt = lr->backtrace[q];
258accddc41SThomas Gleixner
25934e49d4fSJoe Perches if (!bt)
2609745512cSArjan van de Ven break;
261accddc41SThomas Gleixner
26234e49d4fSJoe Perches seq_printf(m, " %ps", (void *)bt);
2639745512cSArjan van de Ven }
264eaa1809bSFabian Frederick seq_puts(m, "\n");
2659745512cSArjan van de Ven }
2669745512cSArjan van de Ven }
2679745512cSArjan van de Ven return 0;
2689745512cSArjan van de Ven }
2699745512cSArjan van de Ven
2709745512cSArjan van de Ven static ssize_t
lstats_write(struct file * file,const char __user * buf,size_t count,loff_t * offs)2719745512cSArjan van de Ven lstats_write(struct file *file, const char __user *buf, size_t count,
2729745512cSArjan van de Ven loff_t *offs)
2739745512cSArjan van de Ven {
2749745512cSArjan van de Ven clear_global_latency_tracing();
2759745512cSArjan van de Ven
2769745512cSArjan van de Ven return count;
2779745512cSArjan van de Ven }
2789745512cSArjan van de Ven
/* Open handler: wire the seq_file single-shot show routine. */
static int lstats_open(struct inode *inode, struct file *filp)
{
	return single_open(filp, lstats_show, NULL);
}
2839745512cSArjan van de Ven
28497a32539SAlexey Dobriyan static const struct proc_ops lstats_proc_ops = {
28597a32539SAlexey Dobriyan .proc_open = lstats_open,
28697a32539SAlexey Dobriyan .proc_read = seq_read,
28797a32539SAlexey Dobriyan .proc_write = lstats_write,
28897a32539SAlexey Dobriyan .proc_lseek = seq_lseek,
28997a32539SAlexey Dobriyan .proc_release = single_release,
2909745512cSArjan van de Ven };
2919745512cSArjan van de Ven
/*
 * Register /proc/latency_stats and, when sysctl support is built in,
 * the kernel.latencytop knob.  Runs at device initcall time.
 */
static int __init init_lstats_procfs(void)
{
	proc_create("latency_stats", 0644, NULL, &lstats_proc_ops);
#ifdef CONFIG_SYSCTL
	register_sysctl_init("kernel", latencytop_sysctl);
#endif
	return 0;
}
device_initcall(init_lstats_procfs);
301