/*
 * linux/kernel/irq/timings.c
 *
 * Copyright (C) 2016, Linaro Ltd - Daniel Lezcano <daniel.lezcano@linaro.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 */
#include <linux/kernel.h>
#include <linux/percpu.h>
#include <linux/slab.h>
#include <linux/static_key.h>
#include <linux/interrupt.h>
#include <linux/idr.h>
#include <linux/irq.h>
#include <linux/math64.h>

#include <trace/events/irq.h>

#include "internals.h"

DEFINE_STATIC_KEY_FALSE(irq_timing_enabled);

DEFINE_PER_CPU(struct irq_timings, irq_timings);

struct irqt_stat {
        u64     next_evt;       /* predicted timestamp of the next event */
        u64     last_ts;        /* timestamp of the last event */
        u64     variance;       /* running sum for the online variance */
        u32     avg;            /* online average of the intervals */
        u32     nr_samples;     /* number of samples accumulated */
        int     anomalies;      /* consecutive out-of-interval samples */
        int     valid;          /* statistics usable for prediction */
};

static DEFINE_IDR(irqt_stats);

void irq_timings_enable(void)
{
        static_branch_enable(&irq_timing_enabled);
}

void irq_timings_disable(void)
{
        static_branch_disable(&irq_timing_enabled);
}

/**
 * irqs_update - update the irq timing statistics with a new timestamp
 *
 * @irqs: an irqt_stat struct pointer
 * @ts: the new timestamp
 *
 * The statistics are computed online, in other words, the code is
 * designed to compute the statistics on a stream of values rather
 * than doing multiple passes on the values to compute the average,
 * then the variance. The integer division introduces a loss of
 * precision, but with an acceptable error margin compared to the
 * results we would get with double precision floating point: we are
 * dealing with nanoseconds, hence large numbers, so the lost
 * fractional part is negligible, especially when converting the time
 * to microseconds afterwards.
 *
 * The computation happens at idle time. When the CPU is not idle, the
 * interrupts' timestamps are stored in the circular buffer; when the
 * CPU goes idle and this routine is called, all the buffer's values
 * are injected into the statistical model, continuing to extend the
 * statistics from the previous busy-idle cycle.
 *
 * The observations showed a device will trigger a burst of periodic
 * interrupts followed by one or two peaks of longer time, for
 * instance when an SD card device flushes its cache, then the
 * periodic intervals occur again. A one second inactivity period
 * resets the stats, which guarantees the statistical values won't
 * exceed 1x10^9, thus the computation won't overflow.
 *
 * Basically, the purpose of the algorithm is to watch the periodic
 * interrupts and eliminate the peaks.
 *
 * An interrupt is considered periodically stable if the interval of
 * its occurrences follows the normal distribution, thus the values
 * comply with:
 *
 *      avg - 3 x stddev < value < avg + 3 x stddev
 *
 * Which can be simplified to:
 *
 *      -3 x stddev < value - avg < 3 x stddev
 *
 *      abs(value - avg) < 3 x stddev
 *
 * In order to save a costly square root computation, we use the
 * variance. For the record, stddev = sqrt(variance). The equation
 * above becomes:
 *
 *      abs(value - avg) < 3 x sqrt(variance)
 *
 * And finally we square it:
 *
 *      (value - avg) ^ 2 < (3 x sqrt(variance)) ^ 2
 *
 *      (value - avg) x (value - avg) < 9 x variance
 *
 * Statistically speaking, any value outside this interval is
 * considered an anomaly and is discarded.
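 *
 * For instance, with avg = 100 us and variance = 25 us^2 (that is, a
 * stddev of 5 us), an interval of 130 us gives (130 - 100)^2 = 900,
 * which is greater than 9 x 25 = 225, so that value is flagged as an
 * anomaly: it lies beyond avg + 3 x stddev = 115 us.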
 * However, a normal distribution only appears when the number of
 * samples is at least 30 (the usual rule of thumb in statistics).
 * When there are three consecutive anomalies, the statistics are
 * reset.
 *
 */
static void irqs_update(struct irqt_stat *irqs, u64 ts)
{
        u64 old_ts = irqs->last_ts;
        u64 variance = 0;
        u64 interval;
        s64 diff;

        /*
         * The timestamps are absolute time values, we need to compute
         * the timing interval between two interrupts.
         */
        irqs->last_ts = ts;

        /*
         * The interval type is u64 in order to deal with the same
         * type in our computation, which avoids subtle issues with
         * overflow, sign and division.
         */
        interval = ts - old_ts;

        /*
         * The interrupt triggered more than one second apart, which
         * ends the sequence considered predictable for our purpose.
         * In this case, assume we have the beginning of a sequence
         * and the timestamp is the first value. As it is impossible
         * to predict anything at this point, return.
         *
         * Note the first timestamp of the sequence will always fall
         * in this test because old_ts is zero. That is what we want,
         * as we need another timestamp to compute an interval.
         */
        if (interval >= NSEC_PER_SEC) {
                memset(irqs, 0, sizeof(*irqs));
                irqs->last_ts = ts;
                return;
        }

        /*
         * Pre-compute the delta with the average as the result is
         * used several times in this function.
         */
        diff = interval - irqs->avg;

        /*
         * Increment the number of samples.
         */
        irqs->nr_samples++;

        /*
         * Online variance divided by the number of elements if there
         * is more than one sample. Normally the formula is a division
         * by nr_samples - 1, but we assume the number of elements
         * will be more than 32 and dividing by 32 instead of 31 is
         * precise enough.
         */
        if (likely(irqs->nr_samples > 1))
                variance = irqs->variance >> IRQ_TIMINGS_SHIFT;

        /*
         * The rule of thumb in statistics for the normal distribution
         * is having at least 30 samples in order for the model to
         * apply. Values outside the interval are considered an
         * anomaly.
         */
        if ((irqs->nr_samples >= 30) && ((diff * diff) > (9 * variance))) {
                /*
                 * After three consecutive anomalies, we reset the
                 * stats as they are no longer stable enough.
                 */
                if (irqs->anomalies++ >= 3) {
                        memset(irqs, 0, sizeof(*irqs));
                        irqs->last_ts = ts;
                        return;
                }
        } else {
                /*
                 * The anomalies must be consecutive, so at this
                 * point, we reset the anomalies counter.
                 */
                irqs->anomalies = 0;
        }

        /*
         * The interrupt is considered stable enough to try to predict
         * the next event on it.
         */
        irqs->valid = 1;

        /*
         * Online average algorithm:
         *
         *  new_average = average + ((value - average) / count)
         *
         * The variance computation depends on the new average
         * to be computed here first.
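         *
         * In the code below, the division by 'count' is replaced by a
         * constant right shift by IRQ_TIMINGS_SHIFT (a division by
         * 32), relying on the same assumption as for the variance
         * above: the number of accumulated samples exceeds 32.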
         *
         */
        irqs->avg = irqs->avg + (diff >> IRQ_TIMINGS_SHIFT);

        /*
         * Online variance algorithm:
         *
         *  new_variance = variance + (value - average) x (value - new_average)
         *
         * Warning: irqs->avg is updated with the line above, hence
         * 'interval - irqs->avg' is no longer equal to 'diff'.
         */
        irqs->variance = irqs->variance + (diff * (interval - irqs->avg));

        /*
         * Update the next event.
         */
        irqs->next_evt = ts + irqs->avg;
}

/**
 * irq_timings_next_event - Return when the next event is supposed to arrive
 *
 * @now: the current timestamp
 *
 * During the last busy cycle, the number of interrupts is incremented
 * and stored in the irq_timings structure. This information is
 * necessary to:
 *
 * - know if the index in the table wrapped up:
 *
 *      If more than the array size interrupts happened during the
 *      last busy/idle cycle, the index wrapped up and we have to
 *      begin with the next element in the array, which is the last
 *      one in the sequence; otherwise we begin at index 0.
 *
 * - have an indication of the interrupts activity on this CPU
 *   (eg. irq/sec)
 *
 * The values are 'consumed' after being inserted in the statistical
 * model, thus the count is reinitialized.
 *
 * The array of values **must** be browsed in the time direction, the
 * timestamp must increase between an element and the next one.
 *
 * Returns a nanosecond time based estimation of the earliest
 * interrupt, U64_MAX otherwise.
 */
u64 irq_timings_next_event(u64 now)
{
        struct irq_timings *irqts = this_cpu_ptr(&irq_timings);
        struct irqt_stat *irqs;
        struct irqt_stat __percpu *s;
        u64 ts, next_evt = U64_MAX;
        int i, irq = 0;

        /*
         * This function must be called with the local irq disabled in
         * order to prevent the timings circular buffer from being
         * updated while we are reading it.
         */
        lockdep_assert_irqs_disabled();

        /*
         * Number of elements in the circular buffer: if it happens the
         * buffer was flushed before, then the number of elements could
         * be smaller than IRQ_TIMINGS_SIZE, so the count is used,
         * otherwise the array size is used as we wrapped. The index
         * begins from zero when we did not wrap. That could be done
         * in a nicer way with a proper circular array structure type,
         * but at the cost of extra computation in the interrupt
         * handler hot path. We choose efficiency.
         *
         * Inject the measured irq/timestamp pairs into the statistical
         * model while decrementing the counter because we consume the
         * data from our circular buffer.
         */
        for (i = irqts->count & IRQ_TIMINGS_MASK,
             irqts->count = min(IRQ_TIMINGS_SIZE, irqts->count);
             irqts->count > 0; irqts->count--, i = (i + 1) & IRQ_TIMINGS_MASK) {

                irq = irq_timing_decode(irqts->values[i], &ts);

                s = idr_find(&irqt_stats, irq);
                if (s) {
                        irqs = this_cpu_ptr(s);
                        irqs_update(irqs, ts);
                }
        }

        /*
         * Look in the list of interrupts' statistics for the earliest
         * next event.
         */
        idr_for_each_entry(&irqt_stats, s, i) {

                irqs = this_cpu_ptr(s);

                if (!irqs->valid)
                        continue;

                if (irqs->next_evt <= now) {
                        irq = i;
                        next_evt = now;

                        /*
                         * This interrupt mustn't be used in the future
                         * until new events occur and update the
                         * statistics.
                         */
                        irqs->valid = 0;
                        break;
                }

                if (irqs->next_evt < next_evt) {
                        irq = i;
                        next_evt = irqs->next_evt;
                }
        }

        return next_evt;
}

void irq_timings_free(int irq)
{
        struct irqt_stat __percpu *s;

        s = idr_find(&irqt_stats, irq);
        if (s) {
                free_percpu(s);
                idr_remove(&irqt_stats, irq);
        }
}

int irq_timings_alloc(int irq)
{
        struct irqt_stat __percpu *s;
        int id;

        /*
         * Some platforms can have the same private interrupt per cpu,
         * so this function may be called several times with the same
         * interrupt number. Just bail out in case the per cpu stat
         * structure is already allocated.
         */
        s = idr_find(&irqt_stats, irq);
        if (s)
                return 0;

        s = alloc_percpu(*s);
        if (!s)
                return -ENOMEM;

        idr_preload(GFP_KERNEL);
        id = idr_alloc(&irqt_stats, s, irq, irq + 1, GFP_NOWAIT);
        idr_preload_end();

        if (id < 0) {
                free_percpu(s);
                return id;
        }

        return 0;
}
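For illustration only, and not part of timings.c: the sketch below is a minimal, self-contained user-space approximation of the online average/variance update and the squared 3-sigma anomaly test implemented by irqs_update() above. The constant divisor of 32, the one second reset and the 30 sample threshold mirror the kernel code; the struct name, the SHIFT macro, main() and the synthetic timestamps are invented for the example.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define SHIFT           5               /* mirrors IRQ_TIMINGS_SHIFT: divide by 32 */
#define NSEC_PER_SEC    1000000000ULL

struct sample_stat {
        uint64_t last_ts;
        uint64_t variance;
        uint32_t avg;
        uint32_t nr_samples;
        int anomalies;
};

static void stat_update(struct sample_stat *s, uint64_t ts)
{
        uint64_t interval = ts - s->last_ts;
        uint64_t variance = 0;
        int64_t diff;

        s->last_ts = ts;

        /* A gap of one second or more restarts the sequence. */
        if (interval >= NSEC_PER_SEC) {
                memset(s, 0, sizeof(*s));
                s->last_ts = ts;
                return;
        }

        diff = (int64_t)interval - s->avg;
        s->nr_samples++;

        if (s->nr_samples > 1)
                variance = s->variance >> SHIFT;

        /* Squared 3-sigma test: (value - avg)^2 > 9 x variance. */
        if (s->nr_samples >= 30 && (uint64_t)(diff * diff) > 9 * variance) {
                if (s->anomalies++ >= 3) {
                        memset(s, 0, sizeof(*s));
                        s->last_ts = ts;
                        return;
                }
        } else {
                s->anomalies = 0;
        }

        /*
         * Online average and variance with the constant divisor of 32
         * (arithmetic right shift, as in the kernel code).
         */
        s->avg = s->avg + (uint32_t)(diff >> SHIFT);
        s->variance = s->variance + (uint64_t)(diff * ((int64_t)interval - s->avg));
}

int main(void)
{
        struct sample_stat s = { 0 };
        uint64_t ts = 2 * NSEC_PER_SEC; /* arbitrary absolute start time */
        int i;

        /* Periodic 100 us interrupts with a single 10 ms outlier. */
        for (i = 0; i < 64; i++) {
                ts += (i == 40) ? 10000000ULL : 100000ULL;
                stat_update(&s, ts);
        }

        printf("avg = %u ns, next event predicted at %llu ns\n",
               s.avg, (unsigned long long)(ts + s.avg));
        return 0;
}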