152a65ff5SThomas Gleixner // SPDX-License-Identifier: GPL-2.0 2f3f59fbcSThomas Gleixner // Copyright (C) 2016, Linaro Ltd - Daniel Lezcano <daniel.lezcano@linaro.org> 3f3f59fbcSThomas Gleixner 4e1c92149SDaniel Lezcano #include <linux/kernel.h> 5b2d3d61aSDaniel Lezcano #include <linux/percpu.h> 6e1c92149SDaniel Lezcano #include <linux/slab.h> 7b2d3d61aSDaniel Lezcano #include <linux/static_key.h> 8b2d3d61aSDaniel Lezcano #include <linux/interrupt.h> 9e1c92149SDaniel Lezcano #include <linux/idr.h> 10b2d3d61aSDaniel Lezcano #include <linux/irq.h> 11bbba0e7cSDaniel Lezcano #include <linux/math64.h> 12bbba0e7cSDaniel Lezcano #include <linux/log2.h> 13e1c92149SDaniel Lezcano 14e1c92149SDaniel Lezcano #include <trace/events/irq.h> 15b2d3d61aSDaniel Lezcano 16b2d3d61aSDaniel Lezcano #include "internals.h" 17b2d3d61aSDaniel Lezcano 18b2d3d61aSDaniel Lezcano DEFINE_STATIC_KEY_FALSE(irq_timing_enabled); 19b2d3d61aSDaniel Lezcano 20b2d3d61aSDaniel Lezcano DEFINE_PER_CPU(struct irq_timings, irq_timings); 21b2d3d61aSDaniel Lezcano 22e1c92149SDaniel Lezcano static DEFINE_IDR(irqt_stats); 23e1c92149SDaniel Lezcano 24b2d3d61aSDaniel Lezcano void irq_timings_enable(void) 25b2d3d61aSDaniel Lezcano { 26b2d3d61aSDaniel Lezcano static_branch_enable(&irq_timing_enabled); 27b2d3d61aSDaniel Lezcano } 28b2d3d61aSDaniel Lezcano 29b2d3d61aSDaniel Lezcano void irq_timings_disable(void) 30b2d3d61aSDaniel Lezcano { 31b2d3d61aSDaniel Lezcano static_branch_disable(&irq_timing_enabled); 32b2d3d61aSDaniel Lezcano } 33e1c92149SDaniel Lezcano 34bbba0e7cSDaniel Lezcano /* 35bbba0e7cSDaniel Lezcano * The main goal of this algorithm is to predict the next interrupt 36bbba0e7cSDaniel Lezcano * occurrence on the current CPU. 37bbba0e7cSDaniel Lezcano * 38bbba0e7cSDaniel Lezcano * Currently, the interrupt timings are stored in a circular array 39bbba0e7cSDaniel Lezcano * buffer every time there is an interrupt, as a tuple: the interrupt 40bbba0e7cSDaniel Lezcano * number and the associated timestamp when the event occurred <irq, 41bbba0e7cSDaniel Lezcano * timestamp>. 42bbba0e7cSDaniel Lezcano * 43bbba0e7cSDaniel Lezcano * For every interrupt occurring in a short period of time, we can 44bbba0e7cSDaniel Lezcano * measure the elapsed time between the occurrences for the same 45bbba0e7cSDaniel Lezcano * interrupt and we end up with a suite of intervals. The experience 46bbba0e7cSDaniel Lezcano * showed the interrupts are often coming following a periodic 47bbba0e7cSDaniel Lezcano * pattern. 48bbba0e7cSDaniel Lezcano * 49bbba0e7cSDaniel Lezcano * The objective of the algorithm is to find out this periodic pattern 50bbba0e7cSDaniel Lezcano * in a fastest way and use its period to predict the next irq event. 51bbba0e7cSDaniel Lezcano * 52bbba0e7cSDaniel Lezcano * When the next interrupt event is requested, we are in the situation 53bbba0e7cSDaniel Lezcano * where the interrupts are disabled and the circular buffer 54bbba0e7cSDaniel Lezcano * containing the timings is filled with the events which happened 55bbba0e7cSDaniel Lezcano * after the previous next-interrupt-event request. 56bbba0e7cSDaniel Lezcano * 57bbba0e7cSDaniel Lezcano * At this point, we read the circular buffer and we fill the irq 58bbba0e7cSDaniel Lezcano * related statistics structure. After this step, the circular array 59bbba0e7cSDaniel Lezcano * containing the timings is empty because all the values are 60bbba0e7cSDaniel Lezcano * dispatched in their corresponding buffers. 61bbba0e7cSDaniel Lezcano * 62bbba0e7cSDaniel Lezcano * Now for each interrupt, we can predict the next event by using the 63bbba0e7cSDaniel Lezcano * suffix array, log interval and exponential moving average 64bbba0e7cSDaniel Lezcano * 65bbba0e7cSDaniel Lezcano * 1. Suffix array 66bbba0e7cSDaniel Lezcano * 67bbba0e7cSDaniel Lezcano * Suffix array is an array of all the suffixes of a string. It is 68bbba0e7cSDaniel Lezcano * widely used as a data structure for compression, text search, ... 69bbba0e7cSDaniel Lezcano * For instance for the word 'banana', the suffixes will be: 'banana' 70bbba0e7cSDaniel Lezcano * 'anana' 'nana' 'ana' 'na' 'a' 71bbba0e7cSDaniel Lezcano * 72bbba0e7cSDaniel Lezcano * Usually, the suffix array is sorted but for our purpose it is 73bbba0e7cSDaniel Lezcano * not necessary and won't provide any improvement in the context of 74bbba0e7cSDaniel Lezcano * the solved problem where we clearly define the boundaries of the 75bbba0e7cSDaniel Lezcano * search by a max period and min period. 76bbba0e7cSDaniel Lezcano * 77bbba0e7cSDaniel Lezcano * The suffix array will build a suite of intervals of different 78bbba0e7cSDaniel Lezcano * length and will look for the repetition of each suite. If the suite 79bbba0e7cSDaniel Lezcano * is repeating then we have the period because it is the length of 80bbba0e7cSDaniel Lezcano * the suite whatever its position in the buffer. 81bbba0e7cSDaniel Lezcano * 82bbba0e7cSDaniel Lezcano * 2. Log interval 83bbba0e7cSDaniel Lezcano * 84bbba0e7cSDaniel Lezcano * We saw the irq timings allow to compute the interval of the 85bbba0e7cSDaniel Lezcano * occurrences for a specific interrupt. We can reasonibly assume the 86bbba0e7cSDaniel Lezcano * longer is the interval, the higher is the error for the next event 87bbba0e7cSDaniel Lezcano * and we can consider storing those interval values into an array 88bbba0e7cSDaniel Lezcano * where each slot in the array correspond to an interval at the power 89bbba0e7cSDaniel Lezcano * of 2 of the index. For example, index 12 will contain values 90bbba0e7cSDaniel Lezcano * between 2^11 and 2^12. 91bbba0e7cSDaniel Lezcano * 92bbba0e7cSDaniel Lezcano * At the end we have an array of values where at each index defines a 93bbba0e7cSDaniel Lezcano * [2^index - 1, 2 ^ index] interval values allowing to store a large 94bbba0e7cSDaniel Lezcano * number of values inside a small array. 95bbba0e7cSDaniel Lezcano * 96bbba0e7cSDaniel Lezcano * For example, if we have the value 1123, then we store it at 97bbba0e7cSDaniel Lezcano * ilog2(1123) = 10 index value. 98bbba0e7cSDaniel Lezcano * 99bbba0e7cSDaniel Lezcano * Storing those value at the specific index is done by computing an 100bbba0e7cSDaniel Lezcano * exponential moving average for this specific slot. For instance, 101bbba0e7cSDaniel Lezcano * for values 1800, 1123, 1453, ... fall under the same slot (10) and 102bbba0e7cSDaniel Lezcano * the exponential moving average is computed every time a new value 103bbba0e7cSDaniel Lezcano * is stored at this slot. 104bbba0e7cSDaniel Lezcano * 105bbba0e7cSDaniel Lezcano * 3. Exponential Moving Average 106bbba0e7cSDaniel Lezcano * 107bbba0e7cSDaniel Lezcano * The EMA is largely used to track a signal for stocks or as a low 108bbba0e7cSDaniel Lezcano * pass filter. The magic of the formula, is it is very simple and the 109bbba0e7cSDaniel Lezcano * reactivity of the average can be tuned with the factors called 110bbba0e7cSDaniel Lezcano * alpha. 111bbba0e7cSDaniel Lezcano * 112bbba0e7cSDaniel Lezcano * The higher the alphas are, the faster the average respond to the 113bbba0e7cSDaniel Lezcano * signal change. In our case, if a slot in the array is a big 114bbba0e7cSDaniel Lezcano * interval, we can have numbers with a big difference between 115bbba0e7cSDaniel Lezcano * them. The impact of those differences in the average computation 116bbba0e7cSDaniel Lezcano * can be tuned by changing the alpha value. 117bbba0e7cSDaniel Lezcano * 118bbba0e7cSDaniel Lezcano * 119bbba0e7cSDaniel Lezcano * -- The algorithm -- 120bbba0e7cSDaniel Lezcano * 121bbba0e7cSDaniel Lezcano * We saw the different processing above, now let's see how they are 122bbba0e7cSDaniel Lezcano * used together. 123bbba0e7cSDaniel Lezcano * 124bbba0e7cSDaniel Lezcano * For each interrupt: 125bbba0e7cSDaniel Lezcano * For each interval: 126bbba0e7cSDaniel Lezcano * Compute the index = ilog2(interval) 127bbba0e7cSDaniel Lezcano * Compute a new_ema(buffer[index], interval) 128bbba0e7cSDaniel Lezcano * Store the index in a circular buffer 129bbba0e7cSDaniel Lezcano * 130bbba0e7cSDaniel Lezcano * Compute the suffix array of the indexes 131bbba0e7cSDaniel Lezcano * 132bbba0e7cSDaniel Lezcano * For each suffix: 133bbba0e7cSDaniel Lezcano * If the suffix is reverse-found 3 times 134bbba0e7cSDaniel Lezcano * Return suffix 135bbba0e7cSDaniel Lezcano * 136bbba0e7cSDaniel Lezcano * Return Not found 137bbba0e7cSDaniel Lezcano * 138bbba0e7cSDaniel Lezcano * However we can not have endless suffix array to be build, it won't 139bbba0e7cSDaniel Lezcano * make sense and it will add an extra overhead, so we can restrict 140bbba0e7cSDaniel Lezcano * this to a maximum suffix length of 5 and a minimum suffix length of 141bbba0e7cSDaniel Lezcano * 2. The experience showed 5 is the majority of the maximum pattern 142bbba0e7cSDaniel Lezcano * period found for different devices. 143bbba0e7cSDaniel Lezcano * 144bbba0e7cSDaniel Lezcano * The result is a pattern finding less than 1us for an interrupt. 145bbba0e7cSDaniel Lezcano * 146bbba0e7cSDaniel Lezcano * Example based on real values: 147bbba0e7cSDaniel Lezcano * 148bbba0e7cSDaniel Lezcano * Example 1 : MMC write/read interrupt interval: 149bbba0e7cSDaniel Lezcano * 150bbba0e7cSDaniel Lezcano * 223947, 1240, 1384, 1386, 1386, 151bbba0e7cSDaniel Lezcano * 217416, 1236, 1384, 1386, 1387, 152bbba0e7cSDaniel Lezcano * 214719, 1241, 1386, 1387, 1384, 153bbba0e7cSDaniel Lezcano * 213696, 1234, 1384, 1386, 1388, 154bbba0e7cSDaniel Lezcano * 219904, 1240, 1385, 1389, 1385, 155bbba0e7cSDaniel Lezcano * 212240, 1240, 1386, 1386, 1386, 156bbba0e7cSDaniel Lezcano * 214415, 1236, 1384, 1386, 1387, 157bbba0e7cSDaniel Lezcano * 214276, 1234, 1384, 1388, ? 158bbba0e7cSDaniel Lezcano * 159bbba0e7cSDaniel Lezcano * For each element, apply ilog2(value) 160bbba0e7cSDaniel Lezcano * 161bbba0e7cSDaniel Lezcano * 15, 8, 8, 8, 8, 162bbba0e7cSDaniel Lezcano * 15, 8, 8, 8, 8, 163bbba0e7cSDaniel Lezcano * 15, 8, 8, 8, 8, 164bbba0e7cSDaniel Lezcano * 15, 8, 8, 8, 8, 165bbba0e7cSDaniel Lezcano * 15, 8, 8, 8, 8, 166bbba0e7cSDaniel Lezcano * 15, 8, 8, 8, 8, 167bbba0e7cSDaniel Lezcano * 15, 8, 8, 8, 8, 168bbba0e7cSDaniel Lezcano * 15, 8, 8, 8, ? 169bbba0e7cSDaniel Lezcano * 170bbba0e7cSDaniel Lezcano * Max period of 5, we take the last (max_period * 3) 15 elements as 171bbba0e7cSDaniel Lezcano * we can be confident if the pattern repeats itself three times it is 172bbba0e7cSDaniel Lezcano * a repeating pattern. 173bbba0e7cSDaniel Lezcano * 174bbba0e7cSDaniel Lezcano * 8, 175bbba0e7cSDaniel Lezcano * 15, 8, 8, 8, 8, 176bbba0e7cSDaniel Lezcano * 15, 8, 8, 8, 8, 177bbba0e7cSDaniel Lezcano * 15, 8, 8, 8, ? 178bbba0e7cSDaniel Lezcano * 179bbba0e7cSDaniel Lezcano * Suffixes are: 180bbba0e7cSDaniel Lezcano * 181bbba0e7cSDaniel Lezcano * 1) 8, 15, 8, 8, 8 <- max period 182bbba0e7cSDaniel Lezcano * 2) 8, 15, 8, 8 183bbba0e7cSDaniel Lezcano * 3) 8, 15, 8 184bbba0e7cSDaniel Lezcano * 4) 8, 15 <- min period 185bbba0e7cSDaniel Lezcano * 186bbba0e7cSDaniel Lezcano * From there we search the repeating pattern for each suffix. 187bbba0e7cSDaniel Lezcano * 188bbba0e7cSDaniel Lezcano * buffer: 8, 15, 8, 8, 8, 8, 15, 8, 8, 8, 8, 15, 8, 8, 8 189bbba0e7cSDaniel Lezcano * | | | | | | | | | | | | | | | 190bbba0e7cSDaniel Lezcano * 8, 15, 8, 8, 8 | | | | | | | | | | 191bbba0e7cSDaniel Lezcano * 8, 15, 8, 8, 8 | | | | | 192bbba0e7cSDaniel Lezcano * 8, 15, 8, 8, 8 193bbba0e7cSDaniel Lezcano * 194bbba0e7cSDaniel Lezcano * When moving the suffix, we found exactly 3 matches. 195bbba0e7cSDaniel Lezcano * 196bbba0e7cSDaniel Lezcano * The first suffix with period 5 is repeating. 197bbba0e7cSDaniel Lezcano * 198bbba0e7cSDaniel Lezcano * The next event is (3 * max_period) % suffix_period 199bbba0e7cSDaniel Lezcano * 200bbba0e7cSDaniel Lezcano * In this example, the result 0, so the next event is suffix[0] => 8 201bbba0e7cSDaniel Lezcano * 202bbba0e7cSDaniel Lezcano * However, 8 is the index in the array of exponential moving average 203bbba0e7cSDaniel Lezcano * which was calculated on the fly when storing the values, so the 204bbba0e7cSDaniel Lezcano * interval is ema[8] = 1366 205bbba0e7cSDaniel Lezcano * 206bbba0e7cSDaniel Lezcano * 207bbba0e7cSDaniel Lezcano * Example 2: 208bbba0e7cSDaniel Lezcano * 209bbba0e7cSDaniel Lezcano * 4, 3, 5, 100, 210bbba0e7cSDaniel Lezcano * 3, 3, 5, 117, 211bbba0e7cSDaniel Lezcano * 4, 4, 5, 112, 212bbba0e7cSDaniel Lezcano * 4, 3, 4, 110, 213bbba0e7cSDaniel Lezcano * 3, 5, 3, 117, 214bbba0e7cSDaniel Lezcano * 4, 4, 5, 112, 215bbba0e7cSDaniel Lezcano * 4, 3, 4, 110, 216bbba0e7cSDaniel Lezcano * 3, 4, 5, 112, 217bbba0e7cSDaniel Lezcano * 4, 3, 4, 110 218bbba0e7cSDaniel Lezcano * 219bbba0e7cSDaniel Lezcano * ilog2 220bbba0e7cSDaniel Lezcano * 221bbba0e7cSDaniel Lezcano * 0, 0, 0, 4, 222bbba0e7cSDaniel Lezcano * 0, 0, 0, 4, 223bbba0e7cSDaniel Lezcano * 0, 0, 0, 4, 224bbba0e7cSDaniel Lezcano * 0, 0, 0, 4, 225bbba0e7cSDaniel Lezcano * 0, 0, 0, 4, 226bbba0e7cSDaniel Lezcano * 0, 0, 0, 4, 227bbba0e7cSDaniel Lezcano * 0, 0, 0, 4, 228bbba0e7cSDaniel Lezcano * 0, 0, 0, 4, 229bbba0e7cSDaniel Lezcano * 0, 0, 0, 4 230bbba0e7cSDaniel Lezcano * 231bbba0e7cSDaniel Lezcano * Max period 5: 232bbba0e7cSDaniel Lezcano * 0, 0, 4, 233bbba0e7cSDaniel Lezcano * 0, 0, 0, 4, 234bbba0e7cSDaniel Lezcano * 0, 0, 0, 4, 235bbba0e7cSDaniel Lezcano * 0, 0, 0, 4 236bbba0e7cSDaniel Lezcano * 237bbba0e7cSDaniel Lezcano * Suffixes: 238bbba0e7cSDaniel Lezcano * 239bbba0e7cSDaniel Lezcano * 1) 0, 0, 4, 0, 0 240bbba0e7cSDaniel Lezcano * 2) 0, 0, 4, 0 241bbba0e7cSDaniel Lezcano * 3) 0, 0, 4 242bbba0e7cSDaniel Lezcano * 4) 0, 0 243bbba0e7cSDaniel Lezcano * 244bbba0e7cSDaniel Lezcano * buffer: 0, 0, 4, 0, 0, 0, 4, 0, 0, 0, 4, 0, 0, 0, 4 245bbba0e7cSDaniel Lezcano * | | | | | | X 246bbba0e7cSDaniel Lezcano * 0, 0, 4, 0, 0, | X 247bbba0e7cSDaniel Lezcano * 0, 0 248bbba0e7cSDaniel Lezcano * 249bbba0e7cSDaniel Lezcano * buffer: 0, 0, 4, 0, 0, 0, 4, 0, 0, 0, 4, 0, 0, 0, 4 250bbba0e7cSDaniel Lezcano * | | | | | | | | | | | | | | | 251bbba0e7cSDaniel Lezcano * 0, 0, 4, 0, | | | | | | | | | | | 252bbba0e7cSDaniel Lezcano * 0, 0, 4, 0, | | | | | | | 253bbba0e7cSDaniel Lezcano * 0, 0, 4, 0, | | | 254bbba0e7cSDaniel Lezcano * 0 0 4 255bbba0e7cSDaniel Lezcano * 256bbba0e7cSDaniel Lezcano * Pattern is found 3 times, the remaining is 1 which results from 257bbba0e7cSDaniel Lezcano * (max_period * 3) % suffix_period. This value is the index in the 258bbba0e7cSDaniel Lezcano * suffix arrays. The suffix array for a period 4 has the value 4 259bbba0e7cSDaniel Lezcano * at index 1. 260bbba0e7cSDaniel Lezcano */ 261bbba0e7cSDaniel Lezcano #define EMA_ALPHA_VAL 64 262bbba0e7cSDaniel Lezcano #define EMA_ALPHA_SHIFT 7 263bbba0e7cSDaniel Lezcano 2643c2e79f4SDaniel Lezcano #define PREDICTION_PERIOD_MIN 3 265bbba0e7cSDaniel Lezcano #define PREDICTION_PERIOD_MAX 5 266bbba0e7cSDaniel Lezcano #define PREDICTION_FACTOR 4 267bbba0e7cSDaniel Lezcano #define PREDICTION_MAX 10 /* 2 ^ PREDICTION_MAX useconds */ 268bbba0e7cSDaniel Lezcano #define PREDICTION_BUFFER_SIZE 16 /* slots for EMAs, hardly more than 16 */ 269bbba0e7cSDaniel Lezcano 2702840eef0SDaniel Lezcano /* 2712840eef0SDaniel Lezcano * Number of elements in the circular buffer: If it happens it was 2722840eef0SDaniel Lezcano * flushed before, then the number of elements could be smaller than 2732840eef0SDaniel Lezcano * IRQ_TIMINGS_SIZE, so the count is used, otherwise the array size is 2742840eef0SDaniel Lezcano * used as we wrapped. The index begins from zero when we did not 2752840eef0SDaniel Lezcano * wrap. That could be done in a nicer way with the proper circular 2762840eef0SDaniel Lezcano * array structure type but with the cost of extra computation in the 2772840eef0SDaniel Lezcano * interrupt handler hot path. We choose efficiency. 2782840eef0SDaniel Lezcano */ 2792840eef0SDaniel Lezcano #define for_each_irqts(i, irqts) \ 2802840eef0SDaniel Lezcano for (i = irqts->count < IRQ_TIMINGS_SIZE ? \ 2812840eef0SDaniel Lezcano 0 : irqts->count & IRQ_TIMINGS_MASK, \ 2822840eef0SDaniel Lezcano irqts->count = min(IRQ_TIMINGS_SIZE, \ 2832840eef0SDaniel Lezcano irqts->count); \ 2842840eef0SDaniel Lezcano irqts->count > 0; irqts->count--, \ 2852840eef0SDaniel Lezcano i = (i + 1) & IRQ_TIMINGS_MASK) 2862840eef0SDaniel Lezcano 287bbba0e7cSDaniel Lezcano struct irqt_stat { 288bbba0e7cSDaniel Lezcano u64 last_ts; 289bbba0e7cSDaniel Lezcano u64 ema_time[PREDICTION_BUFFER_SIZE]; 290bbba0e7cSDaniel Lezcano int timings[IRQ_TIMINGS_SIZE]; 291bbba0e7cSDaniel Lezcano int circ_timings[IRQ_TIMINGS_SIZE]; 292bbba0e7cSDaniel Lezcano int count; 293bbba0e7cSDaniel Lezcano }; 294bbba0e7cSDaniel Lezcano 295bbba0e7cSDaniel Lezcano /* 296bbba0e7cSDaniel Lezcano * Exponential moving average computation 297bbba0e7cSDaniel Lezcano */ 298bbba0e7cSDaniel Lezcano static u64 irq_timings_ema_new(u64 value, u64 ema_old) 299bbba0e7cSDaniel Lezcano { 300bbba0e7cSDaniel Lezcano s64 diff; 301bbba0e7cSDaniel Lezcano 302bbba0e7cSDaniel Lezcano if (unlikely(!ema_old)) 303bbba0e7cSDaniel Lezcano return value; 304bbba0e7cSDaniel Lezcano 305bbba0e7cSDaniel Lezcano diff = (value - ema_old) * EMA_ALPHA_VAL; 306bbba0e7cSDaniel Lezcano /* 307bbba0e7cSDaniel Lezcano * We can use a s64 type variable to be added with the u64 308bbba0e7cSDaniel Lezcano * ema_old variable as this one will never have its topmost 309bbba0e7cSDaniel Lezcano * bit set, it will be always smaller than 2^63 nanosec 310bbba0e7cSDaniel Lezcano * interrupt interval (292 years). 311bbba0e7cSDaniel Lezcano */ 312bbba0e7cSDaniel Lezcano return ema_old + (diff >> EMA_ALPHA_SHIFT); 313bbba0e7cSDaniel Lezcano } 314bbba0e7cSDaniel Lezcano 315bbba0e7cSDaniel Lezcano static int irq_timings_next_event_index(int *buffer, size_t len, int period_max) 316bbba0e7cSDaniel Lezcano { 317619c1baaSDaniel Lezcano int period; 318619c1baaSDaniel Lezcano 319619c1baaSDaniel Lezcano /* 320619c1baaSDaniel Lezcano * Move the beginning pointer to the end minus the max period x 3. 321619c1baaSDaniel Lezcano * We are at the point we can begin searching the pattern 322619c1baaSDaniel Lezcano */ 323619c1baaSDaniel Lezcano buffer = &buffer[len - (period_max * 3)]; 324619c1baaSDaniel Lezcano 325619c1baaSDaniel Lezcano /* Adjust the length to the maximum allowed period x 3 */ 326619c1baaSDaniel Lezcano len = period_max * 3; 327bbba0e7cSDaniel Lezcano 328bbba0e7cSDaniel Lezcano /* 329bbba0e7cSDaniel Lezcano * The buffer contains the suite of intervals, in a ilog2 330bbba0e7cSDaniel Lezcano * basis, we are looking for a repetition. We point the 331bbba0e7cSDaniel Lezcano * beginning of the search three times the length of the 332bbba0e7cSDaniel Lezcano * period beginning at the end of the buffer. We do that for 333bbba0e7cSDaniel Lezcano * each suffix. 334bbba0e7cSDaniel Lezcano */ 335619c1baaSDaniel Lezcano for (period = period_max; period >= PREDICTION_PERIOD_MIN; period--) { 336bbba0e7cSDaniel Lezcano 337619c1baaSDaniel Lezcano /* 338619c1baaSDaniel Lezcano * The first comparison always succeed because the 339619c1baaSDaniel Lezcano * suffix is deduced from the first n-period bytes of 340619c1baaSDaniel Lezcano * the buffer and we compare the initial suffix with 341619c1baaSDaniel Lezcano * itself, so we can skip the first iteration. 342619c1baaSDaniel Lezcano */ 343619c1baaSDaniel Lezcano int idx = period; 344619c1baaSDaniel Lezcano size_t size = period; 345bbba0e7cSDaniel Lezcano 346bbba0e7cSDaniel Lezcano /* 347bbba0e7cSDaniel Lezcano * We look if the suite with period 'i' repeat 348bbba0e7cSDaniel Lezcano * itself. If it is truncated at the end, as it 349bbba0e7cSDaniel Lezcano * repeats we can use the period to find out the next 350619c1baaSDaniel Lezcano * element with the modulo. 351bbba0e7cSDaniel Lezcano */ 352619c1baaSDaniel Lezcano while (!memcmp(buffer, &buffer[idx], size * sizeof(int))) { 353619c1baaSDaniel Lezcano 354619c1baaSDaniel Lezcano /* 355619c1baaSDaniel Lezcano * Move the index in a period basis 356619c1baaSDaniel Lezcano */ 357619c1baaSDaniel Lezcano idx += size; 358619c1baaSDaniel Lezcano 359619c1baaSDaniel Lezcano /* 360619c1baaSDaniel Lezcano * If this condition is reached, all previous 361619c1baaSDaniel Lezcano * memcmp were successful, so the period is 362619c1baaSDaniel Lezcano * found. 363619c1baaSDaniel Lezcano */ 364619c1baaSDaniel Lezcano if (idx == len) 365619c1baaSDaniel Lezcano return buffer[len % period]; 366619c1baaSDaniel Lezcano 367619c1baaSDaniel Lezcano /* 368619c1baaSDaniel Lezcano * If the remaining elements to compare are 369619c1baaSDaniel Lezcano * smaller than the period, readjust the size 370619c1baaSDaniel Lezcano * of the comparison for the last iteration. 371619c1baaSDaniel Lezcano */ 372619c1baaSDaniel Lezcano if (len - idx < period) 373619c1baaSDaniel Lezcano size = len - idx; 374bbba0e7cSDaniel Lezcano } 375bbba0e7cSDaniel Lezcano } 376bbba0e7cSDaniel Lezcano 377bbba0e7cSDaniel Lezcano return -1; 378bbba0e7cSDaniel Lezcano } 379bbba0e7cSDaniel Lezcano 380bbba0e7cSDaniel Lezcano static u64 __irq_timings_next_event(struct irqt_stat *irqs, int irq, u64 now) 381bbba0e7cSDaniel Lezcano { 382bbba0e7cSDaniel Lezcano int index, i, period_max, count, start, min = INT_MAX; 383bbba0e7cSDaniel Lezcano 384bbba0e7cSDaniel Lezcano if ((now - irqs->last_ts) >= NSEC_PER_SEC) { 385bbba0e7cSDaniel Lezcano irqs->count = irqs->last_ts = 0; 386bbba0e7cSDaniel Lezcano return U64_MAX; 387bbba0e7cSDaniel Lezcano } 388bbba0e7cSDaniel Lezcano 389bbba0e7cSDaniel Lezcano /* 390bbba0e7cSDaniel Lezcano * As we want to find three times the repetition, we need a 391bbba0e7cSDaniel Lezcano * number of intervals greater or equal to three times the 392bbba0e7cSDaniel Lezcano * maximum period, otherwise we truncate the max period. 393bbba0e7cSDaniel Lezcano */ 394bbba0e7cSDaniel Lezcano period_max = irqs->count > (3 * PREDICTION_PERIOD_MAX) ? 395bbba0e7cSDaniel Lezcano PREDICTION_PERIOD_MAX : irqs->count / 3; 396bbba0e7cSDaniel Lezcano 397bbba0e7cSDaniel Lezcano /* 398bbba0e7cSDaniel Lezcano * If we don't have enough irq timings for this prediction, 399bbba0e7cSDaniel Lezcano * just bail out. 400bbba0e7cSDaniel Lezcano */ 401bbba0e7cSDaniel Lezcano if (period_max <= PREDICTION_PERIOD_MIN) 402bbba0e7cSDaniel Lezcano return U64_MAX; 403bbba0e7cSDaniel Lezcano 404bbba0e7cSDaniel Lezcano /* 405bbba0e7cSDaniel Lezcano * 'count' will depends if the circular buffer wrapped or not 406bbba0e7cSDaniel Lezcano */ 407bbba0e7cSDaniel Lezcano count = irqs->count < IRQ_TIMINGS_SIZE ? 408bbba0e7cSDaniel Lezcano irqs->count : IRQ_TIMINGS_SIZE; 409bbba0e7cSDaniel Lezcano 410bbba0e7cSDaniel Lezcano start = irqs->count < IRQ_TIMINGS_SIZE ? 411bbba0e7cSDaniel Lezcano 0 : (irqs->count & IRQ_TIMINGS_MASK); 412bbba0e7cSDaniel Lezcano 413bbba0e7cSDaniel Lezcano /* 414bbba0e7cSDaniel Lezcano * Copy the content of the circular buffer into another buffer 415bbba0e7cSDaniel Lezcano * in order to linearize the buffer instead of dealing with 416bbba0e7cSDaniel Lezcano * wrapping indexes and shifted array which will be prone to 417bbba0e7cSDaniel Lezcano * error and extremelly difficult to debug. 418bbba0e7cSDaniel Lezcano */ 419bbba0e7cSDaniel Lezcano for (i = 0; i < count; i++) { 420bbba0e7cSDaniel Lezcano int index = (start + i) & IRQ_TIMINGS_MASK; 421bbba0e7cSDaniel Lezcano 422bbba0e7cSDaniel Lezcano irqs->timings[i] = irqs->circ_timings[index]; 423bbba0e7cSDaniel Lezcano min = min_t(int, irqs->timings[i], min); 424bbba0e7cSDaniel Lezcano } 425bbba0e7cSDaniel Lezcano 426bbba0e7cSDaniel Lezcano index = irq_timings_next_event_index(irqs->timings, count, period_max); 427bbba0e7cSDaniel Lezcano if (index < 0) 428bbba0e7cSDaniel Lezcano return irqs->last_ts + irqs->ema_time[min]; 429bbba0e7cSDaniel Lezcano 430bbba0e7cSDaniel Lezcano return irqs->last_ts + irqs->ema_time[index]; 431bbba0e7cSDaniel Lezcano } 432bbba0e7cSDaniel Lezcano 433bbba0e7cSDaniel Lezcano static inline void irq_timings_store(int irq, struct irqt_stat *irqs, u64 ts) 434bbba0e7cSDaniel Lezcano { 435bbba0e7cSDaniel Lezcano u64 old_ts = irqs->last_ts; 436bbba0e7cSDaniel Lezcano u64 interval; 437bbba0e7cSDaniel Lezcano int index; 438bbba0e7cSDaniel Lezcano 439bbba0e7cSDaniel Lezcano /* 440bbba0e7cSDaniel Lezcano * The timestamps are absolute time values, we need to compute 441bbba0e7cSDaniel Lezcano * the timing interval between two interrupts. 442bbba0e7cSDaniel Lezcano */ 443bbba0e7cSDaniel Lezcano irqs->last_ts = ts; 444bbba0e7cSDaniel Lezcano 445bbba0e7cSDaniel Lezcano /* 446bbba0e7cSDaniel Lezcano * The interval type is u64 in order to deal with the same 447bbba0e7cSDaniel Lezcano * type in our computation, that prevent mindfuck issues with 448bbba0e7cSDaniel Lezcano * overflow, sign and division. 449bbba0e7cSDaniel Lezcano */ 450bbba0e7cSDaniel Lezcano interval = ts - old_ts; 451bbba0e7cSDaniel Lezcano 452bbba0e7cSDaniel Lezcano /* 453bbba0e7cSDaniel Lezcano * The interrupt triggered more than one second apart, that 454bbba0e7cSDaniel Lezcano * ends the sequence as predictible for our purpose. In this 455bbba0e7cSDaniel Lezcano * case, assume we have the beginning of a sequence and the 456bbba0e7cSDaniel Lezcano * timestamp is the first value. As it is impossible to 457bbba0e7cSDaniel Lezcano * predict anything at this point, return. 458bbba0e7cSDaniel Lezcano * 459bbba0e7cSDaniel Lezcano * Note the first timestamp of the sequence will always fall 460bbba0e7cSDaniel Lezcano * in this test because the old_ts is zero. That is what we 461bbba0e7cSDaniel Lezcano * want as we need another timestamp to compute an interval. 462bbba0e7cSDaniel Lezcano */ 463bbba0e7cSDaniel Lezcano if (interval >= NSEC_PER_SEC) { 464bbba0e7cSDaniel Lezcano irqs->count = 0; 465bbba0e7cSDaniel Lezcano return; 466bbba0e7cSDaniel Lezcano } 467bbba0e7cSDaniel Lezcano 468bbba0e7cSDaniel Lezcano /* 469bbba0e7cSDaniel Lezcano * Get the index in the ema table for this interrupt. The 470bbba0e7cSDaniel Lezcano * PREDICTION_FACTOR increase the interval size for the array 471bbba0e7cSDaniel Lezcano * of exponential average. 472bbba0e7cSDaniel Lezcano */ 473bbba0e7cSDaniel Lezcano index = likely(interval) ? 474bbba0e7cSDaniel Lezcano ilog2((interval >> 10) / PREDICTION_FACTOR) : 0; 475bbba0e7cSDaniel Lezcano 476bbba0e7cSDaniel Lezcano /* 477bbba0e7cSDaniel Lezcano * Store the index as an element of the pattern in another 478bbba0e7cSDaniel Lezcano * circular array. 479bbba0e7cSDaniel Lezcano */ 480bbba0e7cSDaniel Lezcano irqs->circ_timings[irqs->count & IRQ_TIMINGS_MASK] = index; 481bbba0e7cSDaniel Lezcano 482bbba0e7cSDaniel Lezcano irqs->ema_time[index] = irq_timings_ema_new(interval, 483bbba0e7cSDaniel Lezcano irqs->ema_time[index]); 484bbba0e7cSDaniel Lezcano 485bbba0e7cSDaniel Lezcano irqs->count++; 486bbba0e7cSDaniel Lezcano } 487bbba0e7cSDaniel Lezcano 488e1c92149SDaniel Lezcano /** 489e1c92149SDaniel Lezcano * irq_timings_next_event - Return when the next event is supposed to arrive 490e1c92149SDaniel Lezcano * 491e1c92149SDaniel Lezcano * During the last busy cycle, the number of interrupts is incremented 492e1c92149SDaniel Lezcano * and stored in the irq_timings structure. This information is 493e1c92149SDaniel Lezcano * necessary to: 494e1c92149SDaniel Lezcano * 495e1c92149SDaniel Lezcano * - know if the index in the table wrapped up: 496e1c92149SDaniel Lezcano * 497e1c92149SDaniel Lezcano * If more than the array size interrupts happened during the 498e1c92149SDaniel Lezcano * last busy/idle cycle, the index wrapped up and we have to 499e1c92149SDaniel Lezcano * begin with the next element in the array which is the last one 500e1c92149SDaniel Lezcano * in the sequence, otherwise it is a the index 0. 501e1c92149SDaniel Lezcano * 502e1c92149SDaniel Lezcano * - have an indication of the interrupts activity on this CPU 503e1c92149SDaniel Lezcano * (eg. irq/sec) 504e1c92149SDaniel Lezcano * 505e1c92149SDaniel Lezcano * The values are 'consumed' after inserting in the statistical model, 506e1c92149SDaniel Lezcano * thus the count is reinitialized. 507e1c92149SDaniel Lezcano * 508e1c92149SDaniel Lezcano * The array of values **must** be browsed in the time direction, the 509e1c92149SDaniel Lezcano * timestamp must increase between an element and the next one. 510e1c92149SDaniel Lezcano * 511e1c92149SDaniel Lezcano * Returns a nanosec time based estimation of the earliest interrupt, 512e1c92149SDaniel Lezcano * U64_MAX otherwise. 513e1c92149SDaniel Lezcano */ 514e1c92149SDaniel Lezcano u64 irq_timings_next_event(u64 now) 515e1c92149SDaniel Lezcano { 516bbba0e7cSDaniel Lezcano struct irq_timings *irqts = this_cpu_ptr(&irq_timings); 517bbba0e7cSDaniel Lezcano struct irqt_stat *irqs; 518bbba0e7cSDaniel Lezcano struct irqt_stat __percpu *s; 519bbba0e7cSDaniel Lezcano u64 ts, next_evt = U64_MAX; 520bbba0e7cSDaniel Lezcano int i, irq = 0; 521bbba0e7cSDaniel Lezcano 522e1c92149SDaniel Lezcano /* 523e1c92149SDaniel Lezcano * This function must be called with the local irq disabled in 524e1c92149SDaniel Lezcano * order to prevent the timings circular buffer to be updated 525e1c92149SDaniel Lezcano * while we are reading it. 526e1c92149SDaniel Lezcano */ 527a934d4d1SFrederic Weisbecker lockdep_assert_irqs_disabled(); 528e1c92149SDaniel Lezcano 529bbba0e7cSDaniel Lezcano if (!irqts->count) 530bbba0e7cSDaniel Lezcano return next_evt; 531bbba0e7cSDaniel Lezcano 532bbba0e7cSDaniel Lezcano /* 533bbba0e7cSDaniel Lezcano * Number of elements in the circular buffer: If it happens it 534bbba0e7cSDaniel Lezcano * was flushed before, then the number of elements could be 535bbba0e7cSDaniel Lezcano * smaller than IRQ_TIMINGS_SIZE, so the count is used, 536bbba0e7cSDaniel Lezcano * otherwise the array size is used as we wrapped. The index 537bbba0e7cSDaniel Lezcano * begins from zero when we did not wrap. That could be done 538bbba0e7cSDaniel Lezcano * in a nicer way with the proper circular array structure 539bbba0e7cSDaniel Lezcano * type but with the cost of extra computation in the 540bbba0e7cSDaniel Lezcano * interrupt handler hot path. We choose efficiency. 541bbba0e7cSDaniel Lezcano * 542bbba0e7cSDaniel Lezcano * Inject measured irq/timestamp to the pattern prediction 543bbba0e7cSDaniel Lezcano * model while decrementing the counter because we consume the 544bbba0e7cSDaniel Lezcano * data from our circular buffer. 545bbba0e7cSDaniel Lezcano */ 5462840eef0SDaniel Lezcano for_each_irqts(i, irqts) { 547bbba0e7cSDaniel Lezcano irq = irq_timing_decode(irqts->values[i], &ts); 548bbba0e7cSDaniel Lezcano s = idr_find(&irqt_stats, irq); 549bbba0e7cSDaniel Lezcano if (s) 550bbba0e7cSDaniel Lezcano irq_timings_store(irq, this_cpu_ptr(s), ts); 551bbba0e7cSDaniel Lezcano } 552bbba0e7cSDaniel Lezcano 553bbba0e7cSDaniel Lezcano /* 554bbba0e7cSDaniel Lezcano * Look in the list of interrupts' statistics, the earliest 555bbba0e7cSDaniel Lezcano * next event. 556bbba0e7cSDaniel Lezcano */ 557bbba0e7cSDaniel Lezcano idr_for_each_entry(&irqt_stats, s, i) { 558bbba0e7cSDaniel Lezcano 559bbba0e7cSDaniel Lezcano irqs = this_cpu_ptr(s); 560bbba0e7cSDaniel Lezcano 561bbba0e7cSDaniel Lezcano ts = __irq_timings_next_event(irqs, i, now); 562bbba0e7cSDaniel Lezcano if (ts <= now) 563bbba0e7cSDaniel Lezcano return now; 564bbba0e7cSDaniel Lezcano 565bbba0e7cSDaniel Lezcano if (ts < next_evt) 566bbba0e7cSDaniel Lezcano next_evt = ts; 567bbba0e7cSDaniel Lezcano } 568bbba0e7cSDaniel Lezcano 569bbba0e7cSDaniel Lezcano return next_evt; 570e1c92149SDaniel Lezcano } 571e1c92149SDaniel Lezcano 572e1c92149SDaniel Lezcano void irq_timings_free(int irq) 573e1c92149SDaniel Lezcano { 574e1c92149SDaniel Lezcano struct irqt_stat __percpu *s; 575e1c92149SDaniel Lezcano 576e1c92149SDaniel Lezcano s = idr_find(&irqt_stats, irq); 577e1c92149SDaniel Lezcano if (s) { 578e1c92149SDaniel Lezcano free_percpu(s); 579e1c92149SDaniel Lezcano idr_remove(&irqt_stats, irq); 580e1c92149SDaniel Lezcano } 581e1c92149SDaniel Lezcano } 582e1c92149SDaniel Lezcano 583e1c92149SDaniel Lezcano int irq_timings_alloc(int irq) 584e1c92149SDaniel Lezcano { 585e1c92149SDaniel Lezcano struct irqt_stat __percpu *s; 586e1c92149SDaniel Lezcano int id; 587e1c92149SDaniel Lezcano 588e1c92149SDaniel Lezcano /* 589e1c92149SDaniel Lezcano * Some platforms can have the same private interrupt per cpu, 590e1c92149SDaniel Lezcano * so this function may be be called several times with the 591e1c92149SDaniel Lezcano * same interrupt number. Just bail out in case the per cpu 592e1c92149SDaniel Lezcano * stat structure is already allocated. 593e1c92149SDaniel Lezcano */ 594e1c92149SDaniel Lezcano s = idr_find(&irqt_stats, irq); 595e1c92149SDaniel Lezcano if (s) 596e1c92149SDaniel Lezcano return 0; 597e1c92149SDaniel Lezcano 598e1c92149SDaniel Lezcano s = alloc_percpu(*s); 599e1c92149SDaniel Lezcano if (!s) 600e1c92149SDaniel Lezcano return -ENOMEM; 601e1c92149SDaniel Lezcano 602e1c92149SDaniel Lezcano idr_preload(GFP_KERNEL); 603e1c92149SDaniel Lezcano id = idr_alloc(&irqt_stats, s, irq, irq + 1, GFP_NOWAIT); 604e1c92149SDaniel Lezcano idr_preload_end(); 605e1c92149SDaniel Lezcano 606e1c92149SDaniel Lezcano if (id < 0) { 607e1c92149SDaniel Lezcano free_percpu(s); 608e1c92149SDaniel Lezcano return id; 609e1c92149SDaniel Lezcano } 610e1c92149SDaniel Lezcano 611e1c92149SDaniel Lezcano return 0; 612e1c92149SDaniel Lezcano } 613