xref: /openbmc/qemu/util/throttle.c (revision e76f201f)
1 /*
2  * QEMU throttling infrastructure
3  *
4  * Copyright (C) Nodalink, EURL. 2013-2014
5  * Copyright (C) Igalia, S.L. 2015
6  *
7  * Authors:
8  *   Benoît Canet <benoit.canet@nodalink.com>
9  *   Alberto Garcia <berto@igalia.com>
10  *
11  * This program is free software; you can redistribute it and/or
12  * modify it under the terms of the GNU General Public License as
13  * published by the Free Software Foundation; either version 2 or
14  * (at your option) version 3 of the License.
15  *
16  * This program is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19  * GNU General Public License for more details.
20  *
21  * You should have received a copy of the GNU General Public License
22  * along with this program; if not, see <http://www.gnu.org/licenses/>.
23  */
24 
25 #include "qemu/osdep.h"
26 #include "qapi/error.h"
27 #include "qemu/throttle.h"
28 #include "qemu/timer.h"
29 #include "block/aio.h"
30 
31 /* This function make a bucket leak
32  *
33  * @bkt:   the bucket to make leak
34  * @delta_ns: the time delta
35  */
36 void throttle_leak_bucket(LeakyBucket *bkt, int64_t delta_ns)
37 {
38     double leak;
39 
40     /* compute how much to leak */
41     leak = (bkt->avg * (double) delta_ns) / NANOSECONDS_PER_SECOND;
42 
43     /* make the bucket leak */
44     bkt->level = MAX(bkt->level - leak, 0);
45 
46     /* if we allow bursts for more than one second we also need to
47      * keep track of bkt->burst_level so the bkt->max goal per second
48      * is attained */
49     if (bkt->burst_length > 1) {
50         leak = (bkt->max * (double) delta_ns) / NANOSECONDS_PER_SECOND;
51         bkt->burst_level = MAX(bkt->burst_level - leak, 0);
52     }
53 }
54 
55 /* Calculate the time delta since last leak and make proportionals leaks
56  *
57  * @now:      the current timestamp in ns
58  */
59 static void throttle_do_leak(ThrottleState *ts, int64_t now)
60 {
61     /* compute the time elapsed since the last leak */
62     int64_t delta_ns = now - ts->previous_leak;
63     int i;
64 
65     ts->previous_leak = now;
66 
67     if (delta_ns <= 0) {
68         return;
69     }
70 
71     /* make each bucket leak */
72     for (i = 0; i < BUCKETS_COUNT; i++) {
73         throttle_leak_bucket(&ts->cfg.buckets[i], delta_ns);
74     }
75 }
76 
77 /* do the real job of computing the time to wait
78  *
79  * @limit: the throttling limit
80  * @extra: the number of operation to delay
81  * @ret:   the time to wait in ns
82  */
83 static int64_t throttle_do_compute_wait(double limit, double extra)
84 {
85     double wait = extra * NANOSECONDS_PER_SECOND;
86     wait /= limit;
87     return wait;
88 }
89 
90 /* This function compute the wait time in ns that a leaky bucket should trigger
91  *
92  * @bkt: the leaky bucket we operate on
93  * @ret: the resulting wait time in ns or 0 if the operation can go through
94  */
95 int64_t throttle_compute_wait(LeakyBucket *bkt)
96 {
97     double extra; /* the number of extra units blocking the io */
98     double bucket_size;   /* I/O before throttling to bkt->avg */
99     double burst_bucket_size; /* Before throttling to bkt->max */
100 
101     if (!bkt->avg) {
102         return 0;
103     }
104 
105     if (!bkt->max) {
106         /* If bkt->max is 0 we still want to allow short bursts of I/O
107          * from the guest, otherwise every other request will be throttled
108          * and performance will suffer considerably. */
109         bucket_size = (double) bkt->avg / 10;
110         burst_bucket_size = 0;
111     } else {
112         /* If we have a burst limit then we have to wait until all I/O
113          * at burst rate has finished before throttling to bkt->avg */
114         bucket_size = bkt->max * bkt->burst_length;
115         burst_bucket_size = (double) bkt->max / 10;
116     }
117 
118     /* If the main bucket is full then we have to wait */
119     extra = bkt->level - bucket_size;
120     if (extra > 0) {
121         return throttle_do_compute_wait(bkt->avg, extra);
122     }
123 
124     /* If the main bucket is not full yet we still have to check the
125      * burst bucket in order to enforce the burst limit */
126     if (bkt->burst_length > 1) {
127         assert(bkt->max > 0); /* see throttle_is_valid() */
128         extra = bkt->burst_level - burst_bucket_size;
129         if (extra > 0) {
130             return throttle_do_compute_wait(bkt->max, extra);
131         }
132     }
133 
134     return 0;
135 }
136 
137 /* This function compute the time that must be waited while this IO
138  *
139  * @direction:  throttle direction
140  * @ret:        time to wait
141  */
142 static int64_t throttle_compute_wait_for(ThrottleState *ts,
143                                          ThrottleDirection direction)
144 {
145     BucketType to_check[2][4] = { {THROTTLE_BPS_TOTAL,
146                                    THROTTLE_OPS_TOTAL,
147                                    THROTTLE_BPS_READ,
148                                    THROTTLE_OPS_READ},
149                                   {THROTTLE_BPS_TOTAL,
150                                    THROTTLE_OPS_TOTAL,
151                                    THROTTLE_BPS_WRITE,
152                                    THROTTLE_OPS_WRITE}, };
153     int64_t wait, max_wait = 0;
154     int i;
155 
156     for (i = 0; i < 4; i++) {
157         BucketType index = to_check[direction][i];
158         wait = throttle_compute_wait(&ts->cfg.buckets[index]);
159         if (wait > max_wait) {
160             max_wait = wait;
161         }
162     }
163 
164     return max_wait;
165 }
166 
167 /* compute the timer for this type of operation
168  *
169  * @direction:  throttle direction
170  * @now:        the current clock timestamp
171  * @next_timestamp: the resulting timer
172  * @ret:        true if a timer must be set
173  */
174 static bool throttle_compute_timer(ThrottleState *ts,
175                                    ThrottleDirection direction,
176                                    int64_t now,
177                                    int64_t *next_timestamp)
178 {
179     int64_t wait;
180 
181     /* leak proportionally to the time elapsed */
182     throttle_do_leak(ts, now);
183 
184     /* compute the wait time if any */
185     wait = throttle_compute_wait_for(ts, direction);
186 
187     /* if the code must wait compute when the next timer should fire */
188     if (wait) {
189         *next_timestamp = now + wait;
190         return true;
191     }
192 
193     /* else no need to wait at all */
194     *next_timestamp = now;
195     return false;
196 }
197 
198 /* Add timers to event loop */
199 void throttle_timers_attach_aio_context(ThrottleTimers *tt,
200                                         AioContext *new_context)
201 {
202     ThrottleDirection dir;
203 
204     for (dir = THROTTLE_READ; dir < THROTTLE_MAX; dir++) {
205         if (tt->timer_cb[dir]) {
206             tt->timers[dir] =
207                 aio_timer_new(new_context, tt->clock_type, SCALE_NS,
208                               tt->timer_cb[dir], tt->timer_opaque);
209         }
210     }
211 }
212 
213 /*
214  * Initialize the ThrottleConfig structure to a valid state
215  * @cfg: the config to initialize
216  */
217 void throttle_config_init(ThrottleConfig *cfg)
218 {
219     unsigned i;
220     memset(cfg, 0, sizeof(*cfg));
221     for (i = 0; i < BUCKETS_COUNT; i++) {
222         cfg->buckets[i].burst_length = 1;
223     }
224 }
225 
226 /* To be called first on the ThrottleState */
227 void throttle_init(ThrottleState *ts)
228 {
229     memset(ts, 0, sizeof(ThrottleState));
230     throttle_config_init(&ts->cfg);
231 }
232 
233 /* To be called first on the ThrottleTimers */
234 void throttle_timers_init(ThrottleTimers *tt,
235                           AioContext *aio_context,
236                           QEMUClockType clock_type,
237                           QEMUTimerCB *read_timer_cb,
238                           QEMUTimerCB *write_timer_cb,
239                           void *timer_opaque)
240 {
241     assert(read_timer_cb || write_timer_cb);
242     memset(tt, 0, sizeof(ThrottleTimers));
243 
244     tt->clock_type = clock_type;
245     tt->timer_cb[THROTTLE_READ] = read_timer_cb;
246     tt->timer_cb[THROTTLE_WRITE] = write_timer_cb;
247     tt->timer_opaque = timer_opaque;
248     throttle_timers_attach_aio_context(tt, aio_context);
249 }
250 
251 /* destroy a timer */
252 static void throttle_timer_destroy(QEMUTimer **timer)
253 {
254     if (*timer == NULL) {
255         return;
256     }
257 
258     timer_free(*timer);
259     *timer = NULL;
260 }
261 
262 /* Remove timers from event loop */
263 void throttle_timers_detach_aio_context(ThrottleTimers *tt)
264 {
265     ThrottleDirection dir;
266 
267     for (dir = THROTTLE_READ; dir < THROTTLE_MAX; dir++) {
268         throttle_timer_destroy(&tt->timers[dir]);
269     }
270 }
271 
272 /* To be called last on the ThrottleTimers */
273 void throttle_timers_destroy(ThrottleTimers *tt)
274 {
275     throttle_timers_detach_aio_context(tt);
276 }
277 
278 /* is any throttling timer configured */
279 bool throttle_timers_are_initialized(ThrottleTimers *tt)
280 {
281     ThrottleDirection dir;
282 
283     for (dir = THROTTLE_READ; dir < THROTTLE_MAX; dir++) {
284         if (tt->timers[dir]) {
285             return true;
286         }
287     }
288 
289     return false;
290 }
291 
292 /* Does any throttling must be done
293  *
294  * @cfg: the throttling configuration to inspect
295  * @ret: true if throttling must be done else false
296  */
297 bool throttle_enabled(ThrottleConfig *cfg)
298 {
299     int i;
300 
301     for (i = 0; i < BUCKETS_COUNT; i++) {
302         if (cfg->buckets[i].avg > 0) {
303             return true;
304         }
305     }
306 
307     return false;
308 }
309 
310 /* check if a throttling configuration is valid
311  * @cfg: the throttling configuration to inspect
312  * @ret: true if valid else false
313  * @errp: error object
314  */
315 bool throttle_is_valid(ThrottleConfig *cfg, Error **errp)
316 {
317     int i;
318     bool bps_flag, ops_flag;
319     bool bps_max_flag, ops_max_flag;
320 
321     bps_flag = cfg->buckets[THROTTLE_BPS_TOTAL].avg &&
322                (cfg->buckets[THROTTLE_BPS_READ].avg ||
323                 cfg->buckets[THROTTLE_BPS_WRITE].avg);
324 
325     ops_flag = cfg->buckets[THROTTLE_OPS_TOTAL].avg &&
326                (cfg->buckets[THROTTLE_OPS_READ].avg ||
327                 cfg->buckets[THROTTLE_OPS_WRITE].avg);
328 
329     bps_max_flag = cfg->buckets[THROTTLE_BPS_TOTAL].max &&
330                   (cfg->buckets[THROTTLE_BPS_READ].max  ||
331                    cfg->buckets[THROTTLE_BPS_WRITE].max);
332 
333     ops_max_flag = cfg->buckets[THROTTLE_OPS_TOTAL].max &&
334                    (cfg->buckets[THROTTLE_OPS_READ].max ||
335                    cfg->buckets[THROTTLE_OPS_WRITE].max);
336 
337     if (bps_flag || ops_flag || bps_max_flag || ops_max_flag) {
338         error_setg(errp, "bps/iops/max total values and read/write values"
339                    " cannot be used at the same time");
340         return false;
341     }
342 
343     if (cfg->op_size &&
344         !cfg->buckets[THROTTLE_OPS_TOTAL].avg &&
345         !cfg->buckets[THROTTLE_OPS_READ].avg &&
346         !cfg->buckets[THROTTLE_OPS_WRITE].avg) {
347         error_setg(errp, "iops size requires an iops value to be set");
348         return false;
349     }
350 
351     for (i = 0; i < BUCKETS_COUNT; i++) {
352         LeakyBucket *bkt = &cfg->buckets[i];
353         if (bkt->avg > THROTTLE_VALUE_MAX || bkt->max > THROTTLE_VALUE_MAX) {
354             error_setg(errp, "bps/iops/max values must be within [0, %lld]",
355                        THROTTLE_VALUE_MAX);
356             return false;
357         }
358 
359         if (!bkt->burst_length) {
360             error_setg(errp, "the burst length cannot be 0");
361             return false;
362         }
363 
364         if (bkt->burst_length > 1 && !bkt->max) {
365             error_setg(errp, "burst length set without burst rate");
366             return false;
367         }
368 
369         if (bkt->max && bkt->burst_length > THROTTLE_VALUE_MAX / bkt->max) {
370             error_setg(errp, "burst length too high for this burst rate");
371             return false;
372         }
373 
374         if (bkt->max && !bkt->avg) {
375             error_setg(errp, "bps_max/iops_max require corresponding"
376                        " bps/iops values");
377             return false;
378         }
379 
380         if (bkt->max && bkt->max < bkt->avg) {
381             error_setg(errp, "bps_max/iops_max cannot be lower than bps/iops");
382             return false;
383         }
384     }
385 
386     return true;
387 }
388 
389 /* Used to configure the throttle
390  *
391  * @ts: the throttle state we are working on
392  * @clock_type: the group's clock_type
393  * @cfg: the config to set
394  */
395 void throttle_config(ThrottleState *ts,
396                      QEMUClockType clock_type,
397                      ThrottleConfig *cfg)
398 {
399     int i;
400 
401     ts->cfg = *cfg;
402 
403     /* Zero bucket level */
404     for (i = 0; i < BUCKETS_COUNT; i++) {
405         ts->cfg.buckets[i].level = 0;
406         ts->cfg.buckets[i].burst_level = 0;
407     }
408 
409     ts->previous_leak = qemu_clock_get_ns(clock_type);
410 }
411 
412 /* used to get config
413  *
414  * @ts:  the throttle state we are working on
415  * @cfg: the config to write
416  */
417 void throttle_get_config(ThrottleState *ts, ThrottleConfig *cfg)
418 {
419     *cfg = ts->cfg;
420 }
421 
422 
423 /* Schedule the read or write timer if needed
424  *
425  * NOTE: this function is not unit tested due to it's usage of timer_mod
426  *
427  * @tt:       the timers structure
428  * @direction: throttle direction
429  * @ret:      true if the timer has been scheduled else false
430  */
431 bool throttle_schedule_timer(ThrottleState *ts,
432                              ThrottleTimers *tt,
433                              ThrottleDirection direction)
434 {
435     int64_t now = qemu_clock_get_ns(tt->clock_type);
436     int64_t next_timestamp;
437     QEMUTimer *timer;
438     bool must_wait;
439 
440     assert(direction < THROTTLE_MAX);
441     timer = tt->timers[direction];
442     assert(timer);
443 
444     must_wait = throttle_compute_timer(ts,
445                                        direction,
446                                        now,
447                                        &next_timestamp);
448 
449     /* request not throttled */
450     if (!must_wait) {
451         return false;
452     }
453 
454     /* request throttled and timer pending -> do nothing */
455     if (timer_pending(timer)) {
456         return true;
457     }
458 
459     /* request throttled and timer not pending -> arm timer */
460     timer_mod(timer, next_timestamp);
461     return true;
462 }
463 
464 /* do the accounting for this operation
465  *
466  * @direction: throttle direction
467  * @size:     the size of the operation
468  */
469 void throttle_account(ThrottleState *ts, ThrottleDirection direction,
470                       uint64_t size)
471 {
472     const BucketType bucket_types_size[2][2] = {
473         { THROTTLE_BPS_TOTAL, THROTTLE_BPS_READ },
474         { THROTTLE_BPS_TOTAL, THROTTLE_BPS_WRITE }
475     };
476     const BucketType bucket_types_units[2][2] = {
477         { THROTTLE_OPS_TOTAL, THROTTLE_OPS_READ },
478         { THROTTLE_OPS_TOTAL, THROTTLE_OPS_WRITE }
479     };
480     double units = 1.0;
481     unsigned i;
482 
483     assert(direction < THROTTLE_MAX);
484     /* if cfg.op_size is defined and smaller than size we compute unit count */
485     if (ts->cfg.op_size && size > ts->cfg.op_size) {
486         units = (double) size / ts->cfg.op_size;
487     }
488 
489     for (i = 0; i < 2; i++) {
490         LeakyBucket *bkt;
491 
492         bkt = &ts->cfg.buckets[bucket_types_size[direction][i]];
493         bkt->level += size;
494         if (bkt->burst_length > 1) {
495             bkt->burst_level += size;
496         }
497 
498         bkt = &ts->cfg.buckets[bucket_types_units[direction][i]];
499         bkt->level += units;
500         if (bkt->burst_length > 1) {
501             bkt->burst_level += units;
502         }
503     }
504 }
505 
506 /* return a ThrottleConfig based on the options in a ThrottleLimits
507  *
508  * @arg:    the ThrottleLimits object to read from
509  * @cfg:    the ThrottleConfig to edit
510  * @errp:   error object
511  */
512 void throttle_limits_to_config(ThrottleLimits *arg, ThrottleConfig *cfg,
513                                Error **errp)
514 {
515     if (arg->has_bps_total) {
516         cfg->buckets[THROTTLE_BPS_TOTAL].avg = arg->bps_total;
517     }
518     if (arg->has_bps_read) {
519         cfg->buckets[THROTTLE_BPS_READ].avg  = arg->bps_read;
520     }
521     if (arg->has_bps_write) {
522         cfg->buckets[THROTTLE_BPS_WRITE].avg = arg->bps_write;
523     }
524 
525     if (arg->has_iops_total) {
526         cfg->buckets[THROTTLE_OPS_TOTAL].avg = arg->iops_total;
527     }
528     if (arg->has_iops_read) {
529         cfg->buckets[THROTTLE_OPS_READ].avg  = arg->iops_read;
530     }
531     if (arg->has_iops_write) {
532         cfg->buckets[THROTTLE_OPS_WRITE].avg = arg->iops_write;
533     }
534 
535     if (arg->has_bps_total_max) {
536         cfg->buckets[THROTTLE_BPS_TOTAL].max = arg->bps_total_max;
537     }
538     if (arg->has_bps_read_max) {
539         cfg->buckets[THROTTLE_BPS_READ].max = arg->bps_read_max;
540     }
541     if (arg->has_bps_write_max) {
542         cfg->buckets[THROTTLE_BPS_WRITE].max = arg->bps_write_max;
543     }
544     if (arg->has_iops_total_max) {
545         cfg->buckets[THROTTLE_OPS_TOTAL].max = arg->iops_total_max;
546     }
547     if (arg->has_iops_read_max) {
548         cfg->buckets[THROTTLE_OPS_READ].max = arg->iops_read_max;
549     }
550     if (arg->has_iops_write_max) {
551         cfg->buckets[THROTTLE_OPS_WRITE].max = arg->iops_write_max;
552     }
553 
554     if (arg->has_bps_total_max_length) {
555         if (arg->bps_total_max_length > UINT_MAX) {
556             error_setg(errp, "bps-total-max-length value must be in"
557                              " the range [0, %u]", UINT_MAX);
558             return;
559         }
560         cfg->buckets[THROTTLE_BPS_TOTAL].burst_length = arg->bps_total_max_length;
561     }
562     if (arg->has_bps_read_max_length) {
563         if (arg->bps_read_max_length > UINT_MAX) {
564             error_setg(errp, "bps-read-max-length value must be in"
565                              " the range [0, %u]", UINT_MAX);
566             return;
567         }
568         cfg->buckets[THROTTLE_BPS_READ].burst_length = arg->bps_read_max_length;
569     }
570     if (arg->has_bps_write_max_length) {
571         if (arg->bps_write_max_length > UINT_MAX) {
572             error_setg(errp, "bps-write-max-length value must be in"
573                              " the range [0, %u]", UINT_MAX);
574             return;
575         }
576         cfg->buckets[THROTTLE_BPS_WRITE].burst_length = arg->bps_write_max_length;
577     }
578     if (arg->has_iops_total_max_length) {
579         if (arg->iops_total_max_length > UINT_MAX) {
580             error_setg(errp, "iops-total-max-length value must be in"
581                              " the range [0, %u]", UINT_MAX);
582             return;
583         }
584         cfg->buckets[THROTTLE_OPS_TOTAL].burst_length = arg->iops_total_max_length;
585     }
586     if (arg->has_iops_read_max_length) {
587         if (arg->iops_read_max_length > UINT_MAX) {
588             error_setg(errp, "iops-read-max-length value must be in"
589                              " the range [0, %u]", UINT_MAX);
590             return;
591         }
592         cfg->buckets[THROTTLE_OPS_READ].burst_length = arg->iops_read_max_length;
593     }
594     if (arg->has_iops_write_max_length) {
595         if (arg->iops_write_max_length > UINT_MAX) {
596             error_setg(errp, "iops-write-max-length value must be in"
597                              " the range [0, %u]", UINT_MAX);
598             return;
599         }
600         cfg->buckets[THROTTLE_OPS_WRITE].burst_length = arg->iops_write_max_length;
601     }
602 
603     if (arg->has_iops_size) {
604         cfg->op_size = arg->iops_size;
605     }
606 
607     throttle_is_valid(cfg, errp);
608 }
609 
610 /* write the options of a ThrottleConfig to a ThrottleLimits
611  *
612  * @cfg:    the ThrottleConfig to read from
613  * @var:    the ThrottleLimits to write to
614  */
615 void throttle_config_to_limits(ThrottleConfig *cfg, ThrottleLimits *var)
616 {
617     var->bps_total               = cfg->buckets[THROTTLE_BPS_TOTAL].avg;
618     var->bps_read                = cfg->buckets[THROTTLE_BPS_READ].avg;
619     var->bps_write               = cfg->buckets[THROTTLE_BPS_WRITE].avg;
620     var->iops_total              = cfg->buckets[THROTTLE_OPS_TOTAL].avg;
621     var->iops_read               = cfg->buckets[THROTTLE_OPS_READ].avg;
622     var->iops_write              = cfg->buckets[THROTTLE_OPS_WRITE].avg;
623     var->bps_total_max           = cfg->buckets[THROTTLE_BPS_TOTAL].max;
624     var->bps_read_max            = cfg->buckets[THROTTLE_BPS_READ].max;
625     var->bps_write_max           = cfg->buckets[THROTTLE_BPS_WRITE].max;
626     var->iops_total_max          = cfg->buckets[THROTTLE_OPS_TOTAL].max;
627     var->iops_read_max           = cfg->buckets[THROTTLE_OPS_READ].max;
628     var->iops_write_max          = cfg->buckets[THROTTLE_OPS_WRITE].max;
629     var->bps_total_max_length    = cfg->buckets[THROTTLE_BPS_TOTAL].burst_length;
630     var->bps_read_max_length     = cfg->buckets[THROTTLE_BPS_READ].burst_length;
631     var->bps_write_max_length    = cfg->buckets[THROTTLE_BPS_WRITE].burst_length;
632     var->iops_total_max_length   = cfg->buckets[THROTTLE_OPS_TOTAL].burst_length;
633     var->iops_read_max_length    = cfg->buckets[THROTTLE_OPS_READ].burst_length;
634     var->iops_write_max_length   = cfg->buckets[THROTTLE_OPS_WRITE].burst_length;
635     var->iops_size               = cfg->op_size;
636 
637     var->has_bps_total = true;
638     var->has_bps_read = true;
639     var->has_bps_write = true;
640     var->has_iops_total = true;
641     var->has_iops_read = true;
642     var->has_iops_write = true;
643     var->has_bps_total_max = true;
644     var->has_bps_read_max = true;
645     var->has_bps_write_max = true;
646     var->has_iops_total_max = true;
647     var->has_iops_read_max = true;
648     var->has_iops_write_max = true;
649     var->has_bps_read_max_length = true;
650     var->has_bps_total_max_length = true;
651     var->has_bps_write_max_length = true;
652     var->has_iops_total_max_length = true;
653     var->has_iops_read_max_length = true;
654     var->has_iops_write_max_length = true;
655     var->has_iops_size = true;
656 }
657