1 /* 2 * QEMU throttling infrastructure 3 * 4 * Copyright (C) Nodalink, EURL. 2013-2014 5 * Copyright (C) Igalia, S.L. 2015 6 * 7 * Authors: 8 * Benoît Canet <benoit.canet@nodalink.com> 9 * Alberto Garcia <berto@igalia.com> 10 * 11 * This program is free software; you can redistribute it and/or 12 * modify it under the terms of the GNU General Public License as 13 * published by the Free Software Foundation; either version 2 or 14 * (at your option) version 3 of the License. 15 * 16 * This program is distributed in the hope that it will be useful, 17 * but WITHOUT ANY WARRANTY; without even the implied warranty of 18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 19 * GNU General Public License for more details. 20 * 21 * You should have received a copy of the GNU General Public License 22 * along with this program; if not, see <http://www.gnu.org/licenses/>. 23 */ 24 25 #include "qemu/osdep.h" 26 #include "qapi/error.h" 27 #include "qemu/throttle.h" 28 #include "qemu/timer.h" 29 #include "block/aio.h" 30 31 /* This function make a bucket leak 32 * 33 * @bkt: the bucket to make leak 34 * @delta_ns: the time delta 35 */ 36 void throttle_leak_bucket(LeakyBucket *bkt, int64_t delta_ns) 37 { 38 double leak; 39 40 /* compute how much to leak */ 41 leak = (bkt->avg * (double) delta_ns) / NANOSECONDS_PER_SECOND; 42 43 /* make the bucket leak */ 44 bkt->level = MAX(bkt->level - leak, 0); 45 46 /* if we allow bursts for more than one second we also need to 47 * keep track of bkt->burst_level so the bkt->max goal per second 48 * is attained */ 49 if (bkt->burst_length > 1) { 50 leak = (bkt->max * (double) delta_ns) / NANOSECONDS_PER_SECOND; 51 bkt->burst_level = MAX(bkt->burst_level - leak, 0); 52 } 53 } 54 55 /* Calculate the time delta since last leak and make proportionals leaks 56 * 57 * @now: the current timestamp in ns 58 */ 59 static void throttle_do_leak(ThrottleState *ts, int64_t now) 60 { 61 /* compute the time elapsed since the last leak */ 62 int64_t delta_ns = now - ts->previous_leak; 63 int i; 64 65 ts->previous_leak = now; 66 67 if (delta_ns <= 0) { 68 return; 69 } 70 71 /* make each bucket leak */ 72 for (i = 0; i < BUCKETS_COUNT; i++) { 73 throttle_leak_bucket(&ts->cfg.buckets[i], delta_ns); 74 } 75 } 76 77 /* do the real job of computing the time to wait 78 * 79 * @limit: the throttling limit 80 * @extra: the number of operation to delay 81 * @ret: the time to wait in ns 82 */ 83 static int64_t throttle_do_compute_wait(double limit, double extra) 84 { 85 double wait = extra * NANOSECONDS_PER_SECOND; 86 wait /= limit; 87 return wait; 88 } 89 90 /* This function compute the wait time in ns that a leaky bucket should trigger 91 * 92 * @bkt: the leaky bucket we operate on 93 * @ret: the resulting wait time in ns or 0 if the operation can go through 94 */ 95 int64_t throttle_compute_wait(LeakyBucket *bkt) 96 { 97 double extra; /* the number of extra units blocking the io */ 98 double bucket_size; /* I/O before throttling to bkt->avg */ 99 double burst_bucket_size; /* Before throttling to bkt->max */ 100 101 if (!bkt->avg) { 102 return 0; 103 } 104 105 if (!bkt->max) { 106 /* If bkt->max is 0 we still want to allow short bursts of I/O 107 * from the guest, otherwise every other request will be throttled 108 * and performance will suffer considerably. */ 109 bucket_size = (double) bkt->avg / 10; 110 burst_bucket_size = 0; 111 } else { 112 /* If we have a burst limit then we have to wait until all I/O 113 * at burst rate has finished before throttling to bkt->avg */ 114 bucket_size = bkt->max * bkt->burst_length; 115 burst_bucket_size = (double) bkt->max / 10; 116 } 117 118 /* If the main bucket is full then we have to wait */ 119 extra = bkt->level - bucket_size; 120 if (extra > 0) { 121 return throttle_do_compute_wait(bkt->avg, extra); 122 } 123 124 /* If the main bucket is not full yet we still have to check the 125 * burst bucket in order to enforce the burst limit */ 126 if (bkt->burst_length > 1) { 127 assert(bkt->max > 0); /* see throttle_is_valid() */ 128 extra = bkt->burst_level - burst_bucket_size; 129 if (extra > 0) { 130 return throttle_do_compute_wait(bkt->max, extra); 131 } 132 } 133 134 return 0; 135 } 136 137 /* This function compute the time that must be waited while this IO 138 * 139 * @direction: throttle direction 140 * @ret: time to wait 141 */ 142 static int64_t throttle_compute_wait_for(ThrottleState *ts, 143 ThrottleDirection direction) 144 { 145 static const BucketType to_check[THROTTLE_MAX][4] = { 146 {THROTTLE_BPS_TOTAL, 147 THROTTLE_OPS_TOTAL, 148 THROTTLE_BPS_READ, 149 THROTTLE_OPS_READ}, 150 {THROTTLE_BPS_TOTAL, 151 THROTTLE_OPS_TOTAL, 152 THROTTLE_BPS_WRITE, 153 THROTTLE_OPS_WRITE}, }; 154 int64_t wait, max_wait = 0; 155 int i; 156 157 for (i = 0; i < ARRAY_SIZE(to_check[THROTTLE_READ]); i++) { 158 BucketType index = to_check[direction][i]; 159 wait = throttle_compute_wait(&ts->cfg.buckets[index]); 160 if (wait > max_wait) { 161 max_wait = wait; 162 } 163 } 164 165 return max_wait; 166 } 167 168 /* compute the timer for this type of operation 169 * 170 * @direction: throttle direction 171 * @now: the current clock timestamp 172 * @next_timestamp: the resulting timer 173 * @ret: true if a timer must be set 174 */ 175 static bool throttle_compute_timer(ThrottleState *ts, 176 ThrottleDirection direction, 177 int64_t now, 178 int64_t *next_timestamp) 179 { 180 int64_t wait; 181 182 /* leak proportionally to the time elapsed */ 183 throttle_do_leak(ts, now); 184 185 /* compute the wait time if any */ 186 wait = throttle_compute_wait_for(ts, direction); 187 188 /* if the code must wait compute when the next timer should fire */ 189 if (wait) { 190 *next_timestamp = now + wait; 191 return true; 192 } 193 194 /* else no need to wait at all */ 195 *next_timestamp = now; 196 return false; 197 } 198 199 /* Add timers to event loop */ 200 void throttle_timers_attach_aio_context(ThrottleTimers *tt, 201 AioContext *new_context) 202 { 203 ThrottleDirection dir; 204 205 for (dir = THROTTLE_READ; dir < THROTTLE_MAX; dir++) { 206 if (tt->timer_cb[dir]) { 207 tt->timers[dir] = 208 aio_timer_new(new_context, tt->clock_type, SCALE_NS, 209 tt->timer_cb[dir], tt->timer_opaque); 210 } 211 } 212 } 213 214 /* 215 * Initialize the ThrottleConfig structure to a valid state 216 * @cfg: the config to initialize 217 */ 218 void throttle_config_init(ThrottleConfig *cfg) 219 { 220 unsigned i; 221 memset(cfg, 0, sizeof(*cfg)); 222 for (i = 0; i < BUCKETS_COUNT; i++) { 223 cfg->buckets[i].burst_length = 1; 224 } 225 } 226 227 /* To be called first on the ThrottleState */ 228 void throttle_init(ThrottleState *ts) 229 { 230 memset(ts, 0, sizeof(ThrottleState)); 231 throttle_config_init(&ts->cfg); 232 } 233 234 /* To be called first on the ThrottleTimers */ 235 void throttle_timers_init(ThrottleTimers *tt, 236 AioContext *aio_context, 237 QEMUClockType clock_type, 238 QEMUTimerCB *read_timer_cb, 239 QEMUTimerCB *write_timer_cb, 240 void *timer_opaque) 241 { 242 assert(read_timer_cb || write_timer_cb); 243 memset(tt, 0, sizeof(ThrottleTimers)); 244 245 tt->clock_type = clock_type; 246 tt->timer_cb[THROTTLE_READ] = read_timer_cb; 247 tt->timer_cb[THROTTLE_WRITE] = write_timer_cb; 248 tt->timer_opaque = timer_opaque; 249 throttle_timers_attach_aio_context(tt, aio_context); 250 } 251 252 /* destroy a timer */ 253 static void throttle_timer_destroy(QEMUTimer **timer) 254 { 255 if (*timer == NULL) { 256 return; 257 } 258 259 timer_free(*timer); 260 *timer = NULL; 261 } 262 263 /* Remove timers from event loop */ 264 void throttle_timers_detach_aio_context(ThrottleTimers *tt) 265 { 266 ThrottleDirection dir; 267 268 for (dir = THROTTLE_READ; dir < THROTTLE_MAX; dir++) { 269 throttle_timer_destroy(&tt->timers[dir]); 270 } 271 } 272 273 /* To be called last on the ThrottleTimers */ 274 void throttle_timers_destroy(ThrottleTimers *tt) 275 { 276 throttle_timers_detach_aio_context(tt); 277 } 278 279 /* is any throttling timer configured */ 280 bool throttle_timers_are_initialized(ThrottleTimers *tt) 281 { 282 ThrottleDirection dir; 283 284 for (dir = THROTTLE_READ; dir < THROTTLE_MAX; dir++) { 285 if (tt->timers[dir]) { 286 return true; 287 } 288 } 289 290 return false; 291 } 292 293 /* Does any throttling must be done 294 * 295 * @cfg: the throttling configuration to inspect 296 * @ret: true if throttling must be done else false 297 */ 298 bool throttle_enabled(ThrottleConfig *cfg) 299 { 300 int i; 301 302 for (i = 0; i < BUCKETS_COUNT; i++) { 303 if (cfg->buckets[i].avg > 0) { 304 return true; 305 } 306 } 307 308 return false; 309 } 310 311 /* check if a throttling configuration is valid 312 * @cfg: the throttling configuration to inspect 313 * @ret: true if valid else false 314 * @errp: error object 315 */ 316 bool throttle_is_valid(ThrottleConfig *cfg, Error **errp) 317 { 318 int i; 319 bool bps_flag, ops_flag; 320 bool bps_max_flag, ops_max_flag; 321 322 bps_flag = cfg->buckets[THROTTLE_BPS_TOTAL].avg && 323 (cfg->buckets[THROTTLE_BPS_READ].avg || 324 cfg->buckets[THROTTLE_BPS_WRITE].avg); 325 326 ops_flag = cfg->buckets[THROTTLE_OPS_TOTAL].avg && 327 (cfg->buckets[THROTTLE_OPS_READ].avg || 328 cfg->buckets[THROTTLE_OPS_WRITE].avg); 329 330 bps_max_flag = cfg->buckets[THROTTLE_BPS_TOTAL].max && 331 (cfg->buckets[THROTTLE_BPS_READ].max || 332 cfg->buckets[THROTTLE_BPS_WRITE].max); 333 334 ops_max_flag = cfg->buckets[THROTTLE_OPS_TOTAL].max && 335 (cfg->buckets[THROTTLE_OPS_READ].max || 336 cfg->buckets[THROTTLE_OPS_WRITE].max); 337 338 if (bps_flag || ops_flag || bps_max_flag || ops_max_flag) { 339 error_setg(errp, "bps/iops/max total values and read/write values" 340 " cannot be used at the same time"); 341 return false; 342 } 343 344 if (cfg->op_size && 345 !cfg->buckets[THROTTLE_OPS_TOTAL].avg && 346 !cfg->buckets[THROTTLE_OPS_READ].avg && 347 !cfg->buckets[THROTTLE_OPS_WRITE].avg) { 348 error_setg(errp, "iops size requires an iops value to be set"); 349 return false; 350 } 351 352 for (i = 0; i < BUCKETS_COUNT; i++) { 353 LeakyBucket *bkt = &cfg->buckets[i]; 354 if (bkt->avg > THROTTLE_VALUE_MAX || bkt->max > THROTTLE_VALUE_MAX) { 355 error_setg(errp, "bps/iops/max values must be within [0, %lld]", 356 THROTTLE_VALUE_MAX); 357 return false; 358 } 359 360 if (!bkt->burst_length) { 361 error_setg(errp, "the burst length cannot be 0"); 362 return false; 363 } 364 365 if (bkt->burst_length > 1 && !bkt->max) { 366 error_setg(errp, "burst length set without burst rate"); 367 return false; 368 } 369 370 if (bkt->max && bkt->burst_length > THROTTLE_VALUE_MAX / bkt->max) { 371 error_setg(errp, "burst length too high for this burst rate"); 372 return false; 373 } 374 375 if (bkt->max && !bkt->avg) { 376 error_setg(errp, "bps_max/iops_max require corresponding" 377 " bps/iops values"); 378 return false; 379 } 380 381 if (bkt->max && bkt->max < bkt->avg) { 382 error_setg(errp, "bps_max/iops_max cannot be lower than bps/iops"); 383 return false; 384 } 385 } 386 387 return true; 388 } 389 390 /* Used to configure the throttle 391 * 392 * @ts: the throttle state we are working on 393 * @clock_type: the group's clock_type 394 * @cfg: the config to set 395 */ 396 void throttle_config(ThrottleState *ts, 397 QEMUClockType clock_type, 398 ThrottleConfig *cfg) 399 { 400 int i; 401 402 ts->cfg = *cfg; 403 404 /* Zero bucket level */ 405 for (i = 0; i < BUCKETS_COUNT; i++) { 406 ts->cfg.buckets[i].level = 0; 407 ts->cfg.buckets[i].burst_level = 0; 408 } 409 410 ts->previous_leak = qemu_clock_get_ns(clock_type); 411 } 412 413 /* used to get config 414 * 415 * @ts: the throttle state we are working on 416 * @cfg: the config to write 417 */ 418 void throttle_get_config(ThrottleState *ts, ThrottleConfig *cfg) 419 { 420 *cfg = ts->cfg; 421 } 422 423 424 /* Schedule the read or write timer if needed 425 * 426 * NOTE: this function is not unit tested due to it's usage of timer_mod 427 * 428 * @tt: the timers structure 429 * @direction: throttle direction 430 * @ret: true if the timer has been scheduled else false 431 */ 432 bool throttle_schedule_timer(ThrottleState *ts, 433 ThrottleTimers *tt, 434 ThrottleDirection direction) 435 { 436 int64_t now = qemu_clock_get_ns(tt->clock_type); 437 int64_t next_timestamp; 438 QEMUTimer *timer; 439 bool must_wait; 440 441 assert(direction < THROTTLE_MAX); 442 timer = tt->timers[direction]; 443 assert(timer); 444 445 must_wait = throttle_compute_timer(ts, 446 direction, 447 now, 448 &next_timestamp); 449 450 /* request not throttled */ 451 if (!must_wait) { 452 return false; 453 } 454 455 /* request throttled and timer pending -> do nothing */ 456 if (timer_pending(timer)) { 457 return true; 458 } 459 460 /* request throttled and timer not pending -> arm timer */ 461 timer_mod(timer, next_timestamp); 462 return true; 463 } 464 465 /* do the accounting for this operation 466 * 467 * @direction: throttle direction 468 * @size: the size of the operation 469 */ 470 void throttle_account(ThrottleState *ts, ThrottleDirection direction, 471 uint64_t size) 472 { 473 static const BucketType bucket_types_size[THROTTLE_MAX][2] = { 474 { THROTTLE_BPS_TOTAL, THROTTLE_BPS_READ }, 475 { THROTTLE_BPS_TOTAL, THROTTLE_BPS_WRITE } 476 }; 477 static const BucketType bucket_types_units[THROTTLE_MAX][2] = { 478 { THROTTLE_OPS_TOTAL, THROTTLE_OPS_READ }, 479 { THROTTLE_OPS_TOTAL, THROTTLE_OPS_WRITE } 480 }; 481 double units = 1.0; 482 unsigned i; 483 484 assert(direction < THROTTLE_MAX); 485 /* if cfg.op_size is defined and smaller than size we compute unit count */ 486 if (ts->cfg.op_size && size > ts->cfg.op_size) { 487 units = (double) size / ts->cfg.op_size; 488 } 489 490 for (i = 0; i < ARRAY_SIZE(bucket_types_size[THROTTLE_READ]); i++) { 491 LeakyBucket *bkt; 492 493 bkt = &ts->cfg.buckets[bucket_types_size[direction][i]]; 494 bkt->level += size; 495 if (bkt->burst_length > 1) { 496 bkt->burst_level += size; 497 } 498 499 bkt = &ts->cfg.buckets[bucket_types_units[direction][i]]; 500 bkt->level += units; 501 if (bkt->burst_length > 1) { 502 bkt->burst_level += units; 503 } 504 } 505 } 506 507 /* return a ThrottleConfig based on the options in a ThrottleLimits 508 * 509 * @arg: the ThrottleLimits object to read from 510 * @cfg: the ThrottleConfig to edit 511 * @errp: error object 512 */ 513 void throttle_limits_to_config(ThrottleLimits *arg, ThrottleConfig *cfg, 514 Error **errp) 515 { 516 if (arg->has_bps_total) { 517 cfg->buckets[THROTTLE_BPS_TOTAL].avg = arg->bps_total; 518 } 519 if (arg->has_bps_read) { 520 cfg->buckets[THROTTLE_BPS_READ].avg = arg->bps_read; 521 } 522 if (arg->has_bps_write) { 523 cfg->buckets[THROTTLE_BPS_WRITE].avg = arg->bps_write; 524 } 525 526 if (arg->has_iops_total) { 527 cfg->buckets[THROTTLE_OPS_TOTAL].avg = arg->iops_total; 528 } 529 if (arg->has_iops_read) { 530 cfg->buckets[THROTTLE_OPS_READ].avg = arg->iops_read; 531 } 532 if (arg->has_iops_write) { 533 cfg->buckets[THROTTLE_OPS_WRITE].avg = arg->iops_write; 534 } 535 536 if (arg->has_bps_total_max) { 537 cfg->buckets[THROTTLE_BPS_TOTAL].max = arg->bps_total_max; 538 } 539 if (arg->has_bps_read_max) { 540 cfg->buckets[THROTTLE_BPS_READ].max = arg->bps_read_max; 541 } 542 if (arg->has_bps_write_max) { 543 cfg->buckets[THROTTLE_BPS_WRITE].max = arg->bps_write_max; 544 } 545 if (arg->has_iops_total_max) { 546 cfg->buckets[THROTTLE_OPS_TOTAL].max = arg->iops_total_max; 547 } 548 if (arg->has_iops_read_max) { 549 cfg->buckets[THROTTLE_OPS_READ].max = arg->iops_read_max; 550 } 551 if (arg->has_iops_write_max) { 552 cfg->buckets[THROTTLE_OPS_WRITE].max = arg->iops_write_max; 553 } 554 555 if (arg->has_bps_total_max_length) { 556 if (arg->bps_total_max_length > UINT_MAX) { 557 error_setg(errp, "bps-total-max-length value must be in" 558 " the range [0, %u]", UINT_MAX); 559 return; 560 } 561 cfg->buckets[THROTTLE_BPS_TOTAL].burst_length = arg->bps_total_max_length; 562 } 563 if (arg->has_bps_read_max_length) { 564 if (arg->bps_read_max_length > UINT_MAX) { 565 error_setg(errp, "bps-read-max-length value must be in" 566 " the range [0, %u]", UINT_MAX); 567 return; 568 } 569 cfg->buckets[THROTTLE_BPS_READ].burst_length = arg->bps_read_max_length; 570 } 571 if (arg->has_bps_write_max_length) { 572 if (arg->bps_write_max_length > UINT_MAX) { 573 error_setg(errp, "bps-write-max-length value must be in" 574 " the range [0, %u]", UINT_MAX); 575 return; 576 } 577 cfg->buckets[THROTTLE_BPS_WRITE].burst_length = arg->bps_write_max_length; 578 } 579 if (arg->has_iops_total_max_length) { 580 if (arg->iops_total_max_length > UINT_MAX) { 581 error_setg(errp, "iops-total-max-length value must be in" 582 " the range [0, %u]", UINT_MAX); 583 return; 584 } 585 cfg->buckets[THROTTLE_OPS_TOTAL].burst_length = arg->iops_total_max_length; 586 } 587 if (arg->has_iops_read_max_length) { 588 if (arg->iops_read_max_length > UINT_MAX) { 589 error_setg(errp, "iops-read-max-length value must be in" 590 " the range [0, %u]", UINT_MAX); 591 return; 592 } 593 cfg->buckets[THROTTLE_OPS_READ].burst_length = arg->iops_read_max_length; 594 } 595 if (arg->has_iops_write_max_length) { 596 if (arg->iops_write_max_length > UINT_MAX) { 597 error_setg(errp, "iops-write-max-length value must be in" 598 " the range [0, %u]", UINT_MAX); 599 return; 600 } 601 cfg->buckets[THROTTLE_OPS_WRITE].burst_length = arg->iops_write_max_length; 602 } 603 604 if (arg->has_iops_size) { 605 cfg->op_size = arg->iops_size; 606 } 607 608 throttle_is_valid(cfg, errp); 609 } 610 611 /* write the options of a ThrottleConfig to a ThrottleLimits 612 * 613 * @cfg: the ThrottleConfig to read from 614 * @var: the ThrottleLimits to write to 615 */ 616 void throttle_config_to_limits(ThrottleConfig *cfg, ThrottleLimits *var) 617 { 618 var->bps_total = cfg->buckets[THROTTLE_BPS_TOTAL].avg; 619 var->bps_read = cfg->buckets[THROTTLE_BPS_READ].avg; 620 var->bps_write = cfg->buckets[THROTTLE_BPS_WRITE].avg; 621 var->iops_total = cfg->buckets[THROTTLE_OPS_TOTAL].avg; 622 var->iops_read = cfg->buckets[THROTTLE_OPS_READ].avg; 623 var->iops_write = cfg->buckets[THROTTLE_OPS_WRITE].avg; 624 var->bps_total_max = cfg->buckets[THROTTLE_BPS_TOTAL].max; 625 var->bps_read_max = cfg->buckets[THROTTLE_BPS_READ].max; 626 var->bps_write_max = cfg->buckets[THROTTLE_BPS_WRITE].max; 627 var->iops_total_max = cfg->buckets[THROTTLE_OPS_TOTAL].max; 628 var->iops_read_max = cfg->buckets[THROTTLE_OPS_READ].max; 629 var->iops_write_max = cfg->buckets[THROTTLE_OPS_WRITE].max; 630 var->bps_total_max_length = cfg->buckets[THROTTLE_BPS_TOTAL].burst_length; 631 var->bps_read_max_length = cfg->buckets[THROTTLE_BPS_READ].burst_length; 632 var->bps_write_max_length = cfg->buckets[THROTTLE_BPS_WRITE].burst_length; 633 var->iops_total_max_length = cfg->buckets[THROTTLE_OPS_TOTAL].burst_length; 634 var->iops_read_max_length = cfg->buckets[THROTTLE_OPS_READ].burst_length; 635 var->iops_write_max_length = cfg->buckets[THROTTLE_OPS_WRITE].burst_length; 636 var->iops_size = cfg->op_size; 637 638 var->has_bps_total = true; 639 var->has_bps_read = true; 640 var->has_bps_write = true; 641 var->has_iops_total = true; 642 var->has_iops_read = true; 643 var->has_iops_write = true; 644 var->has_bps_total_max = true; 645 var->has_bps_read_max = true; 646 var->has_bps_write_max = true; 647 var->has_iops_total_max = true; 648 var->has_iops_read_max = true; 649 var->has_iops_write_max = true; 650 var->has_bps_read_max_length = true; 651 var->has_bps_total_max_length = true; 652 var->has_bps_write_max_length = true; 653 var->has_iops_total_max_length = true; 654 var->has_iops_read_max_length = true; 655 var->has_iops_write_max_length = true; 656 var->has_iops_size = true; 657 } 658