/*
 * QEMU throttling infrastructure
 *
 * Copyright (C) Nodalink, EURL. 2013-2014
 * Copyright (C) Igalia, S.L. 2015
 *
 * Authors:
 *   Benoît Canet <benoit.canet@nodalink.com>
 *   Alberto Garcia <berto@igalia.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 or
 * (at your option) version 3 of the License.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/throttle.h"
#include "qemu/timer.h"
#include "block/aio.h"

/* This function makes a bucket leak
 *
 * @bkt:      the bucket to make leak
 * @delta_ns: the time delta
 */
void throttle_leak_bucket(LeakyBucket *bkt, int64_t delta_ns)
{
    double leak;

    /* compute how much to leak */
    leak = (bkt->avg * (double) delta_ns) / NANOSECONDS_PER_SECOND;

    /* make the bucket leak */
    bkt->level = MAX(bkt->level - leak, 0);
}

/* Calculate the time delta since the last leak and make proportional leaks
 *
 * @now: the current timestamp in ns
 */
static void throttle_do_leak(ThrottleState *ts, int64_t now)
{
    /* compute the time elapsed since the last leak */
    int64_t delta_ns = now - ts->previous_leak;
    int i;

    ts->previous_leak = now;

    if (delta_ns <= 0) {
        return;
    }

    /* make each bucket leak */
    for (i = 0; i < BUCKETS_COUNT; i++) {
        throttle_leak_bucket(&ts->cfg.buckets[i], delta_ns);
    }
}

/* do the real job of computing the time to wait
 *
 * @limit: the throttling limit
 * @extra: the number of extra units to delay
 * @ret:   the time to wait in ns
 */
static int64_t throttle_do_compute_wait(double limit, double extra)
{
    double wait = extra * NANOSECONDS_PER_SECOND;
    wait /= limit;
    return wait;
}

/* This function computes the wait time in ns that a leaky bucket should
 * trigger
 *
 * @bkt: the leaky bucket we operate on
 * @ret: the resulting wait time in ns or 0 if the operation can go through
 */
int64_t throttle_compute_wait(LeakyBucket *bkt)
{
    double extra; /* the number of extra units blocking the I/O */

    if (!bkt->avg) {
        return 0;
    }

    extra = bkt->level - bkt->max;

    if (extra <= 0) {
        return 0;
    }

    return throttle_do_compute_wait(bkt->avg, extra);
}

/* This function computes the time that must be waited before this I/O can
 * proceed
 *
 * @is_write: true if the current I/O is a write, false if it's a read
 * @ret:      time to wait
 */
static int64_t throttle_compute_wait_for(ThrottleState *ts,
                                         bool is_write)
{
    BucketType to_check[2][4] = { {THROTTLE_BPS_TOTAL,
                                   THROTTLE_OPS_TOTAL,
                                   THROTTLE_BPS_READ,
                                   THROTTLE_OPS_READ},
                                  {THROTTLE_BPS_TOTAL,
                                   THROTTLE_OPS_TOTAL,
                                   THROTTLE_BPS_WRITE,
                                   THROTTLE_OPS_WRITE}, };
    int64_t wait, max_wait = 0;
    int i;

    for (i = 0; i < 4; i++) {
        BucketType index = to_check[is_write][i];
        wait = throttle_compute_wait(&ts->cfg.buckets[index]);
        if (wait > max_wait) {
            max_wait = wait;
        }
    }

    return max_wait;
}
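/* Illustrative example (not part of the original code): a worked instance
 * of the leaky bucket arithmetic above.  With a sustained rate of
 * 100 units/s, a burst threshold of 150 units and a current level of
 * 200 units, the bucket is 50 units over its threshold, so
 * throttle_compute_wait() should return 50 / 100 s = 500,000,000 ns.
 * This hypothetical helper just hardcodes those numbers.
 */
static inline int64_t throttle_wait_example(void)
{
    LeakyBucket bkt = {
        .avg   = 100, /* sustained rate: 100 units per second */
        .max   = 150, /* burst threshold: 150 units */
        .level = 200, /* current bucket fill: 200 units */
    };

    /* extra = 200 - 150 = 50 units; wait = 50 * 1e9 / 100 = 5e8 ns */
    return throttle_compute_wait(&bkt);
}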
/* compute the timer for this type of operation
 *
 * @is_write:       the type of operation
 * @now:            the current clock timestamp
 * @next_timestamp: the resulting timer
 * @ret:            true if a timer must be set
 */
bool throttle_compute_timer(ThrottleState *ts,
                            bool is_write,
                            int64_t now,
                            int64_t *next_timestamp)
{
    int64_t wait;

    /* leak proportionally to the time elapsed */
    throttle_do_leak(ts, now);

    /* compute the wait time if any */
    wait = throttle_compute_wait_for(ts, is_write);

    /* if the code must wait compute when the next timer should fire */
    if (wait) {
        *next_timestamp = now + wait;
        return true;
    }

    /* else no need to wait at all */
    *next_timestamp = now;
    return false;
}

/* Add timers to event loop */
void throttle_timers_attach_aio_context(ThrottleTimers *tt,
                                        AioContext *new_context)
{
    tt->timers[0] = aio_timer_new(new_context, tt->clock_type, SCALE_NS,
                                  tt->read_timer_cb, tt->timer_opaque);
    tt->timers[1] = aio_timer_new(new_context, tt->clock_type, SCALE_NS,
                                  tt->write_timer_cb, tt->timer_opaque);
}

/* To be called first on the ThrottleState */
void throttle_init(ThrottleState *ts)
{
    memset(ts, 0, sizeof(ThrottleState));
}

/* To be called first on the ThrottleTimers */
void throttle_timers_init(ThrottleTimers *tt,
                          AioContext *aio_context,
                          QEMUClockType clock_type,
                          QEMUTimerCB *read_timer_cb,
                          QEMUTimerCB *write_timer_cb,
                          void *timer_opaque)
{
    memset(tt, 0, sizeof(ThrottleTimers));

    tt->clock_type = clock_type;
    tt->read_timer_cb = read_timer_cb;
    tt->write_timer_cb = write_timer_cb;
    tt->timer_opaque = timer_opaque;
    throttle_timers_attach_aio_context(tt, aio_context);
}

/* destroy a timer */
static void throttle_timer_destroy(QEMUTimer **timer)
{
    assert(*timer != NULL);

    timer_del(*timer);
    timer_free(*timer);
    *timer = NULL;
}

/* Remove timers from event loop */
void throttle_timers_detach_aio_context(ThrottleTimers *tt)
{
    int i;

    for (i = 0; i < 2; i++) {
        throttle_timer_destroy(&tt->timers[i]);
    }
}

/* To be called last on the ThrottleTimers */
void throttle_timers_destroy(ThrottleTimers *tt)
{
    throttle_timers_detach_aio_context(tt);
}

/* Check if any throttling timer is configured */
bool throttle_timers_are_initialized(ThrottleTimers *tt)
{
    return tt->timers[0] != NULL;
}

/* Check if any throttling must be done
 *
 * @cfg: the throttling configuration to inspect
 * @ret: true if throttling must be done else false
 */
bool throttle_enabled(ThrottleConfig *cfg)
{
    int i;

    for (i = 0; i < BUCKETS_COUNT; i++) {
        if (cfg->buckets[i].avg > 0) {
            return true;
        }
    }

    return false;
}
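/* Illustrative sketch (hypothetical, not part of the original code): the
 * typical lifecycle of the structures above.  throttle_init() is called
 * once on the ThrottleState, throttle_timers_init() creates the read and
 * write timers in an AioContext, and throttle_timers_destroy() releases
 * them.  The example_* names are placeholders for the caller's own
 * callbacks, which would restart I/O queued while throttled.
 */
static void example_read_timer_cb(void *opaque)
{
    /* hypothetical: resume read requests queued behind the throttle */
}

static void example_write_timer_cb(void *opaque)
{
    /* hypothetical: resume write requests queued behind the throttle */
}

static void example_throttle_lifecycle(ThrottleState *ts,
                                       ThrottleTimers *tt,
                                       AioContext *ctx)
{
    throttle_init(ts);
    throttle_timers_init(tt, ctx, QEMU_CLOCK_VIRTUAL,
                         example_read_timer_cb,
                         example_write_timer_cb,
                         ts /* opaque handed back to the callbacks */);

    /* ... throttle_config(), throttle_account(), I/O happens ... */

    throttle_timers_destroy(tt);
}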
/* return true if any two throttling parameters conflict
 *
 * @cfg: the throttling configuration to inspect
 * @ret: true if any conflict detected else false
 */
bool throttle_conflicting(ThrottleConfig *cfg)
{
    bool bps_flag, ops_flag;
    bool bps_max_flag, ops_max_flag;

    bps_flag = cfg->buckets[THROTTLE_BPS_TOTAL].avg &&
               (cfg->buckets[THROTTLE_BPS_READ].avg ||
                cfg->buckets[THROTTLE_BPS_WRITE].avg);

    ops_flag = cfg->buckets[THROTTLE_OPS_TOTAL].avg &&
               (cfg->buckets[THROTTLE_OPS_READ].avg ||
                cfg->buckets[THROTTLE_OPS_WRITE].avg);

    bps_max_flag = cfg->buckets[THROTTLE_BPS_TOTAL].max &&
                   (cfg->buckets[THROTTLE_BPS_READ].max ||
                    cfg->buckets[THROTTLE_BPS_WRITE].max);

    ops_max_flag = cfg->buckets[THROTTLE_OPS_TOTAL].max &&
                   (cfg->buckets[THROTTLE_OPS_READ].max ||
                    cfg->buckets[THROTTLE_OPS_WRITE].max);

    return bps_flag || ops_flag || bps_max_flag || ops_max_flag;
}

/* check if a throttling configuration is valid
 * @cfg: the throttling configuration to inspect
 * @ret: true if valid else false
 */
bool throttle_is_valid(ThrottleConfig *cfg)
{
    int i;

    for (i = 0; i < BUCKETS_COUNT; i++) {
        if (cfg->buckets[i].avg < 0 || cfg->buckets[i].max < 0) {
            return false;
        }
    }

    return true;
}

/* check if bps_max/iops_max is used without bps/iops
 * @cfg: the throttling configuration to inspect
 */
bool throttle_max_is_missing_limit(ThrottleConfig *cfg)
{
    int i;

    for (i = 0; i < BUCKETS_COUNT; i++) {
        if (cfg->buckets[i].max && !cfg->buckets[i].avg) {
            return true;
        }
    }
    return false;
}

/* fix bucket parameters */
static void throttle_fix_bucket(LeakyBucket *bkt)
{
    double min;

    /* zero bucket level */
    bkt->level = 0;

    /* The following is done to cope with the Linux CFQ block scheduler,
     * which regroups reads and writes in blocks of 100ms in the guest.
     * When there are two processes, one making reads and the other making
     * writes, CFQ produces a pattern looking like the following:
     * WWWWWWWWWWWRRRRRRRRRRRRRRWWWWWWWWWWWWWwRRRRRRRRRRRRRRRRR
     * Having a max burst value of 100ms worth of the average rate helps
     * smooth the throttling.
     */
    min = bkt->avg / 10;
    if (bkt->avg && !bkt->max) {
        bkt->max = min;
    }
}

/* take care of canceling a timer */
static void throttle_cancel_timer(QEMUTimer *timer)
{
    assert(timer != NULL);

    timer_del(timer);
}

/* Used to configure the throttle
 *
 * @ts:  the throttle state we are working on
 * @tt:  the throttle timers we use in this aio context
 * @cfg: the config to set
 */
void throttle_config(ThrottleState *ts,
                     ThrottleTimers *tt,
                     ThrottleConfig *cfg)
{
    int i;

    ts->cfg = *cfg;

    for (i = 0; i < BUCKETS_COUNT; i++) {
        throttle_fix_bucket(&ts->cfg.buckets[i]);
    }

    ts->previous_leak = qemu_clock_get_ns(tt->clock_type);

    for (i = 0; i < 2; i++) {
        throttle_cancel_timer(tt->timers[i]);
    }
}

/* used to get the current config
 *
 * @ts:  the throttle state we are working on
 * @cfg: the config to write
 */
void throttle_get_config(ThrottleState *ts, ThrottleConfig *cfg)
{
    *cfg = ts->cfg;
}
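/* Illustrative sketch (hypothetical, not part of the original code):
 * building a configuration and running it through the checks above before
 * applying it.  The 10 MB/s and 100 IOPS figures are arbitrary.
 */
static bool example_apply_config(ThrottleState *ts, ThrottleTimers *tt)
{
    ThrottleConfig cfg;

    memset(&cfg, 0, sizeof(cfg));
    cfg.buckets[THROTTLE_BPS_TOTAL].avg = 10 * 1024 * 1024; /* 10 MB/s */
    cfg.buckets[THROTTLE_OPS_TOTAL].avg = 100;              /* 100 IOPS */

    /* reject negative values, total vs. read/write conflicts, and
     * bursts (max) set without a matching average rate */
    if (!throttle_is_valid(&cfg) ||
        throttle_conflicting(&cfg) ||
        throttle_max_is_missing_limit(&cfg)) {
        return false;
    }

    throttle_config(ts, tt, &cfg);
    return true;
}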
/* Schedule the read or write timer if needed
 *
 * NOTE: this function is not unit tested due to its use of timer_mod
 *
 * @tt:       the timers structure
 * @is_write: the type of operation (read/write)
 * @ret:      true if the timer has been scheduled else false
 */
bool throttle_schedule_timer(ThrottleState *ts,
                             ThrottleTimers *tt,
                             bool is_write)
{
    int64_t now = qemu_clock_get_ns(tt->clock_type);
    int64_t next_timestamp;
    bool must_wait;

    must_wait = throttle_compute_timer(ts,
                                       is_write,
                                       now,
                                       &next_timestamp);

    /* request not throttled */
    if (!must_wait) {
        return false;
    }

    /* request throttled and timer pending -> do nothing */
    if (timer_pending(tt->timers[is_write])) {
        return true;
    }

    /* request throttled and timer not pending -> arm timer */
    timer_mod(tt->timers[is_write], next_timestamp);
    return true;
}

/* do the accounting for this operation
 *
 * @is_write: the type of operation (read/write)
 * @size:     the size of the operation
 */
void throttle_account(ThrottleState *ts, bool is_write, uint64_t size)
{
    double units = 1.0;

    /* if cfg.op_size is defined and smaller than size we compute unit count */
    if (ts->cfg.op_size && size > ts->cfg.op_size) {
        units = (double) size / ts->cfg.op_size;
    }

    ts->cfg.buckets[THROTTLE_BPS_TOTAL].level += size;
    ts->cfg.buckets[THROTTLE_OPS_TOTAL].level += units;

    if (is_write) {
        ts->cfg.buckets[THROTTLE_BPS_WRITE].level += size;
        ts->cfg.buckets[THROTTLE_OPS_WRITE].level += units;
    } else {
        ts->cfg.buckets[THROTTLE_BPS_READ].level += size;
        ts->cfg.buckets[THROTTLE_OPS_READ].level += units;
    }
}
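/* Illustrative sketch (hypothetical, not part of the original code): how a
 * caller typically drives the two functions above on the I/O path.  The
 * request is delayed while throttle_schedule_timer() keeps returning true;
 * once it returns false the request proceeds and is accounted for.
 * example_wait_for_timer() stands in for whatever wait primitive the caller
 * uses (in QEMU's block layer this is a coroutine yield restarted by the
 * timer callback); it is assumed here, not defined by this file.
 */
extern void example_wait_for_timer(void);

static void example_throttled_io(ThrottleState *ts, ThrottleTimers *tt,
                                 bool is_write, uint64_t bytes)
{
    /* wait until the leaky buckets have drained below their thresholds */
    while (throttle_schedule_timer(ts, tt, is_write)) {
        example_wait_for_timer();
    }

    /* charge this request against the bps and ops buckets */
    throttle_account(ts, is_write, bytes);
}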