/*
 * QEMU throttling infrastructure
 *
 * Copyright (C) Nodalink, EURL. 2013-2014
 * Copyright (C) Igalia, S.L. 2015
 *
 * Authors:
 *   Benoît Canet <benoit.canet@nodalink.com>
 *   Alberto Garcia <berto@igalia.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 or
 * (at your option) version 3 of the License.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/throttle.h"
#include "qemu/timer.h"
#include "block/aio.h"

/* This function makes a bucket leak
 *
 * @bkt: the bucket to make leak
 * @delta_ns: the time delta
 */
void throttle_leak_bucket(LeakyBucket *bkt, int64_t delta_ns)
{
    double leak;

    /* compute how much to leak */
    leak = (bkt->avg * (double) delta_ns) / NSEC_PER_SEC;

    /* make the bucket leak */
    bkt->level = MAX(bkt->level - leak, 0);
}

/* Calculate the time delta since the last leak and make proportional leaks
 *
 * @now: the current timestamp in ns
 */
static void throttle_do_leak(ThrottleState *ts, int64_t now)
{
    /* compute the time elapsed since the last leak */
    int64_t delta_ns = now - ts->previous_leak;
    int i;

    ts->previous_leak = now;

    if (delta_ns <= 0) {
        return;
    }

    /* make each bucket leak */
    for (i = 0; i < BUCKETS_COUNT; i++) {
        throttle_leak_bucket(&ts->cfg.buckets[i], delta_ns);
    }
}

/* do the real job of computing the time to wait
 *
 * @limit: the throttling limit
 * @extra: the number of extra units that must drain before I/O resumes
 * @ret: the time to wait in ns
 */
static int64_t throttle_do_compute_wait(double limit, double extra)
{
    double wait = extra * NSEC_PER_SEC;
    wait /= limit;
    return wait;
}

/* This function computes the wait time in ns that a leaky bucket should
 * trigger
 *
 * @bkt: the leaky bucket we operate on
 * @ret: the resulting wait time in ns or 0 if the operation can go through
 */
int64_t throttle_compute_wait(LeakyBucket *bkt)
{
    double extra; /* the number of extra units blocking the io */

    if (!bkt->avg) {
        return 0;
    }

    extra = bkt->level - bkt->max;

    if (extra <= 0) {
        return 0;
    }

    return throttle_do_compute_wait(bkt->avg, extra);
}

/* This function computes the time that must be waited for this I/O
 *
 * @is_write: true if the current IO is a write, false if it's a read
 * @ret: time to wait
 */
static int64_t throttle_compute_wait_for(ThrottleState *ts,
                                         bool is_write)
{
    BucketType to_check[2][4] = { {THROTTLE_BPS_TOTAL,
                                   THROTTLE_OPS_TOTAL,
                                   THROTTLE_BPS_READ,
                                   THROTTLE_OPS_READ},
                                  {THROTTLE_BPS_TOTAL,
                                   THROTTLE_OPS_TOTAL,
                                   THROTTLE_BPS_WRITE,
                                   THROTTLE_OPS_WRITE}, };
    int64_t wait, max_wait = 0;
    int i;

    for (i = 0; i < 4; i++) {
        BucketType index = to_check[is_write][i];
        wait = throttle_compute_wait(&ts->cfg.buckets[index]);
        if (wait > max_wait) {
            max_wait = wait;
        }
    }

    return max_wait;
}
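/* Worked example (illustrative sketch, kept out of the build; the function
 * and its values are hypothetical, not part of the QEMU API): with a limit
 * of avg = 100 units/s and a bucket filled to level = 150 units against a
 * burst ceiling of max = 100 units, 50 extra units must drain, so
 * throttle_compute_wait() returns 50 / 100 s = 500000000 ns.
 */
#if 0
static void example_compute_wait(void)
{
    LeakyBucket bkt = { .avg = 100, .max = 100, .level = 150 };

    /* extra = 150 - 100 = 50; wait = 50 * NSEC_PER_SEC / 100 = 5e8 ns */
    int64_t wait = throttle_compute_wait(&bkt);
    assert(wait == 500000000);
}
#endif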
/* compute the timer for this type of operation
 *
 * @is_write: the type of operation
 * @now: the current clock timestamp
 * @next_timestamp: the resulting timer
 * @ret: true if a timer must be set
 */
bool throttle_compute_timer(ThrottleState *ts,
                            bool is_write,
                            int64_t now,
                            int64_t *next_timestamp)
{
    int64_t wait;

    /* leak proportionally to the time elapsed */
    throttle_do_leak(ts, now);

    /* compute the wait time if any */
    wait = throttle_compute_wait_for(ts, is_write);

    /* if the code must wait compute when the next timer should fire */
    if (wait) {
        *next_timestamp = now + wait;
        return true;
    }

    /* else no need to wait at all */
    *next_timestamp = now;
    return false;
}

/* Add timers to event loop */
void throttle_timers_attach_aio_context(ThrottleTimers *tt,
                                        AioContext *new_context)
{
    tt->timers[0] = aio_timer_new(new_context, tt->clock_type, SCALE_NS,
                                  tt->read_timer_cb, tt->timer_opaque);
    tt->timers[1] = aio_timer_new(new_context, tt->clock_type, SCALE_NS,
                                  tt->write_timer_cb, tt->timer_opaque);
}

/* To be called first on the ThrottleState */
void throttle_init(ThrottleState *ts)
{
    memset(ts, 0, sizeof(ThrottleState));
}

/* To be called first on the ThrottleTimers */
void throttle_timers_init(ThrottleTimers *tt,
                          AioContext *aio_context,
                          QEMUClockType clock_type,
                          QEMUTimerCB *read_timer_cb,
                          QEMUTimerCB *write_timer_cb,
                          void *timer_opaque)
{
    memset(tt, 0, sizeof(ThrottleTimers));

    tt->clock_type = clock_type;
    tt->read_timer_cb = read_timer_cb;
    tt->write_timer_cb = write_timer_cb;
    tt->timer_opaque = timer_opaque;
    throttle_timers_attach_aio_context(tt, aio_context);
}

/* destroy a timer */
static void throttle_timer_destroy(QEMUTimer **timer)
{
    assert(*timer != NULL);

    timer_del(*timer);
    timer_free(*timer);
    *timer = NULL;
}

/* Remove timers from event loop */
void throttle_timers_detach_aio_context(ThrottleTimers *tt)
{
    int i;

    for (i = 0; i < 2; i++) {
        throttle_timer_destroy(&tt->timers[i]);
    }
}

/* To be called last on the ThrottleTimers */
void throttle_timers_destroy(ThrottleTimers *tt)
{
    throttle_timers_detach_aio_context(tt);
}

/* is any throttling timer configured */
bool throttle_timers_are_initialized(ThrottleTimers *tt)
{
    if (tt->timers[0]) {
        return true;
    }

    return false;
}

/* Does any throttling need to be done?
 *
 * @cfg: the throttling configuration to inspect
 * @ret: true if throttling must be done else false
 */
bool throttle_enabled(ThrottleConfig *cfg)
{
    int i;

    for (i = 0; i < BUCKETS_COUNT; i++) {
        if (cfg->buckets[i].avg > 0) {
            return true;
        }
    }

    return false;
}
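/* Illustrative sketch (hypothetical helper, kept out of the build):
 * combining a total limit with a per-direction limit on the same axis is
 * exactly the kind of configuration that throttle_conflicting() below
 * reports as invalid.
 */
#if 0
static bool example_conflicting_config(void)
{
    ThrottleConfig cfg;

    memset(&cfg, 0, sizeof(cfg));
    cfg.buckets[THROTTLE_BPS_TOTAL].avg = 1000000; /* 1 MB/s in total */
    cfg.buckets[THROTTLE_BPS_READ].avg = 500000;   /* plus a read-only cap */

    return throttle_conflicting(&cfg);             /* -> true */
}
#endif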
/* return true if any two throttling parameters conflict
 *
 * @cfg: the throttling configuration to inspect
 * @ret: true if any conflict is detected else false
 */
bool throttle_conflicting(ThrottleConfig *cfg)
{
    bool bps_flag, ops_flag;
    bool bps_max_flag, ops_max_flag;

    bps_flag = cfg->buckets[THROTTLE_BPS_TOTAL].avg &&
               (cfg->buckets[THROTTLE_BPS_READ].avg ||
                cfg->buckets[THROTTLE_BPS_WRITE].avg);

    ops_flag = cfg->buckets[THROTTLE_OPS_TOTAL].avg &&
               (cfg->buckets[THROTTLE_OPS_READ].avg ||
                cfg->buckets[THROTTLE_OPS_WRITE].avg);

    bps_max_flag = cfg->buckets[THROTTLE_BPS_TOTAL].max &&
                   (cfg->buckets[THROTTLE_BPS_READ].max ||
                    cfg->buckets[THROTTLE_BPS_WRITE].max);

    ops_max_flag = cfg->buckets[THROTTLE_OPS_TOTAL].max &&
                   (cfg->buckets[THROTTLE_OPS_READ].max ||
                    cfg->buckets[THROTTLE_OPS_WRITE].max);

    return bps_flag || ops_flag || bps_max_flag || ops_max_flag;
}

/* check if a throttling configuration is valid
 * @cfg: the throttling configuration to inspect
 * @ret: true if valid else false
 */
bool throttle_is_valid(ThrottleConfig *cfg)
{
    bool invalid = false;
    int i;

    for (i = 0; i < BUCKETS_COUNT; i++) {
        if (cfg->buckets[i].avg < 0) {
            invalid = true;
        }
    }

    for (i = 0; i < BUCKETS_COUNT; i++) {
        if (cfg->buckets[i].max < 0) {
            invalid = true;
        }
    }

    return !invalid;
}

/* fix bucket parameters */
static void throttle_fix_bucket(LeakyBucket *bkt)
{
    double min;

    /* zero bucket level */
    bkt->level = 0;

    /* The following is done to cope with the Linux CFQ block scheduler
     * which regroups reads and writes into blocks of 100ms in the guest.
     * When there are two processes, one making reads and the other making
     * writes, CFQ produces a pattern looking like the following:
     * WWWWWWWWWWWRRRRRRRRRRRRRRWWWWWWWWWWWWWwRRRRRRRRRRRRRRRRR
     * Having a max burst value of 100ms of the average will help smooth the
     * throttling
     */
    min = bkt->avg / 10;
    if (bkt->avg && !bkt->max) {
        bkt->max = min;
    }
}

/* take care of canceling a timer */
static void throttle_cancel_timer(QEMUTimer *timer)
{
    assert(timer != NULL);

    timer_del(timer);
}

/* Used to configure the throttle
 *
 * @ts: the throttle state we are working on
 * @tt: the throttle timers we use in this aio context
 * @cfg: the config to set
 */
void throttle_config(ThrottleState *ts,
                     ThrottleTimers *tt,
                     ThrottleConfig *cfg)
{
    int i;

    ts->cfg = *cfg;

    for (i = 0; i < BUCKETS_COUNT; i++) {
        throttle_fix_bucket(&ts->cfg.buckets[i]);
    }

    ts->previous_leak = qemu_clock_get_ns(tt->clock_type);

    for (i = 0; i < 2; i++) {
        throttle_cancel_timer(tt->timers[i]);
    }
}

/* used to get the configuration
 *
 * @ts: the throttle state we are working on
 * @cfg: the config to write
 */
void throttle_get_config(ThrottleState *ts, ThrottleConfig *cfg)
{
    *cfg = ts->cfg;
}

/* Schedule the read or write timer if needed
 *
 * NOTE: this function is not unit tested due to its usage of timer_mod
 *
 * @tt: the timers structure
 * @is_write: the type of operation (read/write)
 * @ret: true if the timer has been scheduled else false
 */
bool throttle_schedule_timer(ThrottleState *ts,
                             ThrottleTimers *tt,
                             bool is_write)
{
    int64_t now = qemu_clock_get_ns(tt->clock_type);
    int64_t next_timestamp;
    bool must_wait;

    must_wait = throttle_compute_timer(ts,
                                       is_write,
                                       now,
                                       &next_timestamp);

    /* request not throttled */
    if (!must_wait) {
        return false;
    }

    /* request throttled and timer pending -> do nothing */
    if (timer_pending(tt->timers[is_write])) {
        return true;
    }

    /* request throttled and timer not pending -> arm timer */
    timer_mod(tt->timers[is_write], next_timestamp);
    return true;
}
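/* Typical caller flow (illustrative sketch, kept out of the build; the
 * callback and opaque names are hypothetical, and real users in the block
 * layer wire this up through their own request-restart callbacks):
 * initialize state and timers once, apply a config, then gate each request
 * on throttle_schedule_timer() and account for it afterwards.
 */
#if 0
static void example_throttle_usage(ThrottleState *ts, ThrottleTimers *tt,
                                   AioContext *ctx,
                                   QEMUTimerCB *restart_read_cb,
                                   QEMUTimerCB *restart_write_cb,
                                   void *opaque)
{
    ThrottleConfig cfg;

    throttle_init(ts);
    throttle_timers_init(tt, ctx, QEMU_CLOCK_VIRTUAL,
                         restart_read_cb, restart_write_cb, opaque);

    memset(&cfg, 0, sizeof(cfg));
    cfg.buckets[THROTTLE_BPS_TOTAL].avg = 10 * 1024 * 1024; /* 10 MB/s */
    throttle_config(ts, tt, &cfg);

    /* per request: wait if needed (the timer cb resumes the request),
     * otherwise account e.g. a 64 KiB write right away */
    if (!throttle_schedule_timer(ts, tt, true)) {
        throttle_account(ts, true, 65536);
    }
}
#endif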
/* do the accounting for this operation
 *
 * @is_write: the type of operation (read/write)
 * @size: the size of the operation
 */
void throttle_account(ThrottleState *ts, bool is_write, uint64_t size)
{
    double units = 1.0;

    /* if cfg.op_size is defined and smaller than size we compute unit count */
    if (ts->cfg.op_size && size > ts->cfg.op_size) {
        units = (double) size / ts->cfg.op_size;
    }

    ts->cfg.buckets[THROTTLE_BPS_TOTAL].level += size;
    ts->cfg.buckets[THROTTLE_OPS_TOTAL].level += units;

    if (is_write) {
        ts->cfg.buckets[THROTTLE_BPS_WRITE].level += size;
        ts->cfg.buckets[THROTTLE_OPS_WRITE].level += units;
    } else {
        ts->cfg.buckets[THROTTLE_BPS_READ].level += size;
        ts->cfg.buckets[THROTTLE_OPS_READ].level += units;
    }
}
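/* Accounting example (illustrative sketch, kept out of the build;
 * hypothetical values): with cfg.op_size = 4096 a single 16 KiB read drains
 * like four 4 KiB operations, so the ops buckets are charged
 * units = 16384 / 4096 = 4.0 while the bps buckets are still charged the
 * full 16384 bytes.
 */
#if 0
static void example_account_op_size(ThrottleState *ts)
{
    ts->cfg.op_size = 4096;
    throttle_account(ts, false, 16384);
    /* THROTTLE_BPS_READ level grows by 16384 (bytes)
     * THROTTLE_OPS_READ level grows by 4.0 (operations) */
}
#endif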