1 /* 2 * Dirty page rate limit implementation code 3 * 4 * Copyright (c) 2022 CHINA TELECOM CO.,LTD. 5 * 6 * Authors: 7 * Hyman Huang(黄勇) <huangy81@chinatelecom.cn> 8 * 9 * This work is licensed under the terms of the GNU GPL, version 2 or later. 10 * See the COPYING file in the top-level directory. 11 */ 12 13 #include "qemu/osdep.h" 14 #include "qemu/main-loop.h" 15 #include "qapi/qapi-commands-migration.h" 16 #include "qapi/qmp/qdict.h" 17 #include "qapi/error.h" 18 #include "sysemu/dirtyrate.h" 19 #include "sysemu/dirtylimit.h" 20 #include "monitor/hmp.h" 21 #include "monitor/monitor.h" 22 #include "exec/memory.h" 23 #include "exec/target_page.h" 24 #include "hw/boards.h" 25 #include "sysemu/kvm.h" 26 #include "trace.h" 27 #include "migration/misc.h" 28 #include "migration/migration.h" 29 30 /* 31 * Dirtylimit stop working if dirty page rate error 32 * value less than DIRTYLIMIT_TOLERANCE_RANGE 33 */ 34 #define DIRTYLIMIT_TOLERANCE_RANGE 25 /* MB/s */ 35 /* 36 * Plus or minus vcpu sleep time linearly if dirty 37 * page rate error value percentage over 38 * DIRTYLIMIT_LINEAR_ADJUSTMENT_PCT. 39 * Otherwise, plus or minus a fixed vcpu sleep time. 40 */ 41 #define DIRTYLIMIT_LINEAR_ADJUSTMENT_PCT 50 42 /* 43 * Max vcpu sleep time percentage during a cycle 44 * composed of dirty ring full and sleep time. 45 */ 46 #define DIRTYLIMIT_THROTTLE_PCT_MAX 99 47 48 struct { 49 VcpuStat stat; 50 bool running; 51 QemuThread thread; 52 } *vcpu_dirty_rate_stat; 53 54 typedef struct VcpuDirtyLimitState { 55 int cpu_index; 56 bool enabled; 57 /* 58 * Quota dirty page rate, unit is MB/s 59 * zero if not enabled. 60 */ 61 uint64_t quota; 62 } VcpuDirtyLimitState; 63 64 struct { 65 VcpuDirtyLimitState *states; 66 /* Max cpus number configured by user */ 67 int max_cpus; 68 /* Number of vcpu under dirtylimit */ 69 int limited_nvcpu; 70 } *dirtylimit_state; 71 72 /* protect dirtylimit_state */ 73 static QemuMutex dirtylimit_mutex; 74 75 /* dirtylimit thread quit if dirtylimit_quit is true */ 76 static bool dirtylimit_quit; 77 78 static void vcpu_dirty_rate_stat_collect(void) 79 { 80 MigrationState *s = migrate_get_current(); 81 VcpuStat stat; 82 int i = 0; 83 int64_t period = DIRTYLIMIT_CALC_TIME_MS; 84 85 if (migrate_dirty_limit() && 86 migration_is_active(s)) { 87 period = s->parameters.x_vcpu_dirty_limit_period; 88 } 89 90 /* calculate vcpu dirtyrate */ 91 vcpu_calculate_dirtyrate(period, 92 &stat, 93 GLOBAL_DIRTY_LIMIT, 94 false); 95 96 for (i = 0; i < stat.nvcpu; i++) { 97 vcpu_dirty_rate_stat->stat.rates[i].id = i; 98 vcpu_dirty_rate_stat->stat.rates[i].dirty_rate = 99 stat.rates[i].dirty_rate; 100 } 101 102 g_free(stat.rates); 103 } 104 105 static void *vcpu_dirty_rate_stat_thread(void *opaque) 106 { 107 rcu_register_thread(); 108 109 /* start log sync */ 110 global_dirty_log_change(GLOBAL_DIRTY_LIMIT, true); 111 112 while (qatomic_read(&vcpu_dirty_rate_stat->running)) { 113 vcpu_dirty_rate_stat_collect(); 114 if (dirtylimit_in_service()) { 115 dirtylimit_process(); 116 } 117 } 118 119 /* stop log sync */ 120 global_dirty_log_change(GLOBAL_DIRTY_LIMIT, false); 121 122 rcu_unregister_thread(); 123 return NULL; 124 } 125 126 int64_t vcpu_dirty_rate_get(int cpu_index) 127 { 128 DirtyRateVcpu *rates = vcpu_dirty_rate_stat->stat.rates; 129 return qatomic_read_i64(&rates[cpu_index].dirty_rate); 130 } 131 132 void vcpu_dirty_rate_stat_start(void) 133 { 134 if (qatomic_read(&vcpu_dirty_rate_stat->running)) { 135 return; 136 } 137 138 qatomic_set(&vcpu_dirty_rate_stat->running, 1); 139 qemu_thread_create(&vcpu_dirty_rate_stat->thread, 140 "dirtyrate-stat", 141 vcpu_dirty_rate_stat_thread, 142 NULL, 143 QEMU_THREAD_JOINABLE); 144 } 145 146 void vcpu_dirty_rate_stat_stop(void) 147 { 148 qatomic_set(&vcpu_dirty_rate_stat->running, 0); 149 dirtylimit_state_unlock(); 150 bql_unlock(); 151 qemu_thread_join(&vcpu_dirty_rate_stat->thread); 152 bql_lock(); 153 dirtylimit_state_lock(); 154 } 155 156 void vcpu_dirty_rate_stat_initialize(void) 157 { 158 MachineState *ms = MACHINE(qdev_get_machine()); 159 int max_cpus = ms->smp.max_cpus; 160 161 vcpu_dirty_rate_stat = 162 g_malloc0(sizeof(*vcpu_dirty_rate_stat)); 163 164 vcpu_dirty_rate_stat->stat.nvcpu = max_cpus; 165 vcpu_dirty_rate_stat->stat.rates = 166 g_new0(DirtyRateVcpu, max_cpus); 167 168 vcpu_dirty_rate_stat->running = false; 169 } 170 171 void vcpu_dirty_rate_stat_finalize(void) 172 { 173 g_free(vcpu_dirty_rate_stat->stat.rates); 174 vcpu_dirty_rate_stat->stat.rates = NULL; 175 176 g_free(vcpu_dirty_rate_stat); 177 vcpu_dirty_rate_stat = NULL; 178 } 179 180 void dirtylimit_state_lock(void) 181 { 182 qemu_mutex_lock(&dirtylimit_mutex); 183 } 184 185 void dirtylimit_state_unlock(void) 186 { 187 qemu_mutex_unlock(&dirtylimit_mutex); 188 } 189 190 static void 191 __attribute__((__constructor__)) dirtylimit_mutex_init(void) 192 { 193 qemu_mutex_init(&dirtylimit_mutex); 194 } 195 196 static inline VcpuDirtyLimitState *dirtylimit_vcpu_get_state(int cpu_index) 197 { 198 return &dirtylimit_state->states[cpu_index]; 199 } 200 201 void dirtylimit_state_initialize(void) 202 { 203 MachineState *ms = MACHINE(qdev_get_machine()); 204 int max_cpus = ms->smp.max_cpus; 205 int i; 206 207 dirtylimit_state = g_malloc0(sizeof(*dirtylimit_state)); 208 209 dirtylimit_state->states = 210 g_new0(VcpuDirtyLimitState, max_cpus); 211 212 for (i = 0; i < max_cpus; i++) { 213 dirtylimit_state->states[i].cpu_index = i; 214 } 215 216 dirtylimit_state->max_cpus = max_cpus; 217 trace_dirtylimit_state_initialize(max_cpus); 218 } 219 220 void dirtylimit_state_finalize(void) 221 { 222 g_free(dirtylimit_state->states); 223 dirtylimit_state->states = NULL; 224 225 g_free(dirtylimit_state); 226 dirtylimit_state = NULL; 227 228 trace_dirtylimit_state_finalize(); 229 } 230 231 bool dirtylimit_in_service(void) 232 { 233 return !!dirtylimit_state; 234 } 235 236 bool dirtylimit_vcpu_index_valid(int cpu_index) 237 { 238 MachineState *ms = MACHINE(qdev_get_machine()); 239 240 return !(cpu_index < 0 || 241 cpu_index >= ms->smp.max_cpus); 242 } 243 244 static uint64_t dirtylimit_dirty_ring_full_time(uint64_t dirtyrate) 245 { 246 static uint64_t max_dirtyrate; 247 uint64_t dirty_ring_size_MiB; 248 249 dirty_ring_size_MiB = qemu_target_pages_to_MiB(kvm_dirty_ring_size()); 250 251 if (max_dirtyrate < dirtyrate) { 252 max_dirtyrate = dirtyrate; 253 } 254 255 return dirty_ring_size_MiB * 1000000 / max_dirtyrate; 256 } 257 258 static inline bool dirtylimit_done(uint64_t quota, 259 uint64_t current) 260 { 261 uint64_t min, max; 262 263 min = MIN(quota, current); 264 max = MAX(quota, current); 265 266 return ((max - min) <= DIRTYLIMIT_TOLERANCE_RANGE) ? true : false; 267 } 268 269 static inline bool 270 dirtylimit_need_linear_adjustment(uint64_t quota, 271 uint64_t current) 272 { 273 uint64_t min, max; 274 275 min = MIN(quota, current); 276 max = MAX(quota, current); 277 278 return ((max - min) * 100 / max) > DIRTYLIMIT_LINEAR_ADJUSTMENT_PCT; 279 } 280 281 static void dirtylimit_set_throttle(CPUState *cpu, 282 uint64_t quota, 283 uint64_t current) 284 { 285 int64_t ring_full_time_us = 0; 286 uint64_t sleep_pct = 0; 287 uint64_t throttle_us = 0; 288 289 if (current == 0) { 290 cpu->throttle_us_per_full = 0; 291 return; 292 } 293 294 ring_full_time_us = dirtylimit_dirty_ring_full_time(current); 295 296 if (dirtylimit_need_linear_adjustment(quota, current)) { 297 if (quota < current) { 298 sleep_pct = (current - quota) * 100 / current; 299 throttle_us = 300 ring_full_time_us * sleep_pct / (double)(100 - sleep_pct); 301 cpu->throttle_us_per_full += throttle_us; 302 } else { 303 sleep_pct = (quota - current) * 100 / quota; 304 throttle_us = 305 ring_full_time_us * sleep_pct / (double)(100 - sleep_pct); 306 cpu->throttle_us_per_full -= throttle_us; 307 } 308 309 trace_dirtylimit_throttle_pct(cpu->cpu_index, 310 sleep_pct, 311 throttle_us); 312 } else { 313 if (quota < current) { 314 cpu->throttle_us_per_full += ring_full_time_us / 10; 315 } else { 316 cpu->throttle_us_per_full -= ring_full_time_us / 10; 317 } 318 } 319 320 /* 321 * TODO: in the big kvm_dirty_ring_size case (eg: 65536, or other scenario), 322 * current dirty page rate may never reach the quota, we should stop 323 * increasing sleep time? 324 */ 325 cpu->throttle_us_per_full = MIN(cpu->throttle_us_per_full, 326 ring_full_time_us * DIRTYLIMIT_THROTTLE_PCT_MAX); 327 328 cpu->throttle_us_per_full = MAX(cpu->throttle_us_per_full, 0); 329 } 330 331 static void dirtylimit_adjust_throttle(CPUState *cpu) 332 { 333 uint64_t quota = 0; 334 uint64_t current = 0; 335 int cpu_index = cpu->cpu_index; 336 337 quota = dirtylimit_vcpu_get_state(cpu_index)->quota; 338 current = vcpu_dirty_rate_get(cpu_index); 339 340 if (!dirtylimit_done(quota, current)) { 341 dirtylimit_set_throttle(cpu, quota, current); 342 } 343 344 return; 345 } 346 347 void dirtylimit_process(void) 348 { 349 CPUState *cpu; 350 351 if (!qatomic_read(&dirtylimit_quit)) { 352 dirtylimit_state_lock(); 353 354 if (!dirtylimit_in_service()) { 355 dirtylimit_state_unlock(); 356 return; 357 } 358 359 CPU_FOREACH(cpu) { 360 if (!dirtylimit_vcpu_get_state(cpu->cpu_index)->enabled) { 361 continue; 362 } 363 dirtylimit_adjust_throttle(cpu); 364 } 365 dirtylimit_state_unlock(); 366 } 367 } 368 369 void dirtylimit_change(bool start) 370 { 371 if (start) { 372 qatomic_set(&dirtylimit_quit, 0); 373 } else { 374 qatomic_set(&dirtylimit_quit, 1); 375 } 376 } 377 378 void dirtylimit_set_vcpu(int cpu_index, 379 uint64_t quota, 380 bool enable) 381 { 382 trace_dirtylimit_set_vcpu(cpu_index, quota); 383 384 if (enable) { 385 dirtylimit_state->states[cpu_index].quota = quota; 386 if (!dirtylimit_vcpu_get_state(cpu_index)->enabled) { 387 dirtylimit_state->limited_nvcpu++; 388 } 389 } else { 390 dirtylimit_state->states[cpu_index].quota = 0; 391 if (dirtylimit_state->states[cpu_index].enabled) { 392 dirtylimit_state->limited_nvcpu--; 393 } 394 } 395 396 dirtylimit_state->states[cpu_index].enabled = enable; 397 } 398 399 void dirtylimit_set_all(uint64_t quota, 400 bool enable) 401 { 402 MachineState *ms = MACHINE(qdev_get_machine()); 403 int max_cpus = ms->smp.max_cpus; 404 int i; 405 406 for (i = 0; i < max_cpus; i++) { 407 dirtylimit_set_vcpu(i, quota, enable); 408 } 409 } 410 411 void dirtylimit_vcpu_execute(CPUState *cpu) 412 { 413 if (cpu->throttle_us_per_full) { 414 dirtylimit_state_lock(); 415 416 if (dirtylimit_in_service() && 417 dirtylimit_vcpu_get_state(cpu->cpu_index)->enabled) { 418 dirtylimit_state_unlock(); 419 trace_dirtylimit_vcpu_execute(cpu->cpu_index, 420 cpu->throttle_us_per_full); 421 422 g_usleep(cpu->throttle_us_per_full); 423 return; 424 } 425 426 dirtylimit_state_unlock(); 427 } 428 } 429 430 static void dirtylimit_init(void) 431 { 432 dirtylimit_state_initialize(); 433 dirtylimit_change(true); 434 vcpu_dirty_rate_stat_initialize(); 435 vcpu_dirty_rate_stat_start(); 436 } 437 438 static void dirtylimit_cleanup(void) 439 { 440 vcpu_dirty_rate_stat_stop(); 441 vcpu_dirty_rate_stat_finalize(); 442 dirtylimit_change(false); 443 dirtylimit_state_finalize(); 444 } 445 446 /* 447 * dirty page rate limit is not allowed to set if migration 448 * is running with dirty-limit capability enabled. 449 */ 450 static bool dirtylimit_is_allowed(void) 451 { 452 MigrationState *ms = migrate_get_current(); 453 454 if (migration_is_running(ms->state) && 455 (!qemu_thread_is_self(&ms->thread)) && 456 migrate_dirty_limit() && 457 dirtylimit_in_service()) { 458 return false; 459 } 460 return true; 461 } 462 463 void qmp_cancel_vcpu_dirty_limit(bool has_cpu_index, 464 int64_t cpu_index, 465 Error **errp) 466 { 467 if (!kvm_enabled() || !kvm_dirty_ring_enabled()) { 468 return; 469 } 470 471 if (has_cpu_index && !dirtylimit_vcpu_index_valid(cpu_index)) { 472 error_setg(errp, "incorrect cpu index specified"); 473 return; 474 } 475 476 if (!dirtylimit_is_allowed()) { 477 error_setg(errp, "can't cancel dirty page rate limit while" 478 " migration is running"); 479 return; 480 } 481 482 if (!dirtylimit_in_service()) { 483 return; 484 } 485 486 dirtylimit_state_lock(); 487 488 if (has_cpu_index) { 489 dirtylimit_set_vcpu(cpu_index, 0, false); 490 } else { 491 dirtylimit_set_all(0, false); 492 } 493 494 if (!dirtylimit_state->limited_nvcpu) { 495 dirtylimit_cleanup(); 496 } 497 498 dirtylimit_state_unlock(); 499 } 500 501 void hmp_cancel_vcpu_dirty_limit(Monitor *mon, const QDict *qdict) 502 { 503 int64_t cpu_index = qdict_get_try_int(qdict, "cpu_index", -1); 504 Error *err = NULL; 505 506 qmp_cancel_vcpu_dirty_limit(!!(cpu_index != -1), cpu_index, &err); 507 if (err) { 508 hmp_handle_error(mon, err); 509 return; 510 } 511 512 monitor_printf(mon, "[Please use 'info vcpu_dirty_limit' to query " 513 "dirty limit for virtual CPU]\n"); 514 } 515 516 void qmp_set_vcpu_dirty_limit(bool has_cpu_index, 517 int64_t cpu_index, 518 uint64_t dirty_rate, 519 Error **errp) 520 { 521 if (!kvm_enabled() || !kvm_dirty_ring_enabled()) { 522 error_setg(errp, "dirty page limit feature requires KVM with" 523 " accelerator property 'dirty-ring-size' set'"); 524 return; 525 } 526 527 if (has_cpu_index && !dirtylimit_vcpu_index_valid(cpu_index)) { 528 error_setg(errp, "incorrect cpu index specified"); 529 return; 530 } 531 532 if (!dirtylimit_is_allowed()) { 533 error_setg(errp, "can't set dirty page rate limit while" 534 " migration is running"); 535 return; 536 } 537 538 if (!dirty_rate) { 539 qmp_cancel_vcpu_dirty_limit(has_cpu_index, cpu_index, errp); 540 return; 541 } 542 543 dirtylimit_state_lock(); 544 545 if (!dirtylimit_in_service()) { 546 dirtylimit_init(); 547 } 548 549 if (has_cpu_index) { 550 dirtylimit_set_vcpu(cpu_index, dirty_rate, true); 551 } else { 552 dirtylimit_set_all(dirty_rate, true); 553 } 554 555 dirtylimit_state_unlock(); 556 } 557 558 void hmp_set_vcpu_dirty_limit(Monitor *mon, const QDict *qdict) 559 { 560 int64_t dirty_rate = qdict_get_int(qdict, "dirty_rate"); 561 int64_t cpu_index = qdict_get_try_int(qdict, "cpu_index", -1); 562 Error *err = NULL; 563 564 if (dirty_rate < 0) { 565 error_setg(&err, "invalid dirty page limit %" PRId64, dirty_rate); 566 goto out; 567 } 568 569 qmp_set_vcpu_dirty_limit(!!(cpu_index != -1), cpu_index, dirty_rate, &err); 570 571 out: 572 hmp_handle_error(mon, err); 573 } 574 575 /* Return the max throttle time of each virtual CPU */ 576 uint64_t dirtylimit_throttle_time_per_round(void) 577 { 578 CPUState *cpu; 579 int64_t max = 0; 580 581 CPU_FOREACH(cpu) { 582 if (cpu->throttle_us_per_full > max) { 583 max = cpu->throttle_us_per_full; 584 } 585 } 586 587 return max; 588 } 589 590 /* 591 * Estimate average dirty ring full time of each virtaul CPU. 592 * Return 0 if guest doesn't dirty memory. 593 */ 594 uint64_t dirtylimit_ring_full_time(void) 595 { 596 CPUState *cpu; 597 uint64_t curr_rate = 0; 598 int nvcpus = 0; 599 600 CPU_FOREACH(cpu) { 601 if (cpu->running) { 602 nvcpus++; 603 curr_rate += vcpu_dirty_rate_get(cpu->cpu_index); 604 } 605 } 606 607 if (!curr_rate || !nvcpus) { 608 return 0; 609 } 610 611 return dirtylimit_dirty_ring_full_time(curr_rate / nvcpus); 612 } 613 614 static struct DirtyLimitInfo *dirtylimit_query_vcpu(int cpu_index) 615 { 616 DirtyLimitInfo *info = NULL; 617 618 info = g_malloc0(sizeof(*info)); 619 info->cpu_index = cpu_index; 620 info->limit_rate = dirtylimit_vcpu_get_state(cpu_index)->quota; 621 info->current_rate = vcpu_dirty_rate_get(cpu_index); 622 623 return info; 624 } 625 626 static struct DirtyLimitInfoList *dirtylimit_query_all(void) 627 { 628 int i, index; 629 DirtyLimitInfo *info = NULL; 630 DirtyLimitInfoList *head = NULL, **tail = &head; 631 632 dirtylimit_state_lock(); 633 634 if (!dirtylimit_in_service()) { 635 dirtylimit_state_unlock(); 636 return NULL; 637 } 638 639 for (i = 0; i < dirtylimit_state->max_cpus; i++) { 640 index = dirtylimit_state->states[i].cpu_index; 641 if (dirtylimit_vcpu_get_state(index)->enabled) { 642 info = dirtylimit_query_vcpu(index); 643 QAPI_LIST_APPEND(tail, info); 644 } 645 } 646 647 dirtylimit_state_unlock(); 648 649 return head; 650 } 651 652 struct DirtyLimitInfoList *qmp_query_vcpu_dirty_limit(Error **errp) 653 { 654 return dirtylimit_query_all(); 655 } 656 657 void hmp_info_vcpu_dirty_limit(Monitor *mon, const QDict *qdict) 658 { 659 DirtyLimitInfoList *info; 660 g_autoptr(DirtyLimitInfoList) head = NULL; 661 Error *err = NULL; 662 663 if (!dirtylimit_in_service()) { 664 monitor_printf(mon, "Dirty page limit not enabled!\n"); 665 return; 666 } 667 668 head = qmp_query_vcpu_dirty_limit(&err); 669 if (err) { 670 hmp_handle_error(mon, err); 671 return; 672 } 673 674 for (info = head; info != NULL; info = info->next) { 675 monitor_printf(mon, "vcpu[%"PRIi64"], limit rate %"PRIi64 " (MB/s)," 676 " current rate %"PRIi64 " (MB/s)\n", 677 info->value->cpu_index, 678 info->value->limit_rate, 679 info->value->current_rate); 680 } 681 } 682