1 /* 2 * Dirty page rate limit implementation code 3 * 4 * Copyright (c) 2022 CHINA TELECOM CO.,LTD. 5 * 6 * Authors: 7 * Hyman Huang(黄勇) <huangy81@chinatelecom.cn> 8 * 9 * This work is licensed under the terms of the GNU GPL, version 2 or later. 10 * See the COPYING file in the top-level directory. 11 */ 12 13 #include "qemu/osdep.h" 14 #include "qemu/main-loop.h" 15 #include "qapi/qapi-commands-migration.h" 16 #include "qapi/qmp/qdict.h" 17 #include "qapi/error.h" 18 #include "sysemu/dirtyrate.h" 19 #include "sysemu/dirtylimit.h" 20 #include "monitor/hmp.h" 21 #include "monitor/monitor.h" 22 #include "exec/memory.h" 23 #include "exec/target_page.h" 24 #include "hw/boards.h" 25 #include "sysemu/kvm.h" 26 #include "trace.h" 27 #include "migration/misc.h" 28 #include "migration/migration.h" 29 #include "migration/options.h" 30 31 /* 32 * Dirtylimit stop working if dirty page rate error 33 * value less than DIRTYLIMIT_TOLERANCE_RANGE 34 */ 35 #define DIRTYLIMIT_TOLERANCE_RANGE 25 /* MB/s */ 36 /* 37 * Plus or minus vcpu sleep time linearly if dirty 38 * page rate error value percentage over 39 * DIRTYLIMIT_LINEAR_ADJUSTMENT_PCT. 40 * Otherwise, plus or minus a fixed vcpu sleep time. 41 */ 42 #define DIRTYLIMIT_LINEAR_ADJUSTMENT_PCT 50 43 /* 44 * Max vcpu sleep time percentage during a cycle 45 * composed of dirty ring full and sleep time. 46 */ 47 #define DIRTYLIMIT_THROTTLE_PCT_MAX 99 48 49 struct { 50 VcpuStat stat; 51 bool running; 52 QemuThread thread; 53 } *vcpu_dirty_rate_stat; 54 55 typedef struct VcpuDirtyLimitState { 56 int cpu_index; 57 bool enabled; 58 /* 59 * Quota dirty page rate, unit is MB/s 60 * zero if not enabled. 61 */ 62 uint64_t quota; 63 } VcpuDirtyLimitState; 64 65 struct { 66 VcpuDirtyLimitState *states; 67 /* Max cpus number configured by user */ 68 int max_cpus; 69 /* Number of vcpu under dirtylimit */ 70 int limited_nvcpu; 71 } *dirtylimit_state; 72 73 /* protect dirtylimit_state */ 74 static QemuMutex dirtylimit_mutex; 75 76 /* dirtylimit thread quit if dirtylimit_quit is true */ 77 static bool dirtylimit_quit; 78 79 static void vcpu_dirty_rate_stat_collect(void) 80 { 81 MigrationState *s = migrate_get_current(); 82 VcpuStat stat; 83 int i = 0; 84 int64_t period = DIRTYLIMIT_CALC_TIME_MS; 85 86 if (migrate_dirty_limit() && 87 migration_is_active(s)) { 88 period = s->parameters.x_vcpu_dirty_limit_period; 89 } 90 91 /* calculate vcpu dirtyrate */ 92 vcpu_calculate_dirtyrate(period, 93 &stat, 94 GLOBAL_DIRTY_LIMIT, 95 false); 96 97 for (i = 0; i < stat.nvcpu; i++) { 98 vcpu_dirty_rate_stat->stat.rates[i].id = i; 99 vcpu_dirty_rate_stat->stat.rates[i].dirty_rate = 100 stat.rates[i].dirty_rate; 101 } 102 103 g_free(stat.rates); 104 } 105 106 static void *vcpu_dirty_rate_stat_thread(void *opaque) 107 { 108 rcu_register_thread(); 109 110 /* start log sync */ 111 global_dirty_log_change(GLOBAL_DIRTY_LIMIT, true); 112 113 while (qatomic_read(&vcpu_dirty_rate_stat->running)) { 114 vcpu_dirty_rate_stat_collect(); 115 if (dirtylimit_in_service()) { 116 dirtylimit_process(); 117 } 118 } 119 120 /* stop log sync */ 121 global_dirty_log_change(GLOBAL_DIRTY_LIMIT, false); 122 123 rcu_unregister_thread(); 124 return NULL; 125 } 126 127 int64_t vcpu_dirty_rate_get(int cpu_index) 128 { 129 DirtyRateVcpu *rates = vcpu_dirty_rate_stat->stat.rates; 130 return qatomic_read_i64(&rates[cpu_index].dirty_rate); 131 } 132 133 void vcpu_dirty_rate_stat_start(void) 134 { 135 if (qatomic_read(&vcpu_dirty_rate_stat->running)) { 136 return; 137 } 138 139 qatomic_set(&vcpu_dirty_rate_stat->running, 1); 140 qemu_thread_create(&vcpu_dirty_rate_stat->thread, 141 "dirtyrate-stat", 142 vcpu_dirty_rate_stat_thread, 143 NULL, 144 QEMU_THREAD_JOINABLE); 145 } 146 147 void vcpu_dirty_rate_stat_stop(void) 148 { 149 qatomic_set(&vcpu_dirty_rate_stat->running, 0); 150 dirtylimit_state_unlock(); 151 bql_unlock(); 152 qemu_thread_join(&vcpu_dirty_rate_stat->thread); 153 bql_lock(); 154 dirtylimit_state_lock(); 155 } 156 157 void vcpu_dirty_rate_stat_initialize(void) 158 { 159 MachineState *ms = MACHINE(qdev_get_machine()); 160 int max_cpus = ms->smp.max_cpus; 161 162 vcpu_dirty_rate_stat = 163 g_malloc0(sizeof(*vcpu_dirty_rate_stat)); 164 165 vcpu_dirty_rate_stat->stat.nvcpu = max_cpus; 166 vcpu_dirty_rate_stat->stat.rates = 167 g_new0(DirtyRateVcpu, max_cpus); 168 169 vcpu_dirty_rate_stat->running = false; 170 } 171 172 void vcpu_dirty_rate_stat_finalize(void) 173 { 174 g_free(vcpu_dirty_rate_stat->stat.rates); 175 vcpu_dirty_rate_stat->stat.rates = NULL; 176 177 g_free(vcpu_dirty_rate_stat); 178 vcpu_dirty_rate_stat = NULL; 179 } 180 181 void dirtylimit_state_lock(void) 182 { 183 qemu_mutex_lock(&dirtylimit_mutex); 184 } 185 186 void dirtylimit_state_unlock(void) 187 { 188 qemu_mutex_unlock(&dirtylimit_mutex); 189 } 190 191 static void 192 __attribute__((__constructor__)) dirtylimit_mutex_init(void) 193 { 194 qemu_mutex_init(&dirtylimit_mutex); 195 } 196 197 static inline VcpuDirtyLimitState *dirtylimit_vcpu_get_state(int cpu_index) 198 { 199 return &dirtylimit_state->states[cpu_index]; 200 } 201 202 void dirtylimit_state_initialize(void) 203 { 204 MachineState *ms = MACHINE(qdev_get_machine()); 205 int max_cpus = ms->smp.max_cpus; 206 int i; 207 208 dirtylimit_state = g_malloc0(sizeof(*dirtylimit_state)); 209 210 dirtylimit_state->states = 211 g_new0(VcpuDirtyLimitState, max_cpus); 212 213 for (i = 0; i < max_cpus; i++) { 214 dirtylimit_state->states[i].cpu_index = i; 215 } 216 217 dirtylimit_state->max_cpus = max_cpus; 218 trace_dirtylimit_state_initialize(max_cpus); 219 } 220 221 void dirtylimit_state_finalize(void) 222 { 223 g_free(dirtylimit_state->states); 224 dirtylimit_state->states = NULL; 225 226 g_free(dirtylimit_state); 227 dirtylimit_state = NULL; 228 229 trace_dirtylimit_state_finalize(); 230 } 231 232 bool dirtylimit_in_service(void) 233 { 234 return !!dirtylimit_state; 235 } 236 237 bool dirtylimit_vcpu_index_valid(int cpu_index) 238 { 239 MachineState *ms = MACHINE(qdev_get_machine()); 240 241 return !(cpu_index < 0 || 242 cpu_index >= ms->smp.max_cpus); 243 } 244 245 static uint64_t dirtylimit_dirty_ring_full_time(uint64_t dirtyrate) 246 { 247 static uint64_t max_dirtyrate; 248 uint64_t dirty_ring_size_MiB; 249 250 dirty_ring_size_MiB = qemu_target_pages_to_MiB(kvm_dirty_ring_size()); 251 252 if (max_dirtyrate < dirtyrate) { 253 max_dirtyrate = dirtyrate; 254 } 255 256 return dirty_ring_size_MiB * 1000000 / max_dirtyrate; 257 } 258 259 static inline bool dirtylimit_done(uint64_t quota, 260 uint64_t current) 261 { 262 uint64_t min, max; 263 264 min = MIN(quota, current); 265 max = MAX(quota, current); 266 267 return ((max - min) <= DIRTYLIMIT_TOLERANCE_RANGE) ? true : false; 268 } 269 270 static inline bool 271 dirtylimit_need_linear_adjustment(uint64_t quota, 272 uint64_t current) 273 { 274 uint64_t min, max; 275 276 min = MIN(quota, current); 277 max = MAX(quota, current); 278 279 return ((max - min) * 100 / max) > DIRTYLIMIT_LINEAR_ADJUSTMENT_PCT; 280 } 281 282 static void dirtylimit_set_throttle(CPUState *cpu, 283 uint64_t quota, 284 uint64_t current) 285 { 286 int64_t ring_full_time_us = 0; 287 uint64_t sleep_pct = 0; 288 uint64_t throttle_us = 0; 289 290 if (current == 0) { 291 cpu->throttle_us_per_full = 0; 292 return; 293 } 294 295 ring_full_time_us = dirtylimit_dirty_ring_full_time(current); 296 297 if (dirtylimit_need_linear_adjustment(quota, current)) { 298 if (quota < current) { 299 sleep_pct = (current - quota) * 100 / current; 300 throttle_us = 301 ring_full_time_us * sleep_pct / (double)(100 - sleep_pct); 302 cpu->throttle_us_per_full += throttle_us; 303 } else { 304 sleep_pct = (quota - current) * 100 / quota; 305 throttle_us = 306 ring_full_time_us * sleep_pct / (double)(100 - sleep_pct); 307 cpu->throttle_us_per_full -= throttle_us; 308 } 309 310 trace_dirtylimit_throttle_pct(cpu->cpu_index, 311 sleep_pct, 312 throttle_us); 313 } else { 314 if (quota < current) { 315 cpu->throttle_us_per_full += ring_full_time_us / 10; 316 } else { 317 cpu->throttle_us_per_full -= ring_full_time_us / 10; 318 } 319 } 320 321 /* 322 * TODO: in the big kvm_dirty_ring_size case (eg: 65536, or other scenario), 323 * current dirty page rate may never reach the quota, we should stop 324 * increasing sleep time? 325 */ 326 cpu->throttle_us_per_full = MIN(cpu->throttle_us_per_full, 327 ring_full_time_us * DIRTYLIMIT_THROTTLE_PCT_MAX); 328 329 cpu->throttle_us_per_full = MAX(cpu->throttle_us_per_full, 0); 330 } 331 332 static void dirtylimit_adjust_throttle(CPUState *cpu) 333 { 334 uint64_t quota = 0; 335 uint64_t current = 0; 336 int cpu_index = cpu->cpu_index; 337 338 quota = dirtylimit_vcpu_get_state(cpu_index)->quota; 339 current = vcpu_dirty_rate_get(cpu_index); 340 341 if (!dirtylimit_done(quota, current)) { 342 dirtylimit_set_throttle(cpu, quota, current); 343 } 344 345 return; 346 } 347 348 void dirtylimit_process(void) 349 { 350 CPUState *cpu; 351 352 if (!qatomic_read(&dirtylimit_quit)) { 353 dirtylimit_state_lock(); 354 355 if (!dirtylimit_in_service()) { 356 dirtylimit_state_unlock(); 357 return; 358 } 359 360 CPU_FOREACH(cpu) { 361 if (!dirtylimit_vcpu_get_state(cpu->cpu_index)->enabled) { 362 continue; 363 } 364 dirtylimit_adjust_throttle(cpu); 365 } 366 dirtylimit_state_unlock(); 367 } 368 } 369 370 void dirtylimit_change(bool start) 371 { 372 if (start) { 373 qatomic_set(&dirtylimit_quit, 0); 374 } else { 375 qatomic_set(&dirtylimit_quit, 1); 376 } 377 } 378 379 void dirtylimit_set_vcpu(int cpu_index, 380 uint64_t quota, 381 bool enable) 382 { 383 trace_dirtylimit_set_vcpu(cpu_index, quota); 384 385 if (enable) { 386 dirtylimit_state->states[cpu_index].quota = quota; 387 if (!dirtylimit_vcpu_get_state(cpu_index)->enabled) { 388 dirtylimit_state->limited_nvcpu++; 389 } 390 } else { 391 dirtylimit_state->states[cpu_index].quota = 0; 392 if (dirtylimit_state->states[cpu_index].enabled) { 393 dirtylimit_state->limited_nvcpu--; 394 } 395 } 396 397 dirtylimit_state->states[cpu_index].enabled = enable; 398 } 399 400 void dirtylimit_set_all(uint64_t quota, 401 bool enable) 402 { 403 MachineState *ms = MACHINE(qdev_get_machine()); 404 int max_cpus = ms->smp.max_cpus; 405 int i; 406 407 for (i = 0; i < max_cpus; i++) { 408 dirtylimit_set_vcpu(i, quota, enable); 409 } 410 } 411 412 void dirtylimit_vcpu_execute(CPUState *cpu) 413 { 414 if (cpu->throttle_us_per_full) { 415 dirtylimit_state_lock(); 416 417 if (dirtylimit_in_service() && 418 dirtylimit_vcpu_get_state(cpu->cpu_index)->enabled) { 419 dirtylimit_state_unlock(); 420 trace_dirtylimit_vcpu_execute(cpu->cpu_index, 421 cpu->throttle_us_per_full); 422 423 g_usleep(cpu->throttle_us_per_full); 424 return; 425 } 426 427 dirtylimit_state_unlock(); 428 } 429 } 430 431 static void dirtylimit_init(void) 432 { 433 dirtylimit_state_initialize(); 434 dirtylimit_change(true); 435 vcpu_dirty_rate_stat_initialize(); 436 vcpu_dirty_rate_stat_start(); 437 } 438 439 static void dirtylimit_cleanup(void) 440 { 441 vcpu_dirty_rate_stat_stop(); 442 vcpu_dirty_rate_stat_finalize(); 443 dirtylimit_change(false); 444 dirtylimit_state_finalize(); 445 } 446 447 /* 448 * dirty page rate limit is not allowed to set if migration 449 * is running with dirty-limit capability enabled. 450 */ 451 static bool dirtylimit_is_allowed(void) 452 { 453 MigrationState *ms = migrate_get_current(); 454 455 if (migration_is_running(ms->state) && 456 (!qemu_thread_is_self(&ms->thread)) && 457 migrate_dirty_limit() && 458 dirtylimit_in_service()) { 459 return false; 460 } 461 return true; 462 } 463 464 void qmp_cancel_vcpu_dirty_limit(bool has_cpu_index, 465 int64_t cpu_index, 466 Error **errp) 467 { 468 if (!kvm_enabled() || !kvm_dirty_ring_enabled()) { 469 return; 470 } 471 472 if (has_cpu_index && !dirtylimit_vcpu_index_valid(cpu_index)) { 473 error_setg(errp, "incorrect cpu index specified"); 474 return; 475 } 476 477 if (!dirtylimit_is_allowed()) { 478 error_setg(errp, "can't cancel dirty page rate limit while" 479 " migration is running"); 480 return; 481 } 482 483 if (!dirtylimit_in_service()) { 484 return; 485 } 486 487 dirtylimit_state_lock(); 488 489 if (has_cpu_index) { 490 dirtylimit_set_vcpu(cpu_index, 0, false); 491 } else { 492 dirtylimit_set_all(0, false); 493 } 494 495 if (!dirtylimit_state->limited_nvcpu) { 496 dirtylimit_cleanup(); 497 } 498 499 dirtylimit_state_unlock(); 500 } 501 502 void hmp_cancel_vcpu_dirty_limit(Monitor *mon, const QDict *qdict) 503 { 504 int64_t cpu_index = qdict_get_try_int(qdict, "cpu_index", -1); 505 Error *err = NULL; 506 507 qmp_cancel_vcpu_dirty_limit(!!(cpu_index != -1), cpu_index, &err); 508 if (err) { 509 hmp_handle_error(mon, err); 510 return; 511 } 512 513 monitor_printf(mon, "[Please use 'info vcpu_dirty_limit' to query " 514 "dirty limit for virtual CPU]\n"); 515 } 516 517 void qmp_set_vcpu_dirty_limit(bool has_cpu_index, 518 int64_t cpu_index, 519 uint64_t dirty_rate, 520 Error **errp) 521 { 522 if (!kvm_enabled() || !kvm_dirty_ring_enabled()) { 523 error_setg(errp, "dirty page limit feature requires KVM with" 524 " accelerator property 'dirty-ring-size' set'"); 525 return; 526 } 527 528 if (has_cpu_index && !dirtylimit_vcpu_index_valid(cpu_index)) { 529 error_setg(errp, "incorrect cpu index specified"); 530 return; 531 } 532 533 if (!dirtylimit_is_allowed()) { 534 error_setg(errp, "can't set dirty page rate limit while" 535 " migration is running"); 536 return; 537 } 538 539 if (!dirty_rate) { 540 qmp_cancel_vcpu_dirty_limit(has_cpu_index, cpu_index, errp); 541 return; 542 } 543 544 dirtylimit_state_lock(); 545 546 if (!dirtylimit_in_service()) { 547 dirtylimit_init(); 548 } 549 550 if (has_cpu_index) { 551 dirtylimit_set_vcpu(cpu_index, dirty_rate, true); 552 } else { 553 dirtylimit_set_all(dirty_rate, true); 554 } 555 556 dirtylimit_state_unlock(); 557 } 558 559 void hmp_set_vcpu_dirty_limit(Monitor *mon, const QDict *qdict) 560 { 561 int64_t dirty_rate = qdict_get_int(qdict, "dirty_rate"); 562 int64_t cpu_index = qdict_get_try_int(qdict, "cpu_index", -1); 563 Error *err = NULL; 564 565 if (dirty_rate < 0) { 566 error_setg(&err, "invalid dirty page limit %" PRId64, dirty_rate); 567 goto out; 568 } 569 570 qmp_set_vcpu_dirty_limit(!!(cpu_index != -1), cpu_index, dirty_rate, &err); 571 572 out: 573 hmp_handle_error(mon, err); 574 } 575 576 /* Return the max throttle time of each virtual CPU */ 577 uint64_t dirtylimit_throttle_time_per_round(void) 578 { 579 CPUState *cpu; 580 int64_t max = 0; 581 582 CPU_FOREACH(cpu) { 583 if (cpu->throttle_us_per_full > max) { 584 max = cpu->throttle_us_per_full; 585 } 586 } 587 588 return max; 589 } 590 591 /* 592 * Estimate average dirty ring full time of each virtaul CPU. 593 * Return 0 if guest doesn't dirty memory. 594 */ 595 uint64_t dirtylimit_ring_full_time(void) 596 { 597 CPUState *cpu; 598 uint64_t curr_rate = 0; 599 int nvcpus = 0; 600 601 CPU_FOREACH(cpu) { 602 if (cpu->running) { 603 nvcpus++; 604 curr_rate += vcpu_dirty_rate_get(cpu->cpu_index); 605 } 606 } 607 608 if (!curr_rate || !nvcpus) { 609 return 0; 610 } 611 612 return dirtylimit_dirty_ring_full_time(curr_rate / nvcpus); 613 } 614 615 static struct DirtyLimitInfo *dirtylimit_query_vcpu(int cpu_index) 616 { 617 DirtyLimitInfo *info = NULL; 618 619 info = g_malloc0(sizeof(*info)); 620 info->cpu_index = cpu_index; 621 info->limit_rate = dirtylimit_vcpu_get_state(cpu_index)->quota; 622 info->current_rate = vcpu_dirty_rate_get(cpu_index); 623 624 return info; 625 } 626 627 static struct DirtyLimitInfoList *dirtylimit_query_all(void) 628 { 629 int i, index; 630 DirtyLimitInfo *info = NULL; 631 DirtyLimitInfoList *head = NULL, **tail = &head; 632 633 dirtylimit_state_lock(); 634 635 if (!dirtylimit_in_service()) { 636 dirtylimit_state_unlock(); 637 return NULL; 638 } 639 640 for (i = 0; i < dirtylimit_state->max_cpus; i++) { 641 index = dirtylimit_state->states[i].cpu_index; 642 if (dirtylimit_vcpu_get_state(index)->enabled) { 643 info = dirtylimit_query_vcpu(index); 644 QAPI_LIST_APPEND(tail, info); 645 } 646 } 647 648 dirtylimit_state_unlock(); 649 650 return head; 651 } 652 653 struct DirtyLimitInfoList *qmp_query_vcpu_dirty_limit(Error **errp) 654 { 655 return dirtylimit_query_all(); 656 } 657 658 void hmp_info_vcpu_dirty_limit(Monitor *mon, const QDict *qdict) 659 { 660 DirtyLimitInfoList *info; 661 g_autoptr(DirtyLimitInfoList) head = NULL; 662 Error *err = NULL; 663 664 if (!dirtylimit_in_service()) { 665 monitor_printf(mon, "Dirty page limit not enabled!\n"); 666 return; 667 } 668 669 head = qmp_query_vcpu_dirty_limit(&err); 670 if (err) { 671 hmp_handle_error(mon, err); 672 return; 673 } 674 675 for (info = head; info != NULL; info = info->next) { 676 monitor_printf(mon, "vcpu[%"PRIi64"], limit rate %"PRIi64 " (MB/s)," 677 " current rate %"PRIi64 " (MB/s)\n", 678 info->value->cpu_index, 679 info->value->limit_rate, 680 info->value->current_rate); 681 } 682 } 683