1 /*
2 * Dirty page rate limit implementation code
3 *
4 * Copyright (c) 2022 CHINA TELECOM CO.,LTD.
5 *
6 * Authors:
7 * Hyman Huang(黄勇) <huangy81@chinatelecom.cn>
8 *
9 * This work is licensed under the terms of the GNU GPL, version 2 or later.
10 * See the COPYING file in the top-level directory.
11 */
12
13 #include "qemu/osdep.h"
14 #include "qemu/main-loop.h"
15 #include "qapi/qapi-commands-migration.h"
16 #include "qapi/qmp/qdict.h"
17 #include "qapi/error.h"
18 #include "sysemu/dirtyrate.h"
19 #include "sysemu/dirtylimit.h"
20 #include "monitor/hmp.h"
21 #include "monitor/monitor.h"
22 #include "exec/memory.h"
23 #include "exec/target_page.h"
24 #include "hw/boards.h"
25 #include "sysemu/kvm.h"
26 #include "trace.h"
27 #include "migration/misc.h"
28
/*
 * Tolerated gap, in MB/s, between a vCPU's measured dirty page rate and
 * its quota.  While the absolute difference stays within this range the
 * throttle of that vCPU is left unchanged.
 */
#define DIRTYLIMIT_TOLERANCE_RANGE          25  /* MB/s */
/*
 * If the gap between quota and measured dirty page rate, expressed as a
 * percentage of the larger of the two, exceeds this value, the vCPU sleep
 * time is increased or decreased in proportion to the gap.
 * Otherwise it is increased or decreased by a fixed step.
 */
#define DIRTYLIMIT_LINEAR_ADJUSTMENT_PCT    50
/*
 * Maximum share, in percent, that sleeping may take of one cycle made of
 * a dirty ring fill plus the sleep that follows it.  It is applied as a
 * multiplier of the dirty ring full time when capping the sleep time.
 */
#define DIRTYLIMIT_THROTTLE_PCT_MAX         99
46
47 struct {
48 VcpuStat stat;
49 bool running;
50 QemuThread thread;
51 } *vcpu_dirty_rate_stat;
52
/* Dirty page rate limit settings of a single vCPU. */
typedef struct VcpuDirtyLimitState {
    /* Index of the vCPU this entry describes */
    int cpu_index;
    /* Whether a dirty page rate limit is in force for this vCPU */
    bool enabled;
    /* Dirty page rate quota in MB/s; zero when the limit is not enabled */
    uint64_t quota;
} VcpuDirtyLimitState;
62
63 struct {
64 VcpuDirtyLimitState *states;
65 /* Max cpus number configured by user */
66 int max_cpus;
67 /* Number of vcpu under dirtylimit */
68 int limited_nvcpu;
69 } *dirtylimit_state;
70
71 /* protect dirtylimit_state */
72 static QemuMutex dirtylimit_mutex;
73
74 /* dirtylimit thread quit if dirtylimit_quit is true */
75 static bool dirtylimit_quit;
76
vcpu_dirty_rate_stat_collect(void)77 static void vcpu_dirty_rate_stat_collect(void)
78 {
79 VcpuStat stat;
80 int i = 0;
81 int64_t period = DIRTYLIMIT_CALC_TIME_MS;
82
83 if (migrate_dirty_limit() &&
84 migration_is_active()) {
85 period = migrate_vcpu_dirty_limit_period();
86 }
87
88 /* calculate vcpu dirtyrate */
89 vcpu_calculate_dirtyrate(period,
90 &stat,
91 GLOBAL_DIRTY_LIMIT,
92 false);
93
94 for (i = 0; i < stat.nvcpu; i++) {
95 vcpu_dirty_rate_stat->stat.rates[i].id = i;
96 vcpu_dirty_rate_stat->stat.rates[i].dirty_rate =
97 stat.rates[i].dirty_rate;
98 }
99
100 g_free(stat.rates);
101 }
102
vcpu_dirty_rate_stat_thread(void * opaque)103 static void *vcpu_dirty_rate_stat_thread(void *opaque)
104 {
105 rcu_register_thread();
106
107 /* start log sync */
108 global_dirty_log_change(GLOBAL_DIRTY_LIMIT, true);
109
110 while (qatomic_read(&vcpu_dirty_rate_stat->running)) {
111 vcpu_dirty_rate_stat_collect();
112 if (dirtylimit_in_service()) {
113 dirtylimit_process();
114 }
115 }
116
117 /* stop log sync */
118 global_dirty_log_change(GLOBAL_DIRTY_LIMIT, false);
119
120 rcu_unregister_thread();
121 return NULL;
122 }
123
vcpu_dirty_rate_get(int cpu_index)124 int64_t vcpu_dirty_rate_get(int cpu_index)
125 {
126 DirtyRateVcpu *rates = vcpu_dirty_rate_stat->stat.rates;
127 return qatomic_read_i64(&rates[cpu_index].dirty_rate);
128 }
129
vcpu_dirty_rate_stat_start(void)130 void vcpu_dirty_rate_stat_start(void)
131 {
132 if (qatomic_read(&vcpu_dirty_rate_stat->running)) {
133 return;
134 }
135
136 qatomic_set(&vcpu_dirty_rate_stat->running, 1);
137 qemu_thread_create(&vcpu_dirty_rate_stat->thread,
138 "dirtyrate-stat",
139 vcpu_dirty_rate_stat_thread,
140 NULL,
141 QEMU_THREAD_JOINABLE);
142 }
143
vcpu_dirty_rate_stat_stop(void)144 void vcpu_dirty_rate_stat_stop(void)
145 {
146 qatomic_set(&vcpu_dirty_rate_stat->running, 0);
147 dirtylimit_state_unlock();
148 bql_unlock();
149 qemu_thread_join(&vcpu_dirty_rate_stat->thread);
150 bql_lock();
151 dirtylimit_state_lock();
152 }
153
vcpu_dirty_rate_stat_initialize(void)154 void vcpu_dirty_rate_stat_initialize(void)
155 {
156 MachineState *ms = MACHINE(qdev_get_machine());
157 int max_cpus = ms->smp.max_cpus;
158
159 vcpu_dirty_rate_stat =
160 g_malloc0(sizeof(*vcpu_dirty_rate_stat));
161
162 vcpu_dirty_rate_stat->stat.nvcpu = max_cpus;
163 vcpu_dirty_rate_stat->stat.rates =
164 g_new0(DirtyRateVcpu, max_cpus);
165
166 vcpu_dirty_rate_stat->running = false;
167 }
168
vcpu_dirty_rate_stat_finalize(void)169 void vcpu_dirty_rate_stat_finalize(void)
170 {
171 g_free(vcpu_dirty_rate_stat->stat.rates);
172 vcpu_dirty_rate_stat->stat.rates = NULL;
173
174 g_free(vcpu_dirty_rate_stat);
175 vcpu_dirty_rate_stat = NULL;
176 }
177
dirtylimit_state_lock(void)178 void dirtylimit_state_lock(void)
179 {
180 qemu_mutex_lock(&dirtylimit_mutex);
181 }
182
dirtylimit_state_unlock(void)183 void dirtylimit_state_unlock(void)
184 {
185 qemu_mutex_unlock(&dirtylimit_mutex);
186 }
187
188 static void
dirtylimit_mutex_init(void)189 __attribute__((__constructor__)) dirtylimit_mutex_init(void)
190 {
191 qemu_mutex_init(&dirtylimit_mutex);
192 }
193
dirtylimit_vcpu_get_state(int cpu_index)194 static inline VcpuDirtyLimitState *dirtylimit_vcpu_get_state(int cpu_index)
195 {
196 return &dirtylimit_state->states[cpu_index];
197 }
198
dirtylimit_state_initialize(void)199 void dirtylimit_state_initialize(void)
200 {
201 MachineState *ms = MACHINE(qdev_get_machine());
202 int max_cpus = ms->smp.max_cpus;
203 int i;
204
205 dirtylimit_state = g_malloc0(sizeof(*dirtylimit_state));
206
207 dirtylimit_state->states =
208 g_new0(VcpuDirtyLimitState, max_cpus);
209
210 for (i = 0; i < max_cpus; i++) {
211 dirtylimit_state->states[i].cpu_index = i;
212 }
213
214 dirtylimit_state->max_cpus = max_cpus;
215 trace_dirtylimit_state_initialize(max_cpus);
216 }
217
dirtylimit_state_finalize(void)218 void dirtylimit_state_finalize(void)
219 {
220 g_free(dirtylimit_state->states);
221 dirtylimit_state->states = NULL;
222
223 g_free(dirtylimit_state);
224 dirtylimit_state = NULL;
225
226 trace_dirtylimit_state_finalize();
227 }
228
dirtylimit_in_service(void)229 bool dirtylimit_in_service(void)
230 {
231 return !!dirtylimit_state;
232 }
233
dirtylimit_vcpu_index_valid(int cpu_index)234 bool dirtylimit_vcpu_index_valid(int cpu_index)
235 {
236 MachineState *ms = MACHINE(qdev_get_machine());
237
238 return !(cpu_index < 0 ||
239 cpu_index >= ms->smp.max_cpus);
240 }
241
/*
 * Estimate how long it takes a vCPU to fill its dirty ring.
 *
 * @dirtyrate: dirty page rate of the vCPU; the callers in this file pass
 *             values expressed in MB/s.
 *
 * The estimate is the dirty ring size in MiB scaled by 1000000 and divided
 * by the highest rate ever passed to this function (kept in a function
 * static), not merely by @dirtyrate.  With a rate in MB/s the result is in
 * microseconds.
 *
 * Returns 0 if no non-zero rate has been seen yet.  Without this guard the
 * division below would divide by zero, which can happen when the first
 * caller passes a rate that integer division has truncated to 0.
 */
static uint64_t dirtylimit_dirty_ring_full_time(uint64_t dirtyrate)
{
    static uint64_t max_dirtyrate;
    uint64_t dirty_ring_size_MiB;

    dirty_ring_size_MiB = qemu_target_pages_to_MiB(kvm_dirty_ring_size());

    if (max_dirtyrate < dirtyrate) {
        max_dirtyrate = dirtyrate;
    }

    if (max_dirtyrate == 0) {
        /* Nothing has been dirtied so far: no meaningful estimate */
        return 0;
    }

    return dirty_ring_size_MiB * 1000000 / max_dirtyrate;
}
255
dirtylimit_done(uint64_t quota,uint64_t current)256 static inline bool dirtylimit_done(uint64_t quota,
257 uint64_t current)
258 {
259 uint64_t min, max;
260
261 min = MIN(quota, current);
262 max = MAX(quota, current);
263
264 return ((max - min) <= DIRTYLIMIT_TOLERANCE_RANGE) ? true : false;
265 }
266
267 static inline bool
dirtylimit_need_linear_adjustment(uint64_t quota,uint64_t current)268 dirtylimit_need_linear_adjustment(uint64_t quota,
269 uint64_t current)
270 {
271 uint64_t min, max;
272
273 min = MIN(quota, current);
274 max = MAX(quota, current);
275
276 return ((max - min) * 100 / max) > DIRTYLIMIT_LINEAR_ADJUSTMENT_PCT;
277 }
278
/*
 * Move cpu->throttle_us_per_full one step towards the value at which the
 * vCPU's dirty page rate meets its quota.
 *
 * @cpu:     vCPU whose sleep time is adjusted
 * @quota:   dirty page rate limit of the vCPU
 * @current: measured dirty page rate of the vCPU
 *
 * A vCPU that dirties nothing gets its sleep time reset to 0.  Otherwise
 * the sleep time grows when @current is above @quota and shrinks when it
 * is not, either in proportion to the gap (large gap) or by a tenth of the
 * dirty ring full time (small gap).  The result is clamped to
 * [0, ring full time * DIRTYLIMIT_THROTTLE_PCT_MAX].
 */
static void dirtylimit_set_throttle(CPUState *cpu,
                                    uint64_t quota,
                                    uint64_t current)
{
    const bool over_quota = quota < current;
    int64_t ring_full_time_us;
    uint64_t sleep_pct;
    uint64_t throttle_us;

    if (current == 0) {
        cpu->throttle_us_per_full = 0;
        return;
    }

    ring_full_time_us = dirtylimit_dirty_ring_full_time(current);

    if (!dirtylimit_need_linear_adjustment(quota, current)) {
        /* Small gap: nudge the sleep time by a fixed step */
        if (over_quota) {
            cpu->throttle_us_per_full += ring_full_time_us / 10;
        } else {
            cpu->throttle_us_per_full -= ring_full_time_us / 10;
        }
    } else {
        /* Large gap: step size is proportional to the relative gap */
        sleep_pct = over_quota ? (current - quota) * 100 / current
                               : (quota - current) * 100 / quota;
        throttle_us =
            ring_full_time_us * sleep_pct / (double)(100 - sleep_pct);

        if (over_quota) {
            cpu->throttle_us_per_full += throttle_us;
        } else {
            cpu->throttle_us_per_full -= throttle_us;
        }

        trace_dirtylimit_throttle_pct(cpu->cpu_index,
                                      sleep_pct,
                                      throttle_us);
    }

    /*
     * TODO: in the big kvm_dirty_ring_size case (eg: 65536, or other scenario),
     *       current dirty page rate may never reach the quota, we should stop
     *       increasing sleep time?
     */
    cpu->throttle_us_per_full =
        MIN(cpu->throttle_us_per_full,
            ring_full_time_us * DIRTYLIMIT_THROTTLE_PCT_MAX);

    /* Shrinking may have pushed the sleep time below zero */
    cpu->throttle_us_per_full = MAX(cpu->throttle_us_per_full, 0);
}
328
/*
 * Compare the measured dirty page rate of @cpu with its quota and adjust
 * the vCPU's throttle unless the two are already close enough.
 */
static void dirtylimit_adjust_throttle(CPUState *cpu)
{
    int idx = cpu->cpu_index;
    uint64_t quota = dirtylimit_vcpu_get_state(idx)->quota;
    uint64_t current = vcpu_dirty_rate_get(idx);

    if (dirtylimit_done(quota, current)) {
        return;
    }

    dirtylimit_set_throttle(cpu, quota, current);
}
344
dirtylimit_process(void)345 void dirtylimit_process(void)
346 {
347 CPUState *cpu;
348
349 if (!qatomic_read(&dirtylimit_quit)) {
350 dirtylimit_state_lock();
351
352 if (!dirtylimit_in_service()) {
353 dirtylimit_state_unlock();
354 return;
355 }
356
357 CPU_FOREACH(cpu) {
358 if (!dirtylimit_vcpu_get_state(cpu->cpu_index)->enabled) {
359 continue;
360 }
361 dirtylimit_adjust_throttle(cpu);
362 }
363 dirtylimit_state_unlock();
364 }
365 }
366
dirtylimit_change(bool start)367 void dirtylimit_change(bool start)
368 {
369 if (start) {
370 qatomic_set(&dirtylimit_quit, 0);
371 } else {
372 qatomic_set(&dirtylimit_quit, 1);
373 }
374 }
375
dirtylimit_set_vcpu(int cpu_index,uint64_t quota,bool enable)376 void dirtylimit_set_vcpu(int cpu_index,
377 uint64_t quota,
378 bool enable)
379 {
380 trace_dirtylimit_set_vcpu(cpu_index, quota);
381
382 if (enable) {
383 dirtylimit_state->states[cpu_index].quota = quota;
384 if (!dirtylimit_vcpu_get_state(cpu_index)->enabled) {
385 dirtylimit_state->limited_nvcpu++;
386 }
387 } else {
388 dirtylimit_state->states[cpu_index].quota = 0;
389 if (dirtylimit_state->states[cpu_index].enabled) {
390 dirtylimit_state->limited_nvcpu--;
391 }
392 }
393
394 dirtylimit_state->states[cpu_index].enabled = enable;
395 }
396
dirtylimit_set_all(uint64_t quota,bool enable)397 void dirtylimit_set_all(uint64_t quota,
398 bool enable)
399 {
400 MachineState *ms = MACHINE(qdev_get_machine());
401 int max_cpus = ms->smp.max_cpus;
402 int i;
403
404 for (i = 0; i < max_cpus; i++) {
405 dirtylimit_set_vcpu(i, quota, enable);
406 }
407 }
408
/*
 * Put @cpu to sleep for its current throttle time.
 *
 * The sleep only happens when the throttle time is non-zero, the dirty
 * limit is in service and the limit is enabled for this vCPU.  The
 * dirtylimit state lock is held only while checking those conditions and
 * is released before sleeping.
 */
void dirtylimit_vcpu_execute(CPUState *cpu)
{
    bool throttled;

    if (!cpu->throttle_us_per_full) {
        return;
    }

    dirtylimit_state_lock();
    throttled = dirtylimit_in_service() &&
                dirtylimit_vcpu_get_state(cpu->cpu_index)->enabled;
    dirtylimit_state_unlock();

    if (throttled) {
        trace_dirtylimit_vcpu_execute(cpu->cpu_index,
                                      cpu->throttle_us_per_full);

        g_usleep(cpu->throttle_us_per_full);
    }
}
427
/*
 * Bring the dirty limit into service: allocate the limit state, allow
 * throttle adjustment, allocate the rate statistics and start the
 * collector thread.
 */
static void dirtylimit_init(void)
{
    /* Limit bookkeeping first; this is what marks the limit in service */
    dirtylimit_state_initialize();
    dirtylimit_change(true);

    /* The statistics must exist before the collector thread starts */
    vcpu_dirty_rate_stat_initialize();
    vcpu_dirty_rate_stat_start();
}
435
/*
 * Take the dirty limit out of service: stop and join the collector
 * thread, free the rate statistics, disallow throttle adjustment and free
 * the limit state.
 *
 * vcpu_dirty_rate_stat_stop() temporarily releases the dirtylimit state
 * lock and the BQL, so both are expected to be held on entry.
 */
static void dirtylimit_cleanup(void)
{
    /* The thread must be gone before the data it uses is freed */
    vcpu_dirty_rate_stat_stop();
    vcpu_dirty_rate_stat_finalize();

    dirtylimit_change(false);
    dirtylimit_state_finalize();
}
443
/*
 * Tell whether the dirty page rate limit may be changed right now.
 *
 * Changes are refused when a migration with the dirty-limit capability is
 * running, the dirty limit is in service and the caller is not the
 * migration thread itself.
 */
static bool dirtylimit_is_allowed(void)
{
    bool owned_by_migration = migration_is_running() &&
                              !migration_thread_is_self() &&
                              migrate_dirty_limit() &&
                              dirtylimit_in_service();

    return !owned_by_migration;
}
458
qmp_cancel_vcpu_dirty_limit(bool has_cpu_index,int64_t cpu_index,Error ** errp)459 void qmp_cancel_vcpu_dirty_limit(bool has_cpu_index,
460 int64_t cpu_index,
461 Error **errp)
462 {
463 if (!kvm_enabled() || !kvm_dirty_ring_enabled()) {
464 return;
465 }
466
467 if (has_cpu_index && !dirtylimit_vcpu_index_valid(cpu_index)) {
468 error_setg(errp, "incorrect cpu index specified");
469 return;
470 }
471
472 if (!dirtylimit_is_allowed()) {
473 error_setg(errp, "can't cancel dirty page rate limit while"
474 " migration is running");
475 return;
476 }
477
478 if (!dirtylimit_in_service()) {
479 return;
480 }
481
482 dirtylimit_state_lock();
483
484 if (has_cpu_index) {
485 dirtylimit_set_vcpu(cpu_index, 0, false);
486 } else {
487 dirtylimit_set_all(0, false);
488 }
489
490 if (!dirtylimit_state->limited_nvcpu) {
491 dirtylimit_cleanup();
492 }
493
494 dirtylimit_state_unlock();
495 }
496
/*
 * HMP wrapper around qmp_cancel_vcpu_dirty_limit().
 *
 * The optional "cpu_index" argument selects a single vCPU; when it is
 * absent (or given as -1) the limit of every vCPU is cancelled.  On
 * success a hint pointing at 'info vcpu_dirty_limit' is printed.
 */
void hmp_cancel_vcpu_dirty_limit(Monitor *mon, const QDict *qdict)
{
    Error *err = NULL;
    int64_t cpu_index = qdict_get_try_int(qdict, "cpu_index", -1);
    bool has_cpu_index = cpu_index != -1;

    qmp_cancel_vcpu_dirty_limit(has_cpu_index, cpu_index, &err);
    if (err) {
        hmp_handle_error(mon, err);
        return;
    }

    monitor_printf(mon, "[Please use 'info vcpu_dirty_limit' to query "
                   "dirty limit for virtual CPU]\n");
}
511
qmp_set_vcpu_dirty_limit(bool has_cpu_index,int64_t cpu_index,uint64_t dirty_rate,Error ** errp)512 void qmp_set_vcpu_dirty_limit(bool has_cpu_index,
513 int64_t cpu_index,
514 uint64_t dirty_rate,
515 Error **errp)
516 {
517 if (!kvm_enabled() || !kvm_dirty_ring_enabled()) {
518 error_setg(errp, "dirty page limit feature requires KVM with"
519 " accelerator property 'dirty-ring-size' set'");
520 return;
521 }
522
523 if (has_cpu_index && !dirtylimit_vcpu_index_valid(cpu_index)) {
524 error_setg(errp, "incorrect cpu index specified");
525 return;
526 }
527
528 if (!dirtylimit_is_allowed()) {
529 error_setg(errp, "can't set dirty page rate limit while"
530 " migration is running");
531 return;
532 }
533
534 if (!dirty_rate) {
535 qmp_cancel_vcpu_dirty_limit(has_cpu_index, cpu_index, errp);
536 return;
537 }
538
539 dirtylimit_state_lock();
540
541 if (!dirtylimit_in_service()) {
542 dirtylimit_init();
543 }
544
545 if (has_cpu_index) {
546 dirtylimit_set_vcpu(cpu_index, dirty_rate, true);
547 } else {
548 dirtylimit_set_all(dirty_rate, true);
549 }
550
551 dirtylimit_state_unlock();
552 }
553
/*
 * HMP wrapper around qmp_set_vcpu_dirty_limit().
 *
 * "dirty_rate" is mandatory and must not be negative.  The optional
 * "cpu_index" argument selects a single vCPU; when it is absent (or given
 * as -1) the limit applies to every vCPU.  Any error is reported on @mon.
 */
void hmp_set_vcpu_dirty_limit(Monitor *mon, const QDict *qdict)
{
    Error *err = NULL;
    int64_t dirty_rate = qdict_get_int(qdict, "dirty_rate");
    int64_t cpu_index = qdict_get_try_int(qdict, "cpu_index", -1);

    if (dirty_rate < 0) {
        error_setg(&err, "invalid dirty page limit %" PRId64, dirty_rate);
    } else {
        qmp_set_vcpu_dirty_limit(cpu_index != -1, cpu_index, dirty_rate,
                                 &err);
    }

    /* Called on success too, with err still NULL, exactly as before */
    hmp_handle_error(mon, err);
}
570
571 /* Return the max throttle time of each virtual CPU */
dirtylimit_throttle_time_per_round(void)572 uint64_t dirtylimit_throttle_time_per_round(void)
573 {
574 CPUState *cpu;
575 int64_t max = 0;
576
577 CPU_FOREACH(cpu) {
578 if (cpu->throttle_us_per_full > max) {
579 max = cpu->throttle_us_per_full;
580 }
581 }
582
583 return max;
584 }
585
586 /*
587 * Estimate average dirty ring full time of each virtaul CPU.
588 * Return 0 if guest doesn't dirty memory.
589 */
dirtylimit_ring_full_time(void)590 uint64_t dirtylimit_ring_full_time(void)
591 {
592 CPUState *cpu;
593 uint64_t curr_rate = 0;
594 int nvcpus = 0;
595
596 CPU_FOREACH(cpu) {
597 if (cpu->running) {
598 nvcpus++;
599 curr_rate += vcpu_dirty_rate_get(cpu->cpu_index);
600 }
601 }
602
603 if (!curr_rate || !nvcpus) {
604 return 0;
605 }
606
607 return dirtylimit_dirty_ring_full_time(curr_rate / nvcpus);
608 }
609
dirtylimit_query_vcpu(int cpu_index)610 static struct DirtyLimitInfo *dirtylimit_query_vcpu(int cpu_index)
611 {
612 DirtyLimitInfo *info = NULL;
613
614 info = g_malloc0(sizeof(*info));
615 info->cpu_index = cpu_index;
616 info->limit_rate = dirtylimit_vcpu_get_state(cpu_index)->quota;
617 info->current_rate = vcpu_dirty_rate_get(cpu_index);
618
619 return info;
620 }
621
dirtylimit_query_all(void)622 static struct DirtyLimitInfoList *dirtylimit_query_all(void)
623 {
624 int i, index;
625 DirtyLimitInfo *info = NULL;
626 DirtyLimitInfoList *head = NULL, **tail = &head;
627
628 dirtylimit_state_lock();
629
630 if (!dirtylimit_in_service()) {
631 dirtylimit_state_unlock();
632 return NULL;
633 }
634
635 for (i = 0; i < dirtylimit_state->max_cpus; i++) {
636 index = dirtylimit_state->states[i].cpu_index;
637 if (dirtylimit_vcpu_get_state(index)->enabled) {
638 info = dirtylimit_query_vcpu(index);
639 QAPI_LIST_APPEND(tail, info);
640 }
641 }
642
643 dirtylimit_state_unlock();
644
645 return head;
646 }
647
qmp_query_vcpu_dirty_limit(Error ** errp)648 struct DirtyLimitInfoList *qmp_query_vcpu_dirty_limit(Error **errp)
649 {
650 return dirtylimit_query_all();
651 }
652
/*
 * HMP 'info vcpu_dirty_limit': print, for every limited vCPU, its index,
 * its limit rate and its current dirty page rate.
 *
 * Prints a notice instead when the dirty limit is not in service.  The
 * queried list is released automatically when the function returns.
 */
void hmp_info_vcpu_dirty_limit(Monitor *mon, const QDict *qdict)
{
    g_autoptr(DirtyLimitInfoList) list = NULL;
    DirtyLimitInfoList *entry;
    Error *err = NULL;

    if (!dirtylimit_in_service()) {
        monitor_printf(mon, "Dirty page limit not enabled!\n");
        return;
    }

    list = qmp_query_vcpu_dirty_limit(&err);
    if (err) {
        hmp_handle_error(mon, err);
        return;
    }

    entry = list;
    while (entry != NULL) {
        monitor_printf(mon, "vcpu[%"PRIi64"], limit rate %"PRIi64 " (MB/s),"
                       " current rate %"PRIi64 " (MB/s)\n",
                       entry->value->cpu_index,
                       entry->value->limit_rate,
                       entry->value->current_rate);
        entry = entry->next;
    }
}
678