1 /*
2 * Dirty page rate limit implementation code
3 *
4 * Copyright (c) 2022 CHINA TELECOM CO.,LTD.
5 *
6 * Authors:
7 * Hyman Huang(黄勇) <huangy81@chinatelecom.cn>
8 *
9 * This work is licensed under the terms of the GNU GPL, version 2 or later.
10 * See the COPYING file in the top-level directory.
11 */
12
13 #include "qemu/osdep.h"
14 #include "qemu/main-loop.h"
15 #include "qapi/qapi-commands-migration.h"
16 #include "qobject/qdict.h"
17 #include "qapi/error.h"
18 #include "system/dirtyrate.h"
19 #include "system/dirtylimit.h"
20 #include "monitor/hmp.h"
21 #include "monitor/monitor.h"
22 #include "exec/memory.h"
23 #include "exec/target_page.h"
24 #include "hw/boards.h"
25 #include "system/kvm.h"
26 #include "trace.h"
27 #include "migration/misc.h"
28
/*
 * Dirtylimit stops adjusting the throttle once the absolute
 * difference between the quota and the current dirty page rate
 * is no larger than DIRTYLIMIT_TOLERANCE_RANGE.
 */
#define DIRTYLIMIT_TOLERANCE_RANGE 25 /* MB/s */
/*
 * Adjust the vcpu sleep time proportionally to the error when
 * the dirty page rate error percentage exceeds
 * DIRTYLIMIT_LINEAR_ADJUSTMENT_PCT.
 * Otherwise, adjust the vcpu sleep time by a fixed step.
 */
#define DIRTYLIMIT_LINEAR_ADJUSTMENT_PCT 50
/*
 * Max vcpu sleep time percentage during a cycle
 * composed of dirty ring full and sleep time.
 * It is applied as a multiplier of the dirty ring full time
 * when the sleep time is capped.
 */
#define DIRTYLIMIT_THROTTLE_PCT_MAX 99
46
/*
 * Per-vcpu dirty page rate statistics together with the state of
 * the thread that refreshes them.
 */
struct {
    /* Latest per-vcpu dirty page rates; rates[] holds max_cpus entries */
    VcpuStat stat;
    /* The statistics thread keeps looping while this is true */
    bool running;
    /* Handle of the "dirtyrate-stat" thread */
    QemuThread thread;
} *vcpu_dirty_rate_stat;
52
/* Dirty page rate limit configuration of a single vcpu. */
typedef struct VcpuDirtyLimitState {
    /* Index of the vcpu this entry describes */
    int cpu_index;
    /* True while a dirty page rate limit is set on this vcpu */
    bool enabled;
    /*
     * Quota dirty page rate, unit is MB/s
     * zero if not enabled.
     */
    uint64_t quota;
} VcpuDirtyLimitState;
62
/*
 * Global dirty limit state.  The pointer is NULL while dirtylimit
 * is not in service.
 */
struct {
    /* One entry per possible vcpu, indexed by cpu index */
    VcpuDirtyLimitState *states;
    /* Max cpus number configured by user */
    int max_cpus;
    /* Number of vcpu under dirtylimit */
    int limited_nvcpu;
} *dirtylimit_state;
70
/* Protects dirtylimit_state; initialized by dirtylimit_mutex_init() */
static QemuMutex dirtylimit_mutex;

/*
 * dirtylimit_process() does nothing while dirtylimit_quit is true;
 * the flag is toggled through dirtylimit_change().
 */
static bool dirtylimit_quit;
76
vcpu_dirty_rate_stat_collect(void)77 static void vcpu_dirty_rate_stat_collect(void)
78 {
79 VcpuStat stat;
80 int i = 0;
81 int64_t period = DIRTYLIMIT_CALC_TIME_MS;
82
83 if (migrate_dirty_limit() && migration_is_running()) {
84 period = migrate_vcpu_dirty_limit_period();
85 }
86
87 /* calculate vcpu dirtyrate */
88 vcpu_calculate_dirtyrate(period,
89 &stat,
90 GLOBAL_DIRTY_LIMIT,
91 false);
92
93 for (i = 0; i < stat.nvcpu; i++) {
94 vcpu_dirty_rate_stat->stat.rates[i].id = i;
95 vcpu_dirty_rate_stat->stat.rates[i].dirty_rate =
96 stat.rates[i].dirty_rate;
97 }
98
99 g_free(stat.rates);
100 }
101
vcpu_dirty_rate_stat_thread(void * opaque)102 static void *vcpu_dirty_rate_stat_thread(void *opaque)
103 {
104 rcu_register_thread();
105
106 /* start log sync */
107 global_dirty_log_change(GLOBAL_DIRTY_LIMIT, true);
108
109 while (qatomic_read(&vcpu_dirty_rate_stat->running)) {
110 vcpu_dirty_rate_stat_collect();
111 if (dirtylimit_in_service()) {
112 dirtylimit_process();
113 }
114 }
115
116 /* stop log sync */
117 global_dirty_log_change(GLOBAL_DIRTY_LIMIT, false);
118
119 rcu_unregister_thread();
120 return NULL;
121 }
122
vcpu_dirty_rate_get(int cpu_index)123 int64_t vcpu_dirty_rate_get(int cpu_index)
124 {
125 DirtyRateVcpu *rates = vcpu_dirty_rate_stat->stat.rates;
126 return qatomic_read_i64(&rates[cpu_index].dirty_rate);
127 }
128
vcpu_dirty_rate_stat_start(void)129 void vcpu_dirty_rate_stat_start(void)
130 {
131 if (qatomic_read(&vcpu_dirty_rate_stat->running)) {
132 return;
133 }
134
135 qatomic_set(&vcpu_dirty_rate_stat->running, 1);
136 qemu_thread_create(&vcpu_dirty_rate_stat->thread,
137 "dirtyrate-stat",
138 vcpu_dirty_rate_stat_thread,
139 NULL,
140 QEMU_THREAD_JOINABLE);
141 }
142
/*
 * Ask the "dirtyrate-stat" thread to exit and wait for it.
 *
 * The function releases the dirtylimit state lock and the BQL while
 * joining and takes both again before returning, so it is expected
 * to be called with both held.  The state lock has to be dropped
 * because the thread takes it in dirtylimit_process().
 * NOTE(review): the BQL is presumably dropped because the thread
 * needs it on its way out - confirm.
 */
void vcpu_dirty_rate_stat_stop(void)
{
    /* Make the thread leave its loop at the next check */
    qatomic_set(&vcpu_dirty_rate_stat->running, 0);
    dirtylimit_state_unlock();
    bql_unlock();
    qemu_thread_join(&vcpu_dirty_rate_stat->thread);
    /* Re-acquire in the reverse order of release */
    bql_lock();
    dirtylimit_state_lock();
}
152
vcpu_dirty_rate_stat_initialize(void)153 void vcpu_dirty_rate_stat_initialize(void)
154 {
155 MachineState *ms = MACHINE(qdev_get_machine());
156 int max_cpus = ms->smp.max_cpus;
157
158 vcpu_dirty_rate_stat =
159 g_malloc0(sizeof(*vcpu_dirty_rate_stat));
160
161 vcpu_dirty_rate_stat->stat.nvcpu = max_cpus;
162 vcpu_dirty_rate_stat->stat.rates =
163 g_new0(DirtyRateVcpu, max_cpus);
164
165 vcpu_dirty_rate_stat->running = false;
166 }
167
/*
 * Free the per-vcpu rate table and vcpu_dirty_rate_stat itself,
 * leaving the global pointer NULL.  The pointer is dereferenced
 * unconditionally, so it must not be NULL on entry.
 */
void vcpu_dirty_rate_stat_finalize(void)
{
    g_free(vcpu_dirty_rate_stat->stat.rates);
    vcpu_dirty_rate_stat->stat.rates = NULL;

    g_free(vcpu_dirty_rate_stat);
    vcpu_dirty_rate_stat = NULL;
}
176
/* Acquire dirtylimit_mutex, the lock protecting dirtylimit_state. */
void dirtylimit_state_lock(void)
{
    qemu_mutex_lock(&dirtylimit_mutex);
}
181
/* Release dirtylimit_mutex, the lock protecting dirtylimit_state. */
void dirtylimit_state_unlock(void)
{
    qemu_mutex_unlock(&dirtylimit_mutex);
}
186
/*
 * Initialize dirtylimit_mutex.  The constructor attribute makes the
 * function run automatically before main().
 */
static void
__attribute__((__constructor__)) dirtylimit_mutex_init(void)
{
    qemu_mutex_init(&dirtylimit_mutex);
}
192
/*
 * Return the dirty limit state entry of a vcpu.
 *
 * dirtylimit_state is dereferenced unconditionally and @cpu_index is
 * not range-checked, so the caller must make sure dirtylimit is in
 * service and the index is valid.
 */
static inline VcpuDirtyLimitState *dirtylimit_vcpu_get_state(int cpu_index)
{
    return &dirtylimit_state->states[cpu_index];
}
197
dirtylimit_state_initialize(void)198 void dirtylimit_state_initialize(void)
199 {
200 MachineState *ms = MACHINE(qdev_get_machine());
201 int max_cpus = ms->smp.max_cpus;
202 int i;
203
204 dirtylimit_state = g_malloc0(sizeof(*dirtylimit_state));
205
206 dirtylimit_state->states =
207 g_new0(VcpuDirtyLimitState, max_cpus);
208
209 for (i = 0; i < max_cpus; i++) {
210 dirtylimit_state->states[i].cpu_index = i;
211 }
212
213 dirtylimit_state->max_cpus = max_cpus;
214 trace_dirtylimit_state_initialize(max_cpus);
215 }
216
/*
 * Free the per-vcpu state array and dirtylimit_state itself, leaving
 * the global pointer NULL so that dirtylimit_in_service() reports
 * false afterwards.  The pointer must not be NULL on entry.
 */
void dirtylimit_state_finalize(void)
{
    g_free(dirtylimit_state->states);
    dirtylimit_state->states = NULL;

    g_free(dirtylimit_state);
    dirtylimit_state = NULL;

    trace_dirtylimit_state_finalize();
}
227
/* Return true while dirtylimit_state is allocated. */
bool dirtylimit_in_service(void)
{
    return !!dirtylimit_state;
}
232
dirtylimit_vcpu_index_valid(int cpu_index)233 bool dirtylimit_vcpu_index_valid(int cpu_index)
234 {
235 MachineState *ms = MACHINE(qdev_get_machine());
236
237 return !(cpu_index < 0 ||
238 cpu_index >= ms->smp.max_cpus);
239 }
240
/*
 * Estimate the time, in microseconds, a vcpu needs to fill its
 * dirty ring.
 *
 * The estimate is based on the highest dirty page rate (MB/s) passed
 * to this function so far, which is remembered across calls, rather
 * than on @dirtyrate alone.
 *
 * Returns 0 if no non-zero dirty page rate has been seen yet; without
 * this check the division below would divide by zero, e.g. when a
 * caller passes an average rate that was truncated to 0.
 */
static uint64_t dirtylimit_dirty_ring_full_time(uint64_t dirtyrate)
{
    static uint64_t max_dirtyrate;
    uint64_t dirty_ring_size_MiB;

    dirty_ring_size_MiB = qemu_target_pages_to_MiB(kvm_dirty_ring_size());

    if (max_dirtyrate < dirtyrate) {
        max_dirtyrate = dirtyrate;
    }

    if (max_dirtyrate == 0) {
        return 0;
    }

    /* MiB / (MB/s) gives seconds; scale to microseconds */
    return dirty_ring_size_MiB * 1000000 / max_dirtyrate;
}
254
dirtylimit_done(uint64_t quota,uint64_t current)255 static inline bool dirtylimit_done(uint64_t quota,
256 uint64_t current)
257 {
258 uint64_t min, max;
259
260 min = MIN(quota, current);
261 max = MAX(quota, current);
262
263 return ((max - min) <= DIRTYLIMIT_TOLERANCE_RANGE) ? true : false;
264 }
265
266 static inline bool
dirtylimit_need_linear_adjustment(uint64_t quota,uint64_t current)267 dirtylimit_need_linear_adjustment(uint64_t quota,
268 uint64_t current)
269 {
270 uint64_t min, max;
271
272 min = MIN(quota, current);
273 max = MAX(quota, current);
274
275 return ((max - min) * 100 / max) > DIRTYLIMIT_LINEAR_ADJUSTMENT_PCT;
276 }
277
/*
 * Update the sleep time a vcpu spends each time its dirty ring gets
 * full (cpu->throttle_us_per_full), moving the current dirty page
 * rate towards the quota.
 *
 * @cpu: vcpu to adjust
 * @quota: target dirty page rate
 * @current: measured dirty page rate; zero resets the throttle
 *
 * When the error is large (see dirtylimit_need_linear_adjustment())
 * the sleep time changes proportionally to the error, otherwise it
 * changes by a tenth of the dirty ring full time.  The result is
 * clamped to [0, ring full time * DIRTYLIMIT_THROTTLE_PCT_MAX].
 */
static void dirtylimit_set_throttle(CPUState *cpu,
                                    uint64_t quota,
                                    uint64_t current)
{
    int64_t ring_full_time_us;
    bool over_quota = quota < current;

    if (current == 0) {
        cpu->throttle_us_per_full = 0;
        return;
    }

    ring_full_time_us = dirtylimit_dirty_ring_full_time(current);

    if (!dirtylimit_need_linear_adjustment(quota, current)) {
        /* Small error: move by a fixed step */
        int64_t step_us = ring_full_time_us / 10;

        if (over_quota) {
            cpu->throttle_us_per_full += step_us;
        } else {
            cpu->throttle_us_per_full -= step_us;
        }
    } else {
        /* Large error: move proportionally to the error */
        uint64_t sleep_pct;
        uint64_t throttle_us;

        if (over_quota) {
            sleep_pct = (current - quota) * 100 / current;
        } else {
            sleep_pct = (quota - current) * 100 / quota;
        }

        throttle_us =
            ring_full_time_us * sleep_pct / (double)(100 - sleep_pct);

        if (over_quota) {
            cpu->throttle_us_per_full += throttle_us;
        } else {
            cpu->throttle_us_per_full -= throttle_us;
        }

        trace_dirtylimit_throttle_pct(cpu->cpu_index,
                                      sleep_pct,
                                      throttle_us);
    }

    /*
     * TODO: in the big kvm_dirty_ring_size case (eg: 65536, or other scenario),
     *       current dirty page rate may never reach the quota, we should stop
     *       increasing sleep time?
     */
    cpu->throttle_us_per_full = MIN(cpu->throttle_us_per_full,
        ring_full_time_us * DIRTYLIMIT_THROTTLE_PCT_MAX);

    cpu->throttle_us_per_full = MAX(cpu->throttle_us_per_full, 0);
}
327
/*
 * Compare a vcpu's current dirty page rate with its quota and update
 * its throttle unless the two are already within tolerance.
 */
static void dirtylimit_adjust_throttle(CPUState *cpu)
{
    int idx = cpu->cpu_index;
    uint64_t quota = dirtylimit_vcpu_get_state(idx)->quota;
    uint64_t current = vcpu_dirty_rate_get(idx);

    if (dirtylimit_done(quota, current)) {
        return;
    }

    dirtylimit_set_throttle(cpu, quota, current);
}
343
dirtylimit_process(void)344 void dirtylimit_process(void)
345 {
346 CPUState *cpu;
347
348 if (!qatomic_read(&dirtylimit_quit)) {
349 dirtylimit_state_lock();
350
351 if (!dirtylimit_in_service()) {
352 dirtylimit_state_unlock();
353 return;
354 }
355
356 CPU_FOREACH(cpu) {
357 if (!dirtylimit_vcpu_get_state(cpu->cpu_index)->enabled) {
358 continue;
359 }
360 dirtylimit_adjust_throttle(cpu);
361 }
362 dirtylimit_state_unlock();
363 }
364 }
365
dirtylimit_change(bool start)366 void dirtylimit_change(bool start)
367 {
368 if (start) {
369 qatomic_set(&dirtylimit_quit, 0);
370 } else {
371 qatomic_set(&dirtylimit_quit, 1);
372 }
373 }
374
dirtylimit_set_vcpu(int cpu_index,uint64_t quota,bool enable)375 void dirtylimit_set_vcpu(int cpu_index,
376 uint64_t quota,
377 bool enable)
378 {
379 trace_dirtylimit_set_vcpu(cpu_index, quota);
380
381 if (enable) {
382 dirtylimit_state->states[cpu_index].quota = quota;
383 if (!dirtylimit_vcpu_get_state(cpu_index)->enabled) {
384 dirtylimit_state->limited_nvcpu++;
385 }
386 } else {
387 dirtylimit_state->states[cpu_index].quota = 0;
388 if (dirtylimit_state->states[cpu_index].enabled) {
389 dirtylimit_state->limited_nvcpu--;
390 }
391 }
392
393 dirtylimit_state->states[cpu_index].enabled = enable;
394 }
395
dirtylimit_set_all(uint64_t quota,bool enable)396 void dirtylimit_set_all(uint64_t quota,
397 bool enable)
398 {
399 MachineState *ms = MACHINE(qdev_get_machine());
400 int max_cpus = ms->smp.max_cpus;
401 int i;
402
403 for (i = 0; i < max_cpus; i++) {
404 dirtylimit_set_vcpu(i, quota, enable);
405 }
406 }
407
/*
 * Put the calling vcpu to sleep for cpu->throttle_us_per_full
 * microseconds if it has a non-zero throttle and its dirty page rate
 * limit is enabled.
 *
 * The dirtylimit state lock is only held while checking the state;
 * it is released before sleeping.
 */
void dirtylimit_vcpu_execute(CPUState *cpu)
{
    bool throttled;

    if (!cpu->throttle_us_per_full) {
        return;
    }

    dirtylimit_state_lock();
    throttled = dirtylimit_in_service() &&
                dirtylimit_vcpu_get_state(cpu->cpu_index)->enabled;
    dirtylimit_state_unlock();

    if (throttled) {
        trace_dirtylimit_vcpu_execute(cpu->cpu_index,
                                      cpu->throttle_us_per_full);

        g_usleep(cpu->throttle_us_per_full);
    }
}
426
/*
 * Bring dirtylimit into service: allocate the limit state, clear the
 * quit flag, allocate the dirty page rate statistics and start the
 * statistics thread.
 */
static void dirtylimit_init(void)
{
    dirtylimit_state_initialize();
    dirtylimit_change(true);
    /* The statistics must exist before the thread that fills them starts */
    vcpu_dirty_rate_stat_initialize();
    vcpu_dirty_rate_stat_start();
}
434
/*
 * Take dirtylimit out of service: stop and join the statistics
 * thread, free the statistics, set the quit flag and free the limit
 * state.
 *
 * vcpu_dirty_rate_stat_stop() temporarily releases the dirtylimit
 * state lock and the BQL, so this is expected to be called with both
 * held.
 */
static void dirtylimit_cleanup(void)
{
    /* The thread must be gone before the data it uses is freed */
    vcpu_dirty_rate_stat_stop();
    vcpu_dirty_rate_stat_finalize();
    dirtylimit_change(false);
    dirtylimit_state_finalize();
}
442
/*
 * Setting or cancelling the dirty page rate limit is refused while a
 * migration with the dirty-limit capability enabled is running and
 * dirtylimit is in service, unless the caller is the migration
 * thread itself.
 */
static bool dirtylimit_is_allowed(void)
{
    bool blocked = migration_is_running() &&
                   !migration_thread_is_self() &&
                   migrate_dirty_limit() &&
                   dirtylimit_in_service();

    return !blocked;
}
457
qmp_cancel_vcpu_dirty_limit(bool has_cpu_index,int64_t cpu_index,Error ** errp)458 void qmp_cancel_vcpu_dirty_limit(bool has_cpu_index,
459 int64_t cpu_index,
460 Error **errp)
461 {
462 if (!kvm_enabled() || !kvm_dirty_ring_enabled()) {
463 return;
464 }
465
466 if (has_cpu_index && !dirtylimit_vcpu_index_valid(cpu_index)) {
467 error_setg(errp, "incorrect cpu index specified");
468 return;
469 }
470
471 if (!dirtylimit_is_allowed()) {
472 error_setg(errp, "can't cancel dirty page rate limit while"
473 " migration is running");
474 return;
475 }
476
477 if (!dirtylimit_in_service()) {
478 return;
479 }
480
481 dirtylimit_state_lock();
482
483 if (has_cpu_index) {
484 dirtylimit_set_vcpu(cpu_index, 0, false);
485 } else {
486 dirtylimit_set_all(0, false);
487 }
488
489 if (!dirtylimit_state->limited_nvcpu) {
490 dirtylimit_cleanup();
491 }
492
493 dirtylimit_state_unlock();
494 }
495
/*
 * HMP handler: cancel the dirty page rate limit.
 *
 * The optional "cpu_index" argument selects a single vcpu; when it is
 * absent (or given as -1) the limit of every vcpu is cancelled.
 * Errors are reported on @mon, otherwise a hint is printed.
 */
void hmp_cancel_vcpu_dirty_limit(Monitor *mon, const QDict *qdict)
{
    Error *local_err = NULL;
    int64_t vcpu = qdict_get_try_int(qdict, "cpu_index", -1);
    bool has_vcpu = vcpu != -1;

    qmp_cancel_vcpu_dirty_limit(has_vcpu, vcpu, &local_err);

    if (local_err) {
        hmp_handle_error(mon, local_err);
    } else {
        monitor_printf(mon, "[Please use 'info vcpu_dirty_limit' to query "
                       "dirty limit for virtual CPU]\n");
    }
}
510
qmp_set_vcpu_dirty_limit(bool has_cpu_index,int64_t cpu_index,uint64_t dirty_rate,Error ** errp)511 void qmp_set_vcpu_dirty_limit(bool has_cpu_index,
512 int64_t cpu_index,
513 uint64_t dirty_rate,
514 Error **errp)
515 {
516 if (!kvm_enabled() || !kvm_dirty_ring_enabled()) {
517 error_setg(errp, "dirty page limit feature requires KVM with"
518 " accelerator property 'dirty-ring-size' set'");
519 return;
520 }
521
522 if (has_cpu_index && !dirtylimit_vcpu_index_valid(cpu_index)) {
523 error_setg(errp, "incorrect cpu index specified");
524 return;
525 }
526
527 if (!dirtylimit_is_allowed()) {
528 error_setg(errp, "can't set dirty page rate limit while"
529 " migration is running");
530 return;
531 }
532
533 if (!dirty_rate) {
534 qmp_cancel_vcpu_dirty_limit(has_cpu_index, cpu_index, errp);
535 return;
536 }
537
538 dirtylimit_state_lock();
539
540 if (!dirtylimit_in_service()) {
541 dirtylimit_init();
542 }
543
544 if (has_cpu_index) {
545 dirtylimit_set_vcpu(cpu_index, dirty_rate, true);
546 } else {
547 dirtylimit_set_all(dirty_rate, true);
548 }
549
550 dirtylimit_state_unlock();
551 }
552
/*
 * HMP handler: set the dirty page rate limit.
 *
 * Reads the mandatory "dirty_rate" and the optional "cpu_index"
 * arguments (an absent index, or -1, selects every vcpu), rejects a
 * negative rate and forwards everything else to the QMP handler.
 * Any error is reported on @mon.
 */
void hmp_set_vcpu_dirty_limit(Monitor *mon, const QDict *qdict)
{
    Error *local_err = NULL;
    int64_t rate = qdict_get_int(qdict, "dirty_rate");
    int64_t vcpu = qdict_get_try_int(qdict, "cpu_index", -1);

    if (rate < 0) {
        error_setg(&local_err, "invalid dirty page limit %" PRId64, rate);
    } else {
        qmp_set_vcpu_dirty_limit(!!(vcpu != -1), vcpu, rate, &local_err);
    }

    hmp_handle_error(mon, local_err);
}
569
570 /* Return the max throttle time of each virtual CPU */
dirtylimit_throttle_time_per_round(void)571 uint64_t dirtylimit_throttle_time_per_round(void)
572 {
573 CPUState *cpu;
574 int64_t max = 0;
575
576 CPU_FOREACH(cpu) {
577 if (cpu->throttle_us_per_full > max) {
578 max = cpu->throttle_us_per_full;
579 }
580 }
581
582 return max;
583 }
584
585 /*
586 * Estimate average dirty ring full time of each virtaul CPU.
587 * Return 0 if guest doesn't dirty memory.
588 */
dirtylimit_ring_full_time(void)589 uint64_t dirtylimit_ring_full_time(void)
590 {
591 CPUState *cpu;
592 uint64_t curr_rate = 0;
593 int nvcpus = 0;
594
595 CPU_FOREACH(cpu) {
596 if (cpu->running) {
597 nvcpus++;
598 curr_rate += vcpu_dirty_rate_get(cpu->cpu_index);
599 }
600 }
601
602 if (!curr_rate || !nvcpus) {
603 return 0;
604 }
605
606 return dirtylimit_dirty_ring_full_time(curr_rate / nvcpus);
607 }
608
dirtylimit_query_vcpu(int cpu_index)609 static struct DirtyLimitInfo *dirtylimit_query_vcpu(int cpu_index)
610 {
611 DirtyLimitInfo *info = NULL;
612
613 info = g_malloc0(sizeof(*info));
614 info->cpu_index = cpu_index;
615 info->limit_rate = dirtylimit_vcpu_get_state(cpu_index)->quota;
616 info->current_rate = vcpu_dirty_rate_get(cpu_index);
617
618 return info;
619 }
620
dirtylimit_query_all(void)621 static struct DirtyLimitInfoList *dirtylimit_query_all(void)
622 {
623 int i, index;
624 DirtyLimitInfo *info = NULL;
625 DirtyLimitInfoList *head = NULL, **tail = &head;
626
627 dirtylimit_state_lock();
628
629 if (!dirtylimit_in_service()) {
630 dirtylimit_state_unlock();
631 return NULL;
632 }
633
634 for (i = 0; i < dirtylimit_state->max_cpus; i++) {
635 index = dirtylimit_state->states[i].cpu_index;
636 if (dirtylimit_vcpu_get_state(index)->enabled) {
637 info = dirtylimit_query_vcpu(index);
638 QAPI_LIST_APPEND(tail, info);
639 }
640 }
641
642 dirtylimit_state_unlock();
643
644 return head;
645 }
646
/*
 * QMP handler: return the dirty limit information of every limited
 * vcpu, as built by dirtylimit_query_all().  @errp is never set here.
 */
struct DirtyLimitInfoList *qmp_query_vcpu_dirty_limit(Error **errp)
{
    return dirtylimit_query_all();
}
651
/*
 * HMP handler: print the quota and the current dirty page rate of
 * every limited vcpu on @mon, or a notice when dirtylimit is not in
 * service.  The queried list is freed automatically on return.
 */
void hmp_info_vcpu_dirty_limit(Monitor *mon, const QDict *qdict)
{
    g_autoptr(DirtyLimitInfoList) list = NULL;
    DirtyLimitInfoList *node;
    Error *local_err = NULL;

    if (!dirtylimit_in_service()) {
        monitor_printf(mon, "Dirty page limit not enabled!\n");
        return;
    }

    list = qmp_query_vcpu_dirty_limit(&local_err);
    if (local_err) {
        hmp_handle_error(mon, local_err);
        return;
    }

    for (node = list; node; node = node->next) {
        DirtyLimitInfo *entry = node->value;

        monitor_printf(mon, "vcpu[%"PRIi64"], limit rate %"PRIi64 " (MB/s),"
                            " current rate %"PRIi64 " (MB/s)\n",
                            entry->cpu_index,
                            entry->limit_rate,
                            entry->current_rate);
    }
}
677