xref: /openbmc/qemu/system/dirtylimit.c (revision af8c14a25477e0ea127ca66d5d9c0710da854906)
1 /*
2  * Dirty page rate limit implementation code
3  *
4  * Copyright (c) 2022 CHINA TELECOM CO.,LTD.
5  *
6  * Authors:
7  *  Hyman Huang(黄勇) <huangy81@chinatelecom.cn>
8  *
9  * This work is licensed under the terms of the GNU GPL, version 2 or later.
10  * See the COPYING file in the top-level directory.
11  */
12 
13 #include "qemu/osdep.h"
14 #include "qemu/main-loop.h"
15 #include "qapi/qapi-commands-migration.h"
16 #include "qapi/qmp/qdict.h"
17 #include "qapi/error.h"
18 #include "sysemu/dirtyrate.h"
19 #include "sysemu/dirtylimit.h"
20 #include "monitor/hmp.h"
21 #include "monitor/monitor.h"
22 #include "exec/memory.h"
23 #include "exec/target_page.h"
24 #include "hw/boards.h"
25 #include "sysemu/kvm.h"
26 #include "trace.h"
27 #include "migration/misc.h"
28 
/*
 * Tolerated gap between a vCPU's quota and its measured dirty page
 * rate.  While the absolute difference is at most
 * DIRTYLIMIT_TOLERANCE_RANGE the throttle is left untouched.
 */
#define DIRTYLIMIT_TOLERANCE_RANGE  25  /* MB/s */
/*
 * If the gap between quota and measured rate, expressed as a
 * percentage of the larger of the two, exceeds
 * DIRTYLIMIT_LINEAR_ADJUSTMENT_PCT, the vCPU sleep time is increased
 * or decreased linearly (in proportion to the gap).
 * Otherwise it is increased or decreased by a fixed step.
 */
#define DIRTYLIMIT_LINEAR_ADJUSTMENT_PCT     50
/*
 * Maximum share, in percent, that the vCPU sleep time may take of one
 * cycle made up of "dirty ring fills up" plus "vCPU sleeps".  The sleep
 * time is capped at this value times the estimated ring-full time.
 */
#define DIRTYLIMIT_THROTTLE_PCT_MAX 99
46 
47 struct {
48     VcpuStat stat;
49     bool running;
50     QemuThread thread;
51 } *vcpu_dirty_rate_stat;
52 
/* Dirty page rate limit settings of a single vCPU */
typedef struct VcpuDirtyLimitState {
    /* Index of the vCPU this entry describes */
    int cpu_index;
    /* Whether a dirty page rate limit is currently set on the vCPU */
    bool enabled;
    /*
     * Dirty page rate quota in MB/s;
     * zero while the limit is not enabled.
     */
    uint64_t quota;
} VcpuDirtyLimitState;
62 
63 struct {
64     VcpuDirtyLimitState *states;
65     /* Max cpus number configured by user */
66     int max_cpus;
67     /* Number of vcpu under dirtylimit */
68     int limited_nvcpu;
69 } *dirtylimit_state;
70 
71 /* protect dirtylimit_state */
72 static QemuMutex dirtylimit_mutex;
73 
74 /* dirtylimit thread quit if dirtylimit_quit is true */
75 static bool dirtylimit_quit;
76 
77 static void vcpu_dirty_rate_stat_collect(void)
78 {
79     VcpuStat stat;
80     int i = 0;
81     int64_t period = DIRTYLIMIT_CALC_TIME_MS;
82 
83     if (migrate_dirty_limit() &&
84         migration_is_active()) {
85         period = migrate_vcpu_dirty_limit_period();
86     }
87 
88     /* calculate vcpu dirtyrate */
89     vcpu_calculate_dirtyrate(period,
90                               &stat,
91                               GLOBAL_DIRTY_LIMIT,
92                               false);
93 
94     for (i = 0; i < stat.nvcpu; i++) {
95         vcpu_dirty_rate_stat->stat.rates[i].id = i;
96         vcpu_dirty_rate_stat->stat.rates[i].dirty_rate =
97             stat.rates[i].dirty_rate;
98     }
99 
100     g_free(stat.rates);
101 }
102 
103 static void *vcpu_dirty_rate_stat_thread(void *opaque)
104 {
105     rcu_register_thread();
106 
107     /* start log sync */
108     global_dirty_log_change(GLOBAL_DIRTY_LIMIT, true);
109 
110     while (qatomic_read(&vcpu_dirty_rate_stat->running)) {
111         vcpu_dirty_rate_stat_collect();
112         if (dirtylimit_in_service()) {
113             dirtylimit_process();
114         }
115     }
116 
117     /* stop log sync */
118     global_dirty_log_change(GLOBAL_DIRTY_LIMIT, false);
119 
120     rcu_unregister_thread();
121     return NULL;
122 }
123 
124 int64_t vcpu_dirty_rate_get(int cpu_index)
125 {
126     DirtyRateVcpu *rates = vcpu_dirty_rate_stat->stat.rates;
127     return qatomic_read_i64(&rates[cpu_index].dirty_rate);
128 }
129 
130 void vcpu_dirty_rate_stat_start(void)
131 {
132     if (qatomic_read(&vcpu_dirty_rate_stat->running)) {
133         return;
134     }
135 
136     qatomic_set(&vcpu_dirty_rate_stat->running, 1);
137     qemu_thread_create(&vcpu_dirty_rate_stat->thread,
138                        "dirtyrate-stat",
139                        vcpu_dirty_rate_stat_thread,
140                        NULL,
141                        QEMU_THREAD_JOINABLE);
142 }
143 
144 void vcpu_dirty_rate_stat_stop(void)
145 {
146     qatomic_set(&vcpu_dirty_rate_stat->running, 0);
147     dirtylimit_state_unlock();
148     bql_unlock();
149     qemu_thread_join(&vcpu_dirty_rate_stat->thread);
150     bql_lock();
151     dirtylimit_state_lock();
152 }
153 
154 void vcpu_dirty_rate_stat_initialize(void)
155 {
156     MachineState *ms = MACHINE(qdev_get_machine());
157     int max_cpus = ms->smp.max_cpus;
158 
159     vcpu_dirty_rate_stat =
160         g_malloc0(sizeof(*vcpu_dirty_rate_stat));
161 
162     vcpu_dirty_rate_stat->stat.nvcpu = max_cpus;
163     vcpu_dirty_rate_stat->stat.rates =
164         g_new0(DirtyRateVcpu, max_cpus);
165 
166     vcpu_dirty_rate_stat->running = false;
167 }
168 
169 void vcpu_dirty_rate_stat_finalize(void)
170 {
171     g_free(vcpu_dirty_rate_stat->stat.rates);
172     vcpu_dirty_rate_stat->stat.rates = NULL;
173 
174     g_free(vcpu_dirty_rate_stat);
175     vcpu_dirty_rate_stat = NULL;
176 }
177 
178 void dirtylimit_state_lock(void)
179 {
180     qemu_mutex_lock(&dirtylimit_mutex);
181 }
182 
183 void dirtylimit_state_unlock(void)
184 {
185     qemu_mutex_unlock(&dirtylimit_mutex);
186 }
187 
188 static void
189 __attribute__((__constructor__)) dirtylimit_mutex_init(void)
190 {
191     qemu_mutex_init(&dirtylimit_mutex);
192 }
193 
194 static inline VcpuDirtyLimitState *dirtylimit_vcpu_get_state(int cpu_index)
195 {
196     return &dirtylimit_state->states[cpu_index];
197 }
198 
199 void dirtylimit_state_initialize(void)
200 {
201     MachineState *ms = MACHINE(qdev_get_machine());
202     int max_cpus = ms->smp.max_cpus;
203     int i;
204 
205     dirtylimit_state = g_malloc0(sizeof(*dirtylimit_state));
206 
207     dirtylimit_state->states =
208             g_new0(VcpuDirtyLimitState, max_cpus);
209 
210     for (i = 0; i < max_cpus; i++) {
211         dirtylimit_state->states[i].cpu_index = i;
212     }
213 
214     dirtylimit_state->max_cpus = max_cpus;
215     trace_dirtylimit_state_initialize(max_cpus);
216 }
217 
218 void dirtylimit_state_finalize(void)
219 {
220     g_free(dirtylimit_state->states);
221     dirtylimit_state->states = NULL;
222 
223     g_free(dirtylimit_state);
224     dirtylimit_state = NULL;
225 
226     trace_dirtylimit_state_finalize();
227 }
228 
229 bool dirtylimit_in_service(void)
230 {
231     return !!dirtylimit_state;
232 }
233 
234 bool dirtylimit_vcpu_index_valid(int cpu_index)
235 {
236     MachineState *ms = MACHINE(qdev_get_machine());
237 
238     return !(cpu_index < 0 ||
239              cpu_index >= ms->smp.max_cpus);
240 }
241 
/*
 * Estimate how long it takes to fill the dirty ring, in microseconds.
 *
 * @dirtyrate: a measured dirty page rate in MB/s
 *
 * The estimate is based on the highest rate ever passed to this
 * function (kept in a function-local static shared by all callers),
 * not on @dirtyrate alone.
 *
 * Returns 0 when no non-zero rate has been seen so far; without this
 * guard the division below would be a division by zero.
 */
static uint64_t dirtylimit_dirty_ring_full_time(uint64_t dirtyrate)
{
    static uint64_t max_dirtyrate;
    uint64_t dirty_ring_size_MiB;

    dirty_ring_size_MiB = qemu_target_pages_to_MiB(kvm_dirty_ring_size());

    if (max_dirtyrate < dirtyrate) {
        max_dirtyrate = dirtyrate;
    }

    /* Only zero rates seen so far: nothing to divide by */
    if (max_dirtyrate == 0) {
        return 0;
    }

    /* MiB / (MB/s) gives seconds; scale to microseconds */
    return dirty_ring_size_MiB * 1000000 / max_dirtyrate;
}
255 
256 static inline bool dirtylimit_done(uint64_t quota,
257                                    uint64_t current)
258 {
259     uint64_t min, max;
260 
261     min = MIN(quota, current);
262     max = MAX(quota, current);
263 
264     return ((max - min) <= DIRTYLIMIT_TOLERANCE_RANGE) ? true : false;
265 }
266 
267 static inline bool
268 dirtylimit_need_linear_adjustment(uint64_t quota,
269                                   uint64_t current)
270 {
271     uint64_t min, max;
272 
273     min = MIN(quota, current);
274     max = MAX(quota, current);
275 
276     return ((max - min) * 100 / max) > DIRTYLIMIT_LINEAR_ADJUSTMENT_PCT;
277 }
278 
279 static void dirtylimit_set_throttle(CPUState *cpu,
280                                     uint64_t quota,
281                                     uint64_t current)
282 {
283     int64_t ring_full_time_us = 0;
284     uint64_t sleep_pct = 0;
285     uint64_t throttle_us = 0;
286 
287     if (current == 0) {
288         cpu->throttle_us_per_full = 0;
289         return;
290     }
291 
292     ring_full_time_us = dirtylimit_dirty_ring_full_time(current);
293 
294     if (dirtylimit_need_linear_adjustment(quota, current)) {
295         if (quota < current) {
296             sleep_pct = (current - quota) * 100 / current;
297             throttle_us =
298                 ring_full_time_us * sleep_pct / (double)(100 - sleep_pct);
299             cpu->throttle_us_per_full += throttle_us;
300         } else {
301             sleep_pct = (quota - current) * 100 / quota;
302             throttle_us =
303                 ring_full_time_us * sleep_pct / (double)(100 - sleep_pct);
304             cpu->throttle_us_per_full -= throttle_us;
305         }
306 
307         trace_dirtylimit_throttle_pct(cpu->cpu_index,
308                                       sleep_pct,
309                                       throttle_us);
310     } else {
311         if (quota < current) {
312             cpu->throttle_us_per_full += ring_full_time_us / 10;
313         } else {
314             cpu->throttle_us_per_full -= ring_full_time_us / 10;
315         }
316     }
317 
318     /*
319      * TODO: in the big kvm_dirty_ring_size case (eg: 65536, or other scenario),
320      *       current dirty page rate may never reach the quota, we should stop
321      *       increasing sleep time?
322      */
323     cpu->throttle_us_per_full = MIN(cpu->throttle_us_per_full,
324         ring_full_time_us * DIRTYLIMIT_THROTTLE_PCT_MAX);
325 
326     cpu->throttle_us_per_full = MAX(cpu->throttle_us_per_full, 0);
327 }
328 
329 static void dirtylimit_adjust_throttle(CPUState *cpu)
330 {
331     uint64_t quota = 0;
332     uint64_t current = 0;
333     int cpu_index = cpu->cpu_index;
334 
335     quota = dirtylimit_vcpu_get_state(cpu_index)->quota;
336     current = vcpu_dirty_rate_get(cpu_index);
337 
338     if (!dirtylimit_done(quota, current)) {
339         dirtylimit_set_throttle(cpu, quota, current);
340     }
341 
342     return;
343 }
344 
345 void dirtylimit_process(void)
346 {
347     CPUState *cpu;
348 
349     if (!qatomic_read(&dirtylimit_quit)) {
350         dirtylimit_state_lock();
351 
352         if (!dirtylimit_in_service()) {
353             dirtylimit_state_unlock();
354             return;
355         }
356 
357         CPU_FOREACH(cpu) {
358             if (!dirtylimit_vcpu_get_state(cpu->cpu_index)->enabled) {
359                 continue;
360             }
361             dirtylimit_adjust_throttle(cpu);
362         }
363         dirtylimit_state_unlock();
364     }
365 }
366 
367 void dirtylimit_change(bool start)
368 {
369     if (start) {
370         qatomic_set(&dirtylimit_quit, 0);
371     } else {
372         qatomic_set(&dirtylimit_quit, 1);
373     }
374 }
375 
376 void dirtylimit_set_vcpu(int cpu_index,
377                          uint64_t quota,
378                          bool enable)
379 {
380     trace_dirtylimit_set_vcpu(cpu_index, quota);
381 
382     if (enable) {
383         dirtylimit_state->states[cpu_index].quota = quota;
384         if (!dirtylimit_vcpu_get_state(cpu_index)->enabled) {
385             dirtylimit_state->limited_nvcpu++;
386         }
387     } else {
388         dirtylimit_state->states[cpu_index].quota = 0;
389         if (dirtylimit_state->states[cpu_index].enabled) {
390             dirtylimit_state->limited_nvcpu--;
391         }
392     }
393 
394     dirtylimit_state->states[cpu_index].enabled = enable;
395 }
396 
397 void dirtylimit_set_all(uint64_t quota,
398                         bool enable)
399 {
400     MachineState *ms = MACHINE(qdev_get_machine());
401     int max_cpus = ms->smp.max_cpus;
402     int i;
403 
404     for (i = 0; i < max_cpus; i++) {
405         dirtylimit_set_vcpu(i, quota, enable);
406     }
407 }
408 
409 void dirtylimit_vcpu_execute(CPUState *cpu)
410 {
411     if (cpu->throttle_us_per_full) {
412         dirtylimit_state_lock();
413 
414         if (dirtylimit_in_service() &&
415             dirtylimit_vcpu_get_state(cpu->cpu_index)->enabled) {
416             dirtylimit_state_unlock();
417             trace_dirtylimit_vcpu_execute(cpu->cpu_index,
418                     cpu->throttle_us_per_full);
419 
420             g_usleep(cpu->throttle_us_per_full);
421             return;
422         }
423 
424         dirtylimit_state_unlock();
425     }
426 }
427 
/*
 * Bring the dirty limit into service: allocate the limit state, allow
 * throttle adjustment, allocate the rate statistics and start the
 * collecting thread - in that order.
 */
static void dirtylimit_init(void)
{
    /* Limit state first, so dirtylimit_in_service() becomes true */
    dirtylimit_state_initialize();
    dirtylimit_change(true);

    /* Statistics must exist before the thread that fills them starts */
    vcpu_dirty_rate_stat_initialize();
    vcpu_dirty_rate_stat_start();
}
435 
/*
 * Take the dirty limit out of service: stop and join the collecting
 * thread, free the rate statistics, disallow throttle adjustment and
 * free the limit state - in that order.
 */
static void dirtylimit_cleanup(void)
{
    /* The thread must be gone before the data it uses is freed */
    vcpu_dirty_rate_stat_stop();
    vcpu_dirty_rate_stat_finalize();

    dirtylimit_change(false);
    dirtylimit_state_finalize();
}
443 
/*
 * Setting or cancelling the dirty page rate limit is refused while a
 * migration with the dirty-limit capability is running and the dirty
 * limit is in service - unless the request comes from the migration
 * thread itself.
 */
static bool dirtylimit_is_allowed(void)
{
    bool owned_by_migration = migration_is_running() &&
                              !migration_thread_is_self() &&
                              migrate_dirty_limit() &&
                              dirtylimit_in_service();

    return !owned_by_migration;
}
458 
459 void qmp_cancel_vcpu_dirty_limit(bool has_cpu_index,
460                                  int64_t cpu_index,
461                                  Error **errp)
462 {
463     if (!kvm_enabled() || !kvm_dirty_ring_enabled()) {
464         return;
465     }
466 
467     if (has_cpu_index && !dirtylimit_vcpu_index_valid(cpu_index)) {
468         error_setg(errp, "incorrect cpu index specified");
469         return;
470     }
471 
472     if (!dirtylimit_is_allowed()) {
473         error_setg(errp, "can't cancel dirty page rate limit while"
474                    " migration is running");
475         return;
476     }
477 
478     if (!dirtylimit_in_service()) {
479         return;
480     }
481 
482     dirtylimit_state_lock();
483 
484     if (has_cpu_index) {
485         dirtylimit_set_vcpu(cpu_index, 0, false);
486     } else {
487         dirtylimit_set_all(0, false);
488     }
489 
490     if (!dirtylimit_state->limited_nvcpu) {
491         dirtylimit_cleanup();
492     }
493 
494     dirtylimit_state_unlock();
495 }
496 
497 void hmp_cancel_vcpu_dirty_limit(Monitor *mon, const QDict *qdict)
498 {
499     int64_t cpu_index = qdict_get_try_int(qdict, "cpu_index", -1);
500     Error *err = NULL;
501 
502     qmp_cancel_vcpu_dirty_limit(!!(cpu_index != -1), cpu_index, &err);
503     if (err) {
504         hmp_handle_error(mon, err);
505         return;
506     }
507 
508     monitor_printf(mon, "[Please use 'info vcpu_dirty_limit' to query "
509                    "dirty limit for virtual CPU]\n");
510 }
511 
512 void qmp_set_vcpu_dirty_limit(bool has_cpu_index,
513                               int64_t cpu_index,
514                               uint64_t dirty_rate,
515                               Error **errp)
516 {
517     if (!kvm_enabled() || !kvm_dirty_ring_enabled()) {
518         error_setg(errp, "dirty page limit feature requires KVM with"
519                    " accelerator property 'dirty-ring-size' set'");
520         return;
521     }
522 
523     if (has_cpu_index && !dirtylimit_vcpu_index_valid(cpu_index)) {
524         error_setg(errp, "incorrect cpu index specified");
525         return;
526     }
527 
528     if (!dirtylimit_is_allowed()) {
529         error_setg(errp, "can't set dirty page rate limit while"
530                    " migration is running");
531         return;
532     }
533 
534     if (!dirty_rate) {
535         qmp_cancel_vcpu_dirty_limit(has_cpu_index, cpu_index, errp);
536         return;
537     }
538 
539     dirtylimit_state_lock();
540 
541     if (!dirtylimit_in_service()) {
542         dirtylimit_init();
543     }
544 
545     if (has_cpu_index) {
546         dirtylimit_set_vcpu(cpu_index, dirty_rate, true);
547     } else {
548         dirtylimit_set_all(dirty_rate, true);
549     }
550 
551     dirtylimit_state_unlock();
552 }
553 
554 void hmp_set_vcpu_dirty_limit(Monitor *mon, const QDict *qdict)
555 {
556     int64_t dirty_rate = qdict_get_int(qdict, "dirty_rate");
557     int64_t cpu_index = qdict_get_try_int(qdict, "cpu_index", -1);
558     Error *err = NULL;
559 
560     if (dirty_rate < 0) {
561         error_setg(&err, "invalid dirty page limit %" PRId64, dirty_rate);
562         goto out;
563     }
564 
565     qmp_set_vcpu_dirty_limit(!!(cpu_index != -1), cpu_index, dirty_rate, &err);
566 
567 out:
568     hmp_handle_error(mon, err);
569 }
570 
571 /* Return the max throttle time of each virtual CPU */
572 uint64_t dirtylimit_throttle_time_per_round(void)
573 {
574     CPUState *cpu;
575     int64_t max = 0;
576 
577     CPU_FOREACH(cpu) {
578         if (cpu->throttle_us_per_full > max) {
579             max = cpu->throttle_us_per_full;
580         }
581     }
582 
583     return max;
584 }
585 
586 /*
587  * Estimate average dirty ring full time of each virtaul CPU.
588  * Return 0 if guest doesn't dirty memory.
589  */
590 uint64_t dirtylimit_ring_full_time(void)
591 {
592     CPUState *cpu;
593     uint64_t curr_rate = 0;
594     int nvcpus = 0;
595 
596     CPU_FOREACH(cpu) {
597         if (cpu->running) {
598             nvcpus++;
599             curr_rate += vcpu_dirty_rate_get(cpu->cpu_index);
600         }
601     }
602 
603     if (!curr_rate || !nvcpus) {
604         return 0;
605     }
606 
607     return dirtylimit_dirty_ring_full_time(curr_rate / nvcpus);
608 }
609 
610 static struct DirtyLimitInfo *dirtylimit_query_vcpu(int cpu_index)
611 {
612     DirtyLimitInfo *info = NULL;
613 
614     info = g_malloc0(sizeof(*info));
615     info->cpu_index = cpu_index;
616     info->limit_rate = dirtylimit_vcpu_get_state(cpu_index)->quota;
617     info->current_rate = vcpu_dirty_rate_get(cpu_index);
618 
619     return info;
620 }
621 
622 static struct DirtyLimitInfoList *dirtylimit_query_all(void)
623 {
624     int i, index;
625     DirtyLimitInfo *info = NULL;
626     DirtyLimitInfoList *head = NULL, **tail = &head;
627 
628     dirtylimit_state_lock();
629 
630     if (!dirtylimit_in_service()) {
631         dirtylimit_state_unlock();
632         return NULL;
633     }
634 
635     for (i = 0; i < dirtylimit_state->max_cpus; i++) {
636         index = dirtylimit_state->states[i].cpu_index;
637         if (dirtylimit_vcpu_get_state(index)->enabled) {
638             info = dirtylimit_query_vcpu(index);
639             QAPI_LIST_APPEND(tail, info);
640         }
641     }
642 
643     dirtylimit_state_unlock();
644 
645     return head;
646 }
647 
648 struct DirtyLimitInfoList *qmp_query_vcpu_dirty_limit(Error **errp)
649 {
650     return dirtylimit_query_all();
651 }
652 
653 void hmp_info_vcpu_dirty_limit(Monitor *mon, const QDict *qdict)
654 {
655     DirtyLimitInfoList *info;
656     g_autoptr(DirtyLimitInfoList) head = NULL;
657     Error *err = NULL;
658 
659     if (!dirtylimit_in_service()) {
660         monitor_printf(mon, "Dirty page limit not enabled!\n");
661         return;
662     }
663 
664     head = qmp_query_vcpu_dirty_limit(&err);
665     if (err) {
666         hmp_handle_error(mon, err);
667         return;
668     }
669 
670     for (info = head; info != NULL; info = info->next) {
671         monitor_printf(mon, "vcpu[%"PRIi64"], limit rate %"PRIi64 " (MB/s),"
672                             " current rate %"PRIi64 " (MB/s)\n",
673                             info->value->cpu_index,
674                             info->value->limit_rate,
675                             info->value->current_rate);
676     }
677 }
678