xref: /openbmc/qemu/system/dirtylimit.c (revision 714f3312)
1 /*
2  * Dirty page rate limit implementation code
3  *
4  * Copyright (c) 2022 CHINA TELECOM CO.,LTD.
5  *
6  * Authors:
7  *  Hyman Huang(黄勇) <huangy81@chinatelecom.cn>
8  *
9  * This work is licensed under the terms of the GNU GPL, version 2 or later.
10  * See the COPYING file in the top-level directory.
11  */
12 
13 #include "qemu/osdep.h"
14 #include "qemu/main-loop.h"
15 #include "qapi/qapi-commands-migration.h"
16 #include "qapi/qmp/qdict.h"
17 #include "qapi/error.h"
18 #include "sysemu/dirtyrate.h"
19 #include "sysemu/dirtylimit.h"
20 #include "monitor/hmp.h"
21 #include "monitor/monitor.h"
22 #include "exec/memory.h"
23 #include "exec/target_page.h"
24 #include "hw/boards.h"
25 #include "sysemu/kvm.h"
26 #include "trace.h"
27 #include "migration/misc.h"
28 #include "migration/migration.h"
29 
30 /*
31  * Dirtylimit stop working if dirty page rate error
32  * value less than DIRTYLIMIT_TOLERANCE_RANGE
33  */
34 #define DIRTYLIMIT_TOLERANCE_RANGE  25  /* MB/s */
35 /*
36  * Plus or minus vcpu sleep time linearly if dirty
37  * page rate error value percentage over
38  * DIRTYLIMIT_LINEAR_ADJUSTMENT_PCT.
39  * Otherwise, plus or minus a fixed vcpu sleep time.
40  */
41 #define DIRTYLIMIT_LINEAR_ADJUSTMENT_PCT     50
42 /*
43  * Max vcpu sleep time percentage during a cycle
44  * composed of dirty ring full and sleep time.
45  */
46 #define DIRTYLIMIT_THROTTLE_PCT_MAX 99
47 
48 struct {
49     VcpuStat stat;
50     bool running;
51     QemuThread thread;
52 } *vcpu_dirty_rate_stat;
53 
54 typedef struct VcpuDirtyLimitState {
55     int cpu_index;
56     bool enabled;
57     /*
58      * Quota dirty page rate, unit is MB/s
59      * zero if not enabled.
60      */
61     uint64_t quota;
62 } VcpuDirtyLimitState;
63 
64 struct {
65     VcpuDirtyLimitState *states;
66     /* Max cpus number configured by user */
67     int max_cpus;
68     /* Number of vcpu under dirtylimit */
69     int limited_nvcpu;
70 } *dirtylimit_state;
71 
72 /* protect dirtylimit_state */
73 static QemuMutex dirtylimit_mutex;
74 
75 /* dirtylimit thread quit if dirtylimit_quit is true */
76 static bool dirtylimit_quit;
77 
78 static void vcpu_dirty_rate_stat_collect(void)
79 {
80     VcpuStat stat;
81     int i = 0;
82     int64_t period = DIRTYLIMIT_CALC_TIME_MS;
83 
84     if (migrate_dirty_limit() &&
85         migration_is_active()) {
86         period = migrate_vcpu_dirty_limit_period();
87     }
88 
89     /* calculate vcpu dirtyrate */
90     vcpu_calculate_dirtyrate(period,
91                               &stat,
92                               GLOBAL_DIRTY_LIMIT,
93                               false);
94 
95     for (i = 0; i < stat.nvcpu; i++) {
96         vcpu_dirty_rate_stat->stat.rates[i].id = i;
97         vcpu_dirty_rate_stat->stat.rates[i].dirty_rate =
98             stat.rates[i].dirty_rate;
99     }
100 
101     g_free(stat.rates);
102 }
103 
/*
 * Body of the "dirtyrate-stat" thread: keep global dirty logging
 * enabled and repeatedly sample per-vCPU dirty rates until the
 * running flag is cleared, feeding the throttle after each sample
 * while a dirty limit is in service.
 */
static void *vcpu_dirty_rate_stat_thread(void *opaque)
{
    rcu_register_thread();

    /* start log sync */
    global_dirty_log_change(GLOBAL_DIRTY_LIMIT, true);

    while (qatomic_read(&vcpu_dirty_rate_stat->running)) {
        vcpu_dirty_rate_stat_collect();
        if (dirtylimit_in_service()) {
            dirtylimit_process();
        }
    }

    /* stop log sync */
    global_dirty_log_change(GLOBAL_DIRTY_LIMIT, false);

    rcu_unregister_thread();
    return NULL;
}
124 
125 int64_t vcpu_dirty_rate_get(int cpu_index)
126 {
127     DirtyRateVcpu *rates = vcpu_dirty_rate_stat->stat.rates;
128     return qatomic_read_i64(&rates[cpu_index].dirty_rate);
129 }
130 
131 void vcpu_dirty_rate_stat_start(void)
132 {
133     if (qatomic_read(&vcpu_dirty_rate_stat->running)) {
134         return;
135     }
136 
137     qatomic_set(&vcpu_dirty_rate_stat->running, 1);
138     qemu_thread_create(&vcpu_dirty_rate_stat->thread,
139                        "dirtyrate-stat",
140                        vcpu_dirty_rate_stat_thread,
141                        NULL,
142                        QEMU_THREAD_JOINABLE);
143 }
144 
/*
 * Stop the collection thread and wait for it to exit.
 *
 * Both the dirtylimit mutex and the BQL are dropped around the join:
 * the collector presumably can block on either of them inside
 * vcpu_calculate_dirtyrate()/dirtylimit_process(), so joining while
 * holding them could deadlock — TODO confirm against those callees.
 * The locks are re-acquired afterwards to restore the caller's state.
 */
void vcpu_dirty_rate_stat_stop(void)
{
    qatomic_set(&vcpu_dirty_rate_stat->running, 0);
    dirtylimit_state_unlock();
    bql_unlock();
    qemu_thread_join(&vcpu_dirty_rate_stat->thread);
    bql_lock();
    dirtylimit_state_lock();
}
154 
155 void vcpu_dirty_rate_stat_initialize(void)
156 {
157     MachineState *ms = MACHINE(qdev_get_machine());
158     int max_cpus = ms->smp.max_cpus;
159 
160     vcpu_dirty_rate_stat =
161         g_malloc0(sizeof(*vcpu_dirty_rate_stat));
162 
163     vcpu_dirty_rate_stat->stat.nvcpu = max_cpus;
164     vcpu_dirty_rate_stat->stat.rates =
165         g_new0(DirtyRateVcpu, max_cpus);
166 
167     vcpu_dirty_rate_stat->running = false;
168 }
169 
170 void vcpu_dirty_rate_stat_finalize(void)
171 {
172     g_free(vcpu_dirty_rate_stat->stat.rates);
173     vcpu_dirty_rate_stat->stat.rates = NULL;
174 
175     g_free(vcpu_dirty_rate_stat);
176     vcpu_dirty_rate_stat = NULL;
177 }
178 
/* Acquire the mutex protecting dirtylimit_state. */
void dirtylimit_state_lock(void)
{
    qemu_mutex_lock(&dirtylimit_mutex);
}
183 
/* Release the mutex protecting dirtylimit_state. */
void dirtylimit_state_unlock(void)
{
    qemu_mutex_unlock(&dirtylimit_mutex);
}
188 
/* Initialize dirtylimit_mutex before main() runs, via ELF constructor. */
static void
__attribute__((__constructor__)) dirtylimit_mutex_init(void)
{
    qemu_mutex_init(&dirtylimit_mutex);
}
194 
/*
 * Return the dirty-limit slot for one vCPU. Caller must ensure
 * dirtylimit_state is non-NULL and cpu_index is in range.
 */
static inline VcpuDirtyLimitState *dirtylimit_vcpu_get_state(int cpu_index)
{
    return &dirtylimit_state->states[cpu_index];
}
199 
200 void dirtylimit_state_initialize(void)
201 {
202     MachineState *ms = MACHINE(qdev_get_machine());
203     int max_cpus = ms->smp.max_cpus;
204     int i;
205 
206     dirtylimit_state = g_malloc0(sizeof(*dirtylimit_state));
207 
208     dirtylimit_state->states =
209             g_new0(VcpuDirtyLimitState, max_cpus);
210 
211     for (i = 0; i < max_cpus; i++) {
212         dirtylimit_state->states[i].cpu_index = i;
213     }
214 
215     dirtylimit_state->max_cpus = max_cpus;
216     trace_dirtylimit_state_initialize(max_cpus);
217 }
218 
219 void dirtylimit_state_finalize(void)
220 {
221     g_free(dirtylimit_state->states);
222     dirtylimit_state->states = NULL;
223 
224     g_free(dirtylimit_state);
225     dirtylimit_state = NULL;
226 
227     trace_dirtylimit_state_finalize();
228 }
229 
230 bool dirtylimit_in_service(void)
231 {
232     return !!dirtylimit_state;
233 }
234 
235 bool dirtylimit_vcpu_index_valid(int cpu_index)
236 {
237     MachineState *ms = MACHINE(qdev_get_machine());
238 
239     return !(cpu_index < 0 ||
240              cpu_index >= ms->smp.max_cpus);
241 }
242 
/*
 * Estimate how long (in microseconds) a vCPU dirtying at the peak
 * observed rate takes to fill its KVM dirty ring.
 *
 * The peak rate seen so far is remembered in a function-local static
 * so the estimate only shrinks over time, never grows.
 *
 * Returns 0 if no non-zero rate has ever been observed (previously
 * this case divided by zero; all current callers pre-check for a
 * non-zero rate, so the guard does not change their behavior).
 */
static uint64_t dirtylimit_dirty_ring_full_time(uint64_t dirtyrate)
{
    static uint64_t max_dirtyrate;
    uint64_t dirty_ring_size_MiB;

    dirty_ring_size_MiB = qemu_target_pages_to_MiB(kvm_dirty_ring_size());

    if (max_dirtyrate < dirtyrate) {
        max_dirtyrate = dirtyrate;
    }

    if (!max_dirtyrate) {
        /* Guest has not dirtied memory yet; avoid division by zero. */
        return 0;
    }

    /* MiB * (us per second) / (MiB per second) => microseconds. */
    return dirty_ring_size_MiB * 1000000 / max_dirtyrate;
}
256 
257 static inline bool dirtylimit_done(uint64_t quota,
258                                    uint64_t current)
259 {
260     uint64_t min, max;
261 
262     min = MIN(quota, current);
263     max = MAX(quota, current);
264 
265     return ((max - min) <= DIRTYLIMIT_TOLERANCE_RANGE) ? true : false;
266 }
267 
268 static inline bool
269 dirtylimit_need_linear_adjustment(uint64_t quota,
270                                   uint64_t current)
271 {
272     uint64_t min, max;
273 
274     min = MIN(quota, current);
275     max = MAX(quota, current);
276 
277     return ((max - min) * 100 / max) > DIRTYLIMIT_LINEAR_ADJUSTMENT_PCT;
278 }
279 
/*
 * Recompute cpu->throttle_us_per_full — the time a vCPU sleeps each
 * time its dirty ring fills — so that the measured dirty page rate
 * ("current", MB/s) converges towards the configured quota (MB/s).
 */
static void dirtylimit_set_throttle(CPUState *cpu,
                                    uint64_t quota,
                                    uint64_t current)
{
    int64_t ring_full_time_us = 0;
    uint64_t sleep_pct = 0;
    uint64_t throttle_us = 0;

    /* Guest is not dirtying memory at all: drop any throttling. */
    if (current == 0) {
        cpu->throttle_us_per_full = 0;
        return;
    }

    ring_full_time_us = dirtylimit_dirty_ring_full_time(current);

    if (dirtylimit_need_linear_adjustment(quota, current)) {
        /*
         * Large relative error: scale the sleep-time change by the
         * error percentage so convergence is fast. sleep_pct is the
         * fraction of a ring-full cycle the vCPU should spend asleep.
         */
        if (quota < current) {
            sleep_pct = (current - quota) * 100 / current;
            throttle_us =
                ring_full_time_us * sleep_pct / (double)(100 - sleep_pct);
            cpu->throttle_us_per_full += throttle_us;
        } else {
            sleep_pct = (quota - current) * 100 / quota;
            throttle_us =
                ring_full_time_us * sleep_pct / (double)(100 - sleep_pct);
            cpu->throttle_us_per_full -= throttle_us;
        }

        trace_dirtylimit_throttle_pct(cpu->cpu_index,
                                      sleep_pct,
                                      throttle_us);
    } else {
        /* Small error: nudge by a fixed 10% of the ring-full time. */
        if (quota < current) {
            cpu->throttle_us_per_full += ring_full_time_us / 10;
        } else {
            cpu->throttle_us_per_full -= ring_full_time_us / 10;
        }
    }

    /*
     * TODO: in the big kvm_dirty_ring_size case (eg: 65536, or other scenario),
     *       current dirty page rate may never reach the quota, we should stop
     *       increasing sleep time?
     */
    /* Clamp sleep time to at most DIRTYLIMIT_THROTTLE_PCT_MAX% of a cycle. */
    cpu->throttle_us_per_full = MIN(cpu->throttle_us_per_full,
        ring_full_time_us * DIRTYLIMIT_THROTTLE_PCT_MAX);

    /* And never let it go negative after a downward adjustment. */
    cpu->throttle_us_per_full = MAX(cpu->throttle_us_per_full, 0);
}
329 
330 static void dirtylimit_adjust_throttle(CPUState *cpu)
331 {
332     uint64_t quota = 0;
333     uint64_t current = 0;
334     int cpu_index = cpu->cpu_index;
335 
336     quota = dirtylimit_vcpu_get_state(cpu_index)->quota;
337     current = vcpu_dirty_rate_get(cpu_index);
338 
339     if (!dirtylimit_done(quota, current)) {
340         dirtylimit_set_throttle(cpu, quota, current);
341     }
342 
343     return;
344 }
345 
/*
 * One throttling pass, run by the collection thread after each
 * measurement cycle: retune the sleep time of every vCPU that
 * currently has a dirty page rate limit enabled. A no-op once
 * dirtylimit_quit has been raised.
 */
void dirtylimit_process(void)
{
    CPUState *cpu;

    if (!qatomic_read(&dirtylimit_quit)) {
        dirtylimit_state_lock();

        /* The limit may have been torn down since the last cycle. */
        if (!dirtylimit_in_service()) {
            dirtylimit_state_unlock();
            return;
        }

        CPU_FOREACH(cpu) {
            if (!dirtylimit_vcpu_get_state(cpu->cpu_index)->enabled) {
                continue;
            }
            dirtylimit_adjust_throttle(cpu);
        }
        dirtylimit_state_unlock();
    }
}
367 
368 void dirtylimit_change(bool start)
369 {
370     if (start) {
371         qatomic_set(&dirtylimit_quit, 0);
372     } else {
373         qatomic_set(&dirtylimit_quit, 1);
374     }
375 }
376 
377 void dirtylimit_set_vcpu(int cpu_index,
378                          uint64_t quota,
379                          bool enable)
380 {
381     trace_dirtylimit_set_vcpu(cpu_index, quota);
382 
383     if (enable) {
384         dirtylimit_state->states[cpu_index].quota = quota;
385         if (!dirtylimit_vcpu_get_state(cpu_index)->enabled) {
386             dirtylimit_state->limited_nvcpu++;
387         }
388     } else {
389         dirtylimit_state->states[cpu_index].quota = 0;
390         if (dirtylimit_state->states[cpu_index].enabled) {
391             dirtylimit_state->limited_nvcpu--;
392         }
393     }
394 
395     dirtylimit_state->states[cpu_index].enabled = enable;
396 }
397 
398 void dirtylimit_set_all(uint64_t quota,
399                         bool enable)
400 {
401     MachineState *ms = MACHINE(qdev_get_machine());
402     int max_cpus = ms->smp.max_cpus;
403     int i;
404 
405     for (i = 0; i < max_cpus; i++) {
406         dirtylimit_set_vcpu(i, quota, enable);
407     }
408 }
409 
/*
 * Called from a vCPU thread when its dirty ring fills: sleep for the
 * computed throttle time if a limit is currently in force for this
 * vCPU. The mutex is released before sleeping so other threads can
 * update dirtylimit state while this vCPU is throttled.
 */
void dirtylimit_vcpu_execute(CPUState *cpu)
{
    if (cpu->throttle_us_per_full) {
        dirtylimit_state_lock();

        if (dirtylimit_in_service() &&
            dirtylimit_vcpu_get_state(cpu->cpu_index)->enabled) {
            dirtylimit_state_unlock();
            trace_dirtylimit_vcpu_execute(cpu->cpu_index,
                    cpu->throttle_us_per_full);

            g_usleep(cpu->throttle_us_per_full);
            return;
        }

        dirtylimit_state_unlock();
    }
}
428 
/* Bring up the full machinery: state, quit flag, rate collection. */
static void dirtylimit_init(void)
{
    dirtylimit_state_initialize();
    dirtylimit_change(true);
    vcpu_dirty_rate_stat_initialize();
    vcpu_dirty_rate_stat_start();
}
436 
/* Tear everything down, in the reverse order of dirtylimit_init(). */
static void dirtylimit_cleanup(void)
{
    vcpu_dirty_rate_stat_stop();
    vcpu_dirty_rate_stat_finalize();
    dirtylimit_change(false);
    dirtylimit_state_finalize();
}
444 
445 /*
446  * dirty page rate limit is not allowed to set if migration
447  * is running with dirty-limit capability enabled.
448  */
449 static bool dirtylimit_is_allowed(void)
450 {
451     MigrationState *ms = migrate_get_current();
452 
453     if (migration_is_running() &&
454         (!qemu_thread_is_self(&ms->thread)) &&
455         migrate_dirty_limit() &&
456         dirtylimit_in_service()) {
457         return false;
458     }
459     return true;
460 }
461 
/*
 * QMP cancel-vcpu-dirty-limit: drop the limit of one vCPU
 * (has_cpu_index) or all vCPUs, tearing down the whole machinery
 * once no vCPU remains limited.
 */
void qmp_cancel_vcpu_dirty_limit(bool has_cpu_index,
                                 int64_t cpu_index,
                                 Error **errp)
{
    /*
     * Without KVM dirty-ring support the feature can never have been
     * enabled, so cancelling silently succeeds as a no-op.
     */
    if (!kvm_enabled() || !kvm_dirty_ring_enabled()) {
        return;
    }

    if (has_cpu_index && !dirtylimit_vcpu_index_valid(cpu_index)) {
        error_setg(errp, "incorrect cpu index specified");
        return;
    }

    /* Refuse while a dirty-limit migration owns the throttle. */
    if (!dirtylimit_is_allowed()) {
        error_setg(errp, "can't cancel dirty page rate limit while"
                   " migration is running");
        return;
    }

    /* Nothing in service: cancelling is a no-op. */
    if (!dirtylimit_in_service()) {
        return;
    }

    dirtylimit_state_lock();

    if (has_cpu_index) {
        dirtylimit_set_vcpu(cpu_index, 0, false);
    } else {
        dirtylimit_set_all(0, false);
    }

    /* Last limited vCPU gone: shut the whole machinery down. */
    if (!dirtylimit_state->limited_nvcpu) {
        dirtylimit_cleanup();
    }

    dirtylimit_state_unlock();
}
499 
500 void hmp_cancel_vcpu_dirty_limit(Monitor *mon, const QDict *qdict)
501 {
502     int64_t cpu_index = qdict_get_try_int(qdict, "cpu_index", -1);
503     Error *err = NULL;
504 
505     qmp_cancel_vcpu_dirty_limit(!!(cpu_index != -1), cpu_index, &err);
506     if (err) {
507         hmp_handle_error(mon, err);
508         return;
509     }
510 
511     monitor_printf(mon, "[Please use 'info vcpu_dirty_limit' to query "
512                    "dirty limit for virtual CPU]\n");
513 }
514 
/*
 * QMP set-vcpu-dirty-limit: set a dirty page rate quota (MB/s) for
 * one vCPU (has_cpu_index) or all vCPUs, lazily starting the
 * machinery on first use. A zero rate means "remove the limit".
 */
void qmp_set_vcpu_dirty_limit(bool has_cpu_index,
                              int64_t cpu_index,
                              uint64_t dirty_rate,
                              Error **errp)
{
    /* The throttle relies on the KVM dirty ring full event. */
    if (!kvm_enabled() || !kvm_dirty_ring_enabled()) {
        error_setg(errp, "dirty page limit feature requires KVM with"
                   " accelerator property 'dirty-ring-size' set'");
        return;
    }

    if (has_cpu_index && !dirtylimit_vcpu_index_valid(cpu_index)) {
        error_setg(errp, "incorrect cpu index specified");
        return;
    }

    /* Refuse while a dirty-limit migration owns the throttle. */
    if (!dirtylimit_is_allowed()) {
        error_setg(errp, "can't set dirty page rate limit while"
                   " migration is running");
        return;
    }

    /* Setting a zero quota is equivalent to cancelling the limit. */
    if (!dirty_rate) {
        qmp_cancel_vcpu_dirty_limit(has_cpu_index, cpu_index, errp);
        return;
    }

    dirtylimit_state_lock();

    /* First limit ever set: bring up state and the collector thread. */
    if (!dirtylimit_in_service()) {
        dirtylimit_init();
    }

    if (has_cpu_index) {
        dirtylimit_set_vcpu(cpu_index, dirty_rate, true);
    } else {
        dirtylimit_set_all(dirty_rate, true);
    }

    dirtylimit_state_unlock();
}
556 
557 void hmp_set_vcpu_dirty_limit(Monitor *mon, const QDict *qdict)
558 {
559     int64_t dirty_rate = qdict_get_int(qdict, "dirty_rate");
560     int64_t cpu_index = qdict_get_try_int(qdict, "cpu_index", -1);
561     Error *err = NULL;
562 
563     if (dirty_rate < 0) {
564         error_setg(&err, "invalid dirty page limit %" PRId64, dirty_rate);
565         goto out;
566     }
567 
568     qmp_set_vcpu_dirty_limit(!!(cpu_index != -1), cpu_index, dirty_rate, &err);
569 
570 out:
571     hmp_handle_error(mon, err);
572 }
573 
574 /* Return the max throttle time of each virtual CPU */
575 uint64_t dirtylimit_throttle_time_per_round(void)
576 {
577     CPUState *cpu;
578     int64_t max = 0;
579 
580     CPU_FOREACH(cpu) {
581         if (cpu->throttle_us_per_full > max) {
582             max = cpu->throttle_us_per_full;
583         }
584     }
585 
586     return max;
587 }
588 
/*
 * Estimate average dirty ring full time of each virtual CPU.
 * Return 0 if guest doesn't dirty memory.
 */
uint64_t dirtylimit_ring_full_time(void)
{
    CPUState *cpu;
    uint64_t curr_rate = 0;
    int nvcpus = 0;

    /* Average the dirty rate over the vCPUs that are actually running. */
    CPU_FOREACH(cpu) {
        if (cpu->running) {
            nvcpus++;
            curr_rate += vcpu_dirty_rate_get(cpu->cpu_index);
        }
    }

    if (!curr_rate || !nvcpus) {
        return 0;
    }

    return dirtylimit_dirty_ring_full_time(curr_rate / nvcpus);
}
612 
613 static struct DirtyLimitInfo *dirtylimit_query_vcpu(int cpu_index)
614 {
615     DirtyLimitInfo *info = NULL;
616 
617     info = g_malloc0(sizeof(*info));
618     info->cpu_index = cpu_index;
619     info->limit_rate = dirtylimit_vcpu_get_state(cpu_index)->quota;
620     info->current_rate = vcpu_dirty_rate_get(cpu_index);
621 
622     return info;
623 }
624 
/*
 * Build a list with one entry per vCPU that currently has a limit
 * enabled; NULL when dirtylimit is not in service (or no vCPU is
 * limited). Caller owns the returned list.
 */
static struct DirtyLimitInfoList *dirtylimit_query_all(void)
{
    int i, index;
    DirtyLimitInfo *info = NULL;
    DirtyLimitInfoList *head = NULL, **tail = &head;

    dirtylimit_state_lock();

    if (!dirtylimit_in_service()) {
        dirtylimit_state_unlock();
        return NULL;
    }

    for (i = 0; i < dirtylimit_state->max_cpus; i++) {
        index = dirtylimit_state->states[i].cpu_index;
        if (dirtylimit_vcpu_get_state(index)->enabled) {
            info = dirtylimit_query_vcpu(index);
            QAPI_LIST_APPEND(tail, info);
        }
    }

    dirtylimit_state_unlock();

    return head;
}
650 
/* QMP query-vcpu-dirty-limit: report all currently limited vCPUs. */
struct DirtyLimitInfoList *qmp_query_vcpu_dirty_limit(Error **errp)
{
    return dirtylimit_query_all();
}
655 
656 void hmp_info_vcpu_dirty_limit(Monitor *mon, const QDict *qdict)
657 {
658     DirtyLimitInfoList *info;
659     g_autoptr(DirtyLimitInfoList) head = NULL;
660     Error *err = NULL;
661 
662     if (!dirtylimit_in_service()) {
663         monitor_printf(mon, "Dirty page limit not enabled!\n");
664         return;
665     }
666 
667     head = qmp_query_vcpu_dirty_limit(&err);
668     if (err) {
669         hmp_handle_error(mon, err);
670         return;
671     }
672 
673     for (info = head; info != NULL; info = info->next) {
674         monitor_printf(mon, "vcpu[%"PRIi64"], limit rate %"PRIi64 " (MB/s),"
675                             " current rate %"PRIi64 " (MB/s)\n",
676                             info->value->cpu_index,
677                             info->value->limit_rate,
678                             info->value->current_rate);
679     }
680 }
681