xref: /openbmc/qemu/migration/dirtyrate.c (revision 83fb360d)
1 /*
2  * Dirtyrate implement code
3  *
4  * Copyright (c) 2020 HUAWEI TECHNOLOGIES CO.,LTD.
5  *
6  * Authors:
7  *  Chuan Zheng <zhengchuan@huawei.com>
8  *
9  * This work is licensed under the terms of the GNU GPL, version 2 or later.
10  * See the COPYING file in the top-level directory.
11  */
12 
13 #include "qemu/osdep.h"
14 #include "qemu/error-report.h"
15 #include <zlib.h>
16 #include "hw/core/cpu.h"
17 #include "qapi/error.h"
18 #include "exec/ramblock.h"
19 #include "exec/target_page.h"
20 #include "qemu/rcu_queue.h"
21 #include "qemu/main-loop.h"
22 #include "qapi/qapi-commands-migration.h"
23 #include "ram.h"
24 #include "trace.h"
25 #include "dirtyrate.h"
26 #include "monitor/hmp.h"
27 #include "monitor/monitor.h"
28 #include "qapi/qmp/qdict.h"
29 #include "sysemu/kvm.h"
30 #include "sysemu/runstate.h"
31 #include "exec/memory.h"
32 #include "qemu/xxhash.h"
33 
/*
 * total_dirty_pages is protected by the BQL and is used
 * to count the pages dirtied during the period between two
 * calls to memory_global_dirty_log_sync
 */
uint64_t total_dirty_pages;
40 
/*
 * Pair of dirty page counter snapshots: one taken when a measurement
 * period starts and one taken when it ends.
 */
typedef struct DirtyPageRecord {
    uint64_t start_pages; /* counter value at the start of the period */
    uint64_t end_pages;   /* counter value at the end of the period */
} DirtyPageRecord;
45 
/* Measurement status; transitions go through dirtyrate_set_state() */
static int CalculatingState = DIRTY_RATE_STATUS_UNSTARTED;
/* Parameters and results of the most recently requested measurement */
static struct DirtyRateStat DirtyStat;
/* Mode selected by the most recent calc-dirty-rate request */
static DirtyRateMeasureMode dirtyrate_mode =
                DIRTY_RATE_MEASURE_MODE_PAGE_SAMPLING;
50 
51 static int64_t dirty_stat_wait(int64_t msec, int64_t initial_time)
52 {
53     int64_t current_time;
54 
55     current_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
56     if ((current_time - initial_time) >= msec) {
57         msec = current_time - initial_time;
58     } else {
59         g_usleep((msec + initial_time - current_time) * 1000);
60         /* g_usleep may overshoot */
61         msec = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - initial_time;
62     }
63 
64     return msec;
65 }
66 
67 static inline void record_dirtypages(DirtyPageRecord *dirty_pages,
68                                      CPUState *cpu, bool start)
69 {
70     if (start) {
71         dirty_pages[cpu->cpu_index].start_pages = cpu->dirty_pages;
72     } else {
73         dirty_pages[cpu->cpu_index].end_pages = cpu->dirty_pages;
74     }
75 }
76 
77 static int64_t do_calculate_dirtyrate(DirtyPageRecord dirty_pages,
78                                       int64_t calc_time_ms)
79 {
80     uint64_t increased_dirty_pages =
81         dirty_pages.end_pages - dirty_pages.start_pages;
82 
83     /*
84      * multiply by 1000ms/s _before_ converting down to megabytes
85      * to avoid losing precision
86      */
87     return qemu_target_pages_to_MiB(increased_dirty_pages * 1000) /
88         calc_time_ms;
89 }
90 
91 void global_dirty_log_change(unsigned int flag, bool start)
92 {
93     Error *local_err = NULL;
94     bool ret;
95 
96     bql_lock();
97     if (start) {
98         ret = memory_global_dirty_log_start(flag, &local_err);
99         if (!ret) {
100             error_report_err(local_err);
101         }
102     } else {
103         memory_global_dirty_log_stop(flag);
104     }
105     bql_unlock();
106 }
107 
/*
 * global_dirty_log_sync
 *
 * With the BQL held:
 * 1. synchronize the dirty log (from kvm, per the original note)
 * 2. when @one_shot is set, also stop dirty tracking for @flag
 */
static void global_dirty_log_sync(unsigned int flag, bool one_shot)
{
    bql_lock();

    memory_global_dirty_log_sync(false);

    if (one_shot) {
        memory_global_dirty_log_stop(flag);
    }

    bql_unlock();
}
122 
123 static DirtyPageRecord *vcpu_dirty_stat_alloc(VcpuStat *stat)
124 {
125     CPUState *cpu;
126     int nvcpu = 0;
127 
128     CPU_FOREACH(cpu) {
129         nvcpu++;
130     }
131 
132     stat->nvcpu = nvcpu;
133     stat->rates = g_new0(DirtyRateVcpu, nvcpu);
134 
135     return g_new0(DirtyPageRecord, nvcpu);
136 }
137 
138 static void vcpu_dirty_stat_collect(DirtyPageRecord *records,
139                                     bool start)
140 {
141     CPUState *cpu;
142 
143     CPU_FOREACH(cpu) {
144         record_dirtypages(records, cpu, start);
145     }
146 }
147 
148 int64_t vcpu_calculate_dirtyrate(int64_t calc_time_ms,
149                                  VcpuStat *stat,
150                                  unsigned int flag,
151                                  bool one_shot)
152 {
153     DirtyPageRecord *records;
154     int64_t init_time_ms;
155     int64_t duration;
156     int64_t dirtyrate;
157     int i = 0;
158     unsigned int gen_id;
159 
160 retry:
161     init_time_ms = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
162 
163     WITH_QEMU_LOCK_GUARD(&qemu_cpu_list_lock) {
164         gen_id = cpu_list_generation_id_get();
165         records = vcpu_dirty_stat_alloc(stat);
166         vcpu_dirty_stat_collect(records, true);
167     }
168 
169     duration = dirty_stat_wait(calc_time_ms, init_time_ms);
170 
171     global_dirty_log_sync(flag, one_shot);
172 
173     WITH_QEMU_LOCK_GUARD(&qemu_cpu_list_lock) {
174         if (gen_id != cpu_list_generation_id_get()) {
175             g_free(records);
176             g_free(stat->rates);
177             cpu_list_unlock();
178             goto retry;
179         }
180         vcpu_dirty_stat_collect(records, false);
181     }
182 
183     for (i = 0; i < stat->nvcpu; i++) {
184         dirtyrate = do_calculate_dirtyrate(records[i], duration);
185 
186         stat->rates[i].id = i;
187         stat->rates[i].dirty_rate = dirtyrate;
188 
189         trace_dirtyrate_do_calculate_vcpu(i, dirtyrate);
190     }
191 
192     g_free(records);
193 
194     return duration;
195 }
196 
197 static bool is_calc_time_valid(int64_t msec)
198 {
199     if ((msec < MIN_CALC_TIME_MS) || (msec > MAX_CALC_TIME_MS)) {
200         return false;
201     }
202 
203     return true;
204 }
205 
206 static bool is_sample_pages_valid(int64_t pages)
207 {
208     return pages >= MIN_SAMPLE_PAGE_COUNT &&
209            pages <= MAX_SAMPLE_PAGE_COUNT;
210 }
211 
212 static int dirtyrate_set_state(int *state, int old_state, int new_state)
213 {
214     assert(new_state < DIRTY_RATE_STATUS__MAX);
215     trace_dirtyrate_set_state(DirtyRateStatus_str(new_state));
216     if (qatomic_cmpxchg(state, old_state, new_state) == old_state) {
217         return 0;
218     } else {
219         return -1;
220     }
221 }
222 
223 /* Decimal power of given time unit relative to one second */
224 static int time_unit_to_power(TimeUnit time_unit)
225 {
226     switch (time_unit) {
227     case TIME_UNIT_SECOND:
228         return 0;
229     case TIME_UNIT_MILLISECOND:
230         return -3;
231     default:
232         assert(false); /* unreachable */
233         return 0;
234     }
235 }
236 
237 static int64_t convert_time_unit(int64_t value, TimeUnit unit_from,
238                                  TimeUnit unit_to)
239 {
240     int power = time_unit_to_power(unit_from) -
241                 time_unit_to_power(unit_to);
242     while (power < 0) {
243         value /= 10;
244         power += 1;
245     }
246     while (power > 0) {
247         value *= 10;
248         power -= 1;
249     }
250     return value;
251 }
252 
253 
254 static struct DirtyRateInfo *
255 query_dirty_rate_info(TimeUnit calc_time_unit)
256 {
257     int i;
258     int64_t dirty_rate = DirtyStat.dirty_rate;
259     struct DirtyRateInfo *info = g_new0(DirtyRateInfo, 1);
260     DirtyRateVcpuList *head = NULL, **tail = &head;
261 
262     info->status = CalculatingState;
263     info->start_time = DirtyStat.start_time;
264     info->calc_time = convert_time_unit(DirtyStat.calc_time_ms,
265                                         TIME_UNIT_MILLISECOND,
266                                         calc_time_unit);
267     info->calc_time_unit = calc_time_unit;
268     info->sample_pages = DirtyStat.sample_pages;
269     info->mode = dirtyrate_mode;
270 
271     if (qatomic_read(&CalculatingState) == DIRTY_RATE_STATUS_MEASURED) {
272         info->has_dirty_rate = true;
273         info->dirty_rate = dirty_rate;
274 
275         if (dirtyrate_mode == DIRTY_RATE_MEASURE_MODE_DIRTY_RING) {
276             /*
277              * set sample_pages with 0 to indicate page sampling
278              * isn't enabled
279              **/
280             info->sample_pages = 0;
281             info->has_vcpu_dirty_rate = true;
282             for (i = 0; i < DirtyStat.dirty_ring.nvcpu; i++) {
283                 DirtyRateVcpu *rate = g_new0(DirtyRateVcpu, 1);
284                 rate->id = DirtyStat.dirty_ring.rates[i].id;
285                 rate->dirty_rate = DirtyStat.dirty_ring.rates[i].dirty_rate;
286                 QAPI_LIST_APPEND(tail, rate);
287             }
288             info->vcpu_dirty_rate = head;
289         }
290 
291         if (dirtyrate_mode == DIRTY_RATE_MEASURE_MODE_DIRTY_BITMAP) {
292             info->sample_pages = 0;
293         }
294     }
295 
296     trace_query_dirty_rate_info(DirtyRateStatus_str(CalculatingState));
297 
298     return info;
299 }
300 
301 static void init_dirtyrate_stat(struct DirtyRateConfig config)
302 {
303     DirtyStat.dirty_rate = -1;
304     DirtyStat.start_time = qemu_clock_get_ms(QEMU_CLOCK_HOST) / 1000;
305     DirtyStat.calc_time_ms = config.calc_time_ms;
306     DirtyStat.sample_pages = config.sample_pages_per_gigabytes;
307 
308     switch (config.mode) {
309     case DIRTY_RATE_MEASURE_MODE_PAGE_SAMPLING:
310         DirtyStat.page_sampling.total_dirty_samples = 0;
311         DirtyStat.page_sampling.total_sample_count = 0;
312         DirtyStat.page_sampling.total_block_mem_MB = 0;
313         break;
314     case DIRTY_RATE_MEASURE_MODE_DIRTY_RING:
315         DirtyStat.dirty_ring.nvcpu = -1;
316         DirtyStat.dirty_ring.rates = NULL;
317         break;
318     default:
319         break;
320     }
321 }
322 
323 static void cleanup_dirtyrate_stat(struct DirtyRateConfig config)
324 {
325     /* last calc-dirty-rate qmp use dirty ring mode */
326     if (dirtyrate_mode == DIRTY_RATE_MEASURE_MODE_DIRTY_RING) {
327         free(DirtyStat.dirty_ring.rates);
328         DirtyStat.dirty_ring.rates = NULL;
329     }
330 }
331 
332 static void update_dirtyrate_stat(struct RamblockDirtyInfo *info)
333 {
334     DirtyStat.page_sampling.total_dirty_samples += info->sample_dirty_count;
335     DirtyStat.page_sampling.total_sample_count += info->sample_pages_count;
336     /* size of total pages in MB */
337     DirtyStat.page_sampling.total_block_mem_MB +=
338         qemu_target_pages_to_MiB(info->ramblock_pages);
339 }
340 
341 static void update_dirtyrate(uint64_t msec)
342 {
343     uint64_t dirtyrate;
344     uint64_t total_dirty_samples = DirtyStat.page_sampling.total_dirty_samples;
345     uint64_t total_sample_count = DirtyStat.page_sampling.total_sample_count;
346     uint64_t total_block_mem_MB = DirtyStat.page_sampling.total_block_mem_MB;
347 
348     dirtyrate = total_dirty_samples * total_block_mem_MB *
349                 1000 / (total_sample_count * msec);
350 
351     DirtyStat.dirty_rate = dirtyrate;
352 }
353 
354 /*
355  * Compute hash of a single page of size TARGET_PAGE_SIZE.
356  */
357 static uint32_t compute_page_hash(void *ptr)
358 {
359     size_t page_size = qemu_target_page_size();
360     uint32_t i;
361     uint64_t v1, v2, v3, v4;
362     uint64_t res;
363     const uint64_t *p = ptr;
364 
365     v1 = QEMU_XXHASH_SEED + XXH_PRIME64_1 + XXH_PRIME64_2;
366     v2 = QEMU_XXHASH_SEED + XXH_PRIME64_2;
367     v3 = QEMU_XXHASH_SEED + 0;
368     v4 = QEMU_XXHASH_SEED - XXH_PRIME64_1;
369     for (i = 0; i < page_size / 8; i += 4) {
370         v1 = XXH64_round(v1, p[i + 0]);
371         v2 = XXH64_round(v2, p[i + 1]);
372         v3 = XXH64_round(v3, p[i + 2]);
373         v4 = XXH64_round(v4, p[i + 3]);
374     }
375     res = XXH64_mergerounds(v1, v2, v3, v4);
376     res += page_size;
377     res = XXH64_avalanche(res);
378     return (uint32_t)(res & UINT32_MAX);
379 }
380 
381 
382 /*
383  * get hash result for the sampled memory with length of TARGET_PAGE_SIZE
384  * in ramblock, which starts from ramblock base address.
385  */
386 static uint32_t get_ramblock_vfn_hash(struct RamblockDirtyInfo *info,
387                                       uint64_t vfn)
388 {
389     uint32_t hash;
390 
391     hash = compute_page_hash(info->ramblock_addr +
392                              vfn * qemu_target_page_size());
393 
394     trace_get_ramblock_vfn_hash(info->idstr, vfn, hash);
395     return hash;
396 }
397 
398 static bool save_ramblock_hash(struct RamblockDirtyInfo *info)
399 {
400     unsigned int sample_pages_count;
401     int i;
402     GRand *rand;
403 
404     sample_pages_count = info->sample_pages_count;
405 
406     /* ramblock size less than one page, return success to skip this ramblock */
407     if (unlikely(info->ramblock_pages == 0 || sample_pages_count == 0)) {
408         return true;
409     }
410 
411     info->hash_result = g_try_malloc0_n(sample_pages_count,
412                                         sizeof(uint32_t));
413     if (!info->hash_result) {
414         return false;
415     }
416 
417     info->sample_page_vfn = g_try_malloc0_n(sample_pages_count,
418                                             sizeof(uint64_t));
419     if (!info->sample_page_vfn) {
420         g_free(info->hash_result);
421         return false;
422     }
423 
424     rand  = g_rand_new();
425     for (i = 0; i < sample_pages_count; i++) {
426         info->sample_page_vfn[i] = g_rand_int_range(rand, 0,
427                                                     info->ramblock_pages - 1);
428         info->hash_result[i] = get_ramblock_vfn_hash(info,
429                                                      info->sample_page_vfn[i]);
430     }
431     g_rand_free(rand);
432 
433     return true;
434 }
435 
436 static void get_ramblock_dirty_info(RAMBlock *block,
437                                     struct RamblockDirtyInfo *info,
438                                     struct DirtyRateConfig *config)
439 {
440     uint64_t sample_pages_per_gigabytes = config->sample_pages_per_gigabytes;
441 
442     /* Right shift 30 bits to calc ramblock size in GB */
443     info->sample_pages_count = (qemu_ram_get_used_length(block) *
444                                 sample_pages_per_gigabytes) >> 30;
445     /* Right shift TARGET_PAGE_BITS to calc page count */
446     info->ramblock_pages = qemu_ram_get_used_length(block) >>
447                            qemu_target_page_bits();
448     info->ramblock_addr = qemu_ram_get_host_addr(block);
449     strcpy(info->idstr, qemu_ram_get_idstr(block));
450 }
451 
452 static void free_ramblock_dirty_info(struct RamblockDirtyInfo *infos, int count)
453 {
454     int i;
455 
456     if (!infos) {
457         return;
458     }
459 
460     for (i = 0; i < count; i++) {
461         g_free(infos[i].sample_page_vfn);
462         g_free(infos[i].hash_result);
463     }
464     g_free(infos);
465 }
466 
467 static bool skip_sample_ramblock(RAMBlock *block)
468 {
469     /*
470      * Sample only blocks larger than MIN_RAMBLOCK_SIZE.
471      */
472     if (qemu_ram_get_used_length(block) < (MIN_RAMBLOCK_SIZE << 10)) {
473         trace_skip_sample_ramblock(block->idstr,
474                                    qemu_ram_get_used_length(block));
475         return true;
476     }
477 
478     return false;
479 }
480 
481 static bool record_ramblock_hash_info(struct RamblockDirtyInfo **block_dinfo,
482                                       struct DirtyRateConfig config,
483                                       int *block_count)
484 {
485     struct RamblockDirtyInfo *info = NULL;
486     struct RamblockDirtyInfo *dinfo = NULL;
487     RAMBlock *block = NULL;
488     int total_count = 0;
489     int index = 0;
490     bool ret = false;
491 
492     RAMBLOCK_FOREACH_MIGRATABLE(block) {
493         if (skip_sample_ramblock(block)) {
494             continue;
495         }
496         total_count++;
497     }
498 
499     dinfo = g_try_malloc0_n(total_count, sizeof(struct RamblockDirtyInfo));
500     if (dinfo == NULL) {
501         goto out;
502     }
503 
504     RAMBLOCK_FOREACH_MIGRATABLE(block) {
505         if (skip_sample_ramblock(block)) {
506             continue;
507         }
508         if (index >= total_count) {
509             break;
510         }
511         info = &dinfo[index];
512         get_ramblock_dirty_info(block, info, &config);
513         if (!save_ramblock_hash(info)) {
514             goto out;
515         }
516         index++;
517     }
518     ret = true;
519 
520 out:
521     *block_count = index;
522     *block_dinfo = dinfo;
523     return ret;
524 }
525 
526 static void calc_page_dirty_rate(struct RamblockDirtyInfo *info)
527 {
528     uint32_t hash;
529     int i;
530 
531     for (i = 0; i < info->sample_pages_count; i++) {
532         hash = get_ramblock_vfn_hash(info, info->sample_page_vfn[i]);
533         if (hash != info->hash_result[i]) {
534             trace_calc_page_dirty_rate(info->idstr, hash, info->hash_result[i]);
535             info->sample_dirty_count++;
536         }
537     }
538 }
539 
540 static struct RamblockDirtyInfo *
541 find_block_matched(RAMBlock *block, int count,
542                   struct RamblockDirtyInfo *infos)
543 {
544     int i;
545 
546     for (i = 0; i < count; i++) {
547         if (!strcmp(infos[i].idstr, qemu_ram_get_idstr(block))) {
548             break;
549         }
550     }
551 
552     if (i == count) {
553         return NULL;
554     }
555 
556     if (infos[i].ramblock_addr != qemu_ram_get_host_addr(block) ||
557         infos[i].ramblock_pages !=
558             (qemu_ram_get_used_length(block) >> qemu_target_page_bits())) {
559         trace_find_page_matched(block->idstr);
560         return NULL;
561     }
562 
563     return &infos[i];
564 }
565 
566 static bool compare_page_hash_info(struct RamblockDirtyInfo *info,
567                                   int block_count)
568 {
569     struct RamblockDirtyInfo *block_dinfo = NULL;
570     RAMBlock *block = NULL;
571 
572     RAMBLOCK_FOREACH_MIGRATABLE(block) {
573         if (skip_sample_ramblock(block)) {
574             continue;
575         }
576         block_dinfo = find_block_matched(block, block_count, info);
577         if (block_dinfo == NULL) {
578             continue;
579         }
580         calc_page_dirty_rate(block_dinfo);
581         update_dirtyrate_stat(block_dinfo);
582     }
583 
584     if (DirtyStat.page_sampling.total_sample_count == 0) {
585         return false;
586     }
587 
588     return true;
589 }
590 
591 static inline void record_dirtypages_bitmap(DirtyPageRecord *dirty_pages,
592                                             bool start)
593 {
594     if (start) {
595         dirty_pages->start_pages = total_dirty_pages;
596     } else {
597         dirty_pages->end_pages = total_dirty_pages;
598     }
599 }
600 
601 static inline void dirtyrate_manual_reset_protect(void)
602 {
603     RAMBlock *block = NULL;
604 
605     WITH_RCU_READ_LOCK_GUARD() {
606         RAMBLOCK_FOREACH_MIGRATABLE(block) {
607             memory_region_clear_dirty_bitmap(block->mr, 0,
608                                              block->used_length);
609         }
610     }
611 }
612 
613 static void calculate_dirtyrate_dirty_bitmap(struct DirtyRateConfig config)
614 {
615     int64_t start_time;
616     DirtyPageRecord dirty_pages;
617     Error *local_err = NULL;
618 
619     bql_lock();
620     if (!memory_global_dirty_log_start(GLOBAL_DIRTY_DIRTY_RATE, &local_err)) {
621         error_report_err(local_err);
622     }
623 
624     /*
625      * 1'round of log sync may return all 1 bits with
626      * KVM_DIRTY_LOG_INITIALLY_SET enable
627      * skip it unconditionally and start dirty tracking
628      * from 2'round of log sync
629      */
630     memory_global_dirty_log_sync(false);
631 
632     /*
633      * reset page protect manually and unconditionally.
634      * this make sure kvm dirty log be cleared if
635      * KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE cap is enabled.
636      */
637     dirtyrate_manual_reset_protect();
638     bql_unlock();
639 
640     record_dirtypages_bitmap(&dirty_pages, true);
641 
642     start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
643     DirtyStat.start_time = qemu_clock_get_ms(QEMU_CLOCK_HOST) / 1000;
644 
645     DirtyStat.calc_time_ms = dirty_stat_wait(config.calc_time_ms, start_time);
646 
647     /*
648      * do two things.
649      * 1. fetch dirty bitmap from kvm
650      * 2. stop dirty tracking
651      */
652     global_dirty_log_sync(GLOBAL_DIRTY_DIRTY_RATE, true);
653 
654     record_dirtypages_bitmap(&dirty_pages, false);
655 
656     DirtyStat.dirty_rate = do_calculate_dirtyrate(dirty_pages,
657                                                   DirtyStat.calc_time_ms);
658 }
659 
660 static void calculate_dirtyrate_dirty_ring(struct DirtyRateConfig config)
661 {
662     uint64_t dirtyrate = 0;
663     uint64_t dirtyrate_sum = 0;
664     int i = 0;
665 
666     /* start log sync */
667     global_dirty_log_change(GLOBAL_DIRTY_DIRTY_RATE, true);
668 
669     DirtyStat.start_time = qemu_clock_get_ms(QEMU_CLOCK_HOST) / 1000;
670 
671     /* calculate vcpu dirtyrate */
672     DirtyStat.calc_time_ms = vcpu_calculate_dirtyrate(config.calc_time_ms,
673                                                       &DirtyStat.dirty_ring,
674                                                       GLOBAL_DIRTY_DIRTY_RATE,
675                                                       true);
676 
677     /* calculate vm dirtyrate */
678     for (i = 0; i < DirtyStat.dirty_ring.nvcpu; i++) {
679         dirtyrate = DirtyStat.dirty_ring.rates[i].dirty_rate;
680         DirtyStat.dirty_ring.rates[i].dirty_rate = dirtyrate;
681         dirtyrate_sum += dirtyrate;
682     }
683 
684     DirtyStat.dirty_rate = dirtyrate_sum;
685 }
686 
687 static void calculate_dirtyrate_sample_vm(struct DirtyRateConfig config)
688 {
689     struct RamblockDirtyInfo *block_dinfo = NULL;
690     int block_count = 0;
691     int64_t initial_time;
692 
693     rcu_read_lock();
694     initial_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
695     DirtyStat.start_time = qemu_clock_get_ms(QEMU_CLOCK_HOST) / 1000;
696     if (!record_ramblock_hash_info(&block_dinfo, config, &block_count)) {
697         goto out;
698     }
699     rcu_read_unlock();
700 
701     DirtyStat.calc_time_ms = dirty_stat_wait(config.calc_time_ms,
702                                              initial_time);
703 
704     rcu_read_lock();
705     if (!compare_page_hash_info(block_dinfo, block_count)) {
706         goto out;
707     }
708 
709     update_dirtyrate(DirtyStat.calc_time_ms);
710 
711 out:
712     rcu_read_unlock();
713     free_ramblock_dirty_info(block_dinfo, block_count);
714 }
715 
716 static void calculate_dirtyrate(struct DirtyRateConfig config)
717 {
718     if (config.mode == DIRTY_RATE_MEASURE_MODE_DIRTY_BITMAP) {
719         calculate_dirtyrate_dirty_bitmap(config);
720     } else if (config.mode == DIRTY_RATE_MEASURE_MODE_DIRTY_RING) {
721         calculate_dirtyrate_dirty_ring(config);
722     } else {
723         calculate_dirtyrate_sample_vm(config);
724     }
725 
726     trace_dirtyrate_calculate(DirtyStat.dirty_rate);
727 }
728 
729 void *get_dirtyrate_thread(void *arg)
730 {
731     struct DirtyRateConfig config = *(struct DirtyRateConfig *)arg;
732     int ret;
733     rcu_register_thread();
734 
735     ret = dirtyrate_set_state(&CalculatingState, DIRTY_RATE_STATUS_UNSTARTED,
736                               DIRTY_RATE_STATUS_MEASURING);
737     if (ret == -1) {
738         error_report("change dirtyrate state failed.");
739         return NULL;
740     }
741 
742     calculate_dirtyrate(config);
743 
744     ret = dirtyrate_set_state(&CalculatingState, DIRTY_RATE_STATUS_MEASURING,
745                               DIRTY_RATE_STATUS_MEASURED);
746     if (ret == -1) {
747         error_report("change dirtyrate state failed.");
748     }
749 
750     rcu_unregister_thread();
751     return NULL;
752 }
753 
754 void qmp_calc_dirty_rate(int64_t calc_time,
755                          bool has_calc_time_unit,
756                          TimeUnit calc_time_unit,
757                          bool has_sample_pages,
758                          int64_t sample_pages,
759                          bool has_mode,
760                          DirtyRateMeasureMode mode,
761                          Error **errp)
762 {
763     static struct DirtyRateConfig config;
764     QemuThread thread;
765     int ret;
766 
767     /*
768      * If the dirty rate is already being measured, don't attempt to start.
769      */
770     if (qatomic_read(&CalculatingState) == DIRTY_RATE_STATUS_MEASURING) {
771         error_setg(errp, "the dirty rate is already being measured.");
772         return;
773     }
774 
775     int64_t calc_time_ms = convert_time_unit(
776         calc_time,
777         has_calc_time_unit ? calc_time_unit : TIME_UNIT_SECOND,
778         TIME_UNIT_MILLISECOND
779     );
780 
781     if (!is_calc_time_valid(calc_time_ms)) {
782         error_setg(errp, "Calculation time is out of range [%dms, %dms].",
783                          MIN_CALC_TIME_MS, MAX_CALC_TIME_MS);
784         return;
785     }
786 
787     if (!has_mode) {
788         mode =  DIRTY_RATE_MEASURE_MODE_PAGE_SAMPLING;
789     }
790 
791     if (has_sample_pages && mode != DIRTY_RATE_MEASURE_MODE_PAGE_SAMPLING) {
792         error_setg(errp, "sample-pages is used only in page-sampling mode");
793         return;
794     }
795 
796     if (has_sample_pages) {
797         if (!is_sample_pages_valid(sample_pages)) {
798             error_setg(errp, "sample-pages is out of range[%d, %d].",
799                             MIN_SAMPLE_PAGE_COUNT,
800                             MAX_SAMPLE_PAGE_COUNT);
801             return;
802         }
803     } else {
804         sample_pages = DIRTYRATE_DEFAULT_SAMPLE_PAGES;
805     }
806 
807     /*
808      * dirty ring mode only works when kvm dirty ring is enabled.
809      * on the contrary, dirty bitmap mode is not.
810      */
811     if (((mode == DIRTY_RATE_MEASURE_MODE_DIRTY_RING) &&
812         !kvm_dirty_ring_enabled()) ||
813         ((mode == DIRTY_RATE_MEASURE_MODE_DIRTY_BITMAP) &&
814          kvm_dirty_ring_enabled())) {
815         error_setg(errp, "mode %s is not enabled, use other method instead.",
816                          DirtyRateMeasureMode_str(mode));
817          return;
818     }
819 
820     /*
821      * Init calculation state as unstarted.
822      */
823     ret = dirtyrate_set_state(&CalculatingState, CalculatingState,
824                               DIRTY_RATE_STATUS_UNSTARTED);
825     if (ret == -1) {
826         error_setg(errp, "init dirty rate calculation state failed.");
827         return;
828     }
829 
830     config.calc_time_ms = calc_time_ms;
831     config.sample_pages_per_gigabytes = sample_pages;
832     config.mode = mode;
833 
834     cleanup_dirtyrate_stat(config);
835 
836     /*
837      * update dirty rate mode so that we can figure out what mode has
838      * been used in last calculation
839      **/
840     dirtyrate_mode = mode;
841 
842     init_dirtyrate_stat(config);
843 
844     qemu_thread_create(&thread, "get_dirtyrate", get_dirtyrate_thread,
845                        (void *)&config, QEMU_THREAD_DETACHED);
846 }
847 
848 
849 struct DirtyRateInfo *qmp_query_dirty_rate(bool has_calc_time_unit,
850                                            TimeUnit calc_time_unit,
851                                            Error **errp)
852 {
853     return query_dirty_rate_info(
854         has_calc_time_unit ? calc_time_unit : TIME_UNIT_SECOND);
855 }
856 
857 void hmp_info_dirty_rate(Monitor *mon, const QDict *qdict)
858 {
859     DirtyRateInfo *info = query_dirty_rate_info(TIME_UNIT_SECOND);
860 
861     monitor_printf(mon, "Status: %s\n",
862                    DirtyRateStatus_str(info->status));
863     monitor_printf(mon, "Start Time: %"PRIi64" (ms)\n",
864                    info->start_time);
865     if (info->mode == DIRTY_RATE_MEASURE_MODE_PAGE_SAMPLING) {
866         monitor_printf(mon, "Sample Pages: %"PRIu64" (per GB)\n",
867                        info->sample_pages);
868     }
869     monitor_printf(mon, "Period: %"PRIi64" (sec)\n",
870                    info->calc_time);
871     monitor_printf(mon, "Mode: %s\n",
872                    DirtyRateMeasureMode_str(info->mode));
873     monitor_printf(mon, "Dirty rate: ");
874     if (info->has_dirty_rate) {
875         monitor_printf(mon, "%"PRIi64" (MB/s)\n", info->dirty_rate);
876         if (info->has_vcpu_dirty_rate) {
877             DirtyRateVcpuList *rate, *head = info->vcpu_dirty_rate;
878             for (rate = head; rate != NULL; rate = rate->next) {
879                 monitor_printf(mon, "vcpu[%"PRIi64"], Dirty rate: %"PRIi64
880                                " (MB/s)\n", rate->value->id,
881                                rate->value->dirty_rate);
882             }
883         }
884     } else {
885         monitor_printf(mon, "(not ready)\n");
886     }
887 
888     qapi_free_DirtyRateVcpuList(info->vcpu_dirty_rate);
889     g_free(info);
890 }
891 
892 void hmp_calc_dirty_rate(Monitor *mon, const QDict *qdict)
893 {
894     int64_t sec = qdict_get_try_int(qdict, "second", 0);
895     int64_t sample_pages = qdict_get_try_int(qdict, "sample_pages_per_GB", -1);
896     bool has_sample_pages = (sample_pages != -1);
897     bool dirty_ring = qdict_get_try_bool(qdict, "dirty_ring", false);
898     bool dirty_bitmap = qdict_get_try_bool(qdict, "dirty_bitmap", false);
899     DirtyRateMeasureMode mode = DIRTY_RATE_MEASURE_MODE_PAGE_SAMPLING;
900     Error *err = NULL;
901 
902     if (!sec) {
903         monitor_printf(mon, "Incorrect period length specified!\n");
904         return;
905     }
906 
907     if (dirty_ring && dirty_bitmap) {
908         monitor_printf(mon, "Either dirty ring or dirty bitmap "
909                        "can be specified!\n");
910         return;
911     }
912 
913     if (dirty_bitmap) {
914         mode = DIRTY_RATE_MEASURE_MODE_DIRTY_BITMAP;
915     } else if (dirty_ring) {
916         mode = DIRTY_RATE_MEASURE_MODE_DIRTY_RING;
917     }
918 
919     qmp_calc_dirty_rate(sec, /* calc-time */
920                         false, TIME_UNIT_SECOND, /* calc-time-unit */
921                         has_sample_pages, sample_pages,
922                         true, mode,
923                         &err);
924     if (err) {
925         hmp_handle_error(mon, err);
926         return;
927     }
928 
929     monitor_printf(mon, "Starting dirty rate measurement with period %"PRIi64
930                    " seconds\n", sec);
931     monitor_printf(mon, "[Please use 'info dirty_rate' to check results]\n");
932 }
933