xref: /openbmc/qemu/migration/dirtyrate.c (revision ab2691b6c7ff360875e0af86ff463278f17786f5)
1 /*
2  * Dirtyrate implement code
3  *
4  * Copyright (c) 2020 HUAWEI TECHNOLOGIES CO.,LTD.
5  *
6  * Authors:
7  *  Chuan Zheng <zhengchuan@huawei.com>
8  *
9  * This work is licensed under the terms of the GNU GPL, version 2 or later.
10  * See the COPYING file in the top-level directory.
11  */
12 
13 #include "qemu/osdep.h"
14 #include "qemu/error-report.h"
15 #include "hw/core/cpu.h"
16 #include "qapi/error.h"
17 #include "exec/ramblock.h"
18 #include "exec/target_page.h"
19 #include "qemu/rcu_queue.h"
20 #include "qemu/main-loop.h"
21 #include "qapi/qapi-commands-migration.h"
22 #include "ram.h"
23 #include "trace.h"
24 #include "dirtyrate.h"
25 #include "monitor/hmp.h"
26 #include "monitor/monitor.h"
27 #include "qapi/qmp/qdict.h"
28 #include "sysemu/kvm.h"
29 #include "sysemu/runstate.h"
30 #include "exec/memory.h"
31 #include "qemu/xxhash.h"
32 
/*
 * total_dirty_pages is protected by the BQL and is used
 * to count the pages dirtied between two calls to
 * memory_global_dirty_log_sync
 */
uint64_t total_dirty_pages;

/* A dirty-page counter sampled at the start and at the end of a window. */
typedef struct DirtyPageRecord {
    uint64_t start_pages;
    uint64_t end_pages;
} DirtyPageRecord;

/* Measurement status; this file changes it through dirtyrate_set_state(). */
static int CalculatingState = DIRTY_RATE_STATUS_UNSTARTED;
/* Parameters and results of the most recently requested measurement. */
static struct DirtyRateStat DirtyStat;
/* Mode of the last calc-dirty-rate request; page sampling until one is made. */
static DirtyRateMeasureMode dirtyrate_mode =
                DIRTY_RATE_MEASURE_MODE_PAGE_SAMPLING;
49 
50 static int64_t dirty_stat_wait(int64_t msec, int64_t initial_time)
51 {
52     int64_t current_time;
53 
54     current_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
55     if ((current_time - initial_time) >= msec) {
56         msec = current_time - initial_time;
57     } else {
58         g_usleep((msec + initial_time - current_time) * 1000);
59         /* g_usleep may overshoot */
60         msec = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - initial_time;
61     }
62 
63     return msec;
64 }
65 
66 static inline void record_dirtypages(DirtyPageRecord *dirty_pages,
67                                      CPUState *cpu, bool start)
68 {
69     if (start) {
70         dirty_pages[cpu->cpu_index].start_pages = cpu->dirty_pages;
71     } else {
72         dirty_pages[cpu->cpu_index].end_pages = cpu->dirty_pages;
73     }
74 }
75 
76 static int64_t do_calculate_dirtyrate(DirtyPageRecord dirty_pages,
77                                       int64_t calc_time_ms)
78 {
79     uint64_t increased_dirty_pages =
80         dirty_pages.end_pages - dirty_pages.start_pages;
81 
82     /*
83      * multiply by 1000ms/s _before_ converting down to megabytes
84      * to avoid losing precision
85      */
86     return qemu_target_pages_to_MiB(increased_dirty_pages * 1000) /
87         calc_time_ms;
88 }
89 
90 void global_dirty_log_change(unsigned int flag, bool start)
91 {
92     Error *local_err = NULL;
93     bool ret;
94 
95     bql_lock();
96     if (start) {
97         ret = memory_global_dirty_log_start(flag, &local_err);
98         if (!ret) {
99             error_report_err(local_err);
100         }
101     } else {
102         memory_global_dirty_log_stop(flag);
103     }
104     bql_unlock();
105 }
106 
/*
 * global_dirty_log_sync
 *
 * Under the BQL:
 *   - run a global dirty log sync;
 *   - when @one_shot is set, additionally stop dirty tracking for @flag.
 */
static void global_dirty_log_sync(unsigned int flag, bool one_shot)
{
    bql_lock();

    memory_global_dirty_log_sync(false);

    if (one_shot) {
        /* the caller is done measuring: turn tracking off again */
        memory_global_dirty_log_stop(flag);
    }

    bql_unlock();
}
121 
122 static DirtyPageRecord *vcpu_dirty_stat_alloc(VcpuStat *stat)
123 {
124     CPUState *cpu;
125     int nvcpu = 0;
126 
127     CPU_FOREACH(cpu) {
128         nvcpu++;
129     }
130 
131     stat->nvcpu = nvcpu;
132     stat->rates = g_new0(DirtyRateVcpu, nvcpu);
133 
134     return g_new0(DirtyPageRecord, nvcpu);
135 }
136 
137 static void vcpu_dirty_stat_collect(DirtyPageRecord *records,
138                                     bool start)
139 {
140     CPUState *cpu;
141 
142     CPU_FOREACH(cpu) {
143         record_dirtypages(records, cpu, start);
144     }
145 }
146 
147 int64_t vcpu_calculate_dirtyrate(int64_t calc_time_ms,
148                                  VcpuStat *stat,
149                                  unsigned int flag,
150                                  bool one_shot)
151 {
152     DirtyPageRecord *records = NULL;
153     int64_t init_time_ms;
154     int64_t duration;
155     int64_t dirtyrate;
156     int i = 0;
157     unsigned int gen_id = 0;
158 
159 retry:
160     init_time_ms = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
161 
162     WITH_QEMU_LOCK_GUARD(&qemu_cpu_list_lock) {
163         gen_id = cpu_list_generation_id_get();
164         records = vcpu_dirty_stat_alloc(stat);
165         vcpu_dirty_stat_collect(records, true);
166     }
167 
168     duration = dirty_stat_wait(calc_time_ms, init_time_ms);
169 
170     global_dirty_log_sync(flag, one_shot);
171 
172     WITH_QEMU_LOCK_GUARD(&qemu_cpu_list_lock) {
173         if (gen_id != cpu_list_generation_id_get()) {
174             g_free(records);
175             g_free(stat->rates);
176             cpu_list_unlock();
177             goto retry;
178         }
179         vcpu_dirty_stat_collect(records, false);
180     }
181 
182     for (i = 0; i < stat->nvcpu; i++) {
183         dirtyrate = do_calculate_dirtyrate(records[i], duration);
184 
185         stat->rates[i].id = i;
186         stat->rates[i].dirty_rate = dirtyrate;
187 
188         trace_dirtyrate_do_calculate_vcpu(i, dirtyrate);
189     }
190 
191     g_free(records);
192 
193     return duration;
194 }
195 
196 static bool is_calc_time_valid(int64_t msec)
197 {
198     if ((msec < MIN_CALC_TIME_MS) || (msec > MAX_CALC_TIME_MS)) {
199         return false;
200     }
201 
202     return true;
203 }
204 
205 static bool is_sample_pages_valid(int64_t pages)
206 {
207     return pages >= MIN_SAMPLE_PAGE_COUNT &&
208            pages <= MAX_SAMPLE_PAGE_COUNT;
209 }
210 
211 static int dirtyrate_set_state(int *state, int old_state, int new_state)
212 {
213     assert(new_state < DIRTY_RATE_STATUS__MAX);
214     trace_dirtyrate_set_state(DirtyRateStatus_str(new_state));
215     if (qatomic_cmpxchg(state, old_state, new_state) == old_state) {
216         return 0;
217     } else {
218         return -1;
219     }
220 }
221 
222 /* Decimal power of given time unit relative to one second */
223 static int time_unit_to_power(TimeUnit time_unit)
224 {
225     switch (time_unit) {
226     case TIME_UNIT_SECOND:
227         return 0;
228     case TIME_UNIT_MILLISECOND:
229         return -3;
230     default:
231         g_assert_not_reached();
232     }
233 }
234 
235 static int64_t convert_time_unit(int64_t value, TimeUnit unit_from,
236                                  TimeUnit unit_to)
237 {
238     int power = time_unit_to_power(unit_from) -
239                 time_unit_to_power(unit_to);
240     while (power < 0) {
241         value /= 10;
242         power += 1;
243     }
244     while (power > 0) {
245         value *= 10;
246         power -= 1;
247     }
248     return value;
249 }
250 
251 
252 static struct DirtyRateInfo *
253 query_dirty_rate_info(TimeUnit calc_time_unit)
254 {
255     int i;
256     int64_t dirty_rate = DirtyStat.dirty_rate;
257     struct DirtyRateInfo *info = g_new0(DirtyRateInfo, 1);
258     DirtyRateVcpuList *head = NULL, **tail = &head;
259 
260     info->status = CalculatingState;
261     info->start_time = DirtyStat.start_time;
262     info->calc_time = convert_time_unit(DirtyStat.calc_time_ms,
263                                         TIME_UNIT_MILLISECOND,
264                                         calc_time_unit);
265     info->calc_time_unit = calc_time_unit;
266     info->sample_pages = DirtyStat.sample_pages;
267     info->mode = dirtyrate_mode;
268 
269     if (qatomic_read(&CalculatingState) == DIRTY_RATE_STATUS_MEASURED) {
270         info->has_dirty_rate = true;
271         info->dirty_rate = dirty_rate;
272 
273         if (dirtyrate_mode == DIRTY_RATE_MEASURE_MODE_DIRTY_RING) {
274             /*
275              * set sample_pages with 0 to indicate page sampling
276              * isn't enabled
277              **/
278             info->sample_pages = 0;
279             info->has_vcpu_dirty_rate = true;
280             for (i = 0; i < DirtyStat.dirty_ring.nvcpu; i++) {
281                 DirtyRateVcpu *rate = g_new0(DirtyRateVcpu, 1);
282                 rate->id = DirtyStat.dirty_ring.rates[i].id;
283                 rate->dirty_rate = DirtyStat.dirty_ring.rates[i].dirty_rate;
284                 QAPI_LIST_APPEND(tail, rate);
285             }
286             info->vcpu_dirty_rate = head;
287         }
288 
289         if (dirtyrate_mode == DIRTY_RATE_MEASURE_MODE_DIRTY_BITMAP) {
290             info->sample_pages = 0;
291         }
292     }
293 
294     trace_query_dirty_rate_info(DirtyRateStatus_str(CalculatingState));
295 
296     return info;
297 }
298 
299 static void init_dirtyrate_stat(struct DirtyRateConfig config)
300 {
301     DirtyStat.dirty_rate = -1;
302     DirtyStat.start_time = qemu_clock_get_ms(QEMU_CLOCK_HOST) / 1000;
303     DirtyStat.calc_time_ms = config.calc_time_ms;
304     DirtyStat.sample_pages = config.sample_pages_per_gigabytes;
305 
306     switch (config.mode) {
307     case DIRTY_RATE_MEASURE_MODE_PAGE_SAMPLING:
308         DirtyStat.page_sampling.total_dirty_samples = 0;
309         DirtyStat.page_sampling.total_sample_count = 0;
310         DirtyStat.page_sampling.total_block_mem_MB = 0;
311         break;
312     case DIRTY_RATE_MEASURE_MODE_DIRTY_RING:
313         DirtyStat.dirty_ring.nvcpu = -1;
314         DirtyStat.dirty_ring.rates = NULL;
315         break;
316     default:
317         break;
318     }
319 }
320 
321 static void cleanup_dirtyrate_stat(struct DirtyRateConfig config)
322 {
323     /* last calc-dirty-rate qmp use dirty ring mode */
324     if (dirtyrate_mode == DIRTY_RATE_MEASURE_MODE_DIRTY_RING) {
325         free(DirtyStat.dirty_ring.rates);
326         DirtyStat.dirty_ring.rates = NULL;
327     }
328 }
329 
330 static void update_dirtyrate_stat(struct RamblockDirtyInfo *info)
331 {
332     DirtyStat.page_sampling.total_dirty_samples += info->sample_dirty_count;
333     DirtyStat.page_sampling.total_sample_count += info->sample_pages_count;
334     /* size of total pages in MB */
335     DirtyStat.page_sampling.total_block_mem_MB +=
336         qemu_target_pages_to_MiB(info->ramblock_pages);
337 }
338 
339 static void update_dirtyrate(uint64_t msec)
340 {
341     uint64_t dirtyrate;
342     uint64_t total_dirty_samples = DirtyStat.page_sampling.total_dirty_samples;
343     uint64_t total_sample_count = DirtyStat.page_sampling.total_sample_count;
344     uint64_t total_block_mem_MB = DirtyStat.page_sampling.total_block_mem_MB;
345 
346     dirtyrate = total_dirty_samples * total_block_mem_MB *
347                 1000 / (total_sample_count * msec);
348 
349     DirtyStat.dirty_rate = dirtyrate;
350 }
351 
352 /*
353  * Compute hash of a single page of size TARGET_PAGE_SIZE.
354  */
355 static uint32_t compute_page_hash(void *ptr)
356 {
357     size_t page_size = qemu_target_page_size();
358     uint32_t i;
359     uint64_t v1, v2, v3, v4;
360     uint64_t res;
361     const uint64_t *p = ptr;
362 
363     v1 = QEMU_XXHASH_SEED + XXH_PRIME64_1 + XXH_PRIME64_2;
364     v2 = QEMU_XXHASH_SEED + XXH_PRIME64_2;
365     v3 = QEMU_XXHASH_SEED + 0;
366     v4 = QEMU_XXHASH_SEED - XXH_PRIME64_1;
367     for (i = 0; i < page_size / 8; i += 4) {
368         v1 = XXH64_round(v1, p[i + 0]);
369         v2 = XXH64_round(v2, p[i + 1]);
370         v3 = XXH64_round(v3, p[i + 2]);
371         v4 = XXH64_round(v4, p[i + 3]);
372     }
373     res = XXH64_mergerounds(v1, v2, v3, v4);
374     res += page_size;
375     res = XXH64_avalanche(res);
376     return (uint32_t)(res & UINT32_MAX);
377 }
378 
379 
380 /*
381  * get hash result for the sampled memory with length of TARGET_PAGE_SIZE
382  * in ramblock, which starts from ramblock base address.
383  */
384 static uint32_t get_ramblock_vfn_hash(struct RamblockDirtyInfo *info,
385                                       uint64_t vfn)
386 {
387     uint32_t hash;
388 
389     hash = compute_page_hash(info->ramblock_addr +
390                              vfn * qemu_target_page_size());
391 
392     trace_get_ramblock_vfn_hash(info->idstr, vfn, hash);
393     return hash;
394 }
395 
396 static bool save_ramblock_hash(struct RamblockDirtyInfo *info)
397 {
398     unsigned int sample_pages_count;
399     int i;
400     GRand *rand;
401 
402     sample_pages_count = info->sample_pages_count;
403 
404     /* ramblock size less than one page, return success to skip this ramblock */
405     if (unlikely(info->ramblock_pages == 0 || sample_pages_count == 0)) {
406         return true;
407     }
408 
409     info->hash_result = g_try_malloc0_n(sample_pages_count,
410                                         sizeof(uint32_t));
411     if (!info->hash_result) {
412         return false;
413     }
414 
415     info->sample_page_vfn = g_try_malloc0_n(sample_pages_count,
416                                             sizeof(uint64_t));
417     if (!info->sample_page_vfn) {
418         g_free(info->hash_result);
419         return false;
420     }
421 
422     rand  = g_rand_new();
423     for (i = 0; i < sample_pages_count; i++) {
424         info->sample_page_vfn[i] = g_rand_int_range(rand, 0,
425                                                     info->ramblock_pages - 1);
426         info->hash_result[i] = get_ramblock_vfn_hash(info,
427                                                      info->sample_page_vfn[i]);
428     }
429     g_rand_free(rand);
430 
431     return true;
432 }
433 
434 static void get_ramblock_dirty_info(RAMBlock *block,
435                                     struct RamblockDirtyInfo *info,
436                                     struct DirtyRateConfig *config)
437 {
438     uint64_t sample_pages_per_gigabytes = config->sample_pages_per_gigabytes;
439 
440     /* Right shift 30 bits to calc ramblock size in GB */
441     info->sample_pages_count = (qemu_ram_get_used_length(block) *
442                                 sample_pages_per_gigabytes) >> 30;
443     /* Right shift TARGET_PAGE_BITS to calc page count */
444     info->ramblock_pages = qemu_ram_get_used_length(block) >>
445                            qemu_target_page_bits();
446     info->ramblock_addr = qemu_ram_get_host_addr(block);
447     strcpy(info->idstr, qemu_ram_get_idstr(block));
448 }
449 
450 static void free_ramblock_dirty_info(struct RamblockDirtyInfo *infos, int count)
451 {
452     int i;
453 
454     if (!infos) {
455         return;
456     }
457 
458     for (i = 0; i < count; i++) {
459         g_free(infos[i].sample_page_vfn);
460         g_free(infos[i].hash_result);
461     }
462     g_free(infos);
463 }
464 
465 static bool skip_sample_ramblock(RAMBlock *block)
466 {
467     /*
468      * Sample only blocks larger than MIN_RAMBLOCK_SIZE.
469      */
470     if (qemu_ram_get_used_length(block) < (MIN_RAMBLOCK_SIZE << 10)) {
471         trace_skip_sample_ramblock(block->idstr,
472                                    qemu_ram_get_used_length(block));
473         return true;
474     }
475 
476     return false;
477 }
478 
479 static bool record_ramblock_hash_info(struct RamblockDirtyInfo **block_dinfo,
480                                       struct DirtyRateConfig config,
481                                       int *block_count)
482 {
483     struct RamblockDirtyInfo *info = NULL;
484     struct RamblockDirtyInfo *dinfo = NULL;
485     RAMBlock *block = NULL;
486     int total_count = 0;
487     int index = 0;
488     bool ret = false;
489 
490     RAMBLOCK_FOREACH_MIGRATABLE(block) {
491         if (skip_sample_ramblock(block)) {
492             continue;
493         }
494         total_count++;
495     }
496 
497     dinfo = g_try_malloc0_n(total_count, sizeof(struct RamblockDirtyInfo));
498     if (dinfo == NULL) {
499         goto out;
500     }
501 
502     RAMBLOCK_FOREACH_MIGRATABLE(block) {
503         if (skip_sample_ramblock(block)) {
504             continue;
505         }
506         if (index >= total_count) {
507             break;
508         }
509         info = &dinfo[index];
510         get_ramblock_dirty_info(block, info, &config);
511         if (!save_ramblock_hash(info)) {
512             goto out;
513         }
514         index++;
515     }
516     ret = true;
517 
518 out:
519     *block_count = index;
520     *block_dinfo = dinfo;
521     return ret;
522 }
523 
524 static void calc_page_dirty_rate(struct RamblockDirtyInfo *info)
525 {
526     uint32_t hash;
527     int i;
528 
529     for (i = 0; i < info->sample_pages_count; i++) {
530         hash = get_ramblock_vfn_hash(info, info->sample_page_vfn[i]);
531         if (hash != info->hash_result[i]) {
532             trace_calc_page_dirty_rate(info->idstr, hash, info->hash_result[i]);
533             info->sample_dirty_count++;
534         }
535     }
536 }
537 
538 static struct RamblockDirtyInfo *
539 find_block_matched(RAMBlock *block, int count,
540                   struct RamblockDirtyInfo *infos)
541 {
542     int i;
543 
544     for (i = 0; i < count; i++) {
545         if (!strcmp(infos[i].idstr, qemu_ram_get_idstr(block))) {
546             break;
547         }
548     }
549 
550     if (i == count) {
551         return NULL;
552     }
553 
554     if (infos[i].ramblock_addr != qemu_ram_get_host_addr(block) ||
555         infos[i].ramblock_pages !=
556             (qemu_ram_get_used_length(block) >> qemu_target_page_bits())) {
557         trace_find_page_matched(block->idstr);
558         return NULL;
559     }
560 
561     return &infos[i];
562 }
563 
564 static bool compare_page_hash_info(struct RamblockDirtyInfo *info,
565                                   int block_count)
566 {
567     struct RamblockDirtyInfo *block_dinfo = NULL;
568     RAMBlock *block = NULL;
569 
570     RAMBLOCK_FOREACH_MIGRATABLE(block) {
571         if (skip_sample_ramblock(block)) {
572             continue;
573         }
574         block_dinfo = find_block_matched(block, block_count, info);
575         if (block_dinfo == NULL) {
576             continue;
577         }
578         calc_page_dirty_rate(block_dinfo);
579         update_dirtyrate_stat(block_dinfo);
580     }
581 
582     if (DirtyStat.page_sampling.total_sample_count == 0) {
583         return false;
584     }
585 
586     return true;
587 }
588 
589 static inline void record_dirtypages_bitmap(DirtyPageRecord *dirty_pages,
590                                             bool start)
591 {
592     if (start) {
593         dirty_pages->start_pages = total_dirty_pages;
594     } else {
595         dirty_pages->end_pages = total_dirty_pages;
596     }
597 }
598 
599 static inline void dirtyrate_manual_reset_protect(void)
600 {
601     RAMBlock *block = NULL;
602 
603     WITH_RCU_READ_LOCK_GUARD() {
604         RAMBLOCK_FOREACH_MIGRATABLE(block) {
605             memory_region_clear_dirty_bitmap(block->mr, 0,
606                                              block->used_length);
607         }
608     }
609 }
610 
611 static void calculate_dirtyrate_dirty_bitmap(struct DirtyRateConfig config)
612 {
613     int64_t start_time;
614     DirtyPageRecord dirty_pages;
615     Error *local_err = NULL;
616 
617     bql_lock();
618     if (!memory_global_dirty_log_start(GLOBAL_DIRTY_DIRTY_RATE, &local_err)) {
619         error_report_err(local_err);
620     }
621 
622     /*
623      * 1'round of log sync may return all 1 bits with
624      * KVM_DIRTY_LOG_INITIALLY_SET enable
625      * skip it unconditionally and start dirty tracking
626      * from 2'round of log sync
627      */
628     memory_global_dirty_log_sync(false);
629 
630     /*
631      * reset page protect manually and unconditionally.
632      * this make sure kvm dirty log be cleared if
633      * KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE cap is enabled.
634      */
635     dirtyrate_manual_reset_protect();
636     bql_unlock();
637 
638     record_dirtypages_bitmap(&dirty_pages, true);
639 
640     start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
641     DirtyStat.start_time = qemu_clock_get_ms(QEMU_CLOCK_HOST) / 1000;
642 
643     DirtyStat.calc_time_ms = dirty_stat_wait(config.calc_time_ms, start_time);
644 
645     /*
646      * do two things.
647      * 1. fetch dirty bitmap from kvm
648      * 2. stop dirty tracking
649      */
650     global_dirty_log_sync(GLOBAL_DIRTY_DIRTY_RATE, true);
651 
652     record_dirtypages_bitmap(&dirty_pages, false);
653 
654     DirtyStat.dirty_rate = do_calculate_dirtyrate(dirty_pages,
655                                                   DirtyStat.calc_time_ms);
656 }
657 
658 static void calculate_dirtyrate_dirty_ring(struct DirtyRateConfig config)
659 {
660     uint64_t dirtyrate = 0;
661     uint64_t dirtyrate_sum = 0;
662     int i = 0;
663 
664     /* start log sync */
665     global_dirty_log_change(GLOBAL_DIRTY_DIRTY_RATE, true);
666 
667     DirtyStat.start_time = qemu_clock_get_ms(QEMU_CLOCK_HOST) / 1000;
668 
669     /* calculate vcpu dirtyrate */
670     DirtyStat.calc_time_ms = vcpu_calculate_dirtyrate(config.calc_time_ms,
671                                                       &DirtyStat.dirty_ring,
672                                                       GLOBAL_DIRTY_DIRTY_RATE,
673                                                       true);
674 
675     /* calculate vm dirtyrate */
676     for (i = 0; i < DirtyStat.dirty_ring.nvcpu; i++) {
677         dirtyrate = DirtyStat.dirty_ring.rates[i].dirty_rate;
678         DirtyStat.dirty_ring.rates[i].dirty_rate = dirtyrate;
679         dirtyrate_sum += dirtyrate;
680     }
681 
682     DirtyStat.dirty_rate = dirtyrate_sum;
683 }
684 
685 static void calculate_dirtyrate_sample_vm(struct DirtyRateConfig config)
686 {
687     struct RamblockDirtyInfo *block_dinfo = NULL;
688     int block_count = 0;
689     int64_t initial_time;
690 
691     rcu_read_lock();
692     initial_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
693     DirtyStat.start_time = qemu_clock_get_ms(QEMU_CLOCK_HOST) / 1000;
694     if (!record_ramblock_hash_info(&block_dinfo, config, &block_count)) {
695         goto out;
696     }
697     rcu_read_unlock();
698 
699     DirtyStat.calc_time_ms = dirty_stat_wait(config.calc_time_ms,
700                                              initial_time);
701 
702     rcu_read_lock();
703     if (!compare_page_hash_info(block_dinfo, block_count)) {
704         goto out;
705     }
706 
707     update_dirtyrate(DirtyStat.calc_time_ms);
708 
709 out:
710     rcu_read_unlock();
711     free_ramblock_dirty_info(block_dinfo, block_count);
712 }
713 
714 static void calculate_dirtyrate(struct DirtyRateConfig config)
715 {
716     if (config.mode == DIRTY_RATE_MEASURE_MODE_DIRTY_BITMAP) {
717         calculate_dirtyrate_dirty_bitmap(config);
718     } else if (config.mode == DIRTY_RATE_MEASURE_MODE_DIRTY_RING) {
719         calculate_dirtyrate_dirty_ring(config);
720     } else {
721         calculate_dirtyrate_sample_vm(config);
722     }
723 
724     trace_dirtyrate_calculate(DirtyStat.dirty_rate);
725 }
726 
727 void *get_dirtyrate_thread(void *arg)
728 {
729     struct DirtyRateConfig config = *(struct DirtyRateConfig *)arg;
730     int ret;
731     rcu_register_thread();
732 
733     ret = dirtyrate_set_state(&CalculatingState, DIRTY_RATE_STATUS_UNSTARTED,
734                               DIRTY_RATE_STATUS_MEASURING);
735     if (ret == -1) {
736         error_report("change dirtyrate state failed.");
737         return NULL;
738     }
739 
740     calculate_dirtyrate(config);
741 
742     ret = dirtyrate_set_state(&CalculatingState, DIRTY_RATE_STATUS_MEASURING,
743                               DIRTY_RATE_STATUS_MEASURED);
744     if (ret == -1) {
745         error_report("change dirtyrate state failed.");
746     }
747 
748     rcu_unregister_thread();
749     return NULL;
750 }
751 
752 void qmp_calc_dirty_rate(int64_t calc_time,
753                          bool has_calc_time_unit,
754                          TimeUnit calc_time_unit,
755                          bool has_sample_pages,
756                          int64_t sample_pages,
757                          bool has_mode,
758                          DirtyRateMeasureMode mode,
759                          Error **errp)
760 {
761     static struct DirtyRateConfig config;
762     QemuThread thread;
763     int ret;
764 
765     /*
766      * If the dirty rate is already being measured, don't attempt to start.
767      */
768     if (qatomic_read(&CalculatingState) == DIRTY_RATE_STATUS_MEASURING) {
769         error_setg(errp, "the dirty rate is already being measured.");
770         return;
771     }
772 
773     int64_t calc_time_ms = convert_time_unit(
774         calc_time,
775         has_calc_time_unit ? calc_time_unit : TIME_UNIT_SECOND,
776         TIME_UNIT_MILLISECOND
777     );
778 
779     if (!is_calc_time_valid(calc_time_ms)) {
780         error_setg(errp, "Calculation time is out of range [%dms, %dms].",
781                          MIN_CALC_TIME_MS, MAX_CALC_TIME_MS);
782         return;
783     }
784 
785     if (!has_mode) {
786         mode =  DIRTY_RATE_MEASURE_MODE_PAGE_SAMPLING;
787     }
788 
789     if (has_sample_pages && mode != DIRTY_RATE_MEASURE_MODE_PAGE_SAMPLING) {
790         error_setg(errp, "sample-pages is used only in page-sampling mode");
791         return;
792     }
793 
794     if (has_sample_pages) {
795         if (!is_sample_pages_valid(sample_pages)) {
796             error_setg(errp, "sample-pages is out of range[%d, %d].",
797                             MIN_SAMPLE_PAGE_COUNT,
798                             MAX_SAMPLE_PAGE_COUNT);
799             return;
800         }
801     } else {
802         sample_pages = DIRTYRATE_DEFAULT_SAMPLE_PAGES;
803     }
804 
805     /*
806      * dirty ring mode only works when kvm dirty ring is enabled.
807      * on the contrary, dirty bitmap mode is not.
808      */
809     if (((mode == DIRTY_RATE_MEASURE_MODE_DIRTY_RING) &&
810         !kvm_dirty_ring_enabled()) ||
811         ((mode == DIRTY_RATE_MEASURE_MODE_DIRTY_BITMAP) &&
812          kvm_dirty_ring_enabled())) {
813         error_setg(errp, "mode %s is not enabled, use other method instead.",
814                          DirtyRateMeasureMode_str(mode));
815          return;
816     }
817 
818     /*
819      * Init calculation state as unstarted.
820      */
821     ret = dirtyrate_set_state(&CalculatingState, CalculatingState,
822                               DIRTY_RATE_STATUS_UNSTARTED);
823     if (ret == -1) {
824         error_setg(errp, "init dirty rate calculation state failed.");
825         return;
826     }
827 
828     config.calc_time_ms = calc_time_ms;
829     config.sample_pages_per_gigabytes = sample_pages;
830     config.mode = mode;
831 
832     cleanup_dirtyrate_stat(config);
833 
834     /*
835      * update dirty rate mode so that we can figure out what mode has
836      * been used in last calculation
837      **/
838     dirtyrate_mode = mode;
839 
840     init_dirtyrate_stat(config);
841 
842     qemu_thread_create(&thread, "get_dirtyrate", get_dirtyrate_thread,
843                        (void *)&config, QEMU_THREAD_DETACHED);
844 }
845 
846 
847 struct DirtyRateInfo *qmp_query_dirty_rate(bool has_calc_time_unit,
848                                            TimeUnit calc_time_unit,
849                                            Error **errp)
850 {
851     return query_dirty_rate_info(
852         has_calc_time_unit ? calc_time_unit : TIME_UNIT_SECOND);
853 }
854 
855 void hmp_info_dirty_rate(Monitor *mon, const QDict *qdict)
856 {
857     DirtyRateInfo *info = query_dirty_rate_info(TIME_UNIT_SECOND);
858 
859     monitor_printf(mon, "Status: %s\n",
860                    DirtyRateStatus_str(info->status));
861     monitor_printf(mon, "Start Time: %"PRIi64" (ms)\n",
862                    info->start_time);
863     if (info->mode == DIRTY_RATE_MEASURE_MODE_PAGE_SAMPLING) {
864         monitor_printf(mon, "Sample Pages: %"PRIu64" (per GB)\n",
865                        info->sample_pages);
866     }
867     monitor_printf(mon, "Period: %"PRIi64" (sec)\n",
868                    info->calc_time);
869     monitor_printf(mon, "Mode: %s\n",
870                    DirtyRateMeasureMode_str(info->mode));
871     monitor_printf(mon, "Dirty rate: ");
872     if (info->has_dirty_rate) {
873         monitor_printf(mon, "%"PRIi64" (MB/s)\n", info->dirty_rate);
874         if (info->has_vcpu_dirty_rate) {
875             DirtyRateVcpuList *rate, *head = info->vcpu_dirty_rate;
876             for (rate = head; rate != NULL; rate = rate->next) {
877                 monitor_printf(mon, "vcpu[%"PRIi64"], Dirty rate: %"PRIi64
878                                " (MB/s)\n", rate->value->id,
879                                rate->value->dirty_rate);
880             }
881         }
882     } else {
883         monitor_printf(mon, "(not ready)\n");
884     }
885 
886     qapi_free_DirtyRateVcpuList(info->vcpu_dirty_rate);
887     g_free(info);
888 }
889 
890 void hmp_calc_dirty_rate(Monitor *mon, const QDict *qdict)
891 {
892     int64_t sec = qdict_get_try_int(qdict, "second", 0);
893     int64_t sample_pages = qdict_get_try_int(qdict, "sample_pages_per_GB", -1);
894     bool has_sample_pages = (sample_pages != -1);
895     bool dirty_ring = qdict_get_try_bool(qdict, "dirty_ring", false);
896     bool dirty_bitmap = qdict_get_try_bool(qdict, "dirty_bitmap", false);
897     DirtyRateMeasureMode mode = DIRTY_RATE_MEASURE_MODE_PAGE_SAMPLING;
898     Error *err = NULL;
899 
900     if (!sec) {
901         monitor_printf(mon, "Incorrect period length specified!\n");
902         return;
903     }
904 
905     if (dirty_ring && dirty_bitmap) {
906         monitor_printf(mon, "Either dirty ring or dirty bitmap "
907                        "can be specified!\n");
908         return;
909     }
910 
911     if (dirty_bitmap) {
912         mode = DIRTY_RATE_MEASURE_MODE_DIRTY_BITMAP;
913     } else if (dirty_ring) {
914         mode = DIRTY_RATE_MEASURE_MODE_DIRTY_RING;
915     }
916 
917     qmp_calc_dirty_rate(sec, /* calc-time */
918                         false, TIME_UNIT_SECOND, /* calc-time-unit */
919                         has_sample_pages, sample_pages,
920                         true, mode,
921                         &err);
922     if (err) {
923         hmp_handle_error(mon, err);
924         return;
925     }
926 
927     monitor_printf(mon, "Starting dirty rate measurement with period %"PRIi64
928                    " seconds\n", sec);
929     monitor_printf(mon, "[Please use 'info dirty_rate' to check results]\n");
930 }
931