/*
 * Dirtyrate implement code
 *
 * Copyright (c) 2020 HUAWEI TECHNOLOGIES CO.,LTD.
 *
 * Authors:
 *  Chuan Zheng <zhengchuan@huawei.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */

#include "qemu/osdep.h"
#include <zlib.h>
#include "qapi/error.h"
#include "cpu.h"
#include "exec/ramblock.h"
#include "qemu/rcu_queue.h"
#include "qemu/main-loop.h"
#include "qapi/qapi-commands-migration.h"
#include "ram.h"
#include "trace.h"
#include "dirtyrate.h"
#include "monitor/hmp.h"
#include "monitor/monitor.h"
#include "qapi/qmp/qdict.h"
#include "sysemu/kvm.h"
#include "sysemu/runstate.h"
#include "exec/memory.h"

typedef struct DirtyPageRecord {
    uint64_t start_pages;
    uint64_t end_pages;
} DirtyPageRecord;

static int CalculatingState = DIRTY_RATE_STATUS_UNSTARTED;
static struct DirtyRateStat DirtyStat;
static DirtyRateMeasureMode dirtyrate_mode =
                DIRTY_RATE_MEASURE_MODE_PAGE_SAMPLING;

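/*
 * Sleep until the requested sample period has elapsed, measured from
 * initial_time. Returns the number of milliseconds that actually passed,
 * which may exceed msec if the caller was already past the deadline.
 */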
static int64_t set_sample_page_period(int64_t msec, int64_t initial_time)
{
    int64_t current_time;

    current_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
    if ((current_time - initial_time) >= msec) {
        msec = current_time - initial_time;
    } else {
        g_usleep((msec + initial_time - current_time) * 1000);
    }

    return msec;
}

static bool is_sample_period_valid(int64_t sec)
{
    if (sec < MIN_FETCH_DIRTYRATE_TIME_SEC ||
        sec > MAX_FETCH_DIRTYRATE_TIME_SEC) {
        return false;
    }

    return true;
}

static bool is_sample_pages_valid(int64_t pages)
{
    return pages >= MIN_SAMPLE_PAGE_COUNT &&
           pages <= MAX_SAMPLE_PAGE_COUNT;
}

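/*
 * Atomically transition the calculation state machine from old_state to
 * new_state. Returns 0 on success, or -1 if another thread changed the
 * state first (the compare-and-swap observed a different value).
 */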
static int dirtyrate_set_state(int *state, int old_state, int new_state)
{
    assert(new_state < DIRTY_RATE_STATUS__MAX);
    trace_dirtyrate_set_state(DirtyRateStatus_str(new_state));
    if (qatomic_cmpxchg(state, old_state, new_state) == old_state) {
        return 0;
    } else {
        return -1;
    }
}

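/*
 * Build the DirtyRateInfo structure returned to QMP/HMP queries. The
 * dirty rate and per-vcpu rates are filled in only once a measurement
 * has completed (state DIRTY_RATE_STATUS_MEASURED).
 */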
static struct DirtyRateInfo *query_dirty_rate_info(void)
{
    int i;
    int64_t dirty_rate = DirtyStat.dirty_rate;
    struct DirtyRateInfo *info = g_malloc0(sizeof(DirtyRateInfo));
    DirtyRateVcpuList *head = NULL, **tail = &head;

    info->status = CalculatingState;
    info->start_time = DirtyStat.start_time;
    info->calc_time = DirtyStat.calc_time;
    info->sample_pages = DirtyStat.sample_pages;
    info->mode = dirtyrate_mode;

    if (qatomic_read(&CalculatingState) == DIRTY_RATE_STATUS_MEASURED) {
        info->has_dirty_rate = true;
        info->dirty_rate = dirty_rate;

        if (dirtyrate_mode == DIRTY_RATE_MEASURE_MODE_DIRTY_RING) {
            /*
             * set sample_pages with 0 to indicate page sampling
             * isn't enabled
             */
            info->sample_pages = 0;
            info->has_vcpu_dirty_rate = true;
            for (i = 0; i < DirtyStat.dirty_ring.nvcpu; i++) {
                DirtyRateVcpu *rate = g_malloc0(sizeof(DirtyRateVcpu));
                rate->id = DirtyStat.dirty_ring.rates[i].id;
                rate->dirty_rate = DirtyStat.dirty_ring.rates[i].dirty_rate;
                QAPI_LIST_APPEND(tail, rate);
            }
            info->vcpu_dirty_rate = head;
        }
    }

    trace_query_dirty_rate_info(DirtyRateStatus_str(CalculatingState));

    return info;
}

static void init_dirtyrate_stat(int64_t start_time,
                                struct DirtyRateConfig config)
{
    DirtyStat.dirty_rate = -1;
    DirtyStat.start_time = start_time;
    DirtyStat.calc_time = config.sample_period_seconds;
    DirtyStat.sample_pages = config.sample_pages_per_gigabytes;

    switch (config.mode) {
    case DIRTY_RATE_MEASURE_MODE_PAGE_SAMPLING:
        DirtyStat.page_sampling.total_dirty_samples = 0;
        DirtyStat.page_sampling.total_sample_count = 0;
        DirtyStat.page_sampling.total_block_mem_MB = 0;
        break;
    case DIRTY_RATE_MEASURE_MODE_DIRTY_RING:
        DirtyStat.dirty_ring.nvcpu = -1;
        DirtyStat.dirty_ring.rates = NULL;
        break;
    default:
        break;
    }
}

static void cleanup_dirtyrate_stat(struct DirtyRateConfig config)
{
    /* the last calc-dirty-rate QMP command used dirty ring mode */
    if (dirtyrate_mode == DIRTY_RATE_MEASURE_MODE_DIRTY_RING) {
        free(DirtyStat.dirty_ring.rates);
        DirtyStat.dirty_ring.rates = NULL;
    }
}

static void update_dirtyrate_stat(struct RamblockDirtyInfo *info)
{
    DirtyStat.page_sampling.total_dirty_samples += info->sample_dirty_count;
    DirtyStat.page_sampling.total_sample_count += info->sample_pages_count;
    /* size of total pages in MB */
    DirtyStat.page_sampling.total_block_mem_MB += (info->ramblock_pages *
                                                   TARGET_PAGE_SIZE) >> 20;
}

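/*
 * Derive the dirty rate in MB/s from the sampling statistics: the
 * fraction of sampled pages that changed, scaled by the total sampled
 * block memory and normalized from msec to one second:
 *   rate = total_dirty_samples / total_sample_count
 *          * total_block_mem_MB * 1000 / msec
 */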
static void update_dirtyrate(uint64_t msec)
{
    uint64_t dirtyrate;
    uint64_t total_dirty_samples = DirtyStat.page_sampling.total_dirty_samples;
    uint64_t total_sample_count = DirtyStat.page_sampling.total_sample_count;
    uint64_t total_block_mem_MB = DirtyStat.page_sampling.total_block_mem_MB;

    dirtyrate = total_dirty_samples * total_block_mem_MB *
                1000 / (total_sample_count * msec);

    DirtyStat.dirty_rate = dirtyrate;
}

/*
 * get hash result for the sampled memory with length of TARGET_PAGE_SIZE
 * in ramblock, which starts from ramblock base address.
 */
static uint32_t get_ramblock_vfn_hash(struct RamblockDirtyInfo *info,
                                      uint64_t vfn)
{
    uint32_t crc;

    crc = crc32(0, (info->ramblock_addr +
                    vfn * TARGET_PAGE_SIZE), TARGET_PAGE_SIZE);

    trace_get_ramblock_vfn_hash(info->idstr, vfn, crc);
    return crc;
}

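/*
 * Pick sample_pages_count random pages in the ramblock and record a
 * CRC32 hash for each; a later pass recomputes the hashes and counts
 * mismatches as dirtied pages.
 */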
static bool save_ramblock_hash(struct RamblockDirtyInfo *info)
{
    unsigned int sample_pages_count;
    int i;
    GRand *rand;

    sample_pages_count = info->sample_pages_count;

    /* ramblock size less than one page, return success to skip this ramblock */
    if (unlikely(info->ramblock_pages == 0 || sample_pages_count == 0)) {
        return true;
    }

    info->hash_result = g_try_malloc0_n(sample_pages_count,
                                        sizeof(uint32_t));
    if (!info->hash_result) {
        return false;
    }

    info->sample_page_vfn = g_try_malloc0_n(sample_pages_count,
                                            sizeof(uint64_t));
    if (!info->sample_page_vfn) {
        g_free(info->hash_result);
        return false;
    }

    rand = g_rand_new();
    for (i = 0; i < sample_pages_count; i++) {
        info->sample_page_vfn[i] = g_rand_int_range(rand, 0,
                                                    info->ramblock_pages - 1);
        info->hash_result[i] = get_ramblock_vfn_hash(info,
                                                     info->sample_page_vfn[i]);
    }
    g_rand_free(rand);

    return true;
}

static void get_ramblock_dirty_info(RAMBlock *block,
                                    struct RamblockDirtyInfo *info,
                                    struct DirtyRateConfig *config)
{
    uint64_t sample_pages_per_gigabytes = config->sample_pages_per_gigabytes;

    /* Right shift 30 bits to calc ramblock size in GB */
    info->sample_pages_count = (qemu_ram_get_used_length(block) *
                                sample_pages_per_gigabytes) >> 30;
    /* Right shift TARGET_PAGE_BITS to calc page count */
    info->ramblock_pages = qemu_ram_get_used_length(block) >>
                           TARGET_PAGE_BITS;
    info->ramblock_addr = qemu_ram_get_host_addr(block);
    strcpy(info->idstr, qemu_ram_get_idstr(block));
}

static void free_ramblock_dirty_info(struct RamblockDirtyInfo *infos, int count)
{
    int i;

    if (!infos) {
        return;
    }

    for (i = 0; i < count; i++) {
        g_free(infos[i].sample_page_vfn);
        g_free(infos[i].hash_result);
    }
    g_free(infos);
}

static bool skip_sample_ramblock(RAMBlock *block)
{
    /*
     * Sample only blocks larger than MIN_RAMBLOCK_SIZE.
     */
    if (qemu_ram_get_used_length(block) < (MIN_RAMBLOCK_SIZE << 10)) {
        trace_skip_sample_ramblock(block->idstr,
                                   qemu_ram_get_used_length(block));
        return true;
    }

    return false;
}

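/*
 * Walk all migratable ramblocks twice: first to count how many survive
 * the size filter, then to allocate per-block info and record the
 * baseline page hashes. On success, the array and its length are
 * returned through block_dinfo and block_count.
 */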
static bool record_ramblock_hash_info(struct RamblockDirtyInfo **block_dinfo,
                                      struct DirtyRateConfig config,
                                      int *block_count)
{
    struct RamblockDirtyInfo *info = NULL;
    struct RamblockDirtyInfo *dinfo = NULL;
    RAMBlock *block = NULL;
    int total_count = 0;
    int index = 0;
    bool ret = false;

    RAMBLOCK_FOREACH_MIGRATABLE(block) {
        if (skip_sample_ramblock(block)) {
            continue;
        }
        total_count++;
    }

    dinfo = g_try_malloc0_n(total_count, sizeof(struct RamblockDirtyInfo));
    if (dinfo == NULL) {
        goto out;
    }

    RAMBLOCK_FOREACH_MIGRATABLE(block) {
        if (skip_sample_ramblock(block)) {
            continue;
        }
        if (index >= total_count) {
            break;
        }
        info = &dinfo[index];
        get_ramblock_dirty_info(block, info, &config);
        if (!save_ramblock_hash(info)) {
            goto out;
        }
        index++;
    }
    ret = true;

out:
    *block_count = index;
    *block_dinfo = dinfo;
    return ret;
}

static void calc_page_dirty_rate(struct RamblockDirtyInfo *info)
{
    uint32_t crc;
    int i;

    for (i = 0; i < info->sample_pages_count; i++) {
        crc = get_ramblock_vfn_hash(info, info->sample_page_vfn[i]);
        if (crc != info->hash_result[i]) {
            trace_calc_page_dirty_rate(info->idstr, crc, info->hash_result[i]);
            info->sample_dirty_count++;
        }
    }
}

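/*
 * Look up the recorded info for a ramblock by idstr, rejecting entries
 * whose host address or page count changed since the baseline pass
 * (e.g. the block was resized), since their hashes are no longer
 * comparable.
 */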
static struct RamblockDirtyInfo *
find_block_matched(RAMBlock *block, int count,
                   struct RamblockDirtyInfo *infos)
{
    int i;
    struct RamblockDirtyInfo *matched;

    for (i = 0; i < count; i++) {
        if (!strcmp(infos[i].idstr, qemu_ram_get_idstr(block))) {
            break;
        }
    }

    if (i == count) {
        return NULL;
    }

    if (infos[i].ramblock_addr != qemu_ram_get_host_addr(block) ||
        infos[i].ramblock_pages !=
            (qemu_ram_get_used_length(block) >> TARGET_PAGE_BITS)) {
        trace_find_page_matched(block->idstr);
        return NULL;
    }

    matched = &infos[i];

    return matched;
}

static bool compare_page_hash_info(struct RamblockDirtyInfo *info,
                                   int block_count)
{
    struct RamblockDirtyInfo *block_dinfo = NULL;
    RAMBlock *block = NULL;

    RAMBLOCK_FOREACH_MIGRATABLE(block) {
        if (skip_sample_ramblock(block)) {
            continue;
        }
        block_dinfo = find_block_matched(block, block_count, info);
        if (block_dinfo == NULL) {
            continue;
        }
        calc_page_dirty_rate(block_dinfo);
        update_dirtyrate_stat(block_dinfo);
    }

    if (DirtyStat.page_sampling.total_sample_count == 0) {
        return false;
    }

    return true;
}

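/*
 * Snapshot a vcpu's dirty page counter (cpu->dirty_pages, filled from
 * the KVM dirty ring), either as the starting or the ending sample of
 * the measurement period.
 */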
static inline void record_dirtypages(DirtyPageRecord *dirty_pages,
                                     CPUState *cpu, bool start)
{
    if (start) {
        dirty_pages[cpu->cpu_index].start_pages = cpu->dirty_pages;
    } else {
        dirty_pages[cpu->cpu_index].end_pages = cpu->dirty_pages;
    }
}

static void dirtyrate_global_dirty_log_start(void)
{
    qemu_mutex_lock_iothread();
    memory_global_dirty_log_start(GLOBAL_DIRTY_DIRTY_RATE);
    qemu_mutex_unlock_iothread();
}

static void dirtyrate_global_dirty_log_stop(void)
{
    qemu_mutex_lock_iothread();
    memory_global_dirty_log_sync();
    memory_global_dirty_log_stop(GLOBAL_DIRTY_DIRTY_RATE);
    qemu_mutex_unlock_iothread();
}

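/*
 * Convert one vcpu's dirty page delta over the measurement period into
 * a rate in MB/s: pages * TARGET_PAGE_SIZE, shifted down to MB, divided
 * by the elapsed time in seconds.
 */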
static int64_t do_calculate_dirtyrate_vcpu(DirtyPageRecord dirty_pages)
{
    uint64_t memory_size_MB;
    int64_t time_s;
    uint64_t increased_dirty_pages =
        dirty_pages.end_pages - dirty_pages.start_pages;

    memory_size_MB = (increased_dirty_pages * TARGET_PAGE_SIZE) >> 20;
    time_s = DirtyStat.calc_time;

    return memory_size_MB / time_s;
}

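/*
 * Dirty ring measurement: enable global dirty logging, snapshot every
 * vcpu's dirty page counter, wait out the sample period, snapshot again,
 * then sum the per-vcpu rates into the overall dirty rate.
 */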
static void calculate_dirtyrate_dirty_ring(struct DirtyRateConfig config)
{
    CPUState *cpu;
    int64_t msec = 0;
    int64_t start_time;
    uint64_t dirtyrate = 0;
    uint64_t dirtyrate_sum = 0;
    DirtyPageRecord *dirty_pages;
    int nvcpu = 0;
    int i = 0;

    CPU_FOREACH(cpu) {
        nvcpu++;
    }

    dirty_pages = malloc(sizeof(*dirty_pages) * nvcpu);

    DirtyStat.dirty_ring.nvcpu = nvcpu;
    DirtyStat.dirty_ring.rates = malloc(sizeof(DirtyRateVcpu) * nvcpu);

    dirtyrate_global_dirty_log_start();

    CPU_FOREACH(cpu) {
        record_dirtypages(dirty_pages, cpu, true);
    }

    start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
    DirtyStat.start_time = start_time / 1000;

    msec = config.sample_period_seconds * 1000;
    msec = set_sample_page_period(msec, start_time);
    DirtyStat.calc_time = msec / 1000;

    dirtyrate_global_dirty_log_stop();

    CPU_FOREACH(cpu) {
        record_dirtypages(dirty_pages, cpu, false);
    }

    for (i = 0; i < DirtyStat.dirty_ring.nvcpu; i++) {
        dirtyrate = do_calculate_dirtyrate_vcpu(dirty_pages[i]);
        trace_dirtyrate_do_calculate_vcpu(i, dirtyrate);

        DirtyStat.dirty_ring.rates[i].id = i;
        DirtyStat.dirty_ring.rates[i].dirty_rate = dirtyrate;
        dirtyrate_sum += dirtyrate;
    }

    DirtyStat.dirty_rate = dirtyrate_sum;
    free(dirty_pages);
}

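/*
 * Page sampling measurement: hash randomly chosen pages of each
 * migratable ramblock, wait out the sample period, rehash, and scale
 * the fraction of changed samples up to the whole of sampled memory.
 */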
static void calculate_dirtyrate_sample_vm(struct DirtyRateConfig config)
{
    struct RamblockDirtyInfo *block_dinfo = NULL;
    int block_count = 0;
    int64_t msec = 0;
    int64_t initial_time;

    rcu_read_lock();
    initial_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
    if (!record_ramblock_hash_info(&block_dinfo, config, &block_count)) {
        goto out;
    }
    rcu_read_unlock();

    msec = config.sample_period_seconds * 1000;
    msec = set_sample_page_period(msec, initial_time);
    DirtyStat.start_time = initial_time / 1000;
    DirtyStat.calc_time = msec / 1000;

    rcu_read_lock();
    if (!compare_page_hash_info(block_dinfo, block_count)) {
        goto out;
    }

    update_dirtyrate(msec);

out:
    rcu_read_unlock();
    free_ramblock_dirty_info(block_dinfo, block_count);
}

static void calculate_dirtyrate(struct DirtyRateConfig config)
{
    if (config.mode == DIRTY_RATE_MEASURE_MODE_DIRTY_RING) {
        calculate_dirtyrate_dirty_ring(config);
    } else {
        calculate_dirtyrate_sample_vm(config);
    }

    trace_dirtyrate_calculate(DirtyStat.dirty_rate);
}

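/*
 * Worker thread body: runs one measurement and drives the state machine
 * UNSTARTED -> MEASURING -> MEASURED. Created detached from
 * qmp_calc_dirty_rate, so nobody joins it.
 */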
void *get_dirtyrate_thread(void *arg)
{
    struct DirtyRateConfig config = *(struct DirtyRateConfig *)arg;
    int ret;

    rcu_register_thread();

    ret = dirtyrate_set_state(&CalculatingState, DIRTY_RATE_STATUS_UNSTARTED,
                              DIRTY_RATE_STATUS_MEASURING);
    if (ret == -1) {
        error_report("change dirtyrate state failed.");
        return NULL;
    }

    calculate_dirtyrate(config);

    ret = dirtyrate_set_state(&CalculatingState, DIRTY_RATE_STATUS_MEASURING,
                              DIRTY_RATE_STATUS_MEASURED);
    if (ret == -1) {
        error_report("change dirtyrate state failed.");
    }

    rcu_unregister_thread();
    return NULL;
}

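/*
 * QMP 'calc-dirty-rate' entry point: validate the arguments, reset any
 * previous result, and kick off a detached measurement thread. Note that
 * config is static so it stays valid while the thread reads it.
 */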
void qmp_calc_dirty_rate(int64_t calc_time,
                         bool has_sample_pages,
                         int64_t sample_pages,
                         bool has_mode,
                         DirtyRateMeasureMode mode,
                         Error **errp)
{
    static struct DirtyRateConfig config;
    QemuThread thread;
    int ret;
    int64_t start_time;

    /*
     * If the dirty rate is already being measured, don't attempt to start.
     */
    if (qatomic_read(&CalculatingState) == DIRTY_RATE_STATUS_MEASURING) {
        error_setg(errp, "the dirty rate is already being measured.");
        return;
    }

    if (!is_sample_period_valid(calc_time)) {
        error_setg(errp, "calc-time is out of range[%d, %d].",
                   MIN_FETCH_DIRTYRATE_TIME_SEC,
                   MAX_FETCH_DIRTYRATE_TIME_SEC);
        return;
    }

    if (!has_mode) {
        mode = DIRTY_RATE_MEASURE_MODE_PAGE_SAMPLING;
    }

    if (has_sample_pages && mode == DIRTY_RATE_MEASURE_MODE_DIRTY_RING) {
        error_setg(errp, "either sample-pages or dirty-ring can be specified.");
        return;
    }

    if (has_sample_pages) {
        if (!is_sample_pages_valid(sample_pages)) {
            error_setg(errp, "sample-pages is out of range[%d, %d].",
                       MIN_SAMPLE_PAGE_COUNT,
                       MAX_SAMPLE_PAGE_COUNT);
            return;
        }
    } else {
        sample_pages = DIRTYRATE_DEFAULT_SAMPLE_PAGES;
    }

    /*
     * dirty ring mode only works when kvm dirty ring is enabled.
     */
    if ((mode == DIRTY_RATE_MEASURE_MODE_DIRTY_RING) &&
        !kvm_dirty_ring_enabled()) {
        error_setg(errp, "dirty ring is disabled, use sample-pages method "
                         "or remeasure later.");
        return;
    }

    /*
     * Init calculation state as unstarted.
     */
    ret = dirtyrate_set_state(&CalculatingState, CalculatingState,
                              DIRTY_RATE_STATUS_UNSTARTED);
    if (ret == -1) {
        error_setg(errp, "init dirty rate calculation state failed.");
        return;
    }

    config.sample_period_seconds = calc_time;
    config.sample_pages_per_gigabytes = sample_pages;
    config.mode = mode;

    cleanup_dirtyrate_stat(config);

    /*
     * update dirty rate mode so that we can figure out what mode has
     * been used in the last calculation
     */
    dirtyrate_mode = mode;

    start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) / 1000;
    init_dirtyrate_stat(start_time, config);

    qemu_thread_create(&thread, "get_dirtyrate", get_dirtyrate_thread,
                       (void *)&config, QEMU_THREAD_DETACHED);
}

struct DirtyRateInfo *qmp_query_dirty_rate(Error **errp)
{
    return query_dirty_rate_info();
}

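/* HMP 'info dirty_rate': pretty-print the most recent measurement. */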
void hmp_info_dirty_rate(Monitor *mon, const QDict *qdict)
{
    DirtyRateInfo *info = query_dirty_rate_info();

    monitor_printf(mon, "Status: %s\n",
                   DirtyRateStatus_str(info->status));
    monitor_printf(mon, "Start Time: %"PRIi64" (ms)\n",
                   info->start_time);
    monitor_printf(mon, "Sample Pages: %"PRIu64" (per GB)\n",
                   info->sample_pages);
    monitor_printf(mon, "Period: %"PRIi64" (sec)\n",
                   info->calc_time);
    monitor_printf(mon, "Mode: %s\n",
                   DirtyRateMeasureMode_str(info->mode));
    monitor_printf(mon, "Dirty rate: ");
    if (info->has_dirty_rate) {
        monitor_printf(mon, "%"PRIi64" (MB/s)\n", info->dirty_rate);
        if (info->has_vcpu_dirty_rate) {
            DirtyRateVcpuList *rate, *head = info->vcpu_dirty_rate;
            for (rate = head; rate != NULL; rate = rate->next) {
                monitor_printf(mon, "vcpu[%"PRIi64"], Dirty rate: %"PRIi64
                               " (MB/s)\n", rate->value->id,
                               rate->value->dirty_rate);
            }
        }
    } else {
        monitor_printf(mon, "(not ready)\n");
    }

    qapi_free_DirtyRateVcpuList(info->vcpu_dirty_rate);
    g_free(info);
}

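/*
 * HMP 'calc_dirty_rate': parse the monitor arguments and forward them
 * to the QMP implementation.
 */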
void hmp_calc_dirty_rate(Monitor *mon, const QDict *qdict)
{
    int64_t sec = qdict_get_try_int(qdict, "second", 0);
    int64_t sample_pages = qdict_get_try_int(qdict, "sample_pages_per_GB", -1);
    bool has_sample_pages = (sample_pages != -1);
    bool dirty_ring = qdict_get_try_bool(qdict, "dirty_ring", false);
    DirtyRateMeasureMode mode =
        (dirty_ring ? DIRTY_RATE_MEASURE_MODE_DIRTY_RING :
         DIRTY_RATE_MEASURE_MODE_PAGE_SAMPLING);
    Error *err = NULL;

    if (!sec) {
        monitor_printf(mon, "Incorrect period length specified!\n");
        return;
    }

    qmp_calc_dirty_rate(sec, has_sample_pages, sample_pages, true,
                        mode, &err);
    if (err) {
        hmp_handle_error(mon, err);
        return;
    }

    monitor_printf(mon, "Starting dirty rate measurement with period %"PRIi64
                   " seconds\n", sec);
    monitor_printf(mon, "[Please use 'info dirty_rate' to check results]\n");
}