/*
 * Dirtyrate implementation code
 *
 * Copyright (c) 2020 HUAWEI TECHNOLOGIES CO.,LTD.
 *
 * Authors:
 *  Chuan Zheng <zhengchuan@huawei.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */

#include "qemu/osdep.h"
#include <zlib.h>
#include "qapi/error.h"
#include "cpu.h"
#include "exec/ramblock.h"
#include "qemu/rcu_queue.h"
#include "qemu/main-loop.h"
#include "qapi/qapi-commands-migration.h"
#include "ram.h"
#include "trace.h"
#include "dirtyrate.h"
#include "monitor/hmp.h"
#include "monitor/monitor.h"
#include "qapi/qmp/qdict.h"
#include "sysemu/kvm.h"
#include "sysemu/runstate.h"
#include "exec/memory.h"

typedef struct DirtyPageRecord {
    uint64_t start_pages;
    uint64_t end_pages;
} DirtyPageRecord;

static int CalculatingState = DIRTY_RATE_STATUS_UNSTARTED;
static struct DirtyRateStat DirtyStat;
static DirtyRateMeasureMode dirtyrate_mode =
                DIRTY_RATE_MEASURE_MODE_PAGE_SAMPLING;

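/*
 * Overview of the flow implemented below: qmp_calc_dirty_rate() spawns a
 * detached worker thread that measures for the requested period in one of
 * two modes (page sampling via CRC32 hashes of randomly chosen pages, or
 * per-vCPU KVM dirty-ring counters), stores the result in DirtyStat, and
 * query-dirty-rate reads it back once the state reaches "measured".
 */
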
static int64_t set_sample_page_period(int64_t msec, int64_t initial_time)
{
    int64_t current_time;

    current_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
    if ((current_time - initial_time) >= msec) {
        msec = current_time - initial_time;
    } else {
        g_usleep((msec + initial_time - current_time) * 1000);
    }

    return msec;
}

static bool is_sample_period_valid(int64_t sec)
{
    if (sec < MIN_FETCH_DIRTYRATE_TIME_SEC ||
        sec > MAX_FETCH_DIRTYRATE_TIME_SEC) {
        return false;
    }

    return true;
}

static bool is_sample_pages_valid(int64_t pages)
{
    return pages >= MIN_SAMPLE_PAGE_COUNT &&
           pages <= MAX_SAMPLE_PAGE_COUNT;
}

static int dirtyrate_set_state(int *state, int old_state, int new_state)
{
    assert(new_state < DIRTY_RATE_STATUS__MAX);
    trace_dirtyrate_set_state(DirtyRateStatus_str(new_state));
    if (qatomic_cmpxchg(state, old_state, new_state) == old_state) {
        return 0;
    } else {
        return -1;
    }
}

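/*
 * CalculatingState follows unstarted -> measuring -> measured; the
 * compare-and-swap in dirtyrate_set_state() is what rejects a second
 * calc-dirty-rate request while a measurement is already running.
 */
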
static struct DirtyRateInfo *query_dirty_rate_info(void)
{
    int i;
    int64_t dirty_rate = DirtyStat.dirty_rate;
    struct DirtyRateInfo *info = g_malloc0(sizeof(DirtyRateInfo));
    DirtyRateVcpuList *head = NULL, **tail = &head;

    info->status = CalculatingState;
    info->start_time = DirtyStat.start_time;
    info->calc_time = DirtyStat.calc_time;
    info->sample_pages = DirtyStat.sample_pages;
    info->mode = dirtyrate_mode;

    if (qatomic_read(&CalculatingState) == DIRTY_RATE_STATUS_MEASURED) {
        info->has_dirty_rate = true;
        info->dirty_rate = dirty_rate;

        if (dirtyrate_mode == DIRTY_RATE_MEASURE_MODE_DIRTY_RING) {
            /*
             * set sample_pages to 0 to indicate that page sampling
             * isn't enabled
             */
            info->sample_pages = 0;
            info->has_vcpu_dirty_rate = true;
            for (i = 0; i < DirtyStat.dirty_ring.nvcpu; i++) {
                DirtyRateVcpu *rate = g_malloc0(sizeof(DirtyRateVcpu));
                rate->id = DirtyStat.dirty_ring.rates[i].id;
                rate->dirty_rate = DirtyStat.dirty_ring.rates[i].dirty_rate;
                QAPI_LIST_APPEND(tail, rate);
            }
            info->vcpu_dirty_rate = head;
        }
    }

    trace_query_dirty_rate_info(DirtyRateStatus_str(CalculatingState));

    return info;
}

static void init_dirtyrate_stat(int64_t start_time,
                                struct DirtyRateConfig config)
{
    DirtyStat.dirty_rate = -1;
    DirtyStat.start_time = start_time;
    DirtyStat.calc_time = config.sample_period_seconds;
    DirtyStat.sample_pages = config.sample_pages_per_gigabytes;

    switch (config.mode) {
    case DIRTY_RATE_MEASURE_MODE_PAGE_SAMPLING:
        DirtyStat.page_sampling.total_dirty_samples = 0;
        DirtyStat.page_sampling.total_sample_count = 0;
        DirtyStat.page_sampling.total_block_mem_MB = 0;
        break;
    case DIRTY_RATE_MEASURE_MODE_DIRTY_RING:
        DirtyStat.dirty_ring.nvcpu = -1;
        DirtyStat.dirty_ring.rates = NULL;
        break;
    default:
        break;
    }
}

static void cleanup_dirtyrate_stat(struct DirtyRateConfig config)
{
    /* the previous calc-dirty-rate QMP command used dirty-ring mode */
    if (dirtyrate_mode == DIRTY_RATE_MEASURE_MODE_DIRTY_RING) {
        g_free(DirtyStat.dirty_ring.rates);
        DirtyStat.dirty_ring.rates = NULL;
    }
}

static void update_dirtyrate_stat(struct RamblockDirtyInfo *info)
{
    DirtyStat.page_sampling.total_dirty_samples += info->sample_dirty_count;
    DirtyStat.page_sampling.total_sample_count += info->sample_pages_count;
    /* size of total pages in MB */
    DirtyStat.page_sampling.total_block_mem_MB += (info->ramblock_pages *
                                                   TARGET_PAGE_SIZE) >> 20;
}

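/*
 * Extrapolate the dirty rate from the samples: the fraction of sampled
 * pages found dirty, scaled by the total size of the sampled blocks and
 * normalized from the elapsed milliseconds to a per-second rate.  For
 * example, 10 dirty out of 400 samples over 4096 MB of blocks in 1000 ms
 * gives 10 * 4096 * 1000 / (400 * 1000) = 102 MB/s (integer math).
 */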
static void update_dirtyrate(uint64_t msec)
{
    uint64_t dirtyrate;
    uint64_t total_dirty_samples = DirtyStat.page_sampling.total_dirty_samples;
    uint64_t total_sample_count = DirtyStat.page_sampling.total_sample_count;
    uint64_t total_block_mem_MB = DirtyStat.page_sampling.total_block_mem_MB;

    dirtyrate = total_dirty_samples * total_block_mem_MB *
                1000 / (total_sample_count * msec);

    DirtyStat.dirty_rate = dirtyrate;
}

/*
 * Compute the hash of one sampled page of TARGET_PAGE_SIZE bytes in the
 * ramblock, located at page frame number @vfn from the ramblock base
 * address.
 */
static uint32_t get_ramblock_vfn_hash(struct RamblockDirtyInfo *info,
                                      uint64_t vfn)
{
    uint32_t crc;

    crc = crc32(0, (info->ramblock_addr +
                vfn * TARGET_PAGE_SIZE), TARGET_PAGE_SIZE);

    trace_get_ramblock_vfn_hash(info->idstr, vfn, crc);
    return crc;
}

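/*
 * Page-sampling technique: pick random page indices in each migratable
 * ramblock and record their CRC32 hashes here; after the calculation
 * period, calc_page_dirty_rate() re-hashes the same pages, and a changed
 * hash marks the page as dirtied.
 */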
static bool save_ramblock_hash(struct RamblockDirtyInfo *info)
{
    unsigned int sample_pages_count;
    int i;
    GRand *rand;

    sample_pages_count = info->sample_pages_count;

    /* ramblock smaller than one page, return success to skip this ramblock */
    if (unlikely(info->ramblock_pages == 0 || sample_pages_count == 0)) {
        return true;
    }

    info->hash_result = g_try_malloc0_n(sample_pages_count,
                                        sizeof(uint32_t));
    if (!info->hash_result) {
        return false;
    }

    info->sample_page_vfn = g_try_malloc0_n(sample_pages_count,
                                            sizeof(uint64_t));
    if (!info->sample_page_vfn) {
        g_free(info->hash_result);
        return false;
    }

    rand = g_rand_new();
    for (i = 0; i < sample_pages_count; i++) {
        /* g_rand_int_range() excludes the upper bound, so pass the count */
        info->sample_page_vfn[i] = g_rand_int_range(rand, 0,
                                                    info->ramblock_pages);
        info->hash_result[i] = get_ramblock_vfn_hash(info,
                                                     info->sample_page_vfn[i]);
    }
    g_rand_free(rand);

    return true;
}

static void get_ramblock_dirty_info(RAMBlock *block,
                                    struct RamblockDirtyInfo *info,
                                    struct DirtyRateConfig *config)
{
    uint64_t sample_pages_per_gigabytes = config->sample_pages_per_gigabytes;

    /* Right shift 30 bits to calc ramblock size in GB */
    info->sample_pages_count = (qemu_ram_get_used_length(block) *
                                sample_pages_per_gigabytes) >> 30;
    /* Right shift TARGET_PAGE_BITS to calc page count */
    info->ramblock_pages = qemu_ram_get_used_length(block) >>
                           TARGET_PAGE_BITS;
    info->ramblock_addr = qemu_ram_get_host_addr(block);
    strcpy(info->idstr, qemu_ram_get_idstr(block));
}

static void free_ramblock_dirty_info(struct RamblockDirtyInfo *infos, int count)
{
    int i;

    if (!infos) {
        return;
    }

    for (i = 0; i < count; i++) {
        g_free(infos[i].sample_page_vfn);
        g_free(infos[i].hash_result);
    }
    g_free(infos);
}

static bool skip_sample_ramblock(RAMBlock *block)
{
    /*
     * Sample only blocks larger than MIN_RAMBLOCK_SIZE.
     */
    if (qemu_ram_get_used_length(block) < (MIN_RAMBLOCK_SIZE << 10)) {
        trace_skip_sample_ramblock(block->idstr,
                                   qemu_ram_get_used_length(block));
        return true;
    }

    return false;
}

static bool record_ramblock_hash_info(struct RamblockDirtyInfo **block_dinfo,
                                      struct DirtyRateConfig config,
                                      int *block_count)
{
    struct RamblockDirtyInfo *info = NULL;
    struct RamblockDirtyInfo *dinfo = NULL;
    RAMBlock *block = NULL;
    int total_count = 0;
    int index = 0;
    bool ret = false;

    RAMBLOCK_FOREACH_MIGRATABLE(block) {
        if (skip_sample_ramblock(block)) {
            continue;
        }
        total_count++;
    }

    dinfo = g_try_malloc0_n(total_count, sizeof(struct RamblockDirtyInfo));
    if (dinfo == NULL) {
        goto out;
    }

    RAMBLOCK_FOREACH_MIGRATABLE(block) {
        if (skip_sample_ramblock(block)) {
            continue;
        }
        if (index >= total_count) {
            break;
        }
        info = &dinfo[index];
        get_ramblock_dirty_info(block, info, &config);
        if (!save_ramblock_hash(info)) {
            goto out;
        }
        index++;
    }
    ret = true;

out:
    *block_count = index;
    *block_dinfo = dinfo;
    return ret;
}

static void calc_page_dirty_rate(struct RamblockDirtyInfo *info)
{
    uint32_t crc;
    int i;

    for (i = 0; i < info->sample_pages_count; i++) {
        crc = get_ramblock_vfn_hash(info, info->sample_page_vfn[i]);
        if (crc != info->hash_result[i]) {
            trace_calc_page_dirty_rate(info->idstr, crc, info->hash_result[i]);
            info->sample_dirty_count++;
        }
    }
}

static struct RamblockDirtyInfo *
find_block_matched(RAMBlock *block, int count,
                   struct RamblockDirtyInfo *infos)
{
    int i;
    struct RamblockDirtyInfo *matched;

    for (i = 0; i < count; i++) {
        if (!strcmp(infos[i].idstr, qemu_ram_get_idstr(block))) {
            break;
        }
    }

    if (i == count) {
        return NULL;
    }

    if (infos[i].ramblock_addr != qemu_ram_get_host_addr(block) ||
        infos[i].ramblock_pages !=
            (qemu_ram_get_used_length(block) >> TARGET_PAGE_BITS)) {
        trace_find_page_matched(block->idstr);
        return NULL;
    }

    matched = &infos[i];

    return matched;
}

static bool compare_page_hash_info(struct RamblockDirtyInfo *info,
                                   int block_count)
{
    struct RamblockDirtyInfo *block_dinfo = NULL;
    RAMBlock *block = NULL;

    RAMBLOCK_FOREACH_MIGRATABLE(block) {
        if (skip_sample_ramblock(block)) {
            continue;
        }
        block_dinfo = find_block_matched(block, block_count, info);
        if (block_dinfo == NULL) {
            continue;
        }
        calc_page_dirty_rate(block_dinfo);
        update_dirtyrate_stat(block_dinfo);
    }

    if (DirtyStat.page_sampling.total_sample_count == 0) {
        return false;
    }

    return true;
}

static inline void record_dirtypages(DirtyPageRecord *dirty_pages,
                                     CPUState *cpu, bool start)
{
    if (start) {
        dirty_pages[cpu->cpu_index].start_pages = cpu->dirty_pages;
    } else {
        dirty_pages[cpu->cpu_index].end_pages = cpu->dirty_pages;
    }
}

static void dirtyrate_global_dirty_log_start(void)
{
    qemu_mutex_lock_iothread();
    memory_global_dirty_log_start(GLOBAL_DIRTY_DIRTY_RATE);
    qemu_mutex_unlock_iothread();
}

static void dirtyrate_global_dirty_log_stop(void)
{
    qemu_mutex_lock_iothread();
    memory_global_dirty_log_sync();
    memory_global_dirty_log_stop(GLOBAL_DIRTY_DIRTY_RATE);
    qemu_mutex_unlock_iothread();
}

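/*
 * Dirty-ring technique: with KVM's dirty ring enabled, each vCPU keeps a
 * cpu->dirty_pages counter that grows as KVM reports dirtied pages; the
 * per-vCPU rate is the pages dirtied between the start and end snapshots,
 * converted to MB and divided by the calculation time in seconds.
 */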
static int64_t do_calculate_dirtyrate_vcpu(DirtyPageRecord dirty_pages)
{
    uint64_t memory_size_MB;
    int64_t time_s;
    uint64_t increased_dirty_pages =
        dirty_pages.end_pages - dirty_pages.start_pages;

    memory_size_MB = (increased_dirty_pages * TARGET_PAGE_SIZE) >> 20;
    time_s = DirtyStat.calc_time;

    return memory_size_MB / time_s;
}

static void calculate_dirtyrate_dirty_ring(struct DirtyRateConfig config)
{
    CPUState *cpu;
    int64_t msec = 0;
    int64_t start_time;
    uint64_t dirtyrate = 0;
    uint64_t dirtyrate_sum = 0;
    DirtyPageRecord *dirty_pages;
    int nvcpu = 0;
    int i = 0;

    CPU_FOREACH(cpu) {
        nvcpu++;
    }

    dirty_pages = g_new0(DirtyPageRecord, nvcpu);

    DirtyStat.dirty_ring.nvcpu = nvcpu;
    DirtyStat.dirty_ring.rates = g_new0(DirtyRateVcpu, nvcpu);

    dirtyrate_global_dirty_log_start();

    CPU_FOREACH(cpu) {
        record_dirtypages(dirty_pages, cpu, true);
    }

    start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
    DirtyStat.start_time = start_time / 1000;

    msec = config.sample_period_seconds * 1000;
    msec = set_sample_page_period(msec, start_time);
    DirtyStat.calc_time = msec / 1000;

    dirtyrate_global_dirty_log_stop();

    CPU_FOREACH(cpu) {
        record_dirtypages(dirty_pages, cpu, false);
    }

    for (i = 0; i < DirtyStat.dirty_ring.nvcpu; i++) {
        dirtyrate = do_calculate_dirtyrate_vcpu(dirty_pages[i]);
        trace_dirtyrate_do_calculate_vcpu(i, dirtyrate);

        DirtyStat.dirty_ring.rates[i].id = i;
        DirtyStat.dirty_ring.rates[i].dirty_rate = dirtyrate;
        dirtyrate_sum += dirtyrate;
    }

    DirtyStat.dirty_rate = dirtyrate_sum;
    g_free(dirty_pages);
}

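/*
 * Page-sampling mode: phase 1 hashes the sampled pages under the RCU read
 * lock, phase 2 drops the lock and sleeps for the calculation period, and
 * phase 3 re-hashes and compares under the lock to derive the rate.
 */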
static void calculate_dirtyrate_sample_vm(struct DirtyRateConfig config)
{
    struct RamblockDirtyInfo *block_dinfo = NULL;
    int block_count = 0;
    int64_t msec = 0;
    int64_t initial_time;

    rcu_read_lock();
    initial_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
    if (!record_ramblock_hash_info(&block_dinfo, config, &block_count)) {
        goto out;
    }
    rcu_read_unlock();

    msec = config.sample_period_seconds * 1000;
    msec = set_sample_page_period(msec, initial_time);
    DirtyStat.start_time = initial_time / 1000;
    DirtyStat.calc_time = msec / 1000;

    rcu_read_lock();
    if (!compare_page_hash_info(block_dinfo, block_count)) {
        goto out;
    }

    update_dirtyrate(msec);

out:
    rcu_read_unlock();
    free_ramblock_dirty_info(block_dinfo, block_count);
}

static void calculate_dirtyrate(struct DirtyRateConfig config)
{
    if (config.mode == DIRTY_RATE_MEASURE_MODE_DIRTY_RING) {
        calculate_dirtyrate_dirty_ring(config);
    } else {
        calculate_dirtyrate_sample_vm(config);
    }

    trace_dirtyrate_calculate(DirtyStat.dirty_rate);
}

void *get_dirtyrate_thread(void *arg)
{
    struct DirtyRateConfig config = *(struct DirtyRateConfig *)arg;
    int ret;

    rcu_register_thread();

    ret = dirtyrate_set_state(&CalculatingState, DIRTY_RATE_STATUS_UNSTARTED,
                              DIRTY_RATE_STATUS_MEASURING);
    if (ret == -1) {
        error_report("change dirtyrate state failed.");
        /* don't leak the RCU registration on the early-exit path */
        rcu_unregister_thread();
        return NULL;
    }

    calculate_dirtyrate(config);

    ret = dirtyrate_set_state(&CalculatingState, DIRTY_RATE_STATUS_MEASURING,
                              DIRTY_RATE_STATUS_MEASURED);
    if (ret == -1) {
        error_report("change dirtyrate state failed.");
    }

    rcu_unregister_thread();
    return NULL;
}

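/*
 * QMP entry point.  A minimal usage sketch, mirroring the parameters of
 * this handler (the authoritative schema for calc-dirty-rate and
 * query-dirty-rate lives in qapi/migration.json):
 *
 *   { "execute": "calc-dirty-rate",
 *     "arguments": { "calc-time": 1, "mode": "dirty-ring" } }
 *   ... wait at least calc-time seconds ...
 *   { "execute": "query-dirty-rate" }
 */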
void qmp_calc_dirty_rate(int64_t calc_time,
                         bool has_sample_pages,
                         int64_t sample_pages,
                         bool has_mode,
                         DirtyRateMeasureMode mode,
                         Error **errp)
{
    static struct DirtyRateConfig config;
    QemuThread thread;
    int ret;
    int64_t start_time;

    /*
     * If the dirty rate is already being measured, don't attempt to start.
     */
    if (qatomic_read(&CalculatingState) == DIRTY_RATE_STATUS_MEASURING) {
        error_setg(errp, "the dirty rate is already being measured.");
        return;
    }

    if (!is_sample_period_valid(calc_time)) {
        error_setg(errp, "calc-time is out of range[%d, %d].",
                   MIN_FETCH_DIRTYRATE_TIME_SEC,
                   MAX_FETCH_DIRTYRATE_TIME_SEC);
        return;
    }

    if (!has_mode) {
        mode = DIRTY_RATE_MEASURE_MODE_PAGE_SAMPLING;
    }

    if (has_sample_pages && mode == DIRTY_RATE_MEASURE_MODE_DIRTY_RING) {
        error_setg(errp, "sample-pages only applies to page-sampling mode "
                   "and cannot be combined with dirty-ring mode.");
        return;
    }

    if (has_sample_pages) {
        if (!is_sample_pages_valid(sample_pages)) {
            error_setg(errp, "sample-pages is out of range[%d, %d].",
                       MIN_SAMPLE_PAGE_COUNT,
                       MAX_SAMPLE_PAGE_COUNT);
            return;
        }
    } else {
        sample_pages = DIRTYRATE_DEFAULT_SAMPLE_PAGES;
    }

    /*
     * dirty ring mode only works when the KVM dirty ring is enabled.
     */
    if ((mode == DIRTY_RATE_MEASURE_MODE_DIRTY_RING) &&
        !kvm_dirty_ring_enabled()) {
        error_setg(errp, "dirty ring is disabled, use sample-pages method "
                   "or remeasure later.");
        return;
    }

    /*
     * Init calculation state as unstarted.
     */
    ret = dirtyrate_set_state(&CalculatingState, CalculatingState,
                              DIRTY_RATE_STATUS_UNSTARTED);
    if (ret == -1) {
        error_setg(errp, "init dirty rate calculation state failed.");
        return;
    }

    /* config is static so that it outlives the detached worker thread */
    config.sample_period_seconds = calc_time;
    config.sample_pages_per_gigabytes = sample_pages;
    config.mode = mode;

    cleanup_dirtyrate_stat(config);

    /*
     * update the dirty rate mode so that we can figure out what mode was
     * used in the last calculation
     */
    dirtyrate_mode = mode;

    start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) / 1000;
    init_dirtyrate_stat(start_time, config);

    qemu_thread_create(&thread, "get_dirtyrate", get_dirtyrate_thread,
                       (void *)&config, QEMU_THREAD_DETACHED);
}

struct DirtyRateInfo *qmp_query_dirty_rate(Error **errp)
{
    return query_dirty_rate_info();
}

void hmp_info_dirty_rate(Monitor *mon, const QDict *qdict)
{
    DirtyRateInfo *info = query_dirty_rate_info();

    monitor_printf(mon, "Status: %s\n",
                   DirtyRateStatus_str(info->status));
    monitor_printf(mon, "Start Time: %"PRIi64" (ms)\n",
                   info->start_time);
    monitor_printf(mon, "Sample Pages: %"PRIu64" (per GB)\n",
                   info->sample_pages);
    monitor_printf(mon, "Period: %"PRIi64" (sec)\n",
                   info->calc_time);
    monitor_printf(mon, "Mode: %s\n",
                   DirtyRateMeasureMode_str(info->mode));
    monitor_printf(mon, "Dirty rate: ");
    if (info->has_dirty_rate) {
        monitor_printf(mon, "%"PRIi64" (MB/s)\n", info->dirty_rate);
        if (info->has_vcpu_dirty_rate) {
            DirtyRateVcpuList *rate, *head = info->vcpu_dirty_rate;
            for (rate = head; rate != NULL; rate = rate->next) {
                monitor_printf(mon, "vcpu[%"PRIi64"], Dirty rate: %"PRIi64
                               " (MB/s)\n", rate->value->id,
                               rate->value->dirty_rate);
            }
        }
    } else {
        monitor_printf(mon, "(not ready)\n");
    }

    qapi_free_DirtyRateVcpuList(info->vcpu_dirty_rate);
    g_free(info);
}

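/*
 * HMP entry point.  A usage sketch inferred from the qdict keys read
 * below; the exact option spelling is defined in hmp-commands.hx, and the
 * "-r" flag selecting dirty-ring mode is an assumption:
 *
 *   (qemu) calc_dirty_rate 1
 *   (qemu) calc_dirty_rate -r 1
 *   (qemu) info dirty_rate
 */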
void hmp_calc_dirty_rate(Monitor *mon, const QDict *qdict)
{
    int64_t sec = qdict_get_try_int(qdict, "second", 0);
    int64_t sample_pages = qdict_get_try_int(qdict, "sample_pages_per_GB", -1);
    bool has_sample_pages = (sample_pages != -1);
    bool dirty_ring = qdict_get_try_bool(qdict, "dirty_ring", false);
    DirtyRateMeasureMode mode =
        (dirty_ring ? DIRTY_RATE_MEASURE_MODE_DIRTY_RING :
         DIRTY_RATE_MEASURE_MODE_PAGE_SAMPLING);
    Error *err = NULL;

    if (!sec) {
        monitor_printf(mon, "Incorrect period length specified!\n");
        return;
    }

    qmp_calc_dirty_rate(sec, has_sample_pages, sample_pages, true,
                        mode, &err);
    if (err) {
        hmp_handle_error(mon, err);
        return;
    }

    monitor_printf(mon, "Starting dirty rate measurement with period %"PRIi64
                   " seconds\n", sec);
    monitor_printf(mon, "[Please use 'info dirty_rate' to check results]\n");
}