/*
 * Dirty page rate limit implementation code
 *
 * Copyright (c) 2022 CHINA TELECOM CO.,LTD.
 *
 * Authors:
 *  Hyman Huang(黄勇) <huangy81@chinatelecom.cn>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */
13 #include "qemu/osdep.h"
14 #include "qemu/main-loop.h"
15 #include "qapi/qapi-commands-migration.h"
16 #include "qapi/qmp/qdict.h"
17 #include "qapi/error.h"
18 #include "sysemu/dirtyrate.h"
19 #include "sysemu/dirtylimit.h"
20 #include "monitor/hmp.h"
21 #include "monitor/monitor.h"
22 #include "exec/memory.h"
23 #include "exec/target_page.h"
24 #include "hw/boards.h"
25 #include "sysemu/kvm.h"
27 #include "migration/misc.h"
28 #include "migration/migration.h"
29 #include "migration/options.h"
/*
 * Dirtylimit stops working if the dirty page rate error
 * value is less than DIRTYLIMIT_TOLERANCE_RANGE.
 */
#define DIRTYLIMIT_TOLERANCE_RANGE  25  /* MB/s */
/*
 * Increase or decrease the vCPU sleep time linearly if the dirty
 * page rate error percentage is over
 * DIRTYLIMIT_LINEAR_ADJUSTMENT_PCT.
 * Otherwise, add or subtract a fixed vCPU sleep time.
 */
#define DIRTYLIMIT_LINEAR_ADJUSTMENT_PCT     50
/*
 * Max vCPU sleep time percentage during a cycle
 * composed of dirty ring full and sleep time.
 */
#define DIRTYLIMIT_THROTTLE_PCT_MAX 99
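/*
 * Rough intuition for the three constants above (illustrative numbers,
 * not from the source): with a quota of 100 MB/s, a measured rate of
 * 120 MB/s is within 25 MB/s of the quota, so no adjustment is made;
 * a rate of 140 MB/s gives an error of (140 - 100) * 100 / 140 ~= 28%,
 * below 50%, so a fixed sleep-time step is applied; a rate of 400 MB/s
 * gives an error of 75%, above 50%, so the sleep time is adjusted
 * linearly. The sleep time is never allowed to exceed 99% of the
 * ring-full-plus-sleep cycle.
 */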
struct {
    VcpuStat stat;
    bool running;
    QemuThread thread;
} *vcpu_dirty_rate_stat;
typedef struct VcpuDirtyLimitState {
    int cpu_index;
    bool enabled;
    /*
     * Quota dirty page rate, in MB/s;
     * zero if not enabled.
     */
    uint64_t quota;
} VcpuDirtyLimitState;
struct {
    VcpuDirtyLimitState *states;
    /* Max number of CPUs configured by the user */
    int max_cpus;
    /* Number of vCPUs under dirtylimit */
    int limited_nvcpu;
} *dirtylimit_state;
/* Protects dirtylimit_state */
static QemuMutex dirtylimit_mutex;

/* The dirtylimit thread quits if dirtylimit_quit is true */
static bool dirtylimit_quit;
static void vcpu_dirty_rate_stat_collect(void)
{
    MigrationState *s = migrate_get_current();
    VcpuStat stat;
    int i = 0;
    int64_t period = DIRTYLIMIT_CALC_TIME_MS;

    if (migrate_dirty_limit() &&
        migration_is_active(s)) {
        period = s->parameters.x_vcpu_dirty_limit_period;
    }

    /* calculate vcpu dirty rate */
    vcpu_calculate_dirtyrate(period,
                             &stat,
                             GLOBAL_DIRTY_LIMIT,
                             false);

    for (i = 0; i < stat.nvcpu; i++) {
        vcpu_dirty_rate_stat->stat.rates[i].id = i;
        vcpu_dirty_rate_stat->stat.rates[i].dirty_rate =
            stat.rates[i].dirty_rate;
    }

    g_free(stat.rates);
}
static void *vcpu_dirty_rate_stat_thread(void *opaque)
{
    rcu_register_thread();

    /* start log sync */
    global_dirty_log_change(GLOBAL_DIRTY_LIMIT, true);

    while (qatomic_read(&vcpu_dirty_rate_stat->running)) {
        vcpu_dirty_rate_stat_collect();
        if (dirtylimit_in_service()) {
            dirtylimit_process();
        }
    }

    /* stop log sync */
    global_dirty_log_change(GLOBAL_DIRTY_LIMIT, false);

    rcu_unregister_thread();
    return NULL;
}
int64_t vcpu_dirty_rate_get(int cpu_index)
{
    DirtyRateVcpu *rates = vcpu_dirty_rate_stat->stat.rates;
    return qatomic_read_i64(&rates[cpu_index].dirty_rate);
}
void vcpu_dirty_rate_stat_start(void)
{
    if (qatomic_read(&vcpu_dirty_rate_stat->running)) {
        return;
    }

    qatomic_set(&vcpu_dirty_rate_stat->running, 1);
    qemu_thread_create(&vcpu_dirty_rate_stat->thread,
                       "dirtyrate-calc",
                       vcpu_dirty_rate_stat_thread,
                       NULL,
                       QEMU_THREAD_JOINABLE);
}
void vcpu_dirty_rate_stat_stop(void)
{
    qatomic_set(&vcpu_dirty_rate_stat->running, 0);
    dirtylimit_state_unlock();
    qemu_mutex_unlock_iothread();
    qemu_thread_join(&vcpu_dirty_rate_stat->thread);
    qemu_mutex_lock_iothread();
    dirtylimit_state_lock();
}
void vcpu_dirty_rate_stat_initialize(void)
{
    MachineState *ms = MACHINE(qdev_get_machine());
    int max_cpus = ms->smp.max_cpus;

    vcpu_dirty_rate_stat =
        g_malloc0(sizeof(*vcpu_dirty_rate_stat));

    vcpu_dirty_rate_stat->stat.nvcpu = max_cpus;
    vcpu_dirty_rate_stat->stat.rates =
        g_new0(DirtyRateVcpu, max_cpus);

    vcpu_dirty_rate_stat->running = false;
}
void vcpu_dirty_rate_stat_finalize(void)
{
    g_free(vcpu_dirty_rate_stat->stat.rates);
    vcpu_dirty_rate_stat->stat.rates = NULL;

    g_free(vcpu_dirty_rate_stat);
    vcpu_dirty_rate_stat = NULL;
}
void dirtylimit_state_lock(void)
{
    qemu_mutex_lock(&dirtylimit_mutex);
}

void dirtylimit_state_unlock(void)
{
    qemu_mutex_unlock(&dirtylimit_mutex);
}
static void
__attribute__((__constructor__)) dirtylimit_mutex_init(void)
{
    qemu_mutex_init(&dirtylimit_mutex);
}
static inline VcpuDirtyLimitState *dirtylimit_vcpu_get_state(int cpu_index)
{
    return &dirtylimit_state->states[cpu_index];
}
void dirtylimit_state_initialize(void)
{
    MachineState *ms = MACHINE(qdev_get_machine());
    int max_cpus = ms->smp.max_cpus;
    int i;

    dirtylimit_state = g_malloc0(sizeof(*dirtylimit_state));

    dirtylimit_state->states =
        g_new0(VcpuDirtyLimitState, max_cpus);

    for (i = 0; i < max_cpus; i++) {
        dirtylimit_state->states[i].cpu_index = i;
    }

    dirtylimit_state->max_cpus = max_cpus;
    trace_dirtylimit_state_initialize(max_cpus);
}
void dirtylimit_state_finalize(void)
{
    g_free(dirtylimit_state->states);
    dirtylimit_state->states = NULL;

    g_free(dirtylimit_state);
    dirtylimit_state = NULL;

    trace_dirtylimit_state_finalize();
}
bool dirtylimit_in_service(void)
{
    return !!dirtylimit_state;
}
bool dirtylimit_vcpu_index_valid(int cpu_index)
{
    MachineState *ms = MACHINE(qdev_get_machine());

    return !(cpu_index < 0 ||
             cpu_index >= ms->smp.max_cpus);
}
static uint64_t dirtylimit_dirty_ring_full_time(uint64_t dirtyrate)
{
    static uint64_t max_dirtyrate;
    uint64_t dirty_ring_size_MiB;

    dirty_ring_size_MiB = qemu_target_pages_to_MiB(kvm_dirty_ring_size());

    if (max_dirtyrate < dirtyrate) {
        max_dirtyrate = dirtyrate;
    }

    return dirty_ring_size_MiB * 1000000 / max_dirtyrate;
}
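/*
 * Worked example for the estimate above (illustrative numbers, not from
 * the source): with a dirty ring of 4096 entries and 4 KiB target pages,
 * dirty_ring_size_MiB is 16 MiB. If the highest dirty page rate seen so
 * far is 800 MB/s, the ring is expected to fill in roughly
 * 16 * 1000000 / 800 = 20000 microseconds, i.e. about 20 ms. Using the
 * historical maximum rate keeps the estimate conservative: the computed
 * full time can only shrink, never grow, as faster rates are observed.
 */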
static inline bool dirtylimit_done(uint64_t quota,
                                   uint64_t current)
{
    uint64_t min, max;

    min = MIN(quota, current);
    max = MAX(quota, current);

    return (max - min) <= DIRTYLIMIT_TOLERANCE_RANGE;
}
static bool
dirtylimit_need_linear_adjustment(uint64_t quota,
                                  uint64_t current)
{
    uint64_t min, max;

    min = MIN(quota, current);
    max = MAX(quota, current);

    return ((max - min) * 100 / max) > DIRTYLIMIT_LINEAR_ADJUSTMENT_PCT;
}
static void dirtylimit_set_throttle(CPUState *cpu,
                                    uint64_t quota,
                                    uint64_t current)
{
    int64_t ring_full_time_us = 0;
    uint64_t sleep_pct = 0;
    uint64_t throttle_us = 0;

    if (current == 0) {
        cpu->throttle_us_per_full = 0;
        return;
    }

    ring_full_time_us = dirtylimit_dirty_ring_full_time(current);

    if (dirtylimit_need_linear_adjustment(quota, current)) {
        if (quota < current) {
            sleep_pct = (current - quota) * 100 / current;
            throttle_us =
                ring_full_time_us * sleep_pct / (double)(100 - sleep_pct);
            cpu->throttle_us_per_full += throttle_us;
        } else {
            sleep_pct = (quota - current) * 100 / quota;
            throttle_us =
                ring_full_time_us * sleep_pct / (double)(100 - sleep_pct);
            cpu->throttle_us_per_full -= throttle_us;
        }

        trace_dirtylimit_throttle_pct(cpu->cpu_index,
                                      sleep_pct,
                                      throttle_us);
    } else {
        if (quota < current) {
            cpu->throttle_us_per_full += ring_full_time_us / 10;
        } else {
            cpu->throttle_us_per_full -= ring_full_time_us / 10;
        }
    }

    /*
     * TODO: in the case of a large kvm_dirty_ring_size (e.g. 65536),
     * the current dirty page rate may never reach the quota; should we
     * stop increasing the sleep time in that case?
     */
    cpu->throttle_us_per_full = MIN(cpu->throttle_us_per_full,
        ring_full_time_us * DIRTYLIMIT_THROTTLE_PCT_MAX);

    cpu->throttle_us_per_full = MAX(cpu->throttle_us_per_full, 0);
}
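/*
 * Sketch of the linear branch above (illustrative numbers, not from the
 * source): with quota = 100 MB/s and current = 400 MB/s, the error is
 * 75% (> DIRTYLIMIT_LINEAR_ADJUSTMENT_PCT), so sleep_pct = 75 and
 * throttle_us = ring_full_time_us * 75 / 25 = 3 * ring_full_time_us.
 * Sleeping three times as long as the ring takes to fill leaves the
 * vCPU running for roughly 1 / (1 + 3) = 25% of each cycle, which
 * scales a 400 MB/s dirty rate down toward the 100 MB/s quota.
 */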
static void dirtylimit_adjust_throttle(CPUState *cpu)
{
    uint64_t quota = 0;
    uint64_t current = 0;
    int cpu_index = cpu->cpu_index;

    quota = dirtylimit_vcpu_get_state(cpu_index)->quota;
    current = vcpu_dirty_rate_get(cpu_index);

    if (!dirtylimit_done(quota, current)) {
        dirtylimit_set_throttle(cpu, quota, current);
    }
}
void dirtylimit_process(void)
{
    CPUState *cpu;

    if (!qatomic_read(&dirtylimit_quit)) {
        dirtylimit_state_lock();

        if (!dirtylimit_in_service()) {
            dirtylimit_state_unlock();
            return;
        }

        CPU_FOREACH(cpu) {
            if (!dirtylimit_vcpu_get_state(cpu->cpu_index)->enabled) {
                continue;
            }
            dirtylimit_adjust_throttle(cpu);
        }
        dirtylimit_state_unlock();
    }
}
void dirtylimit_change(bool start)
{
    if (start) {
        qatomic_set(&dirtylimit_quit, 0);
    } else {
        qatomic_set(&dirtylimit_quit, 1);
    }
}
void dirtylimit_set_vcpu(int cpu_index,
                         uint64_t quota,
                         bool enable)
{
    trace_dirtylimit_set_vcpu(cpu_index, quota);

    if (enable) {
        dirtylimit_state->states[cpu_index].quota = quota;
        if (!dirtylimit_vcpu_get_state(cpu_index)->enabled) {
            dirtylimit_state->limited_nvcpu++;
        }
    } else {
        dirtylimit_state->states[cpu_index].quota = 0;
        if (dirtylimit_state->states[cpu_index].enabled) {
            dirtylimit_state->limited_nvcpu--;
        }
    }

    dirtylimit_state->states[cpu_index].enabled = enable;
}
void dirtylimit_set_all(uint64_t quota,
                        bool enable)
{
    MachineState *ms = MACHINE(qdev_get_machine());
    int max_cpus = ms->smp.max_cpus;
    int i;

    for (i = 0; i < max_cpus; i++) {
        dirtylimit_set_vcpu(i, quota, enable);
    }
}
void dirtylimit_vcpu_execute(CPUState *cpu)
{
    if (cpu->throttle_us_per_full) {
        dirtylimit_state_lock();

        if (dirtylimit_in_service() &&
            dirtylimit_vcpu_get_state(cpu->cpu_index)->enabled) {
            dirtylimit_state_unlock();
            trace_dirtylimit_vcpu_execute(cpu->cpu_index,
                                          cpu->throttle_us_per_full);

            g_usleep(cpu->throttle_us_per_full);
            return;
        }

        dirtylimit_state_unlock();
    }
}
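/*
 * Note on the throttling cycle (a sketch, assuming this hook is invoked
 * on each dirty-ring-full exit): when a vCPU fills its dirty ring, KVM
 * exits to userspace, and the vCPU sleeps here for throttle_us_per_full
 * before re-entering the guest. The vCPU thus alternates between running
 * for about one ring-full time and sleeping, so the fraction of
 * wall-clock time it runs is roughly
 * ring_full_time / (ring_full_time + throttle_us_per_full).
 */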
static void dirtylimit_init(void)
{
    dirtylimit_state_initialize();
    dirtylimit_change(true);
    vcpu_dirty_rate_stat_initialize();
    vcpu_dirty_rate_stat_start();
}

static void dirtylimit_cleanup(void)
{
    vcpu_dirty_rate_stat_stop();
    vcpu_dirty_rate_stat_finalize();
    dirtylimit_change(false);
    dirtylimit_state_finalize();
}
/*
 * The dirty page rate limit is not allowed to be set if migration
 * is running with the dirty-limit capability enabled.
 */
static bool dirtylimit_is_allowed(void)
{
    MigrationState *ms = migrate_get_current();

    if (migration_is_running(ms->state) &&
        (!qemu_thread_is_self(&ms->thread)) &&
        migrate_dirty_limit() &&
        dirtylimit_in_service()) {
        return false;
    }
    return true;
}
void qmp_cancel_vcpu_dirty_limit(bool has_cpu_index,
                                 int64_t cpu_index,
                                 Error **errp)
{
    if (!kvm_enabled() || !kvm_dirty_ring_enabled()) {
        return;
    }

    if (has_cpu_index && !dirtylimit_vcpu_index_valid(cpu_index)) {
        error_setg(errp, "incorrect cpu index specified");
        return;
    }

    if (!dirtylimit_is_allowed()) {
        error_setg(errp, "can't cancel dirty page rate limit while"
                   " migration is running");
        return;
    }

    if (!dirtylimit_in_service()) {
        return;
    }

    dirtylimit_state_lock();

    if (has_cpu_index) {
        dirtylimit_set_vcpu(cpu_index, 0, false);
    } else {
        dirtylimit_set_all(0, false);
    }

    if (!dirtylimit_state->limited_nvcpu) {
        dirtylimit_cleanup();
    }

    dirtylimit_state_unlock();
}
void hmp_cancel_vcpu_dirty_limit(Monitor *mon, const QDict *qdict)
{
    int64_t cpu_index = qdict_get_try_int(qdict, "cpu_index", -1);
    Error *err = NULL;

    qmp_cancel_vcpu_dirty_limit(!!(cpu_index != -1), cpu_index, &err);
    if (err) {
        hmp_handle_error(mon, err);
        return;
    }

    monitor_printf(mon, "[Please use 'info vcpu_dirty_limit' to query "
                   "dirty limit for virtual CPU]\n");
}
void qmp_set_vcpu_dirty_limit(bool has_cpu_index,
                              int64_t cpu_index,
                              uint64_t dirty_rate,
                              Error **errp)
{
    if (!kvm_enabled() || !kvm_dirty_ring_enabled()) {
        error_setg(errp, "dirty page limit feature requires KVM with"
                   " accelerator property 'dirty-ring-size' set");
        return;
    }

    if (has_cpu_index && !dirtylimit_vcpu_index_valid(cpu_index)) {
        error_setg(errp, "incorrect cpu index specified");
        return;
    }

    if (!dirtylimit_is_allowed()) {
        error_setg(errp, "can't set dirty page rate limit while"
                   " migration is running");
        return;
    }

    if (!dirty_rate) {
        qmp_cancel_vcpu_dirty_limit(has_cpu_index, cpu_index, errp);
        return;
    }

    dirtylimit_state_lock();

    if (!dirtylimit_in_service()) {
        dirtylimit_init();
    }

    if (has_cpu_index) {
        dirtylimit_set_vcpu(cpu_index, dirty_rate, true);
    } else {
        dirtylimit_set_all(dirty_rate, true);
    }

    dirtylimit_state_unlock();
}
void hmp_set_vcpu_dirty_limit(Monitor *mon, const QDict *qdict)
{
    int64_t dirty_rate = qdict_get_int(qdict, "dirty_rate");
    int64_t cpu_index = qdict_get_try_int(qdict, "cpu_index", -1);
    Error *err = NULL;

    if (dirty_rate < 0) {
        error_setg(&err, "invalid dirty page limit %" PRId64, dirty_rate);
        goto out;
    }

    qmp_set_vcpu_dirty_limit(!!(cpu_index != -1), cpu_index, dirty_rate, &err);

out:
    hmp_handle_error(mon, err);
}
/* Return the max throttle time across all virtual CPUs */
uint64_t dirtylimit_throttle_time_per_round(void)
{
    CPUState *cpu;
    int64_t max = 0;

    CPU_FOREACH(cpu) {
        if (cpu->throttle_us_per_full > max) {
            max = cpu->throttle_us_per_full;
        }
    }

    return max;
}
/*
 * Estimate the average dirty ring full time of each virtual CPU.
 * Return 0 if the guest doesn't dirty memory.
 */
uint64_t dirtylimit_ring_full_time(void)
{
    CPUState *cpu;
    uint64_t curr_rate = 0;
    int nvcpus = 0;

    CPU_FOREACH(cpu) {
        if (cpu->running) {
            nvcpus++;
            curr_rate += vcpu_dirty_rate_get(cpu->cpu_index);
        }
    }

    if (!curr_rate || !nvcpus) {
        return 0;
    }

    return dirtylimit_dirty_ring_full_time(curr_rate / nvcpus);
}
static struct DirtyLimitInfo *dirtylimit_query_vcpu(int cpu_index)
{
    DirtyLimitInfo *info = NULL;

    info = g_malloc0(sizeof(*info));
    info->cpu_index = cpu_index;
    info->limit_rate = dirtylimit_vcpu_get_state(cpu_index)->quota;
    info->current_rate = vcpu_dirty_rate_get(cpu_index);

    return info;
}
static struct DirtyLimitInfoList *dirtylimit_query_all(void)
{
    int i, index;
    DirtyLimitInfo *info = NULL;
    DirtyLimitInfoList *head = NULL, **tail = &head;

    dirtylimit_state_lock();

    if (!dirtylimit_in_service()) {
        dirtylimit_state_unlock();
        return NULL;
    }

    for (i = 0; i < dirtylimit_state->max_cpus; i++) {
        index = dirtylimit_state->states[i].cpu_index;
        if (dirtylimit_vcpu_get_state(index)->enabled) {
            info = dirtylimit_query_vcpu(index);
            QAPI_LIST_APPEND(tail, info);
        }
    }

    dirtylimit_state_unlock();

    return head;
}
struct DirtyLimitInfoList *qmp_query_vcpu_dirty_limit(Error **errp)
{
    return dirtylimit_query_all();
}
void hmp_info_vcpu_dirty_limit(Monitor *mon, const QDict *qdict)
{
    DirtyLimitInfoList *info;
    g_autoptr(DirtyLimitInfoList) head = NULL;
    Error *err = NULL;

    if (!dirtylimit_in_service()) {
        monitor_printf(mon, "Dirty page limit not enabled!\n");
        return;
    }

    head = qmp_query_vcpu_dirty_limit(&err);
    if (err) {
        hmp_handle_error(mon, err);
        return;
    }

    for (info = head; info != NULL; info = info->next) {
        monitor_printf(mon, "vcpu[%"PRIi64"], limit rate %"PRIi64 " (MB/s),"
                            " current rate %"PRIi64 " (MB/s)\n",
                            info->value->cpu_index,
                            info->value->limit_rate,
                            info->value->current_rate);
    }
}