/*
 * Dirty page rate limit implementation code
 *
 * Copyright (c) 2022 CHINA TELECOM CO.,LTD.
 *
 * Hyman Huang(黄勇) <huangy81@chinatelecom.cn>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */
13 #include "qemu/osdep.h"
14 #include "qemu/main-loop.h"
15 #include "qapi/qapi-commands-migration.h"
16 #include "qapi/qmp/qdict.h"
17 #include "qapi/error.h"
18 #include "sysemu/dirtyrate.h"
19 #include "sysemu/dirtylimit.h"
20 #include "monitor/hmp.h"
21 #include "monitor/monitor.h"
22 #include "exec/memory.h"
23 #include "exec/target_page.h"
24 #include "hw/boards.h"
25 #include "sysemu/kvm.h"
/*
 * Dirtylimit stops working if the dirty page rate error
 * value is less than DIRTYLIMIT_TOLERANCE_RANGE
 */
#define DIRTYLIMIT_TOLERANCE_RANGE 25 /* MB/s */
/*
 * Plus or minus vcpu sleep time linearly if dirty
 * page rate error value percentage over
 * DIRTYLIMIT_LINEAR_ADJUSTMENT_PCT.
 * Otherwise, plus or minus a fixed vcpu sleep time.
 */
#define DIRTYLIMIT_LINEAR_ADJUSTMENT_PCT 50
/*
 * Max vcpu sleep time percentage during a cycle
 * composed of dirty ring full and sleep time.
 */
#define DIRTYLIMIT_THROTTLE_PCT_MAX 99
50 } *vcpu_dirty_rate_stat
;
52 typedef struct VcpuDirtyLimitState
{
56 * Quota dirty page rate, unit is MB/s
57 * zero if not enabled.
60 } VcpuDirtyLimitState
;
63 VcpuDirtyLimitState
*states
;
64 /* Max cpus number configured by user */
66 /* Number of vcpu under dirtylimit */
/*
 * protect dirtylimit_state; taken and released through
 * dirtylimit_state_lock() / dirtylimit_state_unlock().
 */
static QemuMutex dirtylimit_mutex;

/*
 * dirtylimit thread quit if dirtylimit_quit is true:
 * dirtylimit_process() does nothing while this flag is set.
 */
static bool dirtylimit_quit;
76 static void vcpu_dirty_rate_stat_collect(void)
81 /* calculate vcpu dirtyrate */
82 vcpu_calculate_dirtyrate(DIRTYLIMIT_CALC_TIME_MS
,
87 for (i
= 0; i
< stat
.nvcpu
; i
++) {
88 vcpu_dirty_rate_stat
->stat
.rates
[i
].id
= i
;
89 vcpu_dirty_rate_stat
->stat
.rates
[i
].dirty_rate
=
90 stat
.rates
[i
].dirty_rate
;
96 static void *vcpu_dirty_rate_stat_thread(void *opaque
)
98 rcu_register_thread();
101 global_dirty_log_change(GLOBAL_DIRTY_LIMIT
, true);
103 while (qatomic_read(&vcpu_dirty_rate_stat
->running
)) {
104 vcpu_dirty_rate_stat_collect();
105 if (dirtylimit_in_service()) {
106 dirtylimit_process();
111 global_dirty_log_change(GLOBAL_DIRTY_LIMIT
, false);
113 rcu_unregister_thread();
117 int64_t vcpu_dirty_rate_get(int cpu_index
)
119 DirtyRateVcpu
*rates
= vcpu_dirty_rate_stat
->stat
.rates
;
120 return qatomic_read_i64(&rates
[cpu_index
].dirty_rate
);
123 void vcpu_dirty_rate_stat_start(void)
125 if (qatomic_read(&vcpu_dirty_rate_stat
->running
)) {
129 qatomic_set(&vcpu_dirty_rate_stat
->running
, 1);
130 qemu_thread_create(&vcpu_dirty_rate_stat
->thread
,
132 vcpu_dirty_rate_stat_thread
,
134 QEMU_THREAD_JOINABLE
);
/*
 * Stop the dirty rate statistics thread and wait for it to exit.
 *
 * NOTE(review): this releases the dirtylimit state lock and the iothread
 * lock around the join and re-takes both afterwards, so callers are
 * presumably expected to hold both on entry - confirm against callers.
 */
void vcpu_dirty_rate_stat_stop(void)
{
    /* Clear the flag the thread loop polls. */
    qatomic_set(&vcpu_dirty_rate_stat->running, 0);
    /* Drop both locks while joining, then re-acquire in reverse order. */
    dirtylimit_state_unlock();
    qemu_mutex_unlock_iothread();
    qemu_thread_join(&vcpu_dirty_rate_stat->thread);
    qemu_mutex_lock_iothread();
    dirtylimit_state_lock();
}
147 void vcpu_dirty_rate_stat_initialize(void)
149 MachineState
*ms
= MACHINE(qdev_get_machine());
150 int max_cpus
= ms
->smp
.max_cpus
;
152 vcpu_dirty_rate_stat
=
153 g_malloc0(sizeof(*vcpu_dirty_rate_stat
));
155 vcpu_dirty_rate_stat
->stat
.nvcpu
= max_cpus
;
156 vcpu_dirty_rate_stat
->stat
.rates
=
157 g_new0(DirtyRateVcpu
, max_cpus
);
159 vcpu_dirty_rate_stat
->running
= false;
162 void vcpu_dirty_rate_stat_finalize(void)
164 free(vcpu_dirty_rate_stat
->stat
.rates
);
165 vcpu_dirty_rate_stat
->stat
.rates
= NULL
;
167 free(vcpu_dirty_rate_stat
);
168 vcpu_dirty_rate_stat
= NULL
;
171 void dirtylimit_state_lock(void)
173 qemu_mutex_lock(&dirtylimit_mutex
);
176 void dirtylimit_state_unlock(void)
178 qemu_mutex_unlock(&dirtylimit_mutex
);
182 __attribute__((__constructor__
)) dirtylimit_mutex_init(void)
184 qemu_mutex_init(&dirtylimit_mutex
);
187 static inline VcpuDirtyLimitState
*dirtylimit_vcpu_get_state(int cpu_index
)
189 return &dirtylimit_state
->states
[cpu_index
];
192 void dirtylimit_state_initialize(void)
194 MachineState
*ms
= MACHINE(qdev_get_machine());
195 int max_cpus
= ms
->smp
.max_cpus
;
198 dirtylimit_state
= g_malloc0(sizeof(*dirtylimit_state
));
200 dirtylimit_state
->states
=
201 g_new0(VcpuDirtyLimitState
, max_cpus
);
203 for (i
= 0; i
< max_cpus
; i
++) {
204 dirtylimit_state
->states
[i
].cpu_index
= i
;
207 dirtylimit_state
->max_cpus
= max_cpus
;
208 trace_dirtylimit_state_initialize(max_cpus
);
211 void dirtylimit_state_finalize(void)
213 free(dirtylimit_state
->states
);
214 dirtylimit_state
->states
= NULL
;
216 free(dirtylimit_state
);
217 dirtylimit_state
= NULL
;
219 trace_dirtylimit_state_finalize();
222 bool dirtylimit_in_service(void)
224 return !!dirtylimit_state
;
227 bool dirtylimit_vcpu_index_valid(int cpu_index
)
229 MachineState
*ms
= MACHINE(qdev_get_machine());
231 return !(cpu_index
< 0 ||
232 cpu_index
>= ms
->smp
.max_cpus
);
/*
 * Compute dirty ring size (in MiB) * 1000000 / peak dirty rate, where the
 * peak is the largest @dirtyrate this function has been called with so
 * far (kept in a function-local static across calls).
 *
 * @dirtyrate: current dirty page rate; presumably MB/s, matching the
 *             quota unit - TODO confirm.
 *
 * NOTE(review): the peak starts at zero, so a first call with
 * @dirtyrate == 0 would divide by zero; callers must not pass 0 before a
 * non-zero rate has been seen.
 */
static uint64_t dirtylimit_dirty_ring_full_time(uint64_t dirtyrate)
{
    static uint64_t peak_dirtyrate;
    unsigned page_bits = qemu_target_page_bits();
    uint64_t ring_size_mib;

    /* So far, the largest (non-huge) page size is 64k, i.e. 16 bits. */
    assert(page_bits < 20);

    /* Convert ring size (pages) to MiB (2**20). */
    ring_size_mib = kvm_dirty_ring_size() >> (20 - page_bits);

    if (dirtyrate > peak_dirtyrate) {
        peak_dirtyrate = dirtyrate;
    }

    return ring_size_mib * 1000000 / peak_dirtyrate;
}
254 static inline bool dirtylimit_done(uint64_t quota
,
259 min
= MIN(quota
, current
);
260 max
= MAX(quota
, current
);
262 return ((max
- min
) <= DIRTYLIMIT_TOLERANCE_RANGE
) ? true : false;
266 dirtylimit_need_linear_adjustment(uint64_t quota
,
271 min
= MIN(quota
, current
);
272 max
= MAX(quota
, current
);
274 return ((max
- min
) * 100 / max
) > DIRTYLIMIT_LINEAR_ADJUSTMENT_PCT
;
277 static void dirtylimit_set_throttle(CPUState
*cpu
,
281 int64_t ring_full_time_us
= 0;
282 uint64_t sleep_pct
= 0;
283 uint64_t throttle_us
= 0;
286 cpu
->throttle_us_per_full
= 0;
290 ring_full_time_us
= dirtylimit_dirty_ring_full_time(current
);
292 if (dirtylimit_need_linear_adjustment(quota
, current
)) {
293 if (quota
< current
) {
294 sleep_pct
= (current
- quota
) * 100 / current
;
296 ring_full_time_us
* sleep_pct
/ (double)(100 - sleep_pct
);
297 cpu
->throttle_us_per_full
+= throttle_us
;
299 sleep_pct
= (quota
- current
) * 100 / quota
;
301 ring_full_time_us
* sleep_pct
/ (double)(100 - sleep_pct
);
302 cpu
->throttle_us_per_full
-= throttle_us
;
305 trace_dirtylimit_throttle_pct(cpu
->cpu_index
,
309 if (quota
< current
) {
310 cpu
->throttle_us_per_full
+= ring_full_time_us
/ 10;
312 cpu
->throttle_us_per_full
-= ring_full_time_us
/ 10;
317 * TODO: in the big kvm_dirty_ring_size case (eg: 65536, or other scenario),
318 * current dirty page rate may never reach the quota, we should stop
319 * increasing sleep time?
321 cpu
->throttle_us_per_full
= MIN(cpu
->throttle_us_per_full
,
322 ring_full_time_us
* DIRTYLIMIT_THROTTLE_PCT_MAX
);
324 cpu
->throttle_us_per_full
= MAX(cpu
->throttle_us_per_full
, 0);
327 static void dirtylimit_adjust_throttle(CPUState
*cpu
)
330 uint64_t current
= 0;
331 int cpu_index
= cpu
->cpu_index
;
333 quota
= dirtylimit_vcpu_get_state(cpu_index
)->quota
;
334 current
= vcpu_dirty_rate_get(cpu_index
);
336 if (!dirtylimit_done(quota
, current
)) {
337 dirtylimit_set_throttle(cpu
, quota
, current
);
343 void dirtylimit_process(void)
347 if (!qatomic_read(&dirtylimit_quit
)) {
348 dirtylimit_state_lock();
350 if (!dirtylimit_in_service()) {
351 dirtylimit_state_unlock();
356 if (!dirtylimit_vcpu_get_state(cpu
->cpu_index
)->enabled
) {
359 dirtylimit_adjust_throttle(cpu
);
361 dirtylimit_state_unlock();
365 void dirtylimit_change(bool start
)
368 qatomic_set(&dirtylimit_quit
, 0);
370 qatomic_set(&dirtylimit_quit
, 1);
374 void dirtylimit_set_vcpu(int cpu_index
,
378 trace_dirtylimit_set_vcpu(cpu_index
, quota
);
381 dirtylimit_state
->states
[cpu_index
].quota
= quota
;
382 if (!dirtylimit_vcpu_get_state(cpu_index
)->enabled
) {
383 dirtylimit_state
->limited_nvcpu
++;
386 dirtylimit_state
->states
[cpu_index
].quota
= 0;
387 if (dirtylimit_state
->states
[cpu_index
].enabled
) {
388 dirtylimit_state
->limited_nvcpu
--;
392 dirtylimit_state
->states
[cpu_index
].enabled
= enable
;
395 void dirtylimit_set_all(uint64_t quota
,
398 MachineState
*ms
= MACHINE(qdev_get_machine());
399 int max_cpus
= ms
->smp
.max_cpus
;
402 for (i
= 0; i
< max_cpus
; i
++) {
403 dirtylimit_set_vcpu(i
, quota
, enable
);
407 void dirtylimit_vcpu_execute(CPUState
*cpu
)
409 if (dirtylimit_in_service() &&
410 dirtylimit_vcpu_get_state(cpu
->cpu_index
)->enabled
&&
411 cpu
->throttle_us_per_full
) {
412 trace_dirtylimit_vcpu_execute(cpu
->cpu_index
,
413 cpu
->throttle_us_per_full
);
414 usleep(cpu
->throttle_us_per_full
);
/*
 * Bring the dirty limit machinery up: allocate the limit state, clear the
 * quit flag, allocate the dirty rate statistics and start the statistics
 * thread.
 */
static void dirtylimit_init(void)
{
    dirtylimit_state_initialize();
    dirtylimit_change(true);
    vcpu_dirty_rate_stat_initialize();
    vcpu_dirty_rate_stat_start();
}
/*
 * Tear the dirty limit machinery down: stop and join the statistics
 * thread, free the statistics, set the quit flag and free the limit state.
 */
static void dirtylimit_cleanup(void)
{
    /* The thread must be joined before the data it uses is freed. */
    vcpu_dirty_rate_stat_stop();
    vcpu_dirty_rate_stat_finalize();
    dirtylimit_change(false);
    dirtylimit_state_finalize();
}
434 void qmp_cancel_vcpu_dirty_limit(bool has_cpu_index
,
438 if (!kvm_enabled() || !kvm_dirty_ring_enabled()) {
442 if (has_cpu_index
&& !dirtylimit_vcpu_index_valid(cpu_index
)) {
443 error_setg(errp
, "incorrect cpu index specified");
447 if (!dirtylimit_in_service()) {
451 dirtylimit_state_lock();
454 dirtylimit_set_vcpu(cpu_index
, 0, false);
456 dirtylimit_set_all(0, false);
459 if (!dirtylimit_state
->limited_nvcpu
) {
460 dirtylimit_cleanup();
463 dirtylimit_state_unlock();
466 void hmp_cancel_vcpu_dirty_limit(Monitor
*mon
, const QDict
*qdict
)
468 int64_t cpu_index
= qdict_get_try_int(qdict
, "cpu_index", -1);
471 qmp_cancel_vcpu_dirty_limit(!!(cpu_index
!= -1), cpu_index
, &err
);
473 hmp_handle_error(mon
, err
);
477 monitor_printf(mon
, "[Please use 'info vcpu_dirty_limit' to query "
478 "dirty limit for virtual CPU]\n");
481 void qmp_set_vcpu_dirty_limit(bool has_cpu_index
,
486 if (!kvm_enabled() || !kvm_dirty_ring_enabled()) {
487 error_setg(errp
, "dirty page limit feature requires KVM with"
488 " accelerator property 'dirty-ring-size' set'");
492 if (has_cpu_index
&& !dirtylimit_vcpu_index_valid(cpu_index
)) {
493 error_setg(errp
, "incorrect cpu index specified");
498 qmp_cancel_vcpu_dirty_limit(has_cpu_index
, cpu_index
, errp
);
502 dirtylimit_state_lock();
504 if (!dirtylimit_in_service()) {
509 dirtylimit_set_vcpu(cpu_index
, dirty_rate
, true);
511 dirtylimit_set_all(dirty_rate
, true);
514 dirtylimit_state_unlock();
517 void hmp_set_vcpu_dirty_limit(Monitor
*mon
, const QDict
*qdict
)
519 int64_t dirty_rate
= qdict_get_int(qdict
, "dirty_rate");
520 int64_t cpu_index
= qdict_get_try_int(qdict
, "cpu_index", -1);
523 qmp_set_vcpu_dirty_limit(!!(cpu_index
!= -1), cpu_index
, dirty_rate
, &err
);
525 hmp_handle_error(mon
, err
);
529 monitor_printf(mon
, "[Please use 'info vcpu_dirty_limit' to query "
530 "dirty limit for virtual CPU]\n");
533 static struct DirtyLimitInfo
*dirtylimit_query_vcpu(int cpu_index
)
535 DirtyLimitInfo
*info
= NULL
;
537 info
= g_malloc0(sizeof(*info
));
538 info
->cpu_index
= cpu_index
;
539 info
->limit_rate
= dirtylimit_vcpu_get_state(cpu_index
)->quota
;
540 info
->current_rate
= vcpu_dirty_rate_get(cpu_index
);
545 static struct DirtyLimitInfoList
*dirtylimit_query_all(void)
548 DirtyLimitInfo
*info
= NULL
;
549 DirtyLimitInfoList
*head
= NULL
, **tail
= &head
;
551 dirtylimit_state_lock();
553 if (!dirtylimit_in_service()) {
554 dirtylimit_state_unlock();
558 for (i
= 0; i
< dirtylimit_state
->max_cpus
; i
++) {
559 index
= dirtylimit_state
->states
[i
].cpu_index
;
560 if (dirtylimit_vcpu_get_state(index
)->enabled
) {
561 info
= dirtylimit_query_vcpu(index
);
562 QAPI_LIST_APPEND(tail
, info
);
566 dirtylimit_state_unlock();
571 struct DirtyLimitInfoList
*qmp_query_vcpu_dirty_limit(Error
**errp
)
573 if (!dirtylimit_in_service()) {
577 return dirtylimit_query_all();
580 void hmp_info_vcpu_dirty_limit(Monitor
*mon
, const QDict
*qdict
)
582 DirtyLimitInfoList
*limit
, *head
, *info
= NULL
;
585 if (!dirtylimit_in_service()) {
586 monitor_printf(mon
, "Dirty page limit not enabled!\n");
590 info
= qmp_query_vcpu_dirty_limit(&err
);
592 hmp_handle_error(mon
, err
);
597 for (limit
= head
; limit
!= NULL
; limit
= limit
->next
) {
598 monitor_printf(mon
, "vcpu[%"PRIi64
"], limit rate %"PRIi64
" (MB/s),"
599 " current rate %"PRIi64
" (MB/s)\n",
600 limit
->value
->cpu_index
,
601 limit
->value
->limit_rate
,
602 limit
->value
->current_rate
);