kvm: dirty-ring: Fix race with vcpu creation
[qemu/ar7.git] / softmmu / dirtylimit.c
blobc56f0f58c8c9eb2030ea4325aaf31bab63bbb073
1 /*
2 * Dirty page rate limit implementation code
4 * Copyright (c) 2022 CHINA TELECOM CO.,LTD.
6 * Authors:
7 * Hyman Huang(黄勇) <huangy81@chinatelecom.cn>
9 * This work is licensed under the terms of the GNU GPL, version 2 or later.
10 * See the COPYING file in the top-level directory.
13 #include "qemu/osdep.h"
14 #include "qemu/main-loop.h"
15 #include "qapi/qapi-commands-migration.h"
16 #include "qapi/qmp/qdict.h"
17 #include "qapi/error.h"
18 #include "sysemu/dirtyrate.h"
19 #include "sysemu/dirtylimit.h"
20 #include "monitor/hmp.h"
21 #include "monitor/monitor.h"
22 #include "exec/memory.h"
23 #include "hw/boards.h"
24 #include "sysemu/kvm.h"
25 #include "trace.h"
/*
 * The dirty limit stops adjusting a vCPU once the difference between
 * its current dirty page rate and its quota is no larger than
 * DIRTYLIMIT_TOLERANCE_RANGE (see dirtylimit_done()).
 */
#define DIRTYLIMIT_TOLERANCE_RANGE 25 /* MB/s */
/*
 * Increase or decrease the vCPU sleep time linearly when the dirty
 * page rate error, expressed as a percentage, is above
 * DIRTYLIMIT_LINEAR_ADJUSTMENT_PCT.
 * Otherwise, increase or decrease it by a fixed step.
 */
#define DIRTYLIMIT_LINEAR_ADJUSTMENT_PCT 50
/*
 * Maximum percentage of a cycle that a vCPU may spend sleeping, where
 * a cycle is composed of the dirty ring full time plus the sleep time.
 */
#define DIRTYLIMIT_THROTTLE_PCT_MAX 99
/*
 * State of the dirty page rate sampling thread.  Allocated by
 * vcpu_dirty_rate_stat_initialize() and released by
 * vcpu_dirty_rate_stat_finalize().
 */
struct {
    /* Most recent per-vCPU rates, filled by vcpu_dirty_rate_stat_collect() */
    VcpuStat stat;
    /* Polled atomically by the sampling thread; cleared to make it exit */
    bool running;
    /* Handle of the "dirtyrate-stat" thread, joined on stop */
    QemuThread thread;
} *vcpu_dirty_rate_stat;
/* Dirty limit configuration of a single vCPU. */
typedef struct VcpuDirtyLimitState {
    /* Index of the vCPU this entry describes */
    int cpu_index;
    /* True while a dirty page rate limit is in force for this vCPU */
    bool enabled;
    /*
     * Quota dirty page rate, unit is MB/s
     * zero if not enabled.
     */
    uint64_t quota;
} VcpuDirtyLimitState;
/*
 * Global dirty limit bookkeeping.  The pointer is NULL while the feature
 * is not in service (see dirtylimit_in_service()).
 */
struct {
    /* One entry per possible vCPU, indexed by cpu_index */
    VcpuDirtyLimitState *states;
    /* Max cpus number configured by user */
    int max_cpus;
    /* Number of vcpu under dirtylimit */
    int limited_nvcpu;
} *dirtylimit_state;
/*
 * Protects dirtylimit_state; taken and released through
 * dirtylimit_state_lock() / dirtylimit_state_unlock().
 */
static QemuMutex dirtylimit_mutex;
/*
 * When true, dirtylimit_process() returns without adjusting any vCPU.
 * Written via dirtylimit_change() and read atomically.
 */
static bool dirtylimit_quit;
75 static void vcpu_dirty_rate_stat_collect(void)
77 VcpuStat stat;
78 int i = 0;
80 /* calculate vcpu dirtyrate */
81 vcpu_calculate_dirtyrate(DIRTYLIMIT_CALC_TIME_MS,
82 &stat,
83 GLOBAL_DIRTY_LIMIT,
84 false);
86 for (i = 0; i < stat.nvcpu; i++) {
87 vcpu_dirty_rate_stat->stat.rates[i].id = i;
88 vcpu_dirty_rate_stat->stat.rates[i].dirty_rate =
89 stat.rates[i].dirty_rate;
92 free(stat.rates);
95 static void *vcpu_dirty_rate_stat_thread(void *opaque)
97 rcu_register_thread();
99 /* start log sync */
100 global_dirty_log_change(GLOBAL_DIRTY_LIMIT, true);
102 while (qatomic_read(&vcpu_dirty_rate_stat->running)) {
103 vcpu_dirty_rate_stat_collect();
104 if (dirtylimit_in_service()) {
105 dirtylimit_process();
109 /* stop log sync */
110 global_dirty_log_change(GLOBAL_DIRTY_LIMIT, false);
112 rcu_unregister_thread();
113 return NULL;
116 int64_t vcpu_dirty_rate_get(int cpu_index)
118 DirtyRateVcpu *rates = vcpu_dirty_rate_stat->stat.rates;
119 return qatomic_read_i64(&rates[cpu_index].dirty_rate);
122 void vcpu_dirty_rate_stat_start(void)
124 if (qatomic_read(&vcpu_dirty_rate_stat->running)) {
125 return;
128 qatomic_set(&vcpu_dirty_rate_stat->running, 1);
129 qemu_thread_create(&vcpu_dirty_rate_stat->thread,
130 "dirtyrate-stat",
131 vcpu_dirty_rate_stat_thread,
132 NULL,
133 QEMU_THREAD_JOINABLE);
/*
 * Ask the sampling thread to exit and wait for it.
 *
 * The function releases the dirtylimit mutex and the iothread lock around
 * the join and re-takes both before returning, so the caller is expected
 * to hold both on entry.
 */
void vcpu_dirty_rate_stat_stop(void)
{
    /* Makes the thread's loop condition false on its next iteration */
    qatomic_set(&vcpu_dirty_rate_stat->running, 0);
    /*
     * The thread takes the dirtylimit mutex inside dirtylimit_process();
     * holding it across the join could block the thread forever.
     */
    dirtylimit_state_unlock();
    /* NOTE(review): presumably the thread may also need the iothread lock */
    qemu_mutex_unlock_iothread();
    qemu_thread_join(&vcpu_dirty_rate_stat->thread);
    /* Re-acquire in the reverse order of release */
    qemu_mutex_lock_iothread();
    dirtylimit_state_lock();
}
146 void vcpu_dirty_rate_stat_initialize(void)
148 MachineState *ms = MACHINE(qdev_get_machine());
149 int max_cpus = ms->smp.max_cpus;
151 vcpu_dirty_rate_stat =
152 g_malloc0(sizeof(*vcpu_dirty_rate_stat));
154 vcpu_dirty_rate_stat->stat.nvcpu = max_cpus;
155 vcpu_dirty_rate_stat->stat.rates =
156 g_new0(DirtyRateVcpu, max_cpus);
158 vcpu_dirty_rate_stat->running = false;
161 void vcpu_dirty_rate_stat_finalize(void)
163 free(vcpu_dirty_rate_stat->stat.rates);
164 vcpu_dirty_rate_stat->stat.rates = NULL;
166 free(vcpu_dirty_rate_stat);
167 vcpu_dirty_rate_stat = NULL;
/* Acquire the mutex that protects dirtylimit_state. */
void dirtylimit_state_lock(void)
{
    qemu_mutex_lock(&dirtylimit_mutex);
}
/* Release the mutex that protects dirtylimit_state. */
void dirtylimit_state_unlock(void)
{
    qemu_mutex_unlock(&dirtylimit_mutex);
}
/*
 * Initialise dirtylimit_mutex.  Marked as a constructor so it runs at
 * program load, before any explicit caller can take the mutex.
 */
static void
__attribute__((__constructor__)) dirtylimit_mutex_init(void)
{
    qemu_mutex_init(&dirtylimit_mutex);
}
186 static inline VcpuDirtyLimitState *dirtylimit_vcpu_get_state(int cpu_index)
188 return &dirtylimit_state->states[cpu_index];
191 void dirtylimit_state_initialize(void)
193 MachineState *ms = MACHINE(qdev_get_machine());
194 int max_cpus = ms->smp.max_cpus;
195 int i;
197 dirtylimit_state = g_malloc0(sizeof(*dirtylimit_state));
199 dirtylimit_state->states =
200 g_new0(VcpuDirtyLimitState, max_cpus);
202 for (i = 0; i < max_cpus; i++) {
203 dirtylimit_state->states[i].cpu_index = i;
206 dirtylimit_state->max_cpus = max_cpus;
207 trace_dirtylimit_state_initialize(max_cpus);
210 void dirtylimit_state_finalize(void)
212 free(dirtylimit_state->states);
213 dirtylimit_state->states = NULL;
215 free(dirtylimit_state);
216 dirtylimit_state = NULL;
218 trace_dirtylimit_state_finalize();
221 bool dirtylimit_in_service(void)
223 return !!dirtylimit_state;
226 bool dirtylimit_vcpu_index_valid(int cpu_index)
228 MachineState *ms = MACHINE(qdev_get_machine());
230 return !(cpu_index < 0 ||
231 cpu_index >= ms->smp.max_cpus);
234 static inline int64_t dirtylimit_dirty_ring_full_time(uint64_t dirtyrate)
236 static uint64_t max_dirtyrate;
237 uint32_t dirty_ring_size = kvm_dirty_ring_size();
238 uint64_t dirty_ring_size_meory_MB =
239 dirty_ring_size * TARGET_PAGE_SIZE >> 20;
241 if (max_dirtyrate < dirtyrate) {
242 max_dirtyrate = dirtyrate;
245 return dirty_ring_size_meory_MB * 1000000 / max_dirtyrate;
248 static inline bool dirtylimit_done(uint64_t quota,
249 uint64_t current)
251 uint64_t min, max;
253 min = MIN(quota, current);
254 max = MAX(quota, current);
256 return ((max - min) <= DIRTYLIMIT_TOLERANCE_RANGE) ? true : false;
259 static inline bool
260 dirtylimit_need_linear_adjustment(uint64_t quota,
261 uint64_t current)
263 uint64_t min, max;
265 min = MIN(quota, current);
266 max = MAX(quota, current);
268 return ((max - min) * 100 / max) > DIRTYLIMIT_LINEAR_ADJUSTMENT_PCT;
271 static void dirtylimit_set_throttle(CPUState *cpu,
272 uint64_t quota,
273 uint64_t current)
275 int64_t ring_full_time_us = 0;
276 uint64_t sleep_pct = 0;
277 uint64_t throttle_us = 0;
279 if (current == 0) {
280 cpu->throttle_us_per_full = 0;
281 return;
284 ring_full_time_us = dirtylimit_dirty_ring_full_time(current);
286 if (dirtylimit_need_linear_adjustment(quota, current)) {
287 if (quota < current) {
288 sleep_pct = (current - quota) * 100 / current;
289 throttle_us =
290 ring_full_time_us * sleep_pct / (double)(100 - sleep_pct);
291 cpu->throttle_us_per_full += throttle_us;
292 } else {
293 sleep_pct = (quota - current) * 100 / quota;
294 throttle_us =
295 ring_full_time_us * sleep_pct / (double)(100 - sleep_pct);
296 cpu->throttle_us_per_full -= throttle_us;
299 trace_dirtylimit_throttle_pct(cpu->cpu_index,
300 sleep_pct,
301 throttle_us);
302 } else {
303 if (quota < current) {
304 cpu->throttle_us_per_full += ring_full_time_us / 10;
305 } else {
306 cpu->throttle_us_per_full -= ring_full_time_us / 10;
311 * TODO: in the big kvm_dirty_ring_size case (eg: 65536, or other scenario),
312 * current dirty page rate may never reach the quota, we should stop
313 * increasing sleep time?
315 cpu->throttle_us_per_full = MIN(cpu->throttle_us_per_full,
316 ring_full_time_us * DIRTYLIMIT_THROTTLE_PCT_MAX);
318 cpu->throttle_us_per_full = MAX(cpu->throttle_us_per_full, 0);
321 static void dirtylimit_adjust_throttle(CPUState *cpu)
323 uint64_t quota = 0;
324 uint64_t current = 0;
325 int cpu_index = cpu->cpu_index;
327 quota = dirtylimit_vcpu_get_state(cpu_index)->quota;
328 current = vcpu_dirty_rate_get(cpu_index);
330 if (!dirtylimit_done(quota, current)) {
331 dirtylimit_set_throttle(cpu, quota, current);
334 return;
337 void dirtylimit_process(void)
339 CPUState *cpu;
341 if (!qatomic_read(&dirtylimit_quit)) {
342 dirtylimit_state_lock();
344 if (!dirtylimit_in_service()) {
345 dirtylimit_state_unlock();
346 return;
349 CPU_FOREACH(cpu) {
350 if (!dirtylimit_vcpu_get_state(cpu->cpu_index)->enabled) {
351 continue;
353 dirtylimit_adjust_throttle(cpu);
355 dirtylimit_state_unlock();
359 void dirtylimit_change(bool start)
361 if (start) {
362 qatomic_set(&dirtylimit_quit, 0);
363 } else {
364 qatomic_set(&dirtylimit_quit, 1);
368 void dirtylimit_set_vcpu(int cpu_index,
369 uint64_t quota,
370 bool enable)
372 trace_dirtylimit_set_vcpu(cpu_index, quota);
374 if (enable) {
375 dirtylimit_state->states[cpu_index].quota = quota;
376 if (!dirtylimit_vcpu_get_state(cpu_index)->enabled) {
377 dirtylimit_state->limited_nvcpu++;
379 } else {
380 dirtylimit_state->states[cpu_index].quota = 0;
381 if (dirtylimit_state->states[cpu_index].enabled) {
382 dirtylimit_state->limited_nvcpu--;
386 dirtylimit_state->states[cpu_index].enabled = enable;
389 void dirtylimit_set_all(uint64_t quota,
390 bool enable)
392 MachineState *ms = MACHINE(qdev_get_machine());
393 int max_cpus = ms->smp.max_cpus;
394 int i;
396 for (i = 0; i < max_cpus; i++) {
397 dirtylimit_set_vcpu(i, quota, enable);
401 void dirtylimit_vcpu_execute(CPUState *cpu)
403 if (dirtylimit_in_service() &&
404 dirtylimit_vcpu_get_state(cpu->cpu_index)->enabled &&
405 cpu->throttle_us_per_full) {
406 trace_dirtylimit_vcpu_execute(cpu->cpu_index,
407 cpu->throttle_us_per_full);
408 usleep(cpu->throttle_us_per_full);
/*
 * Bring the dirty limit into service: allocate the limit state, allow
 * dirtylimit_process() to run, allocate the sampling state and start the
 * sampling thread.
 */
static void dirtylimit_init(void)
{
    dirtylimit_state_initialize();
    /* Clears dirtylimit_quit */
    dirtylimit_change(true);
    /* The sampling state must exist before the thread is started */
    vcpu_dirty_rate_stat_initialize();
    vcpu_dirty_rate_stat_start();
}
/*
 * Take the dirty limit out of service: stop and join the sampling thread,
 * free the sampling state, make dirtylimit_process() a no-op and free the
 * limit state.
 */
static void dirtylimit_cleanup(void)
{
    /* Temporarily drops and re-takes the dirtylimit and iothread locks */
    vcpu_dirty_rate_stat_stop();
    /* Safe only after the thread that writes the rates has been joined */
    vcpu_dirty_rate_stat_finalize();
    /* Sets dirtylimit_quit */
    dirtylimit_change(false);
    dirtylimit_state_finalize();
}
428 void qmp_cancel_vcpu_dirty_limit(bool has_cpu_index,
429 int64_t cpu_index,
430 Error **errp)
432 if (!kvm_enabled() || !kvm_dirty_ring_enabled()) {
433 return;
436 if (has_cpu_index && !dirtylimit_vcpu_index_valid(cpu_index)) {
437 error_setg(errp, "incorrect cpu index specified");
438 return;
441 if (!dirtylimit_in_service()) {
442 return;
445 dirtylimit_state_lock();
447 if (has_cpu_index) {
448 dirtylimit_set_vcpu(cpu_index, 0, false);
449 } else {
450 dirtylimit_set_all(0, false);
453 if (!dirtylimit_state->limited_nvcpu) {
454 dirtylimit_cleanup();
457 dirtylimit_state_unlock();
460 void hmp_cancel_vcpu_dirty_limit(Monitor *mon, const QDict *qdict)
462 int64_t cpu_index = qdict_get_try_int(qdict, "cpu_index", -1);
463 Error *err = NULL;
465 qmp_cancel_vcpu_dirty_limit(!!(cpu_index != -1), cpu_index, &err);
466 if (err) {
467 hmp_handle_error(mon, err);
468 return;
471 monitor_printf(mon, "[Please use 'info vcpu_dirty_limit' to query "
472 "dirty limit for virtual CPU]\n");
475 void qmp_set_vcpu_dirty_limit(bool has_cpu_index,
476 int64_t cpu_index,
477 uint64_t dirty_rate,
478 Error **errp)
480 if (!kvm_enabled() || !kvm_dirty_ring_enabled()) {
481 error_setg(errp, "dirty page limit feature requires KVM with"
482 " accelerator property 'dirty-ring-size' set'");
483 return;
486 if (has_cpu_index && !dirtylimit_vcpu_index_valid(cpu_index)) {
487 error_setg(errp, "incorrect cpu index specified");
488 return;
491 if (!dirty_rate) {
492 qmp_cancel_vcpu_dirty_limit(has_cpu_index, cpu_index, errp);
493 return;
496 dirtylimit_state_lock();
498 if (!dirtylimit_in_service()) {
499 dirtylimit_init();
502 if (has_cpu_index) {
503 dirtylimit_set_vcpu(cpu_index, dirty_rate, true);
504 } else {
505 dirtylimit_set_all(dirty_rate, true);
508 dirtylimit_state_unlock();
511 void hmp_set_vcpu_dirty_limit(Monitor *mon, const QDict *qdict)
513 int64_t dirty_rate = qdict_get_int(qdict, "dirty_rate");
514 int64_t cpu_index = qdict_get_try_int(qdict, "cpu_index", -1);
515 Error *err = NULL;
517 qmp_set_vcpu_dirty_limit(!!(cpu_index != -1), cpu_index, dirty_rate, &err);
518 if (err) {
519 hmp_handle_error(mon, err);
520 return;
523 monitor_printf(mon, "[Please use 'info vcpu_dirty_limit' to query "
524 "dirty limit for virtual CPU]\n");
527 static struct DirtyLimitInfo *dirtylimit_query_vcpu(int cpu_index)
529 DirtyLimitInfo *info = NULL;
531 info = g_malloc0(sizeof(*info));
532 info->cpu_index = cpu_index;
533 info->limit_rate = dirtylimit_vcpu_get_state(cpu_index)->quota;
534 info->current_rate = vcpu_dirty_rate_get(cpu_index);
536 return info;
539 static struct DirtyLimitInfoList *dirtylimit_query_all(void)
541 int i, index;
542 DirtyLimitInfo *info = NULL;
543 DirtyLimitInfoList *head = NULL, **tail = &head;
545 dirtylimit_state_lock();
547 if (!dirtylimit_in_service()) {
548 dirtylimit_state_unlock();
549 return NULL;
552 for (i = 0; i < dirtylimit_state->max_cpus; i++) {
553 index = dirtylimit_state->states[i].cpu_index;
554 if (dirtylimit_vcpu_get_state(index)->enabled) {
555 info = dirtylimit_query_vcpu(index);
556 QAPI_LIST_APPEND(tail, info);
560 dirtylimit_state_unlock();
562 return head;
565 struct DirtyLimitInfoList *qmp_query_vcpu_dirty_limit(Error **errp)
567 if (!dirtylimit_in_service()) {
568 return NULL;
571 return dirtylimit_query_all();
574 void hmp_info_vcpu_dirty_limit(Monitor *mon, const QDict *qdict)
576 DirtyLimitInfoList *limit, *head, *info = NULL;
577 Error *err = NULL;
579 if (!dirtylimit_in_service()) {
580 monitor_printf(mon, "Dirty page limit not enabled!\n");
581 return;
584 info = qmp_query_vcpu_dirty_limit(&err);
585 if (err) {
586 hmp_handle_error(mon, err);
587 return;
590 head = info;
591 for (limit = head; limit != NULL; limit = limit->next) {
592 monitor_printf(mon, "vcpu[%"PRIi64"], limit rate %"PRIi64 " (MB/s),"
593 " current rate %"PRIi64 " (MB/s)\n",
594 limit->value->cpu_index,
595 limit->value->limit_rate,
596 limit->value->current_rate);
599 g_free(info);