system/dirtylimit.c
/*
 * Dirty page rate limit implementation code
 *
 * Copyright (c) 2022 CHINA TELECOM CO.,LTD.
 *
 * Authors:
 *  Hyman Huang(黄勇) <huangy81@chinatelecom.cn>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */
#include "qemu/osdep.h"
#include "qemu/main-loop.h"
#include "qapi/qapi-commands-migration.h"
#include "qapi/qmp/qdict.h"
#include "qapi/error.h"
#include "sysemu/dirtyrate.h"
#include "sysemu/dirtylimit.h"
#include "monitor/hmp.h"
#include "monitor/monitor.h"
#include "exec/memory.h"
#include "exec/target_page.h"
#include "hw/boards.h"
#include "sysemu/kvm.h"
#include "trace.h"
#include "migration/misc.h"
#include "migration/migration.h"
#include "migration/options.h"

/*
 * Dirtylimit stops adjusting the vcpu throttle once the dirty page rate
 * error value drops below DIRTYLIMIT_TOLERANCE_RANGE.
 */
#define DIRTYLIMIT_TOLERANCE_RANGE  25  /* MB/s */

/*
 * Increase or decrease the vcpu sleep time linearly if the dirty
 * page rate error percentage is over DIRTYLIMIT_LINEAR_ADJUSTMENT_PCT;
 * otherwise, adjust the sleep time by a fixed step.
 */
#define DIRTYLIMIT_LINEAR_ADJUSTMENT_PCT     50

/*
 * Max vcpu sleep time percentage during a cycle
 * composed of dirty ring full and sleep time.
 */
#define DIRTYLIMIT_THROTTLE_PCT_MAX 99

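/*
 * Per-vcpu dirty page rate statistics, filled in by the "dirtyrate-stat"
 * thread below and read by the throttle logic via vcpu_dirty_rate_get().
 */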
struct {
    VcpuStat stat;
    bool running;
    QemuThread thread;
} *vcpu_dirty_rate_stat;

typedef struct VcpuDirtyLimitState {
    int cpu_index;
    bool enabled;
    /*
     * Quota dirty page rate, in MB/s;
     * zero if not enabled.
     */
    uint64_t quota;
} VcpuDirtyLimitState;

struct {
    VcpuDirtyLimitState *states;
    /* Max number of cpus configured by the user */
    int max_cpus;
    /* Number of vcpus currently under dirtylimit */
    int limited_nvcpu;
} *dirtylimit_state;

/* Protects dirtylimit_state */
static QemuMutex dirtylimit_mutex;

/* dirtylimit_process() stops adjusting throttles while dirtylimit_quit is true */
static bool dirtylimit_quit;

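/*
 * Sample the dirty page rate of every vcpu over one calculation period and
 * publish the results into vcpu_dirty_rate_stat->stat.  While a migration
 * with the dirty-limit capability is active, the period is taken from the
 * x-vcpu-dirty-limit-period migration parameter instead of the default
 * DIRTYLIMIT_CALC_TIME_MS.
 */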
static void vcpu_dirty_rate_stat_collect(void)
{
    MigrationState *s = migrate_get_current();
    VcpuStat stat;
    int i = 0;
    int64_t period = DIRTYLIMIT_CALC_TIME_MS;

    if (migrate_dirty_limit() &&
        migration_is_active(s)) {
        period = s->parameters.x_vcpu_dirty_limit_period;
    }

    /* calculate vcpu dirtyrate */
    vcpu_calculate_dirtyrate(period,
                             &stat,
                             GLOBAL_DIRTY_LIMIT,
                             false);

    for (i = 0; i < stat.nvcpu; i++) {
        vcpu_dirty_rate_stat->stat.rates[i].id = i;
        vcpu_dirty_rate_stat->stat.rates[i].dirty_rate =
            stat.rates[i].dirty_rate;
    }

    g_free(stat.rates);
}

static void *vcpu_dirty_rate_stat_thread(void *opaque)
{
    rcu_register_thread();

    /* start log sync */
    global_dirty_log_change(GLOBAL_DIRTY_LIMIT, true);

    while (qatomic_read(&vcpu_dirty_rate_stat->running)) {
        vcpu_dirty_rate_stat_collect();
        if (dirtylimit_in_service()) {
            dirtylimit_process();
        }
    }

    /* stop log sync */
    global_dirty_log_change(GLOBAL_DIRTY_LIMIT, false);

    rcu_unregister_thread();
    return NULL;
}

int64_t vcpu_dirty_rate_get(int cpu_index)
{
    DirtyRateVcpu *rates = vcpu_dirty_rate_stat->stat.rates;
    return qatomic_read_i64(&rates[cpu_index].dirty_rate);
}

void vcpu_dirty_rate_stat_start(void)
{
    if (qatomic_read(&vcpu_dirty_rate_stat->running)) {
        return;
    }

    qatomic_set(&vcpu_dirty_rate_stat->running, 1);
    qemu_thread_create(&vcpu_dirty_rate_stat->thread,
                       "dirtyrate-stat",
                       vcpu_dirty_rate_stat_thread,
                       NULL,
                       QEMU_THREAD_JOINABLE);
}

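/*
 * Note: the dirtylimit state lock and the BQL are dropped around
 * qemu_thread_join() below, presumably so that the stat thread can
 * finish an in-flight collection cycle (which may take those locks)
 * before it exits.
 */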
void vcpu_dirty_rate_stat_stop(void)
{
    qatomic_set(&vcpu_dirty_rate_stat->running, 0);
    dirtylimit_state_unlock();
    bql_unlock();
    qemu_thread_join(&vcpu_dirty_rate_stat->thread);
    bql_lock();
    dirtylimit_state_lock();
}

void vcpu_dirty_rate_stat_initialize(void)
{
    MachineState *ms = MACHINE(qdev_get_machine());
    int max_cpus = ms->smp.max_cpus;

    vcpu_dirty_rate_stat =
        g_malloc0(sizeof(*vcpu_dirty_rate_stat));

    vcpu_dirty_rate_stat->stat.nvcpu = max_cpus;
    vcpu_dirty_rate_stat->stat.rates =
        g_new0(DirtyRateVcpu, max_cpus);

    vcpu_dirty_rate_stat->running = false;
}

void vcpu_dirty_rate_stat_finalize(void)
{
    g_free(vcpu_dirty_rate_stat->stat.rates);
    vcpu_dirty_rate_stat->stat.rates = NULL;

    g_free(vcpu_dirty_rate_stat);
    vcpu_dirty_rate_stat = NULL;
}

void dirtylimit_state_lock(void)
{
    qemu_mutex_lock(&dirtylimit_mutex);
}

void dirtylimit_state_unlock(void)
{
    qemu_mutex_unlock(&dirtylimit_mutex);
}

static void
__attribute__((__constructor__)) dirtylimit_mutex_init(void)
{
    qemu_mutex_init(&dirtylimit_mutex);
}

static inline VcpuDirtyLimitState *dirtylimit_vcpu_get_state(int cpu_index)
{
    return &dirtylimit_state->states[cpu_index];
}

void dirtylimit_state_initialize(void)
{
    MachineState *ms = MACHINE(qdev_get_machine());
    int max_cpus = ms->smp.max_cpus;
    int i;

    dirtylimit_state = g_malloc0(sizeof(*dirtylimit_state));

    dirtylimit_state->states =
        g_new0(VcpuDirtyLimitState, max_cpus);

    for (i = 0; i < max_cpus; i++) {
        dirtylimit_state->states[i].cpu_index = i;
    }

    dirtylimit_state->max_cpus = max_cpus;
    trace_dirtylimit_state_initialize(max_cpus);
}

void dirtylimit_state_finalize(void)
{
    g_free(dirtylimit_state->states);
    dirtylimit_state->states = NULL;

    g_free(dirtylimit_state);
    dirtylimit_state = NULL;

    trace_dirtylimit_state_finalize();
}

bool dirtylimit_in_service(void)
{
    return !!dirtylimit_state;
}

bool dirtylimit_vcpu_index_valid(int cpu_index)
{
    MachineState *ms = MACHINE(qdev_get_machine());

    return !(cpu_index < 0 ||
             cpu_index >= ms->smp.max_cpus);
}

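/*
 * Estimate, in microseconds, how long a vcpu dirtying memory at the
 * highest rate observed so far (max_dirtyrate, MB/s) takes to fill its
 * KVM dirty ring: the ring size is converted from guest pages to MiB,
 * divided by the rate, and scaled from seconds to microseconds.
 */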
static uint64_t dirtylimit_dirty_ring_full_time(uint64_t dirtyrate)
{
    static uint64_t max_dirtyrate;
    uint64_t dirty_ring_size_MiB;

    dirty_ring_size_MiB = qemu_target_pages_to_MiB(kvm_dirty_ring_size());

    if (max_dirtyrate < dirtyrate) {
        max_dirtyrate = dirtyrate;
    }

    return dirty_ring_size_MiB * 1000000 / max_dirtyrate;
}

static inline bool dirtylimit_done(uint64_t quota,
                                   uint64_t current)
{
    uint64_t min, max;

    min = MIN(quota, current);
    max = MAX(quota, current);

    return (max - min) <= DIRTYLIMIT_TOLERANCE_RANGE;
}

static inline bool
dirtylimit_need_linear_adjustment(uint64_t quota,
                                  uint64_t current)
{
    uint64_t min, max;

    min = MIN(quota, current);
    max = MAX(quota, current);

    return ((max - min) * 100 / max) > DIRTYLIMIT_LINEAR_ADJUSTMENT_PCT;
}

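/*
 * Pick a new sleep time (cpu->throttle_us_per_full) for one "dirty ring
 * full + sleep" cycle.  In the linear branch, sleep_pct is the rate error
 * as a percentage of the larger of quota and current; solving
 *     sleep / (ring_full_time + sleep) = sleep_pct / 100
 * for sleep gives
 *     sleep = ring_full_time * sleep_pct / (100 - sleep_pct),
 * which is the throttle_us added to (or subtracted from) the current
 * sleep time.  For example, quota = 100 MB/s and current = 200 MB/s give
 * sleep_pct = 50 and throttle_us = ring_full_time_us.  For smaller errors
 * the sleep time is nudged by ring_full_time_us / 10 per iteration, and
 * the result is clamped to [0, 99 * ring_full_time_us].
 */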
static void dirtylimit_set_throttle(CPUState *cpu,
                                    uint64_t quota,
                                    uint64_t current)
{
    int64_t ring_full_time_us = 0;
    uint64_t sleep_pct = 0;
    uint64_t throttle_us = 0;

    if (current == 0) {
        cpu->throttle_us_per_full = 0;
        return;
    }

    ring_full_time_us = dirtylimit_dirty_ring_full_time(current);

    if (dirtylimit_need_linear_adjustment(quota, current)) {
        if (quota < current) {
            sleep_pct = (current - quota) * 100 / current;
            throttle_us =
                ring_full_time_us * sleep_pct / (double)(100 - sleep_pct);
            cpu->throttle_us_per_full += throttle_us;
        } else {
            sleep_pct = (quota - current) * 100 / quota;
            throttle_us =
                ring_full_time_us * sleep_pct / (double)(100 - sleep_pct);
            cpu->throttle_us_per_full -= throttle_us;
        }

        trace_dirtylimit_throttle_pct(cpu->cpu_index,
                                      sleep_pct,
                                      throttle_us);
    } else {
        if (quota < current) {
            cpu->throttle_us_per_full += ring_full_time_us / 10;
        } else {
            cpu->throttle_us_per_full -= ring_full_time_us / 10;
        }
    }

    /*
     * TODO: if kvm_dirty_ring_size is large (e.g. 65536, or in other
     *       scenarios), the current dirty page rate may never reach the
     *       quota; should we stop increasing the sleep time then?
     */
    cpu->throttle_us_per_full = MIN(cpu->throttle_us_per_full,
        ring_full_time_us * DIRTYLIMIT_THROTTLE_PCT_MAX);

    cpu->throttle_us_per_full = MAX(cpu->throttle_us_per_full, 0);
}

static void dirtylimit_adjust_throttle(CPUState *cpu)
{
    uint64_t quota = 0;
    uint64_t current = 0;
    int cpu_index = cpu->cpu_index;

    quota = dirtylimit_vcpu_get_state(cpu_index)->quota;
    current = vcpu_dirty_rate_get(cpu_index);

    if (!dirtylimit_done(quota, current)) {
        dirtylimit_set_throttle(cpu, quota, current);
    }
}

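/*
 * Walk all vcpus that currently have a limit enabled and re-adjust their
 * throttle.  In this file it is driven by the "dirtyrate-stat" thread
 * after each collection period, and it does nothing once dirtylimit_quit
 * has been set.
 */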
void dirtylimit_process(void)
{
    CPUState *cpu;

    if (!qatomic_read(&dirtylimit_quit)) {
        dirtylimit_state_lock();

        if (!dirtylimit_in_service()) {
            dirtylimit_state_unlock();
            return;
        }

        CPU_FOREACH(cpu) {
            if (!dirtylimit_vcpu_get_state(cpu->cpu_index)->enabled) {
                continue;
            }
            dirtylimit_adjust_throttle(cpu);
        }
        dirtylimit_state_unlock();
    }
}

void dirtylimit_change(bool start)
{
    if (start) {
        qatomic_set(&dirtylimit_quit, 0);
    } else {
        qatomic_set(&dirtylimit_quit, 1);
    }
}

void dirtylimit_set_vcpu(int cpu_index,
                         uint64_t quota,
                         bool enable)
{
    trace_dirtylimit_set_vcpu(cpu_index, quota);

    if (enable) {
        dirtylimit_state->states[cpu_index].quota = quota;
        if (!dirtylimit_vcpu_get_state(cpu_index)->enabled) {
            dirtylimit_state->limited_nvcpu++;
        }
    } else {
        dirtylimit_state->states[cpu_index].quota = 0;
        if (dirtylimit_state->states[cpu_index].enabled) {
            dirtylimit_state->limited_nvcpu--;
        }
    }

    dirtylimit_state->states[cpu_index].enabled = enable;
}

void dirtylimit_set_all(uint64_t quota,
                        bool enable)
{
    MachineState *ms = MACHINE(qdev_get_machine());
    int max_cpus = ms->smp.max_cpus;
    int i;

    for (i = 0; i < max_cpus; i++) {
        dirtylimit_set_vcpu(i, quota, enable);
    }
}

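/*
 * Throttle a vcpu by sleeping for its throttle_us_per_full budget.  This
 * is expected to run in the vcpu thread itself (e.g. after its KVM dirty
 * ring fills up), so the g_usleep() directly slows down guest dirtying.
 */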
void dirtylimit_vcpu_execute(CPUState *cpu)
{
    if (cpu->throttle_us_per_full) {
        dirtylimit_state_lock();

        if (dirtylimit_in_service() &&
            dirtylimit_vcpu_get_state(cpu->cpu_index)->enabled) {
            dirtylimit_state_unlock();
            trace_dirtylimit_vcpu_execute(cpu->cpu_index,
                                          cpu->throttle_us_per_full);

            g_usleep(cpu->throttle_us_per_full);
            return;
        }

        dirtylimit_state_unlock();
    }
}

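/*
 * Lazily bring the limit machinery up on first use: allocate the per-vcpu
 * state, allow dirtylimit_process() to run, and start the "dirtyrate-stat"
 * sampling thread.  dirtylimit_cleanup() below undoes all of this.
 */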
static void dirtylimit_init(void)
{
    dirtylimit_state_initialize();
    dirtylimit_change(true);
    vcpu_dirty_rate_stat_initialize();
    vcpu_dirty_rate_stat_start();
}

static void dirtylimit_cleanup(void)
{
    vcpu_dirty_rate_stat_stop();
    vcpu_dirty_rate_stat_finalize();
    dirtylimit_change(false);
    dirtylimit_state_finalize();
}

/*
 * The dirty page rate limit must not be set or cancelled while a
 * migration with the dirty-limit capability enabled is running
 * (except from the migration thread itself).
 */
static bool dirtylimit_is_allowed(void)
{
    MigrationState *ms = migrate_get_current();

    if (migration_is_running(ms->state) &&
        (!qemu_thread_is_self(&ms->thread)) &&
        migrate_dirty_limit() &&
        dirtylimit_in_service()) {
        return false;
    }
    return true;
}

void qmp_cancel_vcpu_dirty_limit(bool has_cpu_index,
                                 int64_t cpu_index,
                                 Error **errp)
{
    if (!kvm_enabled() || !kvm_dirty_ring_enabled()) {
        return;
    }

    if (has_cpu_index && !dirtylimit_vcpu_index_valid(cpu_index)) {
        error_setg(errp, "incorrect cpu index specified");
        return;
    }

    if (!dirtylimit_is_allowed()) {
        error_setg(errp, "can't cancel dirty page rate limit while"
                   " migration is running");
        return;
    }

    if (!dirtylimit_in_service()) {
        return;
    }

    dirtylimit_state_lock();

    if (has_cpu_index) {
        dirtylimit_set_vcpu(cpu_index, 0, false);
    } else {
        dirtylimit_set_all(0, false);
    }

    if (!dirtylimit_state->limited_nvcpu) {
        dirtylimit_cleanup();
    }

    dirtylimit_state_unlock();
}

void hmp_cancel_vcpu_dirty_limit(Monitor *mon, const QDict *qdict)
{
    int64_t cpu_index = qdict_get_try_int(qdict, "cpu_index", -1);
    Error *err = NULL;

    qmp_cancel_vcpu_dirty_limit(!!(cpu_index != -1), cpu_index, &err);
    if (err) {
        hmp_handle_error(mon, err);
        return;
    }

    monitor_printf(mon, "[Please use 'info vcpu_dirty_limit' to query "
                   "dirty limit for virtual CPU]\n");
}

void qmp_set_vcpu_dirty_limit(bool has_cpu_index,
                              int64_t cpu_index,
                              uint64_t dirty_rate,
                              Error **errp)
{
    if (!kvm_enabled() || !kvm_dirty_ring_enabled()) {
        error_setg(errp, "dirty page limit feature requires KVM with"
                   " accelerator property 'dirty-ring-size' set");
        return;
    }

    if (has_cpu_index && !dirtylimit_vcpu_index_valid(cpu_index)) {
        error_setg(errp, "incorrect cpu index specified");
        return;
    }

    if (!dirtylimit_is_allowed()) {
        error_setg(errp, "can't set dirty page rate limit while"
                   " migration is running");
        return;
    }

    if (!dirty_rate) {
        qmp_cancel_vcpu_dirty_limit(has_cpu_index, cpu_index, errp);
        return;
    }

    dirtylimit_state_lock();

    if (!dirtylimit_in_service()) {
        dirtylimit_init();
    }

    if (has_cpu_index) {
        dirtylimit_set_vcpu(cpu_index, dirty_rate, true);
    } else {
        dirtylimit_set_all(dirty_rate, true);
    }

    dirtylimit_state_unlock();
}

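/*
 * HMP wrapper around qmp_set_vcpu_dirty_limit(): "dirty_rate" is required,
 * "cpu_index" is optional and, when omitted, the limit applies to all vcpus.
 */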
void hmp_set_vcpu_dirty_limit(Monitor *mon, const QDict *qdict)
{
    int64_t dirty_rate = qdict_get_int(qdict, "dirty_rate");
    int64_t cpu_index = qdict_get_try_int(qdict, "cpu_index", -1);
    Error *err = NULL;

    if (dirty_rate < 0) {
        error_setg(&err, "invalid dirty page limit %" PRId64, dirty_rate);
        goto out;
    }

    qmp_set_vcpu_dirty_limit(!!(cpu_index != -1), cpu_index, dirty_rate, &err);

out:
    hmp_handle_error(mon, err);
}

/* Return the maximum throttle time among all virtual CPUs */
uint64_t dirtylimit_throttle_time_per_round(void)
{
    CPUState *cpu;
    int64_t max = 0;

    CPU_FOREACH(cpu) {
        if (cpu->throttle_us_per_full > max) {
            max = cpu->throttle_us_per_full;
        }
    }

    return max;
}

/*
 * Estimate the average dirty ring full time of the running virtual CPUs.
 * Return 0 if the guest doesn't dirty memory.
 */
uint64_t dirtylimit_ring_full_time(void)
{
    CPUState *cpu;
    uint64_t curr_rate = 0;
    int nvcpus = 0;

    CPU_FOREACH(cpu) {
        if (cpu->running) {
            nvcpus++;
            curr_rate += vcpu_dirty_rate_get(cpu->cpu_index);
        }
    }

    if (!curr_rate || !nvcpus) {
        return 0;
    }

    return dirtylimit_dirty_ring_full_time(curr_rate / nvcpus);
}

static struct DirtyLimitInfo *dirtylimit_query_vcpu(int cpu_index)
{
    DirtyLimitInfo *info = NULL;

    info = g_malloc0(sizeof(*info));
    info->cpu_index = cpu_index;
    info->limit_rate = dirtylimit_vcpu_get_state(cpu_index)->quota;
    info->current_rate = vcpu_dirty_rate_get(cpu_index);

    return info;
}

static struct DirtyLimitInfoList *dirtylimit_query_all(void)
{
    int i, index;
    DirtyLimitInfo *info = NULL;
    DirtyLimitInfoList *head = NULL, **tail = &head;

    dirtylimit_state_lock();

    if (!dirtylimit_in_service()) {
        dirtylimit_state_unlock();
        return NULL;
    }

    for (i = 0; i < dirtylimit_state->max_cpus; i++) {
        index = dirtylimit_state->states[i].cpu_index;
        if (dirtylimit_vcpu_get_state(index)->enabled) {
            info = dirtylimit_query_vcpu(index);
            QAPI_LIST_APPEND(tail, info);
        }
    }

    dirtylimit_state_unlock();

    return head;
}

struct DirtyLimitInfoList *qmp_query_vcpu_dirty_limit(Error **errp)
{
    return dirtylimit_query_all();
}

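/*
 * HMP "info vcpu_dirty_limit": prints one line per limited vcpu, e.g.
 * (illustrative numbers)
 *   vcpu[1], limit rate 200 (MB/s), current rate 95 (MB/s)
 */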
void hmp_info_vcpu_dirty_limit(Monitor *mon, const QDict *qdict)
{
    DirtyLimitInfoList *info;
    g_autoptr(DirtyLimitInfoList) head = NULL;
    Error *err = NULL;

    if (!dirtylimit_in_service()) {
        monitor_printf(mon, "Dirty page limit not enabled!\n");
        return;
    }

    head = qmp_query_vcpu_dirty_limit(&err);
    if (err) {
        hmp_handle_error(mon, err);
        return;
    }

    for (info = head; info != NULL; info = info->next) {
        monitor_printf(mon, "vcpu[%"PRIi64"], limit rate %"PRIi64 " (MB/s),"
                            " current rate %"PRIi64 " (MB/s)\n",
                            info->value->cpu_index,
                            info->value->limit_rate,
                            info->value->current_rate);
    }
}