/*
 * Copyright (c) 2003-2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

25 #include "qemu/osdep.h"
26 #include "qemu/cutils.h"
27 #include "migration/vmstate.h"
28 #include "qapi/error.h"
29 #include "qemu/error-report.h"
30 #include "sysemu/cpus.h"
31 #include "sysemu/qtest.h"
32 #include "qemu/main-loop.h"
33 #include "qemu/option.h"
34 #include "qemu/seqlock.h"
35 #include "sysemu/replay.h"
36 #include "sysemu/runstate.h"
37 #include "hw/core/cpu.h"
38 #include "sysemu/cpu-timers.h"
39 #include "sysemu/cpu-throttle.h"
40 #include "sysemu/cpu-timers-internal.h"
/*
 * ICOUNT: Instruction Counter
 *
 * This module is split off from cpu-timers because the icount part
 * is TCG-specific, and does not need to be built for other accels.
 */
/*
 * Whether vCPUs are allowed to sleep while idle in icount mode.  Defaults
 * to true and is overwritten from the "sleep" option by icount_configure().
 */
static bool icount_sleep = true;

/*
 * Largest accepted icount shift (one instruction accounts for 2^shift ns).
 * Arbitrarily pick 1MIPS as the minimum allowable speed.
 */
#define MAX_ICOUNT_SHIFT 10
/*
 * icount mode values:
 * 0 = Do not count executed instructions.
 * 1 = Fixed conversion of insn to ns via "shift" option
 * 2 = Runtime adaptive algorithm to compute shift
 */
59 static void icount_enable_precise(void)
64 static void icount_enable_adaptive(void)
70 * The current number of executed instructions is based on what we
71 * originally budgeted minus the current state of the decrementing
72 * icount counters in extra/u16.low.
74 static int64_t icount_get_executed(CPUState
*cpu
)
76 return (cpu
->icount_budget
-
77 (cpu
->neg
.icount_decr
.u16
.low
+ cpu
->icount_extra
));
81 * Update the global shared timer_state.qemu_icount to take into
82 * account executed instructions. This is done by the TCG vCPU
83 * thread so the main-loop can see time has moved forward.
85 static void icount_update_locked(CPUState
*cpu
)
87 int64_t executed
= icount_get_executed(cpu
);
88 cpu
->icount_budget
-= executed
;
90 qatomic_set_i64(&timers_state
.qemu_icount
,
91 timers_state
.qemu_icount
+ executed
);
95 * Update the global shared timer_state.qemu_icount to take into
96 * account executed instructions. This is done by the TCG vCPU
97 * thread so the main-loop can see time has moved forward.
99 void icount_update(CPUState
*cpu
)
101 seqlock_write_lock(&timers_state
.vm_clock_seqlock
,
102 &timers_state
.vm_clock_lock
);
103 icount_update_locked(cpu
);
104 seqlock_write_unlock(&timers_state
.vm_clock_seqlock
,
105 &timers_state
.vm_clock_lock
);
108 static int64_t icount_get_raw_locked(void)
110 CPUState
*cpu
= current_cpu
;
112 if (cpu
&& cpu
->running
) {
113 if (!cpu
->neg
.can_do_io
) {
114 error_report("Bad icount read");
117 /* Take into account what has run */
118 icount_update_locked(cpu
);
120 /* The read is protected by the seqlock, but needs atomic64 to avoid UB */
121 return qatomic_read_i64(&timers_state
.qemu_icount
);
124 static int64_t icount_get_locked(void)
126 int64_t icount
= icount_get_raw_locked();
127 return qatomic_read_i64(&timers_state
.qemu_icount_bias
) +
128 icount_to_ns(icount
);
131 int64_t icount_get_raw(void)
137 start
= seqlock_read_begin(&timers_state
.vm_clock_seqlock
);
138 icount
= icount_get_raw_locked();
139 } while (seqlock_read_retry(&timers_state
.vm_clock_seqlock
, start
));
144 /* Return the virtual CPU time, based on the instruction counter. */
145 int64_t icount_get(void)
151 start
= seqlock_read_begin(&timers_state
.vm_clock_seqlock
);
152 icount
= icount_get_locked();
153 } while (seqlock_read_retry(&timers_state
.vm_clock_seqlock
, start
));
158 int64_t icount_to_ns(int64_t icount
)
160 return icount
<< qatomic_read(&timers_state
.icount_time_shift
);
164 * Correlation between real and virtual time is always going to be
165 * fairly approximate, so ignore small variation.
166 * When the guest is idle real and virtual time will be aligned in
169 #define ICOUNT_WOBBLE (NANOSECONDS_PER_SECOND / 10)
171 static void icount_adjust(void)
177 /* If the VM is not running, then do nothing. */
178 if (!runstate_is_running()) {
182 seqlock_write_lock(&timers_state
.vm_clock_seqlock
,
183 &timers_state
.vm_clock_lock
);
184 cur_time
= REPLAY_CLOCK_LOCKED(REPLAY_CLOCK_VIRTUAL_RT
,
185 cpu_get_clock_locked());
186 cur_icount
= icount_get_locked();
188 delta
= cur_icount
- cur_time
;
189 /* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */
191 && timers_state
.last_delta
+ ICOUNT_WOBBLE
< delta
* 2
192 && timers_state
.icount_time_shift
> 0) {
193 /* The guest is getting too far ahead. Slow time down. */
194 qatomic_set(&timers_state
.icount_time_shift
,
195 timers_state
.icount_time_shift
- 1);
198 && timers_state
.last_delta
- ICOUNT_WOBBLE
> delta
* 2
199 && timers_state
.icount_time_shift
< MAX_ICOUNT_SHIFT
) {
200 /* The guest is getting too far behind. Speed time up. */
201 qatomic_set(&timers_state
.icount_time_shift
,
202 timers_state
.icount_time_shift
+ 1);
204 timers_state
.last_delta
= delta
;
205 qatomic_set_i64(&timers_state
.qemu_icount_bias
,
206 cur_icount
- (timers_state
.qemu_icount
207 << timers_state
.icount_time_shift
));
208 seqlock_write_unlock(&timers_state
.vm_clock_seqlock
,
209 &timers_state
.vm_clock_lock
);
212 static void icount_adjust_rt(void *opaque
)
214 timer_mod(timers_state
.icount_rt_timer
,
215 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT
) + 1000);
219 static void icount_adjust_vm(void *opaque
)
221 timer_mod(timers_state
.icount_vm_timer
,
222 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL
) +
223 NANOSECONDS_PER_SECOND
/ 10);
227 int64_t icount_round(int64_t count
)
229 int shift
= qatomic_read(&timers_state
.icount_time_shift
);
230 return (count
+ (1 << shift
) - 1) >> shift
;
233 static void icount_warp_rt(void)
239 * The icount_warp_timer is rescheduled soon after vm_clock_warp_start
240 * changes from -1 to another value, so the race here is okay.
243 seq
= seqlock_read_begin(&timers_state
.vm_clock_seqlock
);
244 warp_start
= timers_state
.vm_clock_warp_start
;
245 } while (seqlock_read_retry(&timers_state
.vm_clock_seqlock
, seq
));
247 if (warp_start
== -1) {
251 seqlock_write_lock(&timers_state
.vm_clock_seqlock
,
252 &timers_state
.vm_clock_lock
);
253 if (runstate_is_running()) {
254 int64_t clock
= REPLAY_CLOCK_LOCKED(REPLAY_CLOCK_VIRTUAL_RT
,
255 cpu_get_clock_locked());
258 warp_delta
= clock
- timers_state
.vm_clock_warp_start
;
259 if (icount_enabled() == 2) {
261 * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too far
262 * ahead of real time (it might already be ahead so careful not
265 int64_t cur_icount
= icount_get_locked();
266 int64_t delta
= clock
- cur_icount
;
271 warp_delta
= MIN(warp_delta
, delta
);
273 qatomic_set_i64(&timers_state
.qemu_icount_bias
,
274 timers_state
.qemu_icount_bias
+ warp_delta
);
276 timers_state
.vm_clock_warp_start
= -1;
277 seqlock_write_unlock(&timers_state
.vm_clock_seqlock
,
278 &timers_state
.vm_clock_lock
);
280 if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL
)) {
281 qemu_clock_notify(QEMU_CLOCK_VIRTUAL
);
/*
 * Callback of timers_state.icount_warp_timer: account the pending warp.
 * @opaque is unused.
 */
static void icount_timer_cb(void *opaque)
{
    /*
     * No need for a checkpoint because the timer already synchronizes
     * with CHECKPOINT_CLOCK_VIRTUAL_RT.
     */
    icount_warp_rt();
}
294 void icount_start_warp_timer(void)
299 assert(icount_enabled());
302 * Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
303 * do not fire, so computing the deadline does not make sense.
305 if (!runstate_is_running()) {
309 if (replay_mode
!= REPLAY_MODE_PLAY
) {
310 if (!all_cpu_threads_idle()) {
314 if (qtest_enabled()) {
315 /* When testing, qtest commands advance icount. */
319 replay_checkpoint(CHECKPOINT_CLOCK_WARP_START
);
321 /* warp clock deterministically in record/replay mode */
322 if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_START
)) {
324 * vCPU is sleeping and warp can't be started.
325 * It is probably a race condition: notification sent
326 * to vCPU was processed in advance and vCPU went to sleep.
327 * Therefore we have to wake it up for doing something.
329 if (replay_has_event()) {
330 qemu_clock_notify(QEMU_CLOCK_VIRTUAL
);
336 /* We want to use the earliest deadline from ALL vm_clocks */
337 clock
= qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT
);
338 deadline
= qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL
,
339 ~QEMU_TIMER_ATTR_EXTERNAL
);
341 static bool notified
;
342 if (!icount_sleep
&& !notified
) {
343 warn_report("icount sleep disabled and no active timers");
351 * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
352 * sleep. Otherwise, the CPU might be waiting for a future timer
353 * interrupt to wake it up, but the interrupt never comes because
354 * the vCPU isn't running any insns and thus doesn't advance the
355 * QEMU_CLOCK_VIRTUAL.
359 * We never let VCPUs sleep in no sleep icount mode.
360 * If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance
361 * to the next QEMU_CLOCK_VIRTUAL event and notify it.
362 * It is useful when we want a deterministic execution time,
363 * isolated from host latencies.
365 seqlock_write_lock(&timers_state
.vm_clock_seqlock
,
366 &timers_state
.vm_clock_lock
);
367 qatomic_set_i64(&timers_state
.qemu_icount_bias
,
368 timers_state
.qemu_icount_bias
+ deadline
);
369 seqlock_write_unlock(&timers_state
.vm_clock_seqlock
,
370 &timers_state
.vm_clock_lock
);
371 qemu_clock_notify(QEMU_CLOCK_VIRTUAL
);
374 * We do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL after some
375 * "real" time, (related to the time left until the next event) has
376 * passed. The QEMU_CLOCK_VIRTUAL_RT clock will do this.
377 * This avoids that the warps are visible externally; for example,
378 * you will not be sending network packets continuously instead of
381 seqlock_write_lock(&timers_state
.vm_clock_seqlock
,
382 &timers_state
.vm_clock_lock
);
383 if (timers_state
.vm_clock_warp_start
== -1
384 || timers_state
.vm_clock_warp_start
> clock
) {
385 timers_state
.vm_clock_warp_start
= clock
;
387 seqlock_write_unlock(&timers_state
.vm_clock_seqlock
,
388 &timers_state
.vm_clock_lock
);
389 timer_mod_anticipate(timers_state
.icount_warp_timer
,
392 } else if (deadline
== 0) {
393 qemu_clock_notify(QEMU_CLOCK_VIRTUAL
);
397 void icount_account_warp_timer(void)
404 * Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
405 * do not fire, so computing the deadline does not make sense.
407 if (!runstate_is_running()) {
411 replay_async_events();
413 /* warp clock deterministically in record/replay mode */
414 if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_ACCOUNT
)) {
418 timer_del(timers_state
.icount_warp_timer
);
422 void icount_configure(QemuOpts
*opts
, Error
**errp
)
424 const char *option
= qemu_opt_get(opts
, "shift");
425 bool sleep
= qemu_opt_get_bool(opts
, "sleep", true);
426 bool align
= qemu_opt_get_bool(opts
, "align", false);
427 long time_shift
= -1;
430 if (qemu_opt_get(opts
, "align") != NULL
) {
431 error_setg(errp
, "Please specify shift option when using align");
436 if (align
&& !sleep
) {
437 error_setg(errp
, "align=on and sleep=off are incompatible");
441 if (strcmp(option
, "auto") != 0) {
442 if (qemu_strtol(option
, NULL
, 0, &time_shift
) < 0
443 || time_shift
< 0 || time_shift
> MAX_ICOUNT_SHIFT
) {
444 error_setg(errp
, "icount: Invalid shift value");
447 } else if (icount_align_option
) {
448 error_setg(errp
, "shift=auto and align=on are incompatible");
450 } else if (!icount_sleep
) {
451 error_setg(errp
, "shift=auto and sleep=off are incompatible");
455 icount_sleep
= sleep
;
457 timers_state
.icount_warp_timer
= timer_new_ns(QEMU_CLOCK_VIRTUAL_RT
,
458 icount_timer_cb
, NULL
);
461 icount_align_option
= align
;
463 if (time_shift
>= 0) {
464 timers_state
.icount_time_shift
= time_shift
;
465 icount_enable_precise();
469 icount_enable_adaptive();
472 * 125MIPS seems a reasonable initial guess at the guest speed.
473 * It will be corrected fairly quickly anyway.
475 timers_state
.icount_time_shift
= 3;
478 * Have both realtime and virtual time triggers for speed adjustment.
479 * The realtime trigger catches emulated time passing too slowly,
480 * the virtual time trigger catches emulated time passing too fast.
481 * Realtime triggers occur even when idle, so use them less frequently
484 timers_state
.vm_clock_warp_start
= -1;
485 timers_state
.icount_rt_timer
= timer_new_ms(QEMU_CLOCK_VIRTUAL_RT
,
486 icount_adjust_rt
, NULL
);
487 timer_mod(timers_state
.icount_rt_timer
,
488 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT
) + 1000);
489 timers_state
.icount_vm_timer
= timer_new_ns(QEMU_CLOCK_VIRTUAL
,
490 icount_adjust_vm
, NULL
);
491 timer_mod(timers_state
.icount_vm_timer
,
492 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL
) +
493 NANOSECONDS_PER_SECOND
/ 10);
496 void icount_notify_exit(void)
498 if (icount_enabled() && current_cpu
) {
499 qemu_cpu_kick(current_cpu
);
500 qemu_clock_notify(QEMU_CLOCK_VIRTUAL
);