#include "config-host.h"

#ifdef USE_KVM /* assumed guard: the #ifdef/#else/#endif lines are elided in this excerpt */
#define KVM_ALLOWED_DEFAULT 1
#else
#define KVM_ALLOWED_DEFAULT 0
#endif

int kvm_allowed = KVM_ALLOWED_DEFAULT;
#include <sys/utsname.h>

extern void perror(const char *s);

kvm_context_t kvm_context;

pthread_mutex_t qemu_mutex = PTHREAD_MUTEX_INITIALIZER;
pthread_cond_t qemu_aio_cond = PTHREAD_COND_INITIALIZER;
__thread CPUState *vcpu_env;

static sigset_t io_sigset, io_negsigset;

#define SIG_IPI (SIGRTMIN+4)
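
/* Per-vcpu bookkeeping.  The real declaration is elided in this excerpt;
 * this sketch assumes only the fields the code below actually touches.
 * SIG_IPI exists just to kick a vcpu thread out of the KVM_RUN ioctl,
 * so its handler (below) is empty. */
struct vcpu_info {
    pthread_t thread;
    int signalled;
    int sipi_needed;
    int init;
    int stop;
    int stopped;
} vcpu_info[4]; /* array size assumed */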
static void sig_ipi_handler(int n)
{
}
void kvm_update_interrupt_request(CPUState *env)
{
    if (env && env != vcpu_env) {
        if (vcpu_info[env->cpu_index].signalled)
            return;
        vcpu_info[env->cpu_index].signalled = 1;
        if (vcpu_info[env->cpu_index].thread)
            pthread_kill(vcpu_info[env->cpu_index].thread, SIG_IPI);
    }
}
void kvm_update_after_sipi(CPUState *env)
{
    vcpu_info[env->cpu_index].sipi_needed = 1;
    kvm_update_interrupt_request(env);
}
/*
 * The qemu BIOS waits using a busy loop that's much too short for
 * kvm; add a wait after the first SIPI.
 */
static int first_sipi = 1;
static int wait_hack = 1; /* assumed declaration: used by pre_kvm_run() below but elided in this excerpt */
void kvm_apic_init(CPUState *env)
{
    if (env->cpu_index != 0)
        vcpu_info[env->cpu_index].init = 1;
    kvm_update_interrupt_request(env);
}
static int try_push_interrupts(void *opaque)
{
    return kvm_arch_try_push_interrupts(opaque);
}

static void post_kvm_run(void *opaque, int vcpu)
{
    pthread_mutex_lock(&qemu_mutex);
    kvm_arch_post_kvm_run(opaque, vcpu);
}
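
/* pre_kvm_run() is entered with qemu_mutex held and drops it just before
 * KVM_RUN; post_kvm_run() above re-takes it.  The one-shot wait loop gives
 * the BIOS's too-short SIPI busy-wait a chance to pass on vcpu 0. */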
static int pre_kvm_run(void *opaque, int vcpu)
{
    CPUState *env = cpu_single_env;

    if (env->cpu_index == 0 && wait_hack) {
        int i;

        wait_hack = 0;
        pthread_mutex_unlock(&qemu_mutex);
        for (i = 0; i < 10; ++i)
            /* wait-loop body elided in this excerpt */;
        pthread_mutex_lock(&qemu_mutex);
    }

    kvm_arch_pre_kvm_run(opaque, vcpu);

    if (env->interrupt_request & CPU_INTERRUPT_EXIT)
        return 1;
    pthread_mutex_unlock(&qemu_mutex);
    return 0;
}
void kvm_load_registers(CPUState *env)
{
    kvm_arch_load_regs(env);
}

void kvm_save_registers(CPUState *env)
{
    kvm_arch_save_regs(env);
}
int kvm_cpu_exec(CPUState *env)
{
    int r;

    r = kvm_run(kvm_context, env->cpu_index);
    if (r < 0) {
        printf("kvm_run returned %d\n", r);
        exit(1);
    }

    return 0;
}
extern int vm_running;

static int has_work(CPUState *env)
{
    if (!vm_running)
        return 0;
    if (!(env->hflags & HF_HALTED_MASK))
        return 1;
    return kvm_arch_has_work(env);
}
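
/* Wait up to `timeout' milliseconds for one of the io signals (SIGIO,
 * SIGALRM, SIGUSR2; see kvm_init_ap() below) and run its handler by hand,
 * since the signal is consumed with sigtimedwait() rather than delivered
 * asynchronously.  Returns nonzero if a signal was eaten. */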
static int kvm_eat_signal(CPUState *env, int timeout)
{
    struct timespec ts;
    int r, e, ret = 0;
    siginfo_t siginfo;
    struct sigaction sa;

    ts.tv_sec = timeout / 1000;
    ts.tv_nsec = (timeout % 1000) * 1000000;
    r = sigtimedwait(&io_sigset, &siginfo, &ts);
    if (r == -1 && (errno == EAGAIN || errno == EINTR) && !timeout)
        return 0;
    e = errno;
    pthread_mutex_lock(&qemu_mutex);
    cpu_single_env = vcpu_env;
    if (r == -1 && !(errno == EAGAIN || errno == EINTR)) {
        printf("sigtimedwait: %s\n", strerror(e));
        exit(1);
    }
    if (r != -1) {
        sigaction(siginfo.si_signo, NULL, &sa);
        sa.sa_handler(siginfo.si_signo);
        if (siginfo.si_signo == SIGUSR2)
            pthread_cond_signal(&qemu_aio_cond);
        ret = 1;
    }
    pthread_mutex_unlock(&qemu_mutex);

    return ret;
}
static void kvm_eat_signals(CPUState *env, int timeout)
{
    int r = 0;

    while (kvm_eat_signal(env, 0))
        r = 1;
    if (!r && timeout) {
        r = kvm_eat_signal(env, timeout);
        if (r)
            while (kvm_eat_signal(env, 0))
                ;
    }
    /*
     * We call select() even if no signal was received, to account for
     * signals for which there is no signal handler installed.
     */
    pthread_mutex_lock(&qemu_mutex);
    cpu_single_env = vcpu_env;
    main_loop_wait(0); /* assumed: the select()-based wait referred to above (line elided in excerpt) */
    pthread_mutex_unlock(&qemu_mutex);
}
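
/* Called with qemu_mutex held; drops it while waiting.  vcpu 0 services
 * the io signals, other vcpus just wait for an IPI kick (or park
 * themselves when asked to stop). */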
static void kvm_main_loop_wait(CPUState *env, int timeout)
{
    pthread_mutex_unlock(&qemu_mutex);
    if (env->cpu_index == 0)
        kvm_eat_signals(env, timeout);
    else {
        if (!kvm_irqchip_in_kernel(kvm_context) &&
            (timeout || vcpu_info[env->cpu_index].stopped)) {
            sigset_t set;
            int n;

        paused:
            sigemptyset(&set);
            sigaddset(&set, SIG_IPI);
            sigwait(&set, &n); /* assumed: block until kicked (line elided in excerpt) */
        } else {
            sigset_t set;
            siginfo_t siginfo;
            struct timespec ts = { 0, 0 };

            sigemptyset(&set);
            sigaddset(&set, SIG_IPI);
            sigtimedwait(&set, &siginfo, &ts); /* just consume a pending IPI */
        }
        if (vcpu_info[env->cpu_index].stop) {
            vcpu_info[env->cpu_index].stop = 0;
            vcpu_info[env->cpu_index].stopped = 1;
            pthread_kill(vcpu_info[0].thread, SIG_IPI);
            goto paused;
        }
    }
    pthread_mutex_lock(&qemu_mutex);
    cpu_single_env = env;
    vcpu_info[env->cpu_index].signalled = 0;
}
static int all_threads_paused(void)
{
    int i;

    for (i = 1; i < smp_cpus; ++i)
        if (!vcpu_info[i].stopped)
            return 0;
    return 1;
}
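
/* Ask every AP vcpu to stop, then eat signals on vcpu 0 until each one
 * has acknowledged by setting .stopped and kicking us back. */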
static void pause_other_threads(void)
{
    int i;

    for (i = 1; i < smp_cpus; ++i) {
        vcpu_info[i].stop = 1;
        pthread_kill(vcpu_info[i].thread, SIG_IPI);
    }
    while (!all_threads_paused())
        kvm_eat_signals(vcpu_env, 0);
}
static void resume_other_threads(void)
{
    int i;

    for (i = 1; i < smp_cpus; ++i) {
        vcpu_info[i].stop = 0;
        vcpu_info[i].stopped = 0;
        pthread_kill(vcpu_info[i].thread, SIG_IPI);
    }
}
static void kvm_vm_state_change_handler(void *context, int running)
{
    if (running)
        resume_other_threads();
    else
        pause_other_threads();
}
static void update_regs_for_sipi(CPUState *env)
{
    kvm_arch_update_regs_for_sipi(env);
    vcpu_info[env->cpu_index].sipi_needed = 0;
    vcpu_info[env->cpu_index].init = 0;
}
static void update_regs_for_init(CPUState *env)
{
    cpu_reset(env); /* assumed: an INIT resets the cpu before regs are reloaded (line elided in excerpt) */
    kvm_arch_load_regs(env);
}
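
/* Compute the signal mask a vcpu runs with inside KVM_RUN: everything
 * currently blocked minus SIG_IPI, and, on vcpu 0 only, minus the io
 * signals, so that vcpu 0 is the thread that takes them. */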
static void setup_kernel_sigmask(CPUState *env)
{
    sigset_t set;

    sigprocmask(SIG_BLOCK, NULL, &set);
    sigdelset(&set, SIG_IPI);
    if (env->cpu_index == 0)
        sigandset(&set, &set, &io_negsigset);

    kvm_set_signal_mask(kvm_context, env->cpu_index, &set);
}
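
/* Per-vcpu execution loop: wait until there is work, inject pending
 * SIPI/INIT when the irqchip is emulated in userspace, run the guest,
 * then service shutdown/powerdown/reset requests. */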
static int kvm_main_loop_cpu(CPUState *env)
{
    struct vcpu_info *info = &vcpu_info[env->cpu_index];

    setup_kernel_sigmask(env);
    pthread_mutex_lock(&qemu_mutex);

    kvm_qemu_init_env(env);
    env->ready_for_interrupt_injection = 1;

    cpu_single_env = env;
    kvm_tpr_opt_setup(env);
    while (1) {
        while (!has_work(env))
            kvm_main_loop_wait(env, 10);
        if (env->interrupt_request & CPU_INTERRUPT_HARD)
            env->hflags &= ~HF_HALTED_MASK;
        if (!kvm_irqchip_in_kernel(kvm_context) && info->sipi_needed)
            update_regs_for_sipi(env);
        if (!kvm_irqchip_in_kernel(kvm_context) && info->init)
            update_regs_for_init(env);
        if (!(env->hflags & HF_HALTED_MASK) && !info->init)
            kvm_cpu_exec(env);
        env->interrupt_request &= ~CPU_INTERRUPT_EXIT;
        kvm_main_loop_wait(env, 0);
        if (qemu_shutdown_requested())
            break;
        else if (qemu_powerdown_requested())
            qemu_system_powerdown();
        else if (qemu_reset_requested()) {
            env->interrupt_request = 0;
            qemu_system_reset(); /* assumed: elided in this excerpt */
            kvm_arch_load_regs(env);
        }
    }
    pthread_mutex_unlock(&qemu_mutex);
    return 0;
}
static void *ap_main_loop(void *_env)
{
    CPUState *env = _env;
    sigset_t signals;

    vcpu_env = env; /* assumed: each AP thread records its env in the __thread pointer (line elided in excerpt) */
    sigfillset(&signals);
    //sigdelset(&signals, SIG_IPI);
    sigprocmask(SIG_BLOCK, &signals, NULL);
    kvm_create_vcpu(kvm_context, env->cpu_index);
    kvm_qemu_init_env(env);
    if (kvm_irqchip_in_kernel(kvm_context))
        env->hflags &= ~HF_HALTED_MASK;
    kvm_main_loop_cpu(env);
    return NULL;
}
static void kvm_add_signal(int signum)
{
    sigaddset(&io_sigset, signum);
    sigdelset(&io_negsigset, signum);
    sigprocmask(SIG_BLOCK, &io_sigset, NULL);
}
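
/* Block the io signals in every thread (vcpu 0 eats them synchronously
 * with sigtimedwait() instead of taking them as interrupts) and spawn one
 * thread per AP vcpu. */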
int kvm_init_ap(void)
{
    CPUState *env = first_cpu->next_cpu;
    int i;

    qemu_add_vm_change_state_handler(kvm_vm_state_change_handler, NULL);
    sigemptyset(&io_sigset);
    sigfillset(&io_negsigset);
    kvm_add_signal(SIGIO);
    kvm_add_signal(SIGALRM);
    kvm_add_signal(SIGUSR2);
    if (!kvm_irqchip_in_kernel(kvm_context))
        kvm_add_signal(SIG_IPI);

    vcpu_env = first_cpu;
    signal(SIG_IPI, sig_ipi_handler);
    for (i = 1; i < smp_cpus; ++i) {
        pthread_create(&vcpu_info[i].thread, NULL, ap_main_loop, env);
        env = env->next_cpu; /* assumed: advance to the next vcpu's env (line elided in excerpt) */
    }
    return 0;
}
int kvm_main_loop(void)
{
    vcpu_info[0].thread = pthread_self();
    pthread_mutex_unlock(&qemu_mutex);
    return kvm_main_loop_cpu(first_cpu);
}
static int kvm_debug(void *opaque, int vcpu)
{
    CPUState *env = cpu_single_env;

    env->exception_index = EXCP_DEBUG;
    return 1;
}
static int kvm_inb(void *opaque, uint16_t addr, uint8_t *data)
{
    *data = cpu_inb(0, addr);
    return 0;
}

static int kvm_inw(void *opaque, uint16_t addr, uint16_t *data)
{
    *data = cpu_inw(0, addr);
    return 0;
}

static int kvm_inl(void *opaque, uint16_t addr, uint32_t *data)
{
    *data = cpu_inl(0, addr);
    return 0;
}
#define PM_IO_BASE 0xb000
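
/* The BIOS uses writes to the APM control port (0xb2) to toggle ACPI;
 * the handler below reflects that by flipping the sci_en bit in the PM
 * control register at PM_IO_BASE + 4.  Parts of the function are elided
 * in this excerpt, so the control flow is partly assumed. */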
static int kvm_outb(void *opaque, uint16_t addr, uint8_t data)
{
    if (addr == 0xb2) { /* assumed dispatch on the APM control port */
        switch (data) {
        case 0:
            cpu_outb(0, 0xb3, 0);
            break;
        case 1: {
            uint16_t x;

            x = cpu_inw(0, PM_IO_BASE + 4);
            x &= ~1; /* assumed: clear sci_en (line elided in excerpt) */
            cpu_outw(0, PM_IO_BASE + 4, x);
            break;
        }
        case 2: {
            uint16_t x;

            x = cpu_inw(0, PM_IO_BASE + 4);
            x |= 1; /* assumed: set sci_en (line elided in excerpt) */
            cpu_outw(0, PM_IO_BASE + 4, x);
            break;
        }
        }
        return 0;
    }

    cpu_outb(0, addr, data);
    return 0;
}
static int kvm_outw(void *opaque, uint16_t addr, uint16_t data)
{
    cpu_outw(0, addr, data);
    return 0;
}

static int kvm_outl(void *opaque, uint16_t addr, uint32_t data)
{
    cpu_outl(0, addr, data);
    return 0;
}
static int kvm_mmio_read(void *opaque, uint64_t addr, uint8_t *data, int len)
{
    cpu_physical_memory_rw(addr, data, len, 0);
    return 0;
}

static int kvm_mmio_write(void *opaque, uint64_t addr, uint8_t *data, int len)
{
    cpu_physical_memory_rw(addr, data, len, 1);
    return 0;
}
static int kvm_io_window(void *opaque)
{
    return 1;
}

static int kvm_halt(void *opaque, int vcpu)
{
    return kvm_arch_halt(opaque, vcpu);
}

static int kvm_shutdown(void *opaque, int vcpu)
{
    qemu_system_reset_request();
    return 1;
}
/* the .debug through .outl entries, .halt, and the arch #ifdef guards are
 * elided in this excerpt; their wiring below is assumed from the handlers
 * defined above */
static struct kvm_callbacks qemu_kvm_ops = {
    .debug = kvm_debug,
    .inb   = kvm_inb,
    .inw   = kvm_inw,
    .inl   = kvm_inl,
    .outb  = kvm_outb,
    .outw  = kvm_outw,
    .outl  = kvm_outl,
    .mmio_read = kvm_mmio_read,
    .mmio_write = kvm_mmio_write,
    .halt  = kvm_halt,
    .shutdown = kvm_shutdown,
    .io_window = kvm_io_window,
    .try_push_interrupts = try_push_interrupts,
    .post_kvm_run = post_kvm_run,
    .pre_kvm_run = pre_kvm_run,
#ifdef TARGET_I386
    .tpr_access = handle_tpr_access,
#endif
#ifdef TARGET_PPC
    .powerpc_dcr_read = handle_powerpc_dcr_read,
    .powerpc_dcr_write = handle_powerpc_dcr_write,
#endif
};
int kvm_qemu_init(void) /* assumed signature; the function header is elided in this excerpt */
{
    /* Try to initialize kvm */
    kvm_context = kvm_init(&qemu_kvm_ops, cpu_single_env);
    if (kvm_context == NULL)
        return -1;

    pthread_mutex_lock(&qemu_mutex);

    return 0;
}
int kvm_qemu_create_context(void)
{
    int r;

    if (!kvm_irqchip) /* assumed guard; elided in this excerpt */
        kvm_disable_irqchip_creation(kvm_context);
    if (kvm_create(kvm_context, phys_ram_size, (void**)&phys_ram_base) < 0) {
        kvm_qemu_destroy();
        return -1;
    }
    r = kvm_arch_qemu_create_context();
    if (r < 0) {
        kvm_qemu_destroy();
        return -1;
    }
    return 0;
}

void kvm_qemu_destroy(void)
{
    kvm_finalize(kvm_context);
}
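
/* Register a guest-physical range with kvm.  When KVM_CAP_USER_MEMORY is
 * available the range is backed directly by qemu's phys_ram_base mapping;
 * otherwise ROM contents are just copied into place. */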
void kvm_cpu_register_physical_memory(target_phys_addr_t start_addr,
                                      unsigned long size, /* assumed param; elided in this excerpt */
                                      unsigned long phys_offset)
{
#ifdef KVM_CAP_USER_MEMORY
    int r = 0;

    r = kvm_check_extension(kvm_context, KVM_CAP_USER_MEMORY);
    if (r) {
        if (!(phys_offset & ~TARGET_PAGE_MASK)) {
            r = kvm_is_allocated_mem(kvm_context, start_addr, size);
            if (r)
                return;
            r = kvm_is_intersecting_mem(kvm_context, start_addr);
            if (r)
                kvm_create_mem_hole(kvm_context, start_addr, size);
            r = kvm_register_userspace_phys_mem(kvm_context, start_addr,
                                                phys_ram_base + phys_offset,
                                                size, 0); /* trailing args assumed */
        }
        if (phys_offset & IO_MEM_ROM) {
            phys_offset &= ~IO_MEM_ROM;
            r = kvm_is_intersecting_mem(kvm_context, start_addr);
            if (r)
                kvm_create_mem_hole(kvm_context, start_addr, size);
            r = kvm_register_userspace_phys_mem(kvm_context, start_addr,
                                                phys_ram_base + phys_offset,
                                                size, 0);
        }
        if (r < 0) {
            printf("kvm_cpu_register_physical_memory: failed\n");
            exit(1);
        }
        return;
    }
#endif
    if (phys_offset & IO_MEM_ROM) {
        phys_offset &= ~IO_MEM_ROM;
        memcpy(phys_ram_base + start_addr, phys_ram_base + phys_offset, size);
    }
}
int kvm_qemu_check_extension(int ext)
{
    return kvm_check_extension(kvm_context, ext);
}

int kvm_qemu_init_env(CPUState *cenv)
{
    return kvm_arch_qemu_init_env(cenv);
}
int kvm_update_debugger(CPUState *env)
{
    struct kvm_debug_guest dbg;
    int i;

    memset(&dbg, 0, sizeof dbg);
    if (env->nb_breakpoints || env->singlestep_enabled) {
        dbg.enabled = 1; /* assumed: elided in this excerpt */
        for (i = 0; i < 4 && i < env->nb_breakpoints; ++i) {
            dbg.breakpoints[i].enabled = 1;
            dbg.breakpoints[i].address = env->breakpoints[i];
        }
        dbg.singlestep = env->singlestep_enabled;
    }
    return kvm_guest_debug(kvm_context, env->cpu_index, &dbg);
}
/*
 * dirty pages logging
 */
/* FIXME: use unsigned long pointer instead of unsigned char */
unsigned char *kvm_dirty_bitmap = NULL;
int kvm_physical_memory_set_dirty_tracking(int enable)
{
    int r = 0;

    if (enable) {
        if (!kvm_dirty_bitmap) {
            unsigned bitmap_size = BITMAP_SIZE(phys_ram_size);
            kvm_dirty_bitmap = qemu_malloc(bitmap_size);
            if (kvm_dirty_bitmap == NULL) {
                perror("Failed to allocate dirty pages bitmap");
                r = -1;
            } else {
                r = kvm_dirty_pages_log_enable_all(kvm_context);
            }
        }
    } else {
        if (kvm_dirty_bitmap) {
            r = kvm_dirty_pages_log_reset(kvm_context);
            qemu_free(kvm_dirty_bitmap);
            kvm_dirty_bitmap = NULL;
        }
    }
    return r;
}
/* get kvm's dirty pages bitmap and update qemu's */
int kvm_get_dirty_pages_log_range(unsigned long start_addr,
                                  unsigned char *bitmap,
                                  unsigned long offset, /* assumed param; elided in this excerpt */
                                  unsigned long mem_size)
{
    unsigned int i, j, n = 0;
    unsigned long c;
    unsigned page_number, addr, addr1;
    unsigned int len = ((mem_size / TARGET_PAGE_SIZE) + 7) / 8;

    /*
     * bitmap-traveling is faster than memory-traveling (for addr...)
     * especially when most of the memory is not dirty.
     */
    /* the inner bit-scan below is partly elided in this excerpt; this is a
     * conventional reconstruction */
    for (i = 0; i < len; i++) {
        c = bitmap[i];
        while (c > 0) {
            j = ffsl(c) - 1;
            c &= ~(1ul << j);
            page_number = i * 8 + j;
            addr1 = page_number * TARGET_PAGE_SIZE;
            addr = offset + addr1;
            cpu_physical_memory_set_dirty(addr);
            n++;
        }
    }
    return 0;
}
int kvm_get_dirty_bitmap_cb(unsigned long start, unsigned long len,
                            void *bitmap, void *opaque)
{
    return kvm_get_dirty_pages_log_range(start, bitmap, start, len);
}
/*
 * get kvm's dirty pages bitmap and update qemu's
 * we only care about physical ram, which resides in slots 0 and 3
 */
int kvm_update_dirty_pages_log(void)
{
    int r = 0;

    r = kvm_get_dirty_pages_range(kvm_context, 0, phys_ram_size,
                                  kvm_dirty_bitmap, NULL,
                                  kvm_get_dirty_bitmap_cb);
    return r;
}
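
/* Build a bitmap of valid ram pages: everything below ram_size plus the
 * extra pages above it (vga/bios), minus the legacy 0xa0000-0xc0000 VGA
 * window, which is not ordinary ram. */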
int kvm_get_phys_ram_page_bitmap(unsigned char *bitmap)
{
    unsigned int bsize = BITMAP_SIZE(phys_ram_size);
    unsigned int brsize = BITMAP_SIZE(ram_size);
    unsigned int extra_pages = (phys_ram_size - ram_size) / TARGET_PAGE_SIZE;
    unsigned int extra_bytes = (extra_pages + 7) / 8;
    unsigned int hole_start = BITMAP_SIZE(0xa0000);
    unsigned int hole_end = BITMAP_SIZE(0xc0000);

    memset(bitmap, 0xFF, brsize + extra_bytes);
    memset(bitmap + hole_start, 0, hole_end - hole_start);
    memset(bitmap + brsize + extra_bytes, 0, bsize - brsize - extra_bytes);

    return 0;
}
#ifdef KVM_CAP_IRQCHIP

int kvm_set_irq(int irq, int level)
{
    return kvm_set_irq_level(kvm_context, irq, level);
}

#endif
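
/* Aio completion is signalled with SIGUSR2.  vcpu 0 eats the signal
 * directly; any other thread blocks on qemu_aio_cond, which
 * kvm_eat_signal() signals when SIGUSR2 arrives. */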
void qemu_kvm_aio_wait_start(void)
{
}

void qemu_kvm_aio_wait(void)
{
    if (!cpu_single_env || cpu_single_env->cpu_index == 0) {
        pthread_mutex_unlock(&qemu_mutex);
        kvm_eat_signal(cpu_single_env, 1000);
        pthread_mutex_lock(&qemu_mutex);
    } else
        pthread_cond_wait(&qemu_aio_cond, &qemu_mutex);
}

void qemu_kvm_aio_wait_end(void)
{
}