3 #include "config-host.h"
/* Global switch for kvm acceleration.
 * NOTE(review): both defines survive extraction here; in the complete
 * file they are almost certainly on the two arms of a #if keyed on
 * config-host.h output -- confirm against the full source. */
6 #define KVM_ALLOWED_DEFAULT 1
8 #define KVM_ALLOWED_DEFAULT 0
11 int kvm_allowed
= KVM_ALLOWED_DEFAULT
;
22 #include <sys/utsname.h>
24 extern void perror(const char *s
);
/* Handle to the kvm library context, shared by every wrapper below. */
26 kvm_context_t kvm_context
;
/* Big qemu lock: held by the thread currently touching cpu/device state. */
30 pthread_mutex_t qemu_mutex
= PTHREAD_MUTEX_INITIALIZER
;
/* Per-thread pointer to the vcpu state this thread runs. */
31 __thread CPUState
*vcpu_env
;
/* Signals routed to the I/O (boot-cpu) thread, and their complement. */
33 static sigset_t io_sigset
, io_negsigset
;
/* Real-time signal used to kick a vcpu thread out of KVM_RUN / sigwait. */
37 #define SIG_IPI (SIGRTMIN+4)
/* SIG_IPI handler: body not visible in this extract; the signal is
 * presumably only used to interrupt blocking calls, so it may be empty
 * -- TODO confirm. */
48 static void sig_ipi_handler(int n
)
52 void kvm_update_interrupt_request(CPUState
*env
)
54 if (env
&& env
!= vcpu_env
) {
55 if (vcpu_info
[env
->cpu_index
].signalled
)
57 vcpu_info
[env
->cpu_index
].signalled
= 1;
58 if (vcpu_info
[env
->cpu_index
].thread
)
59 pthread_kill(vcpu_info
[env
->cpu_index
].thread
, SIG_IPI
);
63 void kvm_update_after_sipi(CPUState
*env
)
65 vcpu_info
[env
->cpu_index
].sipi_needed
= 1;
66 kvm_update_interrupt_request(env
);
/*
69 * the qemu bios waits using a busy loop that's much too short for
70 * kvm. add a wait after the first sipi.
 */
/* One-shot flag consumed by the wait hack in pre_kvm_run(). */
73 static int first_sipi
= 1;
82 void kvm_apic_init(CPUState
*env
)
84 if (env
->cpu_index
!= 0)
85 vcpu_info
[env
->cpu_index
].init
= 1;
86 kvm_update_interrupt_request(env
);
91 static int try_push_interrupts(void *opaque
)
93 return kvm_arch_try_push_interrupts(opaque
);
96 static void post_kvm_run(void *opaque
, int vcpu
)
99 pthread_mutex_lock(&qemu_mutex
);
100 kvm_arch_post_kvm_run(opaque
, vcpu
);
/* kvm_callbacks hook run before each KVM_RUN entry: drops qemu_mutex
 * for the duration of guest execution.  Returns nonzero to cancel the
 * run when an exit was requested.  Several lines (the wait_hack body,
 * declarations, and return statements) are missing from this extract. */
103 static int pre_kvm_run(void *opaque
, int vcpu
)
105 CPUState
*env
= cpu_single_env
;
/* First-SIPI hack: briefly release the lock and spin so the guest
 * BIOS delay loop has time to pass (see first_sipi above). */
107 if (env
->cpu_index
== 0 && wait_hack
) {
112 pthread_mutex_unlock(&qemu_mutex
);
113 for (i
= 0; i
< 10; ++i
)
115 pthread_mutex_lock(&qemu_mutex
);
118 kvm_arch_pre_kvm_run(opaque
, vcpu
);
/* Abort guest entry if qemu asked this cpu to exit. */
120 if (env
->interrupt_request
& CPU_INTERRUPT_EXIT
)
122 pthread_mutex_unlock(&qemu_mutex
);
126 void kvm_load_registers(CPUState
*env
)
129 kvm_arch_load_regs(env
);
132 void kvm_save_registers(CPUState
*env
)
135 kvm_arch_save_regs(env
);
/* Enter the guest on @env's vcpu; logs when kvm_run() fails.
 * (The declaration of r, the error test and the return statement are
 * missing from this extract.) */
138 int kvm_cpu_exec(CPUState
*env
)
142 r
= kvm_run(kvm_context
, env
->cpu_index
);
144 printf("kvm_run returned %d\n", r
);
/* Maintained by the vm state machinery elsewhere in qemu. */
151 extern int vm_running
;
153 static int has_work(CPUState
*env
)
157 if (!(env
->hflags
& HF_HALTED_MASK
))
159 return kvm_arch_has_work(env
);
/* Wait up to @timeout ms for one of the I/O signals and dispatch its
 * handler under qemu_mutex.  Returns nonzero when a signal was eaten.
 * Declarations (ts, siginfo, r, e, sa) and several statements are
 * missing from this extract. */
162 static int kvm_eat_signal(CPUState
*env
, int timeout
)
169 ts
.tv_sec
= timeout
/ 1000;
170 ts
.tv_nsec
= (timeout
% 1000) * 1000000;
/* Block in sigtimedwait on the I/O signal set. */
171 r
= sigtimedwait(&io_sigset
, &siginfo
, &ts
);
/* EAGAIN/EINTR with a zero timeout means simply "nothing pending". */
172 if (r
== -1 && (errno
== EAGAIN
|| errno
== EINTR
) && !timeout
)
175 pthread_mutex_lock(&qemu_mutex
);
176 cpu_single_env
= vcpu_env
;
177 if (r
== -1 && !(errno
== EAGAIN
|| errno
== EINTR
)) {
178 printf("sigtimedwait: %s\n", strerror(e
));
/* Look up and invoke the installed handler for the eaten signal. */
182 sigaction(siginfo
.si_signo
, NULL
, &sa
);
183 sa
.sa_handler(siginfo
.si_signo
);
186 pthread_mutex_unlock(&qemu_mutex
);
/* Drain all pending I/O signals: one blocking wait bracketed by
 * non-blocking sweeps, then run the qemu main loop once. */
192 static void kvm_eat_signals(CPUState
*env
, int timeout
)
196 while (kvm_eat_signal(env
, 0))
199 r
= kvm_eat_signal(env
, timeout
);
201 while (kvm_eat_signal(env
, 0))
/*
205 * we call select() even if no signal was received, to account for
206 * for which there is no signal handler installed.
 */
208 pthread_mutex_lock(&qemu_mutex
);
209 cpu_single_env
= vcpu_env
;
211 pthread_mutex_unlock(&qemu_mutex
);
/* Per-iteration wait of a vcpu loop: cpu 0 services I/O signals;
 * other cpus wait for SIG_IPI.  Handles the stop/stopped handshake
 * used by pause_other_threads().  Several lines (set/ts/siginfo
 * declarations and the surrounding braces) are missing here. */
214 static void kvm_main_loop_wait(CPUState
*env
, int timeout
)
216 pthread_mutex_unlock(&qemu_mutex
);
217 if (env
->cpu_index
== 0)
218 kvm_eat_signals(env
, timeout
);
220 if (!kvm_irqchip_in_kernel(kvm_context
) &&
221 (timeout
|| vcpu_info
[env
->cpu_index
].stopped
)) {
227 sigaddset(&set
, SIG_IPI
);
237 sigaddset(&set
, SIG_IPI
);
238 sigtimedwait(&set
, &siginfo
, &ts
);
/* Pause request: acknowledge by setting .stopped and kick cpu 0,
 * which is waiting in pause_other_threads(). */
240 if (vcpu_info
[env
->cpu_index
].stop
) {
241 vcpu_info
[env
->cpu_index
].stop
= 0;
242 vcpu_info
[env
->cpu_index
].stopped
= 1;
243 pthread_kill(vcpu_info
[0].thread
, SIG_IPI
);
247 pthread_mutex_lock(&qemu_mutex
);
248 cpu_single_env
= env
;
/* Any pending IPI has now been serviced; allow new kicks. */
249 vcpu_info
[env
->cpu_index
].signalled
= 0;
252 static int all_threads_paused(void)
256 for (i
= 1; i
< smp_cpus
; ++i
)
257 if (vcpu_info
[i
].stopped
)
262 static void pause_other_threads(void)
266 for (i
= 1; i
< smp_cpus
; ++i
) {
267 vcpu_info
[i
].stop
= 1;
268 pthread_kill(vcpu_info
[i
].thread
, SIG_IPI
);
270 while (!all_threads_paused())
271 kvm_eat_signals(vcpu_env
, 0);
274 static void resume_other_threads(void)
278 for (i
= 1; i
< smp_cpus
; ++i
) {
279 vcpu_info
[i
].stop
= 0;
280 vcpu_info
[i
].stopped
= 0;
281 pthread_kill(vcpu_info
[i
].thread
, SIG_IPI
);
/*
 * VM run-state change callback: keep the AP vcpu threads in step with
 * the machine's running state.
 *
 * NOTE(review): the if/else around the two calls is reconstructed --
 * the conditional lines were lost in extraction, but resuming on
 * running and pausing otherwise is the only sensible mapping of the
 * two visible calls.
 */
static void kvm_vm_state_change_handler(void *context, int running)
{
    if (running)
        resume_other_threads();
    else
        pause_other_threads();
}
293 static void update_regs_for_sipi(CPUState
*env
)
295 kvm_arch_update_regs_for_sipi(env
);
296 vcpu_info
[env
->cpu_index
].sipi_needed
= 0;
297 vcpu_info
[env
->cpu_index
].init
= 0;
/* Handle a pending INIT for @env by reloading its register state into
 * kvm.  NOTE(review): at least one line is missing from this extract
 * between the header and the load -- likely a cpu reset; confirm
 * against the complete source. */
300 static void update_regs_for_init(CPUState
*env
)
303 kvm_arch_load_regs(env
);
306 static void setup_kernel_sigmask(CPUState
*env
)
310 sigprocmask(SIG_BLOCK
, NULL
, &set
);
311 sigdelset(&set
, SIG_IPI
);
312 if (env
->cpu_index
== 0)
313 sigandset(&set
, &set
, &io_negsigset
);
315 kvm_set_signal_mask(kvm_context
, env
->cpu_index
, &set
);
/* Main execution loop for one vcpu: wait for work, process pending
 * SIPI/INIT (userspace irqchip only), run the guest, then handle
 * shutdown/powerdown/reset requests.  Many lines (the outer for(;;),
 * kvm_cpu_exec call, qemu_system_* calls, braces, return) are missing
 * from this extract. */
318 static int kvm_main_loop_cpu(CPUState
*env
)
320 struct vcpu_info
*info
= &vcpu_info
[env
->cpu_index
];
322 setup_kernel_sigmask(env
);
323 pthread_mutex_lock(&qemu_mutex
);
324 cpu_single_env
= env
;
/* Idle until an interrupt/event makes this vcpu runnable. */
326 while (!has_work(env
))
327 kvm_main_loop_wait(env
, 10);
328 if (env
->interrupt_request
& CPU_INTERRUPT_HARD
)
329 env
->hflags
&= ~HF_HALTED_MASK
;
/* With a userspace irqchip, SIPI/INIT are emulated here. */
330 if (!kvm_irqchip_in_kernel(kvm_context
) && info
->sipi_needed
)
331 update_regs_for_sipi(env
);
332 if (!kvm_irqchip_in_kernel(kvm_context
) && info
->init
)
333 update_regs_for_init(env
);
334 if (!(env
->hflags
& HF_HALTED_MASK
) && !info
->init
)
336 env
->interrupt_request
&= ~CPU_INTERRUPT_EXIT
;
337 kvm_main_loop_wait(env
, 0);
338 if (qemu_shutdown_requested())
340 else if (qemu_powerdown_requested())
341 qemu_system_powerdown();
342 else if (qemu_reset_requested()) {
343 env
->interrupt_request
= 0;
345 kvm_arch_load_regs(env
);
348 pthread_mutex_unlock(&qemu_mutex
);
/* pthread entry point for each AP vcpu thread: block all signals
 * (SIG_IPI is delivered via the KVM_RUN signal mask instead), create
 * and initialise the vcpu, then enter the vcpu loop.  Missing from
 * this extract: the sigset_t declaration, likely the vcpu_env/thread
 * bookkeeping, and the return statement. */
352 static void *ap_main_loop(void *_env
)
354 CPUState
*env
= _env
;
358 sigfillset(&signals
);
359 //sigdelset(&signals, SIG_IPI);
360 sigprocmask(SIG_BLOCK
, &signals
, NULL
);
361 kvm_create_vcpu(kvm_context
, env
->cpu_index
);
362 kvm_qemu_init_env(env
);
/* With an in-kernel irqchip the kernel handles HLT wakeups itself. */
363 if (kvm_irqchip_in_kernel(kvm_context
))
364 env
->hflags
&= ~HF_HALTED_MASK
;
365 kvm_main_loop_cpu(env
);
369 static void kvm_add_signal(int signum
)
371 sigaddset(&io_sigset
, signum
);
372 sigdelset(&io_negsigset
, signum
);
373 sigprocmask(SIG_BLOCK
, &io_sigset
, NULL
);
/* Set up SMP support: register the vm-state handler, build the I/O
 * signal sets, and spawn one pthread per AP vcpu.  Missing from this
 * extract: the declaration of i, the per-iteration advance of env to
 * the next cpu, and the return statement. */
376 int kvm_init_ap(void)
378 CPUState
*env
= first_cpu
->next_cpu
;
381 qemu_add_vm_change_state_handler(kvm_vm_state_change_handler
, NULL
);
382 sigemptyset(&io_sigset
);
383 sigfillset(&io_negsigset
);
384 kvm_add_signal(SIGIO
);
385 kvm_add_signal(SIGALRM
);
386 kvm_add_signal(SIGUSR2
);
/* With a userspace irqchip, IPIs also go through the I/O signal set. */
387 if (!kvm_irqchip_in_kernel(kvm_context
))
388 kvm_add_signal(SIG_IPI
);
390 vcpu_env
= first_cpu
;
391 signal(SIG_IPI
, sig_ipi_handler
);
392 for (i
= 1; i
< smp_cpus
; ++i
) {
393 pthread_create(&vcpu_info
[i
].thread
, NULL
, ap_main_loop
, env
);
399 int kvm_main_loop(void)
401 vcpu_info
[0].thread
= pthread_self();
402 return kvm_main_loop_cpu(first_cpu
);
405 static int kvm_debug(void *opaque
, int vcpu
)
407 CPUState
*env
= cpu_single_env
;
409 env
->exception_index
= EXCP_DEBUG
;
/* PIO-in callbacks: satisfy guest port reads via qemu's ioport layer.
 * Each returns 0 (success) as the kvm_callbacks contract requires an
 * int result; the returns were lost in extraction and are restored. */
static int kvm_inb(void *opaque, uint16_t addr, uint8_t *data)
{
    *data = cpu_inb(0, addr);
    return 0;
}

static int kvm_inw(void *opaque, uint16_t addr, uint16_t *data)
{
    *data = cpu_inw(0, addr);
    return 0;
}

static int kvm_inl(void *opaque, uint16_t addr, uint32_t *data)
{
    *data = cpu_inl(0, addr);
    return 0;
}
/* Base of the emulated ACPI PM I/O block. */
431 #define PM_IO_BASE 0xb000
/* PIO-out byte callback.  Besides forwarding to cpu_outb(), it holds a
 * port-specific hack touching the PM status register; the port tests
 * and surrounding control flow are missing from this extract. */
433 static int kvm_outb(void *opaque
, uint16_t addr
, uint8_t data
)
438 cpu_outb(0, 0xb3, 0);
445 x
= cpu_inw(0, PM_IO_BASE
+ 4);
447 cpu_outw(0, PM_IO_BASE
+ 4, x
);
454 x
= cpu_inw(0, PM_IO_BASE
+ 4);
456 cpu_outw(0, PM_IO_BASE
+ 4, x
);
/* Default path: plain byte out. */
464 cpu_outb(0, addr
, data
);
/* PIO-out word/long callbacks: forward to qemu's ioport layer.
 * The `return 0` (success) was lost in extraction and is restored. */
static int kvm_outw(void *opaque, uint16_t addr, uint16_t data)
{
    cpu_outw(0, addr, data);
    return 0;
}

static int kvm_outl(void *opaque, uint16_t addr, uint32_t data)
{
    cpu_outl(0, addr, data);
    return 0;
}
/* MMIO read callbacks: forward guest-physical loads to qemu's softmmu
 * helpers.  The `return 0` was lost in extraction and is restored. */
static int kvm_readb(void *opaque, uint64_t addr, uint8_t *data)
{
    *data = ldub_phys(addr);
    return 0;
}

static int kvm_readw(void *opaque, uint64_t addr, uint16_t *data)
{
    *data = lduw_phys(addr);
    return 0;
}
/* MMIO 32-bit read callback.  The branch handling the straddling VGA
 * access hack (lines between the test and the load) is missing from
 * this extract, as is the return statement. */
492 static int kvm_readl(void *opaque
, uint64_t addr
, uint32_t *data
)
494 /* hack: Red Hat 7.1 generates some wierd accesses. */
495 if (addr
> 0xa0000 - 4 && addr
< 0xa0000) {
500 *data
= ldl_phys(addr
);
/* MMIO 64-bit read and 8/16/32/64-bit write callbacks: forward
 * guest-physical accesses to qemu's softmmu helpers.  Each returns 0
 * (success); the returns were lost in extraction and are restored. */
static int kvm_readq(void *opaque, uint64_t addr, uint64_t *data)
{
    *data = ldq_phys(addr);
    return 0;
}

static int kvm_writeb(void *opaque, uint64_t addr, uint8_t data)
{
    stb_phys(addr, data);
    return 0;
}

static int kvm_writew(void *opaque, uint64_t addr, uint16_t data)
{
    stw_phys(addr, data);
    return 0;
}

static int kvm_writel(void *opaque, uint64_t addr, uint32_t data)
{
    stl_phys(addr, data);
    return 0;
}

static int kvm_writeq(void *opaque, uint64_t addr, uint64_t data)
{
    stq_phys(addr, data);
    return 0;
}
/* kvm_callbacks .io_window hook; the body is not visible in this
 * extract -- presumably it just reports success. TODO confirm. */
534 static int kvm_io_window(void *opaque
)
/* kvm_callbacks .halt hook: defer HLT handling to the arch code. */
static int kvm_halt(void *opaque, int vcpu)
{
    return kvm_arch_halt(opaque, vcpu);
}
/*
 * kvm_callbacks .shutdown hook (triple fault etc.): ask qemu for a
 * system reset.
 *
 * NOTE(review): the return statement was lost in extraction; returning
 * 1 (handled, keep running so qemu can perform the reset) is restored.
 */
static int kvm_shutdown(void *opaque, int vcpu)
{
    qemu_system_reset_request();
    return 1;
}
/* Callback table handed to the kvm library; earlier members (.debug,
 * .inb/.inw/.inl, .outb/.outw/.outl, .readb..., .halt) and the closing
 * brace are missing from this extract. */
551 static struct kvm_callbacks qemu_kvm_ops
= {
563 .writeb
= kvm_writeb
,
564 .writew
= kvm_writew
,
565 .writel
= kvm_writel
,
566 .writeq
= kvm_writeq
,
568 .shutdown
= kvm_shutdown
,
569 .io_window
= kvm_io_window
,
570 .try_push_interrupts
= try_push_interrupts
,
571 .post_kvm_run
= post_kvm_run
,
572 .pre_kvm_run
= pre_kvm_run
,
/* Fragment of kvm_qemu_init(): the enclosing function header and its
 * error handling are missing from this extract. */
577 /* Try to initialize kvm */
578 kvm_context
= kvm_init(&qemu_kvm_ops
, cpu_single_env
);
/* Create the VM and its physical memory mapping; the irqchip-disable
 * condition, error paths and return are missing from this extract. */
586 int kvm_qemu_create_context(void)
590 kvm_disable_irqchip_creation(kvm_context
);
592 if (kvm_create(kvm_context
, phys_ram_size
, (void**)&phys_ram_base
) < 0) {
596 r
= kvm_arch_qemu_create_context();
602 void kvm_qemu_destroy(void)
604 kvm_finalize(kvm_context
);
/* Register a guest-physical memory range with kvm, either as userspace
 * memory (KVM_CAP_USER_MEMORY path) or by copying ROM contents into
 * the pre-allocated RAM (fallback path).  A size parameter, several
 * conditionals, error checks and braces are missing from this extract. */
607 void kvm_cpu_register_physical_memory(target_phys_addr_t start_addr
,
609 unsigned long phys_offset
)
611 #ifdef KVM_CAP_USER_MEMORY
614 r
= kvm_check_extension(kvm_context
, KVM_CAP_USER_MEMORY
);
/* Plain RAM: page-aligned phys_offset with no IO_MEM flags. */
616 if (!(phys_offset
& ~TARGET_PAGE_MASK
)) {
617 r
= kvm_is_allocated_mem(kvm_context
, start_addr
, size
);
620 r
= kvm_is_intersecting_mem(kvm_context
, start_addr
);
/* Punch a hole in any overlapping slot before registering. */
622 kvm_create_mem_hole(kvm_context
, start_addr
, size
);
623 r
= kvm_register_userspace_phys_mem(kvm_context
, start_addr
,
624 phys_ram_base
+ phys_offset
,
/* ROM: strip the flag and register the backing pages read-only-ish. */
627 if (phys_offset
& IO_MEM_ROM
) {
628 phys_offset
&= ~IO_MEM_ROM
;
629 r
= kvm_is_intersecting_mem(kvm_context
, start_addr
);
631 kvm_create_mem_hole(kvm_context
, start_addr
, size
);
632 r
= kvm_register_userspace_phys_mem(kvm_context
, start_addr
,
633 phys_ram_base
+ phys_offset
,
637 printf("kvm_cpu_register_physical_memory: failed\n");
/* Fallback (no KVM_CAP_USER_MEMORY): copy ROM into guest RAM. */
643 if (phys_offset
& IO_MEM_ROM
) {
644 phys_offset
&= ~IO_MEM_ROM
;
645 memcpy(phys_ram_base
+ start_addr
, phys_ram_base
+ phys_offset
, size
);
649 int kvm_qemu_check_extension(int ext
)
651 return kvm_check_extension(kvm_context
, ext
);
654 int kvm_qemu_init_env(CPUState
*cenv
)
656 return kvm_arch_qemu_init_env(cenv
);
/* Push qemu's breakpoint/single-step state into kvm's guest-debug
 * interface.  Missing from this extract: zeroing of dbg, declaration
 * of i, the enabled flag, and braces. */
659 int kvm_update_debugger(CPUState
*env
)
661 struct kvm_debug_guest dbg
;
665 if (env
->nb_breakpoints
|| env
->singlestep_enabled
) {
/* kvm supports at most 4 hardware breakpoints here. */
667 for (i
= 0; i
< 4 && i
< env
->nb_breakpoints
; ++i
) {
668 dbg
.breakpoints
[i
].enabled
= 1;
669 dbg
.breakpoints
[i
].address
= env
->breakpoints
[i
];
671 dbg
.singlestep
= env
->singlestep_enabled
;
673 return kvm_guest_debug(kvm_context
, env
->cpu_index
, &dbg
);
/*
678 * dirty pages logging
 */
680 /* FIXME: use unsigned long pointer instead of unsigned char */
/* Scratch bitmap for collecting kvm's dirty log; NULL when tracking
 * is off. */
681 unsigned char *kvm_dirty_bitmap
= NULL
;
/* Enable/disable dirty-page logging.  Missing from this extract: the
 * declaration of r, the enable/disable branching and the returns. */
682 int kvm_physical_memory_set_dirty_tracking(int enable
)
690 if (!kvm_dirty_bitmap
) {
691 unsigned bitmap_size
= BITMAP_SIZE(phys_ram_size
);
692 kvm_dirty_bitmap
= qemu_malloc(bitmap_size
);
693 if (kvm_dirty_bitmap
== NULL
) {
694 perror("Failed to allocate dirty pages bitmap");
698 r
= kvm_dirty_pages_log_enable_all(kvm_context
);
/* Disable path: reset logging and release the scratch bitmap. */
703 if (kvm_dirty_bitmap
) {
704 r
= kvm_dirty_pages_log_reset(kvm_context
);
705 qemu_free(kvm_dirty_bitmap
);
706 kvm_dirty_bitmap
= NULL
;
712 /* get kvm's dirty pages bitmap and update qemu's */
/* Walk @bitmap (one bit per guest page) and mark the corresponding
 * pages dirty in qemu's tracking.  Missing from this extract: an
 * offset parameter, the per-byte bit-scan inner loop, the counter
 * update and the return statement. */
713 int kvm_get_dirty_pages_log_range(unsigned long start_addr
,
714 unsigned char *bitmap
,
716 unsigned long mem_size
)
718 unsigned int i
, j
, n
=0;
720 unsigned page_number
, addr
, addr1
;
721 unsigned int len
= ((mem_size
/TARGET_PAGE_SIZE
) + 7) / 8;
/*
724 * bitmap-traveling is faster than memory-traveling (for addr...)
725 * especially when most of the memory is not dirty.
 */
727 for (i
=0; i
<len
; i
++) {
732 page_number
= i
* 8 + j
;
733 addr1
= page_number
* TARGET_PAGE_SIZE
;
734 addr
= offset
+ addr1
;
735 cpu_physical_memory_set_dirty(addr
);
/* Callback for the dirty-log walk: fold one slot's dirty bitmap into
 * qemu's dirty tracking.  The slot's guest-physical start address is
 * passed twice because it doubles as the offset into guest memory. */
int kvm_get_dirty_bitmap_cb(unsigned long start, unsigned long len,
                            void *bitmap, void *opaque)
{
    return kvm_get_dirty_pages_log_range(start, bitmap, start, len);
}
/*
748 * get kvm's dirty pages bitmap and update qemu's
749 * we only care about physical ram, which resides in slots 0 and 3
 */
/* Missing from this extract: the declaration of r and the return. */
751 int kvm_update_dirty_pages_log(void)
756 r
= kvm_get_dirty_pages_range(kvm_context
, 0, phys_ram_size
,
757 kvm_dirty_bitmap
, NULL
,
758 kvm_get_dirty_bitmap_cb
);
762 int kvm_get_phys_ram_bitmap_cb(unsigned long start
, unsigned long len
,
763 void *local_bitmap
, void *qemu_bitmap
)
765 unsigned int bsize
= ((len
/TARGET_PAGE_SIZE
) + 7) / 8;
766 unsigned int offset
= ((start
/TARGET_PAGE_SIZE
) + 7) / 8;
768 memcpy(qemu_bitmap
+ offset
, local_bitmap
, bsize
);
/* Fill @bitmap with kvm's allocated-page map for all of physical RAM,
 * using a temporary per-call bitmap.  Missing from this extract: the
 * declarations of local_bitmap and r, the allocation-failure branch
 * and the return statement. */
773 int kvm_get_phys_ram_page_bitmap(unsigned char *bitmap
)
777 unsigned int bsize
= BITMAP_SIZE(phys_ram_size
);
779 local_bitmap
= qemu_malloc(bsize
);
781 fprintf(stderr
, "could not allocate memory for phys_page bitmap\n");
785 r
= kvm_get_mem_map_range(kvm_context
, 0, phys_ram_size
,
786 local_bitmap
, bitmap
,
787 kvm_get_phys_ram_bitmap_cb
);
789 qemu_free(local_bitmap
);
793 #ifdef KVM_CAP_IRQCHIP
795 int kvm_set_irq(int irq
, int level
)
797 return kvm_set_irq_level(kvm_context
, irq
, level
);