4 * Copyright (C) 2006-2008 Qumranet Technologies
6 * Licensed under the terms of the GNU GPL version 2 or higher.
#include "config-host.h"

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <signal.h>
#include <time.h>
#include <pthread.h>
#include <sys/utsname.h>
/*
 * Manual prototype for perror().  <stdio.h> already declares this;
 * NOTE(review): kept only because the original file carried it — prefer
 * relying on the header.
 */
extern void perror(const char *s);
25 kvm_context_t kvm_context
;
29 pthread_mutex_t qemu_mutex
= PTHREAD_MUTEX_INITIALIZER
;
30 pthread_cond_t qemu_aio_cond
= PTHREAD_COND_INITIALIZER
;
31 __thread
struct vcpu_info
*vcpu
;
33 struct qemu_kvm_signal_table
{
38 static struct qemu_kvm_signal_table io_signal_table
;
40 #define SIG_IPI (SIGRTMIN+4)
52 CPUState
*qemu_kvm_cpu_env(int index
)
54 return vcpu_info
[index
].env
;
/*
 * SIG_IPI handler.  The signal exists only to kick a vcpu thread out of
 * kvm_run(); there is nothing to do when it is delivered.
 * NOTE(review): body elided from this listing — presumed empty; confirm.
 */
static void sig_ipi_handler(int n)
{
}
61 void kvm_update_interrupt_request(CPUState
*env
)
63 if (env
&& vcpu
&& env
!= vcpu
->env
) {
64 if (vcpu_info
[env
->cpu_index
].signalled
)
66 vcpu_info
[env
->cpu_index
].signalled
= 1;
67 if (vcpu_info
[env
->cpu_index
].thread
)
68 pthread_kill(vcpu_info
[env
->cpu_index
].thread
, SIG_IPI
);
72 void kvm_update_after_sipi(CPUState
*env
)
74 vcpu_info
[env
->cpu_index
].sipi_needed
= 1;
75 kvm_update_interrupt_request(env
);
78 void kvm_apic_init(CPUState
*env
)
80 if (env
->cpu_index
!= 0)
81 vcpu_info
[env
->cpu_index
].init
= 1;
82 kvm_update_interrupt_request(env
);
/* libkvm callback: defer interrupt injection to the arch-specific code. */
static int try_push_interrupts(void *opaque)
{
    return kvm_arch_try_push_interrupts(opaque);
}
92 static void post_kvm_run(void *opaque
, int vcpu
)
95 pthread_mutex_lock(&qemu_mutex
);
96 kvm_arch_post_kvm_run(opaque
, vcpu
);
99 static int pre_kvm_run(void *opaque
, int vcpu
)
101 CPUState
*env
= cpu_single_env
;
103 kvm_arch_pre_kvm_run(opaque
, vcpu
);
105 if (env
->interrupt_request
& CPU_INTERRUPT_EXIT
)
107 pthread_mutex_unlock(&qemu_mutex
);
111 void kvm_load_registers(CPUState
*env
)
114 kvm_arch_load_regs(env
);
117 void kvm_save_registers(CPUState
*env
)
120 kvm_arch_save_regs(env
);
123 int kvm_cpu_exec(CPUState
*env
)
127 r
= kvm_run(kvm_context
, env
->cpu_index
);
129 printf("kvm_run returned %d\n", r
);
/* Defined elsewhere in qemu: nonzero while the machine is running. */
extern int vm_running;
138 static int has_work(CPUState
*env
)
140 if (!vm_running
|| (env
&& vcpu_info
[env
->cpu_index
].stopped
))
142 if (!(env
->hflags
& HF_HALTED_MASK
))
144 return kvm_arch_has_work(env
);
147 static int kvm_eat_signal(CPUState
*env
, int timeout
)
154 ts
.tv_sec
= timeout
/ 1000;
155 ts
.tv_nsec
= (timeout
% 1000) * 1000000;
156 r
= sigtimedwait(&io_signal_table
.sigset
, &siginfo
, &ts
);
157 if (r
== -1 && (errno
== EAGAIN
|| errno
== EINTR
) && !timeout
)
160 pthread_mutex_lock(&qemu_mutex
);
162 cpu_single_env
= vcpu
->env
;
163 if (r
== -1 && !(errno
== EAGAIN
|| errno
== EINTR
)) {
164 printf("sigtimedwait: %s\n", strerror(e
));
168 sigaction(siginfo
.si_signo
, NULL
, &sa
);
169 sa
.sa_handler(siginfo
.si_signo
);
170 if (siginfo
.si_signo
== SIGUSR2
)
171 pthread_cond_signal(&qemu_aio_cond
);
174 if (env
&& vcpu_info
[env
->cpu_index
].stop
) {
175 vcpu_info
[env
->cpu_index
].stop
= 0;
176 vcpu_info
[env
->cpu_index
].stopped
= 1;
177 pthread_kill(vcpu_info
[0].thread
, SIG_IPI
);
179 pthread_mutex_unlock(&qemu_mutex
);
/*
 * Drain all pending signals, then (optionally) block once for up to
 * timeout ms; cpu 0 additionally services the qemu main loop under the
 * global lock.
 * NOTE(review): this listing elides the local declarations, the branch
 * structure around the timed wait, and the statement executed for cpu 0
 * (the line after "== 0)") — reconstruct from the original before
 * modifying.
 */
185 static void kvm_eat_signals(CPUState
*env
, int timeout
)
/* non-blocking drain of everything already queued */
189 while (kvm_eat_signal(env
, 0))
/* single blocking wait for up to timeout ms */
192 r
= kvm_eat_signal(env
, timeout
);
/* a signal arrived while blocked: drain any that queued behind it */
194 while (kvm_eat_signal(env
, 0))
/* NOTE(review): comment text garbled by elided line — original reads
 * roughly "to account for fds for which there is no signal handler
 * installed". */
198 * we call select() even if no signal was received, to account for
199 * for which there is no signal handler installed.
201 pthread_mutex_lock(&qemu_mutex
);
202 cpu_single_env
= vcpu
->env
;
/* only the io thread (cpu 0) services the qemu main loop */
203 if (env
->cpu_index
== 0)
205 pthread_mutex_unlock(&qemu_mutex
);
208 static void kvm_main_loop_wait(CPUState
*env
, int timeout
)
210 pthread_mutex_unlock(&qemu_mutex
);
211 kvm_eat_signals(env
, timeout
);
212 pthread_mutex_lock(&qemu_mutex
);
213 cpu_single_env
= env
;
214 vcpu_info
[env
->cpu_index
].signalled
= 0;
217 static int all_threads_paused(void)
221 for (i
= 1; i
< smp_cpus
; ++i
)
222 if (vcpu_info
[i
].stopped
)
227 static void pause_other_threads(void)
231 for (i
= 1; i
< smp_cpus
; ++i
) {
232 vcpu_info
[i
].stop
= 1;
233 pthread_kill(vcpu_info
[i
].thread
, SIG_IPI
);
235 while (!all_threads_paused())
236 kvm_eat_signals(vcpu
->env
, 0);
239 static void resume_other_threads(void)
243 for (i
= 1; i
< smp_cpus
; ++i
) {
244 vcpu_info
[i
].stop
= 0;
245 vcpu_info
[i
].stopped
= 0;
246 pthread_kill(vcpu_info
[i
].thread
, SIG_IPI
);
/*
 * VM run-state callback: resume AP threads when the machine starts,
 * pause them when it stops.
 * NOTE(review): the if/else skeleton is reconstructed — the listing shows
 * only the two calls; the pairing follows from 'running'.
 */
static void kvm_vm_state_change_handler(void *context, int running)
{
    if (running)
        resume_other_threads();
    else
        pause_other_threads();
}
258 static void update_regs_for_sipi(CPUState
*env
)
260 kvm_arch_update_regs_for_sipi(env
);
261 vcpu_info
[env
->cpu_index
].sipi_needed
= 0;
262 vcpu_info
[env
->cpu_index
].init
= 0;
265 static void update_regs_for_init(CPUState
*env
)
268 kvm_arch_load_regs(env
);
271 static void setup_kernel_sigmask(CPUState
*env
)
275 sigprocmask(SIG_BLOCK
, NULL
, &set
);
276 sigdelset(&set
, SIG_IPI
);
277 if (env
->cpu_index
== 0)
278 sigandset(&set
, &set
, &io_signal_table
.negsigset
);
280 kvm_set_signal_mask(kvm_context
, env
->cpu_index
, &set
);
/*
 * Per-vcpu main loop: initialize the vcpu, then repeatedly wait for work,
 * process SIPI/INIT events (userspace irqchip only), run the guest, and
 * service shutdown/powerdown/reset requests.
 * NOTE(review): this listing elides the loop skeleton (while(1), the
 * kvm_cpu_exec call, the shutdown body, closing braces, and the return);
 * reconstruct from the original before modifying.
 */
283 static int kvm_main_loop_cpu(CPUState
*env
)
285 struct vcpu_info
*info
= &vcpu_info
[env
->cpu_index
];
287 setup_kernel_sigmask(env
);
288 pthread_mutex_lock(&qemu_mutex
);
290 kvm_qemu_init_env(env
);
291 env
->ready_for_interrupt_injection
= 1;
/* TPR patching support (x86) */
293 kvm_tpr_vcpu_start(env
);
296 cpu_single_env
= env
;
/* idle: poll for work every 10ms */
298 while (!has_work(env
))
299 kvm_main_loop_wait(env
, 10);
/* a pending hard interrupt wakes a halted cpu */
300 if (env
->interrupt_request
& CPU_INTERRUPT_HARD
)
301 env
->hflags
&= ~HF_HALTED_MASK
;
/* SIPI/INIT are emulated here only without the in-kernel irqchip */
302 if (!kvm_irqchip_in_kernel(kvm_context
) && info
->sipi_needed
)
303 update_regs_for_sipi(env
);
304 if (!kvm_irqchip_in_kernel(kvm_context
) && info
->init
)
305 update_regs_for_init(env
);
/* runnable and not awaiting INIT: enter the guest (call elided) */
306 if (!(env
->hflags
& HF_HALTED_MASK
) && !info
->init
)
308 env
->interrupt_request
&= ~CPU_INTERRUPT_EXIT
;
309 kvm_main_loop_wait(env
, 0);
/* system-level requests, serviced between guest runs */
310 if (qemu_shutdown_requested())
312 else if (qemu_powerdown_requested())
313 qemu_system_powerdown();
314 else if (qemu_reset_requested()) {
315 env
->interrupt_request
= 0;
317 kvm_arch_load_regs(env
);
320 pthread_mutex_unlock(&qemu_mutex
);
324 static void *ap_main_loop(void *_env
)
326 CPUState
*env
= _env
;
329 vcpu
= &vcpu_info
[env
->cpu_index
];
331 sigfillset(&signals
);
332 //sigdelset(&signals, SIG_IPI);
333 sigprocmask(SIG_BLOCK
, &signals
, NULL
);
334 kvm_create_vcpu(kvm_context
, env
->cpu_index
);
335 kvm_qemu_init_env(env
);
336 if (kvm_irqchip_in_kernel(kvm_context
))
337 env
->hflags
&= ~HF_HALTED_MASK
;
338 kvm_main_loop_cpu(env
);
342 static void qemu_kvm_init_signal_table(struct qemu_kvm_signal_table
*sigtab
)
344 sigemptyset(&sigtab
->sigset
);
345 sigfillset(&sigtab
->negsigset
);
348 static void kvm_add_signal(struct qemu_kvm_signal_table
*sigtab
, int signum
)
350 sigaddset(&sigtab
->sigset
, signum
);
351 sigdelset(&sigtab
->negsigset
, signum
);
354 int kvm_init_ap(void)
356 CPUState
*env
= first_cpu
->next_cpu
;
362 qemu_add_vm_change_state_handler(kvm_vm_state_change_handler
, NULL
);
363 qemu_kvm_init_signal_table(&io_signal_table
);
364 kvm_add_signal(&io_signal_table
, SIGIO
);
365 kvm_add_signal(&io_signal_table
, SIGALRM
);
366 kvm_add_signal(&io_signal_table
, SIGUSR2
);
367 kvm_add_signal(&io_signal_table
, SIG_IPI
);
368 sigprocmask(SIG_BLOCK
, &io_signal_table
.sigset
, NULL
);
370 vcpu
= &vcpu_info
[0];
371 vcpu
->env
= first_cpu
;
372 signal(SIG_IPI
, sig_ipi_handler
);
373 for (i
= 1; i
< smp_cpus
; ++i
) {
374 pthread_create(&vcpu_info
[i
].thread
, NULL
, ap_main_loop
, env
);
380 int kvm_main_loop(void)
382 vcpu_info
[0].thread
= pthread_self();
383 pthread_mutex_unlock(&qemu_mutex
);
384 return kvm_main_loop_cpu(first_cpu
);
387 static int kvm_debug(void *opaque
, int vcpu
)
389 CPUState
*env
= cpu_single_env
;
391 env
->exception_index
= EXCP_DEBUG
;
/* PIO byte read callback; the return (reconstructed) reports success. */
static int kvm_inb(void *opaque, uint16_t addr, uint8_t *data)
{
    *data = cpu_inb(0, addr);
    return 0;
}
/* PIO word read callback; the return (reconstructed) reports success. */
static int kvm_inw(void *opaque, uint16_t addr, uint16_t *data)
{
    *data = cpu_inw(0, addr);
    return 0;
}
/* PIO long read callback; the return (reconstructed) reports success. */
static int kvm_inl(void *opaque, uint16_t addr, uint32_t *data)
{
    *data = cpu_inl(0, addr);
    return 0;
}
/* ACPI PM I/O port base used by the SMI emulation below. */
413 #define PM_IO_BASE 0xb000
/*
 * PIO byte write callback.  Besides the plain forward to cpu_outb() at
 * the end, it intercepts writes (the guard lines are elided from this
 * listing — presumably writes to the APM control port) and emulates
 * SMI-driven ACPI enable/disable by toggling a bit read from
 * PM_IO_BASE+4.
 * NOTE(review): the switch/if structure, the bit operations between the
 * two inw/outw pairs, and the returns are elided; reconstruct from the
 * original before modifying.
 */
415 static int kvm_outb(void *opaque
, uint16_t addr
, uint8_t data
)
420 cpu_outb(0, 0xb3, 0);
/* first PM register read-modify-write (modify line elided) */
427 x
= cpu_inw(0, PM_IO_BASE
+ 4);
429 cpu_outw(0, PM_IO_BASE
+ 4, x
);
/* second PM register read-modify-write (modify line elided) */
436 x
= cpu_inw(0, PM_IO_BASE
+ 4);
438 cpu_outw(0, PM_IO_BASE
+ 4, x
);
/* default path: forward the write to the qemu I/O layer */
446 cpu_outb(0, addr
, data
);
/* PIO word write callback; the return (reconstructed) reports success. */
static int kvm_outw(void *opaque, uint16_t addr, uint16_t data)
{
    cpu_outw(0, addr, data);
    return 0;
}
/* PIO long write callback; the return (reconstructed) reports success. */
static int kvm_outl(void *opaque, uint16_t addr, uint32_t data)
{
    cpu_outl(0, addr, data);
    return 0;
}
/* MMIO read callback: fetch guest physical memory (direction 0 = read).
 * Return (reconstructed) reports success. */
static int kvm_mmio_read(void *opaque, uint64_t addr, uint8_t *data, int len)
{
    cpu_physical_memory_rw(addr, data, len, 0);
    return 0;
}
/* MMIO write callback: store to guest physical memory (direction 1 =
 * write).  Return (reconstructed) reports success. */
static int kvm_mmio_write(void *opaque, uint64_t addr, uint8_t *data, int len)
{
    cpu_physical_memory_rw(addr, data, len, 1);
    return 0;
}
/*
 * libkvm io-window callback.
 * NOTE(review): body elided from this listing — confirm against the
 * original before modifying.
 */
474 static int kvm_io_window(void *opaque
)
/* HLT exit callback: defer to the arch-specific halt handling. */
static int kvm_halt(void *opaque, int vcpu)
{
    return kvm_arch_halt(opaque, vcpu);
}
/*
 * Guest shutdown/triple-fault exit: request a system reset.
 * NOTE(review): return value reconstructed (elided from the listing).
 */
static int kvm_shutdown(void *opaque, int vcpu)
{
    qemu_system_reset_request();
    return 1;
}
/*
 * Callback table handed to libkvm's kvm_init(); dispatches guest exits
 * back into the handlers above.
 * NOTE(review): this listing elides several initializers (likely .debug,
 * .inb/.inw/.inl, .outb/.outw/.outl, .halt) and the #ifdef guards around
 * the arch-specific members; handle_tpr_access and the powerpc DCR
 * handlers are defined outside this listing.
 */
491 static struct kvm_callbacks qemu_kvm_ops
= {
499 .mmio_read
= kvm_mmio_read
,
500 .mmio_write
= kvm_mmio_write
,
502 .shutdown
= kvm_shutdown
,
503 .io_window
= kvm_io_window
,
504 .try_push_interrupts
= try_push_interrupts
,
505 .post_kvm_run
= post_kvm_run
,
506 .pre_kvm_run
= pre_kvm_run
,
/* x86 TPR access reporting (guard elided) */
508 .tpr_access
= handle_tpr_access
,
/* powerpc DCR access (guard elided) */
511 .powerpc_dcr_read
= handle_powerpc_dcr_read
,
512 .powerpc_dcr_write
= handle_powerpc_dcr_write
,
/*
 * NOTE(review): fragment of the kvm initialization function — its
 * signature, error handling, and return are elided from this listing.
 * Creates the global kvm_context via libkvm and takes the global lock
 * (released later by kvm_main_loop()).
 */
518 /* Try to initialize kvm */
519 kvm_context
= kvm_init(&qemu_kvm_ops
, cpu_single_env
);
523 pthread_mutex_lock(&qemu_mutex
);
/*
 * Create the VM: optionally disable in-kernel irqchip creation, create
 * the VM with its physical RAM mapping, then run arch-specific setup.
 * NOTE(review): the guard around kvm_disable_irqchip_creation, the error
 * paths, and the return are elided from this listing.
 */
528 int kvm_qemu_create_context(void)
532 kvm_disable_irqchip_creation(kvm_context
);
534 if (kvm_create(kvm_context
, phys_ram_size
, (void**)&phys_ram_base
) < 0) {
538 r
= kvm_arch_qemu_create_context();
544 void kvm_qemu_destroy(void)
546 kvm_finalize(kvm_context
);
/*
 * Register a guest-physical memory range with kvm.  With
 * KVM_CAP_USER_MEMORY, plain RAM (no flags in phys_offset) and ROM are
 * registered as userspace memory slots, punching a hole first if the
 * range intersects an existing slot.
 * NOTE(review): the 'size' parameter, local declarations, the branch
 * structure between the two registration paths, the slot-based fallback,
 * and the error handling are elided from this listing; reconstruct from
 * the original before modifying.
 */
549 void kvm_cpu_register_physical_memory(target_phys_addr_t start_addr
,
551 unsigned long phys_offset
)
553 #ifdef KVM_CAP_USER_MEMORY
556 r
= kvm_check_extension(kvm_context
, KVM_CAP_USER_MEMORY
);
/* plain RAM: no flag bits set in phys_offset */
558 if (!(phys_offset
& ~TARGET_PAGE_MASK
)) {
559 r
= kvm_is_allocated_mem(kvm_context
, start_addr
, size
);
562 r
= kvm_is_intersecting_mem(kvm_context
, start_addr
);
/* overlapping slot: punch a hole before registering */
564 kvm_create_mem_hole(kvm_context
, start_addr
, size
);
565 r
= kvm_register_userspace_phys_mem(kvm_context
, start_addr
,
566 phys_ram_base
+ phys_offset
,
/* ROM: strip the flag and register the backing pages */
569 if (phys_offset
& IO_MEM_ROM
) {
570 phys_offset
&= ~IO_MEM_ROM
;
571 r
= kvm_is_intersecting_mem(kvm_context
, start_addr
);
573 kvm_create_mem_hole(kvm_context
, start_addr
, size
);
574 r
= kvm_register_userspace_phys_mem(kvm_context
, start_addr
,
575 phys_ram_base
+ phys_offset
,
579 printf("kvm_cpu_register_physical_memory: failed\n");
/* non-USER_MEMORY fallback: copy ROM contents into the RAM slot */
585 if (phys_offset
& IO_MEM_ROM
) {
586 phys_offset
&= ~IO_MEM_ROM
;
587 memcpy(phys_ram_base
+ start_addr
, phys_ram_base
+ phys_offset
, size
);
591 int kvm_qemu_check_extension(int ext
)
593 return kvm_check_extension(kvm_context
, ext
);
596 int kvm_qemu_init_env(CPUState
*cenv
)
598 return kvm_arch_qemu_init_env(cenv
);
/*
 * Push qemu's breakpoint/single-step state into the kernel guest-debug
 * interface (up to 4 hardware breakpoints).
 * NOTE(review): the loop index declaration, the zeroing of 'dbg', and the
 * enable flag set inside the if are elided from this listing.
 */
601 int kvm_update_debugger(CPUState
*env
)
603 struct kvm_debug_guest dbg
;
607 if (env
->nb_breakpoints
|| env
->singlestep_enabled
) {
/* hardware limit: at most 4 breakpoint slots */
609 for (i
= 0; i
< 4 && i
< env
->nb_breakpoints
; ++i
) {
610 dbg
.breakpoints
[i
].enabled
= 1;
611 dbg
.breakpoints
[i
].address
= env
->breakpoints
[i
];
613 dbg
.singlestep
= env
->singlestep_enabled
;
615 return kvm_guest_debug(kvm_context
, env
->cpu_index
, &dbg
);
/*
 * dirty pages logging
 */
/* FIXME: use unsigned long pointer instead of unsigned char */
unsigned char *kvm_dirty_bitmap = NULL;
/*
 * Enable/disable dirty-page logging.  Enabling allocates the bitmap
 * sized for all physical RAM and turns logging on for every slot;
 * disabling resets the log and frees the bitmap.
 * NOTE(review): the enable/disable branch structure, the 'r' declaration,
 * the allocation-failure path, and the returns are elided from this
 * listing.
 */
624 int kvm_physical_memory_set_dirty_tracking(int enable
)
/* enable path: allocate the bitmap once */
632 if (!kvm_dirty_bitmap
) {
633 unsigned bitmap_size
= BITMAP_SIZE(phys_ram_size
);
634 kvm_dirty_bitmap
= qemu_malloc(bitmap_size
);
635 if (kvm_dirty_bitmap
== NULL
) {
636 perror("Failed to allocate dirty pages bitmap");
640 r
= kvm_dirty_pages_log_enable_all(kvm_context
);
/* disable path: reset the kernel log and release the bitmap */
645 if (kvm_dirty_bitmap
) {
646 r
= kvm_dirty_pages_log_reset(kvm_context
);
647 qemu_free(kvm_dirty_bitmap
);
648 kvm_dirty_bitmap
= NULL
;
654 /* get kvm's dirty pages bitmap and update qemu's */
/*
 * Walk a kvm dirty bitmap covering mem_size bytes starting at offset
 * and mark the corresponding qemu pages dirty.
 * NOTE(review): the 'offset' parameter, the per-byte bit scan (the inner
 * j loop and the skip of zero bytes), and the return are elided from
 * this listing.
 */
655 int kvm_get_dirty_pages_log_range(unsigned long start_addr
,
656 unsigned char *bitmap
,
658 unsigned long mem_size
)
660 unsigned int i
, j
, n
=0;
662 unsigned page_number
, addr
, addr1
;
/* one bitmap byte covers 8 target pages */
663 unsigned int len
= ((mem_size
/TARGET_PAGE_SIZE
) + 7) / 8;
666 * bitmap-traveling is faster than memory-traveling (for addr...)
667 * especially when most of the memory is not dirty.
669 for (i
=0; i
<len
; i
++) {
/* bit j of byte i set => page (i*8 + j) is dirty */
674 page_number
= i
* 8 + j
;
675 addr1
= page_number
* TARGET_PAGE_SIZE
;
676 addr
= offset
+ addr1
;
677 cpu_physical_memory_set_dirty(addr
);
/*
 * libkvm per-slot callback: forward the slot's dirty bitmap to
 * kvm_get_dirty_pages_log_range(), using the slot start as the offset.
 * NOTE(review): the return statement matches the visible single-line
 * body; braces reconstructed.
 */
int kvm_get_dirty_bitmap_cb(unsigned long start, unsigned long len,
                            void *bitmap, void *opaque)
{
    return kvm_get_dirty_pages_log_range(start, bitmap, start, len);
}
690 * get kvm's dirty pages bitmap and update qemu's
691 * we only care about physical ram, which resides in slots 0 and 3
693 int kvm_update_dirty_pages_log(void)
698 r
= kvm_get_dirty_pages_range(kvm_context
, 0, phys_ram_size
,
699 kvm_dirty_bitmap
, NULL
,
700 kvm_get_dirty_bitmap_cb
);
704 int kvm_get_phys_ram_page_bitmap(unsigned char *bitmap
)
706 unsigned int bsize
= BITMAP_SIZE(phys_ram_size
);
707 unsigned int brsize
= BITMAP_SIZE(ram_size
);
708 unsigned int extra_pages
= (phys_ram_size
- ram_size
) / TARGET_PAGE_SIZE
;
709 unsigned int extra_bytes
= (extra_pages
+7)/8;
710 unsigned int hole_start
= BITMAP_SIZE(0xa0000);
711 unsigned int hole_end
= BITMAP_SIZE(0xc0000);
713 memset(bitmap
, 0xFF, brsize
+ extra_bytes
);
714 memset(bitmap
+ hole_start
, 0, hole_end
- hole_start
);
715 memset(bitmap
+ brsize
+ extra_bytes
, 0, bsize
- brsize
- extra_bytes
);
720 #ifdef KVM_CAP_IRQCHIP
722 int kvm_set_irq(int irq
, int level
)
724 return kvm_set_irq_level(kvm_context
, irq
, level
);
/*
 * Called before an AIO wait sequence.
 * NOTE(review): body elided from this listing — confirm against the
 * original before modifying.
 */
729 void qemu_kvm_aio_wait_start(void)
733 void qemu_kvm_aio_wait(void)
735 if (!cpu_single_env
|| cpu_single_env
->cpu_index
== 0) {
736 pthread_mutex_unlock(&qemu_mutex
);
737 kvm_eat_signal(cpu_single_env
, 1000);
738 pthread_mutex_lock(&qemu_mutex
);
740 pthread_cond_wait(&qemu_aio_cond
, &qemu_mutex
);
/*
 * Called after an AIO wait sequence.
 * NOTE(review): body elided from this listing — confirm against the
 * original before modifying.
 */
744 void qemu_kvm_aio_wait_end(void)
748 int qemu_kvm_get_dirty_pages(unsigned long phys_addr
, void *buf
)
750 return kvm_get_dirty_pages(kvm_context
, phys_addr
, buf
);
753 void *kvm_cpu_create_phys_mem(target_phys_addr_t start_addr
,
754 unsigned long size
, int log
, int writable
)
756 return kvm_create_phys_mem(kvm_context
, start_addr
, size
, log
, writable
);
759 void kvm_cpu_destroy_phys_mem(target_phys_addr_t start_addr
,
762 kvm_destroy_phys_mem(kvm_context
, start_addr
, size
);