/*
 * Copyright (C) 2006-2008 Qumranet Technologies
 *
 * Licensed under the terms of the GNU GPL version 2 or higher.
 */
9 #include "config-host.h"
18 #include "qemu-common.h"
24 #include <sys/utsname.h>
25 #include <sys/syscall.h>
27 extern void perror(const char *s
);
29 kvm_context_t kvm_context
;
33 static int qemu_kvm_reset_requested
;
35 pthread_mutex_t qemu_mutex
= PTHREAD_MUTEX_INITIALIZER
;
36 pthread_cond_t qemu_aio_cond
= PTHREAD_COND_INITIALIZER
;
37 pthread_cond_t qemu_vcpu_cond
= PTHREAD_COND_INITIALIZER
;
38 pthread_cond_t qemu_system_cond
= PTHREAD_COND_INITIALIZER
;
39 __thread
struct vcpu_info
*vcpu
;
41 static int qemu_system_ready
;
43 struct qemu_kvm_signal_table
{
48 static struct qemu_kvm_signal_table io_signal_table
;
49 static struct qemu_kvm_signal_table vcpu_signal_table
;
51 #define SIG_IPI (SIGRTMIN+4)
66 static int io_thread_fd
= -1;
/* Return the Linux thread id (tid) of the calling thread. */
static inline unsigned long kvm_get_thread_id(void)
{
    return syscall(SYS_gettid);
}
73 CPUState
*qemu_kvm_cpu_env(int index
)
75 return vcpu_info
[index
].env
;
/*
 * Empty handler for SIG_IPI: its only purpose is to interrupt a blocking
 * syscall (e.g. the in-kernel vcpu run) so the thread re-checks state.
 */
static void sig_ipi_handler(int n)
{
}
82 void kvm_update_interrupt_request(CPUState
*env
)
89 if (vcpu
&& env
!= vcpu
->env
&& !vcpu_info
[env
->cpu_index
].signalled
)
93 vcpu_info
[env
->cpu_index
].signalled
= 1;
94 if (vcpu_info
[env
->cpu_index
].thread
)
95 pthread_kill(vcpu_info
[env
->cpu_index
].thread
, SIG_IPI
);
100 void kvm_update_after_sipi(CPUState
*env
)
102 vcpu_info
[env
->cpu_index
].sipi_needed
= 1;
103 kvm_update_interrupt_request(env
);
106 void kvm_apic_init(CPUState
*env
)
108 if (env
->cpu_index
!= 0)
109 vcpu_info
[env
->cpu_index
].init
= 1;
110 kvm_update_interrupt_request(env
);
/* libkvm callback: delegate interrupt injection to the arch layer. */
static int try_push_interrupts(void *opaque)
{
    return kvm_arch_try_push_interrupts(opaque);
}
120 static void post_kvm_run(void *opaque
, int vcpu
)
123 pthread_mutex_lock(&qemu_mutex
);
124 kvm_arch_post_kvm_run(opaque
, vcpu
);
127 static int pre_kvm_run(void *opaque
, int vcpu
)
129 CPUState
*env
= qemu_kvm_cpu_env(vcpu
);
131 kvm_arch_pre_kvm_run(opaque
, vcpu
);
133 if (env
->interrupt_request
& CPU_INTERRUPT_EXIT
)
135 pthread_mutex_unlock(&qemu_mutex
);
139 void kvm_load_registers(CPUState
*env
)
142 kvm_arch_load_regs(env
);
145 void kvm_save_registers(CPUState
*env
)
148 kvm_arch_save_regs(env
);
151 int kvm_cpu_exec(CPUState
*env
)
155 r
= kvm_run(kvm_context
, env
->cpu_index
);
157 printf("kvm_run returned %d\n", r
);
164 extern int vm_running
;
166 static int has_work(CPUState
*env
)
168 if (!vm_running
|| (env
&& vcpu_info
[env
->cpu_index
].stopped
))
170 if (!(env
->hflags
& HF_HALTED_MASK
))
172 return kvm_arch_has_work(env
);
175 static int kvm_process_signal(int si_signo
)
181 pthread_cond_signal(&qemu_aio_cond
);
185 sigaction(si_signo
, NULL
, &sa
);
186 sa
.sa_handler(si_signo
);
193 static int kvm_eat_signal(struct qemu_kvm_signal_table
*waitset
, CPUState
*env
,
200 ts
.tv_sec
= timeout
/ 1000;
201 ts
.tv_nsec
= (timeout
% 1000) * 1000000;
202 r
= sigtimedwait(&waitset
->sigset
, &siginfo
, &ts
);
203 if (r
== -1 && (errno
== EAGAIN
|| errno
== EINTR
) && !timeout
)
206 pthread_mutex_lock(&qemu_mutex
);
208 cpu_single_env
= vcpu
->env
;
209 if (r
== -1 && !(errno
== EAGAIN
|| errno
== EINTR
)) {
210 printf("sigtimedwait: %s\n", strerror(e
));
214 ret
= kvm_process_signal(siginfo
.si_signo
);
216 if (env
&& vcpu_info
[env
->cpu_index
].stop
) {
217 vcpu_info
[env
->cpu_index
].stop
= 0;
218 vcpu_info
[env
->cpu_index
].stopped
= 1;
219 qemu_kvm_notify_work();
221 pthread_mutex_unlock(&qemu_mutex
);
227 static void kvm_eat_signals(CPUState
*env
, int timeout
)
230 struct qemu_kvm_signal_table
*waitset
= &vcpu_signal_table
;
232 while (kvm_eat_signal(waitset
, env
, 0))
235 r
= kvm_eat_signal(waitset
, env
, timeout
);
237 while (kvm_eat_signal(waitset
, env
, 0))
242 static void kvm_main_loop_wait(CPUState
*env
, int timeout
)
244 pthread_mutex_unlock(&qemu_mutex
);
245 kvm_eat_signals(env
, timeout
);
246 pthread_mutex_lock(&qemu_mutex
);
247 cpu_single_env
= env
;
248 vcpu_info
[env
->cpu_index
].signalled
= 0;
251 static int all_threads_paused(void)
255 for (i
= 0; i
< smp_cpus
; ++i
)
256 if (vcpu_info
[i
].stop
)
261 static void pause_all_threads(void)
265 for (i
= 0; i
< smp_cpus
; ++i
) {
266 vcpu_info
[i
].stop
= 1;
267 pthread_kill(vcpu_info
[i
].thread
, SIG_IPI
);
269 while (!all_threads_paused()) {
270 pthread_mutex_unlock(&qemu_mutex
);
271 kvm_eat_signal(&io_signal_table
, NULL
, 1000);
272 pthread_mutex_lock(&qemu_mutex
);
273 cpu_single_env
= NULL
;
277 static void resume_all_threads(void)
281 for (i
= 0; i
< smp_cpus
; ++i
) {
282 vcpu_info
[i
].stop
= 0;
283 vcpu_info
[i
].stopped
= 0;
284 pthread_kill(vcpu_info
[i
].thread
, SIG_IPI
);
/*
 * VM run-state change hook: resume vcpus on run, pause them otherwise.
 * NOTE(review): only the resume branch is visible in the source; the
 * pause branch is the natural counterpart — verify.
 */
static void kvm_vm_state_change_handler(void *context, int running)
{
    if (running)
        resume_all_threads();
    else
        pause_all_threads();
}
296 static void update_regs_for_sipi(CPUState
*env
)
298 kvm_arch_update_regs_for_sipi(env
);
299 vcpu_info
[env
->cpu_index
].sipi_needed
= 0;
300 vcpu_info
[env
->cpu_index
].init
= 0;
303 static void update_regs_for_init(CPUState
*env
)
306 kvm_arch_load_regs(env
);
309 static void setup_kernel_sigmask(CPUState
*env
)
313 sigprocmask(SIG_BLOCK
, NULL
, &set
);
314 sigdelset(&set
, SIG_IPI
);
316 kvm_set_signal_mask(kvm_context
, env
->cpu_index
, &set
);
319 void qemu_kvm_system_reset_request(void)
323 for (i
= 0; i
< smp_cpus
; ++i
) {
324 vcpu_info
[i
].reload_regs
= 1;
325 pthread_kill(vcpu_info
[i
].thread
, SIG_IPI
);
330 static int kvm_main_loop_cpu(CPUState
*env
)
332 struct vcpu_info
*info
= &vcpu_info
[env
->cpu_index
];
334 setup_kernel_sigmask(env
);
336 pthread_mutex_lock(&qemu_mutex
);
337 if (kvm_irqchip_in_kernel(kvm_context
))
338 env
->hflags
&= ~HF_HALTED_MASK
;
340 kvm_qemu_init_env(env
);
341 env
->ready_for_interrupt_injection
= 1;
343 kvm_tpr_vcpu_start(env
);
346 cpu_single_env
= env
;
348 while (!has_work(env
))
349 kvm_main_loop_wait(env
, 10);
350 if (env
->interrupt_request
& CPU_INTERRUPT_HARD
)
351 env
->hflags
&= ~HF_HALTED_MASK
;
352 if (!kvm_irqchip_in_kernel(kvm_context
) && info
->sipi_needed
)
353 update_regs_for_sipi(env
);
354 if (!kvm_irqchip_in_kernel(kvm_context
) && info
->init
)
355 update_regs_for_init(env
);
356 if (!(env
->hflags
& HF_HALTED_MASK
) && !info
->init
)
358 env
->interrupt_request
&= ~CPU_INTERRUPT_EXIT
;
359 kvm_main_loop_wait(env
, 0);
360 if (info
->reload_regs
) {
361 info
->reload_regs
= 0;
362 if (env
->cpu_index
== 0) /* ap needs to be placed in INIT */
363 kvm_arch_load_regs(env
);
366 pthread_mutex_unlock(&qemu_mutex
);
370 static void *ap_main_loop(void *_env
)
372 CPUState
*env
= _env
;
375 vcpu
= &vcpu_info
[env
->cpu_index
];
377 vcpu
->env
->thread_id
= kvm_get_thread_id();
378 sigfillset(&signals
);
379 sigprocmask(SIG_BLOCK
, &signals
, NULL
);
380 kvm_create_vcpu(kvm_context
, env
->cpu_index
);
381 kvm_qemu_init_env(env
);
383 /* signal VCPU creation */
384 pthread_mutex_lock(&qemu_mutex
);
386 pthread_cond_signal(&qemu_vcpu_cond
);
388 /* and wait for machine initialization */
389 while (!qemu_system_ready
)
390 pthread_cond_wait(&qemu_system_cond
, &qemu_mutex
);
391 pthread_mutex_unlock(&qemu_mutex
);
393 kvm_main_loop_cpu(env
);
397 static void qemu_kvm_init_signal_table(struct qemu_kvm_signal_table
*sigtab
)
399 sigemptyset(&sigtab
->sigset
);
400 sigfillset(&sigtab
->negsigset
);
403 static void kvm_add_signal(struct qemu_kvm_signal_table
*sigtab
, int signum
)
405 sigaddset(&sigtab
->sigset
, signum
);
406 sigdelset(&sigtab
->negsigset
, signum
);
409 void kvm_init_new_ap(int cpu
, CPUState
*env
)
411 pthread_create(&vcpu_info
[cpu
].thread
, NULL
, ap_main_loop
, env
);
413 while (vcpu_info
[cpu
].created
== 0)
414 pthread_cond_wait(&qemu_vcpu_cond
, &qemu_mutex
);
417 static void qemu_kvm_init_signal_tables(void)
419 qemu_kvm_init_signal_table(&io_signal_table
);
420 qemu_kvm_init_signal_table(&vcpu_signal_table
);
422 kvm_add_signal(&io_signal_table
, SIGIO
);
423 kvm_add_signal(&io_signal_table
, SIGALRM
);
424 kvm_add_signal(&io_signal_table
, SIGUSR2
);
426 kvm_add_signal(&vcpu_signal_table
, SIG_IPI
);
428 sigprocmask(SIG_BLOCK
, &io_signal_table
.sigset
, NULL
);
431 int kvm_init_ap(void)
436 qemu_add_vm_change_state_handler(kvm_vm_state_change_handler
, NULL
);
437 qemu_kvm_init_signal_tables();
439 signal(SIG_IPI
, sig_ipi_handler
);
443 void qemu_kvm_notify_work(void)
449 if (io_thread_fd
== -1)
452 memcpy(buffer
, &value
, sizeof(value
));
457 len
= write(io_thread_fd
, buffer
+ offset
, 8 - offset
);
458 if (len
== -1 && errno
== EINTR
)
468 fprintf(stderr
, "failed to notify io thread\n");
/* Used to break IO thread out of select */
static void io_thread_wakeup(void *opaque)
{
    int fd = (unsigned long)opaque;
    char buffer[8];
    size_t offset = 0;

    /* drain the 8-byte wakeup token written by qemu_kvm_notify_work() */
    while (offset < 8) {
        ssize_t len;

        len = read(fd, buffer + offset, 8 - offset);
        if (len == -1 && errno == EINTR)
            continue;

        if (len <= 0)
            break;

        offset += len;
    }
}
493 * The IO thread has all signals that inform machine events
494 * blocked (io_signal_table), so it won't get interrupted
495 * while processing in main_loop_wait().
498 int kvm_main_loop(void)
502 io_thread
= pthread_self();
503 qemu_system_ready
= 1;
505 if (kvm_eventfd(fds
) == -1) {
506 fprintf(stderr
, "failed to create eventfd\n");
510 qemu_set_fd_handler2(fds
[0], NULL
, io_thread_wakeup
, NULL
,
511 (void *)(unsigned long)fds
[0]);
513 io_thread_fd
= fds
[1];
514 pthread_mutex_unlock(&qemu_mutex
);
516 pthread_cond_broadcast(&qemu_system_cond
);
519 kvm_eat_signal(&io_signal_table
, NULL
, 1000);
520 pthread_mutex_lock(&qemu_mutex
);
521 cpu_single_env
= NULL
;
523 if (qemu_shutdown_requested())
525 else if (qemu_powerdown_requested())
526 qemu_system_powerdown();
527 else if (qemu_reset_requested()) {
528 pthread_kill(vcpu_info
[0].thread
, SIG_IPI
);
529 qemu_kvm_reset_requested
= 1;
531 pthread_mutex_unlock(&qemu_mutex
);
535 pthread_mutex_unlock(&qemu_mutex
);
540 static int kvm_debug(void *opaque
, int vcpu
)
542 CPUState
*env
= cpu_single_env
;
544 env
->exception_index
= EXCP_DEBUG
;
/* libkvm callback: byte port read. */
static int kvm_inb(void *opaque, uint16_t addr, uint8_t *data)
{
    *data = cpu_inb(0, addr);
    return 0;
}
/* libkvm callback: word port read. */
static int kvm_inw(void *opaque, uint16_t addr, uint16_t *data)
{
    *data = cpu_inw(0, addr);
    return 0;
}
/* libkvm callback: long port read. */
static int kvm_inl(void *opaque, uint16_t addr, uint32_t *data)
{
    *data = cpu_inl(0, addr);
    return 0;
}
566 #define PM_IO_BASE 0xb000
568 static int kvm_outb(void *opaque
, uint16_t addr
, uint8_t data
)
573 cpu_outb(0, 0xb3, 0);
580 x
= cpu_inw(0, PM_IO_BASE
+ 4);
582 cpu_outw(0, PM_IO_BASE
+ 4, x
);
589 x
= cpu_inw(0, PM_IO_BASE
+ 4);
591 cpu_outw(0, PM_IO_BASE
+ 4, x
);
599 cpu_outb(0, addr
, data
);
/* libkvm callback: word port write. */
static int kvm_outw(void *opaque, uint16_t addr, uint16_t data)
{
    cpu_outw(0, addr, data);
    return 0;
}
/* libkvm callback: long port write. */
static int kvm_outl(void *opaque, uint16_t addr, uint32_t data)
{
    cpu_outl(0, addr, data);
    return 0;
}
/* libkvm callback: guest MMIO read — serviced via physical memory rw. */
static int kvm_mmio_read(void *opaque, uint64_t addr, uint8_t *data, int len)
{
    cpu_physical_memory_rw(addr, data, len, 0);
    return 0;
}
/* libkvm callback: guest MMIO write — serviced via physical memory rw. */
static int kvm_mmio_write(void *opaque, uint64_t addr, uint8_t *data, int len)
{
    cpu_physical_memory_rw(addr, data, len, 1);
    return 0;
}
/* libkvm callback: io window exit — nothing to do here. */
static int kvm_io_window(void *opaque)
{
    return 1;
}
/* libkvm callback: guest executed HLT — delegate to the arch layer. */
static int kvm_halt(void *opaque, int vcpu)
{
    return kvm_arch_halt(opaque, vcpu);
}
/* libkvm callback: triple fault / shutdown — request a system reset. */
static int kvm_shutdown(void *opaque, int vcpu)
{
    qemu_system_reset_request();
    return 1;
}
644 static struct kvm_callbacks qemu_kvm_ops
= {
652 .mmio_read
= kvm_mmio_read
,
653 .mmio_write
= kvm_mmio_write
,
655 .shutdown
= kvm_shutdown
,
656 .io_window
= kvm_io_window
,
657 .try_push_interrupts
= try_push_interrupts
,
658 .post_kvm_run
= post_kvm_run
,
659 .pre_kvm_run
= pre_kvm_run
,
661 .tpr_access
= handle_tpr_access
,
664 .powerpc_dcr_read
= handle_powerpc_dcr_read
,
665 .powerpc_dcr_write
= handle_powerpc_dcr_write
,
671 /* Try to initialize kvm */
672 kvm_context
= kvm_init(&qemu_kvm_ops
, cpu_single_env
);
676 pthread_mutex_lock(&qemu_mutex
);
681 int kvm_qemu_create_context(void)
685 kvm_disable_irqchip_creation(kvm_context
);
688 kvm_disable_pit_creation(kvm_context
);
690 if (kvm_create(kvm_context
, phys_ram_size
, (void**)&phys_ram_base
) < 0) {
694 r
= kvm_arch_qemu_create_context();
700 void kvm_qemu_destroy(void)
702 kvm_finalize(kvm_context
);
705 void kvm_cpu_register_physical_memory(target_phys_addr_t start_addr
,
707 unsigned long phys_offset
)
709 #ifdef KVM_CAP_USER_MEMORY
712 r
= kvm_check_extension(kvm_context
, KVM_CAP_USER_MEMORY
);
714 if (!(phys_offset
& ~TARGET_PAGE_MASK
)) {
715 r
= kvm_is_allocated_mem(kvm_context
, start_addr
, size
);
718 r
= kvm_is_intersecting_mem(kvm_context
, start_addr
);
720 kvm_create_mem_hole(kvm_context
, start_addr
, size
);
721 r
= kvm_register_userspace_phys_mem(kvm_context
, start_addr
,
722 phys_ram_base
+ phys_offset
,
725 if (phys_offset
& IO_MEM_ROM
) {
726 phys_offset
&= ~IO_MEM_ROM
;
727 r
= kvm_is_intersecting_mem(kvm_context
, start_addr
);
729 kvm_create_mem_hole(kvm_context
, start_addr
, size
);
730 r
= kvm_register_userspace_phys_mem(kvm_context
, start_addr
,
731 phys_ram_base
+ phys_offset
,
735 printf("kvm_cpu_register_physical_memory: failed\n");
741 if (phys_offset
& IO_MEM_ROM
) {
742 phys_offset
&= ~IO_MEM_ROM
;
743 memcpy(phys_ram_base
+ start_addr
, phys_ram_base
+ phys_offset
, size
);
747 int kvm_qemu_check_extension(int ext
)
749 return kvm_check_extension(kvm_context
, ext
);
752 int kvm_qemu_init_env(CPUState
*cenv
)
754 return kvm_arch_qemu_init_env(cenv
);
757 int kvm_update_debugger(CPUState
*env
)
759 struct kvm_debug_guest dbg
;
763 if (env
->nb_breakpoints
|| env
->singlestep_enabled
) {
765 for (i
= 0; i
< 4 && i
< env
->nb_breakpoints
; ++i
) {
766 dbg
.breakpoints
[i
].enabled
= 1;
767 dbg
.breakpoints
[i
].address
= env
->breakpoints
[i
];
769 dbg
.singlestep
= env
->singlestep_enabled
;
771 return kvm_guest_debug(kvm_context
, env
->cpu_index
, &dbg
);
776 * dirty pages logging
778 /* FIXME: use unsigned long pointer instead of unsigned char */
779 unsigned char *kvm_dirty_bitmap
= NULL
;
780 int kvm_physical_memory_set_dirty_tracking(int enable
)
788 if (!kvm_dirty_bitmap
) {
789 unsigned bitmap_size
= BITMAP_SIZE(phys_ram_size
);
790 kvm_dirty_bitmap
= qemu_malloc(bitmap_size
);
791 if (kvm_dirty_bitmap
== NULL
) {
792 perror("Failed to allocate dirty pages bitmap");
796 r
= kvm_dirty_pages_log_enable_all(kvm_context
);
801 if (kvm_dirty_bitmap
) {
802 r
= kvm_dirty_pages_log_reset(kvm_context
);
803 qemu_free(kvm_dirty_bitmap
);
804 kvm_dirty_bitmap
= NULL
;
810 /* get kvm's dirty pages bitmap and update qemu's */
811 int kvm_get_dirty_pages_log_range(unsigned long start_addr
,
812 unsigned char *bitmap
,
814 unsigned long mem_size
)
816 unsigned int i
, j
, n
=0;
818 unsigned page_number
, addr
, addr1
;
819 unsigned int len
= ((mem_size
/TARGET_PAGE_SIZE
) + 7) / 8;
822 * bitmap-traveling is faster than memory-traveling (for addr...)
823 * especially when most of the memory is not dirty.
825 for (i
=0; i
<len
; i
++) {
830 page_number
= i
* 8 + j
;
831 addr1
= page_number
* TARGET_PAGE_SIZE
;
832 addr
= offset
+ addr1
;
833 cpu_physical_memory_set_dirty(addr
);
/* libkvm callback adapter: slot start doubles as the qemu offset. */
int kvm_get_dirty_bitmap_cb(unsigned long start, unsigned long len,
                            void *bitmap, void *opaque)
{
    return kvm_get_dirty_pages_log_range(start, bitmap, start, len);
}
846 * get kvm's dirty pages bitmap and update qemu's
847 * we only care about physical ram, which resides in slots 0 and 3
849 int kvm_update_dirty_pages_log(void)
854 r
= kvm_get_dirty_pages_range(kvm_context
, 0, phys_ram_size
,
855 kvm_dirty_bitmap
, NULL
,
856 kvm_get_dirty_bitmap_cb
);
860 int kvm_get_phys_ram_page_bitmap(unsigned char *bitmap
)
862 unsigned int bsize
= BITMAP_SIZE(phys_ram_size
);
863 unsigned int brsize
= BITMAP_SIZE(ram_size
);
864 unsigned int extra_pages
= (phys_ram_size
- ram_size
) / TARGET_PAGE_SIZE
;
865 unsigned int extra_bytes
= (extra_pages
+7)/8;
866 unsigned int hole_start
= BITMAP_SIZE(0xa0000);
867 unsigned int hole_end
= BITMAP_SIZE(0xc0000);
869 memset(bitmap
, 0xFF, brsize
+ extra_bytes
);
870 memset(bitmap
+ hole_start
, 0, hole_end
- hole_start
);
871 memset(bitmap
+ brsize
+ extra_bytes
, 0, bsize
- brsize
- extra_bytes
);
876 #ifdef KVM_CAP_IRQCHIP
878 int kvm_set_irq(int irq
, int level
)
880 return kvm_set_irq_level(kvm_context
, irq
, level
);
885 void qemu_kvm_aio_wait_start(void)
889 void qemu_kvm_aio_wait(void)
891 CPUState
*cpu_single
= cpu_single_env
;
893 if (!cpu_single_env
) {
894 pthread_mutex_unlock(&qemu_mutex
);
895 kvm_eat_signal(&io_signal_table
, NULL
, 1000);
896 pthread_mutex_lock(&qemu_mutex
);
897 cpu_single_env
= NULL
;
899 pthread_cond_wait(&qemu_aio_cond
, &qemu_mutex
);
900 cpu_single_env
= cpu_single
;
904 void qemu_kvm_aio_wait_end(void)
908 int qemu_kvm_get_dirty_pages(unsigned long phys_addr
, void *buf
)
910 return kvm_get_dirty_pages(kvm_context
, phys_addr
, buf
);
913 void *kvm_cpu_create_phys_mem(target_phys_addr_t start_addr
,
914 unsigned long size
, int log
, int writable
)
916 return kvm_create_phys_mem(kvm_context
, start_addr
, size
, log
, writable
);
919 void kvm_cpu_destroy_phys_mem(target_phys_addr_t start_addr
,
922 kvm_destroy_phys_mem(kvm_context
, start_addr
, size
);