Augment info cpus
[qemu-kvm/fedora.git] / qemu-kvm.c
blob45fddd39cce980f4c78d0c8a29838a3c4092b1ef
1 /*
2 * qemu/kvm integration
4 * Copyright (C) 2006-2008 Qumranet Technologies
6 * Licensed under the terms of the GNU GPL version 2 or higher.
7 */
8 #include "config.h"
9 #include "config-host.h"
/* Defaults to 1; presumably the switch consulted by kvm_enabled() -- confirm. */
int kvm_allowed = 1;

/* When zero, kvm_qemu_create_context() disables in-kernel irqchip creation. */
int kvm_irqchip = 1;
14 #include <string.h>
15 #include "hw/hw.h"
16 #include "sysemu.h"
18 #include "qemu-kvm.h"
19 #include <libkvm.h>
20 #include <pthread.h>
21 #include <sys/utsname.h>
22 #include <sys/syscall.h>
24 extern void perror(const char *s);
26 kvm_context_t kvm_context;
28 extern int smp_cpus;
30 pthread_mutex_t qemu_mutex = PTHREAD_MUTEX_INITIALIZER;
31 pthread_cond_t qemu_aio_cond = PTHREAD_COND_INITIALIZER;
32 __thread struct vcpu_info *vcpu;
34 struct qemu_kvm_signal_table {
35 sigset_t sigset;
36 sigset_t negsigset;
39 static struct qemu_kvm_signal_table io_signal_table;
41 #define SIG_IPI (SIGRTMIN+4)
43 struct vcpu_info {
44 CPUState *env;
45 int sipi_needed;
46 int init;
47 pthread_t thread;
48 int signalled;
49 int stop;
50 int stopped;
51 } vcpu_info[256];
/* Return the result of the gettid system call for the calling thread. */
static inline unsigned long kvm_get_thread_id(void)
{
    long tid = syscall(SYS_gettid);

    return tid;
}
58 CPUState *qemu_kvm_cpu_env(int index)
60 return vcpu_info[index].env;
/* SIG_IPI handler: deliberately does nothing; the signal itself is the event. */
static void sig_ipi_handler(int n)
{
    (void)n;
}
67 void kvm_update_interrupt_request(CPUState *env)
69 if (env && vcpu && env != vcpu->env) {
70 if (vcpu_info[env->cpu_index].signalled)
71 return;
72 vcpu_info[env->cpu_index].signalled = 1;
73 if (vcpu_info[env->cpu_index].thread)
74 pthread_kill(vcpu_info[env->cpu_index].thread, SIG_IPI);
78 void kvm_update_after_sipi(CPUState *env)
80 vcpu_info[env->cpu_index].sipi_needed = 1;
81 kvm_update_interrupt_request(env);
84 void kvm_apic_init(CPUState *env)
86 if (env->cpu_index != 0)
87 vcpu_info[env->cpu_index].init = 1;
88 kvm_update_interrupt_request(env);
91 #include <signal.h>
/* kvm_callbacks.try_push_interrupts: forwards to the arch-specific hook. */
static int try_push_interrupts(void *opaque)
{
    int rc = kvm_arch_try_push_interrupts(opaque);

    return rc;
}
98 static void post_kvm_run(void *opaque, int vcpu)
101 pthread_mutex_lock(&qemu_mutex);
102 kvm_arch_post_kvm_run(opaque, vcpu);
105 static int pre_kvm_run(void *opaque, int vcpu)
107 CPUState *env = cpu_single_env;
109 kvm_arch_pre_kvm_run(opaque, vcpu);
111 if (env->interrupt_request & CPU_INTERRUPT_EXIT)
112 return 1;
113 pthread_mutex_unlock(&qemu_mutex);
114 return 0;
117 void kvm_load_registers(CPUState *env)
119 if (kvm_enabled())
120 kvm_arch_load_regs(env);
123 void kvm_save_registers(CPUState *env)
125 if (kvm_enabled())
126 kvm_arch_save_regs(env);
129 int kvm_cpu_exec(CPUState *env)
131 int r;
133 r = kvm_run(kvm_context, env->cpu_index);
134 if (r < 0) {
135 printf("kvm_run returned %d\n", r);
136 exit(1);
139 return 0;
142 extern int vm_running;
144 static int has_work(CPUState *env)
146 if (!vm_running || (env && vcpu_info[env->cpu_index].stopped))
147 return 0;
148 if (!(env->hflags & HF_HALTED_MASK))
149 return 1;
150 return kvm_arch_has_work(env);
153 static int kvm_eat_signal(CPUState *env, int timeout)
155 struct timespec ts;
156 int r, e, ret = 0;
157 siginfo_t siginfo;
158 struct sigaction sa;
160 ts.tv_sec = timeout / 1000;
161 ts.tv_nsec = (timeout % 1000) * 1000000;
162 r = sigtimedwait(&io_signal_table.sigset, &siginfo, &ts);
163 if (r == -1 && (errno == EAGAIN || errno == EINTR) && !timeout)
164 return 0;
165 e = errno;
166 pthread_mutex_lock(&qemu_mutex);
167 if (vcpu)
168 cpu_single_env = vcpu->env;
169 if (r == -1 && !(errno == EAGAIN || errno == EINTR)) {
170 printf("sigtimedwait: %s\n", strerror(e));
171 exit(1);
173 if (r != -1) {
174 sigaction(siginfo.si_signo, NULL, &sa);
175 sa.sa_handler(siginfo.si_signo);
176 if (siginfo.si_signo == SIGUSR2)
177 pthread_cond_signal(&qemu_aio_cond);
178 ret = 1;
180 if (env && vcpu_info[env->cpu_index].stop) {
181 vcpu_info[env->cpu_index].stop = 0;
182 vcpu_info[env->cpu_index].stopped = 1;
183 pthread_kill(vcpu_info[0].thread, SIG_IPI);
185 pthread_mutex_unlock(&qemu_mutex);
187 return ret;
191 static void kvm_eat_signals(CPUState *env, int timeout)
193 int r = 0;
195 while (kvm_eat_signal(env, 0))
196 r = 1;
197 if (!r && timeout) {
198 r = kvm_eat_signal(env, timeout);
199 if (r)
200 while (kvm_eat_signal(env, 0))
204 * we call select() even if no signal was received, to account for
205 * for which there is no signal handler installed.
207 pthread_mutex_lock(&qemu_mutex);
208 cpu_single_env = vcpu->env;
209 if (env->cpu_index == 0)
210 main_loop_wait(0);
211 pthread_mutex_unlock(&qemu_mutex);
214 static void kvm_main_loop_wait(CPUState *env, int timeout)
216 pthread_mutex_unlock(&qemu_mutex);
217 kvm_eat_signals(env, timeout);
218 pthread_mutex_lock(&qemu_mutex);
219 cpu_single_env = env;
220 vcpu_info[env->cpu_index].signalled = 0;
223 static int all_threads_paused(void)
225 int i;
227 for (i = 1; i < smp_cpus; ++i)
228 if (vcpu_info[i].stopped)
229 return 0;
230 return 1;
233 static void pause_other_threads(void)
235 int i;
237 for (i = 1; i < smp_cpus; ++i) {
238 vcpu_info[i].stop = 1;
239 pthread_kill(vcpu_info[i].thread, SIG_IPI);
241 while (!all_threads_paused())
242 kvm_eat_signals(vcpu->env, 0);
245 static void resume_other_threads(void)
247 int i;
249 for (i = 1; i < smp_cpus; ++i) {
250 vcpu_info[i].stop = 0;
251 vcpu_info[i].stopped = 0;
252 pthread_kill(vcpu_info[i].thread, SIG_IPI);
/* VM state change callback: resume secondary vcpus when running, pause them otherwise. */
static void kvm_vm_state_change_handler(void *context, int running)
{
    if (!running) {
        pause_other_threads();
        return;
    }
    resume_other_threads();
}
264 static void update_regs_for_sipi(CPUState *env)
266 kvm_arch_update_regs_for_sipi(env);
267 vcpu_info[env->cpu_index].sipi_needed = 0;
268 vcpu_info[env->cpu_index].init = 0;
271 static void update_regs_for_init(CPUState *env)
273 cpu_reset(env);
274 kvm_arch_load_regs(env);
277 static void setup_kernel_sigmask(CPUState *env)
279 sigset_t set;
281 sigprocmask(SIG_BLOCK, NULL, &set);
282 sigdelset(&set, SIG_IPI);
283 if (env->cpu_index == 0)
284 sigandset(&set, &set, &io_signal_table.negsigset);
286 kvm_set_signal_mask(kvm_context, env->cpu_index, &set);
289 static int kvm_main_loop_cpu(CPUState *env)
291 struct vcpu_info *info = &vcpu_info[env->cpu_index];
293 setup_kernel_sigmask(env);
294 pthread_mutex_lock(&qemu_mutex);
296 kvm_qemu_init_env(env);
297 env->ready_for_interrupt_injection = 1;
298 #ifdef TARGET_I386
299 kvm_tpr_vcpu_start(env);
300 #endif
302 cpu_single_env = env;
303 while (1) {
304 while (!has_work(env))
305 kvm_main_loop_wait(env, 10);
306 if (env->interrupt_request & CPU_INTERRUPT_HARD)
307 env->hflags &= ~HF_HALTED_MASK;
308 if (!kvm_irqchip_in_kernel(kvm_context) && info->sipi_needed)
309 update_regs_for_sipi(env);
310 if (!kvm_irqchip_in_kernel(kvm_context) && info->init)
311 update_regs_for_init(env);
312 if (!(env->hflags & HF_HALTED_MASK) && !info->init)
313 kvm_cpu_exec(env);
314 env->interrupt_request &= ~CPU_INTERRUPT_EXIT;
315 kvm_main_loop_wait(env, 0);
316 if (qemu_shutdown_requested())
317 break;
318 else if (qemu_powerdown_requested())
319 qemu_system_powerdown();
320 else if (qemu_reset_requested()) {
321 env->interrupt_request = 0;
322 qemu_system_reset();
323 kvm_arch_load_regs(env);
326 pthread_mutex_unlock(&qemu_mutex);
327 return 0;
330 static void *ap_main_loop(void *_env)
332 CPUState *env = _env;
333 sigset_t signals;
335 vcpu = &vcpu_info[env->cpu_index];
336 vcpu->env = env;
337 vcpu->env->thread_id = kvm_get_thread_id();
338 sigfillset(&signals);
339 //sigdelset(&signals, SIG_IPI);
340 sigprocmask(SIG_BLOCK, &signals, NULL);
341 kvm_create_vcpu(kvm_context, env->cpu_index);
342 kvm_qemu_init_env(env);
343 if (kvm_irqchip_in_kernel(kvm_context))
344 env->hflags &= ~HF_HALTED_MASK;
345 kvm_main_loop_cpu(env);
346 return NULL;
349 static void qemu_kvm_init_signal_table(struct qemu_kvm_signal_table *sigtab)
351 sigemptyset(&sigtab->sigset);
352 sigfillset(&sigtab->negsigset);
355 static void kvm_add_signal(struct qemu_kvm_signal_table *sigtab, int signum)
357 sigaddset(&sigtab->sigset, signum);
358 sigdelset(&sigtab->negsigset, signum);
361 void kvm_init_new_ap(int cpu, CPUState *env)
363 pthread_create(&vcpu_info[cpu].thread, NULL, ap_main_loop, env);
366 int kvm_init_ap(void)
368 CPUState *env = first_cpu->next_cpu;
369 int i;
371 #ifdef TARGET_I386
372 kvm_tpr_opt_setup();
373 #endif
374 qemu_add_vm_change_state_handler(kvm_vm_state_change_handler, NULL);
375 qemu_kvm_init_signal_table(&io_signal_table);
376 kvm_add_signal(&io_signal_table, SIGIO);
377 kvm_add_signal(&io_signal_table, SIGALRM);
378 kvm_add_signal(&io_signal_table, SIGUSR2);
379 kvm_add_signal(&io_signal_table, SIG_IPI);
380 sigprocmask(SIG_BLOCK, &io_signal_table.sigset, NULL);
382 vcpu = &vcpu_info[0];
383 vcpu->env = first_cpu;
384 vcpu->env->thread_id = kvm_get_thread_id();
385 signal(SIG_IPI, sig_ipi_handler);
386 for (i = 1; i < smp_cpus; ++i) {
387 kvm_init_new_ap(i, env);
388 env = env->next_cpu;
390 return 0;
393 int kvm_main_loop(void)
395 vcpu_info[0].thread = pthread_self();
396 pthread_mutex_unlock(&qemu_mutex);
397 return kvm_main_loop_cpu(first_cpu);
400 static int kvm_debug(void *opaque, int vcpu)
402 CPUState *env = cpu_single_env;
404 env->exception_index = EXCP_DEBUG;
405 return 1;
/* kvm_callbacks.inb: read one byte from I/O port `addr` into *data.  Returns 0. */
static int kvm_inb(void *opaque, uint16_t addr, uint8_t *data)
{
    uint8_t value = cpu_inb(0, addr);

    *data = value;
    return 0;
}
/* kvm_callbacks.inw: read a 16-bit value from I/O port `addr` into *data.  Returns 0. */
static int kvm_inw(void *opaque, uint16_t addr, uint16_t *data)
{
    uint16_t value = cpu_inw(0, addr);

    *data = value;
    return 0;
}
/* kvm_callbacks.inl: read a 32-bit value from I/O port `addr` into *data.  Returns 0. */
static int kvm_inl(void *opaque, uint16_t addr, uint32_t *data)
{
    uint32_t value = cpu_inl(0, addr);

    *data = value;
    return 0;
}
#define PM_IO_BASE 0xb000

/*
 * kvm_callbacks.outb: byte write to I/O port `addr`.
 *
 * Writes to port 0xb2 are intercepted: value 0 writes 0 to port 0xb3,
 * 0xf0 clears and 0xf1 sets bit 0 of the 16-bit register at PM_IO_BASE + 4,
 * and any other value is ignored.  All other ports are forwarded to
 * cpu_outb().  Always returns 0.
 */
static int kvm_outb(void *opaque, uint16_t addr, uint8_t data)
{
    if (addr != 0xb2) {
        cpu_outb(0, addr, data);
        return 0;
    }

    switch (data) {
    case 0:
        cpu_outb(0, 0xb3, 0);
        break;
    case 0xf0: {
        /*
         * NOTE(review): the original comment read "enable acpi" for both
         * 0xf0 and 0xf1; clearing the bit presumably disables it -- confirm.
         */
        unsigned pm = cpu_inw(0, PM_IO_BASE + 4);

        pm &= ~1;
        cpu_outw(0, PM_IO_BASE + 4, pm);
        break;
    }
    case 0xf1: {
        /* enable acpi */
        unsigned pm = cpu_inw(0, PM_IO_BASE + 4);

        pm |= 1;
        cpu_outw(0, PM_IO_BASE + 4, pm);
        break;
    }
    default:
        break;
    }
    return 0;
}
/* kvm_callbacks.outw: forward a 16-bit port write to cpu_outw().  Returns 0. */
static int kvm_outw(void *opaque, uint16_t addr, uint16_t data)
{
    const uint16_t value = data;

    cpu_outw(0, addr, value);
    return 0;
}
/* kvm_callbacks.outl: forward a 32-bit port write to cpu_outl().  Returns 0. */
static int kvm_outl(void *opaque, uint16_t addr, uint32_t data)
{
    const uint32_t value = data;

    cpu_outl(0, addr, value);
    return 0;
}
/* kvm_callbacks.mmio_read: copy `len` bytes from guest physical `addr` into `data`. */
static int kvm_mmio_read(void *opaque, uint64_t addr, uint8_t *data, int len)
{
    const int is_write = 0;

    cpu_physical_memory_rw(addr, data, len, is_write);
    return 0;
}
/* kvm_callbacks.mmio_write: copy `len` bytes from `data` to guest physical `addr`. */
static int kvm_mmio_write(void *opaque, uint64_t addr, uint8_t *data, int len)
{
    const int is_write = 1;

    cpu_physical_memory_rw(addr, data, len, is_write);
    return 0;
}
/* kvm_callbacks.io_window: unconditionally returns 1. */
static int kvm_io_window(void *opaque)
{
    (void)opaque;
    return 1;
}
/* kvm_callbacks.halt: forwards to the arch-specific halt hook. */
static int kvm_halt(void *opaque, int vcpu)
{
    int rc = kvm_arch_halt(opaque, vcpu);

    return rc;
}
/* kvm_callbacks.shutdown: request a system reset and return 1. */
static int kvm_shutdown(void *opaque, int vcpu)
{
    qemu_system_reset_request();

    return 1;
}
504 static struct kvm_callbacks qemu_kvm_ops = {
505 .debug = kvm_debug,
506 .inb = kvm_inb,
507 .inw = kvm_inw,
508 .inl = kvm_inl,
509 .outb = kvm_outb,
510 .outw = kvm_outw,
511 .outl = kvm_outl,
512 .mmio_read = kvm_mmio_read,
513 .mmio_write = kvm_mmio_write,
514 .halt = kvm_halt,
515 .shutdown = kvm_shutdown,
516 .io_window = kvm_io_window,
517 .try_push_interrupts = try_push_interrupts,
518 .post_kvm_run = post_kvm_run,
519 .pre_kvm_run = pre_kvm_run,
520 #ifdef TARGET_I386
521 .tpr_access = handle_tpr_access,
522 #endif
523 #ifdef TARGET_PPC
524 .powerpc_dcr_read = handle_powerpc_dcr_read,
525 .powerpc_dcr_write = handle_powerpc_dcr_write,
526 #endif
529 int kvm_qemu_init()
531 /* Try to initialize kvm */
532 kvm_context = kvm_init(&qemu_kvm_ops, cpu_single_env);
533 if (!kvm_context) {
534 return -1;
536 pthread_mutex_lock(&qemu_mutex);
538 return 0;
541 int kvm_qemu_create_context(void)
543 int r;
544 if (!kvm_irqchip) {
545 kvm_disable_irqchip_creation(kvm_context);
547 if (kvm_create(kvm_context, phys_ram_size, (void**)&phys_ram_base) < 0) {
548 kvm_qemu_destroy();
549 return -1;
551 r = kvm_arch_qemu_create_context();
552 if(r <0)
553 kvm_qemu_destroy();
554 return 0;
557 void kvm_qemu_destroy(void)
559 kvm_finalize(kvm_context);
562 void kvm_cpu_register_physical_memory(target_phys_addr_t start_addr,
563 unsigned long size,
564 unsigned long phys_offset)
566 #ifdef KVM_CAP_USER_MEMORY
567 int r = 0;
569 r = kvm_check_extension(kvm_context, KVM_CAP_USER_MEMORY);
570 if (r) {
571 if (!(phys_offset & ~TARGET_PAGE_MASK)) {
572 r = kvm_is_allocated_mem(kvm_context, start_addr, size);
573 if (r)
574 return;
575 r = kvm_is_intersecting_mem(kvm_context, start_addr);
576 if (r)
577 kvm_create_mem_hole(kvm_context, start_addr, size);
578 r = kvm_register_userspace_phys_mem(kvm_context, start_addr,
579 phys_ram_base + phys_offset,
580 size, 0);
582 if (phys_offset & IO_MEM_ROM) {
583 phys_offset &= ~IO_MEM_ROM;
584 r = kvm_is_intersecting_mem(kvm_context, start_addr);
585 if (r)
586 kvm_create_mem_hole(kvm_context, start_addr, size);
587 r = kvm_register_userspace_phys_mem(kvm_context, start_addr,
588 phys_ram_base + phys_offset,
589 size, 0);
591 if (r < 0) {
592 printf("kvm_cpu_register_physical_memory: failed\n");
593 exit(1);
595 return;
597 #endif
598 if (phys_offset & IO_MEM_ROM) {
599 phys_offset &= ~IO_MEM_ROM;
600 memcpy(phys_ram_base + start_addr, phys_ram_base + phys_offset, size);
604 int kvm_qemu_check_extension(int ext)
606 return kvm_check_extension(kvm_context, ext);
609 int kvm_qemu_init_env(CPUState *cenv)
611 return kvm_arch_qemu_init_env(cenv);
614 int kvm_update_debugger(CPUState *env)
616 struct kvm_debug_guest dbg;
617 int i;
619 dbg.enabled = 0;
620 if (env->nb_breakpoints || env->singlestep_enabled) {
621 dbg.enabled = 1;
622 for (i = 0; i < 4 && i < env->nb_breakpoints; ++i) {
623 dbg.breakpoints[i].enabled = 1;
624 dbg.breakpoints[i].address = env->breakpoints[i];
626 dbg.singlestep = env->singlestep_enabled;
628 return kvm_guest_debug(kvm_context, env->cpu_index, &dbg);
/*
 * dirty pages logging
 */

/*
 * Scratch bitmap used while dirty logging is enabled; allocated and freed
 * by kvm_physical_memory_set_dirty_tracking().
 * FIXME: use unsigned long pointer instead of unsigned char
 */
unsigned char *kvm_dirty_bitmap = NULL;
637 int kvm_physical_memory_set_dirty_tracking(int enable)
639 int r = 0;
641 if (!kvm_enabled())
642 return 0;
644 if (enable) {
645 if (!kvm_dirty_bitmap) {
646 unsigned bitmap_size = BITMAP_SIZE(phys_ram_size);
647 kvm_dirty_bitmap = qemu_malloc(bitmap_size);
648 if (kvm_dirty_bitmap == NULL) {
649 perror("Failed to allocate dirty pages bitmap");
650 r=-1;
652 else {
653 r = kvm_dirty_pages_log_enable_all(kvm_context);
657 else {
658 if (kvm_dirty_bitmap) {
659 r = kvm_dirty_pages_log_reset(kvm_context);
660 qemu_free(kvm_dirty_bitmap);
661 kvm_dirty_bitmap = NULL;
664 return r;
667 /* get kvm's dirty pages bitmap and update qemu's */
668 int kvm_get_dirty_pages_log_range(unsigned long start_addr,
669 unsigned char *bitmap,
670 unsigned int offset,
671 unsigned long mem_size)
673 unsigned int i, j, n=0;
674 unsigned char c;
675 unsigned page_number, addr, addr1;
676 unsigned int len = ((mem_size/TARGET_PAGE_SIZE) + 7) / 8;
679 * bitmap-traveling is faster than memory-traveling (for addr...)
680 * especially when most of the memory is not dirty.
682 for (i=0; i<len; i++) {
683 c = bitmap[i];
684 while (c>0) {
685 j = ffsl(c) - 1;
686 c &= ~(1u<<j);
687 page_number = i * 8 + j;
688 addr1 = page_number * TARGET_PAGE_SIZE;
689 addr = offset + addr1;
690 cpu_physical_memory_set_dirty(addr);
691 n++;
694 return 0;
/*
 * Callback for kvm_get_dirty_pages_range(): applies `bitmap` for the range
 * starting at `start` with length `len`, using `start` as the offset too.
 */
int kvm_get_dirty_bitmap_cb(unsigned long start, unsigned long len,
                            void *bitmap, void *opaque)
{
    unsigned char *bits = bitmap;

    return kvm_get_dirty_pages_log_range(start, bits, start, len);
}
703 * get kvm's dirty pages bitmap and update qemu's
704 * we only care about physical ram, which resides in slots 0 and 3
706 int kvm_update_dirty_pages_log(void)
708 int r = 0;
711 r = kvm_get_dirty_pages_range(kvm_context, 0, phys_ram_size,
712 kvm_dirty_bitmap, NULL,
713 kvm_get_dirty_bitmap_cb);
714 return r;
717 int kvm_get_phys_ram_page_bitmap(unsigned char *bitmap)
719 unsigned int bsize = BITMAP_SIZE(phys_ram_size);
720 unsigned int brsize = BITMAP_SIZE(ram_size);
721 unsigned int extra_pages = (phys_ram_size - ram_size) / TARGET_PAGE_SIZE;
722 unsigned int extra_bytes = (extra_pages +7)/8;
723 unsigned int hole_start = BITMAP_SIZE(0xa0000);
724 unsigned int hole_end = BITMAP_SIZE(0xc0000);
726 memset(bitmap, 0xFF, brsize + extra_bytes);
727 memset(bitmap + hole_start, 0, hole_end - hole_start);
728 memset(bitmap + brsize + extra_bytes, 0, bsize - brsize - extra_bytes);
730 return 0;
733 #ifdef KVM_CAP_IRQCHIP
735 int kvm_set_irq(int irq, int level)
737 return kvm_set_irq_level(kvm_context, irq, level);
740 #endif
/* Placeholder paired with qemu_kvm_aio_wait_end(); intentionally empty. */
void qemu_kvm_aio_wait_start(void)
{
}
746 void qemu_kvm_aio_wait(void)
748 if (!cpu_single_env || cpu_single_env->cpu_index == 0) {
749 pthread_mutex_unlock(&qemu_mutex);
750 kvm_eat_signal(cpu_single_env, 1000);
751 pthread_mutex_lock(&qemu_mutex);
752 } else {
753 pthread_cond_wait(&qemu_aio_cond, &qemu_mutex);
/* Placeholder paired with qemu_kvm_aio_wait_start(); intentionally empty. */
void qemu_kvm_aio_wait_end(void)
{
}
761 int qemu_kvm_get_dirty_pages(unsigned long phys_addr, void *buf)
763 return kvm_get_dirty_pages(kvm_context, phys_addr, buf);
766 void *kvm_cpu_create_phys_mem(target_phys_addr_t start_addr,
767 unsigned long size, int log, int writable)
769 return kvm_create_phys_mem(kvm_context, start_addr, size, log, writable);
772 void kvm_cpu_destroy_phys_mem(target_phys_addr_t start_addr,
773 unsigned long size)
775 kvm_destroy_phys_mem(kvm_context, start_addr, size);