Fix the prototype of virtio_net_init()
[qemu-kvm/fedora.git] / qemu-kvm.c
blobe4fba789dcc86f3aaac92c321343fe72b3d02882
1 /*
2 * qemu/kvm integration
4 * Copyright (C) 2006-2008 Qumranet Technologies
6 * Licensed under the terms of the GNU GPL version 2 or higher.
7 */
8 #include "config.h"
9 #include "config-host.h"
/* Global KVM switches with their compiled-in defaults. */
11 int kvm_allowed = 1;
/* When zero, kvm_qemu_create_context() disables in-kernel irqchip creation. */
12 int kvm_irqchip = 1;
/* When zero, kvm_qemu_create_context() disables in-kernel PIT creation. */
13 int kvm_pit = 1;
/* When zero (and kvm_pit is set), in-kernel PIT reinjection is turned off. */
14 int kvm_pit_reinject = 1;
/* Not referenced in this part of the file. */
15 int kvm_nested = 0;
17 #include <assert.h>
18 #include <string.h>
19 #include "hw/hw.h"
20 #include "sysemu.h"
21 #include "qemu-common.h"
22 #include "console.h"
23 #include "block.h"
24 #include "compatfd.h"
25 #include "gdbstub.h"
27 #include "qemu-kvm.h"
28 #include <libkvm.h>
29 #include <pthread.h>
30 #include <sys/utsname.h>
31 #include <sys/syscall.h>
32 #include <sys/mman.h>
34 #define false 0
35 #define true 1
37 extern void perror(const char *s);
39 kvm_context_t kvm_context;
41 extern int smp_cpus;
/* Mutex taken in kvm_qemu_init() and dropped/re-taken around guest runs and waits. */
43 pthread_mutex_t qemu_mutex = PTHREAD_MUTEX_INITIALIZER;
/* Signalled by ap_main_loop() once a vcpu is created; waited on in kvm_init_vcpu(). */
44 pthread_cond_t qemu_vcpu_cond = PTHREAD_COND_INITIALIZER;
/* Broadcast by kvm_main_loop(); vcpu threads wait on it for qemu_system_ready. */
45 pthread_cond_t qemu_system_cond = PTHREAD_COND_INITIALIZER;
/* Signalled when a vcpu acknowledges a stop request; waited on in pause_all_threads(). */
46 pthread_cond_t qemu_pause_cond = PTHREAD_COND_INITIALIZER;
/* Broadcast by flush_queued_work(); waited on in on_vcpu(). */
47 pthread_cond_t qemu_work_cond = PTHREAD_COND_INITIALIZER;
/* Per-thread pointer to the CPU this thread drives; set in ap_main_loop(). */
48 __thread struct CPUState *current_env;
50 static int qemu_system_ready;
52 #define SIG_IPI (SIGRTMIN+4)
54 pthread_t io_thread;
55 static int io_thread_fd = -1;
56 static int io_thread_sigfd = -1;
58 static CPUState *kvm_debug_cpu_requested;
60 /* The list of ioperm_data */
61 static LIST_HEAD(, ioperm_data) ioperm_head;
/* Return the result of the gettid system call for the calling thread. */
static inline unsigned long kvm_get_thread_id(void)
{
    long tid = syscall(SYS_gettid);

    return tid;
}
68 static void qemu_cond_wait(pthread_cond_t *cond)
70 CPUState *env = cpu_single_env;
71 static const struct timespec ts = {
72 .tv_sec = 0,
73 .tv_nsec = 100000,
76 pthread_cond_timedwait(cond, &qemu_mutex, &ts);
77 cpu_single_env = env;
/*
 * Handler installed for SIG_IPI by kvm_init_ap().  It deliberately does
 * nothing; the signal number is ignored.
 */
static void sig_ipi_handler(int n)
{
    (void)n;
}
84 static void on_vcpu(CPUState *env, void (*func)(void *data), void *data)
86 struct qemu_work_item wi;
88 if (env == current_env) {
89 func(data);
90 return;
93 wi.func = func;
94 wi.data = data;
95 if (!env->kvm_cpu_state.queued_work_first)
96 env->kvm_cpu_state.queued_work_first = &wi;
97 else
98 env->kvm_cpu_state.queued_work_last->next = &wi;
99 env->kvm_cpu_state.queued_work_last = &wi;
100 wi.next = NULL;
101 wi.done = false;
103 pthread_kill(env->kvm_cpu_state.thread, SIG_IPI);
104 while (!wi.done)
105 qemu_cond_wait(&qemu_work_cond);
108 static void inject_interrupt(void *data)
110 cpu_interrupt(current_env, (int)data);
113 void kvm_inject_interrupt(CPUState *env, int mask)
115 on_vcpu(env, inject_interrupt, (void *)mask);
118 void kvm_update_interrupt_request(CPUState *env)
120 int signal = 0;
122 if (env) {
123 if (!current_env || !current_env->kvm_cpu_state.created)
124 signal = 1;
126 * Testing for created here is really redundant
128 if (current_env && current_env->kvm_cpu_state.created &&
129 env != current_env && !env->kvm_cpu_state.signalled)
130 signal = 1;
132 if (signal) {
133 env->kvm_cpu_state.signalled = 1;
134 if (env->kvm_cpu_state.thread)
135 pthread_kill(env->kvm_cpu_state.thread, SIG_IPI);
140 void kvm_update_after_sipi(CPUState *env)
142 env->kvm_cpu_state.sipi_needed = 1;
143 kvm_update_interrupt_request(env);
146 void kvm_apic_init(CPUState *env)
148 if (env->cpu_index != 0)
149 env->kvm_cpu_state.init = 1;
150 kvm_update_interrupt_request(env);
153 #include <signal.h>
/* libkvm callback: forwards to the architecture-specific implementation. */
static int try_push_interrupts(void *opaque)
{
    int rc = kvm_arch_try_push_interrupts(opaque);

    return rc;
}
160 static void post_kvm_run(void *opaque, void *data)
162 CPUState *env = (CPUState *)data;
164 pthread_mutex_lock(&qemu_mutex);
165 kvm_arch_post_kvm_run(opaque, env);
168 static int pre_kvm_run(void *opaque, void *data)
170 CPUState *env = (CPUState *)data;
172 kvm_arch_pre_kvm_run(opaque, env);
174 if (env->interrupt_request & CPU_INTERRUPT_EXIT)
175 return 1;
176 pthread_mutex_unlock(&qemu_mutex);
177 return 0;
180 static void kvm_do_load_registers(void *_env)
182 CPUState *env = _env;
184 kvm_arch_load_regs(env);
187 void kvm_load_registers(CPUState *env)
189 if (kvm_enabled() && qemu_system_ready)
190 on_vcpu(env, kvm_do_load_registers, env);
193 static void kvm_do_save_registers(void *_env)
195 CPUState *env = _env;
197 kvm_arch_save_regs(env);
200 void kvm_save_registers(CPUState *env)
202 if (kvm_enabled())
203 on_vcpu(env, kvm_do_save_registers, env);
206 int kvm_cpu_exec(CPUState *env)
208 int r;
210 r = kvm_run(kvm_context, env->cpu_index, env);
211 if (r < 0) {
212 printf("kvm_run returned %d\n", r);
213 exit(1);
216 return 0;
219 extern int vm_running;
221 static int has_work(CPUState *env)
223 if (!vm_running || (env && env->kvm_cpu_state.stopped))
224 return 0;
225 if (!env->halted)
226 return 1;
227 return kvm_arch_has_work(env);
230 static void flush_queued_work(CPUState *env)
232 struct qemu_work_item *wi;
234 if (!env->kvm_cpu_state.queued_work_first)
235 return;
237 while ((wi = env->kvm_cpu_state.queued_work_first)) {
238 env->kvm_cpu_state.queued_work_first = wi->next;
239 wi->func(wi->data);
240 wi->done = true;
242 env->kvm_cpu_state.queued_work_last = NULL;
243 pthread_cond_broadcast(&qemu_work_cond);
246 static void kvm_main_loop_wait(CPUState *env, int timeout)
248 struct timespec ts;
249 int r, e;
250 siginfo_t siginfo;
251 sigset_t waitset;
253 pthread_mutex_unlock(&qemu_mutex);
255 ts.tv_sec = timeout / 1000;
256 ts.tv_nsec = (timeout % 1000) * 1000000;
257 sigemptyset(&waitset);
258 sigaddset(&waitset, SIG_IPI);
260 r = sigtimedwait(&waitset, &siginfo, &ts);
261 e = errno;
263 pthread_mutex_lock(&qemu_mutex);
265 if (r == -1 && !(e == EAGAIN || e == EINTR)) {
266 printf("sigtimedwait: %s\n", strerror(e));
267 exit(1);
270 cpu_single_env = env;
271 flush_queued_work(env);
273 if (env->kvm_cpu_state.stop) {
274 env->kvm_cpu_state.stop = 0;
275 env->kvm_cpu_state.stopped = 1;
276 pthread_cond_signal(&qemu_pause_cond);
279 env->kvm_cpu_state.signalled = 0;
282 static int all_threads_paused(void)
284 CPUState *penv = first_cpu;
286 while (penv) {
287 if (penv->kvm_cpu_state.stop)
288 return 0;
289 penv = (CPUState *)penv->next_cpu;
292 return 1;
295 static void pause_all_threads(void)
297 CPUState *penv = first_cpu;
299 assert(!cpu_single_env);
301 while (penv) {
302 penv->kvm_cpu_state.stop = 1;
303 pthread_kill(penv->kvm_cpu_state.thread, SIG_IPI);
304 penv = (CPUState *)penv->next_cpu;
307 while (!all_threads_paused())
308 qemu_cond_wait(&qemu_pause_cond);
311 static void resume_all_threads(void)
313 CPUState *penv = first_cpu;
315 assert(!cpu_single_env);
317 while (penv) {
318 penv->kvm_cpu_state.stop = 0;
319 penv->kvm_cpu_state.stopped = 0;
320 pthread_kill(penv->kvm_cpu_state.thread, SIG_IPI);
321 penv = (CPUState *)penv->next_cpu;
/*
 * VM state change hook registered in kvm_init_ap(): resume the vcpu
 * threads when the VM starts running, pause them when it stops.
 */
static void kvm_vm_state_change_handler(void *context, int running)
{
    if (!running) {
        pause_all_threads();
        return;
    }

    resume_all_threads();
}
333 static void update_regs_for_sipi(CPUState *env)
335 kvm_arch_update_regs_for_sipi(env);
336 env->kvm_cpu_state.sipi_needed = 0;
339 static void update_regs_for_init(CPUState *env)
341 #ifdef TARGET_I386
342 SegmentCache cs = env->segs[R_CS];
343 #endif
345 cpu_reset(env);
347 #ifdef TARGET_I386
348 /* restore SIPI vector */
349 if(env->kvm_cpu_state.sipi_needed)
350 env->segs[R_CS] = cs;
351 #endif
353 env->kvm_cpu_state.init = 0;
354 kvm_arch_load_regs(env);
357 static void setup_kernel_sigmask(CPUState *env)
359 sigset_t set;
361 sigemptyset(&set);
362 sigaddset(&set, SIGUSR2);
363 sigaddset(&set, SIGIO);
364 sigaddset(&set, SIGALRM);
365 sigprocmask(SIG_BLOCK, &set, NULL);
367 sigprocmask(SIG_BLOCK, NULL, &set);
368 sigdelset(&set, SIG_IPI);
370 kvm_set_signal_mask(kvm_context, env->cpu_index, &set);
373 void qemu_kvm_system_reset(void)
375 CPUState *penv = first_cpu;
377 pause_all_threads();
379 qemu_system_reset();
381 while (penv) {
382 kvm_arch_cpu_reset(penv);
383 penv = (CPUState *)penv->next_cpu;
386 resume_all_threads();
389 static int kvm_main_loop_cpu(CPUState *env)
391 setup_kernel_sigmask(env);
393 pthread_mutex_lock(&qemu_mutex);
394 if (kvm_irqchip_in_kernel(kvm_context))
395 env->halted = 0;
397 kvm_qemu_init_env(env);
398 #ifdef TARGET_I386
399 kvm_tpr_vcpu_start(env);
400 #endif
402 cpu_single_env = env;
403 kvm_load_registers(env);
405 while (1) {
406 while (!has_work(env))
407 kvm_main_loop_wait(env, 1000);
408 if (env->interrupt_request & (CPU_INTERRUPT_HARD | CPU_INTERRUPT_NMI))
409 env->halted = 0;
410 if (!kvm_irqchip_in_kernel(kvm_context)) {
411 if (env->kvm_cpu_state.init)
412 update_regs_for_init(env);
413 if (env->kvm_cpu_state.sipi_needed)
414 update_regs_for_sipi(env);
416 if (!env->halted && !env->kvm_cpu_state.init)
417 kvm_cpu_exec(env);
418 env->interrupt_request &= ~CPU_INTERRUPT_EXIT;
419 kvm_main_loop_wait(env, 0);
421 pthread_mutex_unlock(&qemu_mutex);
422 return 0;
425 static void *ap_main_loop(void *_env)
427 CPUState *env = _env;
428 sigset_t signals;
429 struct ioperm_data *data = NULL;
431 current_env = env;
432 env->thread_id = kvm_get_thread_id();
433 sigfillset(&signals);
434 sigprocmask(SIG_BLOCK, &signals, NULL);
435 kvm_create_vcpu(kvm_context, env->cpu_index);
436 kvm_qemu_init_env(env);
438 #ifdef USE_KVM_DEVICE_ASSIGNMENT
439 /* do ioperm for io ports of assigned devices */
440 LIST_FOREACH(data, &ioperm_head, entries)
441 on_vcpu(env, kvm_arch_do_ioperm, data);
442 #endif
444 /* signal VCPU creation */
445 pthread_mutex_lock(&qemu_mutex);
446 current_env->kvm_cpu_state.created = 1;
447 pthread_cond_signal(&qemu_vcpu_cond);
449 /* and wait for machine initialization */
450 while (!qemu_system_ready)
451 qemu_cond_wait(&qemu_system_cond);
452 pthread_mutex_unlock(&qemu_mutex);
454 kvm_main_loop_cpu(env);
455 return NULL;
458 void kvm_init_vcpu(CPUState *env)
460 int cpu = env->cpu_index;
461 pthread_create(&env->kvm_cpu_state.thread, NULL, ap_main_loop, env);
463 while (env->kvm_cpu_state.created == 0)
464 qemu_cond_wait(&qemu_vcpu_cond);
467 int kvm_init_ap(void)
469 #ifdef TARGET_I386
470 kvm_tpr_opt_setup();
471 #endif
472 qemu_add_vm_change_state_handler(kvm_vm_state_change_handler, NULL);
474 signal(SIG_IPI, sig_ipi_handler);
475 return 0;
478 void qemu_kvm_notify_work(void)
480 uint64_t value = 1;
481 char buffer[8];
482 size_t offset = 0;
484 if (io_thread_fd == -1)
485 return;
487 memcpy(buffer, &value, sizeof(value));
489 while (offset < 8) {
490 ssize_t len;
492 len = write(io_thread_fd, buffer + offset, 8 - offset);
493 if (len == -1 && errno == EINTR)
494 continue;
496 if (len <= 0)
497 break;
499 offset += len;
502 if (offset != 8)
503 fprintf(stderr, "failed to notify io thread\n");
/* If we have signalfd, we mask out the signals we want to handle and then
 * use signalfd to listen for them.  We rely on whatever the current signal
 * handler is to dispatch the signals when we receive them.
 */
511 static void sigfd_handler(void *opaque)
513 int fd = (unsigned long)opaque;
514 struct qemu_signalfd_siginfo info;
515 struct sigaction action;
516 ssize_t len;
518 while (1) {
519 do {
520 len = read(fd, &info, sizeof(info));
521 } while (len == -1 && errno == EINTR);
523 if (len == -1 && errno == EAGAIN)
524 break;
526 if (len != sizeof(info)) {
527 printf("read from sigfd returned %ld: %m\n", len);
528 return;
531 sigaction(info.ssi_signo, NULL, &action);
532 if (action.sa_handler)
533 action.sa_handler(info.ssi_signo);
538 /* Used to break IO thread out of select */
/*
 * fd handler for the wakeup descriptor: consume up to 8 bytes from the
 * descriptor encoded in @opaque.  EINTR is retried; any other error or
 * end-of-file stops the read.  The data itself is discarded.
 */
static void io_thread_wakeup(void *opaque)
{
    int fd = (unsigned long)opaque;
    char scratch[8];
    size_t got;

    for (got = 0; got < 8; ) {
        ssize_t n = read(fd, scratch + got, 8 - got);

        if (n == -1 && errno == EINTR) {
            continue;
        }
        if (n <= 0) {
            break;
        }
        got += n;
    }
}
559 int kvm_main_loop(void)
561 int fds[2];
562 sigset_t mask;
563 int sigfd;
565 io_thread = pthread_self();
566 qemu_system_ready = 1;
568 if (qemu_eventfd(fds) == -1) {
569 fprintf(stderr, "failed to create eventfd\n");
570 return -errno;
573 qemu_set_fd_handler2(fds[0], NULL, io_thread_wakeup, NULL,
574 (void *)(unsigned long)fds[0]);
576 io_thread_fd = fds[1];
578 sigemptyset(&mask);
579 sigaddset(&mask, SIGIO);
580 sigaddset(&mask, SIGALRM);
581 sigprocmask(SIG_BLOCK, &mask, NULL);
583 sigfd = qemu_signalfd(&mask);
584 if (sigfd == -1) {
585 fprintf(stderr, "failed to create signalfd\n");
586 return -errno;
589 fcntl(sigfd, F_SETFL, O_NONBLOCK);
591 qemu_set_fd_handler2(sigfd, NULL, sigfd_handler, NULL,
592 (void *)(unsigned long)sigfd);
594 pthread_cond_broadcast(&qemu_system_cond);
596 io_thread_sigfd = sigfd;
597 cpu_single_env = NULL;
599 while (1) {
600 main_loop_wait(1000);
601 if (qemu_shutdown_requested())
602 break;
603 else if (qemu_powerdown_requested())
604 qemu_system_powerdown();
605 else if (qemu_reset_requested())
606 qemu_kvm_system_reset();
607 #ifdef CONFIG_GDBSTUB
608 else if (kvm_debug_cpu_requested) {
609 gdb_set_stop_cpu(kvm_debug_cpu_requested);
610 vm_stop(EXCP_DEBUG);
611 kvm_debug_cpu_requested = NULL;
613 #endif
616 pause_all_threads();
617 pthread_mutex_unlock(&qemu_mutex);
619 return 0;
622 #ifdef KVM_CAP_SET_GUEST_DEBUG
623 int kvm_debug(void *opaque, void *data, struct kvm_debug_exit_arch *arch_info)
625 int handle = kvm_arch_debug(arch_info);
626 struct CPUState *env = data;
628 if (handle) {
629 kvm_debug_cpu_requested = env;
630 env->kvm_cpu_state.stopped = 1;
632 return handle;
634 #endif
/* libkvm callback: 8-bit port read from @addr into *@data.  Returns 0. */
static int kvm_inb(void *opaque, uint16_t addr, uint8_t *data)
{
    uint8_t value = cpu_inb(0, addr);

    *data = value;
    return 0;
}
/* libkvm callback: 16-bit port read from @addr into *@data.  Returns 0. */
static int kvm_inw(void *opaque, uint16_t addr, uint16_t *data)
{
    uint16_t value = cpu_inw(0, addr);

    *data = value;
    return 0;
}
/* libkvm callback: 32-bit port read from @addr into *@data.  Returns 0. */
static int kvm_inl(void *opaque, uint16_t addr, uint32_t *data)
{
    uint32_t value = cpu_inl(0, addr);

    *data = value;
    return 0;
}
654 #define PM_IO_BASE 0xb000
656 static int kvm_outb(void *opaque, uint16_t addr, uint8_t data)
658 if (addr == 0xb2) {
659 switch (data) {
660 case 0: {
661 cpu_outb(0, 0xb3, 0);
662 break;
664 case 0xf0: {
665 unsigned x;
667 /* enable acpi */
668 x = cpu_inw(0, PM_IO_BASE + 4);
669 x &= ~1;
670 cpu_outw(0, PM_IO_BASE + 4, x);
671 break;
673 case 0xf1: {
674 unsigned x;
676 /* enable acpi */
677 x = cpu_inw(0, PM_IO_BASE + 4);
678 x |= 1;
679 cpu_outw(0, PM_IO_BASE + 4, x);
680 break;
682 default:
683 break;
685 return 0;
687 cpu_outb(0, addr, data);
688 return 0;
/* libkvm callback: 16-bit port write of @data to @addr.  Returns 0. */
static int kvm_outw(void *opaque, uint16_t addr, uint16_t data)
{
    (void)opaque;

    cpu_outw(0, addr, data);
    return 0;
}
/* libkvm callback: 32-bit port write of @data to @addr.  Returns 0. */
static int kvm_outl(void *opaque, uint16_t addr, uint32_t data)
{
    (void)opaque;

    cpu_outl(0, addr, data);
    return 0;
}
/*
 * libkvm callback: read @len bytes of guest physical memory at @addr into
 * @data (cpu_physical_memory_rw with its last argument 0).  Returns 0.
 */
static int kvm_mmio_read(void *opaque, uint64_t addr, uint8_t *data, int len)
{
    (void)opaque;

    cpu_physical_memory_rw(addr, data, len, 0);
    return 0;
}
/*
 * libkvm callback: write @len bytes from @data to guest physical memory at
 * @addr (cpu_physical_memory_rw with its last argument 1).  Returns 0.
 */
static int kvm_mmio_write(void *opaque, uint64_t addr, uint8_t *data, int len)
{
    (void)opaque;

    cpu_physical_memory_rw(addr, data, len, 1);
    return 0;
}
/* libkvm callback: unconditionally returns 1. */
static int kvm_io_window(void *opaque)
{
    (void)opaque;

    return 1;
}
/* libkvm callback: forwards the halt exit to the arch implementation. */
static int kvm_halt(void *opaque, int vcpu)
{
    int rc = kvm_arch_halt(opaque, vcpu);

    return rc;
}
726 static int kvm_shutdown(void *opaque, void *data)
728 struct CPUState *env = (struct CPUState *)data;
730 /* stop the current vcpu from going back to guest mode */
731 env->kvm_cpu_state.stopped = 1;
733 qemu_system_reset_request();
734 return 1;
737 static struct kvm_callbacks qemu_kvm_ops = {
738 #ifdef KVM_CAP_SET_GUEST_DEBUG
739 .debug = kvm_debug,
740 #endif
741 .inb = kvm_inb,
742 .inw = kvm_inw,
743 .inl = kvm_inl,
744 .outb = kvm_outb,
745 .outw = kvm_outw,
746 .outl = kvm_outl,
747 .mmio_read = kvm_mmio_read,
748 .mmio_write = kvm_mmio_write,
749 .halt = kvm_halt,
750 .shutdown = kvm_shutdown,
751 .io_window = kvm_io_window,
752 .try_push_interrupts = try_push_interrupts,
753 #ifdef KVM_CAP_USER_NMI
754 .push_nmi = kvm_arch_push_nmi,
755 #endif
756 .post_kvm_run = post_kvm_run,
757 .pre_kvm_run = pre_kvm_run,
758 #ifdef TARGET_I386
759 .tpr_access = handle_tpr_access,
760 #endif
761 #ifdef TARGET_PPC
762 .powerpc_dcr_read = handle_powerpc_dcr_read,
763 .powerpc_dcr_write = handle_powerpc_dcr_write,
764 #endif
767 int kvm_qemu_init()
769 /* Try to initialize kvm */
770 kvm_context = kvm_init(&qemu_kvm_ops, cpu_single_env);
771 if (!kvm_context) {
772 return -1;
774 pthread_mutex_lock(&qemu_mutex);
776 return 0;
779 #ifdef TARGET_I386
780 static int destroy_region_works = 0;
781 #endif
783 int kvm_qemu_create_context(void)
785 int r;
786 if (!kvm_irqchip) {
787 kvm_disable_irqchip_creation(kvm_context);
789 if (!kvm_pit) {
790 kvm_disable_pit_creation(kvm_context);
792 if (kvm_create(kvm_context, phys_ram_size, (void**)&phys_ram_base) < 0) {
793 kvm_qemu_destroy();
794 return -1;
796 r = kvm_arch_qemu_create_context();
797 if(r <0)
798 kvm_qemu_destroy();
799 if (kvm_pit && !kvm_pit_reinject) {
800 if (kvm_reinject_control(kvm_context, 0)) {
801 fprintf(stderr, "failure to disable in-kernel PIT reinjection\n");
802 return -1;
805 #ifdef TARGET_I386
806 destroy_region_works = kvm_destroy_memory_region_works(kvm_context);
807 #endif
808 return 0;
811 void kvm_qemu_destroy(void)
813 kvm_finalize(kvm_context);
816 #ifdef TARGET_I386
817 static int must_use_aliases_source(target_phys_addr_t addr)
819 if (destroy_region_works)
820 return false;
821 if (addr == 0xa0000 || addr == 0xa8000)
822 return true;
823 return false;
826 static int must_use_aliases_target(target_phys_addr_t addr)
828 if (destroy_region_works)
829 return false;
830 if (addr >= 0xe0000000 && addr < 0x100000000ull)
831 return true;
832 return false;
835 static struct mapping {
836 target_phys_addr_t phys;
837 ram_addr_t ram;
838 ram_addr_t len;
839 } mappings[50];
840 static int nr_mappings;
842 static struct mapping *find_ram_mapping(ram_addr_t ram_addr)
844 struct mapping *p;
846 for (p = mappings; p < mappings + nr_mappings; ++p) {
847 if (p->ram <= ram_addr && ram_addr < p->ram + p->len) {
848 return p;
851 return NULL;
854 static struct mapping *find_mapping(target_phys_addr_t start_addr)
856 struct mapping *p;
858 for (p = mappings; p < mappings + nr_mappings; ++p) {
859 if (p->phys <= start_addr && start_addr < p->phys + p->len) {
860 return p;
863 return NULL;
866 static void drop_mapping(target_phys_addr_t start_addr)
868 struct mapping *p = find_mapping(start_addr);
870 if (p)
871 *p = mappings[--nr_mappings];
873 #endif
875 void kvm_cpu_register_physical_memory(target_phys_addr_t start_addr,
876 unsigned long size,
877 unsigned long phys_offset)
879 int r = 0;
880 unsigned long area_flags = phys_offset & ~TARGET_PAGE_MASK;
881 #ifdef TARGET_I386
882 struct mapping *p;
883 #endif
885 phys_offset &= ~IO_MEM_ROM;
887 if (area_flags == IO_MEM_UNASSIGNED) {
888 #ifdef TARGET_I386
889 if (must_use_aliases_source(start_addr)) {
890 kvm_destroy_memory_alias(kvm_context, start_addr);
891 return;
893 if (must_use_aliases_target(start_addr))
894 return;
895 #endif
896 kvm_unregister_memory_area(kvm_context, start_addr, size);
897 return;
900 r = kvm_is_containing_region(kvm_context, start_addr, size);
901 if (r)
902 return;
904 if (area_flags >= TLB_MMIO)
905 return;
907 #ifdef TARGET_I386
908 if (must_use_aliases_source(start_addr)) {
909 p = find_ram_mapping(phys_offset);
910 if (p) {
911 kvm_create_memory_alias(kvm_context, start_addr, size,
912 p->phys + (phys_offset - p->ram));
914 return;
916 #endif
918 r = kvm_register_phys_mem(kvm_context, start_addr,
919 phys_ram_base + phys_offset,
920 size, 0);
921 if (r < 0) {
922 printf("kvm_cpu_register_physical_memory: failed\n");
923 exit(1);
926 #ifdef TARGET_I386
927 drop_mapping(start_addr);
928 p = &mappings[nr_mappings++];
929 p->phys = start_addr;
930 p->ram = phys_offset;
931 p->len = size;
932 #endif
934 return;
937 void kvm_cpu_unregister_physical_memory(target_phys_addr_t start_addr,
938 target_phys_addr_t size,
939 unsigned long phys_offset)
941 kvm_unregister_memory_area(kvm_context, start_addr, size);
/*
 * Prepare a guest memory area.  Where MADV_DONTFORK exists and KVM is
 * enabled without a synchronous MMU, the area is marked MADV_DONTFORK.
 * Returns the madvise() result (0 when madvise was not called); a
 * non-zero result is also reported with perror().
 */
int kvm_setup_guest_memory(void *area, unsigned long size)
{
    int rc = 0;

#ifdef MADV_DONTFORK
    if (kvm_enabled() && !kvm_has_sync_mmu()) {
        rc = madvise(area, size, MADV_DONTFORK);
    }
#endif

    if (rc != 0) {
        perror ("madvise");
    }

    return rc;
}
959 int kvm_qemu_check_extension(int ext)
961 return kvm_check_extension(kvm_context, ext);
964 int kvm_qemu_init_env(CPUState *cenv)
966 return kvm_arch_qemu_init_env(cenv);
969 #ifdef KVM_CAP_SET_GUEST_DEBUG
970 struct kvm_sw_breakpoint_head kvm_sw_breakpoints =
971 TAILQ_HEAD_INITIALIZER(kvm_sw_breakpoints);
973 struct kvm_sw_breakpoint *kvm_find_sw_breakpoint(target_ulong pc)
975 struct kvm_sw_breakpoint *bp;
977 TAILQ_FOREACH(bp, &kvm_sw_breakpoints, entry) {
978 if (bp->pc == pc)
979 return bp;
981 return NULL;
984 struct kvm_set_guest_debug_data {
985 struct kvm_guest_debug dbg;
986 int err;
989 void kvm_invoke_set_guest_debug(void *data)
991 struct kvm_set_guest_debug_data *dbg_data = data;
993 dbg_data->err = kvm_set_guest_debug(kvm_context, cpu_single_env->cpu_index,
994 &dbg_data->dbg);
997 int kvm_update_guest_debug(CPUState *env, unsigned long reinject_trap)
999 struct kvm_set_guest_debug_data data;
1001 data.dbg.control = 0;
1002 if (env->singlestep_enabled)
1003 data.dbg.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP;
1005 kvm_arch_update_guest_debug(env, &data.dbg);
1006 data.dbg.control |= reinject_trap;
1008 on_vcpu(env, kvm_invoke_set_guest_debug, &data);
1009 return data.err;
1012 int kvm_insert_breakpoint(CPUState *current_env, target_ulong addr,
1013 target_ulong len, int type)
1015 struct kvm_sw_breakpoint *bp;
1016 CPUState *env;
1017 int err;
1019 if (type == GDB_BREAKPOINT_SW) {
1020 bp = kvm_find_sw_breakpoint(addr);
1021 if (bp) {
1022 bp->use_count++;
1023 return 0;
1026 bp = qemu_malloc(sizeof(struct kvm_sw_breakpoint));
1027 if (!bp)
1028 return -ENOMEM;
1030 bp->pc = addr;
1031 bp->use_count = 1;
1032 err = kvm_arch_insert_sw_breakpoint(current_env, bp);
1033 if (err) {
1034 free(bp);
1035 return err;
1038 TAILQ_INSERT_HEAD(&kvm_sw_breakpoints, bp, entry);
1039 } else {
1040 err = kvm_arch_insert_hw_breakpoint(addr, len, type);
1041 if (err)
1042 return err;
1045 for (env = first_cpu; env != NULL; env = env->next_cpu) {
1046 err = kvm_update_guest_debug(env, 0);
1047 if (err)
1048 return err;
1050 return 0;
1053 int kvm_remove_breakpoint(CPUState *current_env, target_ulong addr,
1054 target_ulong len, int type)
1056 struct kvm_sw_breakpoint *bp;
1057 CPUState *env;
1058 int err;
1060 if (type == GDB_BREAKPOINT_SW) {
1061 bp = kvm_find_sw_breakpoint(addr);
1062 if (!bp)
1063 return -ENOENT;
1065 if (bp->use_count > 1) {
1066 bp->use_count--;
1067 return 0;
1070 err = kvm_arch_remove_sw_breakpoint(current_env, bp);
1071 if (err)
1072 return err;
1074 TAILQ_REMOVE(&kvm_sw_breakpoints, bp, entry);
1075 qemu_free(bp);
1076 } else {
1077 err = kvm_arch_remove_hw_breakpoint(addr, len, type);
1078 if (err)
1079 return err;
1082 for (env = first_cpu; env != NULL; env = env->next_cpu) {
1083 err = kvm_update_guest_debug(env, 0);
1084 if (err)
1085 return err;
1087 return 0;
1090 void kvm_remove_all_breakpoints(CPUState *current_env)
1092 struct kvm_sw_breakpoint *bp, *next;
1093 CPUState *env;
1095 TAILQ_FOREACH_SAFE(bp, &kvm_sw_breakpoints, entry, next) {
1096 if (kvm_arch_remove_sw_breakpoint(current_env, bp) != 0) {
1097 /* Try harder to find a CPU that currently sees the breakpoint. */
1098 for (env = first_cpu; env != NULL; env = env->next_cpu) {
1099 if (kvm_arch_remove_sw_breakpoint(env, bp) == 0)
1100 break;
1104 kvm_arch_remove_all_hw_breakpoints();
1106 for (env = first_cpu; env != NULL; env = env->next_cpu)
1107 kvm_update_guest_debug(env, 0);
1110 #else /* !KVM_CAP_SET_GUEST_DEBUG */
1112 int kvm_update_guest_debug(CPUState *env, unsigned long reinject_trap)
1114 return -EINVAL;
1117 int kvm_insert_breakpoint(CPUState *current_env, target_ulong addr,
1118 target_ulong len, int type)
1120 return -EINVAL;
1123 int kvm_remove_breakpoint(CPUState *current_env, target_ulong addr,
1124 target_ulong len, int type)
1126 return -EINVAL;
1129 void kvm_remove_all_breakpoints(CPUState *current_env)
1132 #endif /* !KVM_CAP_SET_GUEST_DEBUG */
1135 * dirty pages logging
1137 /* FIXME: use unsigned long pointer instead of unsigned char */
1138 unsigned char *kvm_dirty_bitmap = NULL;
1139 int kvm_physical_memory_set_dirty_tracking(int enable)
1141 int r = 0;
1143 if (!kvm_enabled())
1144 return 0;
1146 if (enable) {
1147 if (!kvm_dirty_bitmap) {
1148 unsigned bitmap_size = BITMAP_SIZE(phys_ram_size);
1149 kvm_dirty_bitmap = qemu_malloc(bitmap_size);
1150 if (kvm_dirty_bitmap == NULL) {
1151 perror("Failed to allocate dirty pages bitmap");
1152 r=-1;
1154 else {
1155 r = kvm_dirty_pages_log_enable_all(kvm_context);
1159 else {
1160 if (kvm_dirty_bitmap) {
1161 r = kvm_dirty_pages_log_reset(kvm_context);
1162 qemu_free(kvm_dirty_bitmap);
1163 kvm_dirty_bitmap = NULL;
1166 return r;
1169 /* get kvm's dirty pages bitmap and update qemu's */
1170 int kvm_get_dirty_pages_log_range(unsigned long start_addr,
1171 unsigned char *bitmap,
1172 unsigned int offset,
1173 unsigned long mem_size)
1175 unsigned int i, j, n=0;
1176 unsigned char c;
1177 unsigned long page_number, addr, addr1;
1178 ram_addr_t ram_addr;
1179 unsigned int len = ((mem_size/TARGET_PAGE_SIZE) + 7) / 8;
1182 * bitmap-traveling is faster than memory-traveling (for addr...)
1183 * especially when most of the memory is not dirty.
1185 for (i=0; i<len; i++) {
1186 c = bitmap[i];
1187 while (c>0) {
1188 j = ffsl(c) - 1;
1189 c &= ~(1u<<j);
1190 page_number = i * 8 + j;
1191 addr1 = page_number * TARGET_PAGE_SIZE;
1192 addr = offset + addr1;
1193 ram_addr = cpu_get_physical_page_desc(addr);
1194 cpu_physical_memory_set_dirty(ram_addr);
1195 n++;
1198 return 0;
/*
 * Callback for kvm_get_dirty_pages_range(): feed one slot's bitmap to
 * kvm_get_dirty_pages_log_range(), using @start both as start address and
 * as offset.  @opaque is not used.
 */
int kvm_get_dirty_bitmap_cb(unsigned long start, unsigned long len,
                            void *bitmap, void *opaque)
{
    (void)opaque;

    return kvm_get_dirty_pages_log_range(start, bitmap, start, len);
}
1207 * get kvm's dirty pages bitmap and update qemu's
1208 * we only care about physical ram, which resides in slots 0 and 3
1210 int kvm_update_dirty_pages_log(void)
1212 int r = 0;
1215 r = kvm_get_dirty_pages_range(kvm_context, 0, phys_ram_size,
1216 kvm_dirty_bitmap, NULL,
1217 kvm_get_dirty_bitmap_cb);
1218 return r;
1221 void kvm_qemu_log_memory(target_phys_addr_t start, target_phys_addr_t size,
1222 int log)
1224 if (log)
1225 kvm_dirty_pages_log_enable_slot(kvm_context, start, size);
1226 else {
1227 #ifdef TARGET_I386
1228 if (must_use_aliases_target(start))
1229 return;
1230 #endif
1231 kvm_dirty_pages_log_disable_slot(kvm_context, start, size);
1235 int kvm_get_phys_ram_page_bitmap(unsigned char *bitmap)
1237 unsigned int bsize = BITMAP_SIZE(phys_ram_size);
1238 unsigned int brsize = BITMAP_SIZE(ram_size);
1239 unsigned int extra_pages = (phys_ram_size - ram_size) / TARGET_PAGE_SIZE;
1240 unsigned int extra_bytes = (extra_pages +7)/8;
1241 unsigned int hole_start = BITMAP_SIZE(0xa0000);
1242 unsigned int hole_end = BITMAP_SIZE(0xc0000);
1244 memset(bitmap, 0xFF, brsize + extra_bytes);
1245 memset(bitmap + hole_start, 0, hole_end - hole_start);
1246 memset(bitmap + brsize + extra_bytes, 0, bsize - brsize - extra_bytes);
1248 return 0;
1251 #ifdef KVM_CAP_IRQCHIP
1253 int kvm_set_irq(int irq, int level)
1255 return kvm_set_irq_level(kvm_context, irq, level);
1258 #endif
1260 int qemu_kvm_get_dirty_pages(unsigned long phys_addr, void *buf)
1262 return kvm_get_dirty_pages(kvm_context, phys_addr, buf);
1265 void *kvm_cpu_create_phys_mem(target_phys_addr_t start_addr,
1266 unsigned long size, int log, int writable)
1268 return kvm_create_phys_mem(kvm_context, start_addr, size, log, writable);
1271 void kvm_cpu_destroy_phys_mem(target_phys_addr_t start_addr,
1272 unsigned long size)
1274 kvm_destroy_phys_mem(kvm_context, start_addr, size);
1277 void kvm_mutex_unlock(void)
1279 assert(!cpu_single_env);
1280 pthread_mutex_unlock(&qemu_mutex);
1283 void kvm_mutex_lock(void)
1285 pthread_mutex_lock(&qemu_mutex);
1286 cpu_single_env = NULL;
1289 int qemu_kvm_register_coalesced_mmio(target_phys_addr_t addr, unsigned int size)
1291 return kvm_register_coalesced_mmio(kvm_context, addr, size);
1294 int qemu_kvm_unregister_coalesced_mmio(target_phys_addr_t addr,
1295 unsigned int size)
1297 return kvm_unregister_coalesced_mmio(kvm_context, addr, size);
1300 int kvm_coalesce_mmio_region(target_phys_addr_t start, ram_addr_t size)
1302 return kvm_register_coalesced_mmio(kvm_context, start, size);
1305 int kvm_uncoalesce_mmio_region(target_phys_addr_t start, ram_addr_t size)
1307 return kvm_unregister_coalesced_mmio(kvm_context, start, size);
1310 #ifdef USE_KVM_DEVICE_ASSIGNMENT
1311 void kvm_add_ioperm_data(struct ioperm_data *data)
1313 LIST_INSERT_HEAD(&ioperm_head, data, entries);
1316 void kvm_ioperm(CPUState *env, void *data)
1318 if (kvm_enabled() && qemu_system_ready)
1319 on_vcpu(env, kvm_arch_do_ioperm, data);
1322 #endif
1324 void kvm_physical_sync_dirty_bitmap(target_phys_addr_t start_addr, target_phys_addr_t end_addr)
1326 #ifndef TARGET_IA64
1327 void *buf;
1329 #ifdef TARGET_I386
1330 if (must_use_aliases_source(start_addr))
1331 return;
1332 #endif
1334 buf = qemu_malloc((end_addr - start_addr) / 8 + 2);
1335 kvm_get_dirty_pages_range(kvm_context, start_addr, end_addr - start_addr,
1336 buf, NULL, kvm_get_dirty_bitmap_cb);
1337 qemu_free(buf);
1338 #endif
1341 int kvm_log_start(target_phys_addr_t phys_addr, target_phys_addr_t len)
1343 #ifdef TARGET_I386
1344 if (must_use_aliases_source(phys_addr))
1345 return 0;
1346 #endif
1347 kvm_qemu_log_memory(phys_addr, len, 1);
1348 return 0;
1351 int kvm_log_stop(target_phys_addr_t phys_addr, target_phys_addr_t len)
1353 #ifdef TARGET_I386
1354 if (must_use_aliases_source(phys_addr))
1355 return 0;
1356 #endif
1357 kvm_qemu_log_memory(phys_addr, len, 0);
1358 return 0;
1361 /* hack: both libkvm and upstream qemu define kvm_has_sync_mmu(), differently */
1362 #undef kvm_has_sync_mmu
1363 int qemu_kvm_has_sync_mmu(void)
1365 return kvm_has_sync_mmu(kvm_context);
1368 void qemu_kvm_cpu_stop(CPUState *env)
1370 if (kvm_enabled())
1371 env->kvm_cpu_state.stopped = 1;