Device-assignment: add assigned_dev_update_irq() prototype
[qemu-kvm/fedora.git] / qemu-kvm.c
blobf4c92fb5ad0751def6f4dc0892a75f385ddffe0c
1 /*
2 * qemu/kvm integration
4 * Copyright (C) 2006-2008 Qumranet Technologies
6 * Licensed under the terms of the GNU GPL version 2 or higher.
7 */
8 #include "config.h"
9 #include "config-host.h"
11 #include <assert.h>
12 #include <string.h>
13 #include "hw/hw.h"
14 #include "sysemu.h"
15 #include "qemu-common.h"
16 #include "console.h"
17 #include "block.h"
18 #include "compatfd.h"
19 #include "gdbstub.h"
21 #include "qemu-kvm.h"
22 #include <libkvm.h>
23 #include <pthread.h>
24 #include <sys/utsname.h>
25 #include <sys/syscall.h>
26 #include <sys/mman.h>
28 #define false 0
29 #define true 1
31 extern void perror(const char *s);
33 int kvm_allowed = 1;
34 int kvm_irqchip = 1;
35 int kvm_pit = 1;
36 int kvm_pit_reinject = 1;
37 int kvm_nested = 0;
38 kvm_context_t kvm_context;
40 extern int smp_cpus;
42 pthread_mutex_t qemu_mutex = PTHREAD_MUTEX_INITIALIZER;
43 pthread_cond_t qemu_vcpu_cond = PTHREAD_COND_INITIALIZER;
44 pthread_cond_t qemu_system_cond = PTHREAD_COND_INITIALIZER;
45 pthread_cond_t qemu_pause_cond = PTHREAD_COND_INITIALIZER;
46 pthread_cond_t qemu_work_cond = PTHREAD_COND_INITIALIZER;
47 __thread struct CPUState *current_env;
49 static int qemu_system_ready;
51 #define SIG_IPI (SIGRTMIN+4)
53 pthread_t io_thread;
54 static int io_thread_fd = -1;
55 static int io_thread_sigfd = -1;
57 static CPUState *kvm_debug_cpu_requested;
59 /* The list of ioperm_data */
60 static LIST_HEAD(, ioperm_data) ioperm_head;
/* Return the result of the gettid system call for the calling thread. */
static inline unsigned long kvm_get_thread_id(void)
{
    long tid = syscall(SYS_gettid);

    return tid;
}
67 static void qemu_cond_wait(pthread_cond_t *cond)
69 CPUState *env = cpu_single_env;
70 static const struct timespec ts = {
71 .tv_sec = 0,
72 .tv_nsec = 100000,
75 pthread_cond_timedwait(cond, &qemu_mutex, &ts);
76 cpu_single_env = env;
/* Handler installed for SIG_IPI; it deliberately does nothing. */
static void sig_ipi_handler(int n)
{
    (void)n;
}
83 static void on_vcpu(CPUState *env, void (*func)(void *data), void *data)
85 struct qemu_work_item wi;
87 if (env == current_env) {
88 func(data);
89 return;
92 wi.func = func;
93 wi.data = data;
94 if (!env->kvm_cpu_state.queued_work_first)
95 env->kvm_cpu_state.queued_work_first = &wi;
96 else
97 env->kvm_cpu_state.queued_work_last->next = &wi;
98 env->kvm_cpu_state.queued_work_last = &wi;
99 wi.next = NULL;
100 wi.done = false;
102 pthread_kill(env->kvm_cpu_state.thread, SIG_IPI);
103 while (!wi.done)
104 qemu_cond_wait(&qemu_work_cond);
107 static void inject_interrupt(void *data)
109 cpu_interrupt(current_env, (int)data);
112 void kvm_inject_interrupt(CPUState *env, int mask)
114 on_vcpu(env, inject_interrupt, (void *)mask);
117 void kvm_update_interrupt_request(CPUState *env)
119 int signal = 0;
121 if (env) {
122 if (!current_env || !current_env->kvm_cpu_state.created)
123 signal = 1;
125 * Testing for created here is really redundant
127 if (current_env && current_env->kvm_cpu_state.created &&
128 env != current_env && !env->kvm_cpu_state.signalled)
129 signal = 1;
131 if (signal) {
132 env->kvm_cpu_state.signalled = 1;
133 if (env->kvm_cpu_state.thread)
134 pthread_kill(env->kvm_cpu_state.thread, SIG_IPI);
139 void kvm_update_after_sipi(CPUState *env)
141 env->kvm_cpu_state.sipi_needed = 1;
142 kvm_update_interrupt_request(env);
145 void kvm_apic_init(CPUState *env)
147 if (env->cpu_index != 0)
148 env->kvm_cpu_state.init = 1;
149 kvm_update_interrupt_request(env);
152 #include <signal.h>
/* libkvm callback: forwards to the architecture-specific implementation. */
static int try_push_interrupts(void *opaque)
{
    int r = kvm_arch_try_push_interrupts(opaque);

    return r;
}
159 static void post_kvm_run(void *opaque, void *data)
161 CPUState *env = (CPUState *)data;
163 pthread_mutex_lock(&qemu_mutex);
164 kvm_arch_post_kvm_run(opaque, env);
167 static int pre_kvm_run(void *opaque, void *data)
169 CPUState *env = (CPUState *)data;
171 kvm_arch_pre_kvm_run(opaque, env);
173 if (env->interrupt_request & CPU_INTERRUPT_EXIT)
174 return 1;
175 pthread_mutex_unlock(&qemu_mutex);
176 return 0;
179 static void kvm_do_load_registers(void *_env)
181 CPUState *env = _env;
183 kvm_arch_load_regs(env);
186 void kvm_load_registers(CPUState *env)
188 if (kvm_enabled() && qemu_system_ready)
189 on_vcpu(env, kvm_do_load_registers, env);
192 static void kvm_do_save_registers(void *_env)
194 CPUState *env = _env;
196 kvm_arch_save_regs(env);
199 void kvm_save_registers(CPUState *env)
201 if (kvm_enabled())
202 on_vcpu(env, kvm_do_save_registers, env);
205 int kvm_cpu_exec(CPUState *env)
207 int r;
209 r = kvm_run(kvm_context, env->cpu_index, env);
210 if (r < 0) {
211 printf("kvm_run returned %d\n", r);
212 exit(1);
215 return 0;
218 extern int vm_running;
220 static int has_work(CPUState *env)
222 if (!vm_running || (env && env->kvm_cpu_state.stopped))
223 return 0;
224 if (!env->halted)
225 return 1;
226 return kvm_arch_has_work(env);
229 static void flush_queued_work(CPUState *env)
231 struct qemu_work_item *wi;
233 if (!env->kvm_cpu_state.queued_work_first)
234 return;
236 while ((wi = env->kvm_cpu_state.queued_work_first)) {
237 env->kvm_cpu_state.queued_work_first = wi->next;
238 wi->func(wi->data);
239 wi->done = true;
241 env->kvm_cpu_state.queued_work_last = NULL;
242 pthread_cond_broadcast(&qemu_work_cond);
245 static void kvm_main_loop_wait(CPUState *env, int timeout)
247 struct timespec ts;
248 int r, e;
249 siginfo_t siginfo;
250 sigset_t waitset;
252 pthread_mutex_unlock(&qemu_mutex);
254 ts.tv_sec = timeout / 1000;
255 ts.tv_nsec = (timeout % 1000) * 1000000;
256 sigemptyset(&waitset);
257 sigaddset(&waitset, SIG_IPI);
259 r = sigtimedwait(&waitset, &siginfo, &ts);
260 e = errno;
262 pthread_mutex_lock(&qemu_mutex);
264 if (r == -1 && !(e == EAGAIN || e == EINTR)) {
265 printf("sigtimedwait: %s\n", strerror(e));
266 exit(1);
269 cpu_single_env = env;
270 flush_queued_work(env);
272 if (env->kvm_cpu_state.stop) {
273 env->kvm_cpu_state.stop = 0;
274 env->kvm_cpu_state.stopped = 1;
275 pthread_cond_signal(&qemu_pause_cond);
278 env->kvm_cpu_state.signalled = 0;
281 static int all_threads_paused(void)
283 CPUState *penv = first_cpu;
285 while (penv) {
286 if (penv->kvm_cpu_state.stop)
287 return 0;
288 penv = (CPUState *)penv->next_cpu;
291 return 1;
294 static void pause_all_threads(void)
296 CPUState *penv = first_cpu;
298 assert(!cpu_single_env);
300 while (penv) {
301 penv->kvm_cpu_state.stop = 1;
302 pthread_kill(penv->kvm_cpu_state.thread, SIG_IPI);
303 penv = (CPUState *)penv->next_cpu;
306 while (!all_threads_paused())
307 qemu_cond_wait(&qemu_pause_cond);
310 static void resume_all_threads(void)
312 CPUState *penv = first_cpu;
314 assert(!cpu_single_env);
316 while (penv) {
317 penv->kvm_cpu_state.stop = 0;
318 penv->kvm_cpu_state.stopped = 0;
319 pthread_kill(penv->kvm_cpu_state.thread, SIG_IPI);
320 penv = (CPUState *)penv->next_cpu;
/* VM run-state callback: resume vcpu threads when @running, else pause them. */
static void kvm_vm_state_change_handler(void *context, int running)
{
    if (!running) {
        pause_all_threads();
        return;
    }

    resume_all_threads();
}
332 static void update_regs_for_sipi(CPUState *env)
334 kvm_arch_update_regs_for_sipi(env);
335 env->kvm_cpu_state.sipi_needed = 0;
338 static void update_regs_for_init(CPUState *env)
340 #ifdef TARGET_I386
341 SegmentCache cs = env->segs[R_CS];
342 #endif
344 cpu_reset(env);
346 #ifdef TARGET_I386
347 /* restore SIPI vector */
348 if(env->kvm_cpu_state.sipi_needed)
349 env->segs[R_CS] = cs;
350 #endif
352 env->kvm_cpu_state.init = 0;
353 kvm_arch_load_regs(env);
356 static void setup_kernel_sigmask(CPUState *env)
358 sigset_t set;
360 sigemptyset(&set);
361 sigaddset(&set, SIGUSR2);
362 sigaddset(&set, SIGIO);
363 sigaddset(&set, SIGALRM);
364 sigprocmask(SIG_BLOCK, &set, NULL);
366 sigprocmask(SIG_BLOCK, NULL, &set);
367 sigdelset(&set, SIG_IPI);
369 kvm_set_signal_mask(kvm_context, env->cpu_index, &set);
372 void qemu_kvm_system_reset(void)
374 CPUState *penv = first_cpu;
376 pause_all_threads();
378 qemu_system_reset();
380 while (penv) {
381 kvm_arch_cpu_reset(penv);
382 penv = (CPUState *)penv->next_cpu;
385 resume_all_threads();
388 static int kvm_main_loop_cpu(CPUState *env)
390 setup_kernel_sigmask(env);
392 pthread_mutex_lock(&qemu_mutex);
393 if (kvm_irqchip_in_kernel(kvm_context))
394 env->halted = 0;
396 kvm_qemu_init_env(env);
397 #ifdef TARGET_I386
398 kvm_tpr_vcpu_start(env);
399 #endif
401 cpu_single_env = env;
402 kvm_load_registers(env);
404 while (1) {
405 while (!has_work(env))
406 kvm_main_loop_wait(env, 1000);
407 if (env->interrupt_request & (CPU_INTERRUPT_HARD | CPU_INTERRUPT_NMI))
408 env->halted = 0;
409 if (!kvm_irqchip_in_kernel(kvm_context)) {
410 if (env->kvm_cpu_state.init)
411 update_regs_for_init(env);
412 if (env->kvm_cpu_state.sipi_needed)
413 update_regs_for_sipi(env);
415 if (!env->halted && !env->kvm_cpu_state.init)
416 kvm_cpu_exec(env);
417 env->interrupt_request &= ~CPU_INTERRUPT_EXIT;
418 kvm_main_loop_wait(env, 0);
420 pthread_mutex_unlock(&qemu_mutex);
421 return 0;
424 static void *ap_main_loop(void *_env)
426 CPUState *env = _env;
427 sigset_t signals;
428 struct ioperm_data *data = NULL;
430 current_env = env;
431 env->thread_id = kvm_get_thread_id();
432 sigfillset(&signals);
433 sigprocmask(SIG_BLOCK, &signals, NULL);
434 kvm_create_vcpu(kvm_context, env->cpu_index);
435 kvm_qemu_init_env(env);
437 #ifdef USE_KVM_DEVICE_ASSIGNMENT
438 /* do ioperm for io ports of assigned devices */
439 LIST_FOREACH(data, &ioperm_head, entries)
440 on_vcpu(env, kvm_arch_do_ioperm, data);
441 #endif
443 /* signal VCPU creation */
444 pthread_mutex_lock(&qemu_mutex);
445 current_env->kvm_cpu_state.created = 1;
446 pthread_cond_signal(&qemu_vcpu_cond);
448 /* and wait for machine initialization */
449 while (!qemu_system_ready)
450 qemu_cond_wait(&qemu_system_cond);
451 pthread_mutex_unlock(&qemu_mutex);
453 kvm_main_loop_cpu(env);
454 return NULL;
457 void kvm_init_vcpu(CPUState *env)
459 int cpu = env->cpu_index;
460 pthread_create(&env->kvm_cpu_state.thread, NULL, ap_main_loop, env);
462 while (env->kvm_cpu_state.created == 0)
463 qemu_cond_wait(&qemu_vcpu_cond);
466 int kvm_init_ap(void)
468 #ifdef TARGET_I386
469 kvm_tpr_opt_setup();
470 #endif
471 qemu_add_vm_change_state_handler(kvm_vm_state_change_handler, NULL);
473 signal(SIG_IPI, sig_ipi_handler);
474 return 0;
477 void qemu_kvm_notify_work(void)
479 uint64_t value = 1;
480 char buffer[8];
481 size_t offset = 0;
483 if (io_thread_fd == -1)
484 return;
486 memcpy(buffer, &value, sizeof(value));
488 while (offset < 8) {
489 ssize_t len;
491 len = write(io_thread_fd, buffer + offset, 8 - offset);
492 if (len == -1 && errno == EINTR)
493 continue;
495 if (len <= 0)
496 break;
498 offset += len;
501 if (offset != 8)
502 fprintf(stderr, "failed to notify io thread\n");
505 /* If we have signalfd, we mask out the signals we want to handle and then
506 * use signalfd to listen for them. We rely on whatever the current signal
507 * handler is to dispatch the signals when we receive them.
510 static void sigfd_handler(void *opaque)
512 int fd = (unsigned long)opaque;
513 struct qemu_signalfd_siginfo info;
514 struct sigaction action;
515 ssize_t len;
517 while (1) {
518 do {
519 len = read(fd, &info, sizeof(info));
520 } while (len == -1 && errno == EINTR);
522 if (len == -1 && errno == EAGAIN)
523 break;
525 if (len != sizeof(info)) {
526 printf("read from sigfd returned %ld: %m\n", len);
527 return;
530 sigaction(info.ssi_signo, NULL, &action);
531 if (action.sa_handler)
532 action.sa_handler(info.ssi_signo);
537 /* Used to break IO thread out of select */
/*
 * fd handler for the wakeup descriptor carried in @opaque: consume up to 8
 * bytes, retrying on EINTR, and stop on EOF or any other read error.
 */
static void io_thread_wakeup(void *opaque)
{
    int fd = (unsigned long)opaque;
    char scratch[8];
    size_t consumed = 0;

    while (consumed < 8) {
        ssize_t rc = read(fd, scratch + consumed, 8 - consumed);

        if (rc == -1 && errno == EINTR) {
            continue;
        }
        if (rc <= 0) {
            break;
        }
        consumed += rc;
    }
}
558 int kvm_main_loop(void)
560 int fds[2];
561 sigset_t mask;
562 int sigfd;
564 io_thread = pthread_self();
565 qemu_system_ready = 1;
567 if (qemu_eventfd(fds) == -1) {
568 fprintf(stderr, "failed to create eventfd\n");
569 return -errno;
572 qemu_set_fd_handler2(fds[0], NULL, io_thread_wakeup, NULL,
573 (void *)(unsigned long)fds[0]);
575 io_thread_fd = fds[1];
577 sigemptyset(&mask);
578 sigaddset(&mask, SIGIO);
579 sigaddset(&mask, SIGALRM);
580 sigprocmask(SIG_BLOCK, &mask, NULL);
582 sigfd = qemu_signalfd(&mask);
583 if (sigfd == -1) {
584 fprintf(stderr, "failed to create signalfd\n");
585 return -errno;
588 fcntl(sigfd, F_SETFL, O_NONBLOCK);
590 qemu_set_fd_handler2(sigfd, NULL, sigfd_handler, NULL,
591 (void *)(unsigned long)sigfd);
593 pthread_cond_broadcast(&qemu_system_cond);
595 io_thread_sigfd = sigfd;
596 cpu_single_env = NULL;
598 while (1) {
599 main_loop_wait(1000);
600 if (qemu_shutdown_requested())
601 break;
602 else if (qemu_powerdown_requested())
603 qemu_system_powerdown();
604 else if (qemu_reset_requested())
605 qemu_kvm_system_reset();
606 #ifdef CONFIG_GDBSTUB
607 else if (kvm_debug_cpu_requested) {
608 gdb_set_stop_cpu(kvm_debug_cpu_requested);
609 vm_stop(EXCP_DEBUG);
610 kvm_debug_cpu_requested = NULL;
612 #endif
615 pause_all_threads();
616 pthread_mutex_unlock(&qemu_mutex);
618 return 0;
621 #ifdef KVM_CAP_SET_GUEST_DEBUG
622 int kvm_debug(void *opaque, void *data, struct kvm_debug_exit_arch *arch_info)
624 int handle = kvm_arch_debug(arch_info);
625 struct CPUState *env = data;
627 if (handle) {
628 kvm_debug_cpu_requested = env;
629 env->kvm_cpu_state.stopped = 1;
631 return handle;
633 #endif
/* libkvm callback: read one byte from I/O port @addr into *data. */
static int kvm_inb(void *opaque, uint16_t addr, uint8_t *data)
{
    uint8_t value = cpu_inb(0, addr);

    *data = value;
    return 0;
}
/* libkvm callback: read a 16-bit value from I/O port @addr into *data. */
static int kvm_inw(void *opaque, uint16_t addr, uint16_t *data)
{
    uint16_t value = cpu_inw(0, addr);

    *data = value;
    return 0;
}
/* libkvm callback: read a 32-bit value from I/O port @addr into *data. */
static int kvm_inl(void *opaque, uint16_t addr, uint32_t *data)
{
    uint32_t value = cpu_inl(0, addr);

    *data = value;
    return 0;
}
653 #define PM_IO_BASE 0xb000
655 static int kvm_outb(void *opaque, uint16_t addr, uint8_t data)
657 if (addr == 0xb2) {
658 switch (data) {
659 case 0: {
660 cpu_outb(0, 0xb3, 0);
661 break;
663 case 0xf0: {
664 unsigned x;
666 /* enable acpi */
667 x = cpu_inw(0, PM_IO_BASE + 4);
668 x &= ~1;
669 cpu_outw(0, PM_IO_BASE + 4, x);
670 break;
672 case 0xf1: {
673 unsigned x;
675 /* enable acpi */
676 x = cpu_inw(0, PM_IO_BASE + 4);
677 x |= 1;
678 cpu_outw(0, PM_IO_BASE + 4, x);
679 break;
681 default:
682 break;
684 return 0;
686 cpu_outb(0, addr, data);
687 return 0;
/* libkvm callback: write 16-bit @data to I/O port @addr; always returns 0. */
static int kvm_outw(void *opaque, uint16_t addr, uint16_t data)
{
    cpu_outw(0, addr, data);

    return 0;
}
/* libkvm callback: write 32-bit @data to I/O port @addr; always returns 0. */
static int kvm_outl(void *opaque, uint16_t addr, uint32_t data)
{
    cpu_outl(0, addr, data);

    return 0;
}
/* libkvm callback: read @len bytes of guest physical memory at @addr. */
static int kvm_mmio_read(void *opaque, uint64_t addr, uint8_t *data, int len)
{
    const int is_write = 0;

    cpu_physical_memory_rw(addr, data, len, is_write);
    return 0;
}
/* libkvm callback: write @len bytes to guest physical memory at @addr. */
static int kvm_mmio_write(void *opaque, uint64_t addr, uint8_t *data, int len)
{
    const int is_write = 1;

    cpu_physical_memory_rw(addr, data, len, is_write);
    return 0;
}
/* libkvm io_window callback: unconditionally returns 1. */
static int kvm_io_window(void *opaque)
{
    (void)opaque;

    return 1;
}
/* libkvm halt callback: forwards to the architecture-specific handler. */
static int kvm_halt(void *opaque, int vcpu)
{
    int r = kvm_arch_halt(opaque, vcpu);

    return r;
}
725 static int kvm_shutdown(void *opaque, void *data)
727 struct CPUState *env = (struct CPUState *)data;
729 /* stop the current vcpu from going back to guest mode */
730 env->kvm_cpu_state.stopped = 1;
732 qemu_system_reset_request();
733 return 1;
736 static struct kvm_callbacks qemu_kvm_ops = {
737 #ifdef KVM_CAP_SET_GUEST_DEBUG
738 .debug = kvm_debug,
739 #endif
740 .inb = kvm_inb,
741 .inw = kvm_inw,
742 .inl = kvm_inl,
743 .outb = kvm_outb,
744 .outw = kvm_outw,
745 .outl = kvm_outl,
746 .mmio_read = kvm_mmio_read,
747 .mmio_write = kvm_mmio_write,
748 .halt = kvm_halt,
749 .shutdown = kvm_shutdown,
750 .io_window = kvm_io_window,
751 .try_push_interrupts = try_push_interrupts,
752 #ifdef KVM_CAP_USER_NMI
753 .push_nmi = kvm_arch_push_nmi,
754 #endif
755 .post_kvm_run = post_kvm_run,
756 .pre_kvm_run = pre_kvm_run,
757 #ifdef TARGET_I386
758 .tpr_access = handle_tpr_access,
759 #endif
760 #ifdef TARGET_PPC
761 .powerpc_dcr_read = handle_powerpc_dcr_read,
762 .powerpc_dcr_write = handle_powerpc_dcr_write,
763 #endif
766 int kvm_qemu_init()
768 /* Try to initialize kvm */
769 kvm_context = kvm_init(&qemu_kvm_ops, cpu_single_env);
770 if (!kvm_context) {
771 return -1;
773 pthread_mutex_lock(&qemu_mutex);
775 return 0;
778 #ifdef TARGET_I386
779 static int destroy_region_works = 0;
780 #endif
782 int kvm_qemu_create_context(void)
784 int r;
785 if (!kvm_irqchip) {
786 kvm_disable_irqchip_creation(kvm_context);
788 if (!kvm_pit) {
789 kvm_disable_pit_creation(kvm_context);
791 if (kvm_create(kvm_context, phys_ram_size, (void**)&phys_ram_base) < 0) {
792 kvm_qemu_destroy();
793 return -1;
795 r = kvm_arch_qemu_create_context();
796 if(r <0)
797 kvm_qemu_destroy();
798 if (kvm_pit && !kvm_pit_reinject) {
799 if (kvm_reinject_control(kvm_context, 0)) {
800 fprintf(stderr, "failure to disable in-kernel PIT reinjection\n");
801 return -1;
804 #ifdef TARGET_I386
805 destroy_region_works = kvm_destroy_memory_region_works(kvm_context);
806 #endif
807 return 0;
810 void kvm_qemu_destroy(void)
812 kvm_finalize(kvm_context);
815 #ifdef TARGET_I386
816 static int must_use_aliases_source(target_phys_addr_t addr)
818 if (destroy_region_works)
819 return false;
820 if (addr == 0xa0000 || addr == 0xa8000)
821 return true;
822 return false;
825 static int must_use_aliases_target(target_phys_addr_t addr)
827 if (destroy_region_works)
828 return false;
829 if (addr >= 0xe0000000 && addr < 0x100000000ull)
830 return true;
831 return false;
834 static struct mapping {
835 target_phys_addr_t phys;
836 ram_addr_t ram;
837 ram_addr_t len;
838 } mappings[50];
839 static int nr_mappings;
841 static struct mapping *find_ram_mapping(ram_addr_t ram_addr)
843 struct mapping *p;
845 for (p = mappings; p < mappings + nr_mappings; ++p) {
846 if (p->ram <= ram_addr && ram_addr < p->ram + p->len) {
847 return p;
850 return NULL;
853 static struct mapping *find_mapping(target_phys_addr_t start_addr)
855 struct mapping *p;
857 for (p = mappings; p < mappings + nr_mappings; ++p) {
858 if (p->phys <= start_addr && start_addr < p->phys + p->len) {
859 return p;
862 return NULL;
865 static void drop_mapping(target_phys_addr_t start_addr)
867 struct mapping *p = find_mapping(start_addr);
869 if (p)
870 *p = mappings[--nr_mappings];
872 #endif
874 void kvm_cpu_register_physical_memory(target_phys_addr_t start_addr,
875 unsigned long size,
876 unsigned long phys_offset)
878 int r = 0;
879 unsigned long area_flags = phys_offset & ~TARGET_PAGE_MASK;
880 #ifdef TARGET_I386
881 struct mapping *p;
882 #endif
884 phys_offset &= ~IO_MEM_ROM;
886 if (area_flags == IO_MEM_UNASSIGNED) {
887 #ifdef TARGET_I386
888 if (must_use_aliases_source(start_addr)) {
889 kvm_destroy_memory_alias(kvm_context, start_addr);
890 return;
892 if (must_use_aliases_target(start_addr))
893 return;
894 #endif
895 kvm_unregister_memory_area(kvm_context, start_addr, size);
896 return;
899 r = kvm_is_containing_region(kvm_context, start_addr, size);
900 if (r)
901 return;
903 if (area_flags >= TLB_MMIO)
904 return;
906 #ifdef TARGET_I386
907 if (must_use_aliases_source(start_addr)) {
908 p = find_ram_mapping(phys_offset);
909 if (p) {
910 kvm_create_memory_alias(kvm_context, start_addr, size,
911 p->phys + (phys_offset - p->ram));
913 return;
915 #endif
917 r = kvm_register_phys_mem(kvm_context, start_addr,
918 phys_ram_base + phys_offset,
919 size, 0);
920 if (r < 0) {
921 printf("kvm_cpu_register_physical_memory: failed\n");
922 exit(1);
925 #ifdef TARGET_I386
926 drop_mapping(start_addr);
927 p = &mappings[nr_mappings++];
928 p->phys = start_addr;
929 p->ram = phys_offset;
930 p->len = size;
931 #endif
933 return;
936 void kvm_cpu_unregister_physical_memory(target_phys_addr_t start_addr,
937 target_phys_addr_t size,
938 unsigned long phys_offset)
940 kvm_unregister_memory_area(kvm_context, start_addr, size);
/*
 * Where MADV_DONTFORK is available, mark [area, area + size) with it when
 * kvm is enabled and kvm_has_sync_mmu() reports false.
 *
 * Returns the madvise() result, or 0 when no call was made; a failure is
 * also reported through perror().
 */
int kvm_setup_guest_memory(void *area, unsigned long size)
{
    int rc = 0;

#ifdef MADV_DONTFORK
    if (kvm_enabled() && !kvm_has_sync_mmu()) {
        rc = madvise(area, size, MADV_DONTFORK);
    }
#endif

    if (rc != 0) {
        perror ("madvise");
    }

    return rc;
}
958 int kvm_qemu_check_extension(int ext)
960 return kvm_check_extension(kvm_context, ext);
963 int kvm_qemu_init_env(CPUState *cenv)
965 return kvm_arch_qemu_init_env(cenv);
968 #ifdef KVM_CAP_SET_GUEST_DEBUG
969 struct kvm_sw_breakpoint_head kvm_sw_breakpoints =
970 TAILQ_HEAD_INITIALIZER(kvm_sw_breakpoints);
972 struct kvm_sw_breakpoint *kvm_find_sw_breakpoint(target_ulong pc)
974 struct kvm_sw_breakpoint *bp;
976 TAILQ_FOREACH(bp, &kvm_sw_breakpoints, entry) {
977 if (bp->pc == pc)
978 return bp;
980 return NULL;
983 struct kvm_set_guest_debug_data {
984 struct kvm_guest_debug dbg;
985 int err;
988 void kvm_invoke_set_guest_debug(void *data)
990 struct kvm_set_guest_debug_data *dbg_data = data;
992 dbg_data->err = kvm_set_guest_debug(kvm_context, cpu_single_env->cpu_index,
993 &dbg_data->dbg);
996 int kvm_update_guest_debug(CPUState *env, unsigned long reinject_trap)
998 struct kvm_set_guest_debug_data data;
1000 data.dbg.control = 0;
1001 if (env->singlestep_enabled)
1002 data.dbg.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP;
1004 kvm_arch_update_guest_debug(env, &data.dbg);
1005 data.dbg.control |= reinject_trap;
1007 on_vcpu(env, kvm_invoke_set_guest_debug, &data);
1008 return data.err;
1011 int kvm_insert_breakpoint(CPUState *current_env, target_ulong addr,
1012 target_ulong len, int type)
1014 struct kvm_sw_breakpoint *bp;
1015 CPUState *env;
1016 int err;
1018 if (type == GDB_BREAKPOINT_SW) {
1019 bp = kvm_find_sw_breakpoint(addr);
1020 if (bp) {
1021 bp->use_count++;
1022 return 0;
1025 bp = qemu_malloc(sizeof(struct kvm_sw_breakpoint));
1026 if (!bp)
1027 return -ENOMEM;
1029 bp->pc = addr;
1030 bp->use_count = 1;
1031 err = kvm_arch_insert_sw_breakpoint(current_env, bp);
1032 if (err) {
1033 free(bp);
1034 return err;
1037 TAILQ_INSERT_HEAD(&kvm_sw_breakpoints, bp, entry);
1038 } else {
1039 err = kvm_arch_insert_hw_breakpoint(addr, len, type);
1040 if (err)
1041 return err;
1044 for (env = first_cpu; env != NULL; env = env->next_cpu) {
1045 err = kvm_update_guest_debug(env, 0);
1046 if (err)
1047 return err;
1049 return 0;
1052 int kvm_remove_breakpoint(CPUState *current_env, target_ulong addr,
1053 target_ulong len, int type)
1055 struct kvm_sw_breakpoint *bp;
1056 CPUState *env;
1057 int err;
1059 if (type == GDB_BREAKPOINT_SW) {
1060 bp = kvm_find_sw_breakpoint(addr);
1061 if (!bp)
1062 return -ENOENT;
1064 if (bp->use_count > 1) {
1065 bp->use_count--;
1066 return 0;
1069 err = kvm_arch_remove_sw_breakpoint(current_env, bp);
1070 if (err)
1071 return err;
1073 TAILQ_REMOVE(&kvm_sw_breakpoints, bp, entry);
1074 qemu_free(bp);
1075 } else {
1076 err = kvm_arch_remove_hw_breakpoint(addr, len, type);
1077 if (err)
1078 return err;
1081 for (env = first_cpu; env != NULL; env = env->next_cpu) {
1082 err = kvm_update_guest_debug(env, 0);
1083 if (err)
1084 return err;
1086 return 0;
1089 void kvm_remove_all_breakpoints(CPUState *current_env)
1091 struct kvm_sw_breakpoint *bp, *next;
1092 CPUState *env;
1094 TAILQ_FOREACH_SAFE(bp, &kvm_sw_breakpoints, entry, next) {
1095 if (kvm_arch_remove_sw_breakpoint(current_env, bp) != 0) {
1096 /* Try harder to find a CPU that currently sees the breakpoint. */
1097 for (env = first_cpu; env != NULL; env = env->next_cpu) {
1098 if (kvm_arch_remove_sw_breakpoint(env, bp) == 0)
1099 break;
1103 kvm_arch_remove_all_hw_breakpoints();
1105 for (env = first_cpu; env != NULL; env = env->next_cpu)
1106 kvm_update_guest_debug(env, 0);
1109 #else /* !KVM_CAP_SET_GUEST_DEBUG */
1111 int kvm_update_guest_debug(CPUState *env, unsigned long reinject_trap)
1113 return -EINVAL;
1116 int kvm_insert_breakpoint(CPUState *current_env, target_ulong addr,
1117 target_ulong len, int type)
1119 return -EINVAL;
1122 int kvm_remove_breakpoint(CPUState *current_env, target_ulong addr,
1123 target_ulong len, int type)
1125 return -EINVAL;
1128 void kvm_remove_all_breakpoints(CPUState *current_env)
1131 #endif /* !KVM_CAP_SET_GUEST_DEBUG */
1134 * dirty pages logging
1136 /* FIXME: use unsigned long pointer instead of unsigned char */
1137 unsigned char *kvm_dirty_bitmap = NULL;
1138 int kvm_physical_memory_set_dirty_tracking(int enable)
1140 int r = 0;
1142 if (!kvm_enabled())
1143 return 0;
1145 if (enable) {
1146 if (!kvm_dirty_bitmap) {
1147 unsigned bitmap_size = BITMAP_SIZE(phys_ram_size);
1148 kvm_dirty_bitmap = qemu_malloc(bitmap_size);
1149 if (kvm_dirty_bitmap == NULL) {
1150 perror("Failed to allocate dirty pages bitmap");
1151 r=-1;
1153 else {
1154 r = kvm_dirty_pages_log_enable_all(kvm_context);
1158 else {
1159 if (kvm_dirty_bitmap) {
1160 r = kvm_dirty_pages_log_reset(kvm_context);
1161 qemu_free(kvm_dirty_bitmap);
1162 kvm_dirty_bitmap = NULL;
1165 return r;
1168 /* get kvm's dirty pages bitmap and update qemu's */
1169 int kvm_get_dirty_pages_log_range(unsigned long start_addr,
1170 unsigned char *bitmap,
1171 unsigned int offset,
1172 unsigned long mem_size)
1174 unsigned int i, j, n=0;
1175 unsigned char c;
1176 unsigned long page_number, addr, addr1;
1177 ram_addr_t ram_addr;
1178 unsigned int len = ((mem_size/TARGET_PAGE_SIZE) + 7) / 8;
1181 * bitmap-traveling is faster than memory-traveling (for addr...)
1182 * especially when most of the memory is not dirty.
1184 for (i=0; i<len; i++) {
1185 c = bitmap[i];
1186 while (c>0) {
1187 j = ffsl(c) - 1;
1188 c &= ~(1u<<j);
1189 page_number = i * 8 + j;
1190 addr1 = page_number * TARGET_PAGE_SIZE;
1191 addr = offset + addr1;
1192 ram_addr = cpu_get_physical_page_desc(addr);
1193 cpu_physical_memory_set_dirty(ram_addr);
1194 n++;
1197 return 0;
/*
 * Callback for kvm_get_dirty_pages_range(): fold @bitmap for the range
 * starting at @start into qemu's dirty state.  @opaque is not used.
 */
int kvm_get_dirty_bitmap_cb(unsigned long start, unsigned long len,
                            void *bitmap, void *opaque)
{
    (void)opaque;

    return kvm_get_dirty_pages_log_range(start, bitmap, start, len);
}
1206 * get kvm's dirty pages bitmap and update qemu's
1207 * we only care about physical ram, which resides in slots 0 and 3
1209 int kvm_update_dirty_pages_log(void)
1211 int r = 0;
1214 r = kvm_get_dirty_pages_range(kvm_context, 0, phys_ram_size,
1215 kvm_dirty_bitmap, NULL,
1216 kvm_get_dirty_bitmap_cb);
1217 return r;
1220 void kvm_qemu_log_memory(target_phys_addr_t start, target_phys_addr_t size,
1221 int log)
1223 if (log)
1224 kvm_dirty_pages_log_enable_slot(kvm_context, start, size);
1225 else {
1226 #ifdef TARGET_I386
1227 if (must_use_aliases_target(start))
1228 return;
1229 #endif
1230 kvm_dirty_pages_log_disable_slot(kvm_context, start, size);
1234 int kvm_get_phys_ram_page_bitmap(unsigned char *bitmap)
1236 unsigned int bsize = BITMAP_SIZE(phys_ram_size);
1237 unsigned int brsize = BITMAP_SIZE(ram_size);
1238 unsigned int extra_pages = (phys_ram_size - ram_size) / TARGET_PAGE_SIZE;
1239 unsigned int extra_bytes = (extra_pages +7)/8;
1240 unsigned int hole_start = BITMAP_SIZE(0xa0000);
1241 unsigned int hole_end = BITMAP_SIZE(0xc0000);
1243 memset(bitmap, 0xFF, brsize + extra_bytes);
1244 memset(bitmap + hole_start, 0, hole_end - hole_start);
1245 memset(bitmap + brsize + extra_bytes, 0, bsize - brsize - extra_bytes);
1247 return 0;
1250 #ifdef KVM_CAP_IRQCHIP
1252 int kvm_set_irq(int irq, int level)
1254 return kvm_set_irq_level(kvm_context, irq, level);
1257 #endif
1259 int qemu_kvm_get_dirty_pages(unsigned long phys_addr, void *buf)
1261 return kvm_get_dirty_pages(kvm_context, phys_addr, buf);
1264 void *kvm_cpu_create_phys_mem(target_phys_addr_t start_addr,
1265 unsigned long size, int log, int writable)
1267 return kvm_create_phys_mem(kvm_context, start_addr, size, log, writable);
1270 void kvm_cpu_destroy_phys_mem(target_phys_addr_t start_addr,
1271 unsigned long size)
1273 kvm_destroy_phys_mem(kvm_context, start_addr, size);
1276 void kvm_mutex_unlock(void)
1278 assert(!cpu_single_env);
1279 pthread_mutex_unlock(&qemu_mutex);
1282 void kvm_mutex_lock(void)
1284 pthread_mutex_lock(&qemu_mutex);
1285 cpu_single_env = NULL;
1288 int qemu_kvm_register_coalesced_mmio(target_phys_addr_t addr, unsigned int size)
1290 return kvm_register_coalesced_mmio(kvm_context, addr, size);
1293 int qemu_kvm_unregister_coalesced_mmio(target_phys_addr_t addr,
1294 unsigned int size)
1296 return kvm_unregister_coalesced_mmio(kvm_context, addr, size);
1299 int kvm_coalesce_mmio_region(target_phys_addr_t start, ram_addr_t size)
1301 return kvm_register_coalesced_mmio(kvm_context, start, size);
1304 int kvm_uncoalesce_mmio_region(target_phys_addr_t start, ram_addr_t size)
1306 return kvm_unregister_coalesced_mmio(kvm_context, start, size);
1309 #ifdef USE_KVM_DEVICE_ASSIGNMENT
1310 void kvm_add_ioperm_data(struct ioperm_data *data)
1312 LIST_INSERT_HEAD(&ioperm_head, data, entries);
1315 void kvm_ioperm(CPUState *env, void *data)
1317 if (kvm_enabled() && qemu_system_ready)
1318 on_vcpu(env, kvm_arch_do_ioperm, data);
1321 #endif
1323 void kvm_physical_sync_dirty_bitmap(target_phys_addr_t start_addr, target_phys_addr_t end_addr)
1325 #ifndef TARGET_IA64
1326 void *buf;
1328 #ifdef TARGET_I386
1329 if (must_use_aliases_source(start_addr))
1330 return;
1331 #endif
1333 buf = qemu_malloc((end_addr - start_addr) / 8 + 2);
1334 kvm_get_dirty_pages_range(kvm_context, start_addr, end_addr - start_addr,
1335 buf, NULL, kvm_get_dirty_bitmap_cb);
1336 qemu_free(buf);
1337 #endif
1340 int kvm_log_start(target_phys_addr_t phys_addr, target_phys_addr_t len)
1342 #ifdef TARGET_I386
1343 if (must_use_aliases_source(phys_addr))
1344 return 0;
1345 #endif
1346 kvm_qemu_log_memory(phys_addr, len, 1);
1347 return 0;
1350 int kvm_log_stop(target_phys_addr_t phys_addr, target_phys_addr_t len)
1352 #ifdef TARGET_I386
1353 if (must_use_aliases_source(phys_addr))
1354 return 0;
1355 #endif
1356 kvm_qemu_log_memory(phys_addr, len, 0);
1357 return 0;
1360 /* hack: both libkvm and upstream qemu define kvm_has_sync_mmu(), differently */
1361 #undef kvm_has_sync_mmu
1362 int qemu_kvm_has_sync_mmu(void)
1364 return kvm_has_sync_mmu(kvm_context);
1367 void qemu_kvm_cpu_stop(CPUState *env)
1369 if (kvm_enabled())
1370 env->kvm_cpu_state.stopped = 1;