virtio-net: Refuse loading if the source requires vnet header and the host doesn...
[qemu-kvm/aliguori.git] / qemu-kvm.c
blob10e2caa7c5a1191c9217161b7de57cdcdf1c313a
1 /*
2 * qemu/kvm integration
4 * Copyright (C) 2006-2008 Qumranet Technologies
6 * Licensed under the terms of the GNU GPL version 2 or higher.
7 */
8 #include "config.h"
9 #include "config-host.h"
11 #include <assert.h>
12 #include <string.h>
13 #include "hw/hw.h"
14 #include "sysemu.h"
15 #include "qemu-common.h"
16 #include "console.h"
17 #include "block.h"
18 #include "compatfd.h"
19 #include "gdbstub.h"
21 #include "qemu-kvm.h"
22 #include <libkvm.h>
23 #include <pthread.h>
24 #include <sys/utsname.h>
25 #include <sys/syscall.h>
26 #include <sys/mman.h>
28 #define false 0
29 #define true 1
31 int kvm_allowed = 1;
32 int kvm_irqchip = 1;
33 int kvm_pit = 1;
34 int kvm_pit_reinject = 1;
35 int kvm_nested = 0;
36 kvm_context_t kvm_context;
38 pthread_mutex_t qemu_mutex = PTHREAD_MUTEX_INITIALIZER;
39 pthread_cond_t qemu_vcpu_cond = PTHREAD_COND_INITIALIZER;
40 pthread_cond_t qemu_system_cond = PTHREAD_COND_INITIALIZER;
41 pthread_cond_t qemu_pause_cond = PTHREAD_COND_INITIALIZER;
42 pthread_cond_t qemu_work_cond = PTHREAD_COND_INITIALIZER;
43 __thread struct CPUState *current_env;
45 static int qemu_system_ready;
47 #define SIG_IPI (SIGRTMIN+4)
49 pthread_t io_thread;
50 static int io_thread_fd = -1;
51 static int io_thread_sigfd = -1;
53 static CPUState *kvm_debug_cpu_requested;
55 /* The list of ioperm_data */
56 static LIST_HEAD(, ioperm_data) ioperm_head;
/* Return the kernel thread id of the caller, obtained with the gettid syscall. */
static inline unsigned long kvm_get_thread_id(void)
{
    long tid = syscall(SYS_gettid);

    return tid;
}
63 static void qemu_cond_wait(pthread_cond_t *cond)
65 CPUState *env = cpu_single_env;
66 static const struct timespec ts = {
67 .tv_sec = 0,
68 .tv_nsec = 100000,
71 pthread_cond_timedwait(cond, &qemu_mutex, &ts);
72 cpu_single_env = env;
/* Intentionally empty handler; installed for SIG_IPI by kvm_init_ap(). */
static void sig_ipi_handler(int signo)
{
    (void)signo;
}
79 static void on_vcpu(CPUState *env, void (*func)(void *data), void *data)
81 struct qemu_work_item wi;
83 if (env == current_env) {
84 func(data);
85 return;
88 wi.func = func;
89 wi.data = data;
90 if (!env->kvm_cpu_state.queued_work_first)
91 env->kvm_cpu_state.queued_work_first = &wi;
92 else
93 env->kvm_cpu_state.queued_work_last->next = &wi;
94 env->kvm_cpu_state.queued_work_last = &wi;
95 wi.next = NULL;
96 wi.done = false;
98 pthread_kill(env->kvm_cpu_state.thread, SIG_IPI);
99 while (!wi.done)
100 qemu_cond_wait(&qemu_work_cond);
103 static void inject_interrupt(void *data)
105 cpu_interrupt(current_env, (long)data);
108 void kvm_inject_interrupt(CPUState *env, int mask)
110 on_vcpu(env, inject_interrupt, (void *)(long)mask);
113 void kvm_update_interrupt_request(CPUState *env)
115 int signal = 0;
117 if (env) {
118 if (!current_env || !current_env->kvm_cpu_state.created)
119 signal = 1;
121 * Testing for created here is really redundant
123 if (current_env && current_env->kvm_cpu_state.created &&
124 env != current_env && !env->kvm_cpu_state.signalled)
125 signal = 1;
127 if (signal) {
128 env->kvm_cpu_state.signalled = 1;
129 if (env->kvm_cpu_state.thread)
130 pthread_kill(env->kvm_cpu_state.thread, SIG_IPI);
135 void kvm_update_after_sipi(CPUState *env)
137 env->kvm_cpu_state.sipi_needed = 1;
138 kvm_update_interrupt_request(env);
141 void kvm_apic_init(CPUState *env)
143 if (env->cpu_index != 0)
144 env->kvm_cpu_state.init = 1;
145 kvm_update_interrupt_request(env);
148 #include <signal.h>
/* libkvm callback: forward to the architecture-specific implementation. */
static int try_push_interrupts(void *opaque)
{
    int rc = kvm_arch_try_push_interrupts(opaque);

    return rc;
}
155 static void post_kvm_run(void *opaque, void *data)
157 CPUState *env = (CPUState *)data;
159 pthread_mutex_lock(&qemu_mutex);
160 kvm_arch_post_kvm_run(opaque, env);
163 static int pre_kvm_run(void *opaque, void *data)
165 CPUState *env = (CPUState *)data;
167 kvm_arch_pre_kvm_run(opaque, env);
169 if (env->interrupt_request & CPU_INTERRUPT_EXIT)
170 return 1;
171 pthread_mutex_unlock(&qemu_mutex);
172 return 0;
175 static void kvm_do_load_registers(void *_env)
177 CPUState *env = _env;
179 kvm_arch_load_regs(env);
182 void kvm_load_registers(CPUState *env)
184 if (kvm_enabled() && qemu_system_ready)
185 on_vcpu(env, kvm_do_load_registers, env);
188 static void kvm_do_save_registers(void *_env)
190 CPUState *env = _env;
192 kvm_arch_save_regs(env);
195 void kvm_save_registers(CPUState *env)
197 if (kvm_enabled())
198 on_vcpu(env, kvm_do_save_registers, env);
201 int kvm_cpu_exec(CPUState *env)
203 int r;
205 r = kvm_run(kvm_context, env->cpu_index, env);
206 if (r < 0) {
207 printf("kvm_run returned %d\n", r);
208 exit(1);
211 return 0;
214 static int has_work(CPUState *env)
216 if (!vm_running || (env && env->kvm_cpu_state.stopped))
217 return 0;
218 if (!env->halted)
219 return 1;
220 return kvm_arch_has_work(env);
223 static void flush_queued_work(CPUState *env)
225 struct qemu_work_item *wi;
227 if (!env->kvm_cpu_state.queued_work_first)
228 return;
230 while ((wi = env->kvm_cpu_state.queued_work_first)) {
231 env->kvm_cpu_state.queued_work_first = wi->next;
232 wi->func(wi->data);
233 wi->done = true;
235 env->kvm_cpu_state.queued_work_last = NULL;
236 pthread_cond_broadcast(&qemu_work_cond);
239 static void kvm_main_loop_wait(CPUState *env, int timeout)
241 struct timespec ts;
242 int r, e;
243 siginfo_t siginfo;
244 sigset_t waitset;
246 pthread_mutex_unlock(&qemu_mutex);
248 ts.tv_sec = timeout / 1000;
249 ts.tv_nsec = (timeout % 1000) * 1000000;
250 sigemptyset(&waitset);
251 sigaddset(&waitset, SIG_IPI);
253 r = sigtimedwait(&waitset, &siginfo, &ts);
254 e = errno;
256 pthread_mutex_lock(&qemu_mutex);
258 if (r == -1 && !(e == EAGAIN || e == EINTR)) {
259 printf("sigtimedwait: %s\n", strerror(e));
260 exit(1);
263 cpu_single_env = env;
264 flush_queued_work(env);
266 if (env->kvm_cpu_state.stop) {
267 env->kvm_cpu_state.stop = 0;
268 env->kvm_cpu_state.stopped = 1;
269 pthread_cond_signal(&qemu_pause_cond);
272 env->kvm_cpu_state.signalled = 0;
275 static int all_threads_paused(void)
277 CPUState *penv = first_cpu;
279 while (penv) {
280 if (penv->kvm_cpu_state.stop)
281 return 0;
282 penv = (CPUState *)penv->next_cpu;
285 return 1;
288 void qemu_kvm_pause_all_threads(void)
290 CPUState *penv = first_cpu;
292 while (penv) {
293 if (penv != cpu_single_env) {
294 penv->kvm_cpu_state.stop = 1;
295 pthread_kill(penv->kvm_cpu_state.thread, SIG_IPI);
296 } else {
297 penv->kvm_cpu_state.stop = 0;
298 penv->kvm_cpu_state.stopped = 1;
299 cpu_interrupt(penv, CPU_INTERRUPT_EXIT);
301 penv = (CPUState *)penv->next_cpu;
304 while (!all_threads_paused())
305 qemu_cond_wait(&qemu_pause_cond);
308 void qemu_kvm_resume_all_threads(void)
310 CPUState *penv = first_cpu;
312 assert(!cpu_single_env);
314 while (penv) {
315 penv->kvm_cpu_state.stop = 0;
316 penv->kvm_cpu_state.stopped = 0;
317 pthread_kill(penv->kvm_cpu_state.thread, SIG_IPI);
318 penv = (CPUState *)penv->next_cpu;
322 static void update_regs_for_sipi(CPUState *env)
324 kvm_arch_update_regs_for_sipi(env);
325 env->kvm_cpu_state.sipi_needed = 0;
328 static void update_regs_for_init(CPUState *env)
330 #ifdef TARGET_I386
331 SegmentCache cs = env->segs[R_CS];
332 #endif
334 cpu_reset(env);
336 #ifdef TARGET_I386
337 /* restore SIPI vector */
338 if(env->kvm_cpu_state.sipi_needed)
339 env->segs[R_CS] = cs;
340 #endif
342 env->kvm_cpu_state.init = 0;
343 kvm_arch_load_regs(env);
346 static void setup_kernel_sigmask(CPUState *env)
348 sigset_t set;
350 sigemptyset(&set);
351 sigaddset(&set, SIGUSR2);
352 sigaddset(&set, SIGIO);
353 sigaddset(&set, SIGALRM);
354 sigprocmask(SIG_BLOCK, &set, NULL);
356 sigprocmask(SIG_BLOCK, NULL, &set);
357 sigdelset(&set, SIG_IPI);
359 kvm_set_signal_mask(kvm_context, env->cpu_index, &set);
362 static void qemu_kvm_system_reset(void)
364 CPUState *penv = first_cpu;
366 qemu_kvm_pause_all_threads();
368 qemu_system_reset();
370 while (penv) {
371 kvm_arch_cpu_reset(penv);
372 penv = (CPUState *)penv->next_cpu;
375 qemu_kvm_resume_all_threads();
378 static int kvm_main_loop_cpu(CPUState *env)
380 setup_kernel_sigmask(env);
382 pthread_mutex_lock(&qemu_mutex);
383 if (kvm_irqchip_in_kernel(kvm_context))
384 env->halted = 0;
386 kvm_qemu_init_env(env);
387 #ifdef TARGET_I386
388 kvm_tpr_vcpu_start(env);
389 #endif
391 cpu_single_env = env;
392 kvm_load_registers(env);
394 while (1) {
395 while (!has_work(env))
396 kvm_main_loop_wait(env, 1000);
397 if (env->interrupt_request & (CPU_INTERRUPT_HARD | CPU_INTERRUPT_NMI))
398 env->halted = 0;
399 if (!kvm_irqchip_in_kernel(kvm_context)) {
400 if (env->kvm_cpu_state.init)
401 update_regs_for_init(env);
402 if (env->kvm_cpu_state.sipi_needed)
403 update_regs_for_sipi(env);
405 if (!env->halted && !env->kvm_cpu_state.init)
406 kvm_cpu_exec(env);
407 env->interrupt_request &= ~CPU_INTERRUPT_EXIT;
408 kvm_main_loop_wait(env, 0);
410 pthread_mutex_unlock(&qemu_mutex);
411 return 0;
414 static void *ap_main_loop(void *_env)
416 CPUState *env = _env;
417 sigset_t signals;
418 struct ioperm_data *data = NULL;
420 current_env = env;
421 env->thread_id = kvm_get_thread_id();
422 sigfillset(&signals);
423 sigprocmask(SIG_BLOCK, &signals, NULL);
424 kvm_create_vcpu(kvm_context, env->cpu_index);
425 kvm_qemu_init_env(env);
427 #ifdef USE_KVM_DEVICE_ASSIGNMENT
428 /* do ioperm for io ports of assigned devices */
429 LIST_FOREACH(data, &ioperm_head, entries)
430 on_vcpu(env, kvm_arch_do_ioperm, data);
431 #endif
433 /* signal VCPU creation */
434 pthread_mutex_lock(&qemu_mutex);
435 current_env->kvm_cpu_state.created = 1;
436 pthread_cond_signal(&qemu_vcpu_cond);
438 /* and wait for machine initialization */
439 while (!qemu_system_ready)
440 qemu_cond_wait(&qemu_system_cond);
441 pthread_mutex_unlock(&qemu_mutex);
443 kvm_main_loop_cpu(env);
444 return NULL;
447 void kvm_init_vcpu(CPUState *env)
449 pthread_create(&env->kvm_cpu_state.thread, NULL, ap_main_loop, env);
451 while (env->kvm_cpu_state.created == 0)
452 qemu_cond_wait(&qemu_vcpu_cond);
455 int kvm_init_ap(void)
457 #ifdef TARGET_I386
458 kvm_tpr_opt_setup();
459 #endif
461 signal(SIG_IPI, sig_ipi_handler);
462 return 0;
465 void qemu_kvm_notify_work(void)
467 uint64_t value = 1;
468 char buffer[8];
469 size_t offset = 0;
471 if (io_thread_fd == -1)
472 return;
474 memcpy(buffer, &value, sizeof(value));
476 while (offset < 8) {
477 ssize_t len;
479 len = write(io_thread_fd, buffer + offset, 8 - offset);
480 if (len == -1 && errno == EINTR)
481 continue;
483 if (len <= 0)
484 break;
486 offset += len;
489 if (offset != 8)
490 fprintf(stderr, "failed to notify io thread\n");
493 /* If we have signalfd, we mask out the signals we want to handle and then
494 * use signalfd to listen for them. We rely on whatever the current signal
495 * handler is to dispatch the signals when we receive them.
498 static void sigfd_handler(void *opaque)
500 int fd = (unsigned long)opaque;
501 struct qemu_signalfd_siginfo info;
502 struct sigaction action;
503 ssize_t len;
505 while (1) {
506 do {
507 len = read(fd, &info, sizeof(info));
508 } while (len == -1 && errno == EINTR);
510 if (len == -1 && errno == EAGAIN)
511 break;
513 if (len != sizeof(info)) {
514 printf("read from sigfd returned %ld: %m\n", len);
515 return;
518 sigaction(info.ssi_signo, NULL, &action);
519 if (action.sa_handler)
520 action.sa_handler(info.ssi_signo);
/*
 * Used to break IO thread out of select.
 *
 * Consumes up to 8 bytes from the descriptor carried in @opaque, retrying on
 * EINTR and giving up on any other error or end of file.
 */
static void io_thread_wakeup(void *opaque)
{
    int fd = (unsigned long)opaque;
    char scratch[8];
    size_t consumed = 0;

    while (consumed < sizeof(scratch)) {
        ssize_t len = read(fd, scratch + consumed, sizeof(scratch) - consumed);

        if (len == -1 && errno == EINTR)
            continue;
        if (len <= 0)
            break;

        consumed += len;
    }
}
546 int kvm_main_loop(void)
548 int fds[2];
549 sigset_t mask;
550 int sigfd;
552 io_thread = pthread_self();
553 qemu_system_ready = 1;
555 if (qemu_eventfd(fds) == -1) {
556 fprintf(stderr, "failed to create eventfd\n");
557 return -errno;
560 qemu_set_fd_handler2(fds[0], NULL, io_thread_wakeup, NULL,
561 (void *)(unsigned long)fds[0]);
563 io_thread_fd = fds[1];
565 sigemptyset(&mask);
566 sigaddset(&mask, SIGIO);
567 sigaddset(&mask, SIGALRM);
568 sigprocmask(SIG_BLOCK, &mask, NULL);
570 sigfd = qemu_signalfd(&mask);
571 if (sigfd == -1) {
572 fprintf(stderr, "failed to create signalfd\n");
573 return -errno;
576 fcntl(sigfd, F_SETFL, O_NONBLOCK);
578 qemu_set_fd_handler2(sigfd, NULL, sigfd_handler, NULL,
579 (void *)(unsigned long)sigfd);
581 pthread_cond_broadcast(&qemu_system_cond);
583 io_thread_sigfd = sigfd;
584 cpu_single_env = NULL;
586 while (1) {
587 main_loop_wait(1000);
588 if (qemu_shutdown_requested())
589 break;
590 else if (qemu_powerdown_requested())
591 qemu_system_powerdown();
592 else if (qemu_reset_requested())
593 qemu_kvm_system_reset();
594 #ifdef CONFIG_GDBSTUB
595 else if (kvm_debug_cpu_requested) {
596 gdb_set_stop_cpu(kvm_debug_cpu_requested);
597 vm_stop(EXCP_DEBUG);
598 kvm_debug_cpu_requested = NULL;
600 #endif
603 qemu_kvm_pause_all_threads();
604 pthread_mutex_unlock(&qemu_mutex);
606 return 0;
609 #ifdef KVM_CAP_SET_GUEST_DEBUG
610 static int kvm_debug(void *opaque, void *data,
611 struct kvm_debug_exit_arch *arch_info)
613 int handle = kvm_arch_debug(arch_info);
614 struct CPUState *env = data;
616 if (handle) {
617 kvm_debug_cpu_requested = env;
618 env->kvm_cpu_state.stopped = 1;
620 return handle;
622 #endif
/* libkvm callback: 8-bit port read from @addr into *@data. Always returns 0. */
static int kvm_inb(void *opaque, uint16_t addr, uint8_t *data)
{
    uint8_t value = cpu_inb(0, addr);

    *data = value;
    return 0;
}
/* libkvm callback: 16-bit port read from @addr into *@data. Always returns 0. */
static int kvm_inw(void *opaque, uint16_t addr, uint16_t *data)
{
    uint16_t value = cpu_inw(0, addr);

    *data = value;
    return 0;
}
/* libkvm callback: 32-bit port read from @addr into *@data. Always returns 0. */
static int kvm_inl(void *opaque, uint16_t addr, uint32_t *data)
{
    uint32_t value = cpu_inl(0, addr);

    *data = value;
    return 0;
}
#define PM_IO_BASE 0xb000

/*
 * libkvm callback: 8-bit port write.
 *
 * Writes to port 0xb2 are intercepted: 0 is answered by writing 0 to port
 * 0xb3, 0xf0 clears and 0xf1 sets bit 0 of the 16-bit register at
 * PM_IO_BASE + 4, and every other value is ignored.  All other ports are
 * forwarded to cpu_outb().  Always returns 0.
 */
static int kvm_outb(void *opaque, uint16_t addr, uint8_t data)
{
    unsigned pm_ctl;

    if (addr != 0xb2) {
        cpu_outb(0, addr, data);
        return 0;
    }

    switch (data) {
    case 0:
        cpu_outb(0, 0xb3, 0);
        break;
    case 0xf0:
        /* clear bit 0: the inverse of the "enable acpi" request below */
        pm_ctl = cpu_inw(0, PM_IO_BASE + 4);
        pm_ctl &= ~1u;
        cpu_outw(0, PM_IO_BASE + 4, pm_ctl);
        break;
    case 0xf1:
        /* enable acpi */
        pm_ctl = cpu_inw(0, PM_IO_BASE + 4);
        pm_ctl |= 1u;
        cpu_outw(0, PM_IO_BASE + 4, pm_ctl);
        break;
    default:
        break;
    }

    return 0;
}
/* libkvm callback: 16-bit port write of @data to @addr. Always returns 0. */
static int kvm_outw(void *opaque, uint16_t addr, uint16_t data)
{
    const int status = 0;

    cpu_outw(0, addr, data);
    return status;
}
/* libkvm callback: 32-bit port write of @data to @addr. Always returns 0. */
static int kvm_outl(void *opaque, uint16_t addr, uint32_t data)
{
    const int status = 0;

    cpu_outl(0, addr, data);
    return status;
}
/* libkvm callback: read @len bytes of guest physical memory at @addr into @data. */
static int kvm_mmio_read(void *opaque, uint64_t addr, uint8_t *data, int len)
{
    const int is_write = 0;

    cpu_physical_memory_rw(addr, data, len, is_write);
    return 0;
}
/* libkvm callback: write @len bytes from @data to guest physical memory at @addr. */
static int kvm_mmio_write(void *opaque, uint64_t addr, uint8_t *data, int len)
{
    const int is_write = 1;

    cpu_physical_memory_rw(addr, data, len, is_write);
    return 0;
}
/* libkvm callback: unconditionally returns 1; @opaque is not used. */
static int kvm_io_window(void *opaque)
{
    (void)opaque;

    return 1;
}
/* libkvm callback: forward a guest halt on @vcpu to the architecture code. */
static int kvm_halt(void *opaque, int vcpu)
{
    int rc = kvm_arch_halt(opaque, vcpu);

    return rc;
}
714 static int kvm_shutdown(void *opaque, void *data)
716 struct CPUState *env = (struct CPUState *)data;
718 /* stop the current vcpu from going back to guest mode */
719 env->kvm_cpu_state.stopped = 1;
721 qemu_system_reset_request();
722 return 1;
725 static struct kvm_callbacks qemu_kvm_ops = {
726 #ifdef KVM_CAP_SET_GUEST_DEBUG
727 .debug = kvm_debug,
728 #endif
729 .inb = kvm_inb,
730 .inw = kvm_inw,
731 .inl = kvm_inl,
732 .outb = kvm_outb,
733 .outw = kvm_outw,
734 .outl = kvm_outl,
735 .mmio_read = kvm_mmio_read,
736 .mmio_write = kvm_mmio_write,
737 .halt = kvm_halt,
738 .shutdown = kvm_shutdown,
739 .io_window = kvm_io_window,
740 .try_push_interrupts = try_push_interrupts,
741 #ifdef KVM_CAP_USER_NMI
742 .push_nmi = kvm_arch_push_nmi,
743 #endif
744 .post_kvm_run = post_kvm_run,
745 .pre_kvm_run = pre_kvm_run,
746 #ifdef TARGET_I386
747 .tpr_access = handle_tpr_access,
748 #endif
749 #ifdef TARGET_PPC
750 .powerpc_dcr_read = handle_powerpc_dcr_read,
751 .powerpc_dcr_write = handle_powerpc_dcr_write,
752 #endif
755 int kvm_qemu_init()
757 /* Try to initialize kvm */
758 kvm_context = kvm_init(&qemu_kvm_ops, cpu_single_env);
759 if (!kvm_context) {
760 return -1;
762 pthread_mutex_lock(&qemu_mutex);
764 return 0;
767 #ifdef TARGET_I386
/* Result of kvm_destroy_memory_region_works(); when set, alias workarounds are skipped. */
static int destroy_region_works = 0;
769 #endif
771 int kvm_qemu_create_context(void)
773 int r;
774 int i;
776 if (!kvm_irqchip) {
777 kvm_disable_irqchip_creation(kvm_context);
779 if (!kvm_pit) {
780 kvm_disable_pit_creation(kvm_context);
782 if (kvm_create(kvm_context, phys_ram_size, (void**)&phys_ram_base) < 0) {
783 kvm_qemu_destroy();
784 return -1;
786 r = kvm_arch_qemu_create_context();
787 if(r <0)
788 kvm_qemu_destroy();
789 if (kvm_pit && !kvm_pit_reinject) {
790 if (kvm_reinject_control(kvm_context, 0)) {
791 fprintf(stderr, "failure to disable in-kernel PIT reinjection\n");
792 return -1;
795 #ifdef TARGET_I386
796 destroy_region_works = kvm_destroy_memory_region_works(kvm_context);
797 #endif
799 if (kvm_irqchip && kvm_has_gsi_routing(kvm_context)) {
800 kvm_clear_gsi_routes(kvm_context);
801 for (i = 0; i < 8; ++i) {
802 if (i == 2)
803 continue;
804 r = kvm_add_irq_route(kvm_context, i, KVM_IRQCHIP_PIC_MASTER, i);
805 if (r < 0)
806 return r;
808 for (i = 8; i < 16; ++i) {
809 r = kvm_add_irq_route(kvm_context, i, KVM_IRQCHIP_PIC_SLAVE, i - 8);
810 if (r < 0)
811 return r;
813 for (i = 0; i < 24; ++i) {
814 r = kvm_add_irq_route(kvm_context, i, KVM_IRQCHIP_IOAPIC, i);
815 if (r < 0)
816 return r;
818 kvm_commit_irq_routes(kvm_context);
820 return 0;
823 void kvm_qemu_destroy(void)
825 kvm_finalize(kvm_context);
828 #ifdef TARGET_I386
829 static int must_use_aliases_source(target_phys_addr_t addr)
831 if (destroy_region_works)
832 return false;
833 if (addr == 0xa0000 || addr == 0xa8000)
834 return true;
835 return false;
838 static int must_use_aliases_target(target_phys_addr_t addr)
840 if (destroy_region_works)
841 return false;
842 if (addr >= 0xe0000000 && addr < 0x100000000ull)
843 return true;
844 return false;
847 static struct mapping {
848 target_phys_addr_t phys;
849 ram_addr_t ram;
850 ram_addr_t len;
851 } mappings[50];
852 static int nr_mappings;
854 static struct mapping *find_ram_mapping(ram_addr_t ram_addr)
856 struct mapping *p;
858 for (p = mappings; p < mappings + nr_mappings; ++p) {
859 if (p->ram <= ram_addr && ram_addr < p->ram + p->len) {
860 return p;
863 return NULL;
866 static struct mapping *find_mapping(target_phys_addr_t start_addr)
868 struct mapping *p;
870 for (p = mappings; p < mappings + nr_mappings; ++p) {
871 if (p->phys <= start_addr && start_addr < p->phys + p->len) {
872 return p;
875 return NULL;
878 static void drop_mapping(target_phys_addr_t start_addr)
880 struct mapping *p = find_mapping(start_addr);
882 if (p)
883 *p = mappings[--nr_mappings];
885 #endif
887 void kvm_cpu_register_physical_memory(target_phys_addr_t start_addr,
888 unsigned long size,
889 unsigned long phys_offset)
891 int r = 0;
892 unsigned long area_flags;
893 #ifdef TARGET_I386
894 struct mapping *p;
895 #endif
897 phys_offset &= ~IO_MEM_ROM;
898 area_flags = phys_offset & ~TARGET_PAGE_MASK;
900 if (area_flags != IO_MEM_RAM) {
901 #ifdef TARGET_I386
902 if (must_use_aliases_source(start_addr)) {
903 kvm_destroy_memory_alias(kvm_context, start_addr);
904 return;
906 if (must_use_aliases_target(start_addr))
907 return;
908 #endif
909 while (size > 0) {
910 p = find_mapping(start_addr);
911 if (p) {
912 kvm_unregister_memory_area(kvm_context, p->phys, p->len);
913 drop_mapping(p->phys);
915 start_addr += TARGET_PAGE_SIZE;
916 if (size > TARGET_PAGE_SIZE) {
917 size -= TARGET_PAGE_SIZE;
918 } else {
919 size = 0;
922 return;
925 r = kvm_is_containing_region(kvm_context, start_addr, size);
926 if (r)
927 return;
929 if (area_flags >= TLB_MMIO)
930 return;
932 #ifdef TARGET_I386
933 if (must_use_aliases_source(start_addr)) {
934 p = find_ram_mapping(phys_offset);
935 if (p) {
936 kvm_create_memory_alias(kvm_context, start_addr, size,
937 p->phys + (phys_offset - p->ram));
939 return;
941 #endif
943 r = kvm_register_phys_mem(kvm_context, start_addr,
944 phys_ram_base + phys_offset,
945 size, 0);
946 if (r < 0) {
947 printf("kvm_cpu_register_physical_memory: failed\n");
948 exit(1);
951 #ifdef TARGET_I386
952 drop_mapping(start_addr);
953 p = &mappings[nr_mappings++];
954 p->phys = start_addr;
955 p->ram = phys_offset;
956 p->len = size;
957 #endif
959 return;
962 void kvm_cpu_unregister_physical_memory(target_phys_addr_t start_addr,
963 target_phys_addr_t size,
964 unsigned long phys_offset)
966 kvm_unregister_memory_area(kvm_context, start_addr, size);
/*
 * Where MADV_DONTFORK exists, KVM is enabled and the sync-mmu capability is
 * missing, mark [@area, @area + @size) MADV_DONTFORK.  Returns the madvise()
 * result (0 when nothing was done); a failure is reported with perror().
 */
int kvm_setup_guest_memory(void *area, unsigned long size)
{
    int rc = 0;

#ifdef MADV_DONTFORK
    if (kvm_enabled() && !kvm_has_sync_mmu()) {
        rc = madvise(area, size, MADV_DONTFORK);
    }
#endif

    if (rc != 0) {
        perror ("madvise");
    }

    return rc;
}
984 int kvm_qemu_check_extension(int ext)
986 return kvm_check_extension(kvm_context, ext);
989 int kvm_qemu_init_env(CPUState *cenv)
991 return kvm_arch_qemu_init_env(cenv);
994 #ifdef KVM_CAP_SET_GUEST_DEBUG
995 struct kvm_sw_breakpoint_head kvm_sw_breakpoints =
996 TAILQ_HEAD_INITIALIZER(kvm_sw_breakpoints);
998 struct kvm_sw_breakpoint *kvm_find_sw_breakpoint(target_ulong pc)
1000 struct kvm_sw_breakpoint *bp;
1002 TAILQ_FOREACH(bp, &kvm_sw_breakpoints, entry) {
1003 if (bp->pc == pc)
1004 return bp;
1006 return NULL;
1009 struct kvm_set_guest_debug_data {
1010 struct kvm_guest_debug dbg;
1011 int err;
1014 static void kvm_invoke_set_guest_debug(void *data)
1016 struct kvm_set_guest_debug_data *dbg_data = data;
1018 dbg_data->err = kvm_set_guest_debug(kvm_context, cpu_single_env->cpu_index,
1019 &dbg_data->dbg);
1022 int kvm_update_guest_debug(CPUState *env, unsigned long reinject_trap)
1024 struct kvm_set_guest_debug_data data;
1026 data.dbg.control = 0;
1027 if (env->singlestep_enabled)
1028 data.dbg.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP;
1030 kvm_arch_update_guest_debug(env, &data.dbg);
1031 data.dbg.control |= reinject_trap;
1033 on_vcpu(env, kvm_invoke_set_guest_debug, &data);
1034 return data.err;
1037 int kvm_insert_breakpoint(CPUState *current_env, target_ulong addr,
1038 target_ulong len, int type)
1040 struct kvm_sw_breakpoint *bp;
1041 CPUState *env;
1042 int err;
1044 if (type == GDB_BREAKPOINT_SW) {
1045 bp = kvm_find_sw_breakpoint(addr);
1046 if (bp) {
1047 bp->use_count++;
1048 return 0;
1051 bp = qemu_malloc(sizeof(struct kvm_sw_breakpoint));
1052 if (!bp)
1053 return -ENOMEM;
1055 bp->pc = addr;
1056 bp->use_count = 1;
1057 err = kvm_arch_insert_sw_breakpoint(current_env, bp);
1058 if (err) {
1059 free(bp);
1060 return err;
1063 TAILQ_INSERT_HEAD(&kvm_sw_breakpoints, bp, entry);
1064 } else {
1065 err = kvm_arch_insert_hw_breakpoint(addr, len, type);
1066 if (err)
1067 return err;
1070 for (env = first_cpu; env != NULL; env = env->next_cpu) {
1071 err = kvm_update_guest_debug(env, 0);
1072 if (err)
1073 return err;
1075 return 0;
1078 int kvm_remove_breakpoint(CPUState *current_env, target_ulong addr,
1079 target_ulong len, int type)
1081 struct kvm_sw_breakpoint *bp;
1082 CPUState *env;
1083 int err;
1085 if (type == GDB_BREAKPOINT_SW) {
1086 bp = kvm_find_sw_breakpoint(addr);
1087 if (!bp)
1088 return -ENOENT;
1090 if (bp->use_count > 1) {
1091 bp->use_count--;
1092 return 0;
1095 err = kvm_arch_remove_sw_breakpoint(current_env, bp);
1096 if (err)
1097 return err;
1099 TAILQ_REMOVE(&kvm_sw_breakpoints, bp, entry);
1100 qemu_free(bp);
1101 } else {
1102 err = kvm_arch_remove_hw_breakpoint(addr, len, type);
1103 if (err)
1104 return err;
1107 for (env = first_cpu; env != NULL; env = env->next_cpu) {
1108 err = kvm_update_guest_debug(env, 0);
1109 if (err)
1110 return err;
1112 return 0;
1115 void kvm_remove_all_breakpoints(CPUState *current_env)
1117 struct kvm_sw_breakpoint *bp, *next;
1118 CPUState *env;
1120 TAILQ_FOREACH_SAFE(bp, &kvm_sw_breakpoints, entry, next) {
1121 if (kvm_arch_remove_sw_breakpoint(current_env, bp) != 0) {
1122 /* Try harder to find a CPU that currently sees the breakpoint. */
1123 for (env = first_cpu; env != NULL; env = env->next_cpu) {
1124 if (kvm_arch_remove_sw_breakpoint(env, bp) == 0)
1125 break;
1129 kvm_arch_remove_all_hw_breakpoints();
1131 for (env = first_cpu; env != NULL; env = env->next_cpu)
1132 kvm_update_guest_debug(env, 0);
1135 #else /* !KVM_CAP_SET_GUEST_DEBUG */
1137 int kvm_update_guest_debug(CPUState *env, unsigned long reinject_trap)
1139 return -EINVAL;
1142 int kvm_insert_breakpoint(CPUState *current_env, target_ulong addr,
1143 target_ulong len, int type)
1145 return -EINVAL;
1148 int kvm_remove_breakpoint(CPUState *current_env, target_ulong addr,
1149 target_ulong len, int type)
1151 return -EINVAL;
1154 void kvm_remove_all_breakpoints(CPUState *current_env)
1157 #endif /* !KVM_CAP_SET_GUEST_DEBUG */
/*
 * dirty pages logging
 */

/* FIXME: use unsigned long pointer instead of unsigned char */
/* Dirty bitmap buffer; allocated while dirty tracking is enabled, else NULL. */
unsigned char *kvm_dirty_bitmap = NULL;
1164 int kvm_physical_memory_set_dirty_tracking(int enable)
1166 int r = 0;
1168 if (!kvm_enabled())
1169 return 0;
1171 if (enable) {
1172 if (!kvm_dirty_bitmap) {
1173 unsigned bitmap_size = BITMAP_SIZE(phys_ram_size);
1174 kvm_dirty_bitmap = qemu_malloc(bitmap_size);
1175 if (kvm_dirty_bitmap == NULL) {
1176 perror("Failed to allocate dirty pages bitmap");
1177 r=-1;
1179 else {
1180 r = kvm_dirty_pages_log_enable_all(kvm_context);
1184 else {
1185 if (kvm_dirty_bitmap) {
1186 r = kvm_dirty_pages_log_reset(kvm_context);
1187 qemu_free(kvm_dirty_bitmap);
1188 kvm_dirty_bitmap = NULL;
1191 return r;
1194 /* get kvm's dirty pages bitmap and update qemu's */
1195 static int kvm_get_dirty_pages_log_range(unsigned long start_addr,
1196 unsigned char *bitmap,
1197 unsigned int offset,
1198 unsigned long mem_size)
1200 unsigned int i, j, n=0;
1201 unsigned char c;
1202 unsigned long page_number, addr, addr1;
1203 ram_addr_t ram_addr;
1204 unsigned int len = ((mem_size/TARGET_PAGE_SIZE) + 7) / 8;
1207 * bitmap-traveling is faster than memory-traveling (for addr...)
1208 * especially when most of the memory is not dirty.
1210 for (i=0; i<len; i++) {
1211 c = bitmap[i];
1212 while (c>0) {
1213 j = ffsl(c) - 1;
1214 c &= ~(1u<<j);
1215 page_number = i * 8 + j;
1216 addr1 = page_number * TARGET_PAGE_SIZE;
1217 addr = offset + addr1;
1218 ram_addr = cpu_get_physical_page_desc(addr);
1219 cpu_physical_memory_set_dirty(ram_addr);
1220 n++;
1223 return 0;
/*
 * Per-range callback for kvm_get_dirty_pages_range(): folds @bitmap for the
 * range starting at @start (length @len) into qemu's dirty state.
 */
static int kvm_get_dirty_bitmap_cb(unsigned long start, unsigned long len,
                                   void *bitmap, void *opaque)
{
    unsigned char *bits = bitmap;

    (void)opaque;

    return kvm_get_dirty_pages_log_range(start, bits, start, len);
}
1232 * get kvm's dirty pages bitmap and update qemu's
1233 * we only care about physical ram, which resides in slots 0 and 3
1235 int kvm_update_dirty_pages_log(void)
1237 int r = 0;
1240 r = kvm_get_dirty_pages_range(kvm_context, 0, phys_ram_size,
1241 kvm_dirty_bitmap, NULL,
1242 kvm_get_dirty_bitmap_cb);
1243 return r;
1246 void kvm_qemu_log_memory(target_phys_addr_t start, target_phys_addr_t size,
1247 int log)
1249 if (log)
1250 kvm_dirty_pages_log_enable_slot(kvm_context, start, size);
1251 else {
1252 #ifdef TARGET_I386
1253 if (must_use_aliases_target(start))
1254 return;
1255 #endif
1256 kvm_dirty_pages_log_disable_slot(kvm_context, start, size);
1260 int kvm_get_phys_ram_page_bitmap(unsigned char *bitmap)
1262 unsigned int bsize = BITMAP_SIZE(phys_ram_size);
1263 unsigned int brsize = BITMAP_SIZE(ram_size);
1264 unsigned int extra_pages = (phys_ram_size - ram_size) / TARGET_PAGE_SIZE;
1265 unsigned int extra_bytes = (extra_pages +7)/8;
1266 unsigned int hole_start = BITMAP_SIZE(0xa0000);
1267 unsigned int hole_end = BITMAP_SIZE(0xc0000);
1269 memset(bitmap, 0xFF, brsize + extra_bytes);
1270 memset(bitmap + hole_start, 0, hole_end - hole_start);
1271 memset(bitmap + brsize + extra_bytes, 0, bsize - brsize - extra_bytes);
1273 return 0;
1276 #ifdef KVM_CAP_IRQCHIP
1278 int kvm_set_irq(int irq, int level, int *status)
1280 return kvm_set_irq_level(kvm_context, irq, level, status);
1283 #endif
1285 int qemu_kvm_get_dirty_pages(unsigned long phys_addr, void *buf)
1287 return kvm_get_dirty_pages(kvm_context, phys_addr, buf);
1290 void *kvm_cpu_create_phys_mem(target_phys_addr_t start_addr,
1291 unsigned long size, int log, int writable)
1293 return kvm_create_phys_mem(kvm_context, start_addr, size, log, writable);
1296 void kvm_cpu_destroy_phys_mem(target_phys_addr_t start_addr,
1297 unsigned long size)
1299 kvm_destroy_phys_mem(kvm_context, start_addr, size);
1302 void kvm_mutex_unlock(void)
1304 assert(!cpu_single_env);
1305 pthread_mutex_unlock(&qemu_mutex);
1308 void kvm_mutex_lock(void)
1310 pthread_mutex_lock(&qemu_mutex);
1311 cpu_single_env = NULL;
1314 int qemu_kvm_register_coalesced_mmio(target_phys_addr_t addr, unsigned int size)
1316 return kvm_register_coalesced_mmio(kvm_context, addr, size);
1319 int qemu_kvm_unregister_coalesced_mmio(target_phys_addr_t addr,
1320 unsigned int size)
1322 return kvm_unregister_coalesced_mmio(kvm_context, addr, size);
1325 int kvm_coalesce_mmio_region(target_phys_addr_t start, ram_addr_t size)
1327 return kvm_register_coalesced_mmio(kvm_context, start, size);
1330 int kvm_uncoalesce_mmio_region(target_phys_addr_t start, ram_addr_t size)
1332 return kvm_unregister_coalesced_mmio(kvm_context, start, size);
1335 #ifdef USE_KVM_DEVICE_ASSIGNMENT
1336 void kvm_add_ioperm_data(struct ioperm_data *data)
1338 LIST_INSERT_HEAD(&ioperm_head, data, entries);
1341 void kvm_remove_ioperm_data(unsigned long start_port, unsigned long num)
1343 struct ioperm_data *data;
1345 data = LIST_FIRST(&ioperm_head);
1346 while (data) {
1347 struct ioperm_data *next = LIST_NEXT(data, entries);
1349 if (data->start_port == start_port && data->num == num) {
1350 LIST_REMOVE(data, entries);
1351 qemu_free(data);
1354 data = next;
1358 void kvm_ioperm(CPUState *env, void *data)
1360 if (kvm_enabled() && qemu_system_ready)
1361 on_vcpu(env, kvm_arch_do_ioperm, data);
1364 #endif
1366 void kvm_physical_sync_dirty_bitmap(target_phys_addr_t start_addr, target_phys_addr_t end_addr)
1368 #ifndef TARGET_IA64
1369 void *buf;
1371 #ifdef TARGET_I386
1372 if (must_use_aliases_source(start_addr))
1373 return;
1374 #endif
1376 buf = qemu_malloc((end_addr - start_addr) / 8 + 2);
1377 kvm_get_dirty_pages_range(kvm_context, start_addr, end_addr - start_addr,
1378 buf, NULL, kvm_get_dirty_bitmap_cb);
1379 qemu_free(buf);
1380 #endif
1383 int kvm_log_start(target_phys_addr_t phys_addr, target_phys_addr_t len)
1385 #ifdef TARGET_I386
1386 if (must_use_aliases_source(phys_addr))
1387 return 0;
1388 #endif
1389 kvm_qemu_log_memory(phys_addr, len, 1);
1390 return 0;
1393 int kvm_log_stop(target_phys_addr_t phys_addr, target_phys_addr_t len)
1395 #ifdef TARGET_I386
1396 if (must_use_aliases_source(phys_addr))
1397 return 0;
1398 #endif
1399 kvm_qemu_log_memory(phys_addr, len, 0);
1400 return 0;
1403 /* hack: both libkvm and upstream qemu define kvm_has_sync_mmu(), differently */
1404 #undef kvm_has_sync_mmu
1405 int qemu_kvm_has_sync_mmu(void)
1407 return kvm_has_sync_mmu(kvm_context);
1410 void qemu_kvm_cpu_stop(CPUState *env)
1412 if (kvm_enabled())
1413 env->kvm_cpu_state.stopped = 1;