Drop qemu-kvm.h from e1000.c
[qemu-kvm/fedora.git] / qemu-kvm.c
blob68a9218836c73b5d09c30c681c012b2b3646a7f5
/*
 * qemu/kvm integration
 *
 * Copyright (C) 2006-2008 Qumranet Technologies
 *
 * Licensed under the terms of the GNU GPL version 2 or higher.
 */
8 #include "config.h"
9 #include "config-host.h"
11 #include <assert.h>
12 #include <string.h>
13 #include "hw/hw.h"
14 #include "sysemu.h"
15 #include "qemu-common.h"
16 #include "console.h"
17 #include "block.h"
18 #include "compatfd.h"
19 #include "gdbstub.h"
21 #include "qemu-kvm.h"
22 #include <libkvm.h>
23 #include <pthread.h>
24 #include <sys/utsname.h>
25 #include <sys/syscall.h>
26 #include <sys/mman.h>
28 #define false 0
29 #define true 1
31 int kvm_allowed = 1;
32 int kvm_irqchip = 1;
33 int kvm_pit = 1;
34 int kvm_pit_reinject = 1;
35 int kvm_nested = 0;
36 kvm_context_t kvm_context;
38 pthread_mutex_t qemu_mutex = PTHREAD_MUTEX_INITIALIZER;
39 pthread_cond_t qemu_vcpu_cond = PTHREAD_COND_INITIALIZER;
40 pthread_cond_t qemu_system_cond = PTHREAD_COND_INITIALIZER;
41 pthread_cond_t qemu_pause_cond = PTHREAD_COND_INITIALIZER;
42 pthread_cond_t qemu_work_cond = PTHREAD_COND_INITIALIZER;
43 __thread CPUState *current_env;
45 static int qemu_system_ready;
47 #define SIG_IPI (SIGRTMIN+4)
49 pthread_t io_thread;
50 static int io_thread_fd = -1;
51 static int io_thread_sigfd = -1;
53 static CPUState *kvm_debug_cpu_requested;
55 static uint64_t phys_ram_size;
57 /* The list of ioperm_data */
58 static LIST_HEAD(, ioperm_data) ioperm_head;
/* Return the kernel thread id of the calling thread (SYS_gettid). */
static inline unsigned long kvm_get_thread_id(void)
{
    long tid = syscall(SYS_gettid);

    return tid;
}
65 static void qemu_cond_wait(pthread_cond_t *cond)
67 CPUState *env = cpu_single_env;
68 static const struct timespec ts = {
69 .tv_sec = 0,
70 .tv_nsec = 100000,
73 pthread_cond_timedwait(cond, &qemu_mutex, &ts);
74 cpu_single_env = env;
/*
 * SIG_IPI handler.  Intentionally does nothing: the signal is only used
 * to interrupt / wake a vcpu thread.
 */
static void sig_ipi_handler(int n)
{
    (void)n;
}
81 static void on_vcpu(CPUState *env, void (*func)(void *data), void *data)
83 struct qemu_work_item wi;
85 if (env == current_env) {
86 func(data);
87 return;
90 wi.func = func;
91 wi.data = data;
92 if (!env->kvm_cpu_state.queued_work_first)
93 env->kvm_cpu_state.queued_work_first = &wi;
94 else
95 env->kvm_cpu_state.queued_work_last->next = &wi;
96 env->kvm_cpu_state.queued_work_last = &wi;
97 wi.next = NULL;
98 wi.done = false;
100 pthread_kill(env->kvm_cpu_state.thread, SIG_IPI);
101 while (!wi.done)
102 qemu_cond_wait(&qemu_work_cond);
105 static void inject_interrupt(void *data)
107 cpu_interrupt(current_env, (long)data);
110 void kvm_inject_interrupt(CPUState *env, int mask)
112 on_vcpu(env, inject_interrupt, (void *)(long)mask);
115 void kvm_update_interrupt_request(CPUState *env)
117 int signal = 0;
119 if (env) {
120 if (!current_env || !current_env->kvm_cpu_state.created)
121 signal = 1;
123 * Testing for created here is really redundant
125 if (current_env && current_env->kvm_cpu_state.created &&
126 env != current_env && !env->kvm_cpu_state.signalled)
127 signal = 1;
129 if (signal) {
130 env->kvm_cpu_state.signalled = 1;
131 if (env->kvm_cpu_state.thread)
132 pthread_kill(env->kvm_cpu_state.thread, SIG_IPI);
137 void kvm_update_after_sipi(CPUState *env)
139 env->kvm_cpu_state.sipi_needed = 1;
140 kvm_update_interrupt_request(env);
143 void kvm_apic_init(CPUState *env)
145 if (env->cpu_index != 0)
146 env->kvm_cpu_state.init = 1;
147 kvm_update_interrupt_request(env);
150 #include <signal.h>
/* libkvm callback: forwards to the architecture-specific implementation. */
static int try_push_interrupts(void *opaque)
{
    int rc = kvm_arch_try_push_interrupts(opaque);

    return rc;
}
157 static void post_kvm_run(void *opaque, void *data)
159 CPUState *env = (CPUState *)data;
161 pthread_mutex_lock(&qemu_mutex);
162 kvm_arch_post_kvm_run(opaque, env);
165 static int pre_kvm_run(void *opaque, void *data)
167 CPUState *env = (CPUState *)data;
169 kvm_arch_pre_kvm_run(opaque, env);
171 if (env->exit_request)
172 return 1;
173 pthread_mutex_unlock(&qemu_mutex);
174 return 0;
177 static void kvm_do_load_registers(void *_env)
179 CPUState *env = _env;
181 kvm_arch_load_regs(env);
184 void kvm_load_registers(CPUState *env)
186 if (kvm_enabled() && qemu_system_ready)
187 on_vcpu(env, kvm_do_load_registers, env);
190 static void kvm_do_save_registers(void *_env)
192 CPUState *env = _env;
194 kvm_arch_save_regs(env);
197 void kvm_save_registers(CPUState *env)
199 if (kvm_enabled())
200 on_vcpu(env, kvm_do_save_registers, env);
203 int kvm_cpu_exec(CPUState *env)
205 int r;
207 r = kvm_run(kvm_context, env->cpu_index, env);
208 if (r < 0) {
209 printf("kvm_run returned %d\n", r);
210 exit(1);
213 return 0;
216 static int has_work(CPUState *env)
218 if (!vm_running || (env && env->kvm_cpu_state.stopped))
219 return 0;
220 if (!env->halted)
221 return 1;
222 return kvm_arch_has_work(env);
225 static void flush_queued_work(CPUState *env)
227 struct qemu_work_item *wi;
229 if (!env->kvm_cpu_state.queued_work_first)
230 return;
232 while ((wi = env->kvm_cpu_state.queued_work_first)) {
233 env->kvm_cpu_state.queued_work_first = wi->next;
234 wi->func(wi->data);
235 wi->done = true;
237 env->kvm_cpu_state.queued_work_last = NULL;
238 pthread_cond_broadcast(&qemu_work_cond);
241 static void kvm_main_loop_wait(CPUState *env, int timeout)
243 struct timespec ts;
244 int r, e;
245 siginfo_t siginfo;
246 sigset_t waitset;
248 pthread_mutex_unlock(&qemu_mutex);
250 ts.tv_sec = timeout / 1000;
251 ts.tv_nsec = (timeout % 1000) * 1000000;
252 sigemptyset(&waitset);
253 sigaddset(&waitset, SIG_IPI);
255 r = sigtimedwait(&waitset, &siginfo, &ts);
256 e = errno;
258 pthread_mutex_lock(&qemu_mutex);
260 if (r == -1 && !(e == EAGAIN || e == EINTR)) {
261 printf("sigtimedwait: %s\n", strerror(e));
262 exit(1);
265 cpu_single_env = env;
266 flush_queued_work(env);
268 if (env->kvm_cpu_state.stop) {
269 env->kvm_cpu_state.stop = 0;
270 env->kvm_cpu_state.stopped = 1;
271 pthread_cond_signal(&qemu_pause_cond);
274 env->kvm_cpu_state.signalled = 0;
277 static int all_threads_paused(void)
279 CPUState *penv = first_cpu;
281 while (penv) {
282 if (penv->kvm_cpu_state.stop)
283 return 0;
284 penv = (CPUState *)penv->next_cpu;
287 return 1;
290 void qemu_kvm_pause_all_threads(void)
292 CPUState *penv = first_cpu;
294 while (penv) {
295 if (penv != cpu_single_env) {
296 penv->kvm_cpu_state.stop = 1;
297 pthread_kill(penv->kvm_cpu_state.thread, SIG_IPI);
298 } else {
299 penv->kvm_cpu_state.stop = 0;
300 penv->kvm_cpu_state.stopped = 1;
301 cpu_exit(penv);
303 penv = (CPUState *)penv->next_cpu;
306 while (!all_threads_paused())
307 qemu_cond_wait(&qemu_pause_cond);
310 void qemu_kvm_resume_all_threads(void)
312 CPUState *penv = first_cpu;
314 assert(!cpu_single_env);
316 while (penv) {
317 penv->kvm_cpu_state.stop = 0;
318 penv->kvm_cpu_state.stopped = 0;
319 pthread_kill(penv->kvm_cpu_state.thread, SIG_IPI);
320 penv = (CPUState *)penv->next_cpu;
324 static void update_regs_for_sipi(CPUState *env)
326 kvm_arch_update_regs_for_sipi(env);
327 env->kvm_cpu_state.sipi_needed = 0;
330 static void update_regs_for_init(CPUState *env)
332 #ifdef TARGET_I386
333 SegmentCache cs = env->segs[R_CS];
334 #endif
336 cpu_reset(env);
338 #ifdef TARGET_I386
339 /* restore SIPI vector */
340 if(env->kvm_cpu_state.sipi_needed)
341 env->segs[R_CS] = cs;
342 #endif
344 env->kvm_cpu_state.init = 0;
345 kvm_arch_load_regs(env);
348 static void setup_kernel_sigmask(CPUState *env)
350 sigset_t set;
352 sigemptyset(&set);
353 sigaddset(&set, SIGUSR2);
354 sigaddset(&set, SIGIO);
355 sigaddset(&set, SIGALRM);
356 sigprocmask(SIG_BLOCK, &set, NULL);
358 sigprocmask(SIG_BLOCK, NULL, &set);
359 sigdelset(&set, SIG_IPI);
361 kvm_set_signal_mask(kvm_context, env->cpu_index, &set);
364 static void qemu_kvm_system_reset(void)
366 CPUState *penv = first_cpu;
368 qemu_kvm_pause_all_threads();
370 qemu_system_reset();
372 while (penv) {
373 kvm_arch_cpu_reset(penv);
374 penv = (CPUState *)penv->next_cpu;
377 qemu_kvm_resume_all_threads();
380 static int kvm_main_loop_cpu(CPUState *env)
382 setup_kernel_sigmask(env);
384 pthread_mutex_lock(&qemu_mutex);
385 if (kvm_irqchip_in_kernel(kvm_context))
386 env->halted = 0;
388 kvm_qemu_init_env(env);
389 #ifdef TARGET_I386
390 kvm_tpr_vcpu_start(env);
391 #endif
393 cpu_single_env = env;
394 kvm_load_registers(env);
396 while (1) {
397 while (!has_work(env))
398 kvm_main_loop_wait(env, 1000);
399 if (env->interrupt_request & (CPU_INTERRUPT_HARD | CPU_INTERRUPT_NMI))
400 env->halted = 0;
401 if (!kvm_irqchip_in_kernel(kvm_context)) {
402 if (env->kvm_cpu_state.init)
403 update_regs_for_init(env);
404 if (env->kvm_cpu_state.sipi_needed)
405 update_regs_for_sipi(env);
407 if (!env->halted && !env->kvm_cpu_state.init)
408 kvm_cpu_exec(env);
409 env->exit_request = 0;
410 env->exception_index = EXCP_INTERRUPT;
411 kvm_main_loop_wait(env, 0);
413 pthread_mutex_unlock(&qemu_mutex);
414 return 0;
417 static void *ap_main_loop(void *_env)
419 CPUState *env = _env;
420 sigset_t signals;
421 struct ioperm_data *data = NULL;
423 current_env = env;
424 env->thread_id = kvm_get_thread_id();
425 sigfillset(&signals);
426 sigprocmask(SIG_BLOCK, &signals, NULL);
427 kvm_create_vcpu(kvm_context, env->cpu_index);
428 kvm_qemu_init_env(env);
430 #ifdef USE_KVM_DEVICE_ASSIGNMENT
431 /* do ioperm for io ports of assigned devices */
432 LIST_FOREACH(data, &ioperm_head, entries)
433 on_vcpu(env, kvm_arch_do_ioperm, data);
434 #endif
436 /* signal VCPU creation */
437 pthread_mutex_lock(&qemu_mutex);
438 current_env->kvm_cpu_state.created = 1;
439 pthread_cond_signal(&qemu_vcpu_cond);
441 /* and wait for machine initialization */
442 while (!qemu_system_ready)
443 qemu_cond_wait(&qemu_system_cond);
444 pthread_mutex_unlock(&qemu_mutex);
446 kvm_main_loop_cpu(env);
447 return NULL;
450 void kvm_init_vcpu(CPUState *env)
452 pthread_create(&env->kvm_cpu_state.thread, NULL, ap_main_loop, env);
454 while (env->kvm_cpu_state.created == 0)
455 qemu_cond_wait(&qemu_vcpu_cond);
458 int kvm_init_ap(void)
460 #ifdef TARGET_I386
461 kvm_tpr_opt_setup();
462 #endif
464 signal(SIG_IPI, sig_ipi_handler);
465 return 0;
468 void qemu_kvm_notify_work(void)
470 uint64_t value = 1;
471 char buffer[8];
472 size_t offset = 0;
474 if (io_thread_fd == -1)
475 return;
477 memcpy(buffer, &value, sizeof(value));
479 while (offset < 8) {
480 ssize_t len;
482 len = write(io_thread_fd, buffer + offset, 8 - offset);
483 if (len == -1 && errno == EINTR)
484 continue;
486 if (len <= 0)
487 break;
489 offset += len;
492 if (offset != 8)
493 fprintf(stderr, "failed to notify io thread\n");
496 /* If we have signalfd, we mask out the signals we want to handle and then
497 * use signalfd to listen for them. We rely on whatever the current signal
498 * handler is to dispatch the signals when we receive them.
501 static void sigfd_handler(void *opaque)
503 int fd = (unsigned long)opaque;
504 struct qemu_signalfd_siginfo info;
505 struct sigaction action;
506 ssize_t len;
508 while (1) {
509 do {
510 len = read(fd, &info, sizeof(info));
511 } while (len == -1 && errno == EINTR);
513 if (len == -1 && errno == EAGAIN)
514 break;
516 if (len != sizeof(info)) {
517 printf("read from sigfd returned %ld: %m\n", len);
518 return;
521 sigaction(info.ssi_signo, NULL, &action);
522 if (action.sa_handler)
523 action.sa_handler(info.ssi_signo);
/*
 * Used to break IO thread out of select.
 *
 * @opaque carries the readable fd.  Consumes up to 8 bytes from it,
 * retrying on EINTR and stopping early on EOF or any other error.
 */
static void io_thread_wakeup(void *opaque)
{
    int fd = (unsigned long)opaque;
    char scratch[8];
    size_t consumed;

    for (consumed = 0; consumed < 8; ) {
        ssize_t len = read(fd, scratch + consumed, 8 - consumed);

        if (len == -1 && errno == EINTR)
            continue;
        if (len <= 0)
            break;

        consumed += len;
    }
}
549 int kvm_main_loop(void)
551 int fds[2];
552 sigset_t mask;
553 int sigfd;
555 io_thread = pthread_self();
556 qemu_system_ready = 1;
558 if (qemu_eventfd(fds) == -1) {
559 fprintf(stderr, "failed to create eventfd\n");
560 return -errno;
563 qemu_set_fd_handler2(fds[0], NULL, io_thread_wakeup, NULL,
564 (void *)(unsigned long)fds[0]);
566 io_thread_fd = fds[1];
568 sigemptyset(&mask);
569 sigaddset(&mask, SIGIO);
570 sigaddset(&mask, SIGALRM);
571 sigprocmask(SIG_BLOCK, &mask, NULL);
573 sigfd = qemu_signalfd(&mask);
574 if (sigfd == -1) {
575 fprintf(stderr, "failed to create signalfd\n");
576 return -errno;
579 fcntl(sigfd, F_SETFL, O_NONBLOCK);
581 qemu_set_fd_handler2(sigfd, NULL, sigfd_handler, NULL,
582 (void *)(unsigned long)sigfd);
584 pthread_cond_broadcast(&qemu_system_cond);
586 io_thread_sigfd = sigfd;
587 cpu_single_env = NULL;
589 while (1) {
590 main_loop_wait(1000);
591 if (qemu_shutdown_requested())
592 break;
593 else if (qemu_powerdown_requested())
594 qemu_system_powerdown();
595 else if (qemu_reset_requested())
596 qemu_kvm_system_reset();
597 #ifdef CONFIG_GDBSTUB
598 else if (kvm_debug_cpu_requested) {
599 gdb_set_stop_cpu(kvm_debug_cpu_requested);
600 vm_stop(EXCP_DEBUG);
601 kvm_debug_cpu_requested = NULL;
603 #endif
606 qemu_kvm_pause_all_threads();
607 pthread_mutex_unlock(&qemu_mutex);
609 return 0;
#ifdef KVM_CAP_SET_GUEST_DEBUG
/*
 * libkvm debug-exit callback.  If the arch code says the exit must be
 * handled, record the cpu for the io thread (see kvm_main_loop()) and mark
 * it stopped.  Returns the arch handler's verdict.
 */
static int kvm_debug(void *opaque, void *data,
                     struct kvm_debug_exit_arch *arch_info)
{
    int handle = kvm_arch_debug(arch_info);
    CPUState *cpu = data;

    if (!handle)
        return handle;

    kvm_debug_cpu_requested = cpu;
    cpu->kvm_cpu_state.stopped = 1;
    return handle;
}
#endif
/* libkvm callback: 8-bit port read from @addr into *@data.  Returns 0. */
static int kvm_inb(void *opaque, uint16_t addr, uint8_t *data)
{
    uint8_t value = cpu_inb(0, addr);

    *data = value;
    return 0;
}
/* libkvm callback: 16-bit port read from @addr into *@data.  Returns 0. */
static int kvm_inw(void *opaque, uint16_t addr, uint16_t *data)
{
    uint16_t value = cpu_inw(0, addr);

    *data = value;
    return 0;
}
/* libkvm callback: 32-bit port read from @addr into *@data.  Returns 0. */
static int kvm_inl(void *opaque, uint16_t addr, uint32_t *data)
{
    uint32_t value = cpu_inl(0, addr);

    *data = value;
    return 0;
}
#define PM_IO_BASE 0xb000

/*
 * libkvm callback: 8-bit port write.
 *
 * Writes to port 0xb2 are intercepted:
 *   0x00 - write 0 to port 0xb3
 *   0xf0 - clear bit 0 of the 16-bit register at PM_IO_BASE + 4
 *   0xf1 - set   bit 0 of the 16-bit register at PM_IO_BASE + 4
 * and every other value is ignored.  All other ports are forwarded to
 * cpu_outb().  Always returns 0.
 */
static int kvm_outb(void *opaque, uint16_t addr, uint8_t data)
{
    unsigned pm_ctrl;

    if (addr != 0xb2) {
        cpu_outb(0, addr, data);
        return 0;
    }

    switch (data) {
    case 0:
        cpu_outb(0, 0xb3, 0);
        break;
    case 0xf0:
        /* disable acpi (bit 0 cleared) */
        pm_ctrl = cpu_inw(0, PM_IO_BASE + 4);
        pm_ctrl &= ~1;
        cpu_outw(0, PM_IO_BASE + 4, pm_ctrl);
        break;
    case 0xf1:
        /* enable acpi (bit 0 set) */
        pm_ctrl = cpu_inw(0, PM_IO_BASE + 4);
        pm_ctrl |= 1;
        cpu_outw(0, PM_IO_BASE + 4, pm_ctrl);
        break;
    default:
        break;
    }
    return 0;
}
/* libkvm callback: 16-bit port write of @data to @addr.  Returns 0. */
static int kvm_outw(void *opaque, uint16_t addr, uint16_t data)
{
    (void)opaque;

    cpu_outw(0, addr, data);
    return 0;
}
/* libkvm callback: 32-bit port write of @data to @addr.  Returns 0. */
static int kvm_outl(void *opaque, uint16_t addr, uint32_t data)
{
    (void)opaque;

    cpu_outl(0, addr, data);
    return 0;
}
/*
 * libkvm callback: read @len bytes of guest-physical memory at @addr into
 * @data (is_write = 0).  Returns 0.
 */
static int kvm_mmio_read(void *opaque, uint64_t addr, uint8_t *data, int len)
{
    const int is_write = 0;

    cpu_physical_memory_rw(addr, data, len, is_write);
    return 0;
}
/*
 * libkvm callback: write @len bytes from @data to guest-physical memory at
 * @addr (is_write = 1).  Returns 0.
 */
static int kvm_mmio_write(void *opaque, uint64_t addr, uint8_t *data, int len)
{
    const int is_write = 1;

    cpu_physical_memory_rw(addr, data, len, is_write);
    return 0;
}
/* libkvm io_window callback: unconditionally reports 1. */
static int kvm_io_window(void *opaque)
{
    (void)opaque;

    return 1;
}
/* libkvm halt callback: forwards to the architecture-specific handler. */
static int kvm_halt(void *opaque, int vcpu)
{
    int rc = kvm_arch_halt(opaque, vcpu);

    return rc;
}
717 static int kvm_shutdown(void *opaque, void *data)
719 CPUState *env = (CPUState *)data;
721 /* stop the current vcpu from going back to guest mode */
722 env->kvm_cpu_state.stopped = 1;
724 qemu_system_reset_request();
725 return 1;
728 static struct kvm_callbacks qemu_kvm_ops = {
729 #ifdef KVM_CAP_SET_GUEST_DEBUG
730 .debug = kvm_debug,
731 #endif
732 .inb = kvm_inb,
733 .inw = kvm_inw,
734 .inl = kvm_inl,
735 .outb = kvm_outb,
736 .outw = kvm_outw,
737 .outl = kvm_outl,
738 .mmio_read = kvm_mmio_read,
739 .mmio_write = kvm_mmio_write,
740 .halt = kvm_halt,
741 .shutdown = kvm_shutdown,
742 .io_window = kvm_io_window,
743 .try_push_interrupts = try_push_interrupts,
744 #ifdef KVM_CAP_USER_NMI
745 .push_nmi = kvm_arch_push_nmi,
746 #endif
747 .post_kvm_run = post_kvm_run,
748 .pre_kvm_run = pre_kvm_run,
749 #ifdef TARGET_I386
750 .tpr_access = handle_tpr_access,
751 #endif
752 #ifdef TARGET_PPC
753 .powerpc_dcr_read = handle_powerpc_dcr_read,
754 .powerpc_dcr_write = handle_powerpc_dcr_write,
755 #endif
758 int kvm_qemu_init()
760 /* Try to initialize kvm */
761 kvm_context = kvm_init(&qemu_kvm_ops, cpu_single_env);
762 if (!kvm_context) {
763 return -1;
765 pthread_mutex_lock(&qemu_mutex);
767 return 0;
770 #ifdef TARGET_I386
771 static int destroy_region_works = 0;
772 #endif
774 int kvm_qemu_create_context(void)
776 int r;
777 int i;
779 if (!kvm_irqchip) {
780 kvm_disable_irqchip_creation(kvm_context);
782 if (!kvm_pit) {
783 kvm_disable_pit_creation(kvm_context);
785 if (kvm_create(kvm_context, 0, NULL) < 0) {
786 kvm_qemu_destroy();
787 return -1;
789 r = kvm_arch_qemu_create_context();
790 if(r <0)
791 kvm_qemu_destroy();
792 if (kvm_pit && !kvm_pit_reinject) {
793 if (kvm_reinject_control(kvm_context, 0)) {
794 fprintf(stderr, "failure to disable in-kernel PIT reinjection\n");
795 return -1;
798 #ifdef TARGET_I386
799 destroy_region_works = kvm_destroy_memory_region_works(kvm_context);
800 #endif
802 if (kvm_irqchip && kvm_has_gsi_routing(kvm_context)) {
803 kvm_clear_gsi_routes(kvm_context);
804 for (i = 0; i < 8; ++i) {
805 if (i == 2)
806 continue;
807 r = kvm_add_irq_route(kvm_context, i, KVM_IRQCHIP_PIC_MASTER, i);
808 if (r < 0)
809 return r;
811 for (i = 8; i < 16; ++i) {
812 r = kvm_add_irq_route(kvm_context, i, KVM_IRQCHIP_PIC_SLAVE, i - 8);
813 if (r < 0)
814 return r;
816 for (i = 0; i < 24; ++i) {
817 r = kvm_add_irq_route(kvm_context, i, KVM_IRQCHIP_IOAPIC, i);
818 if (r < 0)
819 return r;
821 kvm_commit_irq_routes(kvm_context);
823 return 0;
826 void kvm_qemu_destroy(void)
828 kvm_finalize(kvm_context);
831 #ifdef TARGET_I386
832 static int must_use_aliases_source(target_phys_addr_t addr)
834 if (destroy_region_works)
835 return false;
836 if (addr == 0xa0000 || addr == 0xa8000)
837 return true;
838 return false;
841 static int must_use_aliases_target(target_phys_addr_t addr)
843 if (destroy_region_works)
844 return false;
845 if (addr >= 0xe0000000 && addr < 0x100000000ull)
846 return true;
847 return false;
850 static struct mapping {
851 target_phys_addr_t phys;
852 ram_addr_t ram;
853 ram_addr_t len;
854 } mappings[50];
855 static int nr_mappings;
857 static struct mapping *find_ram_mapping(ram_addr_t ram_addr)
859 struct mapping *p;
861 for (p = mappings; p < mappings + nr_mappings; ++p) {
862 if (p->ram <= ram_addr && ram_addr < p->ram + p->len) {
863 return p;
866 return NULL;
869 static struct mapping *find_mapping(target_phys_addr_t start_addr)
871 struct mapping *p;
873 for (p = mappings; p < mappings + nr_mappings; ++p) {
874 if (p->phys <= start_addr && start_addr < p->phys + p->len) {
875 return p;
878 return NULL;
881 static void drop_mapping(target_phys_addr_t start_addr)
883 struct mapping *p = find_mapping(start_addr);
885 if (p)
886 *p = mappings[--nr_mappings];
888 #endif
890 void kvm_cpu_register_physical_memory(target_phys_addr_t start_addr,
891 unsigned long size,
892 unsigned long phys_offset)
894 int r = 0;
895 unsigned long area_flags;
896 #ifdef TARGET_I386
897 struct mapping *p;
898 #endif
900 if (start_addr + size > phys_ram_size) {
901 phys_ram_size = start_addr + size;
904 phys_offset &= ~IO_MEM_ROM;
905 area_flags = phys_offset & ~TARGET_PAGE_MASK;
907 if (area_flags != IO_MEM_RAM) {
908 #ifdef TARGET_I386
909 if (must_use_aliases_source(start_addr)) {
910 kvm_destroy_memory_alias(kvm_context, start_addr);
911 return;
913 if (must_use_aliases_target(start_addr))
914 return;
915 #endif
916 while (size > 0) {
917 p = find_mapping(start_addr);
918 if (p) {
919 kvm_unregister_memory_area(kvm_context, p->phys, p->len);
920 drop_mapping(p->phys);
922 start_addr += TARGET_PAGE_SIZE;
923 if (size > TARGET_PAGE_SIZE) {
924 size -= TARGET_PAGE_SIZE;
925 } else {
926 size = 0;
929 return;
932 r = kvm_is_containing_region(kvm_context, start_addr, size);
933 if (r)
934 return;
936 if (area_flags >= TLB_MMIO)
937 return;
939 #ifdef TARGET_I386
940 if (must_use_aliases_source(start_addr)) {
941 p = find_ram_mapping(phys_offset);
942 if (p) {
943 kvm_create_memory_alias(kvm_context, start_addr, size,
944 p->phys + (phys_offset - p->ram));
946 return;
948 #endif
950 r = kvm_register_phys_mem(kvm_context, start_addr,
951 qemu_get_ram_ptr(phys_offset),
952 size, 0);
953 if (r < 0) {
954 printf("kvm_cpu_register_physical_memory: failed\n");
955 exit(1);
958 #ifdef TARGET_I386
959 drop_mapping(start_addr);
960 p = &mappings[nr_mappings++];
961 p->phys = start_addr;
962 p->ram = phys_offset;
963 p->len = size;
964 #endif
966 return;
969 void kvm_cpu_unregister_physical_memory(target_phys_addr_t start_addr,
970 target_phys_addr_t size,
971 unsigned long phys_offset)
973 kvm_unregister_memory_area(kvm_context, start_addr, size);
/*
 * Prepare a guest memory area.  Where MADV_DONTFORK exists, and KVM is
 * enabled without a synchronous MMU, the area is marked MADV_DONTFORK.
 * Returns the madvise() result (0 when nothing was done); a failure is
 * also reported through perror().
 */
int kvm_setup_guest_memory(void *area, unsigned long size)
{
    int rc = 0;

#ifdef MADV_DONTFORK
    if (kvm_enabled() && !kvm_has_sync_mmu()) {
        rc = madvise(area, size, MADV_DONTFORK);
    }
#endif

    if (rc != 0) {
        perror ("madvise");
    }

    return rc;
}
991 int kvm_qemu_check_extension(int ext)
993 return kvm_check_extension(kvm_context, ext);
996 int kvm_qemu_init_env(CPUState *cenv)
998 return kvm_arch_qemu_init_env(cenv);
1001 #ifdef KVM_CAP_SET_GUEST_DEBUG
1002 struct kvm_sw_breakpoint_head kvm_sw_breakpoints =
1003 TAILQ_HEAD_INITIALIZER(kvm_sw_breakpoints);
1005 struct kvm_sw_breakpoint *kvm_find_sw_breakpoint(target_ulong pc)
1007 struct kvm_sw_breakpoint *bp;
1009 TAILQ_FOREACH(bp, &kvm_sw_breakpoints, entry) {
1010 if (bp->pc == pc)
1011 return bp;
1013 return NULL;
1016 struct kvm_set_guest_debug_data {
1017 struct kvm_guest_debug dbg;
1018 int err;
1021 static void kvm_invoke_set_guest_debug(void *data)
1023 struct kvm_set_guest_debug_data *dbg_data = data;
1025 dbg_data->err = kvm_set_guest_debug(kvm_context, cpu_single_env->cpu_index,
1026 &dbg_data->dbg);
1029 int kvm_update_guest_debug(CPUState *env, unsigned long reinject_trap)
1031 struct kvm_set_guest_debug_data data;
1033 data.dbg.control = 0;
1034 if (env->singlestep_enabled)
1035 data.dbg.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP;
1037 kvm_arch_update_guest_debug(env, &data.dbg);
1038 data.dbg.control |= reinject_trap;
1040 on_vcpu(env, kvm_invoke_set_guest_debug, &data);
1041 return data.err;
1044 int kvm_insert_breakpoint(CPUState *current_env, target_ulong addr,
1045 target_ulong len, int type)
1047 struct kvm_sw_breakpoint *bp;
1048 CPUState *env;
1049 int err;
1051 if (type == GDB_BREAKPOINT_SW) {
1052 bp = kvm_find_sw_breakpoint(addr);
1053 if (bp) {
1054 bp->use_count++;
1055 return 0;
1058 bp = qemu_malloc(sizeof(struct kvm_sw_breakpoint));
1059 if (!bp)
1060 return -ENOMEM;
1062 bp->pc = addr;
1063 bp->use_count = 1;
1064 err = kvm_arch_insert_sw_breakpoint(current_env, bp);
1065 if (err) {
1066 free(bp);
1067 return err;
1070 TAILQ_INSERT_HEAD(&kvm_sw_breakpoints, bp, entry);
1071 } else {
1072 err = kvm_arch_insert_hw_breakpoint(addr, len, type);
1073 if (err)
1074 return err;
1077 for (env = first_cpu; env != NULL; env = env->next_cpu) {
1078 err = kvm_update_guest_debug(env, 0);
1079 if (err)
1080 return err;
1082 return 0;
1085 int kvm_remove_breakpoint(CPUState *current_env, target_ulong addr,
1086 target_ulong len, int type)
1088 struct kvm_sw_breakpoint *bp;
1089 CPUState *env;
1090 int err;
1092 if (type == GDB_BREAKPOINT_SW) {
1093 bp = kvm_find_sw_breakpoint(addr);
1094 if (!bp)
1095 return -ENOENT;
1097 if (bp->use_count > 1) {
1098 bp->use_count--;
1099 return 0;
1102 err = kvm_arch_remove_sw_breakpoint(current_env, bp);
1103 if (err)
1104 return err;
1106 TAILQ_REMOVE(&kvm_sw_breakpoints, bp, entry);
1107 qemu_free(bp);
1108 } else {
1109 err = kvm_arch_remove_hw_breakpoint(addr, len, type);
1110 if (err)
1111 return err;
1114 for (env = first_cpu; env != NULL; env = env->next_cpu) {
1115 err = kvm_update_guest_debug(env, 0);
1116 if (err)
1117 return err;
1119 return 0;
1122 void kvm_remove_all_breakpoints(CPUState *current_env)
1124 struct kvm_sw_breakpoint *bp, *next;
1125 CPUState *env;
1127 TAILQ_FOREACH_SAFE(bp, &kvm_sw_breakpoints, entry, next) {
1128 if (kvm_arch_remove_sw_breakpoint(current_env, bp) != 0) {
1129 /* Try harder to find a CPU that currently sees the breakpoint. */
1130 for (env = first_cpu; env != NULL; env = env->next_cpu) {
1131 if (kvm_arch_remove_sw_breakpoint(env, bp) == 0)
1132 break;
1136 kvm_arch_remove_all_hw_breakpoints();
1138 for (env = first_cpu; env != NULL; env = env->next_cpu)
1139 kvm_update_guest_debug(env, 0);
1142 #else /* !KVM_CAP_SET_GUEST_DEBUG */
1144 int kvm_update_guest_debug(CPUState *env, unsigned long reinject_trap)
1146 return -EINVAL;
1149 int kvm_insert_breakpoint(CPUState *current_env, target_ulong addr,
1150 target_ulong len, int type)
1152 return -EINVAL;
1155 int kvm_remove_breakpoint(CPUState *current_env, target_ulong addr,
1156 target_ulong len, int type)
1158 return -EINVAL;
1161 void kvm_remove_all_breakpoints(CPUState *current_env)
1164 #endif /* !KVM_CAP_SET_GUEST_DEBUG */
/*
 * dirty pages logging
 */

/*
 * Scratch bitmap used while dirty tracking is on; allocated and freed by
 * kvm_physical_memory_set_dirty_tracking().
 */
/* FIXME: use unsigned long pointer instead of unsigned char */
unsigned char *kvm_dirty_bitmap = NULL;
1171 int kvm_physical_memory_set_dirty_tracking(int enable)
1173 int r = 0;
1175 if (!kvm_enabled())
1176 return 0;
1178 if (enable) {
1179 if (!kvm_dirty_bitmap) {
1180 unsigned bitmap_size = BITMAP_SIZE(phys_ram_size);
1181 kvm_dirty_bitmap = qemu_malloc(bitmap_size);
1182 if (kvm_dirty_bitmap == NULL) {
1183 perror("Failed to allocate dirty pages bitmap");
1184 r=-1;
1186 else {
1187 r = kvm_dirty_pages_log_enable_all(kvm_context);
1191 else {
1192 if (kvm_dirty_bitmap) {
1193 r = kvm_dirty_pages_log_reset(kvm_context);
1194 qemu_free(kvm_dirty_bitmap);
1195 kvm_dirty_bitmap = NULL;
1198 return r;
1201 /* get kvm's dirty pages bitmap and update qemu's */
1202 static int kvm_get_dirty_pages_log_range(unsigned long start_addr,
1203 unsigned char *bitmap,
1204 unsigned int offset,
1205 unsigned long mem_size)
1207 unsigned int i, j, n=0;
1208 unsigned char c;
1209 unsigned long page_number, addr, addr1;
1210 ram_addr_t ram_addr;
1211 unsigned int len = ((mem_size/TARGET_PAGE_SIZE) + 7) / 8;
1214 * bitmap-traveling is faster than memory-traveling (for addr...)
1215 * especially when most of the memory is not dirty.
1217 for (i=0; i<len; i++) {
1218 c = bitmap[i];
1219 while (c>0) {
1220 j = ffsl(c) - 1;
1221 c &= ~(1u<<j);
1222 page_number = i * 8 + j;
1223 addr1 = page_number * TARGET_PAGE_SIZE;
1224 addr = offset + addr1;
1225 ram_addr = cpu_get_physical_page_desc(addr);
1226 cpu_physical_memory_set_dirty(ram_addr);
1227 n++;
1230 return 0;
/*
 * Callback for kvm_get_dirty_pages_range(): merge one slot's @bitmap,
 * which starts at guest-physical @start and covers @len bytes, into
 * qemu's dirty tracking.  @opaque is unused.
 */
static int kvm_get_dirty_bitmap_cb(unsigned long start, unsigned long len,
                                   void *bitmap, void *opaque)
{
    (void)opaque;

    return kvm_get_dirty_pages_log_range(start, bitmap, start, len);
}
1239 * get kvm's dirty pages bitmap and update qemu's
1240 * we only care about physical ram, which resides in slots 0 and 3
1242 int kvm_update_dirty_pages_log(void)
1244 int r = 0;
1247 r = kvm_get_dirty_pages_range(kvm_context, 0, phys_ram_size,
1248 kvm_dirty_bitmap, NULL,
1249 kvm_get_dirty_bitmap_cb);
1250 return r;
1253 void kvm_qemu_log_memory(target_phys_addr_t start, target_phys_addr_t size,
1254 int log)
1256 if (log)
1257 kvm_dirty_pages_log_enable_slot(kvm_context, start, size);
1258 else {
1259 #ifdef TARGET_I386
1260 if (must_use_aliases_target(start))
1261 return;
1262 #endif
1263 kvm_dirty_pages_log_disable_slot(kvm_context, start, size);
1267 int kvm_get_phys_ram_page_bitmap(unsigned char *bitmap)
1269 unsigned int bsize = BITMAP_SIZE(phys_ram_size);
1270 unsigned int brsize = BITMAP_SIZE(ram_size);
1271 unsigned int extra_pages = (phys_ram_size - ram_size) / TARGET_PAGE_SIZE;
1272 unsigned int extra_bytes = (extra_pages +7)/8;
1273 unsigned int hole_start = BITMAP_SIZE(0xa0000);
1274 unsigned int hole_end = BITMAP_SIZE(0xc0000);
1276 memset(bitmap, 0xFF, brsize + extra_bytes);
1277 memset(bitmap + hole_start, 0, hole_end - hole_start);
1278 memset(bitmap + brsize + extra_bytes, 0, bsize - brsize - extra_bytes);
1280 return 0;
#ifdef KVM_CAP_IRQCHIP

/*
 * Set the level of @irq on the in-kernel irqchip; @status is passed
 * through to kvm_set_irq_level().
 */
int kvm_set_irq(int irq, int level, int *status)
{
    int rc = kvm_set_irq_level(kvm_context, irq, level, status);

    return rc;
}

#endif
1292 int qemu_kvm_get_dirty_pages(unsigned long phys_addr, void *buf)
1294 return kvm_get_dirty_pages(kvm_context, phys_addr, buf);
1297 void *kvm_cpu_create_phys_mem(target_phys_addr_t start_addr,
1298 unsigned long size, int log, int writable)
1300 return kvm_create_phys_mem(kvm_context, start_addr, size, log, writable);
1303 void kvm_cpu_destroy_phys_mem(target_phys_addr_t start_addr,
1304 unsigned long size)
1306 kvm_destroy_phys_mem(kvm_context, start_addr, size);
1309 void kvm_mutex_unlock(void)
1311 assert(!cpu_single_env);
1312 pthread_mutex_unlock(&qemu_mutex);
1315 void kvm_mutex_lock(void)
1317 pthread_mutex_lock(&qemu_mutex);
1318 cpu_single_env = NULL;
1321 int qemu_kvm_register_coalesced_mmio(target_phys_addr_t addr, unsigned int size)
1323 return kvm_register_coalesced_mmio(kvm_context, addr, size);
1326 int qemu_kvm_unregister_coalesced_mmio(target_phys_addr_t addr,
1327 unsigned int size)
1329 return kvm_unregister_coalesced_mmio(kvm_context, addr, size);
1332 int kvm_coalesce_mmio_region(target_phys_addr_t start, ram_addr_t size)
1334 return kvm_register_coalesced_mmio(kvm_context, start, size);
1337 int kvm_uncoalesce_mmio_region(target_phys_addr_t start, ram_addr_t size)
1339 return kvm_unregister_coalesced_mmio(kvm_context, start, size);
1342 #ifdef USE_KVM_DEVICE_ASSIGNMENT
1343 void kvm_add_ioperm_data(struct ioperm_data *data)
1345 LIST_INSERT_HEAD(&ioperm_head, data, entries);
1348 void kvm_remove_ioperm_data(unsigned long start_port, unsigned long num)
1350 struct ioperm_data *data;
1352 data = LIST_FIRST(&ioperm_head);
1353 while (data) {
1354 struct ioperm_data *next = LIST_NEXT(data, entries);
1356 if (data->start_port == start_port && data->num == num) {
1357 LIST_REMOVE(data, entries);
1358 qemu_free(data);
1361 data = next;
1365 void kvm_ioperm(CPUState *env, void *data)
1367 if (kvm_enabled() && qemu_system_ready)
1368 on_vcpu(env, kvm_arch_do_ioperm, data);
1371 #endif
1373 void kvm_physical_sync_dirty_bitmap(target_phys_addr_t start_addr, target_phys_addr_t end_addr)
1375 #ifndef TARGET_IA64
1376 void *buf;
1378 #ifdef TARGET_I386
1379 if (must_use_aliases_source(start_addr))
1380 return;
1381 #endif
1383 buf = qemu_malloc((end_addr - start_addr) / 8 + 2);
1384 kvm_get_dirty_pages_range(kvm_context, start_addr, end_addr - start_addr,
1385 buf, NULL, kvm_get_dirty_bitmap_cb);
1386 qemu_free(buf);
1387 #endif
1390 int kvm_log_start(target_phys_addr_t phys_addr, target_phys_addr_t len)
1392 #ifdef TARGET_I386
1393 if (must_use_aliases_source(phys_addr))
1394 return 0;
1395 #endif
1397 #ifndef TARGET_IA64
1398 kvm_qemu_log_memory(phys_addr, len, 1);
1399 #endif
1400 return 0;
1403 int kvm_log_stop(target_phys_addr_t phys_addr, target_phys_addr_t len)
1405 #ifdef TARGET_I386
1406 if (must_use_aliases_source(phys_addr))
1407 return 0;
1408 #endif
1410 #ifndef TARGET_IA64
1411 kvm_qemu_log_memory(phys_addr, len, 0);
1412 #endif
1413 return 0;
1416 /* hack: both libkvm and upstream qemu define kvm_has_sync_mmu(), differently */
1417 #undef kvm_has_sync_mmu
1418 int qemu_kvm_has_sync_mmu(void)
1420 return kvm_has_sync_mmu(kvm_context);
1423 void qemu_kvm_cpu_stop(CPUState *env)
1425 if (kvm_enabled())
1426 env->kvm_cpu_state.stopped = 1;