Merge commit '6c6a58aee425338bf67ec8faffdcda56b0b82090' into upstream-merge
[qemu/qemu-dev-zwu.git] / qemu-kvm-x86.c
blob95b7aa5223e7b255f7d97a5c34a5256cab10807e
/*
 * qemu/kvm integration, x86 specific code
 *
 * Copyright (C) 2006-2008 Qumranet Technologies
 *
 * Licensed under the terms of the GNU GPL version 2 or higher.
 */
9 #include "config.h"
10 #include "config-host.h"
12 #include <string.h>
13 #include "hw/hw.h"
14 #include "gdbstub.h"
15 #include <sys/io.h>
17 #include "qemu-kvm.h"
18 #include "libkvm.h"
19 #include <pthread.h>
20 #include <sys/utsname.h>
21 #include <linux/kvm_para.h>
22 #include <sys/ioctl.h>
24 #include "kvm.h"
25 #include "hw/apic.h"
27 #define MSR_IA32_TSC 0x10
29 static struct kvm_msr_list *kvm_msr_list;
30 extern unsigned int kvm_shadow_memory;
31 static int kvm_has_msr_star;
32 static int kvm_has_vm_hsave_pa;
34 static int lm_capable_kernel;
36 int kvm_set_tss_addr(kvm_context_t kvm, unsigned long addr)
38 int r;
40 * Tell fw_cfg to notify the BIOS to reserve the range.
42 if (e820_add_entry(addr, 0x4000, E820_RESERVED) < 0) {
43 perror("e820_add_entry() table is full");
44 exit(1);
47 r = kvm_vm_ioctl(kvm_state, KVM_SET_TSS_ADDR, addr);
48 if (r < 0) {
49 fprintf(stderr, "kvm_set_tss_addr: %m\n");
50 return r;
52 return 0;
55 static int kvm_init_tss(kvm_context_t kvm)
57 int r;
59 r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_SET_TSS_ADDR);
60 if (r > 0) {
62 * this address is 3 pages before the bios, and the bios should present
63 * as unavaible memory
65 r = kvm_set_tss_addr(kvm, 0xfeffd000);
66 if (r < 0) {
67 fprintf(stderr, "kvm_init_tss: unable to set tss addr\n");
68 return r;
70 } else {
71 fprintf(stderr, "kvm does not support KVM_CAP_SET_TSS_ADDR\n");
73 return 0;
76 static int kvm_set_identity_map_addr(kvm_context_t kvm, uint64_t addr)
78 #ifdef KVM_CAP_SET_IDENTITY_MAP_ADDR
79 int r;
81 r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_SET_IDENTITY_MAP_ADDR);
82 if (r > 0) {
83 r = kvm_vm_ioctl(kvm_state, KVM_SET_IDENTITY_MAP_ADDR, &addr);
84 if (r == -1) {
85 fprintf(stderr, "kvm_set_identity_map_addr: %m\n");
86 return -errno;
88 return 0;
90 #endif
91 return -ENOSYS;
94 static int kvm_init_identity_map_page(kvm_context_t kvm)
96 #ifdef KVM_CAP_SET_IDENTITY_MAP_ADDR
97 int r;
99 r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_SET_IDENTITY_MAP_ADDR);
100 if (r > 0) {
102 * this address is 4 pages before the bios, and the bios should present
103 * as unavaible memory
105 r = kvm_set_identity_map_addr(kvm, 0xfeffc000);
106 if (r < 0) {
107 fprintf(stderr, "kvm_init_identity_map_page: "
108 "unable to set identity mapping addr\n");
109 return r;
113 #endif
114 return 0;
117 static int kvm_create_pit(kvm_context_t kvm)
119 #ifdef KVM_CAP_PIT
120 int r;
122 kvm_state->pit_in_kernel = 0;
123 if (!kvm->no_pit_creation) {
124 r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_PIT);
125 if (r > 0) {
126 r = kvm_vm_ioctl(kvm_state, KVM_CREATE_PIT);
127 if (r >= 0)
128 kvm_state->pit_in_kernel = 1;
129 else {
130 fprintf(stderr, "Create kernel PIC irqchip failed\n");
131 return r;
135 #endif
136 return 0;
139 int kvm_arch_create(kvm_context_t kvm, unsigned long phys_mem_bytes,
140 void **vm_mem)
142 int r = 0;
144 r = kvm_init_tss(kvm);
145 if (r < 0)
146 return r;
148 r = kvm_init_identity_map_page(kvm);
149 if (r < 0)
150 return r;
152 r = kvm_create_pit(kvm);
153 if (r < 0)
154 return r;
156 r = kvm_init_coalesced_mmio(kvm);
157 if (r < 0)
158 return r;
160 return 0;
163 #ifdef KVM_EXIT_TPR_ACCESS
165 static int kvm_handle_tpr_access(CPUState *env)
167 struct kvm_run *run = env->kvm_run;
168 kvm_tpr_access_report(env,
169 run->tpr_access.rip,
170 run->tpr_access.is_write);
171 return 0;
175 int kvm_enable_vapic(CPUState *env, uint64_t vapic)
177 struct kvm_vapic_addr va = {
178 .vapic_addr = vapic,
181 return kvm_vcpu_ioctl(env, KVM_SET_VAPIC_ADDR, &va);
184 #endif
186 int kvm_arch_run(CPUState *env)
188 int r = 0;
189 struct kvm_run *run = env->kvm_run;
192 switch (run->exit_reason) {
193 #ifdef KVM_EXIT_SET_TPR
194 case KVM_EXIT_SET_TPR:
195 break;
196 #endif
197 #ifdef KVM_EXIT_TPR_ACCESS
198 case KVM_EXIT_TPR_ACCESS:
199 r = kvm_handle_tpr_access(env);
200 break;
201 #endif
202 default:
203 r = 1;
204 break;
207 return r;
210 #ifdef KVM_CAP_IRQCHIP
212 int kvm_get_lapic(CPUState *env, struct kvm_lapic_state *s)
214 int r = 0;
216 if (!kvm_irqchip_in_kernel())
217 return r;
219 r = kvm_vcpu_ioctl(env, KVM_GET_LAPIC, s);
220 if (r < 0)
221 fprintf(stderr, "KVM_GET_LAPIC failed\n");
222 return r;
225 int kvm_set_lapic(CPUState *env, struct kvm_lapic_state *s)
227 int r = 0;
229 if (!kvm_irqchip_in_kernel())
230 return 0;
232 r = kvm_vcpu_ioctl(env, KVM_SET_LAPIC, s);
234 if (r < 0)
235 fprintf(stderr, "KVM_SET_LAPIC failed\n");
236 return r;
239 #endif
241 #ifdef KVM_CAP_PIT
243 int kvm_get_pit(kvm_context_t kvm, struct kvm_pit_state *s)
245 if (!kvm_pit_in_kernel())
246 return 0;
247 return kvm_vm_ioctl(kvm_state, KVM_GET_PIT, s);
250 int kvm_set_pit(kvm_context_t kvm, struct kvm_pit_state *s)
252 if (!kvm_pit_in_kernel())
253 return 0;
254 return kvm_vm_ioctl(kvm_state, KVM_SET_PIT, s);
257 #ifdef KVM_CAP_PIT_STATE2
258 int kvm_get_pit2(kvm_context_t kvm, struct kvm_pit_state2 *ps2)
260 if (!kvm_pit_in_kernel())
261 return 0;
262 return kvm_vm_ioctl(kvm_state, KVM_GET_PIT2, ps2);
265 int kvm_set_pit2(kvm_context_t kvm, struct kvm_pit_state2 *ps2)
267 if (!kvm_pit_in_kernel())
268 return 0;
269 return kvm_vm_ioctl(kvm_state, KVM_SET_PIT2, ps2);
272 #endif
273 #endif
275 int kvm_has_pit_state2(kvm_context_t kvm)
277 int r = 0;
279 #ifdef KVM_CAP_PIT_STATE2
280 r = kvm_check_extension(kvm_state, KVM_CAP_PIT_STATE2);
281 #endif
282 return r;
285 void kvm_show_code(CPUState *env)
287 #define SHOW_CODE_LEN 50
288 struct kvm_regs regs;
289 struct kvm_sregs sregs;
290 int r, n;
291 int back_offset;
292 unsigned char code;
293 char code_str[SHOW_CODE_LEN * 3 + 1];
294 unsigned long rip;
296 r = kvm_vcpu_ioctl(env, KVM_GET_SREGS, &sregs);
297 if (r < 0 ) {
298 perror("KVM_GET_SREGS");
299 return;
301 r = kvm_vcpu_ioctl(env, KVM_GET_REGS, &regs);
302 if (r < 0) {
303 perror("KVM_GET_REGS");
304 return;
306 rip = sregs.cs.base + regs.rip;
307 back_offset = regs.rip;
308 if (back_offset > 20)
309 back_offset = 20;
310 *code_str = 0;
311 for (n = -back_offset; n < SHOW_CODE_LEN-back_offset; ++n) {
312 if (n == 0)
313 strcat(code_str, " -->");
314 cpu_physical_memory_rw(rip + n, &code, 1, 1);
315 sprintf(code_str + strlen(code_str), " %02x", code);
317 fprintf(stderr, "code:%s\n", code_str);
322 * Returns available msr list. User must free.
324 static struct kvm_msr_list *kvm_get_msr_list(void)
326 struct kvm_msr_list sizer, *msrs;
327 int r;
329 sizer.nmsrs = 0;
330 r = kvm_ioctl(kvm_state, KVM_GET_MSR_INDEX_LIST, &sizer);
331 if (r < 0 && r != -E2BIG)
332 return NULL;
333 /* Old kernel modules had a bug and could write beyond the provided
334 memory. Allocate at least a safe amount of 1K. */
335 msrs = qemu_malloc(MAX(1024, sizeof(*msrs) +
336 sizer.nmsrs * sizeof(*msrs->indices)));
338 msrs->nmsrs = sizer.nmsrs;
339 r = kvm_ioctl(kvm_state, KVM_GET_MSR_INDEX_LIST, msrs);
340 if (r < 0) {
341 free(msrs);
342 errno = r;
343 return NULL;
345 return msrs;
348 int kvm_get_msrs(CPUState *env, struct kvm_msr_entry *msrs, int n)
350 struct kvm_msrs *kmsrs = qemu_malloc(sizeof *kmsrs + n * sizeof *msrs);
351 int r;
353 kmsrs->nmsrs = n;
354 memcpy(kmsrs->entries, msrs, n * sizeof *msrs);
355 r = kvm_vcpu_ioctl(env, KVM_GET_MSRS, kmsrs);
356 memcpy(msrs, kmsrs->entries, n * sizeof *msrs);
357 free(kmsrs);
358 return r;
361 int kvm_set_msrs(CPUState *env, struct kvm_msr_entry *msrs, int n)
363 struct kvm_msrs *kmsrs = qemu_malloc(sizeof *kmsrs + n * sizeof *msrs);
364 int r;
366 kmsrs->nmsrs = n;
367 memcpy(kmsrs->entries, msrs, n * sizeof *msrs);
368 r = kvm_vcpu_ioctl(env, KVM_SET_MSRS, kmsrs);
369 free(kmsrs);
370 return r;
373 int kvm_get_mce_cap_supported(kvm_context_t kvm, uint64_t *mce_cap,
374 int *max_banks)
376 #ifdef KVM_CAP_MCE
377 int r;
379 r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_MCE);
380 if (r > 0) {
381 *max_banks = r;
382 return kvm_ioctl(kvm_state, KVM_X86_GET_MCE_CAP_SUPPORTED, mce_cap);
384 #endif
385 return -ENOSYS;
388 int kvm_setup_mce(CPUState *env, uint64_t *mcg_cap)
390 #ifdef KVM_CAP_MCE
391 return kvm_vcpu_ioctl(env, KVM_X86_SETUP_MCE, mcg_cap);
392 #else
393 return -ENOSYS;
394 #endif
397 int kvm_set_mce(CPUState *env, struct kvm_x86_mce *m)
399 #ifdef KVM_CAP_MCE
400 return kvm_vcpu_ioctl(env, KVM_X86_SET_MCE, m);
401 #else
402 return -ENOSYS;
403 #endif
406 static void print_seg(FILE *file, const char *name, struct kvm_segment *seg)
408 fprintf(stderr,
409 "%s %04x (%08llx/%08x p %d dpl %d db %d s %d type %x l %d"
410 " g %d avl %d)\n",
411 name, seg->selector, seg->base, seg->limit, seg->present,
412 seg->dpl, seg->db, seg->s, seg->type, seg->l, seg->g,
413 seg->avl);
416 static void print_dt(FILE *file, const char *name, struct kvm_dtable *dt)
418 fprintf(stderr, "%s %llx/%x\n", name, dt->base, dt->limit);
421 void kvm_show_regs(CPUState *env)
423 struct kvm_regs regs;
424 struct kvm_sregs sregs;
425 int r;
427 r = kvm_vcpu_ioctl(env, KVM_GET_REGS, &regs);
428 if (r < 0) {
429 perror("KVM_GET_REGS");
430 return;
432 fprintf(stderr,
433 "rax %016llx rbx %016llx rcx %016llx rdx %016llx\n"
434 "rsi %016llx rdi %016llx rsp %016llx rbp %016llx\n"
435 "r8 %016llx r9 %016llx r10 %016llx r11 %016llx\n"
436 "r12 %016llx r13 %016llx r14 %016llx r15 %016llx\n"
437 "rip %016llx rflags %08llx\n",
438 regs.rax, regs.rbx, regs.rcx, regs.rdx,
439 regs.rsi, regs.rdi, regs.rsp, regs.rbp,
440 regs.r8, regs.r9, regs.r10, regs.r11,
441 regs.r12, regs.r13, regs.r14, regs.r15,
442 regs.rip, regs.rflags);
443 r = kvm_vcpu_ioctl(env, KVM_GET_SREGS, &sregs);
444 if (r < 0) {
445 perror("KVM_GET_SREGS");
446 return;
448 print_seg(stderr, "cs", &sregs.cs);
449 print_seg(stderr, "ds", &sregs.ds);
450 print_seg(stderr, "es", &sregs.es);
451 print_seg(stderr, "ss", &sregs.ss);
452 print_seg(stderr, "fs", &sregs.fs);
453 print_seg(stderr, "gs", &sregs.gs);
454 print_seg(stderr, "tr", &sregs.tr);
455 print_seg(stderr, "ldt", &sregs.ldt);
456 print_dt(stderr, "gdt", &sregs.gdt);
457 print_dt(stderr, "idt", &sregs.idt);
458 fprintf(stderr, "cr0 %llx cr2 %llx cr3 %llx cr4 %llx cr8 %llx"
459 " efer %llx\n",
460 sregs.cr0, sregs.cr2, sregs.cr3, sregs.cr4, sregs.cr8,
461 sregs.efer);
464 static void kvm_set_cr8(CPUState *env, uint64_t cr8)
466 env->kvm_run->cr8 = cr8;
469 int kvm_setup_cpuid(CPUState *env, int nent,
470 struct kvm_cpuid_entry *entries)
472 struct kvm_cpuid *cpuid;
473 int r;
475 cpuid = qemu_malloc(sizeof(*cpuid) + nent * sizeof(*entries));
477 cpuid->nent = nent;
478 memcpy(cpuid->entries, entries, nent * sizeof(*entries));
479 r = kvm_vcpu_ioctl(env, KVM_SET_CPUID, cpuid);
481 free(cpuid);
482 return r;
485 int kvm_setup_cpuid2(CPUState *env, int nent,
486 struct kvm_cpuid_entry2 *entries)
488 struct kvm_cpuid2 *cpuid;
489 int r;
491 cpuid = qemu_malloc(sizeof(*cpuid) + nent * sizeof(*entries));
493 cpuid->nent = nent;
494 memcpy(cpuid->entries, entries, nent * sizeof(*entries));
495 r = kvm_vcpu_ioctl(env, KVM_SET_CPUID2, cpuid);
496 free(cpuid);
497 return r;
500 int kvm_set_shadow_pages(kvm_context_t kvm, unsigned int nrshadow_pages)
502 #ifdef KVM_CAP_MMU_SHADOW_CACHE_CONTROL
503 int r;
505 r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION,
506 KVM_CAP_MMU_SHADOW_CACHE_CONTROL);
507 if (r > 0) {
508 r = kvm_vm_ioctl(kvm_state, KVM_SET_NR_MMU_PAGES, nrshadow_pages);
509 if (r < 0) {
510 fprintf(stderr, "kvm_set_shadow_pages: %m\n");
511 return r;
513 return 0;
515 #endif
516 return -1;
519 int kvm_get_shadow_pages(kvm_context_t kvm, unsigned int *nrshadow_pages)
521 #ifdef KVM_CAP_MMU_SHADOW_CACHE_CONTROL
522 int r;
524 r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION,
525 KVM_CAP_MMU_SHADOW_CACHE_CONTROL);
526 if (r > 0) {
527 *nrshadow_pages = kvm_vm_ioctl(kvm_state, KVM_GET_NR_MMU_PAGES);
528 return 0;
530 #endif
531 return -1;
534 #ifdef KVM_CAP_VAPIC
535 static int kvm_enable_tpr_access_reporting(CPUState *env)
537 int r;
538 struct kvm_tpr_access_ctl tac = { .enabled = 1 };
540 r = kvm_ioctl(env->kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_VAPIC);
541 if (r <= 0)
542 return -ENOSYS;
543 return kvm_vcpu_ioctl(env, KVM_TPR_ACCESS_REPORTING, &tac);
545 #endif
547 #ifdef KVM_CAP_ADJUST_CLOCK
548 static struct kvm_clock_data kvmclock_data;
550 static void kvmclock_pre_save(void *opaque)
552 struct kvm_clock_data *cl = opaque;
554 kvm_vm_ioctl(kvm_state, KVM_GET_CLOCK, cl);
557 static int kvmclock_post_load(void *opaque, int version_id)
559 struct kvm_clock_data *cl = opaque;
561 return kvm_vm_ioctl(kvm_state, KVM_SET_CLOCK, cl);
564 static const VMStateDescription vmstate_kvmclock= {
565 .name = "kvmclock",
566 .version_id = 1,
567 .minimum_version_id = 1,
568 .minimum_version_id_old = 1,
569 .pre_save = kvmclock_pre_save,
570 .post_load = kvmclock_post_load,
571 .fields = (VMStateField []) {
572 VMSTATE_U64(clock, struct kvm_clock_data),
573 VMSTATE_END_OF_LIST()
576 #endif
578 int kvm_arch_qemu_create_context(void)
580 int i, r;
581 struct utsname utsname;
583 uname(&utsname);
584 lm_capable_kernel = strcmp(utsname.machine, "x86_64") == 0;
586 if (kvm_shadow_memory)
587 kvm_set_shadow_pages(kvm_context, kvm_shadow_memory);
589 kvm_msr_list = kvm_get_msr_list();
590 if (!kvm_msr_list)
591 return -1;
592 for (i = 0; i < kvm_msr_list->nmsrs; ++i) {
593 if (kvm_msr_list->indices[i] == MSR_STAR)
594 kvm_has_msr_star = 1;
595 if (kvm_msr_list->indices[i] == MSR_VM_HSAVE_PA)
596 kvm_has_vm_hsave_pa = 1;
599 #ifdef KVM_CAP_ADJUST_CLOCK
600 if (kvm_check_extension(kvm_state, KVM_CAP_ADJUST_CLOCK))
601 vmstate_register(0, &vmstate_kvmclock, &kvmclock_data);
602 #endif
604 r = kvm_set_boot_cpu_id(0);
605 if (r < 0 && r != -ENOSYS) {
606 return r;
609 return 0;
612 /* returns 0 on success, non-0 on failure */
613 static int get_msr_entry(struct kvm_msr_entry *entry, CPUState *env)
615 switch (entry->index) {
616 case MSR_IA32_SYSENTER_CS:
617 env->sysenter_cs = entry->data;
618 break;
619 case MSR_IA32_SYSENTER_ESP:
620 env->sysenter_esp = entry->data;
621 break;
622 case MSR_IA32_SYSENTER_EIP:
623 env->sysenter_eip = entry->data;
624 break;
625 case MSR_STAR:
626 env->star = entry->data;
627 break;
628 #ifdef TARGET_X86_64
629 case MSR_CSTAR:
630 env->cstar = entry->data;
631 break;
632 case MSR_KERNELGSBASE:
633 env->kernelgsbase = entry->data;
634 break;
635 case MSR_FMASK:
636 env->fmask = entry->data;
637 break;
638 case MSR_LSTAR:
639 env->lstar = entry->data;
640 break;
641 #endif
642 case MSR_IA32_TSC:
643 env->tsc = entry->data;
644 break;
645 case MSR_VM_HSAVE_PA:
646 env->vm_hsave = entry->data;
647 break;
648 case MSR_KVM_SYSTEM_TIME:
649 env->system_time_msr = entry->data;
650 break;
651 case MSR_KVM_WALL_CLOCK:
652 env->wall_clock_msr = entry->data;
653 break;
654 #ifdef KVM_CAP_MCE
655 case MSR_MCG_STATUS:
656 env->mcg_status = entry->data;
657 break;
658 case MSR_MCG_CTL:
659 env->mcg_ctl = entry->data;
660 break;
661 #endif
662 default:
663 #ifdef KVM_CAP_MCE
664 if (entry->index >= MSR_MC0_CTL && \
665 entry->index < MSR_MC0_CTL + (env->mcg_cap & 0xff) * 4) {
666 env->mce_banks[entry->index - MSR_MC0_CTL] = entry->data;
667 break;
669 #endif
670 printf("Warning unknown msr index 0x%x\n", entry->index);
671 return 1;
673 return 0;
676 static void kvm_arch_save_mpstate(CPUState *env)
678 #ifdef KVM_CAP_MP_STATE
679 int r;
680 struct kvm_mp_state mp_state;
682 r = kvm_get_mpstate(env, &mp_state);
683 if (r < 0) {
684 env->mp_state = -1;
685 } else {
686 env->mp_state = mp_state.mp_state;
687 if (kvm_irqchip_in_kernel()) {
688 env->halted = (env->mp_state == KVM_MP_STATE_HALTED);
691 #else
692 env->mp_state = -1;
693 #endif
696 static void kvm_arch_load_mpstate(CPUState *env)
698 #ifdef KVM_CAP_MP_STATE
699 struct kvm_mp_state mp_state;
702 * -1 indicates that the host did not support GET_MP_STATE ioctl,
703 * so don't touch it.
705 if (env->mp_state != -1) {
706 mp_state.mp_state = env->mp_state;
707 kvm_set_mpstate(env, &mp_state);
709 #endif
712 static void kvm_reset_mpstate(CPUState *env)
714 #ifdef KVM_CAP_MP_STATE
715 if (kvm_check_extension(kvm_state, KVM_CAP_MP_STATE)) {
716 if (kvm_irqchip_in_kernel()) {
717 env->mp_state = cpu_is_bsp(env) ? KVM_MP_STATE_RUNNABLE :
718 KVM_MP_STATE_UNINITIALIZED;
719 } else {
720 env->mp_state = KVM_MP_STATE_RUNNABLE;
723 #endif
726 static void set_v8086_seg(struct kvm_segment *lhs, const SegmentCache *rhs)
728 lhs->selector = rhs->selector;
729 lhs->base = rhs->base;
730 lhs->limit = rhs->limit;
731 lhs->type = 3;
732 lhs->present = 1;
733 lhs->dpl = 3;
734 lhs->db = 0;
735 lhs->s = 1;
736 lhs->l = 0;
737 lhs->g = 0;
738 lhs->avl = 0;
739 lhs->unusable = 0;
742 static void set_seg(struct kvm_segment *lhs, const SegmentCache *rhs)
744 unsigned flags = rhs->flags;
745 lhs->selector = rhs->selector;
746 lhs->base = rhs->base;
747 lhs->limit = rhs->limit;
748 lhs->type = (flags >> DESC_TYPE_SHIFT) & 15;
749 lhs->present = (flags & DESC_P_MASK) != 0;
750 lhs->dpl = rhs->selector & 3;
751 lhs->db = (flags >> DESC_B_SHIFT) & 1;
752 lhs->s = (flags & DESC_S_MASK) != 0;
753 lhs->l = (flags >> DESC_L_SHIFT) & 1;
754 lhs->g = (flags & DESC_G_MASK) != 0;
755 lhs->avl = (flags & DESC_AVL_MASK) != 0;
756 lhs->unusable = 0;
759 static void get_seg(SegmentCache *lhs, const struct kvm_segment *rhs)
761 lhs->selector = rhs->selector;
762 lhs->base = rhs->base;
763 lhs->limit = rhs->limit;
764 lhs->flags =
765 (rhs->type << DESC_TYPE_SHIFT)
766 | (rhs->present * DESC_P_MASK)
767 | (rhs->dpl << DESC_DPL_SHIFT)
768 | (rhs->db << DESC_B_SHIFT)
769 | (rhs->s * DESC_S_MASK)
770 | (rhs->l << DESC_L_SHIFT)
771 | (rhs->g * DESC_G_MASK)
772 | (rhs->avl * DESC_AVL_MASK);
775 void kvm_arch_load_regs(CPUState *env, int level)
777 struct kvm_regs regs;
778 struct kvm_fpu fpu;
779 struct kvm_sregs sregs;
780 struct kvm_msr_entry msrs[100];
781 int rc, n, i;
783 assert(kvm_cpu_is_stopped(env) || env->thread_id == kvm_get_thread_id());
785 regs.rax = env->regs[R_EAX];
786 regs.rbx = env->regs[R_EBX];
787 regs.rcx = env->regs[R_ECX];
788 regs.rdx = env->regs[R_EDX];
789 regs.rsi = env->regs[R_ESI];
790 regs.rdi = env->regs[R_EDI];
791 regs.rsp = env->regs[R_ESP];
792 regs.rbp = env->regs[R_EBP];
793 #ifdef TARGET_X86_64
794 regs.r8 = env->regs[8];
795 regs.r9 = env->regs[9];
796 regs.r10 = env->regs[10];
797 regs.r11 = env->regs[11];
798 regs.r12 = env->regs[12];
799 regs.r13 = env->regs[13];
800 regs.r14 = env->regs[14];
801 regs.r15 = env->regs[15];
802 #endif
804 regs.rflags = env->eflags;
805 regs.rip = env->eip;
807 kvm_set_regs(env, &regs);
809 memset(&fpu, 0, sizeof fpu);
810 fpu.fsw = env->fpus & ~(7 << 11);
811 fpu.fsw |= (env->fpstt & 7) << 11;
812 fpu.fcw = env->fpuc;
813 for (i = 0; i < 8; ++i)
814 fpu.ftwx |= (!env->fptags[i]) << i;
815 memcpy(fpu.fpr, env->fpregs, sizeof env->fpregs);
816 memcpy(fpu.xmm, env->xmm_regs, sizeof env->xmm_regs);
817 fpu.mxcsr = env->mxcsr;
818 kvm_set_fpu(env, &fpu);
820 memset(sregs.interrupt_bitmap, 0, sizeof(sregs.interrupt_bitmap));
821 if (env->interrupt_injected >= 0) {
822 sregs.interrupt_bitmap[env->interrupt_injected / 64] |=
823 (uint64_t)1 << (env->interrupt_injected % 64);
826 if ((env->eflags & VM_MASK)) {
827 set_v8086_seg(&sregs.cs, &env->segs[R_CS]);
828 set_v8086_seg(&sregs.ds, &env->segs[R_DS]);
829 set_v8086_seg(&sregs.es, &env->segs[R_ES]);
830 set_v8086_seg(&sregs.fs, &env->segs[R_FS]);
831 set_v8086_seg(&sregs.gs, &env->segs[R_GS]);
832 set_v8086_seg(&sregs.ss, &env->segs[R_SS]);
833 } else {
834 set_seg(&sregs.cs, &env->segs[R_CS]);
835 set_seg(&sregs.ds, &env->segs[R_DS]);
836 set_seg(&sregs.es, &env->segs[R_ES]);
837 set_seg(&sregs.fs, &env->segs[R_FS]);
838 set_seg(&sregs.gs, &env->segs[R_GS]);
839 set_seg(&sregs.ss, &env->segs[R_SS]);
841 if (env->cr[0] & CR0_PE_MASK) {
842 /* force ss cpl to cs cpl */
843 sregs.ss.selector = (sregs.ss.selector & ~3) |
844 (sregs.cs.selector & 3);
845 sregs.ss.dpl = sregs.ss.selector & 3;
849 set_seg(&sregs.tr, &env->tr);
850 set_seg(&sregs.ldt, &env->ldt);
852 sregs.idt.limit = env->idt.limit;
853 sregs.idt.base = env->idt.base;
854 sregs.gdt.limit = env->gdt.limit;
855 sregs.gdt.base = env->gdt.base;
857 sregs.cr0 = env->cr[0];
858 sregs.cr2 = env->cr[2];
859 sregs.cr3 = env->cr[3];
860 sregs.cr4 = env->cr[4];
862 sregs.cr8 = cpu_get_apic_tpr(env);
863 sregs.apic_base = cpu_get_apic_base(env);
865 sregs.efer = env->efer;
867 kvm_set_sregs(env, &sregs);
869 /* msrs */
870 n = 0;
871 /* Remember to increase msrs size if you add new registers below */
872 kvm_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_CS, env->sysenter_cs);
873 kvm_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_ESP, env->sysenter_esp);
874 kvm_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_EIP, env->sysenter_eip);
875 if (kvm_has_msr_star)
876 kvm_msr_entry_set(&msrs[n++], MSR_STAR, env->star);
877 if (kvm_has_vm_hsave_pa)
878 kvm_msr_entry_set(&msrs[n++], MSR_VM_HSAVE_PA, env->vm_hsave);
879 #ifdef TARGET_X86_64
880 if (lm_capable_kernel) {
881 kvm_msr_entry_set(&msrs[n++], MSR_CSTAR, env->cstar);
882 kvm_msr_entry_set(&msrs[n++], MSR_KERNELGSBASE, env->kernelgsbase);
883 kvm_msr_entry_set(&msrs[n++], MSR_FMASK, env->fmask);
884 kvm_msr_entry_set(&msrs[n++], MSR_LSTAR , env->lstar);
886 #endif
887 if (level == KVM_PUT_FULL_STATE) {
889 * KVM is yet unable to synchronize TSC values of multiple VCPUs on
890 * writeback. Until this is fixed, we only write the offset to SMP
891 * guests after migration, desynchronizing the VCPUs, but avoiding
892 * huge jump-backs that would occur without any writeback at all.
894 if (smp_cpus == 1 || env->tsc != 0) {
895 kvm_msr_entry_set(&msrs[n++], MSR_IA32_TSC, env->tsc);
897 kvm_msr_entry_set(&msrs[n++], MSR_KVM_SYSTEM_TIME, env->system_time_msr);
898 kvm_msr_entry_set(&msrs[n++], MSR_KVM_WALL_CLOCK, env->wall_clock_msr);
900 #ifdef KVM_CAP_MCE
901 if (env->mcg_cap) {
902 if (level == KVM_PUT_RESET_STATE)
903 kvm_msr_entry_set(&msrs[n++], MSR_MCG_STATUS, env->mcg_status);
904 else if (level == KVM_PUT_FULL_STATE) {
905 kvm_msr_entry_set(&msrs[n++], MSR_MCG_STATUS, env->mcg_status);
906 kvm_msr_entry_set(&msrs[n++], MSR_MCG_CTL, env->mcg_ctl);
907 for (i = 0; i < (env->mcg_cap & 0xff); i++)
908 kvm_msr_entry_set(&msrs[n++], MSR_MC0_CTL + i, env->mce_banks[i]);
911 #endif
913 rc = kvm_set_msrs(env, msrs, n);
914 if (rc == -1)
915 perror("kvm_set_msrs FAILED");
917 if (level >= KVM_PUT_RESET_STATE) {
918 kvm_arch_load_mpstate(env);
919 kvm_load_lapic(env);
921 if (level == KVM_PUT_FULL_STATE) {
922 if (env->kvm_vcpu_update_vapic)
923 kvm_tpr_enable_vapic(env);
926 kvm_put_vcpu_events(env, level);
927 kvm_put_debugregs(env);
929 /* must be last */
930 kvm_guest_debug_workarounds(env);
933 void kvm_arch_save_regs(CPUState *env)
935 struct kvm_regs regs;
936 struct kvm_fpu fpu;
937 struct kvm_sregs sregs;
938 struct kvm_msr_entry msrs[100];
939 uint32_t hflags;
940 uint32_t i, n, rc, bit;
942 assert(kvm_cpu_is_stopped(env) || env->thread_id == kvm_get_thread_id());
944 kvm_get_regs(env, &regs);
946 env->regs[R_EAX] = regs.rax;
947 env->regs[R_EBX] = regs.rbx;
948 env->regs[R_ECX] = regs.rcx;
949 env->regs[R_EDX] = regs.rdx;
950 env->regs[R_ESI] = regs.rsi;
951 env->regs[R_EDI] = regs.rdi;
952 env->regs[R_ESP] = regs.rsp;
953 env->regs[R_EBP] = regs.rbp;
954 #ifdef TARGET_X86_64
955 env->regs[8] = regs.r8;
956 env->regs[9] = regs.r9;
957 env->regs[10] = regs.r10;
958 env->regs[11] = regs.r11;
959 env->regs[12] = regs.r12;
960 env->regs[13] = regs.r13;
961 env->regs[14] = regs.r14;
962 env->regs[15] = regs.r15;
963 #endif
965 env->eflags = regs.rflags;
966 env->eip = regs.rip;
968 kvm_get_fpu(env, &fpu);
969 env->fpstt = (fpu.fsw >> 11) & 7;
970 env->fpus = fpu.fsw;
971 env->fpuc = fpu.fcw;
972 for (i = 0; i < 8; ++i)
973 env->fptags[i] = !((fpu.ftwx >> i) & 1);
974 memcpy(env->fpregs, fpu.fpr, sizeof env->fpregs);
975 memcpy(env->xmm_regs, fpu.xmm, sizeof env->xmm_regs);
976 env->mxcsr = fpu.mxcsr;
978 kvm_get_sregs(env, &sregs);
980 /* There can only be one pending IRQ set in the bitmap at a time, so try
981 to find it and save its number instead (-1 for none). */
982 env->interrupt_injected = -1;
983 for (i = 0; i < ARRAY_SIZE(sregs.interrupt_bitmap); i++) {
984 if (sregs.interrupt_bitmap[i]) {
985 bit = ctz64(sregs.interrupt_bitmap[i]);
986 env->interrupt_injected = i * 64 + bit;
987 break;
991 get_seg(&env->segs[R_CS], &sregs.cs);
992 get_seg(&env->segs[R_DS], &sregs.ds);
993 get_seg(&env->segs[R_ES], &sregs.es);
994 get_seg(&env->segs[R_FS], &sregs.fs);
995 get_seg(&env->segs[R_GS], &sregs.gs);
996 get_seg(&env->segs[R_SS], &sregs.ss);
998 get_seg(&env->tr, &sregs.tr);
999 get_seg(&env->ldt, &sregs.ldt);
1001 env->idt.limit = sregs.idt.limit;
1002 env->idt.base = sregs.idt.base;
1003 env->gdt.limit = sregs.gdt.limit;
1004 env->gdt.base = sregs.gdt.base;
1006 env->cr[0] = sregs.cr0;
1007 env->cr[2] = sregs.cr2;
1008 env->cr[3] = sregs.cr3;
1009 env->cr[4] = sregs.cr4;
1011 cpu_set_apic_base(env, sregs.apic_base);
1013 env->efer = sregs.efer;
1014 //cpu_set_apic_tpr(env, sregs.cr8);
1016 #define HFLAG_COPY_MASK ~( \
1017 HF_CPL_MASK | HF_PE_MASK | HF_MP_MASK | HF_EM_MASK | \
1018 HF_TS_MASK | HF_TF_MASK | HF_VM_MASK | HF_IOPL_MASK | \
1019 HF_OSFXSR_MASK | HF_LMA_MASK | HF_CS32_MASK | \
1020 HF_SS32_MASK | HF_CS64_MASK | HF_ADDSEG_MASK)
1024 hflags = (env->segs[R_CS].flags >> DESC_DPL_SHIFT) & HF_CPL_MASK;
1025 hflags |= (env->cr[0] & CR0_PE_MASK) << (HF_PE_SHIFT - CR0_PE_SHIFT);
1026 hflags |= (env->cr[0] << (HF_MP_SHIFT - CR0_MP_SHIFT)) &
1027 (HF_MP_MASK | HF_EM_MASK | HF_TS_MASK);
1028 hflags |= (env->eflags & (HF_TF_MASK | HF_VM_MASK | HF_IOPL_MASK));
1029 hflags |= (env->cr[4] & CR4_OSFXSR_MASK) <<
1030 (HF_OSFXSR_SHIFT - CR4_OSFXSR_SHIFT);
1032 if (env->efer & MSR_EFER_LMA) {
1033 hflags |= HF_LMA_MASK;
1036 if ((hflags & HF_LMA_MASK) && (env->segs[R_CS].flags & DESC_L_MASK)) {
1037 hflags |= HF_CS32_MASK | HF_SS32_MASK | HF_CS64_MASK;
1038 } else {
1039 hflags |= (env->segs[R_CS].flags & DESC_B_MASK) >>
1040 (DESC_B_SHIFT - HF_CS32_SHIFT);
1041 hflags |= (env->segs[R_SS].flags & DESC_B_MASK) >>
1042 (DESC_B_SHIFT - HF_SS32_SHIFT);
1043 if (!(env->cr[0] & CR0_PE_MASK) ||
1044 (env->eflags & VM_MASK) ||
1045 !(hflags & HF_CS32_MASK)) {
1046 hflags |= HF_ADDSEG_MASK;
1047 } else {
1048 hflags |= ((env->segs[R_DS].base |
1049 env->segs[R_ES].base |
1050 env->segs[R_SS].base) != 0) <<
1051 HF_ADDSEG_SHIFT;
1054 env->hflags = (env->hflags & HFLAG_COPY_MASK) | hflags;
1056 /* msrs */
1057 n = 0;
1058 /* Remember to increase msrs size if you add new registers below */
1059 msrs[n++].index = MSR_IA32_SYSENTER_CS;
1060 msrs[n++].index = MSR_IA32_SYSENTER_ESP;
1061 msrs[n++].index = MSR_IA32_SYSENTER_EIP;
1062 if (kvm_has_msr_star)
1063 msrs[n++].index = MSR_STAR;
1064 msrs[n++].index = MSR_IA32_TSC;
1065 if (kvm_has_vm_hsave_pa)
1066 msrs[n++].index = MSR_VM_HSAVE_PA;
1067 #ifdef TARGET_X86_64
1068 if (lm_capable_kernel) {
1069 msrs[n++].index = MSR_CSTAR;
1070 msrs[n++].index = MSR_KERNELGSBASE;
1071 msrs[n++].index = MSR_FMASK;
1072 msrs[n++].index = MSR_LSTAR;
1074 #endif
1075 msrs[n++].index = MSR_KVM_SYSTEM_TIME;
1076 msrs[n++].index = MSR_KVM_WALL_CLOCK;
1078 #ifdef KVM_CAP_MCE
1079 if (env->mcg_cap) {
1080 msrs[n++].index = MSR_MCG_STATUS;
1081 msrs[n++].index = MSR_MCG_CTL;
1082 for (i = 0; i < (env->mcg_cap & 0xff) * 4; i++)
1083 msrs[n++].index = MSR_MC0_CTL + i;
1085 #endif
1087 rc = kvm_get_msrs(env, msrs, n);
1088 if (rc == -1) {
1089 perror("kvm_get_msrs FAILED");
1091 else {
1092 n = rc; /* actual number of MSRs */
1093 for (i=0 ; i<n; i++) {
1094 if (get_msr_entry(&msrs[i], env))
1095 return;
1098 kvm_arch_save_mpstate(env);
1099 kvm_save_lapic(env);
1100 kvm_get_vcpu_events(env);
1101 kvm_get_debugregs(env);
1104 static void do_cpuid_ent(struct kvm_cpuid_entry2 *e, uint32_t function,
1105 uint32_t count, CPUState *env)
1107 env->regs[R_EAX] = function;
1108 env->regs[R_ECX] = count;
1109 qemu_kvm_cpuid_on_env(env);
1110 e->function = function;
1111 e->flags = 0;
1112 e->index = 0;
1113 e->eax = env->regs[R_EAX];
1114 e->ebx = env->regs[R_EBX];
1115 e->ecx = env->regs[R_ECX];
1116 e->edx = env->regs[R_EDX];
/*
 * Pairs of (KVM capability, paravirt feature bit number). The table ends
 * with a { -1, -1 } sentinel entry.
 */
struct kvm_para_features {
    int cap;        /* capability checked with kvm_check_extension() */
    int feature;    /* bit number set in the feature word when available */
} para_features[] = {
#ifdef KVM_CAP_CLOCKSOURCE
    { KVM_CAP_CLOCKSOURCE, KVM_FEATURE_CLOCKSOURCE },
#endif
#ifdef KVM_CAP_NOP_IO_DELAY
    { KVM_CAP_NOP_IO_DELAY, KVM_FEATURE_NOP_IO_DELAY },
#endif
#ifdef KVM_CAP_PV_MMU
    { KVM_CAP_PV_MMU, KVM_FEATURE_MMU_OP },
#endif
    { -1, -1 }
};
1135 static int get_para_features(kvm_context_t kvm_context)
1137 int i, features = 0;
1139 for (i = 0; i < ARRAY_SIZE(para_features)-1; i++) {
1140 if (kvm_check_extension(kvm_state, para_features[i].cap))
1141 features |= (1 << para_features[i].feature);
1144 return features;
/*
 * Clear every bit of *features that is not also set in supported.
 */
static void kvm_trim_features(uint32_t *features, uint32_t supported)
{
    /* Dropping each unsupported bit one by one is a plain AND. */
    *features &= supported;
}
1160 int kvm_arch_init_vcpu(CPUState *cenv)
1162 struct kvm_cpuid_entry2 cpuid_ent[100];
1163 #ifdef KVM_CPUID_SIGNATURE
1164 struct kvm_cpuid_entry2 *pv_ent;
1165 uint32_t signature[3];
1166 #endif
1167 int cpuid_nent = 0;
1168 CPUState copy;
1169 uint32_t i, j, limit;
1171 kvm_arch_reset_vcpu(cenv);
1173 #ifdef KVM_CPUID_SIGNATURE
1174 /* Paravirtualization CPUIDs */
1175 memcpy(signature, "KVMKVMKVM\0\0\0", 12);
1176 pv_ent = &cpuid_ent[cpuid_nent++];
1177 memset(pv_ent, 0, sizeof(*pv_ent));
1178 pv_ent->function = KVM_CPUID_SIGNATURE;
1179 pv_ent->eax = 0;
1180 pv_ent->ebx = signature[0];
1181 pv_ent->ecx = signature[1];
1182 pv_ent->edx = signature[2];
1184 pv_ent = &cpuid_ent[cpuid_nent++];
1185 memset(pv_ent, 0, sizeof(*pv_ent));
1186 pv_ent->function = KVM_CPUID_FEATURES;
1187 pv_ent->eax = get_para_features(kvm_context);
1188 #endif
1190 kvm_trim_features(&cenv->cpuid_features,
1191 kvm_arch_get_supported_cpuid(cenv, 1, R_EDX));
1193 /* prevent the hypervisor bit from being cleared by the kernel */
1194 i = cenv->cpuid_ext_features & CPUID_EXT_HYPERVISOR;
1195 kvm_trim_features(&cenv->cpuid_ext_features,
1196 kvm_arch_get_supported_cpuid(cenv, 1, R_ECX));
1197 cenv->cpuid_ext_features |= i;
1199 kvm_trim_features(&cenv->cpuid_ext2_features,
1200 kvm_arch_get_supported_cpuid(cenv, 0x80000001, R_EDX));
1201 kvm_trim_features(&cenv->cpuid_ext3_features,
1202 kvm_arch_get_supported_cpuid(cenv, 0x80000001, R_ECX));
1204 copy = *cenv;
1206 copy.regs[R_EAX] = 0;
1207 qemu_kvm_cpuid_on_env(&copy);
1208 limit = copy.regs[R_EAX];
1210 for (i = 0; i <= limit; ++i) {
1211 if (i == 4 || i == 0xb || i == 0xd) {
1212 for (j = 0; ; ++j) {
1213 do_cpuid_ent(&cpuid_ent[cpuid_nent], i, j, &copy);
1215 cpuid_ent[cpuid_nent].flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
1216 cpuid_ent[cpuid_nent].index = j;
1218 cpuid_nent++;
1220 if (i == 4 && copy.regs[R_EAX] == 0)
1221 break;
1222 if (i == 0xb && !(copy.regs[R_ECX] & 0xff00))
1223 break;
1224 if (i == 0xd && copy.regs[R_EAX] == 0)
1225 break;
1227 } else
1228 do_cpuid_ent(&cpuid_ent[cpuid_nent++], i, 0, &copy);
1231 copy.regs[R_EAX] = 0x80000000;
1232 qemu_kvm_cpuid_on_env(&copy);
1233 limit = copy.regs[R_EAX];
1235 for (i = 0x80000000; i <= limit; ++i)
1236 do_cpuid_ent(&cpuid_ent[cpuid_nent++], i, 0, &copy);
1238 kvm_setup_cpuid2(cenv, cpuid_nent, cpuid_ent);
1240 #ifdef KVM_CAP_MCE
1241 if (((cenv->cpuid_version >> 8)&0xF) >= 6
1242 && (cenv->cpuid_features&(CPUID_MCE|CPUID_MCA)) == (CPUID_MCE|CPUID_MCA)
1243 && kvm_check_extension(kvm_state, KVM_CAP_MCE) > 0) {
1244 uint64_t mcg_cap;
1245 int banks;
1247 if (kvm_get_mce_cap_supported(kvm_context, &mcg_cap, &banks))
1248 perror("kvm_get_mce_cap_supported FAILED");
1249 else {
1250 if (banks > MCE_BANKS_DEF)
1251 banks = MCE_BANKS_DEF;
1252 mcg_cap &= MCE_CAP_DEF;
1253 mcg_cap |= banks;
1254 if (kvm_setup_mce(cenv, &mcg_cap))
1255 perror("kvm_setup_mce FAILED");
1256 else
1257 cenv->mcg_cap = mcg_cap;
1260 #endif
1262 #ifdef KVM_EXIT_TPR_ACCESS
1263 kvm_enable_tpr_access_reporting(cenv);
1264 #endif
1265 kvm_reset_mpstate(cenv);
1266 return 0;
1269 int kvm_arch_halt(CPUState *env)
1272 if (!((env->interrupt_request & CPU_INTERRUPT_HARD) &&
1273 (env->eflags & IF_MASK)) &&
1274 !(env->interrupt_request & CPU_INTERRUPT_NMI)) {
1275 env->halted = 1;
1277 return 1;
1280 int kvm_arch_pre_run(CPUState *env, struct kvm_run *run)
1282 if (!kvm_irqchip_in_kernel())
1283 kvm_set_cr8(env, cpu_get_apic_tpr(env));
1284 return 0;
1287 int kvm_arch_has_work(CPUState *env)
1289 if (((env->interrupt_request & CPU_INTERRUPT_HARD) &&
1290 (env->eflags & IF_MASK)) ||
1291 (env->interrupt_request & CPU_INTERRUPT_NMI))
1292 return 1;
1293 return 0;
1296 int kvm_arch_try_push_interrupts(void *opaque)
1298 CPUState *env = cpu_single_env;
1299 int r, irq;
1301 if (kvm_is_ready_for_interrupt_injection(env) &&
1302 (env->interrupt_request & CPU_INTERRUPT_HARD) &&
1303 (env->eflags & IF_MASK)) {
1304 env->interrupt_request &= ~CPU_INTERRUPT_HARD;
1305 irq = cpu_get_pic_interrupt(env);
1306 if (irq >= 0) {
1307 r = kvm_inject_irq(env, irq);
1308 if (r < 0)
1309 printf("cpu %d fail inject %x\n", env->cpu_index, irq);
1313 return (env->interrupt_request & CPU_INTERRUPT_HARD) != 0;
1316 #ifdef KVM_CAP_USER_NMI
1317 void kvm_arch_push_nmi(void *opaque)
1319 CPUState *env = cpu_single_env;
1320 int r;
1322 if (likely(!(env->interrupt_request & CPU_INTERRUPT_NMI)))
1323 return;
1325 env->interrupt_request &= ~CPU_INTERRUPT_NMI;
1326 r = kvm_inject_nmi(env);
1327 if (r < 0)
1328 printf("cpu %d fail inject NMI\n", env->cpu_index);
1330 #endif /* KVM_CAP_USER_NMI */
1332 static int kvm_reset_msrs(CPUState *env)
1334 struct {
1335 struct kvm_msrs info;
1336 struct kvm_msr_entry entries[100];
1337 } msr_data;
1338 int n;
1339 struct kvm_msr_entry *msrs = msr_data.entries;
1341 if (!kvm_msr_list)
1342 return -1;
1344 for (n = 0; n < kvm_msr_list->nmsrs; n++) {
1345 kvm_msr_entry_set(&msrs[n], kvm_msr_list->indices[n], 0);
1348 msr_data.info.nmsrs = n;
1350 return kvm_vcpu_ioctl(env, KVM_SET_MSRS, &msr_data);
1354 void kvm_arch_cpu_reset(CPUState *env)
1356 kvm_reset_msrs(env);
1357 kvm_arch_reset_vcpu(env);
1358 kvm_reset_mpstate(env);
1361 #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
1362 void kvm_arch_do_ioperm(void *_data)
1364 struct ioperm_data *data = _data;
1365 ioperm(data->start_port, data->num, data->turn_on);
1367 #endif
1370 * Setup x86 specific IRQ routing
1372 int kvm_arch_init_irq_routing(void)
1374 int i, r;
1376 if (kvm_irqchip && kvm_has_gsi_routing(kvm_context)) {
1377 kvm_clear_gsi_routes(kvm_context);
1378 for (i = 0; i < 8; ++i) {
1379 if (i == 2)
1380 continue;
1381 r = kvm_add_irq_route(kvm_context, i, KVM_IRQCHIP_PIC_MASTER, i);
1382 if (r < 0)
1383 return r;
1385 for (i = 8; i < 16; ++i) {
1386 r = kvm_add_irq_route(kvm_context, i, KVM_IRQCHIP_PIC_SLAVE, i - 8);
1387 if (r < 0)
1388 return r;
1390 for (i = 0; i < 24; ++i) {
1391 if (i == 0) {
1392 r = kvm_add_irq_route(kvm_context, i, KVM_IRQCHIP_IOAPIC, 2);
1393 } else if (i != 2) {
1394 r = kvm_add_irq_route(kvm_context, i, KVM_IRQCHIP_IOAPIC, i);
1396 if (r < 0)
1397 return r;
1399 kvm_commit_irq_routes(kvm_context);
1401 return 0;
1404 void kvm_arch_process_irqchip_events(CPUState *env)
1406 if (env->interrupt_request & CPU_INTERRUPT_INIT) {
1407 kvm_cpu_synchronize_state(env);
1408 do_cpu_init(env);
1410 if (env->interrupt_request & CPU_INTERRUPT_SIPI) {
1411 kvm_cpu_synchronize_state(env);
1412 do_cpu_sipi(env);