2 * qemu/kvm integration, x86 specific code
4 * Copyright (C) 2006-2008 Qumranet Technologies
6 * Licensed under the terms of the GNU GPL version 2 or higher.
10 #include "config-host.h"
20 #include <sys/utsname.h>
21 #include <linux/kvm_para.h>
22 #include <sys/ioctl.h>
27 #define MSR_IA32_TSC 0x10
/* Cached result of KVM_GET_MSR_INDEX_LIST; filled in by
 * kvm_arch_qemu_create_context() via kvm_get_msr_list() and owned by
 * this file. */
29 static struct kvm_msr_list
*kvm_msr_list
;
/* Shadow-MMU page-count override (from the command line); defined elsewhere. */
30 extern unsigned int kvm_shadow_memory
;
/* Nonzero when the host kernel exports MSR_STAR in its MSR index list. */
31 static int kvm_has_msr_star
;
/* Nonzero when the host kernel exports MSR_VM_HSAVE_PA (SVM host-save area). */
32 static int kvm_has_vm_hsave_pa
;
/* Nonzero when uname() reports an x86_64 (long-mode capable) host kernel. */
34 static int lm_capable_kernel
;
/*
 * Set the guest-physical address of KVM's three-page TSS workaround
 * region: reserve the 0x4000-byte range in the e820 map (via fw_cfg)
 * and then issue KVM_SET_TSS_ADDR on the VM.
 * NOTE(review): several interior lines (returns/braces) are missing
 * from this extraction.
 */
36 int kvm_set_tss_addr(kvm_context_t kvm
, unsigned long addr
)
40 * Tell fw_cfg to notify the BIOS to reserve the range.
42 if (e820_add_entry(addr
, 0x4000, E820_RESERVED
) < 0) {
43 perror("e820_add_entry() table is full");
47 r
= kvm_vm_ioctl(kvm_state
, KVM_SET_TSS_ADDR
, addr
);
/* %m expands to strerror(errno) — glibc extension. */
49 fprintf(stderr
, "kvm_set_tss_addr: %m\n");
/*
 * Probe KVM_CAP_SET_TSS_ADDR and, when available, pin the TSS region
 * at 0xfeffd000 (three pages below the BIOS).  Interior lines are
 * missing from this extraction.
 */
55 static int kvm_init_tss(kvm_context_t kvm
)
59 r
= kvm_ioctl(kvm_state
, KVM_CHECK_EXTENSION
, KVM_CAP_SET_TSS_ADDR
);
62 * this address is 3 pages before the bios, and the bios should present
65 r
= kvm_set_tss_addr(kvm
, 0xfeffd000);
67 fprintf(stderr
, "kvm_init_tss: unable to set tss addr\n");
/* Extension not reported by the host kernel. */
71 fprintf(stderr
, "kvm does not support KVM_CAP_SET_TSS_ADDR\n");
/*
 * Set the guest-physical address of the EPT identity-map page via
 * KVM_SET_IDENTITY_MAP_ADDR.  Compiled out on kernels whose headers
 * lack the capability.
 */
76 static int kvm_set_identity_map_addr(kvm_context_t kvm
, uint64_t addr
)
78 #ifdef KVM_CAP_SET_IDENTITY_MAP_ADDR
81 r
= kvm_ioctl(kvm_state
, KVM_CHECK_EXTENSION
, KVM_CAP_SET_IDENTITY_MAP_ADDR
);
/* Note: this ioctl takes a pointer to the address, unlike SET_TSS_ADDR. */
83 r
= kvm_vm_ioctl(kvm_state
, KVM_SET_IDENTITY_MAP_ADDR
, &addr
);
85 fprintf(stderr
, "kvm_set_identity_map_addr: %m\n");
/*
 * Probe KVM_CAP_SET_IDENTITY_MAP_ADDR and, when available, place the
 * identity-map page at 0xfeffc000 (four pages below the BIOS, one
 * below the TSS region set up in kvm_init_tss()).
 */
94 static int kvm_init_identity_map_page(kvm_context_t kvm
)
96 #ifdef KVM_CAP_SET_IDENTITY_MAP_ADDR
99 r
= kvm_ioctl(kvm_state
, KVM_CHECK_EXTENSION
, KVM_CAP_SET_IDENTITY_MAP_ADDR
);
102 * this address is 4 pages before the bios, and the bios should present
103 * as unavailable memory
105 r
= kvm_set_identity_map_addr(kvm
, 0xfeffc000);
107 fprintf(stderr
, "kvm_init_identity_map_page: "
108 "unable to set identity mapping addr\n");
/*
 * Create the in-kernel PIT unless disabled via kvm->no_pit_creation;
 * records the outcome in kvm_state->pit_in_kernel.
 */
116 static int kvm_create_pit(kvm_context_t kvm
)
121 kvm_state
->pit_in_kernel
= 0;
122 if (!kvm
->no_pit_creation
) {
123 r
= kvm_ioctl(kvm_state
, KVM_CHECK_EXTENSION
, KVM_CAP_PIT
);
125 r
= kvm_vm_ioctl(kvm_state
, KVM_CREATE_PIT
);
127 kvm_state
->pit_in_kernel
= 1;
/* NOTE(review): message says "PIC irqchip" but this path creates the
 * PIT — looks like a copy/paste slip; confirm and reword the string. */
129 fprintf(stderr
, "Create kernel PIC irqchip failed\n");
/*
 * x86-specific VM creation hook: set up the TSS region, the EPT
 * identity-map page, the in-kernel PIT, and coalesced MMIO, in that
 * order.  Error-propagation lines are missing from this extraction.
 */
138 int kvm_arch_create(kvm_context_t kvm
, unsigned long phys_mem_bytes
,
143 r
= kvm_init_tss(kvm
);
148 r
= kvm_init_identity_map_page(kvm
);
153 r
= kvm_create_pit(kvm
);
158 r
= kvm_init_coalesced_mmio(kvm
);
166 #ifdef KVM_EXIT_TPR_ACCESS
168 static int kvm_handle_tpr_access(CPUState
*env
)
170 struct kvm_run
*run
= env
->kvm_run
;
171 kvm_tpr_access_report(env
,
173 run
->tpr_access
.is_write
);
/*
 * Tell KVM where the virtual-APIC page for this vcpu lives
 * (KVM_SET_VAPIC_ADDR).  Returns the ioctl result.
 */
178 int kvm_enable_vapic(CPUState
*env
, uint64_t vapic
)
180 struct kvm_vapic_addr va
= {
184 return kvm_vcpu_ioctl(env
, KVM_SET_VAPIC_ADDR
, &va
);
189 int kvm_arch_run(CPUState
*env
)
192 struct kvm_run
*run
= env
->kvm_run
;
194 switch (run
->exit_reason
) {
195 #ifdef KVM_EXIT_SET_TPR
196 case KVM_EXIT_SET_TPR
:
199 #ifdef KVM_EXIT_TPR_ACCESS
200 case KVM_EXIT_TPR_ACCESS
:
201 r
= kvm_handle_tpr_access(env
);
212 #ifdef KVM_CAP_IRQCHIP
/*
 * Read the in-kernel local APIC state into *s.  Bails out early
 * (body not visible here) when the irqchip is not in-kernel.
 */
214 int kvm_get_lapic(CPUState
*env
, struct kvm_lapic_state
*s
)
218 if (!kvm_irqchip_in_kernel()) {
222 r
= kvm_vcpu_ioctl(env
, KVM_GET_LAPIC
, s
);
224 fprintf(stderr
, "KVM_GET_LAPIC failed\n");
/*
 * Write *s into the in-kernel local APIC.  Mirror of kvm_get_lapic();
 * no-op when the irqchip is not in-kernel.
 */
229 int kvm_set_lapic(CPUState
*env
, struct kvm_lapic_state
*s
)
233 if (!kvm_irqchip_in_kernel()) {
237 r
= kvm_vcpu_ioctl(env
, KVM_SET_LAPIC
, s
);
240 fprintf(stderr
, "KVM_SET_LAPIC failed\n");
/* Fetch in-kernel PIT state into *s; only meaningful when the PIT is
 * in-kernel (early-out branch body not visible here). */
249 int kvm_get_pit(kvm_context_t kvm
, struct kvm_pit_state
*s
)
251 if (!kvm_pit_in_kernel()) {
254 return kvm_vm_ioctl(kvm_state
, KVM_GET_PIT
, s
);
/* Load *s into the in-kernel PIT; mirror of kvm_get_pit(). */
257 int kvm_set_pit(kvm_context_t kvm
, struct kvm_pit_state
*s
)
259 if (!kvm_pit_in_kernel()) {
262 return kvm_vm_ioctl(kvm_state
, KVM_SET_PIT
, s
);
/* Extended PIT state accessors (KVM_CAP_PIT_STATE2 kernels only). */
265 #ifdef KVM_CAP_PIT_STATE2
/* Fetch extended in-kernel PIT state into *ps2. */
266 int kvm_get_pit2(kvm_context_t kvm
, struct kvm_pit_state2
*ps2
)
268 if (!kvm_pit_in_kernel()) {
271 return kvm_vm_ioctl(kvm_state
, KVM_GET_PIT2
, ps2
);
/* Load *ps2 into the in-kernel PIT (extended format). */
274 int kvm_set_pit2(kvm_context_t kvm
, struct kvm_pit_state2
*ps2
)
276 if (!kvm_pit_in_kernel()) {
279 return kvm_vm_ioctl(kvm_state
, KVM_SET_PIT2
, ps2
);
/* Report whether the host supports the extended PIT state ioctls;
 * compiled to a constant result when the headers lack the cap. */
285 int kvm_has_pit_state2(kvm_context_t kvm
)
289 #ifdef KVM_CAP_PIT_STATE2
290 r
= kvm_check_extension(kvm_state
, KVM_CAP_PIT_STATE2
);
295 void kvm_show_code(CPUState
*env
)
297 #define SHOW_CODE_LEN 50
298 struct kvm_regs regs
;
299 struct kvm_sregs sregs
;
303 char code_str
[SHOW_CODE_LEN
* 3 + 1];
306 r
= kvm_vcpu_ioctl(env
, KVM_GET_SREGS
, &sregs
);
308 perror("KVM_GET_SREGS");
311 r
= kvm_vcpu_ioctl(env
, KVM_GET_REGS
, ®s
);
313 perror("KVM_GET_REGS");
316 rip
= sregs
.cs
.base
+ regs
.rip
;
317 back_offset
= regs
.rip
;
318 if (back_offset
> 20) {
322 for (n
= -back_offset
; n
< SHOW_CODE_LEN
-back_offset
; ++n
) {
324 strcat(code_str
, " -->");
326 cpu_physical_memory_rw(rip
+ n
, &code
, 1, 1);
327 sprintf(code_str
+ strlen(code_str
), " %02x", code
);
329 fprintf(stderr
, "code:%s\n", code_str
);
334 * Returns available msr list. User must free.
336 static struct kvm_msr_list
*kvm_get_msr_list(void)
338 struct kvm_msr_list sizer
, *msrs
;
/* First call sizes the list: presumably sizer.nmsrs is zeroed in a
 * line missing from this extraction, so the kernel fails with E2BIG
 * and fills in the required count — confirm against full source. */
342 r
= kvm_ioctl(kvm_state
, KVM_GET_MSR_INDEX_LIST
, &sizer
);
343 if (r
< 0 && r
!= -E2BIG
) {
346 /* Old kernel modules had a bug and could write beyond the provided
347 memory. Allocate at least a safe amount of 1K. */
348 msrs
= qemu_malloc(MAX(1024, sizeof(*msrs
) +
349 sizer
.nmsrs
* sizeof(*msrs
->indices
)));
351 msrs
->nmsrs
= sizer
.nmsrs
;
/* Second call retrieves the actual MSR indices. */
352 r
= kvm_ioctl(kvm_state
, KVM_GET_MSR_INDEX_LIST
, msrs
);
361 int kvm_get_msrs(CPUState
*env
, struct kvm_msr_entry
*msrs
, int n
)
363 struct kvm_msrs
*kmsrs
= qemu_malloc(sizeof *kmsrs
+ n
* sizeof *msrs
);
367 memcpy(kmsrs
->entries
, msrs
, n
* sizeof *msrs
);
368 r
= kvm_vcpu_ioctl(env
, KVM_GET_MSRS
, kmsrs
);
369 memcpy(msrs
, kmsrs
->entries
, n
* sizeof *msrs
);
374 int kvm_set_msrs(CPUState
*env
, struct kvm_msr_entry
*msrs
, int n
)
376 struct kvm_msrs
*kmsrs
= qemu_malloc(sizeof *kmsrs
+ n
* sizeof *msrs
);
380 memcpy(kmsrs
->entries
, msrs
, n
* sizeof *msrs
);
381 r
= kvm_vcpu_ioctl(env
, KVM_SET_MSRS
, kmsrs
);
386 int kvm_get_mce_cap_supported(kvm_context_t kvm
, uint64_t *mce_cap
,
392 r
= kvm_ioctl(kvm_state
, KVM_CHECK_EXTENSION
, KVM_CAP_MCE
);
395 return kvm_ioctl(kvm_state
, KVM_X86_GET_MCE_CAP_SUPPORTED
, mce_cap
);
/* Enable MCE emulation for this vcpu with the given capability word
 * (KVM_X86_SETUP_MCE); returns the ioctl result. */
401 int kvm_setup_mce(CPUState
*env
, uint64_t *mcg_cap
)
404 return kvm_vcpu_ioctl(env
, KVM_X86_SETUP_MCE
, mcg_cap
)
;
/* Inject a machine-check event described by *m into this vcpu
 * (KVM_X86_SET_MCE); returns the ioctl result. */
410 int kvm_set_mce(CPUState
*env
, struct kvm_x86_mce
*m
)
413 return kvm_vcpu_ioctl(env
, KVM_X86_SET_MCE
, m
);
419 static void print_seg(FILE *file
, const char *name
, struct kvm_segment
*seg
)
422 "%s %04x (%08llx/%08x p %d dpl %d db %d s %d type %x l %d"
424 name
, seg
->selector
, seg
->base
, seg
->limit
, seg
->present
,
425 seg
->dpl
, seg
->db
, seg
->s
, seg
->type
, seg
->l
, seg
->g
,
429 static void print_dt(FILE *file
, const char *name
, struct kvm_dtable
*dt
)
431 fprintf(stderr
, "%s %llx/%x\n", name
, dt
->base
, dt
->limit
);
434 void kvm_show_regs(CPUState
*env
)
436 struct kvm_regs regs
;
437 struct kvm_sregs sregs
;
440 r
= kvm_vcpu_ioctl(env
, KVM_GET_REGS
, ®s
);
442 perror("KVM_GET_REGS");
446 "rax %016llx rbx %016llx rcx %016llx rdx %016llx\n"
447 "rsi %016llx rdi %016llx rsp %016llx rbp %016llx\n"
448 "r8 %016llx r9 %016llx r10 %016llx r11 %016llx\n"
449 "r12 %016llx r13 %016llx r14 %016llx r15 %016llx\n"
450 "rip %016llx rflags %08llx\n",
451 regs
.rax
, regs
.rbx
, regs
.rcx
, regs
.rdx
,
452 regs
.rsi
, regs
.rdi
, regs
.rsp
, regs
.rbp
,
453 regs
.r8
, regs
.r9
, regs
.r10
, regs
.r11
,
454 regs
.r12
, regs
.r13
, regs
.r14
, regs
.r15
,
455 regs
.rip
, regs
.rflags
);
456 r
= kvm_vcpu_ioctl(env
, KVM_GET_SREGS
, &sregs
);
458 perror("KVM_GET_SREGS");
461 print_seg(stderr
, "cs", &sregs
.cs
);
462 print_seg(stderr
, "ds", &sregs
.ds
);
463 print_seg(stderr
, "es", &sregs
.es
);
464 print_seg(stderr
, "ss", &sregs
.ss
);
465 print_seg(stderr
, "fs", &sregs
.fs
);
466 print_seg(stderr
, "gs", &sregs
.gs
);
467 print_seg(stderr
, "tr", &sregs
.tr
);
468 print_seg(stderr
, "ldt", &sregs
.ldt
);
469 print_dt(stderr
, "gdt", &sregs
.gdt
);
470 print_dt(stderr
, "idt", &sregs
.idt
);
471 fprintf(stderr
, "cr0 %llx cr2 %llx cr3 %llx cr4 %llx cr8 %llx"
473 sregs
.cr0
, sregs
.cr2
, sregs
.cr3
, sregs
.cr4
, sregs
.cr8
,
/* Publish a new CR8/TPR value into the shared kvm_run area; the
 * kernel picks it up on the next vcpu entry. */
477 static void kvm_set_cr8(CPUState
*env
, uint64_t cr8
)
479 env
->kvm_run
->cr8
= cr8
;
482 int kvm_setup_cpuid(CPUState
*env
, int nent
,
483 struct kvm_cpuid_entry
*entries
)
485 struct kvm_cpuid
*cpuid
;
488 cpuid
= qemu_malloc(sizeof(*cpuid
) + nent
* sizeof(*entries
));
491 memcpy(cpuid
->entries
, entries
, nent
* sizeof(*entries
));
492 r
= kvm_vcpu_ioctl(env
, KVM_SET_CPUID
, cpuid
);
498 int kvm_setup_cpuid2(CPUState
*env
, int nent
,
499 struct kvm_cpuid_entry2
*entries
)
501 struct kvm_cpuid2
*cpuid
;
504 cpuid
= qemu_malloc(sizeof(*cpuid
) + nent
* sizeof(*entries
));
507 memcpy(cpuid
->entries
, entries
, nent
* sizeof(*entries
));
508 r
= kvm_vcpu_ioctl(env
, KVM_SET_CPUID2
, cpuid
);
513 int kvm_set_shadow_pages(kvm_context_t kvm
, unsigned int nrshadow_pages
)
515 #ifdef KVM_CAP_MMU_SHADOW_CACHE_CONTROL
518 r
= kvm_ioctl(kvm_state
, KVM_CHECK_EXTENSION
,
519 KVM_CAP_MMU_SHADOW_CACHE_CONTROL
);
521 r
= kvm_vm_ioctl(kvm_state
, KVM_SET_NR_MMU_PAGES
, nrshadow_pages
);
523 fprintf(stderr
, "kvm_set_shadow_pages: %m\n");
532 int kvm_get_shadow_pages(kvm_context_t kvm
, unsigned int *nrshadow_pages
)
534 #ifdef KVM_CAP_MMU_SHADOW_CACHE_CONTROL
537 r
= kvm_ioctl(kvm_state
, KVM_CHECK_EXTENSION
,
538 KVM_CAP_MMU_SHADOW_CACHE_CONTROL
);
540 *nrshadow_pages
= kvm_vm_ioctl(kvm_state
, KVM_GET_NR_MMU_PAGES
);
548 static int kvm_enable_tpr_access_reporting(CPUState
*env
)
551 struct kvm_tpr_access_ctl tac
= { .enabled
= 1 };
553 r
= kvm_ioctl(env
->kvm_state
, KVM_CHECK_EXTENSION
, KVM_CAP_VAPIC
);
557 return kvm_vcpu_ioctl(env
, KVM_TPR_ACCESS_REPORTING
, &tac
);
561 #ifdef KVM_CAP_ADJUST_CLOCK
562 static struct kvm_clock_data kvmclock_data
;
564 static void kvmclock_pre_save(void *opaque
)
566 struct kvm_clock_data
*cl
= opaque
;
568 kvm_vm_ioctl(kvm_state
, KVM_GET_CLOCK
, cl
);
571 static int kvmclock_post_load(void *opaque
, int version_id
)
573 struct kvm_clock_data
*cl
= opaque
;
575 return kvm_vm_ioctl(kvm_state
, KVM_SET_CLOCK
, cl
);
578 static const VMStateDescription vmstate_kvmclock
= {
581 .minimum_version_id
= 1,
582 .minimum_version_id_old
= 1,
583 .pre_save
= kvmclock_pre_save
,
584 .post_load
= kvmclock_post_load
,
585 .fields
= (VMStateField
[]) {
586 VMSTATE_U64(clock
, struct kvm_clock_data
),
587 VMSTATE_END_OF_LIST()
592 int kvm_arch_qemu_create_context(void)
595 struct utsname utsname
;
598 lm_capable_kernel
= strcmp(utsname
.machine
, "x86_64") == 0;
600 if (kvm_shadow_memory
) {
601 kvm_set_shadow_pages(kvm_context
, kvm_shadow_memory
);
604 kvm_msr_list
= kvm_get_msr_list();
608 for (i
= 0; i
< kvm_msr_list
->nmsrs
; ++i
) {
609 if (kvm_msr_list
->indices
[i
] == MSR_STAR
) {
610 kvm_has_msr_star
= 1;
612 if (kvm_msr_list
->indices
[i
] == MSR_VM_HSAVE_PA
) {
613 kvm_has_vm_hsave_pa
= 1;
617 #ifdef KVM_CAP_ADJUST_CLOCK
618 if (kvm_check_extension(kvm_state
, KVM_CAP_ADJUST_CLOCK
)) {
619 vmstate_register(NULL
, 0, &vmstate_kvmclock
, &kvmclock_data
);
623 r
= kvm_set_boot_cpu_id(0);
624 if (r
< 0 && r
!= -ENOSYS
) {
631 /* returns 0 on success, non-0 on failure */
632 static int get_msr_entry(struct kvm_msr_entry
*entry
, CPUState
*env
)
634 switch (entry
->index
) {
635 case MSR_IA32_SYSENTER_CS
:
636 env
->sysenter_cs
= entry
->data
;
638 case MSR_IA32_SYSENTER_ESP
:
639 env
->sysenter_esp
= entry
->data
;
641 case MSR_IA32_SYSENTER_EIP
:
642 env
->sysenter_eip
= entry
->data
;
645 env
->star
= entry
->data
;
649 env
->cstar
= entry
->data
;
651 case MSR_KERNELGSBASE
:
652 env
->kernelgsbase
= entry
->data
;
655 env
->fmask
= entry
->data
;
658 env
->lstar
= entry
->data
;
662 env
->tsc
= entry
->data
;
664 case MSR_VM_HSAVE_PA
:
665 env
->vm_hsave
= entry
->data
;
667 case MSR_KVM_SYSTEM_TIME
:
668 env
->system_time_msr
= entry
->data
;
670 case MSR_KVM_WALL_CLOCK
:
671 env
->wall_clock_msr
= entry
->data
;
675 env
->mcg_status
= entry
->data
;
678 env
->mcg_ctl
= entry
->data
;
683 if (entry
->index
>= MSR_MC0_CTL
&&
684 entry
->index
< MSR_MC0_CTL
+ (env
->mcg_cap
& 0xff) * 4) {
685 env
->mce_banks
[entry
->index
- MSR_MC0_CTL
] = entry
->data
;
689 printf("Warning unknown msr index 0x%x\n", entry
->index
);
695 static void kvm_arch_save_mpstate(CPUState
*env
)
697 #ifdef KVM_CAP_MP_STATE
699 struct kvm_mp_state mp_state
;
701 r
= kvm_get_mpstate(env
, &mp_state
);
705 env
->mp_state
= mp_state
.mp_state
;
706 if (kvm_irqchip_in_kernel()) {
707 env
->halted
= (env
->mp_state
== KVM_MP_STATE_HALTED
);
715 static void kvm_arch_load_mpstate(CPUState
*env
)
717 #ifdef KVM_CAP_MP_STATE
718 struct kvm_mp_state mp_state
;
721 * -1 indicates that the host did not support GET_MP_STATE ioctl,
724 if (env
->mp_state
!= -1) {
725 mp_state
.mp_state
= env
->mp_state
;
726 kvm_set_mpstate(env
, &mp_state
);
731 static void kvm_reset_mpstate(CPUState
*env
)
733 #ifdef KVM_CAP_MP_STATE
734 if (kvm_check_extension(kvm_state
, KVM_CAP_MP_STATE
)) {
735 if (kvm_irqchip_in_kernel()) {
736 env
->mp_state
= cpu_is_bsp(env
) ? KVM_MP_STATE_RUNNABLE
:
737 KVM_MP_STATE_UNINITIALIZED
;
739 env
->mp_state
= KVM_MP_STATE_RUNNABLE
;
/*
 * Build a kvm_segment for virtual-8086 mode from QEMU's cached
 * segment: only selector/base/limit come from the cache.  The fixed
 * v86 attribute assignments (original lines 750+) are missing from
 * this extraction.
 */
745 static void set_v8086_seg(struct kvm_segment
*lhs
, const SegmentCache
*rhs
)
747 lhs
->selector
= rhs
->selector
;
748 lhs
->base
= rhs
->base
;
749 lhs
->limit
= rhs
->limit
;
/*
 * Explode QEMU's packed SegmentCache.flags word into KVM's discrete
 * kvm_segment attribute fields (type/present/dpl/db/s/l/g/avl).
 * Inverse of get_seg().
 */
761 static void set_seg(struct kvm_segment
*lhs
, const SegmentCache
*rhs
)
763 unsigned flags
= rhs
->flags
;
764 lhs
->selector
= rhs
->selector
;
765 lhs
->base
= rhs
->base
;
766 lhs
->limit
= rhs
->limit
;
767 lhs
->type
= (flags
>> DESC_TYPE_SHIFT
) & 15;
768 lhs
->present
= (flags
& DESC_P_MASK
) != 0;
/* DPL taken from the selector's RPL bits, not the descriptor flags. */
769 lhs
->dpl
= rhs
->selector
& 3;
770 lhs
->db
= (flags
>> DESC_B_SHIFT
) & 1;
771 lhs
->s
= (flags
& DESC_S_MASK
) != 0;
772 lhs
->l
= (flags
>> DESC_L_SHIFT
) & 1;
773 lhs
->g
= (flags
& DESC_G_MASK
) != 0;
774 lhs
->avl
= (flags
& DESC_AVL_MASK
) != 0;
/*
 * Repack KVM's discrete kvm_segment attribute fields into QEMU's
 * SegmentCache.flags word.  Inverse of set_seg().  The `* MASK`
 * products rely on the boolean fields being exactly 0 or 1.
 * NOTE(review): the `lhs->flags =` assignment line (original 783)
 * is missing from this extraction.
 */
778 static void get_seg(SegmentCache
*lhs
, const struct kvm_segment
*rhs
)
780 lhs
->selector
= rhs
->selector
;
781 lhs
->base
= rhs
->base
;
782 lhs
->limit
= rhs
->limit
;
784 (rhs
->type
<< DESC_TYPE_SHIFT
)
785 | (rhs
->present
* DESC_P_MASK
)
786 | (rhs
->dpl
<< DESC_DPL_SHIFT
)
787 | (rhs
->db
<< DESC_B_SHIFT
)
788 | (rhs
->s
* DESC_S_MASK
)
789 | (rhs
->l
<< DESC_L_SHIFT
)
790 | (rhs
->g
* DESC_G_MASK
)
791 | (rhs
->avl
* DESC_AVL_MASK
);
794 #define XSAVE_CWD_RIP 2
795 #define XSAVE_CWD_RDP 4
796 #define XSAVE_MXCSR 6
797 #define XSAVE_ST_SPACE 8
798 #define XSAVE_XMM_SPACE 40
799 #define XSAVE_XSTATE_BV 128
800 #define XSAVE_YMMH_SPACE 144
802 void kvm_arch_load_regs(CPUState
*env
, int level
)
804 struct kvm_regs regs
;
806 struct kvm_sregs sregs
;
807 struct kvm_msr_entry msrs
[100];
810 assert(kvm_cpu_is_stopped(env
) || env
->thread_id
== kvm_get_thread_id());
812 regs
.rax
= env
->regs
[R_EAX
];
813 regs
.rbx
= env
->regs
[R_EBX
];
814 regs
.rcx
= env
->regs
[R_ECX
];
815 regs
.rdx
= env
->regs
[R_EDX
];
816 regs
.rsi
= env
->regs
[R_ESI
];
817 regs
.rdi
= env
->regs
[R_EDI
];
818 regs
.rsp
= env
->regs
[R_ESP
];
819 regs
.rbp
= env
->regs
[R_EBP
];
821 regs
.r8
= env
->regs
[8];
822 regs
.r9
= env
->regs
[9];
823 regs
.r10
= env
->regs
[10];
824 regs
.r11
= env
->regs
[11];
825 regs
.r12
= env
->regs
[12];
826 regs
.r13
= env
->regs
[13];
827 regs
.r14
= env
->regs
[14];
828 regs
.r15
= env
->regs
[15];
831 regs
.rflags
= env
->eflags
;
834 kvm_set_regs(env
, ®s
);
837 if (kvm_check_extension(kvm_state
, KVM_CAP_XSAVE
)) {
838 struct kvm_xsave
* xsave
;
840 uint16_t cwd
, swd
, twd
, fop
;
842 xsave
= qemu_memalign(4096, sizeof(struct kvm_xsave
));
843 memset(xsave
, 0, sizeof(struct kvm_xsave
));
844 cwd
= swd
= twd
= fop
= 0;
845 swd
= env
->fpus
& ~(7 << 11);
846 swd
|= (env
->fpstt
& 7) << 11;
848 for (i
= 0; i
< 8; ++i
) {
849 twd
|= (!env
->fptags
[i
]) << i
;
851 xsave
->region
[0] = (uint32_t)(swd
<< 16) + cwd
;
852 xsave
->region
[1] = (uint32_t)(fop
<< 16) + twd
;
853 memcpy(&xsave
->region
[XSAVE_ST_SPACE
], env
->fpregs
,
855 memcpy(&xsave
->region
[XSAVE_XMM_SPACE
], env
->xmm_regs
,
856 sizeof env
->xmm_regs
);
857 xsave
->region
[XSAVE_MXCSR
] = env
->mxcsr
;
858 *(uint64_t *)&xsave
->region
[XSAVE_XSTATE_BV
] = env
->xstate_bv
;
859 memcpy(&xsave
->region
[XSAVE_YMMH_SPACE
], env
->ymmh_regs
,
860 sizeof env
->ymmh_regs
);
861 kvm_set_xsave(env
, xsave
);
862 if (kvm_check_extension(kvm_state
, KVM_CAP_XCRS
)) {
863 struct kvm_xcrs xcrs
;
867 xcrs
.xcrs
[0].xcr
= 0;
868 xcrs
.xcrs
[0].value
= env
->xcr0
;
869 kvm_set_xcrs(env
, &xcrs
);
874 memset(&fpu
, 0, sizeof fpu
);
875 fpu
.fsw
= env
->fpus
& ~(7 << 11);
876 fpu
.fsw
|= (env
->fpstt
& 7) << 11;
878 for (i
= 0; i
< 8; ++i
) {
879 fpu
.ftwx
|= (!env
->fptags
[i
]) << i
;
881 memcpy(fpu
.fpr
, env
->fpregs
, sizeof env
->fpregs
);
882 memcpy(fpu
.xmm
, env
->xmm_regs
, sizeof env
->xmm_regs
);
883 fpu
.mxcsr
= env
->mxcsr
;
884 kvm_set_fpu(env
, &fpu
);
889 memset(sregs
.interrupt_bitmap
, 0, sizeof(sregs
.interrupt_bitmap
));
890 if (env
->interrupt_injected
>= 0) {
891 sregs
.interrupt_bitmap
[env
->interrupt_injected
/ 64] |=
892 (uint64_t)1 << (env
->interrupt_injected
% 64);
895 if ((env
->eflags
& VM_MASK
)) {
896 set_v8086_seg(&sregs
.cs
, &env
->segs
[R_CS
]);
897 set_v8086_seg(&sregs
.ds
, &env
->segs
[R_DS
]);
898 set_v8086_seg(&sregs
.es
, &env
->segs
[R_ES
]);
899 set_v8086_seg(&sregs
.fs
, &env
->segs
[R_FS
]);
900 set_v8086_seg(&sregs
.gs
, &env
->segs
[R_GS
]);
901 set_v8086_seg(&sregs
.ss
, &env
->segs
[R_SS
]);
903 set_seg(&sregs
.cs
, &env
->segs
[R_CS
]);
904 set_seg(&sregs
.ds
, &env
->segs
[R_DS
]);
905 set_seg(&sregs
.es
, &env
->segs
[R_ES
]);
906 set_seg(&sregs
.fs
, &env
->segs
[R_FS
]);
907 set_seg(&sregs
.gs
, &env
->segs
[R_GS
]);
908 set_seg(&sregs
.ss
, &env
->segs
[R_SS
]);
910 if (env
->cr
[0] & CR0_PE_MASK
) {
911 /* force ss cpl to cs cpl */
912 sregs
.ss
.selector
= (sregs
.ss
.selector
& ~3) |
913 (sregs
.cs
.selector
& 3);
914 sregs
.ss
.dpl
= sregs
.ss
.selector
& 3;
918 set_seg(&sregs
.tr
, &env
->tr
);
919 set_seg(&sregs
.ldt
, &env
->ldt
);
921 sregs
.idt
.limit
= env
->idt
.limit
;
922 sregs
.idt
.base
= env
->idt
.base
;
923 sregs
.gdt
.limit
= env
->gdt
.limit
;
924 sregs
.gdt
.base
= env
->gdt
.base
;
926 sregs
.cr0
= env
->cr
[0];
927 sregs
.cr2
= env
->cr
[2];
928 sregs
.cr3
= env
->cr
[3];
929 sregs
.cr4
= env
->cr
[4];
931 sregs
.cr8
= cpu_get_apic_tpr(env
->apic_state
);
932 sregs
.apic_base
= cpu_get_apic_base(env
->apic_state
);
934 sregs
.efer
= env
->efer
;
936 kvm_set_sregs(env
, &sregs
);
940 /* Remember to increase msrs size if you add new registers below */
941 kvm_msr_entry_set(&msrs
[n
++], MSR_IA32_SYSENTER_CS
, env
->sysenter_cs
);
942 kvm_msr_entry_set(&msrs
[n
++], MSR_IA32_SYSENTER_ESP
, env
->sysenter_esp
);
943 kvm_msr_entry_set(&msrs
[n
++], MSR_IA32_SYSENTER_EIP
, env
->sysenter_eip
);
944 if (kvm_has_msr_star
) {
945 kvm_msr_entry_set(&msrs
[n
++], MSR_STAR
, env
->star
);
947 if (kvm_has_vm_hsave_pa
) {
948 kvm_msr_entry_set(&msrs
[n
++], MSR_VM_HSAVE_PA
, env
->vm_hsave
);
951 if (lm_capable_kernel
) {
952 kvm_msr_entry_set(&msrs
[n
++], MSR_CSTAR
, env
->cstar
);
953 kvm_msr_entry_set(&msrs
[n
++], MSR_KERNELGSBASE
, env
->kernelgsbase
);
954 kvm_msr_entry_set(&msrs
[n
++], MSR_FMASK
, env
->fmask
);
955 kvm_msr_entry_set(&msrs
[n
++], MSR_LSTAR
, env
->lstar
);
958 if (level
== KVM_PUT_FULL_STATE
) {
960 * KVM is yet unable to synchronize TSC values of multiple VCPUs on
961 * writeback. Until this is fixed, we only write the offset to SMP
962 * guests after migration, desynchronizing the VCPUs, but avoiding
963 * huge jump-backs that would occur without any writeback at all.
965 if (smp_cpus
== 1 || env
->tsc
!= 0) {
966 kvm_msr_entry_set(&msrs
[n
++], MSR_IA32_TSC
, env
->tsc
);
968 kvm_msr_entry_set(&msrs
[n
++], MSR_KVM_SYSTEM_TIME
, env
->system_time_msr
);
969 kvm_msr_entry_set(&msrs
[n
++], MSR_KVM_WALL_CLOCK
, env
->wall_clock_msr
);
973 if (level
== KVM_PUT_RESET_STATE
) {
974 kvm_msr_entry_set(&msrs
[n
++], MSR_MCG_STATUS
, env
->mcg_status
);
975 } else if (level
== KVM_PUT_FULL_STATE
) {
976 kvm_msr_entry_set(&msrs
[n
++], MSR_MCG_STATUS
, env
->mcg_status
);
977 kvm_msr_entry_set(&msrs
[n
++], MSR_MCG_CTL
, env
->mcg_ctl
);
978 for (i
= 0; i
< (env
->mcg_cap
& 0xff); i
++) {
979 kvm_msr_entry_set(&msrs
[n
++], MSR_MC0_CTL
+ i
, env
->mce_banks
[i
]);
985 rc
= kvm_set_msrs(env
, msrs
, n
);
987 perror("kvm_set_msrs FAILED");
990 if (level
>= KVM_PUT_RESET_STATE
) {
991 kvm_arch_load_mpstate(env
);
994 if (level
== KVM_PUT_FULL_STATE
) {
995 if (env
->kvm_vcpu_update_vapic
) {
996 kvm_tpr_enable_vapic(env
);
1000 kvm_put_vcpu_events(env
, level
);
1001 kvm_put_debugregs(env
);
1004 kvm_guest_debug_workarounds(env
);
1007 void kvm_arch_save_regs(CPUState
*env
)
1009 struct kvm_regs regs
;
1011 struct kvm_sregs sregs
;
1012 struct kvm_msr_entry msrs
[100];
1014 uint32_t i
, n
, rc
, bit
;
1016 assert(kvm_cpu_is_stopped(env
) || env
->thread_id
== kvm_get_thread_id());
1018 kvm_get_regs(env
, ®s
);
1020 env
->regs
[R_EAX
] = regs
.rax
;
1021 env
->regs
[R_EBX
] = regs
.rbx
;
1022 env
->regs
[R_ECX
] = regs
.rcx
;
1023 env
->regs
[R_EDX
] = regs
.rdx
;
1024 env
->regs
[R_ESI
] = regs
.rsi
;
1025 env
->regs
[R_EDI
] = regs
.rdi
;
1026 env
->regs
[R_ESP
] = regs
.rsp
;
1027 env
->regs
[R_EBP
] = regs
.rbp
;
1028 #ifdef TARGET_X86_64
1029 env
->regs
[8] = regs
.r8
;
1030 env
->regs
[9] = regs
.r9
;
1031 env
->regs
[10] = regs
.r10
;
1032 env
->regs
[11] = regs
.r11
;
1033 env
->regs
[12] = regs
.r12
;
1034 env
->regs
[13] = regs
.r13
;
1035 env
->regs
[14] = regs
.r14
;
1036 env
->regs
[15] = regs
.r15
;
1039 env
->eflags
= regs
.rflags
;
1040 env
->eip
= regs
.rip
;
1042 #ifdef KVM_CAP_XSAVE
1043 if (kvm_check_extension(kvm_state
, KVM_CAP_XSAVE
)) {
1044 struct kvm_xsave
* xsave
;
1045 uint16_t cwd
, swd
, twd
, fop
;
1046 xsave
= qemu_memalign(4096, sizeof(struct kvm_xsave
));
1047 kvm_get_xsave(env
, xsave
);
1048 cwd
= (uint16_t)xsave
->region
[0];
1049 swd
= (uint16_t)(xsave
->region
[0] >> 16);
1050 twd
= (uint16_t)xsave
->region
[1];
1051 fop
= (uint16_t)(xsave
->region
[1] >> 16);
1052 env
->fpstt
= (swd
>> 11) & 7;
1055 for (i
= 0; i
< 8; ++i
) {
1056 env
->fptags
[i
] = !((twd
>> i
) & 1);
1058 env
->mxcsr
= xsave
->region
[XSAVE_MXCSR
];
1059 memcpy(env
->fpregs
, &xsave
->region
[XSAVE_ST_SPACE
],
1060 sizeof env
->fpregs
);
1061 memcpy(env
->xmm_regs
, &xsave
->region
[XSAVE_XMM_SPACE
],
1062 sizeof env
->xmm_regs
);
1063 env
->xstate_bv
= *(uint64_t *)&xsave
->region
[XSAVE_XSTATE_BV
];
1064 memcpy(env
->ymmh_regs
, &xsave
->region
[XSAVE_YMMH_SPACE
],
1065 sizeof env
->ymmh_regs
);
1066 if (kvm_check_extension(kvm_state
, KVM_CAP_XCRS
)) {
1067 struct kvm_xcrs xcrs
;
1069 kvm_get_xcrs(env
, &xcrs
);
1070 if (xcrs
.xcrs
[0].xcr
== 0) {
1071 env
->xcr0
= xcrs
.xcrs
[0].value
;
1077 kvm_get_fpu(env
, &fpu
);
1078 env
->fpstt
= (fpu
.fsw
>> 11) & 7;
1079 env
->fpus
= fpu
.fsw
;
1080 env
->fpuc
= fpu
.fcw
;
1081 for (i
= 0; i
< 8; ++i
) {
1082 env
->fptags
[i
] = !((fpu
.ftwx
>> i
) & 1);
1084 memcpy(env
->fpregs
, fpu
.fpr
, sizeof env
->fpregs
);
1085 memcpy(env
->xmm_regs
, fpu
.xmm
, sizeof env
->xmm_regs
);
1086 env
->mxcsr
= fpu
.mxcsr
;
1087 #ifdef KVM_CAP_XSAVE
1091 kvm_get_sregs(env
, &sregs
);
1093 /* There can only be one pending IRQ set in the bitmap at a time, so try
1094 to find it and save its number instead (-1 for none). */
1095 env
->interrupt_injected
= -1;
1096 for (i
= 0; i
< ARRAY_SIZE(sregs
.interrupt_bitmap
); i
++) {
1097 if (sregs
.interrupt_bitmap
[i
]) {
1098 bit
= ctz64(sregs
.interrupt_bitmap
[i
]);
1099 env
->interrupt_injected
= i
* 64 + bit
;
1104 get_seg(&env
->segs
[R_CS
], &sregs
.cs
);
1105 get_seg(&env
->segs
[R_DS
], &sregs
.ds
);
1106 get_seg(&env
->segs
[R_ES
], &sregs
.es
);
1107 get_seg(&env
->segs
[R_FS
], &sregs
.fs
);
1108 get_seg(&env
->segs
[R_GS
], &sregs
.gs
);
1109 get_seg(&env
->segs
[R_SS
], &sregs
.ss
);
1111 get_seg(&env
->tr
, &sregs
.tr
);
1112 get_seg(&env
->ldt
, &sregs
.ldt
);
1114 env
->idt
.limit
= sregs
.idt
.limit
;
1115 env
->idt
.base
= sregs
.idt
.base
;
1116 env
->gdt
.limit
= sregs
.gdt
.limit
;
1117 env
->gdt
.base
= sregs
.gdt
.base
;
1119 env
->cr
[0] = sregs
.cr0
;
1120 env
->cr
[2] = sregs
.cr2
;
1121 env
->cr
[3] = sregs
.cr3
;
1122 env
->cr
[4] = sregs
.cr4
;
1124 cpu_set_apic_base(env
->apic_state
, sregs
.apic_base
);
1126 env
->efer
= sregs
.efer
;
1127 //cpu_set_apic_tpr(env, sregs.cr8);
1129 #define HFLAG_COPY_MASK ~( \
1130 HF_CPL_MASK | HF_PE_MASK | HF_MP_MASK | HF_EM_MASK | \
1131 HF_TS_MASK | HF_TF_MASK | HF_VM_MASK | HF_IOPL_MASK | \
1132 HF_OSFXSR_MASK | HF_LMA_MASK | HF_CS32_MASK | \
1133 HF_SS32_MASK | HF_CS64_MASK | HF_ADDSEG_MASK)
1135 hflags
= (env
->segs
[R_CS
].flags
>> DESC_DPL_SHIFT
) & HF_CPL_MASK
;
1136 hflags
|= (env
->cr
[0] & CR0_PE_MASK
) << (HF_PE_SHIFT
- CR0_PE_SHIFT
);
1137 hflags
|= (env
->cr
[0] << (HF_MP_SHIFT
- CR0_MP_SHIFT
)) &
1138 (HF_MP_MASK
| HF_EM_MASK
| HF_TS_MASK
);
1139 hflags
|= (env
->eflags
& (HF_TF_MASK
| HF_VM_MASK
| HF_IOPL_MASK
));
1140 hflags
|= (env
->cr
[4] & CR4_OSFXSR_MASK
) <<
1141 (HF_OSFXSR_SHIFT
- CR4_OSFXSR_SHIFT
);
1143 if (env
->efer
& MSR_EFER_LMA
) {
1144 hflags
|= HF_LMA_MASK
;
1147 if ((hflags
& HF_LMA_MASK
) && (env
->segs
[R_CS
].flags
& DESC_L_MASK
)) {
1148 hflags
|= HF_CS32_MASK
| HF_SS32_MASK
| HF_CS64_MASK
;
1150 hflags
|= (env
->segs
[R_CS
].flags
& DESC_B_MASK
) >>
1151 (DESC_B_SHIFT
- HF_CS32_SHIFT
);
1152 hflags
|= (env
->segs
[R_SS
].flags
& DESC_B_MASK
) >>
1153 (DESC_B_SHIFT
- HF_SS32_SHIFT
);
1154 if (!(env
->cr
[0] & CR0_PE_MASK
) ||
1155 (env
->eflags
& VM_MASK
) ||
1156 !(hflags
& HF_CS32_MASK
)) {
1157 hflags
|= HF_ADDSEG_MASK
;
1159 hflags
|= ((env
->segs
[R_DS
].base
|
1160 env
->segs
[R_ES
].base
|
1161 env
->segs
[R_SS
].base
) != 0) <<
1165 env
->hflags
= (env
->hflags
& HFLAG_COPY_MASK
) | hflags
;
1169 /* Remember to increase msrs size if you add new registers below */
1170 msrs
[n
++].index
= MSR_IA32_SYSENTER_CS
;
1171 msrs
[n
++].index
= MSR_IA32_SYSENTER_ESP
;
1172 msrs
[n
++].index
= MSR_IA32_SYSENTER_EIP
;
1173 if (kvm_has_msr_star
) {
1174 msrs
[n
++].index
= MSR_STAR
;
1176 msrs
[n
++].index
= MSR_IA32_TSC
;
1177 if (kvm_has_vm_hsave_pa
)
1178 msrs
[n
++].index
= MSR_VM_HSAVE_PA
;
1179 #ifdef TARGET_X86_64
1180 if (lm_capable_kernel
) {
1181 msrs
[n
++].index
= MSR_CSTAR
;
1182 msrs
[n
++].index
= MSR_KERNELGSBASE
;
1183 msrs
[n
++].index
= MSR_FMASK
;
1184 msrs
[n
++].index
= MSR_LSTAR
;
1187 msrs
[n
++].index
= MSR_KVM_SYSTEM_TIME
;
1188 msrs
[n
++].index
= MSR_KVM_WALL_CLOCK
;
1192 msrs
[n
++].index
= MSR_MCG_STATUS
;
1193 msrs
[n
++].index
= MSR_MCG_CTL
;
1194 for (i
= 0; i
< (env
->mcg_cap
& 0xff) * 4; i
++)
1195 msrs
[n
++].index
= MSR_MC0_CTL
+ i
;
1199 rc
= kvm_get_msrs(env
, msrs
, n
);
1201 perror("kvm_get_msrs FAILED");
1203 n
= rc
; /* actual number of MSRs */
1204 for (i
=0 ; i
<n
; i
++) {
1205 if (get_msr_entry(&msrs
[i
], env
)) {
1210 kvm_arch_save_mpstate(env
);
1211 kvm_save_lapic(env
);
1212 kvm_get_vcpu_events(env
);
1213 kvm_get_debugregs(env
);
1216 static int _kvm_arch_init_vcpu(CPUState
*env
)
1218 kvm_arch_reset_vcpu(env
);
1221 if (((env
->cpuid_version
>> 8)&0xF) >= 6
1222 && (env
->cpuid_features
&(CPUID_MCE
|CPUID_MCA
)) == (CPUID_MCE
|CPUID_MCA
)
1223 && kvm_check_extension(kvm_state
, KVM_CAP_MCE
) > 0) {
1227 if (kvm_get_mce_cap_supported(kvm_context
, &mcg_cap
, &banks
)) {
1228 perror("kvm_get_mce_cap_supported FAILED");
1230 if (banks
> MCE_BANKS_DEF
)
1231 banks
= MCE_BANKS_DEF
;
1232 mcg_cap
&= MCE_CAP_DEF
;
1234 if (kvm_setup_mce(env
, &mcg_cap
)) {
1235 perror("kvm_setup_mce FAILED");
1237 env
->mcg_cap
= mcg_cap
;
1243 #ifdef KVM_EXIT_TPR_ACCESS
1244 kvm_enable_tpr_access_reporting(env
);
1246 kvm_reset_mpstate(env
);
/*
 * Vcpu executed HLT: mark it halted (branch body not visible here)
 * unless an unmasked external interrupt or an NMI is already pending.
 */
1250 int kvm_arch_halt(CPUState
*env
)
1253 if (!((env
->interrupt_request
& CPU_INTERRUPT_HARD
) &&
1254 (env
->eflags
& IF_MASK
)) &&
1255 !(env
->interrupt_request
& CPU_INTERRUPT_NMI
)) {
/*
 * Per-entry hook: with a userspace irqchip, mirror the APIC TPR into
 * kvm_run->cr8 before re-entering the guest.
 */
1261 int kvm_arch_pre_run(CPUState
*env
, struct kvm_run
*run
)
1263 if (!kvm_irqchip_in_kernel()) {
1264 kvm_set_cr8(env
, cpu_get_apic_tpr(env
->apic_state
));
/*
 * Report whether the vcpu has deliverable work: an unmasked external
 * interrupt (IF set) or a pending NMI.  Logical complement of the
 * condition in kvm_arch_halt().
 */
1269 int kvm_arch_has_work(CPUState
*env
)
1271 if (((env
->interrupt_request
& CPU_INTERRUPT_HARD
) &&
1272 (env
->eflags
& IF_MASK
)) ||
1273 (env
->interrupt_request
& CPU_INTERRUPT_NMI
)) {
1279 int kvm_arch_try_push_interrupts(void *opaque
)
1281 CPUState
*env
= cpu_single_env
;
1284 if (kvm_is_ready_for_interrupt_injection(env
) &&
1285 (env
->interrupt_request
& CPU_INTERRUPT_HARD
) &&
1286 (env
->eflags
& IF_MASK
)) {
1287 env
->interrupt_request
&= ~CPU_INTERRUPT_HARD
;
1288 irq
= cpu_get_pic_interrupt(env
);
1290 r
= kvm_inject_irq(env
, irq
);
1292 printf("cpu %d fail inject %x\n", env
->cpu_index
, irq
);
1297 return (env
->interrupt_request
& CPU_INTERRUPT_HARD
) != 0;
1300 #ifdef KVM_CAP_USER_NMI
1301 void kvm_arch_push_nmi(void *opaque
)
1303 CPUState
*env
= cpu_single_env
;
1306 if (likely(!(env
->interrupt_request
& CPU_INTERRUPT_NMI
))) {
1310 env
->interrupt_request
&= ~CPU_INTERRUPT_NMI
;
1311 r
= kvm_inject_nmi(env
);
1313 printf("cpu %d fail inject NMI\n", env
->cpu_index
);
1316 #endif /* KVM_CAP_USER_NMI */
1318 static int kvm_reset_msrs(CPUState
*env
)
1321 struct kvm_msrs info
;
1322 struct kvm_msr_entry entries
[100];
1325 struct kvm_msr_entry
*msrs
= msr_data
.entries
;
1329 if (!kvm_msr_list
) {
1333 for (n
= 0; n
< kvm_msr_list
->nmsrs
; n
++) {
1334 index
= kvm_msr_list
->indices
[n
];
1337 data
= 0x0007040600070406ULL
;
1342 kvm_msr_entry_set(&msrs
[n
], kvm_msr_list
->indices
[n
], data
);
1345 msr_data
.info
.nmsrs
= n
;
1347 return kvm_vcpu_ioctl(env
, KVM_SET_MSRS
, &msr_data
);
/*
 * Full x86 vcpu reset: restore MSRs to power-on values, run the
 * generic vcpu reset, then reset mp_state.
 */
1351 void kvm_arch_cpu_reset(CPUState
*env
)
1353 kvm_reset_msrs(env
);
1354 kvm_arch_reset_vcpu(env
);
1355 kvm_reset_mpstate(env
);
1358 #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
/*
 * Thread helper for device assignment: apply an ioperm() I/O-port
 * permission range described by the ioperm_data payload.  Return
 * value of ioperm() is deliberately ignored here.
 */
1359 void kvm_arch_do_ioperm(void *_data
)
1361 struct ioperm_data
*data
= _data
;
1362 ioperm(data
->start_port
, data
->num
, data
->turn_on
);
1367 * Setup x86 specific IRQ routing
1369 int kvm_arch_init_irq_routing(void)
1373 if (kvm_irqchip
&& kvm_has_gsi_routing(kvm_context
)) {
1374 kvm_clear_gsi_routes(kvm_context
);
1375 for (i
= 0; i
< 8; ++i
) {
1379 r
= kvm_add_irq_route(kvm_context
, i
, KVM_IRQCHIP_PIC_MASTER
, i
);
1384 for (i
= 8; i
< 16; ++i
) {
1385 r
= kvm_add_irq_route(kvm_context
, i
, KVM_IRQCHIP_PIC_SLAVE
, i
- 8);
1390 for (i
= 0; i
< 24; ++i
) {
1392 r
= kvm_add_irq_route(kvm_context
, i
, KVM_IRQCHIP_IOAPIC
, 2);
1393 } else if (i
!= 2) {
1394 r
= kvm_add_irq_route(kvm_context
, i
, KVM_IRQCHIP_IOAPIC
, i
);
1400 kvm_commit_irq_routes(kvm_context
);
1405 void kvm_arch_process_irqchip_events(CPUState
*env
)
1407 if (env
->interrupt_request
& CPU_INTERRUPT_INIT
) {
1408 kvm_cpu_synchronize_state(env
);
1411 if (env
->interrupt_request
& CPU_INTERRUPT_SIPI
) {
1412 kvm_cpu_synchronize_state(env
);