3 #include "config-host.h"
6 #define KVM_ALLOWED_DEFAULT 1
8 #define KVM_ALLOWED_DEFAULT 0
11 int kvm_allowed
= KVM_ALLOWED_DEFAULT
;
21 #include <sys/utsname.h>
23 #define MSR_IA32_TSC 0x10
25 extern void perror(const char *s
);
27 kvm_context_t kvm_context
;
28 static struct kvm_msr_list
*kvm_msr_list
;
29 static int kvm_has_msr_star
;
33 pthread_mutex_t qemu_mutex
= PTHREAD_MUTEX_INITIALIZER
;
34 static __thread CPUState
*vcpu_env
;
36 static sigset_t io_sigset
, io_negsigset
;
40 #define SIG_IPI (SIGRTMIN+4)
49 static void sig_ipi_handler(int n
)
53 void kvm_update_interrupt_request(CPUState
*env
)
55 if (env
&& env
!= vcpu_env
) {
56 if (vcpu_info
[env
->cpu_index
].signalled
)
58 vcpu_info
[env
->cpu_index
].signalled
= 1;
59 if (vcpu_info
[env
->cpu_index
].thread
)
60 pthread_kill(vcpu_info
[env
->cpu_index
].thread
, SIG_IPI
);
64 void kvm_update_after_sipi(CPUState
*env
)
66 vcpu_info
[env
->cpu_index
].sipi_needed
= 1;
67 kvm_update_interrupt_request(env
);
70 * The qemu bios waits using a busy loop that is much too short for
71 * kvm, so add a wait after the first sipi.
74 static int first_sipi
= 1;
83 void kvm_apic_init(CPUState
*env
)
85 vcpu_info
[env
->cpu_index
].init
= 1;
86 kvm_update_interrupt_request(env
);
89 static void set_msr_entry(struct kvm_msr_entry
*entry
, uint32_t index
,
96 /* returns 0 on success, non-0 on failure */
97 static int get_msr_entry(struct kvm_msr_entry
*entry
, CPUState
*env
)
99 switch (entry
->index
) {
100 case MSR_IA32_SYSENTER_CS
:
101 env
->sysenter_cs
= entry
->data
;
103 case MSR_IA32_SYSENTER_ESP
:
104 env
->sysenter_esp
= entry
->data
;
106 case MSR_IA32_SYSENTER_EIP
:
107 env
->sysenter_eip
= entry
->data
;
110 env
->star
= entry
->data
;
114 env
->cstar
= entry
->data
;
116 case MSR_KERNELGSBASE
:
117 env
->kernelgsbase
= entry
->data
;
120 env
->fmask
= entry
->data
;
123 env
->lstar
= entry
->data
;
127 env
->tsc
= entry
->data
;
130 printf("Warning unknown msr index 0x%x\n", entry
->index
);
142 static void set_v8086_seg(struct kvm_segment
*lhs
, const SegmentCache
*rhs
)
144 lhs
->selector
= rhs
->selector
;
145 lhs
->base
= rhs
->base
;
146 lhs
->limit
= rhs
->limit
;
158 static void set_seg(struct kvm_segment
*lhs
, const SegmentCache
*rhs
)
160 unsigned flags
= rhs
->flags
;
161 lhs
->selector
= rhs
->selector
;
162 lhs
->base
= rhs
->base
;
163 lhs
->limit
= rhs
->limit
;
164 lhs
->type
= (flags
>> DESC_TYPE_SHIFT
) & 15;
165 lhs
->present
= (flags
& DESC_P_MASK
) != 0;
166 lhs
->dpl
= rhs
->selector
& 3;
167 lhs
->db
= (flags
>> DESC_B_SHIFT
) & 1;
168 lhs
->s
= (flags
& DESC_S_MASK
) != 0;
169 lhs
->l
= (flags
>> DESC_L_SHIFT
) & 1;
170 lhs
->g
= (flags
& DESC_G_MASK
) != 0;
171 lhs
->avl
= (flags
& DESC_AVL_MASK
) != 0;
175 static void get_seg(SegmentCache
*lhs
, const struct kvm_segment
*rhs
)
177 lhs
->selector
= rhs
->selector
;
178 lhs
->base
= rhs
->base
;
179 lhs
->limit
= rhs
->limit
;
181 (rhs
->type
<< DESC_TYPE_SHIFT
)
182 | (rhs
->present
* DESC_P_MASK
)
183 | (rhs
->dpl
<< DESC_DPL_SHIFT
)
184 | (rhs
->db
<< DESC_B_SHIFT
)
185 | (rhs
->s
* DESC_S_MASK
)
186 | (rhs
->l
<< DESC_L_SHIFT
)
187 | (rhs
->g
* DESC_G_MASK
)
188 | (rhs
->avl
* DESC_AVL_MASK
);
191 /* The reset values of qemu are not compatible with SVM;
192 * this function is used to fix the segment descriptor values. */
193 static void fix_realmode_dataseg(struct kvm_segment
*seg
)
200 static void load_regs(CPUState
*env
)
202 struct kvm_regs regs
;
204 struct kvm_sregs sregs
;
205 struct kvm_msr_entry msrs
[MSR_COUNT
];
208 regs
.rax
= env
->regs
[R_EAX
];
209 regs
.rbx
= env
->regs
[R_EBX
];
210 regs
.rcx
= env
->regs
[R_ECX
];
211 regs
.rdx
= env
->regs
[R_EDX
];
212 regs
.rsi
= env
->regs
[R_ESI
];
213 regs
.rdi
= env
->regs
[R_EDI
];
214 regs
.rsp
= env
->regs
[R_ESP
];
215 regs
.rbp
= env
->regs
[R_EBP
];
217 regs
.r8
= env
->regs
[8];
218 regs
.r9
= env
->regs
[9];
219 regs
.r10
= env
->regs
[10];
220 regs
.r11
= env
->regs
[11];
221 regs
.r12
= env
->regs
[12];
222 regs
.r13
= env
->regs
[13];
223 regs
.r14
= env
->regs
[14];
224 regs
.r15
= env
->regs
[15];
227 regs
.rflags
= env
->eflags
;
230 kvm_set_regs(kvm_context
, env
->cpu_index
, ®s
);
232 memset(&fpu
, 0, sizeof fpu
);
233 fpu
.fsw
= env
->fpus
& ~(7 << 11);
234 fpu
.fsw
|= (env
->fpstt
& 7) << 11;
236 for (i
= 0; i
< 8; ++i
)
237 fpu
.ftwx
|= (!env
->fptags
[i
]) << i
;
238 memcpy(fpu
.fpr
, env
->fpregs
, sizeof env
->fpregs
);
239 memcpy(fpu
.xmm
, env
->xmm_regs
, sizeof env
->xmm_regs
);
240 fpu
.mxcsr
= env
->mxcsr
;
241 kvm_set_fpu(kvm_context
, env
->cpu_index
, &fpu
);
243 memcpy(sregs
.interrupt_bitmap
, env
->kvm_interrupt_bitmap
, sizeof(sregs
.interrupt_bitmap
));
245 if ((env
->eflags
& VM_MASK
)) {
246 set_v8086_seg(&sregs
.cs
, &env
->segs
[R_CS
]);
247 set_v8086_seg(&sregs
.ds
, &env
->segs
[R_DS
]);
248 set_v8086_seg(&sregs
.es
, &env
->segs
[R_ES
]);
249 set_v8086_seg(&sregs
.fs
, &env
->segs
[R_FS
]);
250 set_v8086_seg(&sregs
.gs
, &env
->segs
[R_GS
]);
251 set_v8086_seg(&sregs
.ss
, &env
->segs
[R_SS
]);
253 set_seg(&sregs
.cs
, &env
->segs
[R_CS
]);
254 set_seg(&sregs
.ds
, &env
->segs
[R_DS
]);
255 set_seg(&sregs
.es
, &env
->segs
[R_ES
]);
256 set_seg(&sregs
.fs
, &env
->segs
[R_FS
]);
257 set_seg(&sregs
.gs
, &env
->segs
[R_GS
]);
258 set_seg(&sregs
.ss
, &env
->segs
[R_SS
]);
260 if (env
->cr
[0] & CR0_PE_MASK
) {
261 /* force ss cpl to cs cpl */
262 sregs
.ss
.selector
= (sregs
.ss
.selector
& ~3) |
263 (sregs
.cs
.selector
& 3);
264 sregs
.ss
.dpl
= sregs
.ss
.selector
& 3;
267 if (!(env
->cr
[0] & CR0_PG_MASK
)) {
268 fix_realmode_dataseg(&sregs
.cs
);
269 fix_realmode_dataseg(&sregs
.ds
);
270 fix_realmode_dataseg(&sregs
.es
);
271 fix_realmode_dataseg(&sregs
.fs
);
272 fix_realmode_dataseg(&sregs
.gs
);
273 fix_realmode_dataseg(&sregs
.ss
);
277 set_seg(&sregs
.tr
, &env
->tr
);
278 set_seg(&sregs
.ldt
, &env
->ldt
);
280 sregs
.idt
.limit
= env
->idt
.limit
;
281 sregs
.idt
.base
= env
->idt
.base
;
282 sregs
.gdt
.limit
= env
->gdt
.limit
;
283 sregs
.gdt
.base
= env
->gdt
.base
;
285 sregs
.cr0
= env
->cr
[0];
286 sregs
.cr2
= env
->cr
[2];
287 sregs
.cr3
= env
->cr
[3];
288 sregs
.cr4
= env
->cr
[4];
290 sregs
.apic_base
= cpu_get_apic_base(env
);
291 sregs
.efer
= env
->efer
;
292 sregs
.cr8
= cpu_get_apic_tpr(env
);
294 kvm_set_sregs(kvm_context
, env
->cpu_index
, &sregs
);
298 set_msr_entry(&msrs
[n
++], MSR_IA32_SYSENTER_CS
, env
->sysenter_cs
);
299 set_msr_entry(&msrs
[n
++], MSR_IA32_SYSENTER_ESP
, env
->sysenter_esp
);
300 set_msr_entry(&msrs
[n
++], MSR_IA32_SYSENTER_EIP
, env
->sysenter_eip
);
301 if (kvm_has_msr_star
)
302 set_msr_entry(&msrs
[n
++], MSR_STAR
, env
->star
);
303 set_msr_entry(&msrs
[n
++], MSR_IA32_TSC
, env
->tsc
);
305 set_msr_entry(&msrs
[n
++], MSR_CSTAR
, env
->cstar
);
306 set_msr_entry(&msrs
[n
++], MSR_KERNELGSBASE
, env
->kernelgsbase
);
307 set_msr_entry(&msrs
[n
++], MSR_FMASK
, env
->fmask
);
308 set_msr_entry(&msrs
[n
++], MSR_LSTAR
, env
->lstar
);
311 rc
= kvm_set_msrs(kvm_context
, env
->cpu_index
, msrs
, n
);
313 perror("kvm_set_msrs FAILED");
317 static void save_regs(CPUState
*env
)
319 struct kvm_regs regs
;
321 struct kvm_sregs sregs
;
322 struct kvm_msr_entry msrs
[MSR_COUNT
];
326 kvm_get_regs(kvm_context
, env
->cpu_index
, ®s
);
328 env
->regs
[R_EAX
] = regs
.rax
;
329 env
->regs
[R_EBX
] = regs
.rbx
;
330 env
->regs
[R_ECX
] = regs
.rcx
;
331 env
->regs
[R_EDX
] = regs
.rdx
;
332 env
->regs
[R_ESI
] = regs
.rsi
;
333 env
->regs
[R_EDI
] = regs
.rdi
;
334 env
->regs
[R_ESP
] = regs
.rsp
;
335 env
->regs
[R_EBP
] = regs
.rbp
;
337 env
->regs
[8] = regs
.r8
;
338 env
->regs
[9] = regs
.r9
;
339 env
->regs
[10] = regs
.r10
;
340 env
->regs
[11] = regs
.r11
;
341 env
->regs
[12] = regs
.r12
;
342 env
->regs
[13] = regs
.r13
;
343 env
->regs
[14] = regs
.r14
;
344 env
->regs
[15] = regs
.r15
;
347 env
->eflags
= regs
.rflags
;
350 kvm_get_fpu(kvm_context
, env
->cpu_index
, &fpu
);
351 env
->fpstt
= (fpu
.fsw
>> 11) & 7;
354 for (i
= 0; i
< 8; ++i
)
355 env
->fptags
[i
] = !((fpu
.ftwx
>> i
) & 1);
356 memcpy(env
->fpregs
, fpu
.fpr
, sizeof env
->fpregs
);
357 memcpy(env
->xmm_regs
, fpu
.xmm
, sizeof env
->xmm_regs
);
358 env
->mxcsr
= fpu
.mxcsr
;
360 kvm_get_sregs(kvm_context
, env
->cpu_index
, &sregs
);
362 memcpy(env
->kvm_interrupt_bitmap
, sregs
.interrupt_bitmap
, sizeof(env
->kvm_interrupt_bitmap
));
364 get_seg(&env
->segs
[R_CS
], &sregs
.cs
);
365 get_seg(&env
->segs
[R_DS
], &sregs
.ds
);
366 get_seg(&env
->segs
[R_ES
], &sregs
.es
);
367 get_seg(&env
->segs
[R_FS
], &sregs
.fs
);
368 get_seg(&env
->segs
[R_GS
], &sregs
.gs
);
369 get_seg(&env
->segs
[R_SS
], &sregs
.ss
);
371 get_seg(&env
->tr
, &sregs
.tr
);
372 get_seg(&env
->ldt
, &sregs
.ldt
);
374 env
->idt
.limit
= sregs
.idt
.limit
;
375 env
->idt
.base
= sregs
.idt
.base
;
376 env
->gdt
.limit
= sregs
.gdt
.limit
;
377 env
->gdt
.base
= sregs
.gdt
.base
;
379 env
->cr
[0] = sregs
.cr0
;
380 env
->cr
[2] = sregs
.cr2
;
381 env
->cr
[3] = sregs
.cr3
;
382 env
->cr
[4] = sregs
.cr4
;
384 cpu_set_apic_base(env
, sregs
.apic_base
);
386 env
->efer
= sregs
.efer
;
387 //cpu_set_apic_tpr(env, sregs.cr8);
389 #define HFLAG_COPY_MASK ~( \
390 HF_CPL_MASK | HF_PE_MASK | HF_MP_MASK | HF_EM_MASK | \
391 HF_TS_MASK | HF_TF_MASK | HF_VM_MASK | HF_IOPL_MASK | \
392 HF_OSFXSR_MASK | HF_LMA_MASK | HF_CS32_MASK | \
393 HF_SS32_MASK | HF_CS64_MASK | HF_ADDSEG_MASK)
397 hflags
= (env
->segs
[R_CS
].flags
>> DESC_DPL_SHIFT
) & HF_CPL_MASK
;
398 hflags
|= (env
->cr
[0] & CR0_PE_MASK
) << (HF_PE_SHIFT
- CR0_PE_SHIFT
);
399 hflags
|= (env
->cr
[0] << (HF_MP_SHIFT
- CR0_MP_SHIFT
)) &
400 (HF_MP_MASK
| HF_EM_MASK
| HF_TS_MASK
);
401 hflags
|= (env
->eflags
& (HF_TF_MASK
| HF_VM_MASK
| HF_IOPL_MASK
));
402 hflags
|= (env
->cr
[4] & CR4_OSFXSR_MASK
) <<
403 (HF_OSFXSR_SHIFT
- CR4_OSFXSR_SHIFT
);
405 if (env
->efer
& MSR_EFER_LMA
) {
406 hflags
|= HF_LMA_MASK
;
409 if ((hflags
& HF_LMA_MASK
) && (env
->segs
[R_CS
].flags
& DESC_L_MASK
)) {
410 hflags
|= HF_CS32_MASK
| HF_SS32_MASK
| HF_CS64_MASK
;
412 hflags
|= (env
->segs
[R_CS
].flags
& DESC_B_MASK
) >>
413 (DESC_B_SHIFT
- HF_CS32_SHIFT
);
414 hflags
|= (env
->segs
[R_SS
].flags
& DESC_B_MASK
) >>
415 (DESC_B_SHIFT
- HF_SS32_SHIFT
);
416 if (!(env
->cr
[0] & CR0_PE_MASK
) ||
417 (env
->eflags
& VM_MASK
) ||
418 !(hflags
& HF_CS32_MASK
)) {
419 hflags
|= HF_ADDSEG_MASK
;
421 hflags
|= ((env
->segs
[R_DS
].base
|
422 env
->segs
[R_ES
].base
|
423 env
->segs
[R_SS
].base
) != 0) <<
427 env
->hflags
= (env
->hflags
& HFLAG_COPY_MASK
) | hflags
;
428 env
->cc_src
= env
->eflags
& (CC_O
| CC_S
| CC_Z
| CC_A
| CC_P
| CC_C
);
429 env
->df
= 1 - (2 * ((env
->eflags
>> 10) & 1));
430 env
->cc_op
= CC_OP_EFLAGS
;
431 env
->eflags
&= ~(DF_MASK
| CC_O
| CC_S
| CC_Z
| CC_A
| CC_P
| CC_C
);
435 msrs
[n
++].index
= MSR_IA32_SYSENTER_CS
;
436 msrs
[n
++].index
= MSR_IA32_SYSENTER_ESP
;
437 msrs
[n
++].index
= MSR_IA32_SYSENTER_EIP
;
438 if (kvm_has_msr_star
)
439 msrs
[n
++].index
= MSR_STAR
;
440 msrs
[n
++].index
= MSR_IA32_TSC
;
442 msrs
[n
++].index
= MSR_CSTAR
;
443 msrs
[n
++].index
= MSR_KERNELGSBASE
;
444 msrs
[n
++].index
= MSR_FMASK
;
445 msrs
[n
++].index
= MSR_LSTAR
;
447 rc
= kvm_get_msrs(kvm_context
, env
->cpu_index
, msrs
, n
);
449 perror("kvm_get_msrs FAILED");
452 n
= rc
; /* actual number of MSRs */
453 for (i
=0 ; i
<n
; i
++) {
454 if (get_msr_entry(&msrs
[i
], env
))
463 static int try_push_interrupts(void *opaque
)
465 CPUState
*env
= cpu_single_env
;
468 if (env
->ready_for_interrupt_injection
&&
469 (env
->interrupt_request
& CPU_INTERRUPT_HARD
) &&
470 (env
->eflags
& IF_MASK
)) {
471 env
->interrupt_request
&= ~CPU_INTERRUPT_HARD
;
472 irq
= cpu_get_pic_interrupt(env
);
474 r
= kvm_inject_irq(kvm_context
, env
->cpu_index
, irq
);
476 printf("cpu %d fail inject %x\n", env
->cpu_index
, irq
);
480 return (env
->interrupt_request
& CPU_INTERRUPT_HARD
) != 0;
483 static void post_kvm_run(void *opaque
, int vcpu
)
485 CPUState
*env
= vcpu_env
;
487 pthread_mutex_lock(&qemu_mutex
);
488 cpu_single_env
= env
;
489 env
->eflags
= kvm_get_interrupt_flag(kvm_context
, vcpu
)
490 ? env
->eflags
| IF_MASK
: env
->eflags
& ~IF_MASK
;
491 env
->ready_for_interrupt_injection
492 = kvm_is_ready_for_interrupt_injection(kvm_context
, vcpu
);
493 //cpu_set_apic_tpr(env, kvm_run->cr8);
494 cpu_set_apic_base(env
, kvm_get_apic_base(kvm_context
, vcpu
));
497 static int pre_kvm_run(void *opaque
, int vcpu
)
499 CPUState
*env
= cpu_single_env
;
501 if (env
->cpu_index
== 0 && wait_hack
) {
506 pthread_mutex_unlock(&qemu_mutex
);
507 for (i
= 0; i
< 10; ++i
)
509 pthread_mutex_lock(&qemu_mutex
);
512 kvm_set_cr8(kvm_context
, vcpu
, cpu_get_apic_tpr(env
));
513 if (env
->interrupt_request
& CPU_INTERRUPT_EXIT
)
515 pthread_mutex_unlock(&qemu_mutex
);
519 void kvm_load_registers(CPUState
*env
)
525 void kvm_save_registers(CPUState
*env
)
531 int kvm_cpu_exec(CPUState
*env
)
535 r
= kvm_run(kvm_context
, env
->cpu_index
);
537 printf("kvm_run returned %d\n", r
);
544 extern int vm_running
;
546 static int has_work(CPUState
*env
)
550 if (!(env
->hflags
& HF_HALTED_MASK
))
552 if (env
->interrupt_request
& (CPU_INTERRUPT_HARD
| CPU_INTERRUPT_EXIT
))
557 static int kvm_eat_signal(CPUState
*env
, int timeout
)
564 ts
.tv_sec
= timeout
/ 1000;
565 ts
.tv_nsec
= (timeout
% 1000) * 1000000;
566 r
= sigtimedwait(&io_sigset
, &siginfo
, &ts
);
567 if (r
== -1 && (errno
== EAGAIN
|| errno
== EINTR
) && !timeout
)
570 pthread_mutex_lock(&qemu_mutex
);
571 cpu_single_env
= vcpu_env
;
572 if (r
== -1 && !(errno
== EAGAIN
|| errno
== EINTR
)) {
573 printf("sigtimedwait: %s\n", strerror(e
));
577 sigaction(siginfo
.si_signo
, NULL
, &sa
);
578 sa
.sa_handler(siginfo
.si_signo
);
581 pthread_mutex_unlock(&qemu_mutex
);
587 static int kvm_eat_signals(CPUState
*env
, int timeout
)
591 while (kvm_eat_signal(env
, 0))
594 r
= kvm_eat_signal(env
, timeout
);
596 while (kvm_eat_signal(env
, 0))
600 * we call select() even if no signal was received, to account for
601 * descriptors for which there is no signal handler installed.
603 pthread_mutex_lock(&qemu_mutex
);
604 cpu_single_env
= vcpu_env
;
606 pthread_mutex_unlock(&qemu_mutex
);
609 static void kvm_main_loop_wait(CPUState
*env
, int timeout
)
611 if (vcpu_info
[env
->cpu_index
].signalled
&& timeout
)
613 pthread_mutex_unlock(&qemu_mutex
);
614 if (env
->cpu_index
== 0)
615 kvm_eat_signals(env
, timeout
);
622 sigaddset(&set
, SIG_IPI
);
625 pthread_mutex_lock(&qemu_mutex
);
626 cpu_single_env
= env
;
628 vcpu_info
[env
->cpu_index
].signalled
= 0;
631 static void update_regs_for_sipi(CPUState
*env
)
633 SegmentCache cs
= env
->segs
[R_CS
];
636 env
->segs
[R_CS
] = cs
;
639 vcpu_info
[env
->cpu_index
].sipi_needed
= 0;
640 vcpu_info
[env
->cpu_index
].init
= 0;
643 static void update_regs_for_init(CPUState
*env
)
649 static void setup_kernel_sigmask(CPUState
*env
)
653 sigprocmask(SIG_BLOCK
, NULL
, &set
);
654 sigdelset(&set
, SIG_IPI
);
655 if (env
->cpu_index
== 0)
656 sigandset(&set
, &set
, &io_negsigset
);
658 kvm_set_signal_mask(kvm_context
, env
->cpu_index
, &set
);
661 static int kvm_main_loop_cpu(CPUState
*env
)
663 struct vcpu_info
*info
= &vcpu_info
[env
->cpu_index
];
665 setup_kernel_sigmask(env
);
666 pthread_mutex_lock(&qemu_mutex
);
667 cpu_single_env
= env
;
669 while (!has_work(env
))
670 kvm_main_loop_wait(env
, 10);
671 if (env
->interrupt_request
& CPU_INTERRUPT_HARD
)
672 env
->hflags
&= ~HF_HALTED_MASK
;
673 if (info
->sipi_needed
)
674 update_regs_for_sipi(env
);
676 update_regs_for_init(env
);
677 if (!(env
->hflags
& HF_HALTED_MASK
) && !info
->init
)
679 env
->interrupt_request
&= ~CPU_INTERRUPT_EXIT
;
680 kvm_main_loop_wait(env
, 0);
681 if (qemu_shutdown_requested())
683 else if (qemu_powerdown_requested())
684 qemu_system_powerdown();
685 else if (qemu_reset_requested()) {
686 env
->interrupt_request
= 0;
691 pthread_mutex_unlock(&qemu_mutex
);
695 static void *ap_main_loop(void *_env
)
697 CPUState
*env
= _env
;
701 sigfillset(&signals
);
702 //sigdelset(&signals, SIG_IPI);
703 sigprocmask(SIG_BLOCK
, &signals
, NULL
);
704 kvm_create_vcpu(kvm_context
, env
->cpu_index
);
705 kvm_qemu_init_env(env
);
706 kvm_main_loop_cpu(env
);
710 static void kvm_add_signal(int signum
)
712 sigaddset(&io_sigset
, signum
);
713 sigdelset(&io_negsigset
, signum
);
714 sigprocmask(SIG_BLOCK
, &io_sigset
, NULL
);
717 int kvm_main_loop(void)
719 CPUState
*env
= first_cpu
->next_cpu
;
722 sigemptyset(&io_sigset
);
723 sigfillset(&io_negsigset
);
724 kvm_add_signal(SIGIO
);
725 kvm_add_signal(SIGALRM
);
726 kvm_add_signal(SIGUSR2
);
727 kvm_add_signal(SIG_IPI
);
729 vcpu_env
= first_cpu
;
730 signal(SIG_IPI
, sig_ipi_handler
);
731 for (i
= 1; i
< smp_cpus
; ++i
) {
732 pthread_create(&vcpu_info
[i
].thread
, NULL
, ap_main_loop
, env
);
735 vcpu_info
[0].thread
= pthread_self();
736 return kvm_main_loop_cpu(first_cpu
);
739 static int kvm_debug(void *opaque
, int vcpu
)
741 CPUState
*env
= cpu_single_env
;
743 env
->exception_index
= EXCP_DEBUG
;
747 static int kvm_inb(void *opaque
, uint16_t addr
, uint8_t *data
)
749 *data
= cpu_inb(0, addr
);
753 static int kvm_inw(void *opaque
, uint16_t addr
, uint16_t *data
)
755 *data
= cpu_inw(0, addr
);
759 static int kvm_inl(void *opaque
, uint16_t addr
, uint32_t *data
)
761 *data
= cpu_inl(0, addr
);
765 #define PM_IO_BASE 0xb000
767 static int kvm_outb(void *opaque
, uint16_t addr
, uint8_t data
)
772 cpu_outb(0, 0xb3, 0);
779 x
= cpu_inw(0, PM_IO_BASE
+ 4);
781 cpu_outw(0, PM_IO_BASE
+ 4, x
);
788 x
= cpu_inw(0, PM_IO_BASE
+ 4);
790 cpu_outw(0, PM_IO_BASE
+ 4, x
);
798 cpu_outb(0, addr
, data
);
802 static int kvm_outw(void *opaque
, uint16_t addr
, uint16_t data
)
804 cpu_outw(0, addr
, data
);
808 static int kvm_outl(void *opaque
, uint16_t addr
, uint32_t data
)
810 cpu_outl(0, addr
, data
);
814 static int kvm_readb(void *opaque
, uint64_t addr
, uint8_t *data
)
816 *data
= ldub_phys(addr
);
820 static int kvm_readw(void *opaque
, uint64_t addr
, uint16_t *data
)
822 *data
= lduw_phys(addr
);
826 static int kvm_readl(void *opaque
, uint64_t addr
, uint32_t *data
)
828 *data
= ldl_phys(addr
);
832 static int kvm_readq(void *opaque
, uint64_t addr
, uint64_t *data
)
834 *data
= ldq_phys(addr
);
838 static int kvm_writeb(void *opaque
, uint64_t addr
, uint8_t data
)
840 stb_phys(addr
, data
);
844 static int kvm_writew(void *opaque
, uint64_t addr
, uint16_t data
)
846 stw_phys(addr
, data
);
850 static int kvm_writel(void *opaque
, uint64_t addr
, uint32_t data
)
852 stl_phys(addr
, data
);
856 static int kvm_writeq(void *opaque
, uint64_t addr
, uint64_t data
)
858 stq_phys(addr
, data
);
862 static int kvm_io_window(void *opaque
)
868 static int kvm_halt(void *opaque
, int vcpu
)
870 CPUState
*env
= cpu_single_env
;
872 if (!((env
->interrupt_request
& CPU_INTERRUPT_HARD
) &&
873 (env
->eflags
& IF_MASK
))) {
874 env
->hflags
|= HF_HALTED_MASK
;
875 env
->exception_index
= EXCP_HLT
;
881 static int kvm_shutdown(void *opaque
, int vcpu
)
883 qemu_system_reset_request();
887 static struct kvm_callbacks qemu_kvm_ops
= {
899 .writeb
= kvm_writeb
,
900 .writew
= kvm_writew
,
901 .writel
= kvm_writel
,
902 .writeq
= kvm_writeq
,
904 .shutdown
= kvm_shutdown
,
905 .io_window
= kvm_io_window
,
906 .try_push_interrupts
= try_push_interrupts
,
907 .post_kvm_run
= post_kvm_run
,
908 .pre_kvm_run
= pre_kvm_run
,
913 /* Try to initialize kvm */
914 kvm_context
= kvm_init(&qemu_kvm_ops
, cpu_single_env
);
922 int kvm_qemu_create_context(void)
926 if (kvm_create(kvm_context
, phys_ram_size
, (void**)&phys_ram_base
) < 0) {
930 kvm_msr_list
= kvm_get_msr_list(kvm_context
);
935 for (i
= 0; i
< kvm_msr_list
->nmsrs
; ++i
)
936 if (kvm_msr_list
->indices
[i
] == MSR_STAR
)
937 kvm_has_msr_star
= 1;
941 void kvm_qemu_destroy(void)
943 kvm_finalize(kvm_context
);
946 static void host_cpuid(uint32_t function
, uint32_t *eax
, uint32_t *ebx
,
947 uint32_t *ecx
, uint32_t *edx
)
954 "sub $128, %%rsp \n\t" /* skip red zone */
955 "push %0; push %%rsi \n\t"
956 "push %%rax; push %%rbx; push %%rcx; push %%rdx \n\t"
957 "mov 8*5(%%rsp), %%rsi \n\t"
958 "mov (%%rsi), %%eax \n\t"
960 "mov %%eax, (%%rsi) \n\t"
961 "mov %%ebx, 4(%%rsi) \n\t"
962 "mov %%ecx, 8(%%rsi) \n\t"
963 "mov %%edx, 12(%%rsi) \n\t"
964 "pop %%rdx; pop %%rcx; pop %%rbx; pop %%rax \n\t"
965 "pop %%rsi; pop %0 \n\t"
968 "push %0; push %%esi \n\t"
969 "push %%eax; push %%ebx; push %%ecx; push %%edx \n\t"
970 "mov 4*5(%%esp), %%esi \n\t"
971 "mov (%%esi), %%eax \n\t"
973 "mov %%eax, (%%esi) \n\t"
974 "mov %%ebx, 4(%%esi) \n\t"
975 "mov %%ecx, 8(%%esi) \n\t"
976 "mov %%edx, 12(%%esi) \n\t"
977 "pop %%edx; pop %%ecx; pop %%ebx; pop %%eax \n\t"
978 "pop %%esi; pop %0 \n\t"
980 : : "rm"(vec
) : "memory");
991 static void do_cpuid_ent(struct kvm_cpuid_entry
*e
, uint32_t function
,
994 env
->regs
[R_EAX
] = function
;
995 qemu_kvm_cpuid_on_env(env
);
996 e
->function
= function
;
997 e
->eax
= env
->regs
[R_EAX
];
998 e
->ebx
= env
->regs
[R_EBX
];
999 e
->ecx
= env
->regs
[R_ECX
];
1000 e
->edx
= env
->regs
[R_EDX
];
1001 if (function
== 0x80000001) {
1002 uint32_t h_eax
, h_edx
;
1003 struct utsname utsname
;
1004 int lm_capable_kernel
;
1006 host_cpuid(function
, &h_eax
, NULL
, NULL
, &h_edx
);
1008 lm_capable_kernel
= strcmp(utsname
.machine
, "x86_64") == 0;
1011 if ((h_edx
& 0x20000000) == 0 || !lm_capable_kernel
)
1012 e
->edx
&= ~0x20000000u
;
1014 if ((h_edx
& 0x00000800) == 0)
1015 e
->edx
&= ~0x00000800u
;
1017 if ((h_edx
& 0x00100000) == 0)
1018 e
->edx
&= ~0x00100000u
;
1020 // sysenter isn't supported in compatibility mode on AMD, and syscall
1021 // isn't supported in compatibility mode on Intel. So advertise the
1022 // actual cpu, and say goodbye to migration between different vendors
1023 // if you use compatibility mode.
1024 if (function
== 0) {
1027 host_cpuid(0, NULL
, &bcd
[0], &bcd
[1], &bcd
[2]);
1034 int kvm_qemu_init_env(CPUState
*cenv
)
1036 struct kvm_cpuid_entry cpuid_ent
[100];
1043 copy
.regs
[R_EAX
] = 0;
1044 qemu_kvm_cpuid_on_env(©
);
1045 limit
= copy
.regs
[R_EAX
];
1047 for (i
= 0; i
<= limit
; ++i
)
1048 do_cpuid_ent(&cpuid_ent
[cpuid_nent
++], i
, ©
);
1050 copy
.regs
[R_EAX
] = 0x80000000;
1051 qemu_kvm_cpuid_on_env(©
);
1052 limit
= copy
.regs
[R_EAX
];
1054 for (i
= 0x80000000; i
<= limit
; ++i
)
1055 do_cpuid_ent(&cpuid_ent
[cpuid_nent
++], i
, ©
);
1057 kvm_setup_cpuid(kvm_context
, cenv
->cpu_index
, cpuid_nent
, cpuid_ent
);
1062 int kvm_update_debugger(CPUState
*env
)
1064 struct kvm_debug_guest dbg
;
1068 if (env
->nb_breakpoints
|| env
->singlestep_enabled
) {
1070 for (i
= 0; i
< 4 && i
< env
->nb_breakpoints
; ++i
) {
1071 dbg
.breakpoints
[i
].enabled
= 1;
1072 dbg
.breakpoints
[i
].address
= env
->breakpoints
[i
];
1074 dbg
.singlestep
= env
->singlestep_enabled
;
1076 return kvm_guest_debug(kvm_context
, env
->cpu_index
, &dbg
);
1081 * dirty pages logging
1083 /* FIXME: use unsigned long pointer instead of unsigned char */
1084 unsigned char *kvm_dirty_bitmap
= NULL
;
1085 int kvm_physical_memory_set_dirty_tracking(int enable
)
1093 if (!kvm_dirty_bitmap
) {
1094 unsigned bitmap_size
= BITMAP_SIZE(phys_ram_size
);
1095 kvm_dirty_bitmap
= qemu_malloc(bitmap_size
);
1096 if (kvm_dirty_bitmap
== NULL
) {
1097 perror("Failed to allocate dirty pages bitmap");
1101 r
= kvm_dirty_pages_log_enable_all(kvm_context
);
1106 if (kvm_dirty_bitmap
) {
1107 r
= kvm_dirty_pages_log_reset(kvm_context
);
1108 qemu_free(kvm_dirty_bitmap
);
1109 kvm_dirty_bitmap
= NULL
;
1115 /* get kvm's dirty pages bitmap and update qemu's */
1116 int kvm_get_dirty_pages_log_slot(int slot
,
1117 unsigned char *bitmap
,
1118 unsigned int offset
,
1122 unsigned int i
, j
, n
=0;
1124 unsigned page_number
, addr
, addr1
;
1126 memset(bitmap
, 0, len
);
1127 r
= kvm_get_dirty_pages(kvm_context
, slot
, bitmap
);
1132 * bitmap-traveling is faster than memory-traveling (for addr...)
1133 * especially when most of the memory is not dirty.
1135 for (i
=0; i
<len
; i
++) {
1140 page_number
= i
* 8 + j
;
1141 addr1
= page_number
* TARGET_PAGE_SIZE
;
1142 addr
= offset
+ addr1
;
1143 cpu_physical_memory_set_dirty(addr
);
1151 * get kvm's dirty pages bitmap and update qemu's
1152 * we only care about physical ram, which resides in slots 0 and 3
1154 int kvm_update_dirty_pages_log(void)
1158 len
= BITMAP_SIZE(0xa0000);
1159 r
= kvm_get_dirty_pages_log_slot(3, kvm_dirty_bitmap
, 0 , len
);
1160 len
= BITMAP_SIZE(phys_ram_size
- 0xc0000);
1161 r
= r
|| kvm_get_dirty_pages_log_slot(0, kvm_dirty_bitmap
, 0xc0000, len
);
1165 int kvm_get_phys_ram_page_bitmap(unsigned char *bitmap
)
1167 int r
=0, len
, offset
;
1169 len
= BITMAP_SIZE(phys_ram_size
);
1170 memset(bitmap
, 0, len
);
1172 r
= kvm_get_mem_map(kvm_context
, 3, bitmap
);
1176 offset
= BITMAP_SIZE(0xc0000);
1177 r
= kvm_get_mem_map(kvm_context
, 0, bitmap
+ offset
);