3 #include "config-host.h"
6 #define KVM_ALLOWED_DEFAULT 1
8 #define KVM_ALLOWED_DEFAULT 0
11 int kvm_allowed
= KVM_ALLOWED_DEFAULT
;
22 #define MSR_IA32_TSC 0x10
24 extern void perror(const char *s
);
26 kvm_context_t kvm_context
;
27 static struct kvm_msr_list
*kvm_msr_list
;
28 static int kvm_has_msr_star
;
32 pthread_mutex_t qemu_mutex
= PTHREAD_MUTEX_INITIALIZER
;
33 static __thread CPUState
*vcpu_env
;
35 static sigset_t io_sigset
, io_negsigset
;
39 #define SIG_IPI (SIGRTMIN+4)
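/* SIG_IPI is used to kick a vcpu thread out of KVM_RUN so it re-evaluates
 * its interrupt state (see kvm_update_interrupt_request() below). */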
static void sig_ipi_handler(int n)

void kvm_update_interrupt_request(CPUState *env)
    if (env && env != vcpu_env) {
        if (vcpu_info[env->cpu_index].signalled)
        vcpu_info[env->cpu_index].signalled = 1;
        if (vcpu_info[env->cpu_index].thread)
            pthread_kill(vcpu_info[env->cpu_index].thread, SIG_IPI);

void kvm_update_after_sipi(CPUState *env)
    vcpu_info[env->cpu_index].sipi_needed = 1;
    kvm_update_interrupt_request(env);
/* the qemu bios waits using a busy loop that's much too short for
 * kvm. add a wait after the first sipi. */
static int first_sipi = 1;

void kvm_apic_init(CPUState *env)
    vcpu_info[env->cpu_index].init = 1;
    kvm_update_interrupt_request(env);

static void set_msr_entry(struct kvm_msr_entry *entry, uint32_t index,

/* returns 0 on success, non-0 on failure */
static int get_msr_entry(struct kvm_msr_entry *entry, CPUState *env)
    switch (entry->index) {
    case MSR_IA32_SYSENTER_CS:
        env->sysenter_cs = entry->data;
    case MSR_IA32_SYSENTER_ESP:
        env->sysenter_esp = entry->data;
    case MSR_IA32_SYSENTER_EIP:
        env->sysenter_eip = entry->data;
        env->star = entry->data;
        env->cstar = entry->data;
    case MSR_KERNELGSBASE:
        env->kernelgsbase = entry->data;
        env->fmask = entry->data;
        env->lstar = entry->data;
        env->tsc = entry->data;
        printf("Warning unknown msr index 0x%x\n", entry->index);
static void set_v8086_seg(struct kvm_segment *lhs, const SegmentCache *rhs)
    lhs->selector = rhs->selector;
    lhs->base = rhs->base;
    lhs->limit = rhs->limit;
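/* set_seg()/get_seg() translate between QEMU's packed SegmentCache flags
 * word and the individual fields of KVM's struct kvm_segment. */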
static void set_seg(struct kvm_segment *lhs, const SegmentCache *rhs)
    unsigned flags = rhs->flags;
    lhs->selector = rhs->selector;
    lhs->base = rhs->base;
    lhs->limit = rhs->limit;
    lhs->type = (flags >> DESC_TYPE_SHIFT) & 15;
    lhs->present = (flags & DESC_P_MASK) != 0;
    lhs->dpl = rhs->selector & 3;
    lhs->db = (flags >> DESC_B_SHIFT) & 1;
    lhs->s = (flags & DESC_S_MASK) != 0;
    lhs->l = (flags >> DESC_L_SHIFT) & 1;
    lhs->g = (flags & DESC_G_MASK) != 0;
    lhs->avl = (flags & DESC_AVL_MASK) != 0;
static void get_seg(SegmentCache *lhs, const struct kvm_segment *rhs)
    lhs->selector = rhs->selector;
    lhs->base = rhs->base;
    lhs->limit = rhs->limit;
        (rhs->type << DESC_TYPE_SHIFT)
        | (rhs->present * DESC_P_MASK)
        | (rhs->dpl << DESC_DPL_SHIFT)
        | (rhs->db << DESC_B_SHIFT)
        | (rhs->s * DESC_S_MASK)
        | (rhs->l << DESC_L_SHIFT)
        | (rhs->g * DESC_G_MASK)
        | (rhs->avl * DESC_AVL_MASK);
/* the reset values of qemu are not compatible with SVM;
 * this function is used to fix the segment descriptor values */
static void fix_realmode_dataseg(struct kvm_segment *seg)
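/* load_regs(): copy QEMU's CPUState (general registers, FPU/SSE state,
 * segments, control registers and MSRs) into the kernel vcpu via the
 * kvm_set_regs/kvm_set_fpu/kvm_set_sregs/kvm_set_msrs calls below. */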
static void load_regs(CPUState *env)
    struct kvm_regs regs;
    struct kvm_sregs sregs;
    struct kvm_msr_entry msrs[MSR_COUNT];
    regs.rax = env->regs[R_EAX];
    regs.rbx = env->regs[R_EBX];
    regs.rcx = env->regs[R_ECX];
    regs.rdx = env->regs[R_EDX];
    regs.rsi = env->regs[R_ESI];
    regs.rdi = env->regs[R_EDI];
    regs.rsp = env->regs[R_ESP];
    regs.rbp = env->regs[R_EBP];

    regs.r8 = env->regs[8];
    regs.r9 = env->regs[9];
    regs.r10 = env->regs[10];
    regs.r11 = env->regs[11];
    regs.r12 = env->regs[12];
    regs.r13 = env->regs[13];
    regs.r14 = env->regs[14];
    regs.r15 = env->regs[15];

    regs.rflags = env->eflags;

    kvm_set_regs(kvm_context, env->cpu_index, &regs);
    memset(&fpu, 0, sizeof fpu);
    fpu.fsw = env->fpus & ~(7 << 11);
    fpu.fsw |= (env->fpstt & 7) << 11;
    for (i = 0; i < 8; ++i)
        fpu.ftwx |= (!env->fptags[i]) << i;
    memcpy(fpu.fpr, env->fpregs, sizeof env->fpregs);
    memcpy(fpu.xmm, env->xmm_regs, sizeof env->xmm_regs);
    fpu.mxcsr = env->mxcsr;
    kvm_set_fpu(kvm_context, env->cpu_index, &fpu);

    memcpy(sregs.interrupt_bitmap, env->kvm_interrupt_bitmap, sizeof(sregs.interrupt_bitmap));
    if ((env->eflags & VM_MASK)) {
        set_v8086_seg(&sregs.cs, &env->segs[R_CS]);
        set_v8086_seg(&sregs.ds, &env->segs[R_DS]);
        set_v8086_seg(&sregs.es, &env->segs[R_ES]);
        set_v8086_seg(&sregs.fs, &env->segs[R_FS]);
        set_v8086_seg(&sregs.gs, &env->segs[R_GS]);
        set_v8086_seg(&sregs.ss, &env->segs[R_SS]);
        set_seg(&sregs.cs, &env->segs[R_CS]);
        set_seg(&sregs.ds, &env->segs[R_DS]);
        set_seg(&sregs.es, &env->segs[R_ES]);
        set_seg(&sregs.fs, &env->segs[R_FS]);
        set_seg(&sregs.gs, &env->segs[R_GS]);
        set_seg(&sregs.ss, &env->segs[R_SS]);
        if (env->cr[0] & CR0_PE_MASK) {
            /* force ss cpl to cs cpl */
            sregs.ss.selector = (sregs.ss.selector & ~3) |
                (sregs.cs.selector & 3);
            sregs.ss.dpl = sregs.ss.selector & 3;

    if (!(env->cr[0] & CR0_PG_MASK)) {
        fix_realmode_dataseg(&sregs.cs);
        fix_realmode_dataseg(&sregs.ds);
        fix_realmode_dataseg(&sregs.es);
        fix_realmode_dataseg(&sregs.fs);
        fix_realmode_dataseg(&sregs.gs);
        fix_realmode_dataseg(&sregs.ss);
    set_seg(&sregs.tr, &env->tr);
    set_seg(&sregs.ldt, &env->ldt);

    sregs.idt.limit = env->idt.limit;
    sregs.idt.base = env->idt.base;
    sregs.gdt.limit = env->gdt.limit;
    sregs.gdt.base = env->gdt.base;

    sregs.cr0 = env->cr[0];
    sregs.cr2 = env->cr[2];
    sregs.cr3 = env->cr[3];
    sregs.cr4 = env->cr[4];

    sregs.apic_base = cpu_get_apic_base(env);
    sregs.efer = env->efer;
    sregs.cr8 = cpu_get_apic_tpr(env);

    kvm_set_sregs(kvm_context, env->cpu_index, &sregs);
    set_msr_entry(&msrs[n++], MSR_IA32_SYSENTER_CS, env->sysenter_cs);
    set_msr_entry(&msrs[n++], MSR_IA32_SYSENTER_ESP, env->sysenter_esp);
    set_msr_entry(&msrs[n++], MSR_IA32_SYSENTER_EIP, env->sysenter_eip);
    if (kvm_has_msr_star)
        set_msr_entry(&msrs[n++], MSR_STAR, env->star);
    set_msr_entry(&msrs[n++], MSR_IA32_TSC, env->tsc);
    set_msr_entry(&msrs[n++], MSR_CSTAR, env->cstar);
    set_msr_entry(&msrs[n++], MSR_KERNELGSBASE, env->kernelgsbase);
    set_msr_entry(&msrs[n++], MSR_FMASK, env->fmask);
    set_msr_entry(&msrs[n++], MSR_LSTAR, env->lstar);

    rc = kvm_set_msrs(kvm_context, env->cpu_index, msrs, n);
        perror("kvm_set_msrs FAILED");
static void save_regs(CPUState *env)
    struct kvm_regs regs;
    struct kvm_sregs sregs;
    struct kvm_msr_entry msrs[MSR_COUNT];

    kvm_get_regs(kvm_context, env->cpu_index, &regs);
    env->regs[R_EAX] = regs.rax;
    env->regs[R_EBX] = regs.rbx;
    env->regs[R_ECX] = regs.rcx;
    env->regs[R_EDX] = regs.rdx;
    env->regs[R_ESI] = regs.rsi;
    env->regs[R_EDI] = regs.rdi;
    env->regs[R_ESP] = regs.rsp;
    env->regs[R_EBP] = regs.rbp;

    env->regs[8] = regs.r8;
    env->regs[9] = regs.r9;
    env->regs[10] = regs.r10;
    env->regs[11] = regs.r11;
    env->regs[12] = regs.r12;
    env->regs[13] = regs.r13;
    env->regs[14] = regs.r14;
    env->regs[15] = regs.r15;

    env->eflags = regs.rflags;
    kvm_get_fpu(kvm_context, env->cpu_index, &fpu);
    env->fpstt = (fpu.fsw >> 11) & 7;
    for (i = 0; i < 8; ++i)
        env->fptags[i] = !((fpu.ftwx >> i) & 1);
    memcpy(env->fpregs, fpu.fpr, sizeof env->fpregs);
    memcpy(env->xmm_regs, fpu.xmm, sizeof env->xmm_regs);
    env->mxcsr = fpu.mxcsr;

    kvm_get_sregs(kvm_context, env->cpu_index, &sregs);
    memcpy(env->kvm_interrupt_bitmap, sregs.interrupt_bitmap, sizeof(env->kvm_interrupt_bitmap));

    get_seg(&env->segs[R_CS], &sregs.cs);
    get_seg(&env->segs[R_DS], &sregs.ds);
    get_seg(&env->segs[R_ES], &sregs.es);
    get_seg(&env->segs[R_FS], &sregs.fs);
    get_seg(&env->segs[R_GS], &sregs.gs);
    get_seg(&env->segs[R_SS], &sregs.ss);

    get_seg(&env->tr, &sregs.tr);
    get_seg(&env->ldt, &sregs.ldt);

    env->idt.limit = sregs.idt.limit;
    env->idt.base = sregs.idt.base;
    env->gdt.limit = sregs.gdt.limit;
    env->gdt.base = sregs.gdt.base;

    env->cr[0] = sregs.cr0;
    env->cr[2] = sregs.cr2;
    env->cr[3] = sregs.cr3;
    env->cr[4] = sregs.cr4;

    cpu_set_apic_base(env, sregs.apic_base);

    env->efer = sregs.efer;
    //cpu_set_apic_tpr(env, sregs.cr8);
#define HFLAG_COPY_MASK ~( \
    HF_CPL_MASK | HF_PE_MASK | HF_MP_MASK | HF_EM_MASK | \
    HF_TS_MASK | HF_TF_MASK | HF_VM_MASK | HF_IOPL_MASK | \
    HF_OSFXSR_MASK | HF_LMA_MASK | HF_CS32_MASK | \
    HF_SS32_MASK | HF_CS64_MASK | HF_ADDSEG_MASK)
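    /* Recompute the hflags bits covered by HFLAG_COPY_MASK (CPL, PE/MP/EM/TS,
     * TF/VM/IOPL, OSFXSR, long mode, CS/SS size, ADDSEG) from the control
     * registers and segment descriptors just read back from KVM. */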
    hflags = (env->segs[R_CS].flags >> DESC_DPL_SHIFT) & HF_CPL_MASK;
    hflags |= (env->cr[0] & CR0_PE_MASK) << (HF_PE_SHIFT - CR0_PE_SHIFT);
    hflags |= (env->cr[0] << (HF_MP_SHIFT - CR0_MP_SHIFT)) &
        (HF_MP_MASK | HF_EM_MASK | HF_TS_MASK);
    hflags |= (env->eflags & (HF_TF_MASK | HF_VM_MASK | HF_IOPL_MASK));
    hflags |= (env->cr[4] & CR4_OSFXSR_MASK) <<
        (HF_OSFXSR_SHIFT - CR4_OSFXSR_SHIFT);

    if (env->efer & MSR_EFER_LMA) {
        hflags |= HF_LMA_MASK;

    if ((hflags & HF_LMA_MASK) && (env->segs[R_CS].flags & DESC_L_MASK)) {
        hflags |= HF_CS32_MASK | HF_SS32_MASK | HF_CS64_MASK;
        hflags |= (env->segs[R_CS].flags & DESC_B_MASK) >>
            (DESC_B_SHIFT - HF_CS32_SHIFT);
        hflags |= (env->segs[R_SS].flags & DESC_B_MASK) >>
            (DESC_B_SHIFT - HF_SS32_SHIFT);
        if (!(env->cr[0] & CR0_PE_MASK) ||
            (env->eflags & VM_MASK) ||
            !(hflags & HF_CS32_MASK)) {
            hflags |= HF_ADDSEG_MASK;
            hflags |= ((env->segs[R_DS].base |
                        env->segs[R_ES].base |
                        env->segs[R_SS].base) != 0) <<

    env->hflags = (env->hflags & HFLAG_COPY_MASK) | hflags;
    env->cc_src = env->eflags & (CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C);
    env->df = 1 - (2 * ((env->eflags >> 10) & 1));
    env->cc_op = CC_OP_EFLAGS;
    env->eflags &= ~(DF_MASK | CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C);
    msrs[n++].index = MSR_IA32_SYSENTER_CS;
    msrs[n++].index = MSR_IA32_SYSENTER_ESP;
    msrs[n++].index = MSR_IA32_SYSENTER_EIP;
    if (kvm_has_msr_star)
        msrs[n++].index = MSR_STAR;
    msrs[n++].index = MSR_IA32_TSC;
    msrs[n++].index = MSR_CSTAR;
    msrs[n++].index = MSR_KERNELGSBASE;
    msrs[n++].index = MSR_FMASK;
    msrs[n++].index = MSR_LSTAR;

    rc = kvm_get_msrs(kvm_context, env->cpu_index, msrs, n);
        perror("kvm_get_msrs FAILED");

    n = rc; /* actual number of MSRs */
    for (i = 0; i < n; i++) {
        if (get_msr_entry(&msrs[i], env))
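/* libkvm callback: inject a pending PIC interrupt if the vcpu can accept
 * one; the return value tells libkvm whether an interrupt window is still
 * needed. */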
static int try_push_interrupts(void *opaque)
    CPUState *env = cpu_single_env;

    if (env->ready_for_interrupt_injection &&
        (env->interrupt_request & CPU_INTERRUPT_HARD) &&
        (env->eflags & IF_MASK)) {
        env->interrupt_request &= ~CPU_INTERRUPT_HARD;
        irq = cpu_get_pic_interrupt(env);
        r = kvm_inject_irq(kvm_context, env->cpu_index, irq);
            printf("cpu %d fail inject %x\n", env->cpu_index, irq);

    return (env->interrupt_request & CPU_INTERRUPT_HARD) != 0;
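/* post_kvm_run()/pre_kvm_run() bracket each KVM_RUN: they take and drop
 * qemu_mutex and keep IF, the APIC base and the TPR in sync with the
 * in-kernel state. */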
static void post_kvm_run(void *opaque, int vcpu)
    CPUState *env = vcpu_env;

    pthread_mutex_lock(&qemu_mutex);
    cpu_single_env = env;
    env->eflags = kvm_get_interrupt_flag(kvm_context, vcpu)
        ? env->eflags | IF_MASK : env->eflags & ~IF_MASK;
    env->ready_for_interrupt_injection
        = kvm_is_ready_for_interrupt_injection(kvm_context, vcpu);
    //cpu_set_apic_tpr(env, kvm_run->cr8);
    cpu_set_apic_base(env, kvm_get_apic_base(kvm_context, vcpu));
static int pre_kvm_run(void *opaque, int vcpu)
    CPUState *env = cpu_single_env;

    if (env->cpu_index == 0 && wait_hack) {
        pthread_mutex_unlock(&qemu_mutex);
        for (i = 0; i < 10; ++i)
        pthread_mutex_lock(&qemu_mutex);

    kvm_set_cr8(kvm_context, vcpu, cpu_get_apic_tpr(env));
    if (env->interrupt_request & CPU_INTERRUPT_EXIT)
    pthread_mutex_unlock(&qemu_mutex);
void kvm_load_registers(CPUState *env)

void kvm_save_registers(CPUState *env)

int kvm_cpu_exec(CPUState *env)
    r = kvm_run(kvm_context, env->cpu_index);
        printf("kvm_run returned %d\n", r);
extern int vm_running;

static int has_work(CPUState *env)
    if (!(env->hflags & HF_HALTED_MASK))
    if (env->interrupt_request & (CPU_INTERRUPT_HARD | CPU_INTERRUPT_EXIT))
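/* Wait up to 'timeout' ms for one of the signals in io_sigset with
 * sigtimedwait() and dispatch its handler while holding qemu_mutex. */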
static void kvm_eat_signals(CPUState *env, int timeout)
    ts.tv_sec = timeout / 1000;
    ts.tv_nsec = (timeout % 1000) * 1000000;
    r = sigtimedwait(&io_sigset, &siginfo, &ts);
    if (r == -1 && (errno == EAGAIN || errno == EINTR) && !timeout)

    pthread_mutex_lock(&qemu_mutex);
    cpu_single_env = vcpu_env;
    if (r == -1 && !(errno == EAGAIN || errno == EINTR)) {
        printf("sigtimedwait: %s\n", strerror(e));

    sigaction(siginfo.si_signo, NULL, &sa);
    sa.sa_handler(siginfo.si_signo);
    /* we call select() even if no signal was received, to account for
     * I/O for which there is no signal handler installed. */
    pthread_mutex_unlock(&qemu_mutex);
static void kvm_main_loop_wait(CPUState *env, int timeout)
    if (vcpu_info[env->cpu_index].signalled && timeout)
    pthread_mutex_unlock(&qemu_mutex);
    if (env->cpu_index == 0)
        kvm_eat_signals(env, timeout);
        sigaddset(&set, SIG_IPI);

    pthread_mutex_lock(&qemu_mutex);
    cpu_single_env = env;
    vcpu_info[env->cpu_index].signalled = 0;
static void update_regs_for_sipi(CPUState *env)
    SegmentCache cs = env->segs[R_CS];
    env->segs[R_CS] = cs;
    vcpu_info[env->cpu_index].sipi_needed = 0;
    vcpu_info[env->cpu_index].init = 0;

static void update_regs_for_init(CPUState *env)

static void setup_kernel_sigmask(CPUState *env)
    sigprocmask(SIG_BLOCK, NULL, &set);
    sigdelset(&set, SIG_IPI);
    if (env->cpu_index == 0)
        sigandset(&set, &set, &io_negsigset);
    kvm_set_signal_mask(kvm_context, env->cpu_index, &set);
static int kvm_main_loop_cpu(CPUState *env)
    setup_kernel_sigmask(env);
    pthread_mutex_lock(&qemu_mutex);
    cpu_single_env = env;
    while (!has_work(env))
        kvm_main_loop_wait(env, 10);
    if (env->interrupt_request & CPU_INTERRUPT_HARD)
        env->hflags &= ~HF_HALTED_MASK;
    if (vcpu_info[env->cpu_index].sipi_needed)
        update_regs_for_sipi(env);
    if (vcpu_info[env->cpu_index].init)
        update_regs_for_init(env);
    if (!(env->hflags & HF_HALTED_MASK) && !vcpu_info[env->cpu_index].init)
    env->interrupt_request &= ~CPU_INTERRUPT_EXIT;
    kvm_main_loop_wait(env, 0);
    if (qemu_shutdown_requested())
    else if (qemu_powerdown_requested())
        qemu_system_powerdown();
    else if (qemu_reset_requested()) {
        env->interrupt_request = 0;
    pthread_mutex_unlock(&qemu_mutex);
static void *ap_main_loop(void *_env)
    CPUState *env = _env;

    sigfillset(&signals);
    //sigdelset(&signals, SIG_IPI);
    sigprocmask(SIG_BLOCK, &signals, NULL);
    kvm_create_vcpu(kvm_context, env->cpu_index);
    kvm_qemu_init_env(env);
    kvm_main_loop_cpu(env);

static void kvm_add_signal(int signum)
    sigaddset(&io_sigset, signum);
    sigdelset(&io_negsigset, signum);
    sigprocmask(SIG_BLOCK, &io_sigset, NULL);
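/* Main loop entry point: block the I/O signals, install the SIG_IPI
 * handler, start one ap_main_loop() thread per additional vcpu and run
 * vcpu 0 in the calling thread. */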
int kvm_main_loop(void)
    CPUState *env = first_cpu->next_cpu;

    sigemptyset(&io_sigset);
    sigfillset(&io_negsigset);
    kvm_add_signal(SIGIO);
    kvm_add_signal(SIGALRM);
    kvm_add_signal(SIGUSR2);
    kvm_add_signal(SIG_IPI);

    vcpu_env = first_cpu;
    signal(SIG_IPI, sig_ipi_handler);
    for (i = 1; i < smp_cpus; ++i) {
        pthread_create(&vcpu_info[i].thread, NULL, ap_main_loop, env);

    vcpu_info[0].thread = pthread_self();
    return kvm_main_loop_cpu(first_cpu);
static int kvm_debug(void *opaque, int vcpu)
    CPUState *env = cpu_single_env;

    env->exception_index = EXCP_DEBUG;
static int kvm_inb(void *opaque, uint16_t addr, uint8_t *data)
    *data = cpu_inb(0, addr);

static int kvm_inw(void *opaque, uint16_t addr, uint16_t *data)
    *data = cpu_inw(0, addr);

static int kvm_inl(void *opaque, uint16_t addr, uint32_t *data)
    *data = cpu_inl(0, addr);

#define PM_IO_BASE 0xb000

static int kvm_outb(void *opaque, uint16_t addr, uint8_t data)
        cpu_outb(0, 0xb3, 0);
        x = cpu_inw(0, PM_IO_BASE + 4);
        cpu_outw(0, PM_IO_BASE + 4, x);
        x = cpu_inw(0, PM_IO_BASE + 4);
        cpu_outw(0, PM_IO_BASE + 4, x);
    cpu_outb(0, addr, data);

static int kvm_outw(void *opaque, uint16_t addr, uint16_t data)
    cpu_outw(0, addr, data);

static int kvm_outl(void *opaque, uint16_t addr, uint32_t data)
    cpu_outl(0, addr, data);
static int kvm_readb(void *opaque, uint64_t addr, uint8_t *data)
    *data = ldub_phys(addr);

static int kvm_readw(void *opaque, uint64_t addr, uint16_t *data)
    *data = lduw_phys(addr);

static int kvm_readl(void *opaque, uint64_t addr, uint32_t *data)
    *data = ldl_phys(addr);

static int kvm_readq(void *opaque, uint64_t addr, uint64_t *data)
    *data = ldq_phys(addr);

static int kvm_writeb(void *opaque, uint64_t addr, uint8_t data)
    stb_phys(addr, data);

static int kvm_writew(void *opaque, uint64_t addr, uint16_t data)
    stw_phys(addr, data);

static int kvm_writel(void *opaque, uint64_t addr, uint32_t data)
    stl_phys(addr, data);

static int kvm_writeq(void *opaque, uint64_t addr, uint64_t data)
    stq_phys(addr, data);
static int kvm_io_window(void *opaque)

static int kvm_halt(void *opaque, int vcpu)
    CPUState *env = cpu_single_env;

    if (!((env->interrupt_request & CPU_INTERRUPT_HARD) &&
          (env->eflags & IF_MASK))) {
        env->hflags |= HF_HALTED_MASK;
        env->exception_index = EXCP_HLT;

static int kvm_shutdown(void *opaque, int vcpu)
    qemu_system_reset_request();
static struct kvm_callbacks qemu_kvm_ops = {
    .writeb = kvm_writeb,
    .writew = kvm_writew,
    .writel = kvm_writel,
    .writeq = kvm_writeq,
    .shutdown = kvm_shutdown,
    .io_window = kvm_io_window,
    .try_push_interrupts = try_push_interrupts,
    .post_kvm_run = post_kvm_run,
    .pre_kvm_run = pre_kvm_run,

    /* Try to initialize kvm */
    kvm_context = kvm_init(&qemu_kvm_ops, cpu_single_env);
int kvm_qemu_create_context(void)
    if (kvm_create(kvm_context, phys_ram_size, (void**)&phys_ram_base) < 0) {

    kvm_msr_list = kvm_get_msr_list(kvm_context);
    for (i = 0; i < kvm_msr_list->nmsrs; ++i)
        if (kvm_msr_list->indices[i] == MSR_STAR)
            kvm_has_msr_star = 1;

void kvm_qemu_destroy(void)
    kvm_finalize(kvm_context);
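/* Execute CPUID on the host CPU; the inline asm saves and restores the
 * registers it clobbers (and, on x86_64, skips the red zone first). */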
static void host_cpuid(uint32_t function, uint32_t *eax, uint32_t *ebx,
                       uint32_t *ecx, uint32_t *edx)
        "sub $128, %%rsp \n\t"  /* skip red zone */
        "push %0; push %%rsi \n\t"
        "push %%rax; push %%rbx; push %%rcx; push %%rdx \n\t"
        "mov 8*5(%%rsp), %%rsi \n\t"
        "mov (%%rsi), %%eax \n\t"
        "mov %%eax, (%%rsi) \n\t"
        "mov %%ebx, 4(%%rsi) \n\t"
        "mov %%ecx, 8(%%rsi) \n\t"
        "mov %%edx, 12(%%rsi) \n\t"
        "pop %%rdx; pop %%rcx; pop %%rbx; pop %%rax \n\t"
        "pop %%rsi; pop %0 \n\t"
        "push %0; push %%esi \n\t"
        "push %%eax; push %%ebx; push %%ecx; push %%edx \n\t"
        "mov 4*5(%%esp), %%esi \n\t"
        "mov (%%esi), %%eax \n\t"
        "mov %%eax, (%%esi) \n\t"
        "mov %%ebx, 4(%%esi) \n\t"
        "mov %%ecx, 8(%%esi) \n\t"
        "mov %%edx, 12(%%esi) \n\t"
        "pop %%edx; pop %%ecx; pop %%ebx; pop %%eax \n\t"
        "pop %%esi; pop %0 \n\t"
        : : "rm"(vec) : "memory");
static void do_cpuid_ent(struct kvm_cpuid_entry *e, uint32_t function,
    env->regs[R_EAX] = function;
    qemu_kvm_cpuid_on_env(env);
    e->function = function;
    e->eax = env->regs[R_EAX];
    e->ebx = env->regs[R_EBX];
    e->ecx = env->regs[R_ECX];
    e->edx = env->regs[R_EDX];
    if (function == 0x80000001) {
        uint32_t h_eax, h_edx;

        host_cpuid(function, &h_eax, NULL, NULL, &h_edx);
        if ((h_edx & 0x20000000) == 0)
            e->edx &= ~0x20000000u;
        if ((h_edx & 0x00000800) == 0)
            e->edx &= ~0x00000800u;
        if ((h_edx & 0x00100000) == 0)
            e->edx &= ~0x00100000u;
        // sysenter isn't supported in compatibility mode on AMD, and syscall
        // isn't supported in compatibility mode on Intel, so advertise the
        // actual cpu, and say goodbye to migration between different vendors
        // if you use compatibility mode.
    host_cpuid(0, NULL, &bcd[0], &bcd[1], &bcd[2]);

int kvm_qemu_init_env(CPUState *cenv)
    struct kvm_cpuid_entry cpuid_ent[100];

    copy.regs[R_EAX] = 0;
    qemu_kvm_cpuid_on_env(&copy);
    limit = copy.regs[R_EAX];
    for (i = 0; i <= limit; ++i)
        do_cpuid_ent(&cpuid_ent[cpuid_nent++], i, &copy);

    copy.regs[R_EAX] = 0x80000000;
    qemu_kvm_cpuid_on_env(&copy);
    limit = copy.regs[R_EAX];
    for (i = 0x80000000; i <= limit; ++i)
        do_cpuid_ent(&cpuid_ent[cpuid_nent++], i, &copy);

    kvm_setup_cpuid(kvm_context, cenv->cpu_index, cpuid_nent, cpuid_ent);
int kvm_update_debugger(CPUState *env)
    struct kvm_debug_guest dbg;

    if (env->nb_breakpoints || env->singlestep_enabled) {
        for (i = 0; i < 4 && i < env->nb_breakpoints; ++i) {
            dbg.breakpoints[i].enabled = 1;
            dbg.breakpoints[i].address = env->breakpoints[i];
        dbg.singlestep = env->singlestep_enabled;
    return kvm_guest_debug(kvm_context, env->cpu_index, &dbg);
/* dirty pages logging */
/* FIXME: use unsigned long pointer instead of unsigned char */
unsigned char *kvm_dirty_bitmap = NULL;
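/* Enable or disable dirty page logging; the bitmap is sized to cover
 * phys_ram_size and is freed again when tracking is turned off. */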
int kvm_physical_memory_set_dirty_tracking(int enable)
    if (!kvm_dirty_bitmap) {
        unsigned bitmap_size = BITMAP_SIZE(phys_ram_size);
        kvm_dirty_bitmap = qemu_malloc(bitmap_size);
        if (kvm_dirty_bitmap == NULL) {
            perror("Failed to allocate dirty pages bitmap");
        r = kvm_dirty_pages_log_enable_all(kvm_context);

    if (kvm_dirty_bitmap) {
        r = kvm_dirty_pages_log_reset(kvm_context);
        qemu_free(kvm_dirty_bitmap);
        kvm_dirty_bitmap = NULL;
/* get kvm's dirty pages bitmap and update qemu's */
int kvm_get_dirty_pages_log_slot(int slot,
                                 unsigned char *bitmap,
                                 unsigned int offset,
    unsigned int i, j, n = 0;
    unsigned page_number, addr, addr1;

    memset(bitmap, 0, len);
    r = kvm_get_dirty_pages(kvm_context, slot, bitmap);

    /* bitmap-traveling is faster than memory-traveling (for addr...)
     * especially when most of the memory is not dirty. */
    for (i = 0; i < len; i++) {
            page_number = i * 8 + j;
            addr1 = page_number * TARGET_PAGE_SIZE;
            addr = offset + addr1;
            cpu_physical_memory_set_dirty(addr);
/* get kvm's dirty pages bitmap and update qemu's;
 * we only care about physical ram, which resides in slots 0 and 3 */
int kvm_update_dirty_pages_log(void)
    len = BITMAP_SIZE(0xa0000);
    r = kvm_get_dirty_pages_log_slot(3, kvm_dirty_bitmap, 0, len);
    len = BITMAP_SIZE(phys_ram_size - 0xc0000);
    r = r || kvm_get_dirty_pages_log_slot(0, kvm_dirty_bitmap, 0xc0000, len);

int kvm_get_phys_ram_page_bitmap(unsigned char *bitmap)
    int r = 0, len, offset;

    len = BITMAP_SIZE(phys_ram_size);
    memset(bitmap, 0, len);
    r = kvm_get_mem_map(kvm_context, 3, bitmap);

    offset = BITMAP_SIZE(0xc0000);
    r = kvm_get_mem_map(kvm_context, 0, bitmap + offset);