kvm: external module: adapt to new CPU_*_FROZEN in Linux 2.6.22-rc1
[qemu-kvm/fedora.git] / qemu-kvm.c
blob184999789af2369547dd4a5d3252e7e8209ae8f6
2 #include "config.h"
3 #include "config-host.h"
5 #ifdef USE_KVM
7 #include "exec.h"
9 #include "qemu-kvm.h"
10 #include <kvmctl.h>
11 #include <string.h>
13 #define MSR_IA32_TSC 0x10
15 extern void perror(const char *s);
17 int kvm_allowed = 1;
18 kvm_context_t kvm_context;
19 static struct kvm_msr_list *kvm_msr_list;
20 static int kvm_has_msr_star;
22 #define NR_CPU 16
23 static CPUState *saved_env[NR_CPU];
25 static void set_msr_entry(struct kvm_msr_entry *entry, uint32_t index,
26 uint64_t data)
28 entry->index = index;
29 entry->data = data;
32 /* returns 0 on success, non-0 on failure */
33 static int get_msr_entry(struct kvm_msr_entry *entry, CPUState *env)
35 switch (entry->index) {
36 case MSR_IA32_SYSENTER_CS:
37 env->sysenter_cs = entry->data;
38 break;
39 case MSR_IA32_SYSENTER_ESP:
40 env->sysenter_esp = entry->data;
41 break;
42 case MSR_IA32_SYSENTER_EIP:
43 env->sysenter_eip = entry->data;
44 break;
45 case MSR_STAR:
46 env->star = entry->data;
47 break;
48 #ifdef TARGET_X86_64
49 case MSR_CSTAR:
50 env->cstar = entry->data;
51 break;
52 case MSR_KERNELGSBASE:
53 env->kernelgsbase = entry->data;
54 break;
55 case MSR_FMASK:
56 env->fmask = entry->data;
57 break;
58 case MSR_LSTAR:
59 env->lstar = entry->data;
60 break;
61 #endif
62 case MSR_IA32_TSC:
63 env->tsc = entry->data;
64 break;
65 default:
66 printf("Warning unknown msr index 0x%x\n", entry->index);
67 return 1;
69 return 0;
/*
 * Capacity of the MSR arrays in load_regs() and save_regs(): the three
 * SYSENTER registers, STAR and the TSC, plus CSTAR, KERNELGSBASE, FMASK
 * and LSTAR when building for a 64-bit target.
 */
#if defined(TARGET_X86_64)
#define MSR_COUNT 9
#else
#define MSR_COUNT 5
#endif
78 static void set_v8086_seg(struct kvm_segment *lhs, const SegmentCache *rhs)
80 lhs->selector = rhs->selector;
81 lhs->base = rhs->base;
82 lhs->limit = rhs->limit;
83 lhs->type = 3;
84 lhs->present = 1;
85 lhs->dpl = 3;
86 lhs->db = 0;
87 lhs->s = 1;
88 lhs->l = 0;
89 lhs->g = 0;
90 lhs->avl = 0;
91 lhs->unusable = 0;
94 static void set_seg(struct kvm_segment *lhs, const SegmentCache *rhs)
96 unsigned flags = rhs->flags;
97 lhs->selector = rhs->selector;
98 lhs->base = rhs->base;
99 lhs->limit = rhs->limit;
100 lhs->type = (flags >> DESC_TYPE_SHIFT) & 15;
101 lhs->present = (flags & DESC_P_MASK) != 0;
102 lhs->dpl = rhs->selector & 3;
103 lhs->db = (flags >> DESC_B_SHIFT) & 1;
104 lhs->s = (flags & DESC_S_MASK) != 0;
105 lhs->l = (flags >> DESC_L_SHIFT) & 1;
106 lhs->g = (flags & DESC_G_MASK) != 0;
107 lhs->avl = (flags & DESC_AVL_MASK) != 0;
108 lhs->unusable = 0;
111 static void get_seg(SegmentCache *lhs, const struct kvm_segment *rhs)
113 lhs->selector = rhs->selector;
114 lhs->base = rhs->base;
115 lhs->limit = rhs->limit;
116 lhs->flags =
117 (rhs->type << DESC_TYPE_SHIFT)
118 | (rhs->present * DESC_P_MASK)
119 | (rhs->dpl << DESC_DPL_SHIFT)
120 | (rhs->db << DESC_B_SHIFT)
121 | (rhs->s * DESC_S_MASK)
122 | (rhs->l << DESC_L_SHIFT)
123 | (rhs->g * DESC_G_MASK)
124 | (rhs->avl * DESC_AVL_MASK);
127 /* the reset values of qemu are not compatible to SVM
128 * this function is used to fix the segment descriptor values */
129 static void fix_realmode_dataseg(struct kvm_segment *seg)
131 seg->type = 0x02;
132 seg->present = 1;
133 seg->s = 1;
136 static void load_regs(CPUState *env)
138 struct kvm_regs regs;
139 struct kvm_fpu fpu;
140 struct kvm_sregs sregs;
141 struct kvm_msr_entry msrs[MSR_COUNT];
142 int rc, n, i;
144 /* hack: save env */
145 if (!saved_env[0])
146 saved_env[0] = env;
148 regs.rax = env->regs[R_EAX];
149 regs.rbx = env->regs[R_EBX];
150 regs.rcx = env->regs[R_ECX];
151 regs.rdx = env->regs[R_EDX];
152 regs.rsi = env->regs[R_ESI];
153 regs.rdi = env->regs[R_EDI];
154 regs.rsp = env->regs[R_ESP];
155 regs.rbp = env->regs[R_EBP];
156 #ifdef TARGET_X86_64
157 regs.r8 = env->regs[8];
158 regs.r9 = env->regs[9];
159 regs.r10 = env->regs[10];
160 regs.r11 = env->regs[11];
161 regs.r12 = env->regs[12];
162 regs.r13 = env->regs[13];
163 regs.r14 = env->regs[14];
164 regs.r15 = env->regs[15];
165 #endif
167 regs.rflags = env->eflags;
168 regs.rip = env->eip;
170 kvm_set_regs(kvm_context, 0, &regs);
172 memset(&fpu, 0, sizeof fpu);
173 fpu.fsw = env->fpus & ~(7 << 11);
174 fpu.fsw |= (env->fpstt & 7) << 11;
175 fpu.fcw = env->fpuc;
176 for (i = 0; i < 8; ++i)
177 fpu.ftwx |= (!env->fptags[i]) << i;
178 memcpy(fpu.fpr, env->fpregs, sizeof env->fpregs);
179 memcpy(fpu.xmm, env->xmm_regs, sizeof env->xmm_regs);
180 fpu.mxcsr = env->mxcsr;
181 kvm_set_fpu(kvm_context, 0, &fpu);
183 memcpy(sregs.interrupt_bitmap, env->kvm_interrupt_bitmap, sizeof(sregs.interrupt_bitmap));
185 if ((env->eflags & VM_MASK)) {
186 set_v8086_seg(&sregs.cs, &env->segs[R_CS]);
187 set_v8086_seg(&sregs.ds, &env->segs[R_DS]);
188 set_v8086_seg(&sregs.es, &env->segs[R_ES]);
189 set_v8086_seg(&sregs.fs, &env->segs[R_FS]);
190 set_v8086_seg(&sregs.gs, &env->segs[R_GS]);
191 set_v8086_seg(&sregs.ss, &env->segs[R_SS]);
192 } else {
193 set_seg(&sregs.cs, &env->segs[R_CS]);
194 set_seg(&sregs.ds, &env->segs[R_DS]);
195 set_seg(&sregs.es, &env->segs[R_ES]);
196 set_seg(&sregs.fs, &env->segs[R_FS]);
197 set_seg(&sregs.gs, &env->segs[R_GS]);
198 set_seg(&sregs.ss, &env->segs[R_SS]);
200 if (env->cr[0] & CR0_PE_MASK) {
201 /* force ss cpl to cs cpl */
202 sregs.ss.selector = (sregs.ss.selector & ~3) |
203 (sregs.cs.selector & 3);
204 sregs.ss.dpl = sregs.ss.selector & 3;
207 if (!(env->cr[0] & CR0_PG_MASK)) {
208 fix_realmode_dataseg(&sregs.cs);
209 fix_realmode_dataseg(&sregs.ds);
210 fix_realmode_dataseg(&sregs.es);
211 fix_realmode_dataseg(&sregs.fs);
212 fix_realmode_dataseg(&sregs.gs);
213 fix_realmode_dataseg(&sregs.ss);
217 set_seg(&sregs.tr, &env->tr);
218 set_seg(&sregs.ldt, &env->ldt);
220 sregs.idt.limit = env->idt.limit;
221 sregs.idt.base = env->idt.base;
222 sregs.gdt.limit = env->gdt.limit;
223 sregs.gdt.base = env->gdt.base;
225 sregs.cr0 = env->cr[0];
226 sregs.cr2 = env->cr[2];
227 sregs.cr3 = env->cr[3];
228 sregs.cr4 = env->cr[4];
230 sregs.apic_base = cpu_get_apic_base(env);
231 sregs.efer = env->efer;
232 sregs.cr8 = cpu_get_apic_tpr(env);
234 kvm_set_sregs(kvm_context, 0, &sregs);
236 /* msrs */
237 n = 0;
238 set_msr_entry(&msrs[n++], MSR_IA32_SYSENTER_CS, env->sysenter_cs);
239 set_msr_entry(&msrs[n++], MSR_IA32_SYSENTER_ESP, env->sysenter_esp);
240 set_msr_entry(&msrs[n++], MSR_IA32_SYSENTER_EIP, env->sysenter_eip);
241 if (kvm_has_msr_star)
242 set_msr_entry(&msrs[n++], MSR_STAR, env->star);
243 set_msr_entry(&msrs[n++], MSR_IA32_TSC, env->tsc);
244 #ifdef TARGET_X86_64
245 set_msr_entry(&msrs[n++], MSR_CSTAR, env->cstar);
246 set_msr_entry(&msrs[n++], MSR_KERNELGSBASE, env->kernelgsbase);
247 set_msr_entry(&msrs[n++], MSR_FMASK, env->fmask);
248 set_msr_entry(&msrs[n++], MSR_LSTAR , env->lstar);
249 #endif
251 rc = kvm_set_msrs(kvm_context, 0, msrs, n);
252 if (rc == -1)
253 perror("kvm_set_msrs FAILED");
257 static void save_regs(CPUState *env)
259 struct kvm_regs regs;
260 struct kvm_fpu fpu;
261 struct kvm_sregs sregs;
262 struct kvm_msr_entry msrs[MSR_COUNT];
263 uint32_t hflags;
264 uint32_t i, n, rc;
266 kvm_get_regs(kvm_context, 0, &regs);
268 env->regs[R_EAX] = regs.rax;
269 env->regs[R_EBX] = regs.rbx;
270 env->regs[R_ECX] = regs.rcx;
271 env->regs[R_EDX] = regs.rdx;
272 env->regs[R_ESI] = regs.rsi;
273 env->regs[R_EDI] = regs.rdi;
274 env->regs[R_ESP] = regs.rsp;
275 env->regs[R_EBP] = regs.rbp;
276 #ifdef TARGET_X86_64
277 env->regs[8] = regs.r8;
278 env->regs[9] = regs.r9;
279 env->regs[10] = regs.r10;
280 env->regs[11] = regs.r11;
281 env->regs[12] = regs.r12;
282 env->regs[13] = regs.r13;
283 env->regs[14] = regs.r14;
284 env->regs[15] = regs.r15;
285 #endif
287 env->eflags = regs.rflags;
288 env->eip = regs.rip;
290 kvm_get_fpu(kvm_context, 0, &fpu);
291 env->fpstt = (fpu.fsw >> 11) & 7;
292 env->fpus = fpu.fsw;
293 env->fpuc = fpu.fcw;
294 for (i = 0; i < 8; ++i)
295 env->fptags[i] = !((fpu.ftwx >> i) & 1);
296 memcpy(env->fpregs, fpu.fpr, sizeof env->fpregs);
297 memcpy(env->xmm_regs, fpu.xmm, sizeof env->xmm_regs);
298 env->mxcsr = fpu.mxcsr;
300 kvm_get_sregs(kvm_context, 0, &sregs);
302 memcpy(env->kvm_interrupt_bitmap, sregs.interrupt_bitmap, sizeof(env->kvm_interrupt_bitmap));
304 get_seg(&env->segs[R_CS], &sregs.cs);
305 get_seg(&env->segs[R_DS], &sregs.ds);
306 get_seg(&env->segs[R_ES], &sregs.es);
307 get_seg(&env->segs[R_FS], &sregs.fs);
308 get_seg(&env->segs[R_GS], &sregs.gs);
309 get_seg(&env->segs[R_SS], &sregs.ss);
311 get_seg(&env->tr, &sregs.tr);
312 get_seg(&env->ldt, &sregs.ldt);
314 env->idt.limit = sregs.idt.limit;
315 env->idt.base = sregs.idt.base;
316 env->gdt.limit = sregs.gdt.limit;
317 env->gdt.base = sregs.gdt.base;
319 env->cr[0] = sregs.cr0;
320 env->cr[2] = sregs.cr2;
321 env->cr[3] = sregs.cr3;
322 env->cr[4] = sregs.cr4;
324 cpu_set_apic_base(env, sregs.apic_base);
326 env->efer = sregs.efer;
327 //cpu_set_apic_tpr(env, sregs.cr8);
329 #define HFLAG_COPY_MASK ~( \
330 HF_CPL_MASK | HF_PE_MASK | HF_MP_MASK | HF_EM_MASK | \
331 HF_TS_MASK | HF_TF_MASK | HF_VM_MASK | HF_IOPL_MASK | \
332 HF_OSFXSR_MASK | HF_LMA_MASK | HF_CS32_MASK | \
333 HF_SS32_MASK | HF_CS64_MASK | HF_ADDSEG_MASK)
337 hflags = (env->segs[R_CS].flags >> DESC_DPL_SHIFT) & HF_CPL_MASK;
338 hflags |= (env->cr[0] & CR0_PE_MASK) << (HF_PE_SHIFT - CR0_PE_SHIFT);
339 hflags |= (env->cr[0] << (HF_MP_SHIFT - CR0_MP_SHIFT)) &
340 (HF_MP_MASK | HF_EM_MASK | HF_TS_MASK);
341 hflags |= (env->eflags & (HF_TF_MASK | HF_VM_MASK | HF_IOPL_MASK));
342 hflags |= (env->cr[4] & CR4_OSFXSR_MASK) <<
343 (HF_OSFXSR_SHIFT - CR4_OSFXSR_SHIFT);
345 if (env->efer & MSR_EFER_LMA) {
346 hflags |= HF_LMA_MASK;
349 if ((hflags & HF_LMA_MASK) && (env->segs[R_CS].flags & DESC_L_MASK)) {
350 hflags |= HF_CS32_MASK | HF_SS32_MASK | HF_CS64_MASK;
351 } else {
352 hflags |= (env->segs[R_CS].flags & DESC_B_MASK) >>
353 (DESC_B_SHIFT - HF_CS32_SHIFT);
354 hflags |= (env->segs[R_SS].flags & DESC_B_MASK) >>
355 (DESC_B_SHIFT - HF_SS32_SHIFT);
356 if (!(env->cr[0] & CR0_PE_MASK) ||
357 (env->eflags & VM_MASK) ||
358 !(hflags & HF_CS32_MASK)) {
359 hflags |= HF_ADDSEG_MASK;
360 } else {
361 hflags |= ((env->segs[R_DS].base |
362 env->segs[R_ES].base |
363 env->segs[R_SS].base) != 0) <<
364 HF_ADDSEG_SHIFT;
367 env->hflags = (env->hflags & HFLAG_COPY_MASK) | hflags;
368 CC_SRC = env->eflags & (CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C);
369 DF = 1 - (2 * ((env->eflags >> 10) & 1));
370 CC_OP = CC_OP_EFLAGS;
371 env->eflags &= ~(DF_MASK | CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C);
373 tlb_flush(env, 1);
375 /* msrs */
376 n = 0;
377 msrs[n++].index = MSR_IA32_SYSENTER_CS;
378 msrs[n++].index = MSR_IA32_SYSENTER_ESP;
379 msrs[n++].index = MSR_IA32_SYSENTER_EIP;
380 if (kvm_has_msr_star)
381 msrs[n++].index = MSR_STAR;
382 msrs[n++].index = MSR_IA32_TSC;
383 #ifdef TARGET_X86_64
384 msrs[n++].index = MSR_CSTAR;
385 msrs[n++].index = MSR_KERNELGSBASE;
386 msrs[n++].index = MSR_FMASK;
387 msrs[n++].index = MSR_LSTAR;
388 #endif
389 rc = kvm_get_msrs(kvm_context, 0, msrs, n);
390 if (rc == -1) {
391 perror("kvm_get_msrs FAILED");
393 else {
394 n = rc; /* actual number of MSRs */
395 for (i=0 ; i<n; i++) {
396 if (get_msr_entry(&msrs[i], env))
397 return;
402 #include <signal.h>
405 static int try_push_interrupts(void *opaque)
407 CPUState **envs = opaque, *env;
408 env = envs[0];
410 if (env->ready_for_interrupt_injection &&
411 (env->interrupt_request & CPU_INTERRUPT_HARD) &&
412 (env->eflags & IF_MASK)) {
413 env->interrupt_request &= ~CPU_INTERRUPT_HARD;
414 // for now using cpu 0
415 kvm_inject_irq(kvm_context, 0, cpu_get_pic_interrupt(env));
418 return (env->interrupt_request & CPU_INTERRUPT_HARD) != 0;
421 static void post_kvm_run(void *opaque, int vcpu)
423 CPUState **envs = opaque, *env;
424 env = envs[0];
426 env->eflags = kvm_get_interrupt_flag(kvm_context, vcpu)
427 ? env->eflags | IF_MASK : env->eflags & ~IF_MASK;
428 env->ready_for_interrupt_injection
429 = kvm_is_ready_for_interrupt_injection(kvm_context, vcpu);
430 //cpu_set_apic_tpr(env, kvm_run->cr8);
431 cpu_set_apic_base(env, kvm_get_apic_base(kvm_context, vcpu));
434 static void pre_kvm_run(void *opaque, int vcpu)
436 CPUState **envs = opaque, *env;
437 env = envs[0];
439 kvm_set_cr8(kvm_context, vcpu, cpu_get_apic_tpr(env));
442 void kvm_load_registers(CPUState *env)
444 load_regs(env);
447 void kvm_save_registers(CPUState *env)
449 save_regs(env);
452 int kvm_cpu_exec(CPUState *env)
454 int r;
455 int pending = (!env->ready_for_interrupt_injection ||
456 ((env->interrupt_request & CPU_INTERRUPT_HARD) &&
457 (env->eflags & IF_MASK)));
459 if (!pending && (env->interrupt_request & CPU_INTERRUPT_EXIT)) {
460 env->interrupt_request &= ~CPU_INTERRUPT_EXIT;
461 env->exception_index = EXCP_INTERRUPT;
462 cpu_loop_exit();
466 if (!saved_env[0])
467 saved_env[0] = env;
469 r = kvm_run(kvm_context, 0);
470 if (r < 0) {
471 printf("kvm_run returned %d\n", r);
472 exit(1);
475 return 0;
478 static int kvm_debug(void *opaque, int vcpu)
480 CPUState **envs = opaque;
482 env = envs[0];
483 env->exception_index = EXCP_DEBUG;
484 return 1;
/* Callback: 8-bit port read via cpu_inb(); always returns 0. */
static int kvm_inb(void *opaque, uint16_t addr, uint8_t *data)
{
    const uint8_t value = cpu_inb(0, addr);

    *data = value;
    return 0;
}
/* Callback: 16-bit port read via cpu_inw(); always returns 0. */
static int kvm_inw(void *opaque, uint16_t addr, uint16_t *data)
{
    const uint16_t value = cpu_inw(0, addr);

    *data = value;
    return 0;
}
/* Callback: 32-bit port read via cpu_inl(); always returns 0. */
static int kvm_inl(void *opaque, uint16_t addr, uint32_t *data)
{
    const uint32_t value = cpu_inl(0, addr);

    *data = value;
    return 0;
}
/* Base I/O port used for the 16-bit register accessed at offset 4 below. */
#define PM_IO_BASE 0xb000

/*
 * Callback: 8-bit port write. Writes to port 0xb2 are intercepted and
 * handled by command byte; every other port is forwarded to cpu_outb().
 * Always returns 0.
 */
static int kvm_outb(void *opaque, uint16_t addr, uint8_t data)
{
    unsigned ctl;

    if (addr != 0xb2) {
        cpu_outb(0, addr, data);
        return 0;
    }

    switch (data) {
    case 0:
        cpu_outb(0, 0xb3, 0);
        break;
    case 0xf0:
        /* clear bit 0 of the register at PM_IO_BASE + 4 */
        ctl = cpu_inw(0, PM_IO_BASE + 4);
        ctl &= ~1;
        cpu_outw(0, PM_IO_BASE + 4, ctl);
        break;
    case 0xf1:
        /* set bit 0 of the register at PM_IO_BASE + 4 (enable acpi) */
        ctl = cpu_inw(0, PM_IO_BASE + 4);
        ctl |= 1;
        cpu_outw(0, PM_IO_BASE + 4, ctl);
        break;
    default:
        /* any other command byte is ignored */
        break;
    }

    return 0;
}
/* Callback: 16-bit port write via cpu_outw(); always returns 0. */
static int kvm_outw(void *opaque, uint16_t addr, uint16_t data)
{
    const int status = 0;

    cpu_outw(0, addr, data);
    return status;
}
/* Callback: 32-bit port write via cpu_outl(); always returns 0. */
static int kvm_outl(void *opaque, uint16_t addr, uint32_t data)
{
    const int status = 0;

    cpu_outl(0, addr, data);
    return status;
}
/* Callback: 8-bit physical memory read via ldub_phys(); always returns 0. */
static int kvm_readb(void *opaque, uint64_t addr, uint8_t *data)
{
    const uint8_t value = ldub_phys(addr);

    *data = value;
    return 0;
}
/* Callback: 16-bit physical memory read via lduw_phys(); always returns 0. */
static int kvm_readw(void *opaque, uint64_t addr, uint16_t *data)
{
    const uint16_t value = lduw_phys(addr);

    *data = value;
    return 0;
}
/* Callback: 32-bit physical memory read via ldl_phys(); always returns 0. */
static int kvm_readl(void *opaque, uint64_t addr, uint32_t *data)
{
    const uint32_t value = ldl_phys(addr);

    *data = value;
    return 0;
}
/* Callback: 64-bit physical memory read via ldq_phys(); always returns 0. */
static int kvm_readq(void *opaque, uint64_t addr, uint64_t *data)
{
    const uint64_t value = ldq_phys(addr);

    *data = value;
    return 0;
}
/* Callback: 8-bit physical memory write via stb_phys(); always returns 0. */
static int kvm_writeb(void *opaque, uint64_t addr, uint8_t data)
{
    const int status = 0;

    stb_phys(addr, data);
    return status;
}
/* Callback: 16-bit physical memory write via stw_phys(); always returns 0. */
static int kvm_writew(void *opaque, uint64_t addr, uint16_t data)
{
    const int status = 0;

    stw_phys(addr, data);
    return status;
}
/* Callback: 32-bit physical memory write via stl_phys(); always returns 0. */
static int kvm_writel(void *opaque, uint64_t addr, uint32_t data)
{
    const int status = 0;

    stl_phys(addr, data);
    return status;
}
/* Callback: 64-bit physical memory write via stq_phys(); always returns 0. */
static int kvm_writeq(void *opaque, uint64_t addr, uint64_t data)
{
    const int status = 0;

    stq_phys(addr, data);
    return status;
}
/* Callback for the io_window hook; ignores its argument and returns 1. */
static int kvm_io_window(void *opaque)
{
    (void)opaque;

    return 1;
}
608 static int kvm_halt(void *opaque, int vcpu)
610 CPUState **envs = opaque, *env;
612 env = envs[0];
613 if (!((env->interrupt_request & CPU_INTERRUPT_HARD) &&
614 (env->eflags & IF_MASK))) {
615 env->hflags |= HF_HALTED_MASK;
616 env->exception_index = EXCP_HLT;
619 return 1;
/* Callback for a guest shutdown: request a system reset; returns 1. */
static int kvm_shutdown(void *opaque, int vcpu)
{
    const int handled = 1;

    qemu_system_reset_request();
    return handled;
}
628 static struct kvm_callbacks qemu_kvm_ops = {
629 .debug = kvm_debug,
630 .inb = kvm_inb,
631 .inw = kvm_inw,
632 .inl = kvm_inl,
633 .outb = kvm_outb,
634 .outw = kvm_outw,
635 .outl = kvm_outl,
636 .readb = kvm_readb,
637 .readw = kvm_readw,
638 .readl = kvm_readl,
639 .readq = kvm_readq,
640 .writeb = kvm_writeb,
641 .writew = kvm_writew,
642 .writel = kvm_writel,
643 .writeq = kvm_writeq,
644 .halt = kvm_halt,
645 .shutdown = kvm_shutdown,
646 .io_window = kvm_io_window,
647 .try_push_interrupts = try_push_interrupts,
648 .post_kvm_run = post_kvm_run,
649 .pre_kvm_run = pre_kvm_run,
652 int kvm_qemu_init()
654 /* Try to initialize kvm */
655 kvm_context = kvm_init(&qemu_kvm_ops, saved_env);
656 if (!kvm_context) {
657 return -1;
660 return 0;
663 int kvm_qemu_create_context(void)
665 int i;
667 if (kvm_create(kvm_context, phys_ram_size, (void**)&phys_ram_base) < 0) {
668 kvm_qemu_destroy();
669 return -1;
671 kvm_msr_list = kvm_get_msr_list(kvm_context);
672 if (!kvm_msr_list) {
673 kvm_qemu_destroy();
674 return -1;
676 for (i = 0; i < kvm_msr_list->nmsrs; ++i)
677 if (kvm_msr_list->indices[i] == MSR_STAR)
678 kvm_has_msr_star = 1;
679 return 0;
682 void kvm_qemu_destroy(void)
684 kvm_finalize(kvm_context);
687 static void host_cpuid(uint32_t function, uint32_t *eax, uint32_t *ebx,
688 uint32_t *ecx, uint32_t *edx)
690 uint32_t vec[4];
692 vec[0] = function;
693 asm volatile (
694 #ifdef __x86_64__
695 "sub $128, %%rsp \n\t" /* skip red zone */
696 "push %0; push %%rsi \n\t"
697 "push %%rax; push %%rbx; push %%rcx; push %%rdx \n\t"
698 "mov 8*5(%%rsp), %%rsi \n\t"
699 "mov (%%rsi), %%eax \n\t"
700 "cpuid \n\t"
701 "mov %%eax, (%%rsi) \n\t"
702 "mov %%ebx, 4(%%rsi) \n\t"
703 "mov %%ecx, 8(%%rsi) \n\t"
704 "mov %%edx, 12(%%rsi) \n\t"
705 "pop %%rdx; pop %%rcx; pop %%rbx; pop %%rax \n\t"
706 "pop %%rsi; pop %0 \n\t"
707 "add $128, %%rsp"
708 #else
709 "push %0; push %%esi \n\t"
710 "push %%eax; push %%ebx; push %%ecx; push %%edx \n\t"
711 "mov 4*5(%%esp), %%esi \n\t"
712 "mov (%%esi), %%eax \n\t"
713 "cpuid \n\t"
714 "mov %%eax, (%%esi) \n\t"
715 "mov %%ebx, 4(%%esi) \n\t"
716 "mov %%ecx, 8(%%esi) \n\t"
717 "mov %%edx, 12(%%esi) \n\t"
718 "pop %%edx; pop %%ecx; pop %%ebx; pop %%eax \n\t"
719 "pop %%esi; pop %0 \n\t"
720 #endif
721 : : "rm"(vec) : "memory");
722 if (eax)
723 *eax = vec[0];
724 if (ebx)
725 *ebx = vec[1];
726 if (ecx)
727 *ecx = vec[2];
728 if (edx)
729 *edx = vec[3];
732 static void do_cpuid_ent(struct kvm_cpuid_entry *e, uint32_t function)
734 EAX = function;
735 helper_cpuid();
736 e->function = function;
737 e->eax = EAX;
738 e->ebx = EBX;
739 e->ecx = ECX;
740 e->edx = EDX;
741 if (function == 0x80000001) {
742 uint32_t h_eax, h_edx;
744 host_cpuid(function, &h_eax, NULL, NULL, &h_edx);
746 // long mode
747 if ((h_edx & 0x20000000) == 0)
748 e->edx &= ~0x20000000u;
749 // syscall
750 if ((h_edx & 0x00000800) == 0)
751 e->edx &= ~0x00000800u;
752 // nx
753 if ((h_edx & 0x00100000) == 0)
754 e->edx &= ~0x00100000u;
756 // sysenter isn't supported on compatibility mode on AMD. and syscall
757 // isn't supported in compatibility mode on Intel. so advertise the
758 // actuall cpu, and say goodbye to migration between different vendors
759 // is you use compatibility mode.
760 if (function == 0) {
761 uint32_t bcd[3];
763 host_cpuid(0, NULL, &bcd[0], &bcd[1], &bcd[2]);
764 e->ebx = bcd[0];
765 e->ecx = bcd[1];
766 e->edx = bcd[2];
770 int kvm_qemu_init_env(CPUState *cenv)
772 struct kvm_cpuid_entry cpuid_ent[100];
773 int cpuid_nent = 0;
774 CPUState *oldenv = env;
775 CPUState copy;
776 uint32_t i, limit;
777 #define DECLARE_HOST_REGS
778 #include "hostregs_helper.h"
780 #define SAVE_HOST_REGS
781 #include "hostregs_helper.h"
783 copy = *cenv;
784 env = cenv;
786 EAX = 0;
787 helper_cpuid();
788 limit = EAX;
789 for (i = 0; i <= limit; ++i)
790 do_cpuid_ent(&cpuid_ent[cpuid_nent++], i);
791 EAX = 0x80000000;
792 helper_cpuid();
793 limit = EAX;
794 for (i = 0x80000000; i <= limit; ++i)
795 do_cpuid_ent(&cpuid_ent[cpuid_nent++], i);
797 kvm_setup_cpuid(kvm_context, 0, cpuid_nent, cpuid_ent);
799 #include "hostregs_helper.h"
801 env = oldenv;
803 return 0;
806 int kvm_update_debugger(CPUState *env)
808 struct kvm_debug_guest dbg;
809 int i;
811 dbg.enabled = 0;
812 if (env->nb_breakpoints || env->singlestep_enabled) {
813 dbg.enabled = 1;
814 for (i = 0; i < 4 && i < env->nb_breakpoints; ++i) {
815 dbg.breakpoints[i].enabled = 1;
816 dbg.breakpoints[i].address = env->breakpoints[i];
818 dbg.singlestep = env->singlestep_enabled;
820 return kvm_guest_debug(kvm_context, 0, &dbg);
825 * dirty pages logging
827 /* FIXME: use unsigned long pointer instead of unsigned char */
828 unsigned char *kvm_dirty_bitmap = NULL;
829 int kvm_physical_memory_set_dirty_tracking(int enable)
831 int r = 0;
833 if (!kvm_allowed)
834 return 0;
836 if (enable) {
837 if (!kvm_dirty_bitmap) {
838 unsigned bitmap_size = BITMAP_SIZE(phys_ram_size);
839 kvm_dirty_bitmap = qemu_malloc(bitmap_size);
840 if (kvm_dirty_bitmap == NULL) {
841 perror("Failed to allocate dirty pages bitmap");
842 r=-1;
844 else {
845 r = kvm_dirty_pages_log_enable_all(kvm_context);
849 else {
850 if (kvm_dirty_bitmap) {
851 r = kvm_dirty_pages_log_reset(kvm_context);
852 qemu_free(kvm_dirty_bitmap);
853 kvm_dirty_bitmap = NULL;
856 return r;
859 /* get kvm's dirty pages bitmap and update qemu's */
860 int kvm_get_dirty_pages_log_slot(int slot,
861 unsigned char *bitmap,
862 unsigned int offset,
863 unsigned int len)
865 int r;
866 unsigned int i, j, n=0;
867 unsigned char c;
868 unsigned page_number, addr, addr1;
870 memset(bitmap, 0, len);
871 r = kvm_get_dirty_pages(kvm_context, slot, bitmap);
872 if (r)
873 return r;
876 * bitmap-traveling is faster than memory-traveling (for addr...)
877 * especially when most of the memory is not dirty.
879 for (i=0; i<len; i++) {
880 c = bitmap[i];
881 while (c>0) {
882 j = ffsl(c) - 1;
883 c &= ~(1u<<j);
884 page_number = i * 8 + j;
885 addr1 = page_number * TARGET_PAGE_SIZE;
886 addr = offset + addr1;
887 cpu_physical_memory_set_dirty(addr);
888 n++;
891 return 0;
895 * get kvm's dirty pages bitmap and update qemu's
896 * we only care about physical ram, which resides in slots 0 and 3
898 int kvm_update_dirty_pages_log(void)
900 int r = 0, len;
902 len = BITMAP_SIZE(0xa0000);
903 r = kvm_get_dirty_pages_log_slot(3, kvm_dirty_bitmap, 0 , len);
904 len = BITMAP_SIZE(phys_ram_size - 0xc0000);
905 r = r || kvm_get_dirty_pages_log_slot(0, kvm_dirty_bitmap, 0xc0000, len);
906 return r;
909 int kvm_get_phys_ram_page_bitmap(unsigned char *bitmap)
911 int r=0, len, offset;
913 len = BITMAP_SIZE(phys_ram_size);
914 memset(bitmap, 0, len);
916 r = kvm_get_mem_map(kvm_context, 3, bitmap);
917 if (r)
918 goto out;
920 offset = BITMAP_SIZE(0xc0000);
921 r = kvm_get_mem_map(kvm_context, 0, bitmap + offset);
923 out:
924 return r;
926 #endif