kvm: libkvm: adapt to api 11 (implicit pio and mmio completion)
[qemu-kvm/fedora.git] / qemu-kvm.c
blob212570a197986c3516ceaa64e2905c4ac6e2137a
2 #include "config.h"
3 #include "config-host.h"
5 #ifdef USE_KVM
7 #include "exec.h"
9 #include "qemu-kvm.h"
10 #include <kvmctl.h>
11 #include <string.h>
13 #define MSR_IA32_TSC 0x10
15 extern void perror(const char *s);
17 int kvm_allowed = 1;
18 kvm_context_t kvm_context;
19 static struct kvm_msr_list *kvm_msr_list;
20 static int kvm_has_msr_star;
22 #define NR_CPU 16
23 static CPUState *saved_env[NR_CPU];
25 static void set_msr_entry(struct kvm_msr_entry *entry, uint32_t index,
26 uint64_t data)
28 entry->index = index;
29 entry->data = data;
32 /* returns 0 on success, non-0 on failure */
33 static int get_msr_entry(struct kvm_msr_entry *entry, CPUState *env)
35 switch (entry->index) {
36 case MSR_IA32_SYSENTER_CS:
37 env->sysenter_cs = entry->data;
38 break;
39 case MSR_IA32_SYSENTER_ESP:
40 env->sysenter_esp = entry->data;
41 break;
42 case MSR_IA32_SYSENTER_EIP:
43 env->sysenter_eip = entry->data;
44 break;
45 case MSR_STAR:
46 env->star = entry->data;
47 break;
48 #ifdef TARGET_X86_64
49 case MSR_CSTAR:
50 env->cstar = entry->data;
51 break;
52 case MSR_KERNELGSBASE:
53 env->kernelgsbase = entry->data;
54 break;
55 case MSR_FMASK:
56 env->fmask = entry->data;
57 break;
58 case MSR_LSTAR:
59 env->lstar = entry->data;
60 break;
61 #endif
62 case MSR_IA32_TSC:
63 env->tsc = entry->data;
64 break;
65 default:
66 printf("Warning unknown msr index 0x%x\n", entry->index);
67 return 1;
69 return 0;
72 #ifdef TARGET_X86_64
73 #define MSR_COUNT 9
74 #else
75 #define MSR_COUNT 5
76 #endif
78 static void set_v8086_seg(struct kvm_segment *lhs, const SegmentCache *rhs)
80 lhs->selector = rhs->selector;
81 lhs->base = rhs->base;
82 lhs->limit = rhs->limit;
83 lhs->type = 3;
84 lhs->present = 1;
85 lhs->dpl = 3;
86 lhs->db = 0;
87 lhs->s = 1;
88 lhs->l = 0;
89 lhs->g = 0;
90 lhs->avl = 0;
91 lhs->unusable = 0;
94 static void set_seg(struct kvm_segment *lhs, const SegmentCache *rhs)
96 unsigned flags = rhs->flags;
97 lhs->selector = rhs->selector;
98 lhs->base = rhs->base;
99 lhs->limit = rhs->limit;
100 lhs->type = (flags >> DESC_TYPE_SHIFT) & 15;
101 lhs->present = (flags & DESC_P_MASK) != 0;
102 lhs->dpl = rhs->selector & 3;
103 lhs->db = (flags >> DESC_B_SHIFT) & 1;
104 lhs->s = (flags & DESC_S_MASK) != 0;
105 lhs->l = (flags >> DESC_L_SHIFT) & 1;
106 lhs->g = (flags & DESC_G_MASK) != 0;
107 lhs->avl = (flags & DESC_AVL_MASK) != 0;
108 lhs->unusable = 0;
111 static void get_seg(SegmentCache *lhs, const struct kvm_segment *rhs)
113 lhs->selector = rhs->selector;
114 lhs->base = rhs->base;
115 lhs->limit = rhs->limit;
116 lhs->flags =
117 (rhs->type << DESC_TYPE_SHIFT)
118 | (rhs->present * DESC_P_MASK)
119 | (rhs->dpl << DESC_DPL_SHIFT)
120 | (rhs->db << DESC_B_SHIFT)
121 | (rhs->s * DESC_S_MASK)
122 | (rhs->l << DESC_L_SHIFT)
123 | (rhs->g * DESC_G_MASK)
124 | (rhs->avl * DESC_AVL_MASK);
127 /* the reset values of qemu are not compatible to SVM
128 * this function is used to fix the segment descriptor values */
129 static void fix_realmode_dataseg(struct kvm_segment *seg)
131 seg->type = 0x02;
132 seg->present = 1;
133 seg->s = 1;
136 static void load_regs(CPUState *env)
138 struct kvm_regs regs;
139 struct kvm_fpu fpu;
140 struct kvm_sregs sregs;
141 struct kvm_msr_entry msrs[MSR_COUNT];
142 int rc, n, i;
144 /* hack: save env */
145 if (!saved_env[0])
146 saved_env[0] = env;
148 regs.rax = env->regs[R_EAX];
149 regs.rbx = env->regs[R_EBX];
150 regs.rcx = env->regs[R_ECX];
151 regs.rdx = env->regs[R_EDX];
152 regs.rsi = env->regs[R_ESI];
153 regs.rdi = env->regs[R_EDI];
154 regs.rsp = env->regs[R_ESP];
155 regs.rbp = env->regs[R_EBP];
156 #ifdef TARGET_X86_64
157 regs.r8 = env->regs[8];
158 regs.r9 = env->regs[9];
159 regs.r10 = env->regs[10];
160 regs.r11 = env->regs[11];
161 regs.r12 = env->regs[12];
162 regs.r13 = env->regs[13];
163 regs.r14 = env->regs[14];
164 regs.r15 = env->regs[15];
165 #endif
167 regs.rflags = env->eflags;
168 regs.rip = env->eip;
170 kvm_set_regs(kvm_context, 0, &regs);
172 memset(&fpu, 0, sizeof fpu);
173 fpu.fsw = env->fpus & ~(7 << 11);
174 fpu.fsw |= (env->fpstt & 7) << 11;
175 fpu.fcw = env->fpuc;
176 for (i = 0; i < 8; ++i)
177 fpu.ftwx |= (!env->fptags[i]) << i;
178 memcpy(fpu.fpr, env->fpregs, sizeof env->fpregs);
179 memcpy(fpu.xmm, env->xmm_regs, sizeof env->xmm_regs);
180 fpu.mxcsr = env->mxcsr;
181 kvm_set_fpu(kvm_context, 0, &fpu);
183 memcpy(sregs.interrupt_bitmap, env->kvm_interrupt_bitmap, sizeof(sregs.interrupt_bitmap));
185 if ((env->eflags & VM_MASK)) {
186 set_v8086_seg(&sregs.cs, &env->segs[R_CS]);
187 set_v8086_seg(&sregs.ds, &env->segs[R_DS]);
188 set_v8086_seg(&sregs.es, &env->segs[R_ES]);
189 set_v8086_seg(&sregs.fs, &env->segs[R_FS]);
190 set_v8086_seg(&sregs.gs, &env->segs[R_GS]);
191 set_v8086_seg(&sregs.ss, &env->segs[R_SS]);
192 } else {
193 set_seg(&sregs.cs, &env->segs[R_CS]);
194 set_seg(&sregs.ds, &env->segs[R_DS]);
195 set_seg(&sregs.es, &env->segs[R_ES]);
196 set_seg(&sregs.fs, &env->segs[R_FS]);
197 set_seg(&sregs.gs, &env->segs[R_GS]);
198 set_seg(&sregs.ss, &env->segs[R_SS]);
200 if (env->cr[0] & CR0_PE_MASK) {
201 /* force ss cpl to cs cpl */
202 sregs.ss.selector = (sregs.ss.selector & ~3) |
203 (sregs.cs.selector & 3);
204 sregs.ss.dpl = sregs.ss.selector & 3;
207 if (!(env->cr[0] & CR0_PG_MASK)) {
208 fix_realmode_dataseg(&sregs.cs);
209 fix_realmode_dataseg(&sregs.ds);
210 fix_realmode_dataseg(&sregs.es);
211 fix_realmode_dataseg(&sregs.fs);
212 fix_realmode_dataseg(&sregs.gs);
213 fix_realmode_dataseg(&sregs.ss);
217 set_seg(&sregs.tr, &env->tr);
218 set_seg(&sregs.ldt, &env->ldt);
220 sregs.idt.limit = env->idt.limit;
221 sregs.idt.base = env->idt.base;
222 sregs.gdt.limit = env->gdt.limit;
223 sregs.gdt.base = env->gdt.base;
225 sregs.cr0 = env->cr[0];
226 sregs.cr2 = env->cr[2];
227 sregs.cr3 = env->cr[3];
228 sregs.cr4 = env->cr[4];
230 sregs.apic_base = cpu_get_apic_base(env);
231 sregs.efer = env->efer;
232 sregs.cr8 = cpu_get_apic_tpr(env);
234 kvm_set_sregs(kvm_context, 0, &sregs);
236 /* msrs */
237 n = 0;
238 set_msr_entry(&msrs[n++], MSR_IA32_SYSENTER_CS, env->sysenter_cs);
239 set_msr_entry(&msrs[n++], MSR_IA32_SYSENTER_ESP, env->sysenter_esp);
240 set_msr_entry(&msrs[n++], MSR_IA32_SYSENTER_EIP, env->sysenter_eip);
241 if (kvm_has_msr_star)
242 set_msr_entry(&msrs[n++], MSR_STAR, env->star);
243 set_msr_entry(&msrs[n++], MSR_IA32_TSC, env->tsc);
244 #ifdef TARGET_X86_64
245 set_msr_entry(&msrs[n++], MSR_CSTAR, env->cstar);
246 set_msr_entry(&msrs[n++], MSR_KERNELGSBASE, env->kernelgsbase);
247 set_msr_entry(&msrs[n++], MSR_FMASK, env->fmask);
248 set_msr_entry(&msrs[n++], MSR_LSTAR , env->lstar);
249 #endif
251 rc = kvm_set_msrs(kvm_context, 0, msrs, n);
252 if (rc == -1)
253 perror("kvm_set_msrs FAILED");
257 static void save_regs(CPUState *env)
259 struct kvm_regs regs;
260 struct kvm_fpu fpu;
261 struct kvm_sregs sregs;
262 struct kvm_msr_entry msrs[MSR_COUNT];
263 uint32_t hflags;
264 uint32_t i, n, rc;
266 kvm_get_regs(kvm_context, 0, &regs);
268 env->regs[R_EAX] = regs.rax;
269 env->regs[R_EBX] = regs.rbx;
270 env->regs[R_ECX] = regs.rcx;
271 env->regs[R_EDX] = regs.rdx;
272 env->regs[R_ESI] = regs.rsi;
273 env->regs[R_EDI] = regs.rdi;
274 env->regs[R_ESP] = regs.rsp;
275 env->regs[R_EBP] = regs.rbp;
276 #ifdef TARGET_X86_64
277 env->regs[8] = regs.r8;
278 env->regs[9] = regs.r9;
279 env->regs[10] = regs.r10;
280 env->regs[11] = regs.r11;
281 env->regs[12] = regs.r12;
282 env->regs[13] = regs.r13;
283 env->regs[14] = regs.r14;
284 env->regs[15] = regs.r15;
285 #endif
287 env->eflags = regs.rflags;
288 env->eip = regs.rip;
290 kvm_get_fpu(kvm_context, 0, &fpu);
291 env->fpstt = (fpu.fsw >> 11) & 7;
292 env->fpus = fpu.fsw;
293 env->fpuc = fpu.fcw;
294 for (i = 0; i < 8; ++i)
295 env->fptags[i] = !((fpu.ftwx >> i) & 1);
296 memcpy(env->fpregs, fpu.fpr, sizeof env->fpregs);
297 memcpy(env->xmm_regs, fpu.xmm, sizeof env->xmm_regs);
298 env->mxcsr = fpu.mxcsr;
300 kvm_get_sregs(kvm_context, 0, &sregs);
302 memcpy(env->kvm_interrupt_bitmap, sregs.interrupt_bitmap, sizeof(env->kvm_interrupt_bitmap));
304 get_seg(&env->segs[R_CS], &sregs.cs);
305 get_seg(&env->segs[R_DS], &sregs.ds);
306 get_seg(&env->segs[R_ES], &sregs.es);
307 get_seg(&env->segs[R_FS], &sregs.fs);
308 get_seg(&env->segs[R_GS], &sregs.gs);
309 get_seg(&env->segs[R_SS], &sregs.ss);
311 get_seg(&env->tr, &sregs.tr);
312 get_seg(&env->ldt, &sregs.ldt);
314 env->idt.limit = sregs.idt.limit;
315 env->idt.base = sregs.idt.base;
316 env->gdt.limit = sregs.gdt.limit;
317 env->gdt.base = sregs.gdt.base;
319 env->cr[0] = sregs.cr0;
320 env->cr[2] = sregs.cr2;
321 env->cr[3] = sregs.cr3;
322 env->cr[4] = sregs.cr4;
324 cpu_set_apic_base(env, sregs.apic_base);
326 env->efer = sregs.efer;
327 //cpu_set_apic_tpr(env, sregs.cr8);
329 #define HFLAG_COPY_MASK ~( \
330 HF_CPL_MASK | HF_PE_MASK | HF_MP_MASK | HF_EM_MASK | \
331 HF_TS_MASK | HF_TF_MASK | HF_VM_MASK | HF_IOPL_MASK | \
332 HF_OSFXSR_MASK | HF_LMA_MASK | HF_CS32_MASK | \
333 HF_SS32_MASK | HF_CS64_MASK | HF_ADDSEG_MASK)
337 hflags = (env->segs[R_CS].flags >> DESC_DPL_SHIFT) & HF_CPL_MASK;
338 hflags |= (env->cr[0] & CR0_PE_MASK) << (HF_PE_SHIFT - CR0_PE_SHIFT);
339 hflags |= (env->cr[0] << (HF_MP_SHIFT - CR0_MP_SHIFT)) &
340 (HF_MP_MASK | HF_EM_MASK | HF_TS_MASK);
341 hflags |= (env->eflags & (HF_TF_MASK | HF_VM_MASK | HF_IOPL_MASK));
342 hflags |= (env->cr[4] & CR4_OSFXSR_MASK) <<
343 (HF_OSFXSR_SHIFT - CR4_OSFXSR_SHIFT);
345 if (env->efer & MSR_EFER_LMA) {
346 hflags |= HF_LMA_MASK;
349 if ((hflags & HF_LMA_MASK) && (env->segs[R_CS].flags & DESC_L_MASK)) {
350 hflags |= HF_CS32_MASK | HF_SS32_MASK | HF_CS64_MASK;
351 } else {
352 hflags |= (env->segs[R_CS].flags & DESC_B_MASK) >>
353 (DESC_B_SHIFT - HF_CS32_SHIFT);
354 hflags |= (env->segs[R_SS].flags & DESC_B_MASK) >>
355 (DESC_B_SHIFT - HF_SS32_SHIFT);
356 if (!(env->cr[0] & CR0_PE_MASK) ||
357 (env->eflags & VM_MASK) ||
358 !(hflags & HF_CS32_MASK)) {
359 hflags |= HF_ADDSEG_MASK;
360 } else {
361 hflags |= ((env->segs[R_DS].base |
362 env->segs[R_ES].base |
363 env->segs[R_SS].base) != 0) <<
364 HF_ADDSEG_SHIFT;
367 env->hflags = (env->hflags & HFLAG_COPY_MASK) | hflags;
368 CC_SRC = env->eflags & (CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C);
369 DF = 1 - (2 * ((env->eflags >> 10) & 1));
370 CC_OP = CC_OP_EFLAGS;
371 env->eflags &= ~(DF_MASK | CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C);
373 tlb_flush(env, 1);
375 /* msrs */
376 n = 0;
377 msrs[n++].index = MSR_IA32_SYSENTER_CS;
378 msrs[n++].index = MSR_IA32_SYSENTER_ESP;
379 msrs[n++].index = MSR_IA32_SYSENTER_EIP;
380 if (kvm_has_msr_star)
381 msrs[n++].index = MSR_STAR;
382 msrs[n++].index = MSR_IA32_TSC;
383 #ifdef TARGET_X86_64
384 msrs[n++].index = MSR_CSTAR;
385 msrs[n++].index = MSR_KERNELGSBASE;
386 msrs[n++].index = MSR_FMASK;
387 msrs[n++].index = MSR_LSTAR;
388 #endif
389 rc = kvm_get_msrs(kvm_context, 0, msrs, n);
390 if (rc == -1) {
391 perror("kvm_get_msrs FAILED");
393 else {
394 n = rc; /* actual number of MSRs */
395 for (i=0 ; i<n; i++) {
396 if (get_msr_entry(&msrs[i], env))
397 return;
402 #include <signal.h>
405 static int try_push_interrupts(void *opaque)
407 CPUState **envs = opaque, *env;
408 env = envs[0];
410 if (env->ready_for_interrupt_injection &&
411 (env->interrupt_request & CPU_INTERRUPT_HARD) &&
412 (env->eflags & IF_MASK)) {
413 env->interrupt_request &= ~CPU_INTERRUPT_HARD;
414 // for now using cpu 0
415 kvm_inject_irq(kvm_context, 0, cpu_get_pic_interrupt(env));
418 return (env->interrupt_request & CPU_INTERRUPT_HARD) != 0;
421 static void post_kvm_run(void *opaque, struct kvm_run *kvm_run)
423 CPUState **envs = opaque, *env;
424 env = envs[0];
426 env->eflags = (kvm_run->if_flag) ? env->eflags | IF_MASK:env->eflags & ~IF_MASK;
427 env->ready_for_interrupt_injection = kvm_run->ready_for_interrupt_injection;
428 //cpu_set_apic_tpr(env, kvm_run->cr8);
429 cpu_set_apic_base(env, kvm_run->apic_base);
432 static void pre_kvm_run(void *opaque, struct kvm_run *kvm_run)
434 CPUState **envs = opaque, *env;
435 env = envs[0];
437 kvm_run->cr8 = cpu_get_apic_tpr(env);
440 void kvm_load_registers(CPUState *env)
442 load_regs(env);
445 void kvm_save_registers(CPUState *env)
447 save_regs(env);
450 int kvm_cpu_exec(CPUState *env)
452 int r;
453 int pending = (!env->ready_for_interrupt_injection ||
454 ((env->interrupt_request & CPU_INTERRUPT_HARD) &&
455 (env->eflags & IF_MASK)));
457 if (!pending && (env->interrupt_request & CPU_INTERRUPT_EXIT)) {
458 env->interrupt_request &= ~CPU_INTERRUPT_EXIT;
459 env->exception_index = EXCP_INTERRUPT;
460 cpu_loop_exit();
464 if (!saved_env[0])
465 saved_env[0] = env;
467 r = kvm_run(kvm_context, 0);
468 if (r < 0) {
469 printf("kvm_run returned %d\n", r);
470 exit(1);
473 return 0;
476 static int kvm_debug(void *opaque, int vcpu)
478 CPUState **envs = opaque;
480 env = envs[0];
481 env->exception_index = EXCP_DEBUG;
482 return 1;
/* Port-input callback (8 bit): store cpu_inb(0, addr) in *data; returns 0. */
static int kvm_inb(void *opaque, uint16_t addr, uint8_t *data)
{
    uint8_t value = cpu_inb(0, addr);

    *data = value;
    return 0;
}
/* Port-input callback (16 bit): store cpu_inw(0, addr) in *data; returns 0. */
static int kvm_inw(void *opaque, uint16_t addr, uint16_t *data)
{
    uint16_t value = cpu_inw(0, addr);

    *data = value;
    return 0;
}
/* Port-input callback (32 bit): store cpu_inl(0, addr) in *data; returns 0. */
static int kvm_inl(void *opaque, uint16_t addr, uint32_t *data)
{
    uint32_t value = cpu_inl(0, addr);

    *data = value;
    return 0;
}
#define PM_IO_BASE 0xb000

/*
 * Port-output callback (8 bit).
 *
 * Writes to port 0xb2 are intercepted and never forwarded:
 *   data 0x00 -> writes 0 to port 0xb3
 *   data 0xf0 -> clears bit 0 of the 16-bit register at PM_IO_BASE + 4
 *   data 0xf1 -> sets   bit 0 of the 16-bit register at PM_IO_BASE + 4
 *   anything else is ignored.
 * NOTE(review): bit 0 is presumably the ACPI enable bit, which would make
 * 0xf0 "disable" and 0xf1 "enable" -- confirm against the BIOS.
 *
 * Every other port is passed straight to cpu_outb().  Always returns 0.
 */
static int kvm_outb(void *opaque, uint16_t addr, uint8_t data)
{
    if (addr != 0xb2) {
        cpu_outb(0, addr, data);
        return 0;
    }

    if (data == 0) {
        cpu_outb(0, 0xb3, 0);
    } else if (data == 0xf0 || data == 0xf1) {
        unsigned pm_ctl = cpu_inw(0, PM_IO_BASE + 4);

        if (data == 0xf1)
            pm_ctl |= 1;
        else
            pm_ctl &= ~1;
        cpu_outw(0, PM_IO_BASE + 4, pm_ctl);
    }

    return 0;
}
/* Port-output callback (16 bit): forward to cpu_outw(); returns 0. */
static int kvm_outw(void *opaque, uint16_t addr, uint16_t data)
{
    (void)opaque;

    cpu_outw(0, addr, data);
    return 0;
}
/* Port-output callback (32 bit): forward to cpu_outl(); returns 0. */
static int kvm_outl(void *opaque, uint16_t addr, uint32_t data)
{
    (void)opaque;

    cpu_outl(0, addr, data);
    return 0;
}
/* MMIO read callback (8 bit): store ldub_phys(addr) in *data; returns 0. */
static int kvm_readb(void *opaque, uint64_t addr, uint8_t *data)
{
    uint8_t value = ldub_phys(addr);

    *data = value;
    return 0;
}
/* MMIO read callback (16 bit): store lduw_phys(addr) in *data; returns 0. */
static int kvm_readw(void *opaque, uint64_t addr, uint16_t *data)
{
    uint16_t value = lduw_phys(addr);

    *data = value;
    return 0;
}
/* MMIO read callback (32 bit): store ldl_phys(addr) in *data; returns 0. */
static int kvm_readl(void *opaque, uint64_t addr, uint32_t *data)
{
    uint32_t value = ldl_phys(addr);

    *data = value;
    return 0;
}
/* MMIO read callback (64 bit): store ldq_phys(addr) in *data; returns 0. */
static int kvm_readq(void *opaque, uint64_t addr, uint64_t *data)
{
    uint64_t value = ldq_phys(addr);

    *data = value;
    return 0;
}
/* MMIO write callback (8 bit): forward to stb_phys(); returns 0. */
static int kvm_writeb(void *opaque, uint64_t addr, uint8_t data)
{
    (void)opaque;

    stb_phys(addr, data);
    return 0;
}
/* MMIO write callback (16 bit): forward to stw_phys(); returns 0. */
static int kvm_writew(void *opaque, uint64_t addr, uint16_t data)
{
    (void)opaque;

    stw_phys(addr, data);
    return 0;
}
/* MMIO write callback (32 bit): forward to stl_phys(); returns 0. */
static int kvm_writel(void *opaque, uint64_t addr, uint32_t data)
{
    (void)opaque;

    stl_phys(addr, data);
    return 0;
}
/* MMIO write callback (64 bit): forward to stq_phys(); returns 0. */
static int kvm_writeq(void *opaque, uint64_t addr, uint64_t data)
{
    (void)opaque;

    stq_phys(addr, data);
    return 0;
}
/* io_window callback: does nothing and always returns 1. */
static int kvm_io_window(void *opaque)
{
    (void)opaque;

    return 1;
}
606 static int kvm_halt(void *opaque, int vcpu)
608 CPUState **envs = opaque, *env;
610 env = envs[0];
611 if (!((env->interrupt_request & CPU_INTERRUPT_HARD) &&
612 (env->eflags & IF_MASK))) {
613 env->hflags |= HF_HALTED_MASK;
614 env->exception_index = EXCP_HLT;
617 return 1;
/* Callback for a guest shutdown: request a system reset; returns 1. */
static int kvm_shutdown(void *opaque, int vcpu)
{
    (void)opaque;
    (void)vcpu;

    qemu_system_reset_request();
    return 1;
}
626 static struct kvm_callbacks qemu_kvm_ops = {
627 .debug = kvm_debug,
628 .inb = kvm_inb,
629 .inw = kvm_inw,
630 .inl = kvm_inl,
631 .outb = kvm_outb,
632 .outw = kvm_outw,
633 .outl = kvm_outl,
634 .readb = kvm_readb,
635 .readw = kvm_readw,
636 .readl = kvm_readl,
637 .readq = kvm_readq,
638 .writeb = kvm_writeb,
639 .writew = kvm_writew,
640 .writel = kvm_writel,
641 .writeq = kvm_writeq,
642 .halt = kvm_halt,
643 .shutdown = kvm_shutdown,
644 .io_window = kvm_io_window,
645 .try_push_interrupts = try_push_interrupts,
646 .post_kvm_run = post_kvm_run,
647 .pre_kvm_run = pre_kvm_run,
650 int kvm_qemu_init()
652 /* Try to initialize kvm */
653 kvm_context = kvm_init(&qemu_kvm_ops, saved_env);
654 if (!kvm_context) {
655 return -1;
658 return 0;
661 int kvm_qemu_create_context(void)
663 int i;
665 if (kvm_create(kvm_context, phys_ram_size, (void**)&phys_ram_base) < 0) {
666 kvm_qemu_destroy();
667 return -1;
669 kvm_msr_list = kvm_get_msr_list(kvm_context);
670 if (!kvm_msr_list) {
671 kvm_qemu_destroy();
672 return -1;
674 for (i = 0; i < kvm_msr_list->nmsrs; ++i)
675 if (kvm_msr_list->indices[i] == MSR_STAR)
676 kvm_has_msr_star = 1;
677 return 0;
680 void kvm_qemu_destroy(void)
682 kvm_finalize(kvm_context);
/*
 * Execute CPUID on the host for leaf `function` and hand back the four
 * result registers.  Any of eax/ebx/ecx/edx may be NULL, in which case that
 * result is discarded.
 *
 * The asm saves and restores every register it touches by hand and moves
 * the results through the vec[] buffer, so the only declared side effect is
 * the "memory" clobber.
 */
static void host_cpuid(uint32_t function, uint32_t *eax, uint32_t *ebx,
                       uint32_t *ecx, uint32_t *edx)
{
    uint32_t vec[4];
    uint32_t *results[4];
    int k;

    vec[0] = function;
    __asm__ volatile (
#ifdef __x86_64__
         "sub $128, %%rsp \n\t"  /* skip red zone */
         "push %0;  push %%rsi \n\t"
         "push %%rax; push %%rbx; push %%rcx; push %%rdx \n\t"
         "mov 8*5(%%rsp), %%rsi \n\t"
         "mov (%%rsi), %%eax \n\t"
         "cpuid \n\t"
         "mov %%eax, (%%rsi) \n\t"
         "mov %%ebx, 4(%%rsi) \n\t"
         "mov %%ecx, 8(%%rsi) \n\t"
         "mov %%edx, 12(%%rsi) \n\t"
         "pop %%rdx; pop %%rcx; pop %%rbx; pop %%rax \n\t"
         "pop %%rsi; pop %0 \n\t"
         "add $128, %%rsp"
#else
         "push %0;  push %%esi \n\t"
         "push %%eax; push %%ebx; push %%ecx; push %%edx \n\t"
         "mov 4*5(%%esp), %%esi \n\t"
         "mov (%%esi), %%eax \n\t"
         "cpuid \n\t"
         "mov %%eax, (%%esi) \n\t"
         "mov %%ebx, 4(%%esi) \n\t"
         "mov %%ecx, 8(%%esi) \n\t"
         "mov %%edx, 12(%%esi) \n\t"
         "pop %%edx; pop %%ecx; pop %%ebx; pop %%eax \n\t"
         "pop %%esi; pop %0 \n\t"
#endif
         : : "rm"(vec) : "memory");

    /* vec[] now holds eax, ebx, ecx, edx in that order */
    results[0] = eax;
    results[1] = ebx;
    results[2] = ecx;
    results[3] = edx;
    for (k = 0; k < 4; k++) {
        if (results[k])
            *results[k] = vec[k];
    }
}
730 static void do_cpuid_ent(struct kvm_cpuid_entry *e, uint32_t function)
732 EAX = function;
733 helper_cpuid();
734 e->function = function;
735 e->eax = EAX;
736 e->ebx = EBX;
737 e->ecx = ECX;
738 e->edx = EDX;
739 if (function == 0x80000001) {
740 uint32_t h_eax, h_edx;
742 host_cpuid(function, &h_eax, NULL, NULL, &h_edx);
744 // long mode
745 if ((h_edx & 0x20000000) == 0)
746 e->edx &= ~0x20000000u;
747 // syscall
748 if ((h_edx & 0x00000800) == 0)
749 e->edx &= ~0x00000800u;
750 // nx
751 if ((h_edx & 0x00100000) == 0)
752 e->edx &= ~0x00100000u;
754 // sysenter isn't supported on compatibility mode on AMD. and syscall
755 // isn't supported in compatibility mode on Intel. so advertise the
756 // actuall cpu, and say goodbye to migration between different vendors
757 // is you use compatibility mode.
758 if (function == 0) {
759 uint32_t bcd[3];
761 host_cpuid(0, NULL, &bcd[0], &bcd[1], &bcd[2]);
762 e->ebx = bcd[0];
763 e->ecx = bcd[1];
764 e->edx = bcd[2];
768 int kvm_qemu_init_env(CPUState *cenv)
770 struct kvm_cpuid_entry cpuid_ent[100];
771 int cpuid_nent = 0;
772 CPUState *oldenv = env;
773 CPUState copy;
774 uint32_t i, limit;
775 #define DECLARE_HOST_REGS
776 #include "hostregs_helper.h"
778 #define SAVE_HOST_REGS
779 #include "hostregs_helper.h"
781 copy = *cenv;
782 env = cenv;
784 EAX = 0;
785 helper_cpuid();
786 limit = EAX;
787 for (i = 0; i <= limit; ++i)
788 do_cpuid_ent(&cpuid_ent[cpuid_nent++], i);
789 EAX = 0x80000000;
790 helper_cpuid();
791 limit = EAX;
792 for (i = 0x80000000; i <= limit; ++i)
793 do_cpuid_ent(&cpuid_ent[cpuid_nent++], i);
795 kvm_setup_cpuid(kvm_context, 0, cpuid_nent, cpuid_ent);
797 #include "hostregs_helper.h"
799 env = oldenv;
801 return 0;
804 int kvm_update_debugger(CPUState *env)
806 struct kvm_debug_guest dbg;
807 int i;
809 dbg.enabled = 0;
810 if (env->nb_breakpoints || env->singlestep_enabled) {
811 dbg.enabled = 1;
812 for (i = 0; i < 4 && i < env->nb_breakpoints; ++i) {
813 dbg.breakpoints[i].enabled = 1;
814 dbg.breakpoints[i].address = env->breakpoints[i];
816 dbg.singlestep = env->singlestep_enabled;
818 return kvm_guest_debug(kvm_context, 0, &dbg);
/*
 * dirty pages logging
 */
825 /* FIXME: use unsigned long pointer instead of unsigned char */
826 unsigned char *kvm_dirty_bitmap = NULL;
827 int kvm_physical_memory_set_dirty_tracking(int enable)
829 int r = 0;
831 if (!kvm_allowed)
832 return 0;
834 if (enable) {
835 if (!kvm_dirty_bitmap) {
836 unsigned bitmap_size = BITMAP_SIZE(phys_ram_size);
837 kvm_dirty_bitmap = qemu_malloc(bitmap_size);
838 if (kvm_dirty_bitmap == NULL) {
839 perror("Failed to allocate dirty pages bitmap");
840 r=-1;
842 else {
843 r = kvm_dirty_pages_log_enable_all(kvm_context);
847 else {
848 if (kvm_dirty_bitmap) {
849 r = kvm_dirty_pages_log_reset(kvm_context);
850 qemu_free(kvm_dirty_bitmap);
851 kvm_dirty_bitmap = NULL;
854 return r;
857 /* get kvm's dirty pages bitmap and update qemu's */
858 int kvm_get_dirty_pages_log_slot(int slot,
859 unsigned char *bitmap,
860 unsigned int offset,
861 unsigned int len)
863 int r;
864 unsigned int i, j, n=0;
865 unsigned char c;
866 unsigned page_number, addr, addr1;
868 memset(bitmap, 0, len);
869 r = kvm_get_dirty_pages(kvm_context, slot, bitmap);
870 if (r)
871 return r;
874 * bitmap-traveling is faster than memory-traveling (for addr...)
875 * especially when most of the memory is not dirty.
877 for (i=0; i<len; i++) {
878 c = bitmap[i];
879 while (c>0) {
880 j = ffsl(c) - 1;
881 c &= ~(1u<<j);
882 page_number = i * 8 + j;
883 addr1 = page_number * TARGET_PAGE_SIZE;
884 addr = offset + addr1;
885 cpu_physical_memory_set_dirty(addr);
886 n++;
889 return 0;
893 * get kvm's dirty pages bitmap and update qemu's
894 * we only care about physical ram, which resides in slots 0 and 3
896 int kvm_update_dirty_pages_log(void)
898 int r = 0, len;
900 len = BITMAP_SIZE(0xa0000);
901 r = kvm_get_dirty_pages_log_slot(3, kvm_dirty_bitmap, 0 , len);
902 len = BITMAP_SIZE(phys_ram_size - 0xc0000);
903 r = r || kvm_get_dirty_pages_log_slot(0, kvm_dirty_bitmap, 0xc0000, len);
904 return r;
907 int kvm_get_phys_ram_page_bitmap(unsigned char *bitmap)
909 int r=0, len, offset;
911 len = BITMAP_SIZE(phys_ram_size);
912 memset(bitmap, 0, len);
914 r = kvm_get_mem_map(kvm_context, 3, bitmap);
915 if (r)
916 goto out;
918 offset = BITMAP_SIZE(0xc0000);
919 r = kvm_get_mem_map(kvm_context, 0, bitmap + offset);
921 out:
922 return r;
924 #endif