Replace the nic model with rtl8139.
[qemu-kvm/fedora.git] / qemu-kvm.c
#include "config.h"
#include "config-host.h"

#ifdef USE_KVM

#include "exec.h"

#include "qemu-kvm.h"
#include <kvmctl.h>
#include <string.h>

#define MSR_IA32_TSC 0x10

extern void perror(const char *s);

int kvm_allowed = 1;
kvm_context_t kvm_context;
static struct kvm_msr_list *kvm_msr_list;
static int kvm_has_msr_star;

#define NR_CPU 16
static CPUState *saved_env[NR_CPU];
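/*
 * Note: this early KVM glue drives a single VCPU only -- every
 * kvmctl call below hardcodes vcpu index 0 ("for now using cpu 0"),
 * and saved_env[0] holds the CPUState for it.
 */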
static void set_msr_entry(struct kvm_msr_entry *entry, uint32_t index,
                          uint64_t data)
{
    entry->index = index;
    entry->data  = data;
}
/* returns 0 on success, non-0 on failure */
static int get_msr_entry(struct kvm_msr_entry *entry, CPUState *env)
{
    switch (entry->index) {
    case MSR_IA32_SYSENTER_CS:
        env->sysenter_cs = entry->data;
        break;
    case MSR_IA32_SYSENTER_ESP:
        env->sysenter_esp = entry->data;
        break;
    case MSR_IA32_SYSENTER_EIP:
        env->sysenter_eip = entry->data;
        break;
    case MSR_STAR:
        env->star = entry->data;
        break;
#ifdef TARGET_X86_64
    case MSR_CSTAR:
        env->cstar = entry->data;
        break;
    case MSR_KERNELGSBASE:
        env->kernelgsbase = entry->data;
        break;
    case MSR_FMASK:
        env->fmask = entry->data;
        break;
    case MSR_LSTAR:
        env->lstar = entry->data;
        break;
#endif
    case MSR_IA32_TSC:
        env->tsc = entry->data;
        break;
    default:
        printf("Warning: unknown MSR index 0x%x\n", entry->index);
        return 1;
    }
    return 0;
}
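/*
 * Worst-case number of MSRs transferred by load_regs()/save_regs()
 * below: SYSENTER_{CS,ESP,EIP}, STAR and TSC, plus CSTAR,
 * KERNELGSBASE, FMASK and LSTAR on 64-bit targets.
 */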
#ifdef TARGET_X86_64
#define MSR_COUNT 9
#else
#define MSR_COUNT 5
#endif
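/*
 * In virtual-8086 mode the segment attributes are architecturally
 * fixed (writable data, DPL 3, 16-bit); only selector, base and
 * limit are taken from QEMU's segment cache.
 */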
static void set_v8086_seg(struct kvm_segment *lhs, const SegmentCache *rhs)
{
    lhs->selector = rhs->selector;
    lhs->base = rhs->base;
    lhs->limit = rhs->limit;
    lhs->type = 3;
    lhs->present = 1;
    lhs->dpl = 3;
    lhs->db = 0;
    lhs->s = 1;
    lhs->l = 0;
    lhs->g = 0;
    lhs->avl = 0;
    lhs->unusable = 0;
}
static void set_seg(struct kvm_segment *lhs, const SegmentCache *rhs)
{
    unsigned flags = rhs->flags;

    lhs->selector = rhs->selector;
    lhs->base = rhs->base;
    lhs->limit = rhs->limit;
    lhs->type = (flags >> DESC_TYPE_SHIFT) & 15;
    lhs->present = (flags & DESC_P_MASK) != 0;
    lhs->dpl = rhs->selector & 3;
    lhs->db = (flags >> DESC_B_SHIFT) & 1;
    lhs->s = (flags & DESC_S_MASK) != 0;
    lhs->l = (flags >> DESC_L_SHIFT) & 1;
    lhs->g = (flags & DESC_G_MASK) != 0;
    lhs->avl = (flags & DESC_AVL_MASK) != 0;
    lhs->unusable = 0;
}
static void get_seg(SegmentCache *lhs, const struct kvm_segment *rhs)
{
    lhs->selector = rhs->selector;
    lhs->base = rhs->base;
    lhs->limit = rhs->limit;
    lhs->flags =
        (rhs->type << DESC_TYPE_SHIFT)
        | (rhs->present * DESC_P_MASK)
        | (rhs->dpl << DESC_DPL_SHIFT)
        | (rhs->db << DESC_B_SHIFT)
        | (rhs->s * DESC_S_MASK)
        | (rhs->l << DESC_L_SHIFT)
        | (rhs->g * DESC_G_MASK)
        | (rhs->avl * DESC_AVL_MASK);
}
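/*
 * load_regs() pushes the whole QEMU CPUState (GPRs, segments,
 * control registers, MSRs) into the kernel before entering the
 * guest; save_regs() performs the inverse transfer after an exit.
 */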
static void load_regs(CPUState *env)
{
    struct kvm_regs regs;
    struct kvm_sregs sregs;
    struct kvm_msr_entry msrs[MSR_COUNT];
    int rc, n;

    /* hack: save env */
    if (!saved_env[0])
        saved_env[0] = env;

    regs.rax = env->regs[R_EAX];
    regs.rbx = env->regs[R_EBX];
    regs.rcx = env->regs[R_ECX];
    regs.rdx = env->regs[R_EDX];
    regs.rsi = env->regs[R_ESI];
    regs.rdi = env->regs[R_EDI];
    regs.rsp = env->regs[R_ESP];
    regs.rbp = env->regs[R_EBP];
#ifdef TARGET_X86_64
    regs.r8 = env->regs[8];
    regs.r9 = env->regs[9];
    regs.r10 = env->regs[10];
    regs.r11 = env->regs[11];
    regs.r12 = env->regs[12];
    regs.r13 = env->regs[13];
    regs.r14 = env->regs[14];
    regs.r15 = env->regs[15];
#endif

    regs.rflags = env->eflags;
    regs.rip = env->eip;

    kvm_set_regs(kvm_context, 0, &regs);

    memcpy(sregs.interrupt_bitmap, env->kvm_interrupt_bitmap,
           sizeof(sregs.interrupt_bitmap));

    if ((env->eflags & VM_MASK)) {
        set_v8086_seg(&sregs.cs, &env->segs[R_CS]);
        set_v8086_seg(&sregs.ds, &env->segs[R_DS]);
        set_v8086_seg(&sregs.es, &env->segs[R_ES]);
        set_v8086_seg(&sregs.fs, &env->segs[R_FS]);
        set_v8086_seg(&sregs.gs, &env->segs[R_GS]);
        set_v8086_seg(&sregs.ss, &env->segs[R_SS]);
    } else {
        set_seg(&sregs.cs, &env->segs[R_CS]);
        set_seg(&sregs.ds, &env->segs[R_DS]);
        set_seg(&sregs.es, &env->segs[R_ES]);
        set_seg(&sregs.fs, &env->segs[R_FS]);
        set_seg(&sregs.gs, &env->segs[R_GS]);
        set_seg(&sregs.ss, &env->segs[R_SS]);

        if (env->cr[0] & CR0_PE_MASK) {
            /* force ss cpl to cs cpl */
            sregs.ss.selector = (sregs.ss.selector & ~3) |
                                (sregs.cs.selector & 3);
            sregs.ss.dpl = sregs.ss.selector & 3;
        }
    }

    set_seg(&sregs.tr, &env->tr);
    set_seg(&sregs.ldt, &env->ldt);

    sregs.idt.limit = env->idt.limit;
    sregs.idt.base = env->idt.base;
    sregs.gdt.limit = env->gdt.limit;
    sregs.gdt.base = env->gdt.base;

    sregs.cr0 = env->cr[0];
    sregs.cr2 = env->cr[2];
    sregs.cr3 = env->cr[3];
    sregs.cr4 = env->cr[4];

    sregs.cr8 = cpu_get_apic_tpr(env);
    sregs.apic_base = cpu_get_apic_base(env);
    sregs.efer = env->efer;

    kvm_set_sregs(kvm_context, 0, &sregs);

    /* msrs */
    n = 0;
    set_msr_entry(&msrs[n++], MSR_IA32_SYSENTER_CS, env->sysenter_cs);
    set_msr_entry(&msrs[n++], MSR_IA32_SYSENTER_ESP, env->sysenter_esp);
    set_msr_entry(&msrs[n++], MSR_IA32_SYSENTER_EIP, env->sysenter_eip);
    if (kvm_has_msr_star)
        set_msr_entry(&msrs[n++], MSR_STAR, env->star);
    set_msr_entry(&msrs[n++], MSR_IA32_TSC, env->tsc);
#ifdef TARGET_X86_64
    set_msr_entry(&msrs[n++], MSR_CSTAR, env->cstar);
    set_msr_entry(&msrs[n++], MSR_KERNELGSBASE, env->kernelgsbase);
    set_msr_entry(&msrs[n++], MSR_FMASK, env->fmask);
    set_msr_entry(&msrs[n++], MSR_LSTAR, env->lstar);
#endif

    rc = kvm_set_msrs(kvm_context, 0, msrs, n);
    if (rc == -1)
        perror("kvm_set_msrs FAILED");
}
static void save_regs(CPUState *env)
{
    struct kvm_regs regs;
    struct kvm_sregs sregs;
    struct kvm_msr_entry msrs[MSR_COUNT];
    uint32_t hflags;
    uint32_t i, n, rc;

    kvm_get_regs(kvm_context, 0, &regs);

    env->regs[R_EAX] = regs.rax;
    env->regs[R_EBX] = regs.rbx;
    env->regs[R_ECX] = regs.rcx;
    env->regs[R_EDX] = regs.rdx;
    env->regs[R_ESI] = regs.rsi;
    env->regs[R_EDI] = regs.rdi;
    env->regs[R_ESP] = regs.rsp;
    env->regs[R_EBP] = regs.rbp;
#ifdef TARGET_X86_64
    env->regs[8] = regs.r8;
    env->regs[9] = regs.r9;
    env->regs[10] = regs.r10;
    env->regs[11] = regs.r11;
    env->regs[12] = regs.r12;
    env->regs[13] = regs.r13;
    env->regs[14] = regs.r14;
    env->regs[15] = regs.r15;
#endif

    env->eflags = regs.rflags;
    env->eip = regs.rip;

    kvm_get_sregs(kvm_context, 0, &sregs);

    memcpy(env->kvm_interrupt_bitmap, sregs.interrupt_bitmap,
           sizeof(env->kvm_interrupt_bitmap));

    get_seg(&env->segs[R_CS], &sregs.cs);
    get_seg(&env->segs[R_DS], &sregs.ds);
    get_seg(&env->segs[R_ES], &sregs.es);
    get_seg(&env->segs[R_FS], &sregs.fs);
    get_seg(&env->segs[R_GS], &sregs.gs);
    get_seg(&env->segs[R_SS], &sregs.ss);

    get_seg(&env->tr, &sregs.tr);
    get_seg(&env->ldt, &sregs.ldt);

    env->idt.limit = sregs.idt.limit;
    env->idt.base = sregs.idt.base;
    env->gdt.limit = sregs.gdt.limit;
    env->gdt.base = sregs.gdt.base;

    env->cr[0] = sregs.cr0;
    env->cr[2] = sregs.cr2;
    env->cr[3] = sregs.cr3;
    env->cr[4] = sregs.cr4;

    cpu_set_apic_tpr(env, sregs.cr8);
    cpu_set_apic_base(env, sregs.apic_base);

    env->efer = sregs.efer;
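    /*
     * Recompute env->hflags from the state just read back: hflags
     * caches CPL, protected/long/vm86 mode and segment-size bits that
     * QEMU's translator and helpers rely on, so it must be kept
     * consistent with CR0/CR4/EFER and the segment descriptors.
     */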
#define HFLAG_COPY_MASK ~( \
    HF_CPL_MASK | HF_PE_MASK | HF_MP_MASK | HF_EM_MASK | \
    HF_TS_MASK | HF_TF_MASK | HF_VM_MASK | HF_IOPL_MASK | \
    HF_OSFXSR_MASK | HF_LMA_MASK | HF_CS32_MASK | \
    HF_SS32_MASK | HF_CS64_MASK | HF_ADDSEG_MASK)

    hflags = (env->segs[R_CS].flags >> DESC_DPL_SHIFT) & HF_CPL_MASK;
    hflags |= (env->cr[0] & CR0_PE_MASK) << (HF_PE_SHIFT - CR0_PE_SHIFT);
    hflags |= (env->cr[0] << (HF_MP_SHIFT - CR0_MP_SHIFT)) &
              (HF_MP_MASK | HF_EM_MASK | HF_TS_MASK);
    hflags |= (env->eflags & (HF_TF_MASK | HF_VM_MASK | HF_IOPL_MASK));
    hflags |= (env->cr[4] & CR4_OSFXSR_MASK) <<
              (HF_OSFXSR_SHIFT - CR4_OSFXSR_SHIFT);

    if (env->efer & MSR_EFER_LMA) {
        hflags |= HF_LMA_MASK;
    }

    if ((hflags & HF_LMA_MASK) && (env->segs[R_CS].flags & DESC_L_MASK)) {
        hflags |= HF_CS32_MASK | HF_SS32_MASK | HF_CS64_MASK;
    } else {
        hflags |= (env->segs[R_CS].flags & DESC_B_MASK) >>
                  (DESC_B_SHIFT - HF_CS32_SHIFT);
        hflags |= (env->segs[R_SS].flags & DESC_B_MASK) >>
                  (DESC_B_SHIFT - HF_SS32_SHIFT);
        if (!(env->cr[0] & CR0_PE_MASK) ||
            (env->eflags & VM_MASK) ||
            !(hflags & HF_CS32_MASK)) {
            hflags |= HF_ADDSEG_MASK;
        } else {
            hflags |= ((env->segs[R_DS].base |
                        env->segs[R_ES].base |
                        env->segs[R_SS].base) != 0) <<
                      HF_ADDSEG_SHIFT;
        }
    }
    env->hflags = (env->hflags & HFLAG_COPY_MASK) | hflags;
    CC_SRC = env->eflags & (CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C);
    DF = 1 - (2 * ((env->eflags >> 10) & 1));
    CC_OP = CC_OP_EFLAGS;
    env->eflags &= ~(DF_MASK | CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C);

    tlb_flush(env, 1);

    /* msrs */
    n = 0;
    msrs[n++].index = MSR_IA32_SYSENTER_CS;
    msrs[n++].index = MSR_IA32_SYSENTER_ESP;
    msrs[n++].index = MSR_IA32_SYSENTER_EIP;
    if (kvm_has_msr_star)
        msrs[n++].index = MSR_STAR;
    msrs[n++].index = MSR_IA32_TSC;
#ifdef TARGET_X86_64
    msrs[n++].index = MSR_CSTAR;
    msrs[n++].index = MSR_KERNELGSBASE;
    msrs[n++].index = MSR_FMASK;
    msrs[n++].index = MSR_LSTAR;
#endif
    rc = kvm_get_msrs(kvm_context, 0, msrs, n);
    if (rc == -1) {
        perror("kvm_get_msrs FAILED");
    } else {
        n = rc; /* actual number of MSRs */
        for (i = 0; i < n; i++) {
            if (get_msr_entry(&msrs[i], env))
                return;
        }
    }
}
#include <signal.h>
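/*
 * The callbacks below implement the kvmctl callback interface: they
 * are invoked from kvm_run() to service guest exits (cpuid, port I/O,
 * MMIO, halt, debug) and to manage the interrupt injection window.
 */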
static int try_push_interrupts(void *opaque)
{
    CPUState **envs = opaque, *env;
    env = envs[0];

    if (env->ready_for_interrupt_injection &&
        (env->interrupt_request & CPU_INTERRUPT_HARD) &&
        (env->eflags & IF_MASK)) {
        env->interrupt_request &= ~CPU_INTERRUPT_HARD;
        // for now using cpu 0
        kvm_inject_irq(kvm_context, 0, cpu_get_pic_interrupt(env));
    }

    return (env->interrupt_request & CPU_INTERRUPT_HARD) != 0;
}
static void post_kvm_run(void *opaque, struct kvm_run *kvm_run)
{
    CPUState **envs = opaque, *env;
    env = envs[0];

    env->eflags = kvm_run->if_flag ? env->eflags | IF_MASK
                                   : env->eflags & ~IF_MASK;
    env->ready_for_interrupt_injection = kvm_run->ready_for_interrupt_injection;
    cpu_set_apic_tpr(env, kvm_run->cr8);
    cpu_set_apic_base(env, kvm_run->apic_base);
}
void kvm_load_registers(CPUState *env)
{
    load_regs(env);
}

void kvm_save_registers(CPUState *env)
{
    save_regs(env);
}
int kvm_cpu_exec(CPUState *env)
{
    int pending = (!env->ready_for_interrupt_injection ||
                   ((env->interrupt_request & CPU_INTERRUPT_HARD) &&
                    (env->eflags & IF_MASK)));

    if (!pending && (env->interrupt_request & CPU_INTERRUPT_EXIT)) {
        env->interrupt_request &= ~CPU_INTERRUPT_EXIT;
        env->exception_index = EXCP_INTERRUPT;
        cpu_loop_exit();
    }

    if (!saved_env[0])
        saved_env[0] = env;

    kvm_run(kvm_context, 0);

    return 0;
}
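/*
 * cpuid exits are satisfied by QEMU's own cpuid emulation
 * (helper_cpuid()); for leaf 0x80000001 the feature bits are then
 * intersected with the host's real cpuid so that long mode, syscall
 * and NX are only advertised when the host can actually run them.
 */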
static int kvm_cpuid(void *opaque, uint64_t *rax, uint64_t *rbx,
                     uint64_t *rcx, uint64_t *rdx)
{
    CPUState **envs = opaque;
    CPUState *saved_env;
    uint32_t eax = *rax;

    saved_env = env;
    env = envs[0];

    env->regs[R_EAX] = *rax;
    env->regs[R_EBX] = *rbx;
    env->regs[R_ECX] = *rcx;
    env->regs[R_EDX] = *rdx;
    helper_cpuid();
    *rdx = env->regs[R_EDX];
    *rcx = env->regs[R_ECX];
    *rbx = env->regs[R_EBX];
    *rax = env->regs[R_EAX];
    // don't report long mode/syscall/nx if no native support
    if (eax == 0x80000001) {
        unsigned long h_eax = eax, h_edx;

        // push/pop hack to workaround gcc 3 register pressure trouble
        asm (
#ifdef __x86_64__
             "push %%rbx; push %%rcx; cpuid; pop %%rcx; pop %%rbx"
#else
             "push %%ebx; push %%ecx; cpuid; pop %%ecx; pop %%ebx"
#endif
             : "+a"(h_eax), "=d"(h_edx));

        // long mode
        if ((h_edx & 0x20000000) == 0)
            *rdx &= ~0x20000000ull;
        // syscall
        if ((h_edx & 0x00000800) == 0)
            *rdx &= ~0x00000800ull;
        // nx
        if ((h_edx & 0x00100000) == 0)
            *rdx &= ~0x00100000ull;
    }

    env = saved_env;
    return 0;
}
static int kvm_debug(void *opaque, int vcpu)
{
    CPUState **envs = opaque;

    env = envs[0];
    env->exception_index = EXCP_DEBUG;
    return 1;
}
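/*
 * Port I/O and MMIO exits are forwarded to the emulated device
 * models through QEMU's regular cpu_inX/cpu_outX and
 * ldX_phys/stX_phys accessors.
 */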
static int kvm_inb(void *opaque, uint16_t addr, uint8_t *data)
{
    *data = cpu_inb(0, addr);
    return 0;
}

static int kvm_inw(void *opaque, uint16_t addr, uint16_t *data)
{
    *data = cpu_inw(0, addr);
    return 0;
}

static int kvm_inl(void *opaque, uint16_t addr, uint32_t *data)
{
    *data = cpu_inl(0, addr);
    return 0;
}

static int kvm_outb(void *opaque, uint16_t addr, uint8_t data)
{
    cpu_outb(0, addr, data);
    return 0;
}

static int kvm_outw(void *opaque, uint16_t addr, uint16_t data)
{
    cpu_outw(0, addr, data);
    return 0;
}

static int kvm_outl(void *opaque, uint16_t addr, uint32_t data)
{
    cpu_outl(0, addr, data);
    return 0;
}
static int kvm_readb(void *opaque, uint64_t addr, uint8_t *data)
{
    *data = ldub_phys(addr);
    return 0;
}

static int kvm_readw(void *opaque, uint64_t addr, uint16_t *data)
{
    *data = lduw_phys(addr);
    return 0;
}

static int kvm_readl(void *opaque, uint64_t addr, uint32_t *data)
{
    *data = ldl_phys(addr);
    return 0;
}

static int kvm_readq(void *opaque, uint64_t addr, uint64_t *data)
{
    *data = ldq_phys(addr);
    return 0;
}

static int kvm_writeb(void *opaque, uint64_t addr, uint8_t data)
{
    stb_phys(addr, data);
    return 0;
}

static int kvm_writew(void *opaque, uint64_t addr, uint16_t data)
{
    stw_phys(addr, data);
    return 0;
}

static int kvm_writel(void *opaque, uint64_t addr, uint32_t data)
{
    stl_phys(addr, data);
    return 0;
}

static int kvm_writeq(void *opaque, uint64_t addr, uint64_t data)
{
    stq_phys(addr, data);
    return 0;
}
static int kvm_io_window(void *opaque)
{
    return 1;
}
static int kvm_halt(void *opaque, int vcpu)
{
    CPUState **envs = opaque, *env;

    env = envs[0];
    if (!((env->interrupt_request & CPU_INTERRUPT_HARD) &&
          (env->eflags & IF_MASK))) {
        env->hflags |= HF_HALTED_MASK;
        env->exception_index = EXCP_HLT;
    }

    return 1;
}
static struct kvm_callbacks qemu_kvm_ops = {
    .cpuid = kvm_cpuid,
    .debug = kvm_debug,
    .inb = kvm_inb,
    .inw = kvm_inw,
    .inl = kvm_inl,
    .outb = kvm_outb,
    .outw = kvm_outw,
    .outl = kvm_outl,
    .readb = kvm_readb,
    .readw = kvm_readw,
    .readl = kvm_readl,
    .readq = kvm_readq,
    .writeb = kvm_writeb,
    .writew = kvm_writew,
    .writel = kvm_writel,
    .writeq = kvm_writeq,
    .halt = kvm_halt,
    .io_window = kvm_io_window,
    .try_push_interrupts = try_push_interrupts,
    .post_kvm_run = post_kvm_run,
};
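/*
 * Public entry points.  A plausible call sequence from the generic
 * QEMU code (the exact call sites live elsewhere in the tree, so
 * this is a sketch, not a contract):
 *
 *     kvm_qemu_init();            // open kvm, register callbacks
 *     kvm_qemu_create_context();  // create the VM, register guest RAM
 *     // then, per CPU-loop iteration:
 *     kvm_load_registers(env);    // push CPUState into the kernel
 *     kvm_cpu_exec(env);          // run the guest until an exit
 *     kvm_save_registers(env);    // pull the resulting state back
 */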
int kvm_qemu_init()
{
    /* Try to initialize kvm */
    kvm_context = kvm_init(&qemu_kvm_ops, saved_env);
    if (!kvm_context) {
        return -1;
    }

    return 0;
}
int kvm_qemu_create_context(void)
{
    int i;

    if (kvm_create(kvm_context, phys_ram_size, (void **)&phys_ram_base) < 0) {
        kvm_qemu_destroy();
        return -1;
    }
    kvm_msr_list = kvm_get_msr_list(kvm_context);
    if (!kvm_msr_list) {
        kvm_qemu_destroy();
        return -1;
    }
    for (i = 0; i < kvm_msr_list->nmsrs; ++i)
        if (kvm_msr_list->indices[i] == MSR_STAR)
            kvm_has_msr_star = 1;
    return 0;
}
void kvm_qemu_destroy(void)
{
    kvm_finalize(kvm_context);
}
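/*
 * Mirror QEMU's breakpoint and single-step state into the kernel.
 * Only the first four breakpoints fit, matching the four x86
 * hardware debug registers.
 */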
int kvm_update_debugger(CPUState *env)
{
    struct kvm_debug_guest dbg;
    int i;

    dbg.enabled = 0;
    if (env->nb_breakpoints || env->singlestep_enabled) {
        dbg.enabled = 1;
        for (i = 0; i < 4 && i < env->nb_breakpoints; ++i) {
            dbg.breakpoints[i].enabled = 1;
            dbg.breakpoints[i].address = env->breakpoints[i];
        }
        dbg.singlestep = env->singlestep_enabled;
    }
    return kvm_guest_debug(kvm_context, 0, &dbg);
}
#endif /* USE_KVM */