/* qemu-kvm.git / qemu-kvm-x86.c
 * (from commit "qemu-kvm: Use upstream kvm_arch_get/put_registers",
 *  blob f7cd30da2862ad0433f0e8cc7e48eccff65c9a90) */
/*
 * qemu/kvm integration, x86 specific code
 *
 * Copyright (C) 2006-2008 Qumranet Technologies
 *
 * Licensed under the terms of the GNU GPL version 2 or higher.
 */
9 #include "config.h"
10 #include "config-host.h"
12 #include <string.h>
13 #include "hw/hw.h"
14 #include "gdbstub.h"
15 #include <sys/io.h>
17 #include "qemu-kvm.h"
18 #include "libkvm.h"
19 #include <pthread.h>
20 #include <sys/utsname.h>
21 #include <linux/kvm_para.h>
22 #include <sys/ioctl.h>
24 #include "kvm.h"
25 #include "hw/apic.h"
27 #define MSR_IA32_TSC 0x10
29 extern unsigned int kvm_shadow_memory;
31 int kvm_set_tss_addr(kvm_context_t kvm, unsigned long addr)
33 int r;
35 r = kvm_vm_ioctl(kvm_state, KVM_SET_TSS_ADDR, addr);
36 if (r < 0) {
37 fprintf(stderr, "kvm_set_tss_addr: %m\n");
38 return r;
40 return 0;
43 static int kvm_init_tss(kvm_context_t kvm)
45 int r;
47 r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_SET_TSS_ADDR);
48 if (r > 0) {
50 * this address is 3 pages before the bios, and the bios should present
51 * as unavaible memory
53 r = kvm_set_tss_addr(kvm, 0xfeffd000);
54 if (r < 0) {
55 fprintf(stderr, "kvm_init_tss: unable to set tss addr\n");
56 return r;
58 } else {
59 fprintf(stderr, "kvm does not support KVM_CAP_SET_TSS_ADDR\n");
61 return 0;
64 static int kvm_set_identity_map_addr(kvm_context_t kvm, uint64_t addr)
66 #ifdef KVM_CAP_SET_IDENTITY_MAP_ADDR
67 int r;
69 r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_SET_IDENTITY_MAP_ADDR);
70 if (r > 0) {
71 r = kvm_vm_ioctl(kvm_state, KVM_SET_IDENTITY_MAP_ADDR, &addr);
72 if (r == -1) {
73 fprintf(stderr, "kvm_set_identity_map_addr: %m\n");
74 return -errno;
76 return 0;
78 #endif
79 return -ENOSYS;
82 static int kvm_init_identity_map_page(kvm_context_t kvm)
84 #ifdef KVM_CAP_SET_IDENTITY_MAP_ADDR
85 int r;
87 r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_SET_IDENTITY_MAP_ADDR);
88 if (r > 0) {
90 * this address is 4 pages before the bios, and the bios should present
91 * as unavaible memory
93 r = kvm_set_identity_map_addr(kvm, 0xfeffc000);
94 if (r < 0) {
95 fprintf(stderr, "kvm_init_identity_map_page: "
96 "unable to set identity mapping addr\n");
97 return r;
100 #endif
101 return 0;
104 static int kvm_create_pit(kvm_context_t kvm)
106 #ifdef KVM_CAP_PIT
107 int r;
109 kvm_state->pit_in_kernel = 0;
110 if (!kvm->no_pit_creation) {
111 r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_PIT);
112 if (r > 0) {
113 r = kvm_vm_ioctl(kvm_state, KVM_CREATE_PIT);
114 if (r >= 0) {
115 kvm_state->pit_in_kernel = 1;
116 } else {
117 fprintf(stderr, "Create kernel PIC irqchip failed\n");
118 return r;
122 #endif
123 return 0;
126 int kvm_arch_create(kvm_context_t kvm, unsigned long phys_mem_bytes,
127 void **vm_mem)
129 int r = 0;
131 r = kvm_init_tss(kvm);
132 if (r < 0) {
133 return r;
136 r = kvm_init_identity_map_page(kvm);
137 if (r < 0) {
138 return r;
142 * Tell fw_cfg to notify the BIOS to reserve the range.
144 if (e820_add_entry(0xfeffc000, 0x4000, E820_RESERVED) < 0) {
145 perror("e820_add_entry() table is full");
146 exit(1);
149 r = kvm_create_pit(kvm);
150 if (r < 0) {
151 return r;
154 r = kvm_init_coalesced_mmio(kvm);
155 if (r < 0) {
156 return r;
159 return 0;
#ifdef KVM_EXIT_TPR_ACCESS

/* Forward a KVM_EXIT_TPR_ACCESS exit to the userspace APIC emulation. */
static int kvm_handle_tpr_access(CPUState *env)
{
    struct kvm_run *run = env->kvm_run;

    kvm_tpr_access_report(env, run->tpr_access.rip, run->tpr_access.is_write);
    return 0;
}

/* Register the guest's VAPIC page address with the kernel. */
int kvm_enable_vapic(CPUState *env, uint64_t vapic)
{
    struct kvm_vapic_addr va = {
        .vapic_addr = vapic,
    };

    return kvm_vcpu_ioctl(env, KVM_SET_VAPIC_ADDR, &va);
}

#endif
185 extern CPUState *kvm_debug_cpu_requested;
187 int kvm_arch_run(CPUState *env)
189 int r = 0;
190 struct kvm_run *run = env->kvm_run;
192 switch (run->exit_reason) {
193 #ifdef KVM_EXIT_SET_TPR
194 case KVM_EXIT_SET_TPR:
195 break;
196 #endif
197 #ifdef KVM_EXIT_TPR_ACCESS
198 case KVM_EXIT_TPR_ACCESS:
199 r = kvm_handle_tpr_access(env);
200 break;
201 #endif
202 #ifdef KVM_CAP_SET_GUEST_DEBUG
203 case KVM_EXIT_DEBUG:
204 DPRINTF("kvm_exit_debug\n");
205 r = kvm_handle_debug(&run->debug.arch);
206 if (r == EXCP_DEBUG) {
207 kvm_debug_cpu_requested = env;
208 env->stopped = 1;
210 break;
211 #endif /* KVM_CAP_SET_GUEST_DEBUG */
212 default:
213 r = -1;
214 break;
217 return r;
#ifdef KVM_CAP_IRQCHIP

/*
 * Read the in-kernel local APIC state into *s.
 * No-op (returns 0) when the irqchip is emulated in userspace.
 */
int kvm_get_lapic(CPUState *env, struct kvm_lapic_state *s)
{
    int ret = 0;

    if (!kvm_irqchip_in_kernel()) {
        return ret;
    }

    ret = kvm_vcpu_ioctl(env, KVM_GET_LAPIC, s);
    if (ret < 0) {
        fprintf(stderr, "KVM_GET_LAPIC failed\n");
    }
    return ret;
}

/*
 * Load the in-kernel local APIC state from *s.
 * No-op (returns 0) when the irqchip is emulated in userspace.
 */
int kvm_set_lapic(CPUState *env, struct kvm_lapic_state *s)
{
    int ret = 0;

    if (!kvm_irqchip_in_kernel()) {
        return 0;
    }

    ret = kvm_vcpu_ioctl(env, KVM_SET_LAPIC, s);
    if (ret < 0) {
        fprintf(stderr, "KVM_SET_LAPIC failed\n");
    }
    return ret;
}

#endif
#ifdef KVM_CAP_PIT

/* Read the in-kernel PIT state; no-op when the PIT is in userspace. */
int kvm_get_pit(kvm_context_t kvm, struct kvm_pit_state *s)
{
    if (!kvm_pit_in_kernel()) {
        return 0;
    }
    return kvm_vm_ioctl(kvm_state, KVM_GET_PIT, s);
}

/* Load the in-kernel PIT state; no-op when the PIT is in userspace. */
int kvm_set_pit(kvm_context_t kvm, struct kvm_pit_state *s)
{
    if (!kvm_pit_in_kernel()) {
        return 0;
    }
    return kvm_vm_ioctl(kvm_state, KVM_SET_PIT, s);
}

#ifdef KVM_CAP_PIT_STATE2

/* Extended (state2) variant of kvm_get_pit. */
int kvm_get_pit2(kvm_context_t kvm, struct kvm_pit_state2 *ps2)
{
    if (!kvm_pit_in_kernel()) {
        return 0;
    }
    return kvm_vm_ioctl(kvm_state, KVM_GET_PIT2, ps2);
}

/* Extended (state2) variant of kvm_set_pit. */
int kvm_set_pit2(kvm_context_t kvm, struct kvm_pit_state2 *ps2)
{
    if (!kvm_pit_in_kernel()) {
        return 0;
    }
    return kvm_vm_ioctl(kvm_state, KVM_SET_PIT2, ps2);
}

#endif
#endif
293 int kvm_has_pit_state2(kvm_context_t kvm)
295 int r = 0;
297 #ifdef KVM_CAP_PIT_STATE2
298 r = kvm_check_extension(kvm_state, KVM_CAP_PIT_STATE2);
299 #endif
300 return r;
303 void kvm_show_code(CPUState *env)
305 #define SHOW_CODE_LEN 50
306 struct kvm_regs regs;
307 struct kvm_sregs sregs;
308 int r, n;
309 int back_offset;
310 unsigned char code;
311 char code_str[SHOW_CODE_LEN * 3 + 1];
312 unsigned long rip;
314 r = kvm_vcpu_ioctl(env, KVM_GET_SREGS, &sregs);
315 if (r < 0 ) {
316 perror("KVM_GET_SREGS");
317 return;
319 r = kvm_vcpu_ioctl(env, KVM_GET_REGS, &regs);
320 if (r < 0) {
321 perror("KVM_GET_REGS");
322 return;
324 rip = sregs.cs.base + regs.rip;
325 back_offset = regs.rip;
326 if (back_offset > 20) {
327 back_offset = 20;
329 *code_str = 0;
330 for (n = -back_offset; n < SHOW_CODE_LEN-back_offset; ++n) {
331 if (n == 0) {
332 strcat(code_str, " -->");
334 cpu_physical_memory_rw(rip + n, &code, 1, 1);
335 sprintf(code_str + strlen(code_str), " %02x", code);
337 fprintf(stderr, "code:%s\n", code_str);
340 static void print_seg(FILE *file, const char *name, struct kvm_segment *seg)
342 fprintf(stderr,
343 "%s %04x (%08llx/%08x p %d dpl %d db %d s %d type %x l %d"
344 " g %d avl %d)\n",
345 name, seg->selector, seg->base, seg->limit, seg->present,
346 seg->dpl, seg->db, seg->s, seg->type, seg->l, seg->g,
347 seg->avl);
350 static void print_dt(FILE *file, const char *name, struct kvm_dtable *dt)
352 fprintf(stderr, "%s %llx/%x\n", name, dt->base, dt->limit);
355 void kvm_show_regs(CPUState *env)
357 struct kvm_regs regs;
358 struct kvm_sregs sregs;
359 int r;
361 r = kvm_vcpu_ioctl(env, KVM_GET_REGS, &regs);
362 if (r < 0) {
363 perror("KVM_GET_REGS");
364 return;
366 fprintf(stderr,
367 "rax %016llx rbx %016llx rcx %016llx rdx %016llx\n"
368 "rsi %016llx rdi %016llx rsp %016llx rbp %016llx\n"
369 "r8 %016llx r9 %016llx r10 %016llx r11 %016llx\n"
370 "r12 %016llx r13 %016llx r14 %016llx r15 %016llx\n"
371 "rip %016llx rflags %08llx\n",
372 regs.rax, regs.rbx, regs.rcx, regs.rdx,
373 regs.rsi, regs.rdi, regs.rsp, regs.rbp,
374 regs.r8, regs.r9, regs.r10, regs.r11,
375 regs.r12, regs.r13, regs.r14, regs.r15,
376 regs.rip, regs.rflags);
377 r = kvm_vcpu_ioctl(env, KVM_GET_SREGS, &sregs);
378 if (r < 0) {
379 perror("KVM_GET_SREGS");
380 return;
382 print_seg(stderr, "cs", &sregs.cs);
383 print_seg(stderr, "ds", &sregs.ds);
384 print_seg(stderr, "es", &sregs.es);
385 print_seg(stderr, "ss", &sregs.ss);
386 print_seg(stderr, "fs", &sregs.fs);
387 print_seg(stderr, "gs", &sregs.gs);
388 print_seg(stderr, "tr", &sregs.tr);
389 print_seg(stderr, "ldt", &sregs.ldt);
390 print_dt(stderr, "gdt", &sregs.gdt);
391 print_dt(stderr, "idt", &sregs.idt);
392 fprintf(stderr, "cr0 %llx cr2 %llx cr3 %llx cr4 %llx cr8 %llx"
393 " efer %llx\n",
394 sregs.cr0, sregs.cr2, sregs.cr3, sregs.cr4, sregs.cr8,
395 sregs.efer);
398 static void kvm_set_cr8(CPUState *env, uint64_t cr8)
400 env->kvm_run->cr8 = cr8;
403 int kvm_set_shadow_pages(kvm_context_t kvm, unsigned int nrshadow_pages)
405 #ifdef KVM_CAP_MMU_SHADOW_CACHE_CONTROL
406 int r;
408 r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION,
409 KVM_CAP_MMU_SHADOW_CACHE_CONTROL);
410 if (r > 0) {
411 r = kvm_vm_ioctl(kvm_state, KVM_SET_NR_MMU_PAGES, nrshadow_pages);
412 if (r < 0) {
413 fprintf(stderr, "kvm_set_shadow_pages: %m\n");
414 return r;
416 return 0;
418 #endif
419 return -1;
422 int kvm_get_shadow_pages(kvm_context_t kvm, unsigned int *nrshadow_pages)
424 #ifdef KVM_CAP_MMU_SHADOW_CACHE_CONTROL
425 int r;
427 r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION,
428 KVM_CAP_MMU_SHADOW_CACHE_CONTROL);
429 if (r > 0) {
430 *nrshadow_pages = kvm_vm_ioctl(kvm_state, KVM_GET_NR_MMU_PAGES);
431 return 0;
433 #endif
434 return -1;
#ifdef KVM_CAP_VAPIC

/*
 * Ask the kernel to report guest TPR accesses (VAPIC support).
 * Returns -ENOSYS when the capability is absent, otherwise the
 * KVM_TPR_ACCESS_REPORTING ioctl result.
 */
static int kvm_enable_tpr_access_reporting(CPUState *env)
{
    struct kvm_tpr_access_ctl tac = { .enabled = 1 };
    int ret = kvm_ioctl(env->kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_VAPIC);

    if (ret <= 0) {
        return -ENOSYS;
    }
    return kvm_vcpu_ioctl(env, KVM_TPR_ACCESS_REPORTING, &tac);
}

#endif
451 int kvm_arch_qemu_create_context(void)
453 int r;
454 struct utsname utsname;
456 uname(&utsname);
457 lm_capable_kernel = strcmp(utsname.machine, "x86_64") == 0;
459 if (kvm_shadow_memory) {
460 kvm_set_shadow_pages(kvm_context, kvm_shadow_memory);
463 /* initialize has_msr_star/has_msr_hsave_pa */
464 r = kvm_get_supported_msrs(kvm_state);
465 if (r < 0) {
466 return r;
469 r = kvm_set_boot_cpu_id(0);
470 if (r < 0 && r != -ENOSYS) {
471 return r;
474 return 0;
477 static int _kvm_arch_init_vcpu(CPUState *env)
479 kvm_arch_reset_vcpu(env);
481 #ifdef KVM_EXIT_TPR_ACCESS
482 kvm_enable_tpr_access_reporting(env);
483 #endif
484 return 0;
487 int kvm_arch_halt(CPUState *env)
490 if (!((env->interrupt_request & CPU_INTERRUPT_HARD) &&
491 (env->eflags & IF_MASK)) &&
492 !(env->interrupt_request & CPU_INTERRUPT_NMI)) {
493 env->halted = 1;
495 return 1;
498 void kvm_arch_pre_run(CPUState *env, struct kvm_run *run)
500 if (!kvm_irqchip_in_kernel()) {
501 kvm_set_cr8(env, cpu_get_apic_tpr(env->apic_state));
505 int kvm_arch_has_work(CPUState *env)
507 if (((env->interrupt_request & CPU_INTERRUPT_HARD) &&
508 (env->eflags & IF_MASK)) ||
509 (env->interrupt_request & CPU_INTERRUPT_NMI)) {
510 return 1;
512 return 0;
515 int kvm_arch_try_push_interrupts(void *opaque)
517 CPUState *env = cpu_single_env;
518 int r, irq;
520 if (kvm_is_ready_for_interrupt_injection(env) &&
521 (env->interrupt_request & CPU_INTERRUPT_HARD) &&
522 (env->eflags & IF_MASK)) {
523 env->interrupt_request &= ~CPU_INTERRUPT_HARD;
524 irq = cpu_get_pic_interrupt(env);
525 if (irq >= 0) {
526 r = kvm_inject_irq(env, irq);
527 if (r < 0) {
528 printf("cpu %d fail inject %x\n", env->cpu_index, irq);
533 return (env->interrupt_request & CPU_INTERRUPT_HARD) != 0;
#ifdef KVM_CAP_USER_NMI
/* Inject a pending NMI into the vcpu, clearing the request flag. */
void kvm_arch_push_nmi(void *opaque)
{
    CPUState *env = cpu_single_env;
    int ret;

    if (likely(!(env->interrupt_request & CPU_INTERRUPT_NMI))) {
        return;
    }

    env->interrupt_request &= ~CPU_INTERRUPT_NMI;
    ret = kvm_inject_nmi(env);
    if (ret < 0) {
        printf("cpu %d fail inject NMI\n", env->cpu_index);
    }
}
#endif /* KVM_CAP_USER_NMI */
#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
/* Apply an ioperm() request (device assignment: grant/revoke port access). */
void kvm_arch_do_ioperm(void *_data)
{
    struct ioperm_data *data = _data;

    ioperm(data->start_port, data->num, data->turn_on);
}
#endif
563 * Setup x86 specific IRQ routing
565 int kvm_arch_init_irq_routing(void)
567 int i, r;
569 if (kvm_irqchip && kvm_has_gsi_routing()) {
570 kvm_clear_gsi_routes();
571 for (i = 0; i < 8; ++i) {
572 if (i == 2) {
573 continue;
575 r = kvm_add_irq_route(i, KVM_IRQCHIP_PIC_MASTER, i);
576 if (r < 0) {
577 return r;
580 for (i = 8; i < 16; ++i) {
581 r = kvm_add_irq_route(i, KVM_IRQCHIP_PIC_SLAVE, i - 8);
582 if (r < 0) {
583 return r;
586 for (i = 0; i < 24; ++i) {
587 if (i == 0 && irq0override) {
588 r = kvm_add_irq_route(i, KVM_IRQCHIP_IOAPIC, 2);
589 } else if (i != 2 || !irq0override) {
590 r = kvm_add_irq_route(i, KVM_IRQCHIP_IOAPIC, i);
592 if (r < 0) {
593 return r;
596 kvm_commit_irq_routes();
598 return 0;
601 void kvm_arch_process_irqchip_events(CPUState *env)
603 if (env->interrupt_request & CPU_INTERRUPT_INIT) {
604 kvm_cpu_synchronize_state(env);
605 do_cpu_init(env);
607 if (env->interrupt_request & CPU_INTERRUPT_SIPI) {
608 kvm_cpu_synchronize_state(env);
609 do_cpu_sipi(env);
613 int kvm_arch_process_async_events(CPUState *env)
615 if (env->interrupt_request & CPU_INTERRUPT_MCE) {
616 /* We must not raise CPU_INTERRUPT_MCE if it's not supported. */
617 assert(env->mcg_cap);
619 env->interrupt_request &= ~CPU_INTERRUPT_MCE;
621 kvm_cpu_synchronize_state(env);
623 if (env->exception_injected == EXCP08_DBLE) {
624 /* this means triple fault */
625 qemu_system_reset_request();
626 env->exit_request = 1;
627 return 0;
629 env->exception_injected = EXCP12_MCHK;
630 env->has_error_code = 0;
632 env->halted = 0;
633 if (kvm_irqchip_in_kernel() && env->mp_state == KVM_MP_STATE_HALTED) {
634 env->mp_state = KVM_MP_STATE_RUNNABLE;
637 return 0;