/*
 * Copyright (C) 2009. SUSE Linux Products GmbH. All rights reserved.
 *
 * Alexander Graf <agraf@suse.de>
 * Kevin Wolf <mail@kevin-wolf.de>
 *
 * This file is derived from arch/powerpc/kvm/44x.c,
 * by Hollis Blanchard <hollisb@us.ibm.com>.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License, version 2, as
 * published by the Free Software Foundation.
 */

#include <linux/kvm_host.h>
#include <linux/err.h>

#include <asm/cputable.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
#include <asm/uaccess.h>
#include <asm/kvm_ppc.h>
#include <asm/kvm_book3s.h>
#include <asm/mmu_context.h>
#include <linux/gfp.h>
#include <linux/sched.h>
#include <linux/vmalloc.h>

#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU

/* #define EXIT_DEBUG */
/* #define EXIT_DEBUG_SIMPLE */
/* #define DEBUG_EXT */

static void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr);

struct kvm_stats_debugfs_item debugfs_entries[] = {
        { "exits",       VCPU_STAT(sum_exits) },
        { "mmio",        VCPU_STAT(mmio_exits) },
        { "sig",         VCPU_STAT(signal_exits) },
        { "sysc",        VCPU_STAT(syscall_exits) },
        { "inst_emu",    VCPU_STAT(emulated_inst_exits) },
        { "dec",         VCPU_STAT(dec_exits) },
        { "ext_intr",    VCPU_STAT(ext_intr_exits) },
        { "queue_intr",  VCPU_STAT(queue_intr) },
        { "halt_wakeup", VCPU_STAT(halt_wakeup) },
        { "pf_storage",  VCPU_STAT(pf_storage) },
        { "sp_storage",  VCPU_STAT(sp_storage) },
        { "pf_instruc",  VCPU_STAT(pf_instruc) },
        { "sp_instruc",  VCPU_STAT(sp_instruc) },
        { "ld",          VCPU_STAT(ld) },
        { "ld_slow",     VCPU_STAT(ld_slow) },
        { "st",          VCPU_STAT(st) },
        { "st_slow",     VCPU_STAT(st_slow) },
};

void kvmppc_core_load_host_debugstate(struct kvm_vcpu *vcpu)
{
}

void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu)
{
}

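/*
 * On vcpu_load/put the shadow SLB and the shadow vcpu state are copied
 * between this vcpu and the per-CPU PACA, so that the low-level entry and
 * exit code can reach them while the guest runs on this physical CPU.
 */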
void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
        memcpy(get_paca()->kvm_slb, to_book3s(vcpu)->slb_shadow, sizeof(get_paca()->kvm_slb));
        memcpy(&get_paca()->shadow_vcpu, &to_book3s(vcpu)->shadow_vcpu,
               sizeof(get_paca()->shadow_vcpu));
        get_paca()->kvm_slb_max = to_book3s(vcpu)->slb_shadow_max;
}

void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
{
        memcpy(to_book3s(vcpu)->slb_shadow, get_paca()->kvm_slb, sizeof(get_paca()->kvm_slb));
        memcpy(&to_book3s(vcpu)->shadow_vcpu, &get_paca()->shadow_vcpu,
               sizeof(get_paca()->shadow_vcpu));
        to_book3s(vcpu)->slb_shadow_max = get_paca()->kvm_slb_max;

        kvmppc_giveup_ext(vcpu, MSR_FP);
        kvmppc_giveup_ext(vcpu, MSR_VEC);
        kvmppc_giveup_ext(vcpu, MSR_VSX);
}

#if defined(EXIT_DEBUG)
static u32 kvmppc_get_dec(struct kvm_vcpu *vcpu)
{
        u64 jd = mftb() - vcpu->arch.dec_jiffies;
        return vcpu->arch.dec - jd;
}
#endif

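/*
 * Recompute the MSR that is actually used while the guest runs: keep the
 * guest-controlled bits (FE0/FE1/SF/SE), force the bits the host relies on
 * (ME/RI/IR/DR/PR), and pass through whichever external providers
 * (FP/VEC/VSX) the guest currently owns.
 */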
static void kvmppc_recalc_shadow_msr(struct kvm_vcpu *vcpu)
{
        vcpu->arch.shadow_msr = vcpu->arch.msr;
        /* Guest MSR values */
        vcpu->arch.shadow_msr &= MSR_FE0 | MSR_FE1 | MSR_SF | MSR_SE |
                                 MSR_BE | MSR_DE;
        /* Process MSR values */
        vcpu->arch.shadow_msr |= MSR_ME | MSR_RI | MSR_IR | MSR_DR | MSR_PR |
                                 MSR_EE;
        /* External providers the guest reserved */
        vcpu->arch.shadow_msr |= (vcpu->arch.msr & vcpu->arch.guest_owned_ext);
        /* 64-bit Process MSR values */
#ifdef CONFIG_PPC_BOOK3S_64
        vcpu->arch.shadow_msr |= MSR_ISF | MSR_HV;
#endif
}

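/*
 * Setting the MSR may put the vcpu to sleep (MSR_WE/MSR_POW with nothing
 * pending) and forces a segment flush and remap whenever the IR/DR or PR
 * bits change, since those change how addresses are translated.
 */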
void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr)
{
        ulong old_msr = vcpu->arch.msr;

#ifdef EXIT_DEBUG
        printk(KERN_INFO "KVM: Set MSR to 0x%llx\n", msr);
#endif

        msr &= to_book3s(vcpu)->msr_mask;
        vcpu->arch.msr = msr;
        kvmppc_recalc_shadow_msr(vcpu);

        if (msr & (MSR_WE|MSR_POW)) {
                if (!vcpu->arch.pending_exceptions) {
                        kvm_vcpu_block(vcpu);
                        vcpu->stat.halt_wakeup++;
                }
        }

        if (((vcpu->arch.msr & (MSR_IR|MSR_DR)) != (old_msr & (MSR_IR|MSR_DR))) ||
            (vcpu->arch.msr & MSR_PR) != (old_msr & MSR_PR)) {
                kvmppc_mmu_flush_segments(vcpu);
                kvmppc_mmu_map_segment(vcpu, vcpu->arch.pc);
        }
}

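/*
 * Inject an interrupt into the guest the same way the hardware would:
 * save pc/msr into srr0/srr1, jump to the vector relative to HIOR and let
 * the MMU callback reset the MSR for interrupt entry.
 */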
void kvmppc_inject_interrupt(struct kvm_vcpu *vcpu, int vec, u64 flags)
{
        vcpu->arch.srr0 = vcpu->arch.pc;
        vcpu->arch.srr1 = vcpu->arch.msr | flags;
        vcpu->arch.pc = to_book3s(vcpu)->hior + vec;
        vcpu->arch.mmu.reset_msr(vcpu);
}

static int kvmppc_book3s_vec2irqprio(unsigned int vec)
{
        unsigned int prio;

        switch (vec) {
        case 0x100: prio = BOOK3S_IRQPRIO_SYSTEM_RESET;         break;
        case 0x200: prio = BOOK3S_IRQPRIO_MACHINE_CHECK;        break;
        case 0x300: prio = BOOK3S_IRQPRIO_DATA_STORAGE;         break;
        case 0x380: prio = BOOK3S_IRQPRIO_DATA_SEGMENT;         break;
        case 0x400: prio = BOOK3S_IRQPRIO_INST_STORAGE;         break;
        case 0x480: prio = BOOK3S_IRQPRIO_INST_SEGMENT;         break;
        case 0x500: prio = BOOK3S_IRQPRIO_EXTERNAL;             break;
        case 0x600: prio = BOOK3S_IRQPRIO_ALIGNMENT;            break;
        case 0x700: prio = BOOK3S_IRQPRIO_PROGRAM;              break;
        case 0x800: prio = BOOK3S_IRQPRIO_FP_UNAVAIL;           break;
        case 0x900: prio = BOOK3S_IRQPRIO_DECREMENTER;          break;
        case 0xc00: prio = BOOK3S_IRQPRIO_SYSCALL;              break;
        case 0xd00: prio = BOOK3S_IRQPRIO_DEBUG;                break;
        case 0xf20: prio = BOOK3S_IRQPRIO_ALTIVEC;              break;
        case 0xf40: prio = BOOK3S_IRQPRIO_VSX;                  break;
        default:    prio = BOOK3S_IRQPRIO_MAX;                  break;
        }

        return prio;
}

static void kvmppc_book3s_dequeue_irqprio(struct kvm_vcpu *vcpu,
                                          unsigned int vec)
{
        clear_bit(kvmppc_book3s_vec2irqprio(vec),
                  &vcpu->arch.pending_exceptions);
}

void kvmppc_book3s_queue_irqprio(struct kvm_vcpu *vcpu, unsigned int vec)
{
        vcpu->stat.queue_intr++;

        set_bit(kvmppc_book3s_vec2irqprio(vec),
                &vcpu->arch.pending_exceptions);
#ifdef EXIT_DEBUG
        printk(KERN_INFO "Queueing interrupt %x\n", vec);
#endif
}

void kvmppc_core_queue_program(struct kvm_vcpu *vcpu, ulong flags)
{
        to_book3s(vcpu)->prog_flags = flags;
        kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_PROGRAM);
}

void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu)
{
        kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_DECREMENTER);
}

int kvmppc_core_pending_dec(struct kvm_vcpu *vcpu)
{
        return test_bit(BOOK3S_INTERRUPT_DECREMENTER >> 7, &vcpu->arch.pending_exceptions);
}

void kvmppc_core_dequeue_dec(struct kvm_vcpu *vcpu)
{
        kvmppc_book3s_dequeue_irqprio(vcpu, BOOK3S_INTERRUPT_DECREMENTER);
}

void kvmppc_core_queue_external(struct kvm_vcpu *vcpu,
                                struct kvm_interrupt *irq)
{
        kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL);
}

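/*
 * Deliver one pending interrupt priority. External and decrementer
 * interrupts are only delivered when the guest has MSR_EE set; program
 * interrupts additionally carry the flags saved by
 * kvmppc_core_queue_program().
 */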
int kvmppc_book3s_irqprio_deliver(struct kvm_vcpu *vcpu, unsigned int priority)
{
        int deliver = 1;
        int vec = 0;
        ulong flags = 0ULL;

        switch (priority) {
        case BOOK3S_IRQPRIO_DECREMENTER:
                deliver = vcpu->arch.msr & MSR_EE;
                vec = BOOK3S_INTERRUPT_DECREMENTER;
                break;
        case BOOK3S_IRQPRIO_EXTERNAL:
                deliver = vcpu->arch.msr & MSR_EE;
                vec = BOOK3S_INTERRUPT_EXTERNAL;
                break;
        case BOOK3S_IRQPRIO_SYSTEM_RESET:
                vec = BOOK3S_INTERRUPT_SYSTEM_RESET;
                break;
        case BOOK3S_IRQPRIO_MACHINE_CHECK:
                vec = BOOK3S_INTERRUPT_MACHINE_CHECK;
                break;
        case BOOK3S_IRQPRIO_DATA_STORAGE:
                vec = BOOK3S_INTERRUPT_DATA_STORAGE;
                break;
        case BOOK3S_IRQPRIO_INST_STORAGE:
                vec = BOOK3S_INTERRUPT_INST_STORAGE;
                break;
        case BOOK3S_IRQPRIO_DATA_SEGMENT:
                vec = BOOK3S_INTERRUPT_DATA_SEGMENT;
                break;
        case BOOK3S_IRQPRIO_INST_SEGMENT:
                vec = BOOK3S_INTERRUPT_INST_SEGMENT;
                break;
        case BOOK3S_IRQPRIO_ALIGNMENT:
                vec = BOOK3S_INTERRUPT_ALIGNMENT;
                break;
        case BOOK3S_IRQPRIO_PROGRAM:
                vec = BOOK3S_INTERRUPT_PROGRAM;
                flags = to_book3s(vcpu)->prog_flags;
                break;
        case BOOK3S_IRQPRIO_VSX:
                vec = BOOK3S_INTERRUPT_VSX;
                break;
        case BOOK3S_IRQPRIO_ALTIVEC:
                vec = BOOK3S_INTERRUPT_ALTIVEC;
                break;
        case BOOK3S_IRQPRIO_FP_UNAVAIL:
                vec = BOOK3S_INTERRUPT_FP_UNAVAIL;
                break;
        case BOOK3S_IRQPRIO_SYSCALL:
                vec = BOOK3S_INTERRUPT_SYSCALL;
                break;
        case BOOK3S_IRQPRIO_DEBUG:
                vec = BOOK3S_INTERRUPT_TRACE;
                break;
        case BOOK3S_IRQPRIO_PERFORMANCE_MONITOR:
                vec = BOOK3S_INTERRUPT_PERFMON;
                break;
        default:
                deliver = 0;
                printk(KERN_ERR "KVM: Unknown interrupt: 0x%x\n", priority);
                break;
        }

#ifdef EXIT_DEBUG
        printk(KERN_INFO "Deliver interrupt 0x%x? %x\n", vec, deliver);
#endif

        if (deliver)
                kvmppc_inject_interrupt(vcpu, vec, flags);

        return deliver;
}

void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu)
{
        unsigned long *pending = &vcpu->arch.pending_exceptions;
        unsigned int priority;

#ifdef EXIT_DEBUG
        if (vcpu->arch.pending_exceptions)
                printk(KERN_EMERG "KVM: Check pending: %lx\n", vcpu->arch.pending_exceptions);
#endif
        priority = __ffs(*pending);
        while (priority <= (sizeof(unsigned int) * 8)) {
                if (kvmppc_book3s_irqprio_deliver(vcpu, priority) &&
                    (priority != BOOK3S_IRQPRIO_DECREMENTER)) {
                        /* DEC interrupts get cleared by mtdec */
                        clear_bit(priority, &vcpu->arch.pending_exceptions);
                }

                priority = find_next_bit(pending,
                                         BITS_PER_BYTE * sizeof(*pending),
                                         priority + 1);
        }
}

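/*
 * The PVR decides which MMU backend the guest gets: a 64-bit Book3S PVR
 * selects the book3s_64 MMU with the high HIOR, everything else falls back
 * to the book3s_32 MMU. On a 970 running in hypervisor mode, dcbz can be
 * handled in hardware as a 32-byte store (BOOK3S_HFLAG_DCBZ32).
 */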
void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr)
{
        vcpu->arch.hflags &= ~BOOK3S_HFLAG_SLB;
        vcpu->arch.pvr = pvr;
        if ((pvr >= 0x330000) && (pvr < 0x70330000)) {
                kvmppc_mmu_book3s_64_init(vcpu);
                to_book3s(vcpu)->hior = 0xfff00000;
                to_book3s(vcpu)->msr_mask = 0xffffffffffffffffULL;
        } else {
                kvmppc_mmu_book3s_32_init(vcpu);
                to_book3s(vcpu)->hior = 0;
                to_book3s(vcpu)->msr_mask = 0xffffffffULL;
        }

        /* If we are in hypervisor level on 970, we can tell the CPU to
         * treat DCBZ as 32 bytes store */
        vcpu->arch.hflags &= ~BOOK3S_HFLAG_DCBZ32;
        if (vcpu->arch.mmu.is_dcbz32(vcpu) && (mfmsr() & MSR_HV) &&
            !strcmp(cur_cpu_spec->platform, "ppc970"))
                vcpu->arch.hflags |= BOOK3S_HFLAG_DCBZ32;
}

/* Book3s_32 CPUs always have 32 bytes cache line size, which Linux assumes. To
 * make Book3s_32 Linux work on Book3s_64, we have to make sure we trap dcbz to
 * emulate 32 bytes dcbz length.
 *
 * The Book3s_64 inventors also realized this case and implemented a special bit
 * in the HID5 register, which is a hypervisor resource. Thus we can't use it.
 *
 * My approach here is to patch the dcbz instruction on executing pages.
 */
static void kvmppc_patch_dcbz(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte)
{
        bool touched = false;
        hva_t hpage;
        u32 *page;
        int i;

        hpage = gfn_to_hva(vcpu->kvm, pte->raddr >> PAGE_SHIFT);
        if (kvm_is_error_hva(hpage))
                return;

        hpage |= pte->raddr & ~PAGE_MASK;

        page = vmalloc(HW_PAGE_SIZE);

        if (copy_from_user(page, (void __user *)hpage, HW_PAGE_SIZE))
                goto out;

        for (i=0; i < HW_PAGE_SIZE / 4; i++)
                if ((page[i] & 0xff0007ff) == INS_DCBZ) {
                        page[i] &= 0xfffffff7; // reserved instruction, so we trap
                        touched = true;
                }

        if (touched)
                copy_to_user((void __user *)hpage, page, HW_PAGE_SIZE);

out:
        vfree(page);
}

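/*
 * Translate an effective address either through the guest MMU or, when
 * translation is off, by identity-mapping it into one of the VSID_REAL*
 * address spaces with full access rights.
 */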
static int kvmppc_xlate(struct kvm_vcpu *vcpu, ulong eaddr, bool data,
                        struct kvmppc_pte *pte)
{
        int relocated = (vcpu->arch.msr & (data ? MSR_DR : MSR_IR));
        int r;

        if (relocated) {
                r = vcpu->arch.mmu.xlate(vcpu, eaddr, pte, data);
        } else {
                pte->raddr = eaddr & 0xffffffff;
                pte->vpage = eaddr >> 12;
                switch (vcpu->arch.msr & (MSR_DR|MSR_IR)) {
                case 0:
                        pte->vpage |= VSID_REAL;
                case MSR_DR:
                        pte->vpage |= VSID_REAL_DR;
                case MSR_IR:
                        pte->vpage |= VSID_REAL_IR;
                }
                pte->may_read = true;
                pte->may_write = true;
                pte->may_execute = true;
                r = 0;
        }

        return r;
}

static hva_t kvmppc_bad_hva(void)
{
        return PAGE_OFFSET;
}

static hva_t kvmppc_pte_to_hva(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte,
                               bool read)
{
        hva_t hpage;

        if (read && !pte->may_read)
                goto err;

        if (!read && !pte->may_write)
                goto err;

        hpage = gfn_to_hva(vcpu->kvm, pte->raddr >> PAGE_SHIFT);
        if (kvm_is_error_hva(hpage))
                goto err;

        return hpage | (pte->raddr & ~PAGE_MASK);
err:
        return kvmppc_bad_hva();
}

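/*
 * kvmppc_st/kvmppc_ld access guest memory on behalf of the emulator: the
 * effective address is translated first, then the data is copied through
 * the host virtual address returned by kvmppc_pte_to_hva().
 */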
int kvmppc_st(struct kvm_vcpu *vcpu, ulong eaddr, int size, void *ptr)
{
        struct kvmppc_pte pte;
        hva_t hva;

        if (kvmppc_xlate(vcpu, eaddr, false, &pte))
                return -ENOENT;

        hva = kvmppc_pte_to_hva(vcpu, &pte, false);
        if (kvm_is_error_hva(hva))
                return -ENOENT;

        if (copy_to_user((void __user *)hva, ptr, size)) {
                printk(KERN_INFO "kvmppc_st at 0x%lx failed\n", hva);
                return -ENOENT;
        }

        return 0;
}

int kvmppc_ld(struct kvm_vcpu *vcpu, ulong eaddr, int size, void *ptr,
              bool data)
{
        struct kvmppc_pte pte;
        hva_t hva;

        if (kvmppc_xlate(vcpu, eaddr, data, &pte))
                return -ENOENT;

        hva = kvmppc_pte_to_hva(vcpu, &pte, true);
        if (kvm_is_error_hva(hva))
                return -ENOENT;

        if (copy_from_user(ptr, (void __user *)hva, size)) {
                printk(KERN_INFO "kvmppc_ld at 0x%lx failed\n", hva);
                return -ENOENT;
        }

        return 0;
}

static int kvmppc_visible_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
{
        return kvm_is_visible_gfn(vcpu->kvm, gfn);
}

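/*
 * Handle a guest page fault: translation errors (-ENOENT, -EPERM, -EINVAL)
 * are reflected back to the guest as storage or segment interrupts, pages
 * that are visible to the guest get mapped on the host, and everything else
 * is treated as MMIO and handed to the emulator.
 */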
int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,
                            ulong eaddr, int vec)
{
        bool data = (vec == BOOK3S_INTERRUPT_DATA_STORAGE);
        int r = RESUME_GUEST;
        int relocated;
        int page_found = 0;
        struct kvmppc_pte pte;
        bool is_mmio = false;

        if ( vec == BOOK3S_INTERRUPT_DATA_STORAGE ) {
                relocated = (vcpu->arch.msr & MSR_DR);
        } else {
                relocated = (vcpu->arch.msr & MSR_IR);
        }

        /* Resolve real address if translation turned on */
        if (relocated) {
                page_found = vcpu->arch.mmu.xlate(vcpu, eaddr, &pte, data);
        } else {
                pte.may_execute = true;
                pte.may_read = true;
                pte.may_write = true;
                pte.raddr = eaddr & 0xffffffff;
                pte.vpage = eaddr >> 12;
                switch (vcpu->arch.msr & (MSR_DR|MSR_IR)) {
                case 0:
                        pte.vpage |= VSID_REAL;
                case MSR_DR:
                        pte.vpage |= VSID_REAL_DR;
                case MSR_IR:
                        pte.vpage |= VSID_REAL_IR;
                }
        }

        if (vcpu->arch.mmu.is_dcbz32(vcpu) &&
           (!(vcpu->arch.hflags & BOOK3S_HFLAG_DCBZ32))) {
                /*
                 * If we do the dcbz hack, we have to NX on every execution,
                 * so we can patch the executing code. This renders our guest
                 * NX-less.
                 */
                pte.may_execute = !data;
        }

        if (page_found == -ENOENT) {
                /* Page not found in guest PTE entries */
                vcpu->arch.dear = vcpu->arch.fault_dear;
                to_book3s(vcpu)->dsisr = vcpu->arch.fault_dsisr;
                vcpu->arch.msr |= (vcpu->arch.shadow_srr1 & 0x00000000f8000000ULL);
                kvmppc_book3s_queue_irqprio(vcpu, vec);
        } else if (page_found == -EPERM) {
                /* Storage protection */
                vcpu->arch.dear = vcpu->arch.fault_dear;
                to_book3s(vcpu)->dsisr = vcpu->arch.fault_dsisr & ~DSISR_NOHPTE;
                to_book3s(vcpu)->dsisr |= DSISR_PROTFAULT;
                vcpu->arch.msr |= (vcpu->arch.shadow_srr1 & 0x00000000f8000000ULL);
                kvmppc_book3s_queue_irqprio(vcpu, vec);
        } else if (page_found == -EINVAL) {
                /* Page not found in guest SLB */
                vcpu->arch.dear = vcpu->arch.fault_dear;
                kvmppc_book3s_queue_irqprio(vcpu, vec + 0x80);
        } else if (!is_mmio &&
                   kvmppc_visible_gfn(vcpu, pte.raddr >> PAGE_SHIFT)) {
                /* The guest's PTE is not mapped yet. Map on the host */
                kvmppc_mmu_map_page(vcpu, &pte);
                if (data)
                        vcpu->stat.sp_storage++;
                else if (vcpu->arch.mmu.is_dcbz32(vcpu) &&
                        (!(vcpu->arch.hflags & BOOK3S_HFLAG_DCBZ32)))
                        kvmppc_patch_dcbz(vcpu, &pte);
        } else {
                vcpu->stat.mmio_exits++;
                vcpu->arch.paddr_accessed = pte.raddr;
                r = kvmppc_emulate_mmio(run, vcpu);
                if ( r == RESUME_HOST_NV )
                        r = RESUME_HOST;
        }

        return r;
}

static inline int get_fpr_index(int i)
{
#ifdef CONFIG_VSX
        i *= 2;
#endif
        return i;
}

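/*
 * FPU, Altivec and VSX are switched lazily: guest_owned_ext tracks which
 * of these facilities currently hold guest state, and kvmppc_giveup_ext()
 * moves that state back into the vcpu before the host may clobber it.
 */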
/* Give up external provider (FPU, Altivec, VSX) */
static void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr)
{
        struct thread_struct *t = &current->thread;
        u64 *vcpu_fpr = vcpu->arch.fpr;
        u64 *vcpu_vsx = vcpu->arch.vsr;
        u64 *thread_fpr = (u64*)t->fpr;
        int i;

        if (!(vcpu->arch.guest_owned_ext & msr))
                return;

#ifdef DEBUG_EXT
        printk(KERN_INFO "Giving up ext 0x%lx\n", msr);
#endif

        switch (msr) {
        case MSR_FP:
                giveup_fpu(current);
                for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++)
                        vcpu_fpr[i] = thread_fpr[get_fpr_index(i)];

                vcpu->arch.fpscr = t->fpscr.val;
                break;
        case MSR_VEC:
#ifdef CONFIG_ALTIVEC
                giveup_altivec(current);
                memcpy(vcpu->arch.vr, t->vr, sizeof(vcpu->arch.vr));
                vcpu->arch.vscr = t->vscr;
#endif
                break;
        case MSR_VSX:
#ifdef CONFIG_VSX
                __giveup_vsx(current);
                for (i = 0; i < ARRAY_SIZE(vcpu->arch.vsr); i++)
                        vcpu_vsx[i] = thread_fpr[get_fpr_index(i) + 1];
#endif
                break;
        default:
                BUG();
        }

        vcpu->arch.guest_owned_ext &= ~msr;
        current->thread.regs->msr &= ~msr;
        kvmppc_recalc_shadow_msr(vcpu);
}

/* Handle external providers (FPU, Altivec, VSX) */
static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr,
                             ulong msr)
{
        struct thread_struct *t = &current->thread;
        u64 *vcpu_fpr = vcpu->arch.fpr;
        u64 *vcpu_vsx = vcpu->arch.vsr;
        u64 *thread_fpr = (u64*)t->fpr;
        int i;

        if (!(vcpu->arch.msr & msr)) {
                kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
                return RESUME_GUEST;
        }

#ifdef DEBUG_EXT
        printk(KERN_INFO "Loading up ext 0x%lx\n", msr);
#endif

        current->thread.regs->msr |= msr;

        switch (msr) {
        case MSR_FP:
                for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++)
                        thread_fpr[get_fpr_index(i)] = vcpu_fpr[i];

                t->fpscr.val = vcpu->arch.fpscr;
                kvmppc_load_up_fpu();
                break;
        case MSR_VEC:
#ifdef CONFIG_ALTIVEC
                memcpy(t->vr, vcpu->arch.vr, sizeof(vcpu->arch.vr));
                t->vscr = vcpu->arch.vscr;
                kvmppc_load_up_altivec();
#endif
                break;
        case MSR_VSX:
#ifdef CONFIG_VSX
                for (i = 0; i < ARRAY_SIZE(vcpu->arch.vsr); i++)
                        thread_fpr[get_fpr_index(i) + 1] = vcpu_vsx[i];
                kvmppc_load_up_vsx();
#endif
                break;
        default:
                BUG();
        }

        vcpu->arch.guest_owned_ext |= msr;

        kvmppc_recalc_shadow_msr(vcpu);

        return RESUME_GUEST;
}

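/*
 * Main exit dispatcher. Returns a RESUME_* code that decides whether we
 * re-enter the guest or go back to host userspace; before re-entering,
 * pending interrupts are delivered and pending signals force an exit with
 * KVM_EXIT_INTR.
 */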
int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
                       unsigned int exit_nr)
{
        int r = RESUME_HOST;

        vcpu->stat.sum_exits++;

        run->exit_reason = KVM_EXIT_UNKNOWN;
        run->ready_for_interrupt_injection = 1;
#ifdef EXIT_DEBUG
        printk(KERN_EMERG "exit_nr=0x%x | pc=0x%lx | dar=0x%lx | dec=0x%x | msr=0x%lx\n",
                exit_nr, vcpu->arch.pc, vcpu->arch.fault_dear,
                kvmppc_get_dec(vcpu), vcpu->arch.msr);
#elif defined (EXIT_DEBUG_SIMPLE)
        if ((exit_nr != 0x900) && (exit_nr != 0x500))
                printk(KERN_EMERG "exit_nr=0x%x | pc=0x%lx | dar=0x%lx | msr=0x%lx\n",
                        exit_nr, vcpu->arch.pc, vcpu->arch.fault_dear,
                        vcpu->arch.msr);
#endif

        switch (exit_nr) {
        case BOOK3S_INTERRUPT_INST_STORAGE:
                vcpu->stat.pf_instruc++;
                /* only care about PTEG not found errors, but leave NX alone */
                if (vcpu->arch.shadow_srr1 & 0x40000000) {
                        r = kvmppc_handle_pagefault(run, vcpu, vcpu->arch.pc, exit_nr);
                        vcpu->stat.sp_instruc++;
                } else if (vcpu->arch.mmu.is_dcbz32(vcpu) &&
                          (!(vcpu->arch.hflags & BOOK3S_HFLAG_DCBZ32))) {
                        /*
                         * XXX If we do the dcbz hack we use the NX bit to flush&patch the page,
                         *     so we can't use the NX bit inside the guest. Let's cross our fingers,
                         *     that no guest that needs the dcbz hack does NX.
                         */
                        kvmppc_mmu_pte_flush(vcpu, vcpu->arch.pc, ~0xFFFULL);
                } else {
                        vcpu->arch.msr |= vcpu->arch.shadow_srr1 & 0x58000000;
                        kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
                        kvmppc_mmu_pte_flush(vcpu, vcpu->arch.pc, ~0xFFFULL);
                        r = RESUME_GUEST;
                }
                break;
        case BOOK3S_INTERRUPT_DATA_STORAGE:
                vcpu->stat.pf_storage++;
                /* The only case we need to handle is missing shadow PTEs */
                if (vcpu->arch.fault_dsisr & DSISR_NOHPTE) {
                        r = kvmppc_handle_pagefault(run, vcpu, vcpu->arch.fault_dear, exit_nr);
                } else {
                        vcpu->arch.dear = vcpu->arch.fault_dear;
                        to_book3s(vcpu)->dsisr = vcpu->arch.fault_dsisr;
                        kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
                        kvmppc_mmu_pte_flush(vcpu, vcpu->arch.dear, ~0xFFFULL);
                        r = RESUME_GUEST;
                }
                break;
        case BOOK3S_INTERRUPT_DATA_SEGMENT:
                if (kvmppc_mmu_map_segment(vcpu, vcpu->arch.fault_dear) < 0) {
                        vcpu->arch.dear = vcpu->arch.fault_dear;
                        kvmppc_book3s_queue_irqprio(vcpu,
                                BOOK3S_INTERRUPT_DATA_SEGMENT);
                }
                r = RESUME_GUEST;
                break;
        case BOOK3S_INTERRUPT_INST_SEGMENT:
                if (kvmppc_mmu_map_segment(vcpu, vcpu->arch.pc) < 0) {
                        kvmppc_book3s_queue_irqprio(vcpu,
                                BOOK3S_INTERRUPT_INST_SEGMENT);
                }
                r = RESUME_GUEST;
                break;
        /* We're good on these - the host merely wanted to get our attention */
        case BOOK3S_INTERRUPT_DECREMENTER:
                vcpu->stat.dec_exits++;
                r = RESUME_GUEST;
                break;
        case BOOK3S_INTERRUPT_EXTERNAL:
                vcpu->stat.ext_intr_exits++;
                r = RESUME_GUEST;
                break;
        case BOOK3S_INTERRUPT_PROGRAM:
        {
                enum emulation_result er;
                ulong flags;

                flags = vcpu->arch.shadow_srr1 & 0x1f0000ull;

                if (vcpu->arch.msr & MSR_PR) {
#ifdef EXIT_DEBUG
                        printk(KERN_INFO "Userspace triggered 0x700 exception at 0x%lx (0x%x)\n", vcpu->arch.pc, vcpu->arch.last_inst);
#endif
                        if ((vcpu->arch.last_inst & 0xff0007ff) !=
                            (INS_DCBZ & 0xfffffff7)) {
                                kvmppc_core_queue_program(vcpu, flags);
                                r = RESUME_GUEST;
                                break;
                        }
                }

                vcpu->stat.emulated_inst_exits++;
                er = kvmppc_emulate_instruction(run, vcpu);
                switch (er) {
                case EMULATE_DONE:
                        r = RESUME_GUEST_NV;
                        break;
                case EMULATE_FAIL:
                        printk(KERN_CRIT "%s: emulation at %lx failed (%08x)\n",
                               __func__, vcpu->arch.pc, vcpu->arch.last_inst);
                        kvmppc_core_queue_program(vcpu, flags);
                        r = RESUME_GUEST;
                        break;
                default:
                        BUG();
                }
                break;
        }
        case BOOK3S_INTERRUPT_SYSCALL:
#ifdef EXIT_DEBUG
                printk(KERN_INFO "Syscall Nr %d\n", (int)kvmppc_get_gpr(vcpu, 0));
#endif
                vcpu->stat.syscall_exits++;
                kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
                r = RESUME_GUEST;
                break;
        case BOOK3S_INTERRUPT_FP_UNAVAIL:
                r = kvmppc_handle_ext(vcpu, exit_nr, MSR_FP);
                break;
        case BOOK3S_INTERRUPT_ALTIVEC:
                r = kvmppc_handle_ext(vcpu, exit_nr, MSR_VEC);
                break;
        case BOOK3S_INTERRUPT_VSX:
                r = kvmppc_handle_ext(vcpu, exit_nr, MSR_VSX);
                break;
        case BOOK3S_INTERRUPT_MACHINE_CHECK:
        case BOOK3S_INTERRUPT_TRACE:
                kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
                r = RESUME_GUEST;
                break;
        default:
                /* Ugh - bork here! What did we get? */
                printk(KERN_EMERG "exit_nr=0x%x | pc=0x%lx | msr=0x%lx\n",
                        exit_nr, vcpu->arch.pc, vcpu->arch.shadow_srr1);
                r = RESUME_HOST;
                break;
        }

        if (!(r & RESUME_HOST)) {
                /* To avoid clobbering exit_reason, only check for signals if
                 * we aren't already exiting to userspace for some other
                 * reason. */
                if (signal_pending(current)) {
#ifdef EXIT_DEBUG
                        printk(KERN_EMERG "KVM: Going back to host\n");
#endif
                        vcpu->stat.signal_exits++;
                        run->exit_reason = KVM_EXIT_INTR;
                        r = -EINTR;
                } else {
                        /* In case an interrupt came in that was triggered
                         * from userspace (like DEC), we need to check what
                         * to inject now! */
                        kvmppc_core_deliver_interrupts(vcpu);
                }
        }

#ifdef EXIT_DEBUG
        printk(KERN_EMERG "KVM exit: vcpu=0x%p pc=0x%lx r=0x%x\n", vcpu, vcpu->arch.pc, r);
#endif

        return r;
}

int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
{
        return 0;
}

int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
        int i;

        regs->pc = vcpu->arch.pc;
        regs->cr = kvmppc_get_cr(vcpu);
        regs->ctr = vcpu->arch.ctr;
        regs->lr = vcpu->arch.lr;
        regs->xer = kvmppc_get_xer(vcpu);
        regs->msr = vcpu->arch.msr;
        regs->srr0 = vcpu->arch.srr0;
        regs->srr1 = vcpu->arch.srr1;
        regs->pid = vcpu->arch.pid;
        regs->sprg0 = vcpu->arch.sprg0;
        regs->sprg1 = vcpu->arch.sprg1;
        regs->sprg2 = vcpu->arch.sprg2;
        regs->sprg3 = vcpu->arch.sprg3;
        regs->sprg5 = vcpu->arch.sprg4;
        regs->sprg6 = vcpu->arch.sprg5;
        regs->sprg7 = vcpu->arch.sprg6;

        for (i = 0; i < ARRAY_SIZE(regs->gpr); i++)
                regs->gpr[i] = kvmppc_get_gpr(vcpu, i);

        return 0;
}

int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
        int i;

        vcpu->arch.pc = regs->pc;
        kvmppc_set_cr(vcpu, regs->cr);
        vcpu->arch.ctr = regs->ctr;
        vcpu->arch.lr = regs->lr;
        kvmppc_set_xer(vcpu, regs->xer);
        kvmppc_set_msr(vcpu, regs->msr);
        vcpu->arch.srr0 = regs->srr0;
        vcpu->arch.srr1 = regs->srr1;
        vcpu->arch.sprg0 = regs->sprg0;
        vcpu->arch.sprg1 = regs->sprg1;
        vcpu->arch.sprg2 = regs->sprg2;
        vcpu->arch.sprg3 = regs->sprg3;
        vcpu->arch.sprg5 = regs->sprg4;
        vcpu->arch.sprg6 = regs->sprg5;
        vcpu->arch.sprg7 = regs->sprg6;

        for (i = 0; i < ARRAY_SIZE(regs->gpr); i++)
                kvmppc_set_gpr(vcpu, i, regs->gpr[i]);

        return 0;
}

int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
                                  struct kvm_sregs *sregs)
{
        struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
        int i;

        sregs->pvr = vcpu->arch.pvr;

        sregs->u.s.sdr1 = to_book3s(vcpu)->sdr1;
        if (vcpu->arch.hflags & BOOK3S_HFLAG_SLB) {
                for (i = 0; i < 64; i++) {
                        sregs->u.s.ppc64.slb[i].slbe = vcpu3s->slb[i].orige | i;
                        sregs->u.s.ppc64.slb[i].slbv = vcpu3s->slb[i].origv;
                }
        } else {
                for (i = 0; i < 16; i++) {
                        sregs->u.s.ppc32.sr[i] = vcpu3s->sr[i].raw;
                }
                for (i = 0; i < 8; i++) {
                        sregs->u.s.ppc32.ibat[i] = vcpu3s->ibat[i].raw;
                        sregs->u.s.ppc32.dbat[i] = vcpu3s->dbat[i].raw;
                }
        }

        return 0;
}

int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
                                  struct kvm_sregs *sregs)
{
        struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
        int i;

        kvmppc_set_pvr(vcpu, sregs->pvr);

        vcpu3s->sdr1 = sregs->u.s.sdr1;
        if (vcpu->arch.hflags & BOOK3S_HFLAG_SLB) {
                for (i = 0; i < 64; i++) {
                        vcpu->arch.mmu.slbmte(vcpu, sregs->u.s.ppc64.slb[i].slbv,
                                              sregs->u.s.ppc64.slb[i].slbe);
                }
        } else {
                for (i = 0; i < 16; i++) {
                        vcpu->arch.mmu.mtsrin(vcpu, i, sregs->u.s.ppc32.sr[i]);
                }
                for (i = 0; i < 8; i++) {
                        kvmppc_set_bat(vcpu, &(vcpu3s->ibat[i]), false,
                                       (u32)sregs->u.s.ppc32.ibat[i]);
                        kvmppc_set_bat(vcpu, &(vcpu3s->ibat[i]), true,
                                       (u32)(sregs->u.s.ppc32.ibat[i] >> 32));
                        kvmppc_set_bat(vcpu, &(vcpu3s->dbat[i]), false,
                                       (u32)sregs->u.s.ppc32.dbat[i]);
                        kvmppc_set_bat(vcpu, &(vcpu3s->dbat[i]), true,
                                       (u32)(sregs->u.s.ppc32.dbat[i] >> 32));
                }
        }

        /* Flush the MMU after messing with the segments */
        kvmppc_mmu_pte_flush(vcpu, 0, 0);

        return 0;
}

int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
        return -ENOTSUPP;
}

int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
        return -ENOTSUPP;
}

int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
                                  struct kvm_translation *tr)
{
        return 0;
}

/*
 * Get (and clear) the dirty memory log for a memory slot.
 */
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
                               struct kvm_dirty_log *log)
{
        struct kvm_memory_slot *memslot;
        struct kvm_vcpu *vcpu;
        ulong ga, ga_end;
        int is_dirty = 0;
        int r, n;

        mutex_lock(&kvm->slots_lock);

        r = kvm_get_dirty_log(kvm, log, &is_dirty);
        if (r)
                goto out;

        /* If nothing is dirty, don't bother messing with page tables. */
        if (is_dirty) {
                memslot = &kvm->memslots->memslots[log->slot];

                ga = memslot->base_gfn << PAGE_SHIFT;
                ga_end = ga + (memslot->npages << PAGE_SHIFT);

                kvm_for_each_vcpu(n, vcpu, kvm)
                        kvmppc_mmu_pte_pflush(vcpu, ga, ga_end);

                n = kvm_dirty_bitmap_bytes(memslot);
                memset(memslot->dirty_bitmap, 0, n);
        }

        r = 0;
out:
        mutex_unlock(&kvm->slots_lock);
        return r;
}

int kvmppc_core_check_processor_compat(void)
{
        return 0;
}

struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
{
        struct kvmppc_vcpu_book3s *vcpu_book3s;
        struct kvm_vcpu *vcpu;
        int err;

        vcpu_book3s = (struct kvmppc_vcpu_book3s *)__get_free_pages( GFP_KERNEL | __GFP_ZERO,
                        get_order(sizeof(struct kvmppc_vcpu_book3s)));
        if (!vcpu_book3s) {
                err = -ENOMEM;
                goto out;
        }

        vcpu = &vcpu_book3s->vcpu;
        err = kvm_vcpu_init(vcpu, kvm, id);
        if (err)
                goto free_vcpu;

        vcpu->arch.host_retip = kvm_return_point;
        vcpu->arch.host_msr = mfmsr();
        /* default to book3s_64 (970fx) */
        vcpu->arch.pvr = 0x3C0301;
        kvmppc_set_pvr(vcpu, vcpu->arch.pvr);
        vcpu_book3s->slb_nr = 64;

        /* remember where some real-mode handlers are */
        vcpu->arch.trampoline_lowmem = kvmppc_trampoline_lowmem;
        vcpu->arch.trampoline_enter = kvmppc_trampoline_enter;
        vcpu->arch.highmem_handler = (ulong)kvmppc_handler_highmem;
        vcpu->arch.rmcall = *(ulong*)kvmppc_rmcall;

        vcpu->arch.shadow_msr = MSR_USER64;

        err = __init_new_context();
        if (err < 0)
                goto free_vcpu;
        vcpu_book3s->context_id = err;

        vcpu_book3s->vsid_max = ((vcpu_book3s->context_id + 1) << USER_ESID_BITS) - 1;
        vcpu_book3s->vsid_first = vcpu_book3s->context_id << USER_ESID_BITS;
        vcpu_book3s->vsid_next = vcpu_book3s->vsid_first;

        return vcpu;

free_vcpu:
        free_pages((long)vcpu_book3s, get_order(sizeof(struct kvmppc_vcpu_book3s)));
out:
        return ERR_PTR(err);
}

void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
{
        struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu);

        __destroy_context(vcpu_book3s->context_id);
        kvm_vcpu_uninit(vcpu);
        free_pages((long)vcpu_book3s, get_order(sizeof(struct kvmppc_vcpu_book3s)));
}

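/*
 * Guest entry path: host FPU/Altivec/VSX state is saved on the stack around
 * __kvmppc_vcpu_entry(), guest state is pulled back with kvmppc_giveup_ext()
 * after the exit, and the host state is restored before returning.
 */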
extern int __kvmppc_vcpu_entry(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu);

int __kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
{
        int ret;
        struct thread_struct ext_bkp;
        bool save_vec = current->thread.used_vr;
        bool save_vsx = current->thread.used_vsr;
        ulong ext_msr;

        /* No need to go into the guest when all we do is going out */
        if (signal_pending(current)) {
                kvm_run->exit_reason = KVM_EXIT_INTR;
                return -EINTR;
        }

        /* Save FPU state in stack */
        if (current->thread.regs->msr & MSR_FP)
                giveup_fpu(current);
        memcpy(ext_bkp.fpr, current->thread.fpr, sizeof(current->thread.fpr));
        ext_bkp.fpscr = current->thread.fpscr;
        ext_bkp.fpexc_mode = current->thread.fpexc_mode;

#ifdef CONFIG_ALTIVEC
        /* Save Altivec state in stack */
        if (save_vec) {
                if (current->thread.regs->msr & MSR_VEC)
                        giveup_altivec(current);
                memcpy(ext_bkp.vr, current->thread.vr, sizeof(ext_bkp.vr));
                ext_bkp.vscr = current->thread.vscr;
                ext_bkp.vrsave = current->thread.vrsave;
        }
        ext_bkp.used_vr = current->thread.used_vr;
#endif

#ifdef CONFIG_VSX
        /* Save VSX state in stack */
        if (save_vsx && (current->thread.regs->msr & MSR_VSX))
                __giveup_vsx(current);
        ext_bkp.used_vsr = current->thread.used_vsr;
#endif

        /* Remember the MSR with disabled extensions */
        ext_msr = current->thread.regs->msr;

        /* XXX we get called with irq disabled - change that! */
        local_irq_enable();

        ret = __kvmppc_vcpu_entry(kvm_run, vcpu);

        local_irq_disable();

        current->thread.regs->msr = ext_msr;

        /* Make sure we save the guest FPU/Altivec/VSX state */
        kvmppc_giveup_ext(vcpu, MSR_FP);
        kvmppc_giveup_ext(vcpu, MSR_VEC);
        kvmppc_giveup_ext(vcpu, MSR_VSX);

        /* Restore FPU state from stack */
        memcpy(current->thread.fpr, ext_bkp.fpr, sizeof(ext_bkp.fpr));
        current->thread.fpscr = ext_bkp.fpscr;
        current->thread.fpexc_mode = ext_bkp.fpexc_mode;

#ifdef CONFIG_ALTIVEC
        /* Restore Altivec state from stack */
        if (save_vec && current->thread.used_vr) {
                memcpy(current->thread.vr, ext_bkp.vr, sizeof(ext_bkp.vr));
                current->thread.vscr = ext_bkp.vscr;
                current->thread.vrsave = ext_bkp.vrsave;
        }
        current->thread.used_vr = ext_bkp.used_vr;
#endif

#ifdef CONFIG_VSX
        current->thread.used_vsr = ext_bkp.used_vsr;
#endif

        return ret;
}

static int kvmppc_book3s_init(void)
{
        return kvm_init(NULL, sizeof(struct kvmppc_vcpu_book3s), THIS_MODULE);
}

static void kvmppc_book3s_exit(void)
{
        kvm_exit();
}

module_init(kvmppc_book3s_init);
module_exit(kvmppc_book3s_exit);