/*
 * Copyright 2011 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
 * Copyright (C) 2009. SUSE Linux Products GmbH. All rights reserved.
 *
 * Authors:
 *    Paul Mackerras <paulus@au1.ibm.com>
 *    Alexander Graf <agraf@suse.de>
 *    Kevin Wolf <mail@kevin-wolf.de>
 *
 * Description: KVM functions specific to running on Book 3S
 * processors in hypervisor mode (specifically POWER7 and later).
 *
 * This file is derived from arch/powerpc/kvm/book3s.c,
 * by Alexander Graf <agraf@suse.de>.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License, version 2, as
 * published by the Free Software Foundation.
 */

#include <linux/kvm_host.h>
#include <linux/err.h>
#include <linux/slab.h>
#include <linux/preempt.h>
#include <linux/sched.h>
#include <linux/delay.h>
#include <linux/anon_inodes.h>
#include <linux/cpumask.h>

#include <asm/cputable.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
#include <asm/uaccess.h>
#include <asm/kvm_ppc.h>
#include <asm/kvm_book3s.h>
#include <asm/mmu_context.h>
#include <asm/lppaca.h>
#include <asm/processor.h>
#include <linux/gfp.h>
#include <linux/vmalloc.h>
#include <linux/highmem.h>

/* #define EXIT_DEBUG */
/* #define EXIT_DEBUG_SIMPLE */
/* #define EXIT_DEBUG_INT */

void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
	local_paca->kvm_hstate.kvm_vcpu = vcpu;
}

void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
{
}

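/*
 * Block the vcpu until something interesting happens.  If the guest
 * decrementer has already expired, queue the exception now; otherwise
 * arm a hrtimer so the vcpu is woken when the decrementer would fire.
 * A dec_expires value of ~0 means "no deadline", so no timer is armed
 * (or cancelled) in that case.
 */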
void kvmppc_vcpu_block(struct kvm_vcpu *vcpu)
{
	u64 now;
	unsigned long dec_nsec;

	now = get_tb();
	if (now >= vcpu->arch.dec_expires && !kvmppc_core_pending_dec(vcpu))
		kvmppc_core_queue_dec(vcpu);
	if (vcpu->arch.pending_exceptions)
		return;
	if (vcpu->arch.dec_expires != ~(u64)0) {
		dec_nsec = (vcpu->arch.dec_expires - now) * NSEC_PER_SEC /
			tb_ticks_per_sec;
		hrtimer_start(&vcpu->arch.dec_timer, ktime_set(0, dec_nsec),
			      HRTIMER_MODE_REL);
	}

	kvm_vcpu_block(vcpu);
	vcpu->stat.halt_wakeup++;

	if (vcpu->arch.dec_expires != ~(u64)0)
		hrtimer_try_to_cancel(&vcpu->arch.dec_timer);
}

void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr)
{
	vcpu->arch.shregs.msr = msr;
}

void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr)
{
	vcpu->arch.pvr = pvr;
}

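/*
 * Dump the vcpu register state (GPRs, SPRs and the guest SLB) to the
 * kernel log; used from the unhandled-trap path in kvmppc_handle_exit().
 */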
void kvmppc_dump_regs(struct kvm_vcpu *vcpu)
{
	int r;

	pr_err("vcpu %p (%d):\n", vcpu, vcpu->vcpu_id);
	pr_err("pc = %.16lx msr = %.16llx trap = %x\n",
	       vcpu->arch.pc, vcpu->arch.shregs.msr, vcpu->arch.trap);
	for (r = 0; r < 16; ++r)
		pr_err("r%2d = %.16lx r%d = %.16lx\n",
		       r, kvmppc_get_gpr(vcpu, r),
		       r+16, kvmppc_get_gpr(vcpu, r+16));
	pr_err("ctr = %.16lx lr = %.16lx\n",
	       vcpu->arch.ctr, vcpu->arch.lr);
	pr_err("srr0 = %.16llx srr1 = %.16llx\n",
	       vcpu->arch.shregs.srr0, vcpu->arch.shregs.srr1);
	pr_err("sprg0 = %.16llx sprg1 = %.16llx\n",
	       vcpu->arch.shregs.sprg0, vcpu->arch.shregs.sprg1);
	pr_err("sprg2 = %.16llx sprg3 = %.16llx\n",
	       vcpu->arch.shregs.sprg2, vcpu->arch.shregs.sprg3);
	pr_err("cr = %.8x xer = %.16lx dsisr = %.8x\n",
	       vcpu->arch.cr, vcpu->arch.xer, vcpu->arch.shregs.dsisr);
	pr_err("dar = %.16llx\n", vcpu->arch.shregs.dar);
	pr_err("fault dar = %.16lx dsisr = %.8x\n",
	       vcpu->arch.fault_dar, vcpu->arch.fault_dsisr);
	pr_err("SLB (%d entries):\n", vcpu->arch.slb_max);
	for (r = 0; r < vcpu->arch.slb_max; ++r)
		pr_err("  ESID = %.16llx VSID = %.16llx\n",
		       vcpu->arch.slb[r].orige, vcpu->arch.slb[r].origv);
	pr_err("lpcr = %.16lx sdr1 = %.16lx last_inst = %.8x\n",
	       vcpu->arch.lpcr, vcpu->kvm->arch.sdr1,
	       vcpu->arch.last_inst);
}

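/* Look up a vcpu by guest vcpu id, under kvm->lock. */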
struct kvm_vcpu *kvmppc_find_vcpu(struct kvm *kvm, int id)
{
	int r;
	struct kvm_vcpu *v, *ret = NULL;

	mutex_lock(&kvm->lock);
	kvm_for_each_vcpu(r, v, kvm) {
		if (v->vcpu_id == id) {
			ret = v;
			break;
		}
	}
	mutex_unlock(&kvm->lock);
	return ret;
}

static void init_vpa(struct kvm_vcpu *vcpu, struct lppaca *vpa)
{
	vpa->shared_proc = 1;
	vpa->yield_count = 1;
}

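/*
 * Handle the H_REGISTER_VPA hcall for a target vcpu.  The decoded
 * flags select the subfunction: 1, 2 and 3 register the VPA, the
 * dispatch trace log (DTL) and the SLB shadow buffer respectively;
 * 5, 6 and 7 unregister them.  The DTL and SLB shadow buffer can only
 * be registered while a VPA is registered, and the VPA cannot be
 * unregistered while either of the others is still registered.
 */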
static unsigned long do_h_register_vpa(struct kvm_vcpu *vcpu,
				       unsigned long flags,
				       unsigned long vcpuid, unsigned long vpa)
{
	struct kvm *kvm = vcpu->kvm;
	unsigned long pg_index, ra, len;
	unsigned long pg_offset;
	void *va;
	struct kvm_vcpu *tvcpu;

	tvcpu = kvmppc_find_vcpu(kvm, vcpuid);
	if (!tvcpu)
		return H_PARAMETER;

	flags >>= 63 - 18;
	flags &= 7;
	if (flags == 0 || flags == 4)
		return H_PARAMETER;
	if (flags < 4) {
		if (vpa & 0x7f)
			return H_PARAMETER;
		/* registering new area; convert logical addr to real */
		pg_index = vpa >> kvm->arch.ram_porder;
		pg_offset = vpa & (kvm->arch.ram_psize - 1);
		if (pg_index >= kvm->arch.ram_npages)
			return H_PARAMETER;
		if (kvm->arch.ram_pginfo[pg_index].pfn == 0)
			return H_PARAMETER;
		ra = kvm->arch.ram_pginfo[pg_index].pfn << PAGE_SHIFT;
		ra |= pg_offset;
		va = __va(ra);
		if (flags <= 1)
			len = *(unsigned short *)(va + 4);
		else
			len = *(unsigned int *)(va + 4);
		if (pg_offset + len > kvm->arch.ram_psize)
			return H_PARAMETER;
		switch (flags) {
		case 1:		/* register VPA */
			if (len < 640)
				return H_PARAMETER;
			tvcpu->arch.vpa = va;
			init_vpa(vcpu, va);
			break;
		case 2:		/* register DTL */
			if (len < 48)
				return H_PARAMETER;
			if (!tvcpu->arch.vpa)
				return H_RESOURCE;
			len -= len % 48;
			tvcpu->arch.dtl = va;
			tvcpu->arch.dtl_end = va + len;
			break;
		case 3:		/* register SLB shadow buffer */
			if (len < 8)
				return H_PARAMETER;
			if (!tvcpu->arch.vpa)
				return H_RESOURCE;
			tvcpu->arch.slb_shadow = va;
			len = (len - 16) / 16;
			break;
		}
	} else {
		switch (flags) {
		case 5:		/* unregister VPA */
			if (tvcpu->arch.slb_shadow || tvcpu->arch.dtl)
				return H_RESOURCE;
			tvcpu->arch.vpa = NULL;
			break;
		case 6:		/* unregister DTL */
			tvcpu->arch.dtl = NULL;
			break;
		case 7:		/* unregister SLB shadow buffer */
			tvcpu->arch.slb_shadow = NULL;
			break;
		}
	}
	return H_SUCCESS;
}

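/*
 * Handle the hcalls we emulate in the kernel (H_CEDE, H_PROD,
 * H_CONFER and H_REGISTER_VPA).  Anything else returns RESUME_HOST
 * so that the hcall is reflected out to userspace via
 * KVM_EXIT_PAPR_HCALL.
 */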
int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
{
	unsigned long req = kvmppc_get_gpr(vcpu, 3);
	unsigned long target, ret = H_SUCCESS;
	struct kvm_vcpu *tvcpu;

	switch (req) {
	case H_CEDE:
		vcpu->arch.shregs.msr |= MSR_EE;
		vcpu->arch.ceded = 1;
		smp_mb();
		if (!vcpu->arch.prodded)
			kvmppc_vcpu_block(vcpu);
		else
			vcpu->arch.prodded = 0;
		smp_mb();
		vcpu->arch.ceded = 0;
		break;
	case H_PROD:
		target = kvmppc_get_gpr(vcpu, 4);
		tvcpu = kvmppc_find_vcpu(vcpu->kvm, target);
		if (!tvcpu) {
			ret = H_PARAMETER;
			break;
		}
		tvcpu->arch.prodded = 1;
		smp_mb();
		if (vcpu->arch.ceded) {
			if (waitqueue_active(&vcpu->wq)) {
				wake_up_interruptible(&vcpu->wq);
				vcpu->stat.halt_wakeup++;
			}
		}
		break;
	case H_CONFER:
		break;
	case H_REGISTER_VPA:
		ret = do_h_register_vpa(vcpu, kvmppc_get_gpr(vcpu, 4),
					kvmppc_get_gpr(vcpu, 5),
					kvmppc_get_gpr(vcpu, 6));
		break;
	default:
		return RESUME_HOST;
	}
	kvmppc_set_gpr(vcpu, 3, ret);
	vcpu->arch.hcall_needed = 0;
	return RESUME_GUEST;
}

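/*
 * Work out what to do after a guest exit: either resume the guest,
 * possibly after queueing an interrupt for it, or go out to userspace
 * with an appropriate exit_reason.
 */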
static int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
			      struct task_struct *tsk)
{
	int r = RESUME_HOST;

	vcpu->stat.sum_exits++;

	run->exit_reason = KVM_EXIT_UNKNOWN;
	run->ready_for_interrupt_injection = 1;
	switch (vcpu->arch.trap) {
	/* We're good on these - the host merely wanted to get our attention */
	case BOOK3S_INTERRUPT_HV_DECREMENTER:
		vcpu->stat.dec_exits++;
		r = RESUME_GUEST;
		break;
	case BOOK3S_INTERRUPT_EXTERNAL:
		vcpu->stat.ext_intr_exits++;
		r = RESUME_GUEST;
		break;
	case BOOK3S_INTERRUPT_PERFMON:
		r = RESUME_GUEST;
		break;
	case BOOK3S_INTERRUPT_PROGRAM:
	{
		ulong flags;
		/*
		 * Normally program interrupts are delivered directly
		 * to the guest by the hardware, but we can get here
		 * as a result of a hypervisor emulation interrupt
		 * (e40) getting turned into a 700 by BML RTAS.
		 */
		flags = vcpu->arch.shregs.msr & 0x1f0000ull;
		kvmppc_core_queue_program(vcpu, flags);
		r = RESUME_GUEST;
		break;
	}
	case BOOK3S_INTERRUPT_SYSCALL:
	{
		/* hcall - punt to userspace */
		int i;

		if (vcpu->arch.shregs.msr & MSR_PR) {
			/* sc 1 from userspace - reflect to guest syscall */
			kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_SYSCALL);
			r = RESUME_GUEST;
			break;
		}
		run->papr_hcall.nr = kvmppc_get_gpr(vcpu, 3);
		for (i = 0; i < 9; ++i)
			run->papr_hcall.args[i] = kvmppc_get_gpr(vcpu, 4 + i);
		run->exit_reason = KVM_EXIT_PAPR_HCALL;
		vcpu->arch.hcall_needed = 1;
		r = RESUME_HOST;
		break;
	}
	/*
	 * We get these next two if the guest does a bad real-mode access,
	 * as we have enabled VRMA (virtualized real mode area) mode in the
	 * LPCR.  We just generate an appropriate DSI/ISI to the guest.
	 */
	case BOOK3S_INTERRUPT_H_DATA_STORAGE:
		vcpu->arch.shregs.dsisr = vcpu->arch.fault_dsisr;
		vcpu->arch.shregs.dar = vcpu->arch.fault_dar;
		kvmppc_inject_interrupt(vcpu, BOOK3S_INTERRUPT_DATA_STORAGE, 0);
		r = RESUME_GUEST;
		break;
	case BOOK3S_INTERRUPT_H_INST_STORAGE:
		kvmppc_inject_interrupt(vcpu, BOOK3S_INTERRUPT_INST_STORAGE,
					0x08000000);
		r = RESUME_GUEST;
		break;
	/*
	 * This occurs if the guest executes an illegal instruction.
	 * We just generate a program interrupt to the guest, since
	 * we don't emulate any guest instructions at this stage.
	 */
	case BOOK3S_INTERRUPT_H_EMUL_ASSIST:
		kvmppc_core_queue_program(vcpu, 0x80000);
		r = RESUME_GUEST;
		break;
	default:
		kvmppc_dump_regs(vcpu);
		printk(KERN_EMERG "trap=0x%x | pc=0x%lx | msr=0x%llx\n",
			vcpu->arch.trap, kvmppc_get_pc(vcpu),
			vcpu->arch.shregs.msr);
		r = RESUME_HOST;
		break;
	}

	if (!(r & RESUME_HOST)) {
		/* To avoid clobbering exit_reason, only check for signals if
		 * we aren't already exiting to userspace for some other
		 * reason.
		 */
		if (signal_pending(tsk)) {
			vcpu->stat.signal_exits++;
			run->exit_reason = KVM_EXIT_INTR;
			r = -EINTR;
		} else {
			kvmppc_core_deliver_interrupts(vcpu);
		}
	}

	return r;
}

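/*
 * The sregs interface carries the guest PVR and the guest-visible SLB
 * entries; set_sregs below copies in only the valid (SLB_ESID_V)
 * entries and updates slb_max accordingly.
 */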
int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	int i;

	memset(sregs, 0, sizeof(struct kvm_sregs));
	sregs->pvr = vcpu->arch.pvr;
	for (i = 0; i < vcpu->arch.slb_max; i++) {
		sregs->u.s.ppc64.slb[i].slbe = vcpu->arch.slb[i].orige;
		sregs->u.s.ppc64.slb[i].slbv = vcpu->arch.slb[i].origv;
	}

	return 0;
}

int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	int i, j;

	kvmppc_set_pvr(vcpu, sregs->pvr);

	j = 0;
	for (i = 0; i < vcpu->arch.slb_nr; i++) {
		if (sregs->u.s.ppc64.slb[i].slbe & SLB_ESID_V) {
			vcpu->arch.slb[j].orige = sregs->u.s.ppc64.slb[i].slbe;
			vcpu->arch.slb[j].origv = sregs->u.s.ppc64.slb[i].slbv;
			++j;
		}
	}
	vcpu->arch.slb_max = j;

	return 0;
}

int kvmppc_core_check_processor_compat(void)
{
	if (cpu_has_feature(CPU_FTR_HVMODE_206))
		return 0;
	return -EIO;
}

struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
{
	struct kvm_vcpu *vcpu;
	int err = -ENOMEM;
	unsigned long lpcr;

	vcpu = kzalloc(sizeof(struct kvm_vcpu), GFP_KERNEL);
	if (!vcpu)
		goto out;

	err = kvm_vcpu_init(vcpu, kvm, id);
	if (err)
		goto free_vcpu;

	vcpu->arch.shared = &vcpu->arch.shregs;
	vcpu->arch.last_cpu = -1;
	vcpu->arch.mmcr[0] = MMCR0_FC;
	vcpu->arch.ctrl = CTRL_RUNLATCH;
	/* default to host PVR, since we can't spoof it */
	vcpu->arch.pvr = mfspr(SPRN_PVR);
	kvmppc_set_pvr(vcpu, vcpu->arch.pvr);

	lpcr = kvm->arch.host_lpcr & (LPCR_PECE | LPCR_LPES);
	lpcr |= LPCR_VPM0 | LPCR_VRMA_L | (4UL << LPCR_DPFD_SH) | LPCR_HDICE;
	vcpu->arch.lpcr = lpcr;

	kvmppc_mmu_book3s_hv_init(vcpu);

	return vcpu;

free_vcpu:
	kfree(vcpu);
out:
	return ERR_PTR(err);
}

void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
{
	kvm_vcpu_uninit(vcpu);
	kfree(vcpu);
}

extern int __kvmppc_vcore_entry(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu);

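/*
 * Run one vcpu in the guest, via __kvmppc_vcore_entry.  Floating-point,
 * Altivec and VSX state is flushed to the thread struct first, since
 * the guest gets to use those registers.  Entry is refused if this
 * thread is not thread 0 of the core or if any secondary threads are
 * online.
 */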
static int kvmppc_run_vcpu(struct kvm_run *run, struct kvm_vcpu *vcpu)
{
	u64 now;

	if (signal_pending(current)) {
		run->exit_reason = KVM_EXIT_INTR;
		return -EINTR;
	}

	flush_fp_to_thread(current);
	flush_altivec_to_thread(current);
	flush_vsx_to_thread(current);
	preempt_disable();

	/*
	 * Make sure we are running on thread 0, and that
	 * secondary threads are offline.
	 * XXX we should also block attempts to bring any
	 * secondary threads online.
	 */
	if (threads_per_core > 1) {
		int cpu = smp_processor_id();
		int thr = cpu_thread_in_core(cpu);

		if (thr)
			goto out;
		while (++thr < threads_per_core)
			if (cpu_online(cpu + thr))
				goto out;
	}

	kvm_guest_enter();

	__kvmppc_vcore_entry(NULL, vcpu);

	kvm_guest_exit();

	preempt_enable();
	kvm_resched(vcpu);

	now = get_tb();
	/* cancel pending dec exception if dec is positive */
	if (now < vcpu->arch.dec_expires && kvmppc_core_pending_dec(vcpu))
		kvmppc_core_dequeue_dec(vcpu);

	return kvmppc_handle_exit(run, vcpu, current);

 out:
	preempt_enable();
	return -EBUSY;
}

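/*
 * Main vcpu run loop: run the guest, and on a PAPR hcall taken in the
 * kernel (sc 1 with MSR_PR clear), handle the hcall and keep going
 * until something requires a return to userspace.
 */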
int kvmppc_vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu)
{
	int r;

	do {
		r = kvmppc_run_vcpu(run, vcpu);

		if (run->exit_reason == KVM_EXIT_PAPR_HCALL &&
		    !(vcpu->arch.shregs.msr & MSR_PR)) {
			r = kvmppc_pseries_do_hcall(vcpu);
			kvmppc_core_deliver_interrupts(vcpu);
		}
	} while (r == RESUME_GUEST);

	return r;
}

static long kvmppc_stt_npages(unsigned long window_size)
{
	return ALIGN((window_size >> SPAPR_TCE_SHIFT)
		     * sizeof(u64), PAGE_SIZE) / PAGE_SIZE;
}

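/*
 * For example (assuming a 4 KB PAGE_SIZE and SPAPR_TCE_SHIFT == 12):
 * a 256 MB DMA window has 2^28 >> 12 = 65536 TCEs; at 8 bytes each
 * that is 512 KB of table, i.e. 128 pages.
 */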
static void release_spapr_tce_table(struct kvmppc_spapr_tce_table *stt)
{
	struct kvm *kvm = stt->kvm;
	int i;

	mutex_lock(&kvm->lock);
	list_del(&stt->list);
	for (i = 0; i < kvmppc_stt_npages(stt->window_size); i++)
		__free_page(stt->pages[i]);
	kfree(stt);
	mutex_unlock(&kvm->lock);

	kvm_put_kvm(kvm);
}

static int kvm_spapr_tce_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	struct kvmppc_spapr_tce_table *stt = vma->vm_file->private_data;
	struct page *page;

	if (vmf->pgoff >= kvmppc_stt_npages(stt->window_size))
		return VM_FAULT_SIGBUS;

	page = stt->pages[vmf->pgoff];
	get_page(page);
	vmf->page = page;
	return 0;
}

static const struct vm_operations_struct kvm_spapr_tce_vm_ops = {
	.fault = kvm_spapr_tce_fault,
};

static int kvm_spapr_tce_mmap(struct file *file, struct vm_area_struct *vma)
{
	vma->vm_ops = &kvm_spapr_tce_vm_ops;
	return 0;
}

static int kvm_spapr_tce_release(struct inode *inode, struct file *filp)
{
	struct kvmppc_spapr_tce_table *stt = filp->private_data;

	release_spapr_tce_table(stt);
	return 0;
}

static struct file_operations kvm_spapr_tce_fops = {
	.mmap = kvm_spapr_tce_mmap,
	.release = kvm_spapr_tce_release,
};

long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
				   struct kvm_create_spapr_tce *args)
{
	struct kvmppc_spapr_tce_table *stt = NULL;
	long npages;
	int ret = -ENOMEM;
	int i;

	/* Check this LIOBN hasn't been previously allocated */
	list_for_each_entry(stt, &kvm->arch.spapr_tce_tables, list) {
		if (stt->liobn == args->liobn)
			return -EBUSY;
	}

	npages = kvmppc_stt_npages(args->window_size);

	stt = kzalloc(sizeof(*stt) + npages * sizeof(struct page *),
		      GFP_KERNEL);
	if (!stt)
		goto fail;

	stt->liobn = args->liobn;
	stt->window_size = args->window_size;
	stt->kvm = kvm;

	for (i = 0; i < npages; i++) {
		stt->pages[i] = alloc_page(GFP_KERNEL | __GFP_ZERO);
		if (!stt->pages[i])
			goto fail;
	}

	kvm_get_kvm(kvm);

	mutex_lock(&kvm->lock);
	list_add(&stt->list, &kvm->arch.spapr_tce_tables);

	mutex_unlock(&kvm->lock);

	return anon_inode_getfd("kvm-spapr-tce", &kvm_spapr_tce_fops,
				stt, O_RDWR);

fail:
	if (stt) {
		for (i = 0; i < npages; i++)
			if (stt->pages[i])
				__free_page(stt->pages[i]);

		kfree(stt);
	}
	return ret;
}

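/*
 * A memory region registered at guest physical address 0 backs the
 * VRMA (virtualized real mode area) that the guest uses for real-mode
 * accesses; see the LPCR_VRMA_L setup in kvmppc_core_vcpu_create().
 */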
int kvmppc_core_prepare_memory_region(struct kvm *kvm,
				      struct kvm_userspace_memory_region *mem)
{
	if (mem->guest_phys_addr == 0 && mem->memory_size != 0)
		return kvmppc_prepare_vrma(kvm, mem);
	return 0;
}

void kvmppc_core_commit_memory_region(struct kvm *kvm,
				      struct kvm_userspace_memory_region *mem)
{
	if (mem->guest_phys_addr == 0 && mem->memory_size != 0)
		kvmppc_map_vrma(kvm, mem);
}

int kvmppc_core_init_vm(struct kvm *kvm)
{
	long r;

	/* Allocate hashed page table */
	r = kvmppc_alloc_hpt(kvm);
	if (r)
		return r;

	INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables);
	return 0;
}

void kvmppc_core_destroy_vm(struct kvm *kvm)
{
	kvmppc_free_hpt(kvm);
	WARN_ON(!list_empty(&kvm->arch.spapr_tce_tables));
}

/* These are stubs for now */
void kvmppc_mmu_pte_pflush(struct kvm_vcpu *vcpu, ulong pa_start, ulong pa_end)
{
}

/* We don't need to emulate any privileged instructions or dcbz */
int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
			   unsigned int inst, int *advance)
{
	return EMULATE_FAIL;
}

int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
{
	return EMULATE_FAIL;
}

int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
{
	return EMULATE_FAIL;
}

static int kvmppc_book3s_hv_init(void)
{
	int r;

	r = kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
	if (r)
		return r;

	r = kvmppc_mmu_hv_init();

	return r;
}

static void kvmppc_book3s_hv_exit(void)
{
	kvm_exit();
}

module_init(kvmppc_book3s_hv_init);
module_exit(kvmppc_book3s_hv_exit);