1 #include "qemu/osdep.h"
2 #include "qemu/cutils.h"
3 #include "qapi/error.h"
4 #include "sysemu/hw_accel.h"
5 #include "sysemu/runstate.h"
7 #include "qemu/main-loop.h"
8 #include "qemu/module.h"
9 #include "qemu/error-report.h"
10 #include "exec/exec-all.h"
11 #include "helper_regs.h"
12 #include "hw/ppc/ppc.h"
13 #include "hw/ppc/spapr.h"
14 #include "hw/ppc/spapr_cpu_core.h"
15 #include "mmu-hash64.h"
16 #include "cpu-models.h"
19 #include "hw/ppc/fdt.h"
20 #include "hw/ppc/spapr_ovec.h"
21 #include "hw/ppc/spapr_numa.h"
22 #include "mmu-book3s-v3.h"
23 #include "hw/mem/memory-device.h"
25 bool is_ram_address(SpaprMachineState
*spapr
, hwaddr addr
)
27 MachineState
*machine
= MACHINE(spapr
);
28 DeviceMemoryState
*dms
= machine
->device_memory
;
30 if (addr
< machine
->ram_size
) {
33 if ((addr
>= dms
->base
)
34 && ((addr
- dms
->base
) < memory_region_size(&dms
->mr
))) {
41 /* Convert a return code from the KVM ioctl()s implementing resize HPT
42 * into a PAPR hypercall return code */
43 static target_ulong
resize_hpt_convert_rc(int ret
)
46 return H_LONG_BUSY_ORDER_100_SEC
;
47 } else if (ret
>= 10000) {
48 return H_LONG_BUSY_ORDER_10_SEC
;
49 } else if (ret
>= 1000) {
50 return H_LONG_BUSY_ORDER_1_SEC
;
51 } else if (ret
>= 100) {
52 return H_LONG_BUSY_ORDER_100_MSEC
;
53 } else if (ret
>= 10) {
54 return H_LONG_BUSY_ORDER_10_MSEC
;
56 return H_LONG_BUSY_ORDER_1_MSEC
;
79 static target_ulong
h_resize_hpt_prepare(PowerPCCPU
*cpu
,
80 SpaprMachineState
*spapr
,
84 target_ulong flags
= args
[0];
86 uint64_t current_ram_size
;
89 if (spapr
->resize_hpt
== SPAPR_RESIZE_HPT_DISABLED
) {
93 if (!spapr
->htab_shift
) {
94 /* Radix guest, no HPT */
95 return H_NOT_AVAILABLE
;
98 trace_spapr_h_resize_hpt_prepare(flags
, shift
);
104 if (shift
&& ((shift
< 18) || (shift
> 46))) {
108 current_ram_size
= MACHINE(spapr
)->ram_size
+ get_plugged_memory_size();
110 /* We only allow the guest to allocate an HPT one order above what
111 * we'd normally give them (to stop a small guest claiming a huge
112 * chunk of resources in the HPT) */
113 if (shift
> (spapr_hpt_shift_for_ramsize(current_ram_size
) + 1)) {
117 rc
= kvmppc_resize_hpt_prepare(cpu
, flags
, shift
);
119 return resize_hpt_convert_rc(rc
);
126 return softmmu_resize_hpt_prepare(cpu
, spapr
, shift
);
129 static void do_push_sregs_to_kvm_pr(CPUState
*cs
, run_on_cpu_data data
)
133 cpu_synchronize_state(cs
);
135 ret
= kvmppc_put_books_sregs(POWERPC_CPU(cs
));
137 error_report("failed to push sregs to KVM: %s", strerror(-ret
));
142 void push_sregs_to_kvm_pr(SpaprMachineState
*spapr
)
147 * This is a hack for the benefit of KVM PR - it abuses the SDR1
148 * slot in kvm_sregs to communicate the userspace address of the
151 if (!kvm_enabled() || !spapr
->htab
) {
156 run_on_cpu(cs
, do_push_sregs_to_kvm_pr
, RUN_ON_CPU_NULL
);
160 static target_ulong
h_resize_hpt_commit(PowerPCCPU
*cpu
,
161 SpaprMachineState
*spapr
,
165 target_ulong flags
= args
[0];
166 target_ulong shift
= args
[1];
169 if (spapr
->resize_hpt
== SPAPR_RESIZE_HPT_DISABLED
) {
173 if (!spapr
->htab_shift
) {
174 /* Radix guest, no HPT */
175 return H_NOT_AVAILABLE
;
178 trace_spapr_h_resize_hpt_commit(flags
, shift
);
180 rc
= kvmppc_resize_hpt_commit(cpu
, flags
, shift
);
182 rc
= resize_hpt_convert_rc(rc
);
183 if (rc
== H_SUCCESS
) {
184 /* Need to set the new htab_shift in the machine state */
185 spapr
->htab_shift
= shift
;
194 return softmmu_resize_hpt_commit(cpu
, spapr
, flags
, shift
);
199 static target_ulong
h_set_sprg0(PowerPCCPU
*cpu
, SpaprMachineState
*spapr
,
200 target_ulong opcode
, target_ulong
*args
)
202 cpu_synchronize_state(CPU(cpu
));
203 cpu
->env
.spr
[SPR_SPRG0
] = args
[0];
208 static target_ulong
h_set_dabr(PowerPCCPU
*cpu
, SpaprMachineState
*spapr
,
209 target_ulong opcode
, target_ulong
*args
)
211 if (!ppc_has_spr(cpu
, SPR_DABR
)) {
212 return H_HARDWARE
; /* DABR register not available */
214 cpu_synchronize_state(CPU(cpu
));
216 if (ppc_has_spr(cpu
, SPR_DABRX
)) {
217 cpu
->env
.spr
[SPR_DABRX
] = 0x3; /* Use Problem and Privileged state */
218 } else if (!(args
[0] & 0x4)) { /* Breakpoint Translation set? */
219 return H_RESERVED_DABR
;
222 cpu
->env
.spr
[SPR_DABR
] = args
[0];
226 static target_ulong
h_set_xdabr(PowerPCCPU
*cpu
, SpaprMachineState
*spapr
,
227 target_ulong opcode
, target_ulong
*args
)
229 target_ulong dabrx
= args
[1];
231 if (!ppc_has_spr(cpu
, SPR_DABR
) || !ppc_has_spr(cpu
, SPR_DABRX
)) {
235 if ((dabrx
& ~0xfULL
) != 0 || (dabrx
& H_DABRX_HYPERVISOR
) != 0
236 || (dabrx
& (H_DABRX_KERNEL
| H_DABRX_USER
)) == 0) {
240 cpu_synchronize_state(CPU(cpu
));
241 cpu
->env
.spr
[SPR_DABRX
] = dabrx
;
242 cpu
->env
.spr
[SPR_DABR
] = args
[0];
247 static target_ulong
h_page_init(PowerPCCPU
*cpu
, SpaprMachineState
*spapr
,
248 target_ulong opcode
, target_ulong
*args
)
250 target_ulong flags
= args
[0];
251 hwaddr dst
= args
[1];
252 hwaddr src
= args
[2];
253 hwaddr len
= TARGET_PAGE_SIZE
;
254 uint8_t *pdst
, *psrc
;
255 target_long ret
= H_SUCCESS
;
257 if (flags
& ~(H_ICACHE_SYNCHRONIZE
| H_ICACHE_INVALIDATE
258 | H_COPY_PAGE
| H_ZERO_PAGE
)) {
259 qemu_log_mask(LOG_UNIMP
, "h_page_init: Bad flags (" TARGET_FMT_lx
"\n",
264 /* Map-in destination */
265 if (!is_ram_address(spapr
, dst
) || (dst
& ~TARGET_PAGE_MASK
) != 0) {
268 pdst
= cpu_physical_memory_map(dst
, &len
, true);
269 if (!pdst
|| len
!= TARGET_PAGE_SIZE
) {
273 if (flags
& H_COPY_PAGE
) {
274 /* Map-in source, copy to destination, and unmap source again */
275 if (!is_ram_address(spapr
, src
) || (src
& ~TARGET_PAGE_MASK
) != 0) {
279 psrc
= cpu_physical_memory_map(src
, &len
, false);
280 if (!psrc
|| len
!= TARGET_PAGE_SIZE
) {
284 memcpy(pdst
, psrc
, len
);
285 cpu_physical_memory_unmap(psrc
, len
, 0, len
);
286 } else if (flags
& H_ZERO_PAGE
) {
287 memset(pdst
, 0, len
); /* Just clear the destination page */
290 if (kvm_enabled() && (flags
& H_ICACHE_SYNCHRONIZE
) != 0) {
291 kvmppc_dcbst_range(cpu
, pdst
, len
);
293 if (flags
& (H_ICACHE_SYNCHRONIZE
| H_ICACHE_INVALIDATE
)) {
295 kvmppc_icbi_range(cpu
, pdst
, len
);
302 cpu_physical_memory_unmap(pdst
, TARGET_PAGE_SIZE
, 1, len
);
/*
 * Sub-function selectors for h_register_vpa().  They appear as the case
 * labels of that handler's dispatch and choose between registering or
 * deregistering the VPA, the dispatch trace log (DTL) and the SLB shadow
 * buffer.  Each DEREGISTER value is the matching REGISTER value with bit
 * 0x0000800000000000 additionally set.
 */
#define FLAGS_REGISTER_VPA         0x0000200000000000ULL
#define FLAGS_REGISTER_DTL         0x0000400000000000ULL
#define FLAGS_REGISTER_SLBSHADOW   0x0000600000000000ULL
#define FLAGS_DEREGISTER_VPA       0x0000a00000000000ULL
#define FLAGS_DEREGISTER_DTL       0x0000c00000000000ULL
#define FLAGS_DEREGISTER_SLBSHADOW 0x0000e00000000000ULL
313 static target_ulong
register_vpa(PowerPCCPU
*cpu
, target_ulong vpa
)
315 CPUState
*cs
= CPU(cpu
);
316 CPUPPCState
*env
= &cpu
->env
;
317 SpaprCpuState
*spapr_cpu
= spapr_cpu_state(cpu
);
322 hcall_dprintf("Can't cope with registering a VPA at logical 0\n");
326 if (vpa
% env
->dcache_line_size
) {
329 /* FIXME: bounds check the address */
331 size
= lduw_be_phys(cs
->as
, vpa
+ 0x4);
333 if (size
< VPA_MIN_SIZE
) {
337 /* VPA is not allowed to cross a page boundary */
338 if ((vpa
/ 4096) != ((vpa
+ size
- 1) / 4096)) {
342 spapr_cpu
->vpa_addr
= vpa
;
344 tmp
= ldub_phys(cs
->as
, spapr_cpu
->vpa_addr
+ VPA_SHARED_PROC_OFFSET
);
345 tmp
|= VPA_SHARED_PROC_VAL
;
346 stb_phys(cs
->as
, spapr_cpu
->vpa_addr
+ VPA_SHARED_PROC_OFFSET
, tmp
);
351 static target_ulong
deregister_vpa(PowerPCCPU
*cpu
, target_ulong vpa
)
353 SpaprCpuState
*spapr_cpu
= spapr_cpu_state(cpu
);
355 if (spapr_cpu
->slb_shadow_addr
) {
359 if (spapr_cpu
->dtl_addr
) {
363 spapr_cpu
->vpa_addr
= 0;
367 static target_ulong
register_slb_shadow(PowerPCCPU
*cpu
, target_ulong addr
)
369 SpaprCpuState
*spapr_cpu
= spapr_cpu_state(cpu
);
373 hcall_dprintf("Can't cope with SLB shadow at logical 0\n");
377 size
= ldl_be_phys(CPU(cpu
)->as
, addr
+ 0x4);
382 if ((addr
/ 4096) != ((addr
+ size
- 1) / 4096)) {
386 if (!spapr_cpu
->vpa_addr
) {
390 spapr_cpu
->slb_shadow_addr
= addr
;
391 spapr_cpu
->slb_shadow_size
= size
;
396 static target_ulong
deregister_slb_shadow(PowerPCCPU
*cpu
, target_ulong addr
)
398 SpaprCpuState
*spapr_cpu
= spapr_cpu_state(cpu
);
400 spapr_cpu
->slb_shadow_addr
= 0;
401 spapr_cpu
->slb_shadow_size
= 0;
405 static target_ulong
register_dtl(PowerPCCPU
*cpu
, target_ulong addr
)
407 SpaprCpuState
*spapr_cpu
= spapr_cpu_state(cpu
);
411 hcall_dprintf("Can't cope with DTL at logical 0\n");
415 size
= ldl_be_phys(CPU(cpu
)->as
, addr
+ 0x4);
421 if (!spapr_cpu
->vpa_addr
) {
425 spapr_cpu
->dtl_addr
= addr
;
426 spapr_cpu
->dtl_size
= size
;
431 static target_ulong
deregister_dtl(PowerPCCPU
*cpu
, target_ulong addr
)
433 SpaprCpuState
*spapr_cpu
= spapr_cpu_state(cpu
);
435 spapr_cpu
->dtl_addr
= 0;
436 spapr_cpu
->dtl_size
= 0;
441 static target_ulong
h_register_vpa(PowerPCCPU
*cpu
, SpaprMachineState
*spapr
,
442 target_ulong opcode
, target_ulong
*args
)
444 target_ulong flags
= args
[0];
445 target_ulong procno
= args
[1];
446 target_ulong vpa
= args
[2];
447 target_ulong ret
= H_PARAMETER
;
450 tcpu
= spapr_find_cpu(procno
);
456 case FLAGS_REGISTER_VPA
:
457 ret
= register_vpa(tcpu
, vpa
);
460 case FLAGS_DEREGISTER_VPA
:
461 ret
= deregister_vpa(tcpu
, vpa
);
464 case FLAGS_REGISTER_SLBSHADOW
:
465 ret
= register_slb_shadow(tcpu
, vpa
);
468 case FLAGS_DEREGISTER_SLBSHADOW
:
469 ret
= deregister_slb_shadow(tcpu
, vpa
);
472 case FLAGS_REGISTER_DTL
:
473 ret
= register_dtl(tcpu
, vpa
);
476 case FLAGS_DEREGISTER_DTL
:
477 ret
= deregister_dtl(tcpu
, vpa
);
484 static target_ulong
h_cede(PowerPCCPU
*cpu
, SpaprMachineState
*spapr
,
485 target_ulong opcode
, target_ulong
*args
)
487 CPUPPCState
*env
= &cpu
->env
;
488 CPUState
*cs
= CPU(cpu
);
489 SpaprCpuState
*spapr_cpu
= spapr_cpu_state(cpu
);
491 env
->msr
|= (1ULL << MSR_EE
);
492 hreg_compute_hflags(env
);
493 ppc_maybe_interrupt(env
);
495 if (spapr_cpu
->prod
) {
496 spapr_cpu
->prod
= false;
500 if (!cpu_has_work(cs
)) {
502 cs
->exception_index
= EXCP_HLT
;
503 cs
->exit_request
= 1;
504 ppc_maybe_interrupt(env
);
511 * Confer to self, aka join. Cede could use the same pattern as well, if
512 * EXCP_HLT can be changed to EXCP_HALTED.
514 static target_ulong
h_confer_self(PowerPCCPU
*cpu
)
516 CPUState
*cs
= CPU(cpu
);
517 SpaprCpuState
*spapr_cpu
= spapr_cpu_state(cpu
);
519 if (spapr_cpu
->prod
) {
520 spapr_cpu
->prod
= false;
524 cs
->exception_index
= EXCP_HALTED
;
525 cs
->exit_request
= 1;
526 ppc_maybe_interrupt(&cpu
->env
);
531 static target_ulong
h_join(PowerPCCPU
*cpu
, SpaprMachineState
*spapr
,
532 target_ulong opcode
, target_ulong
*args
)
534 CPUPPCState
*env
= &cpu
->env
;
536 bool last_unjoined
= true;
538 if (env
->msr
& (1ULL << MSR_EE
)) {
543 * Must not join the last CPU running. Interestingly, no such restriction
544 * for H_CONFER-to-self, but that is probably not intended to be used
545 * when H_JOIN is available.
548 PowerPCCPU
*c
= POWERPC_CPU(cs
);
549 CPUPPCState
*e
= &c
->env
;
554 /* Don't have a way to indicate joined, so use halted && MSR[EE]=0 */
555 if (!cs
->halted
|| (e
->msr
& (1ULL << MSR_EE
))) {
556 last_unjoined
= false;
564 return h_confer_self(cpu
);
567 static target_ulong
h_confer(PowerPCCPU
*cpu
, SpaprMachineState
*spapr
,
568 target_ulong opcode
, target_ulong
*args
)
570 target_long target
= args
[0];
571 uint32_t dispatch
= args
[1];
572 CPUState
*cs
= CPU(cpu
);
573 SpaprCpuState
*spapr_cpu
;
576 * -1 means confer to all other CPUs without dispatch counter check,
577 * otherwise it's a targeted confer.
580 PowerPCCPU
*target_cpu
= spapr_find_cpu(target
);
581 uint32_t target_dispatch
;
588 * target == self is a special case, we wait until prodded, without
589 * dispatch counter check.
591 if (cpu
== target_cpu
) {
592 return h_confer_self(cpu
);
595 spapr_cpu
= spapr_cpu_state(target_cpu
);
596 if (!spapr_cpu
->vpa_addr
|| ((dispatch
& 1) == 0)) {
600 target_dispatch
= ldl_be_phys(cs
->as
,
601 spapr_cpu
->vpa_addr
+ VPA_DISPATCH_COUNTER
);
602 if (target_dispatch
!= dispatch
) {
607 * The targeted confer does not do anything special beyond yielding
608 * the current vCPU, but even this should be better than nothing.
609 * At least for single-threaded tcg, it gives the target a chance to
610 * run before we run again. Multi-threaded tcg does not really do
611 * anything with EXCP_YIELD yet.
615 cs
->exception_index
= EXCP_YIELD
;
616 cs
->exit_request
= 1;
622 static target_ulong
h_prod(PowerPCCPU
*cpu
, SpaprMachineState
*spapr
,
623 target_ulong opcode
, target_ulong
*args
)
625 target_long target
= args
[0];
628 SpaprCpuState
*spapr_cpu
;
630 tcpu
= spapr_find_cpu(target
);
636 spapr_cpu
= spapr_cpu_state(tcpu
);
637 spapr_cpu
->prod
= true;
639 ppc_maybe_interrupt(&cpu
->env
);
645 static target_ulong
h_rtas(PowerPCCPU
*cpu
, SpaprMachineState
*spapr
,
646 target_ulong opcode
, target_ulong
*args
)
648 target_ulong rtas_r3
= args
[0];
649 uint32_t token
= rtas_ld(rtas_r3
, 0);
650 uint32_t nargs
= rtas_ld(rtas_r3
, 1);
651 uint32_t nret
= rtas_ld(rtas_r3
, 2);
653 return spapr_rtas_call(cpu
, spapr
, token
, nargs
, rtas_r3
+ 12,
654 nret
, rtas_r3
+ 12 + 4*nargs
);
657 static target_ulong
h_logical_load(PowerPCCPU
*cpu
, SpaprMachineState
*spapr
,
658 target_ulong opcode
, target_ulong
*args
)
660 CPUState
*cs
= CPU(cpu
);
661 target_ulong size
= args
[0];
662 target_ulong addr
= args
[1];
666 args
[0] = ldub_phys(cs
->as
, addr
);
669 args
[0] = lduw_phys(cs
->as
, addr
);
672 args
[0] = ldl_phys(cs
->as
, addr
);
675 args
[0] = ldq_phys(cs
->as
, addr
);
681 static target_ulong
h_logical_store(PowerPCCPU
*cpu
, SpaprMachineState
*spapr
,
682 target_ulong opcode
, target_ulong
*args
)
684 CPUState
*cs
= CPU(cpu
);
686 target_ulong size
= args
[0];
687 target_ulong addr
= args
[1];
688 target_ulong val
= args
[2];
692 stb_phys(cs
->as
, addr
, val
);
695 stw_phys(cs
->as
, addr
, val
);
698 stl_phys(cs
->as
, addr
, val
);
701 stq_phys(cs
->as
, addr
, val
);
707 static target_ulong
h_logical_memop(PowerPCCPU
*cpu
, SpaprMachineState
*spapr
,
708 target_ulong opcode
, target_ulong
*args
)
710 CPUState
*cs
= CPU(cpu
);
712 target_ulong dst
= args
[0]; /* Destination address */
713 target_ulong src
= args
[1]; /* Source address */
714 target_ulong esize
= args
[2]; /* Element size (0=1,1=2,2=4,3=8) */
715 target_ulong count
= args
[3]; /* Element count */
716 target_ulong op
= args
[4]; /* 0 = copy, 1 = invert */
718 unsigned int mask
= (1 << esize
) - 1;
719 int step
= 1 << esize
;
721 if (count
> 0x80000000) {
725 if ((dst
& mask
) || (src
& mask
) || (op
> 1)) {
729 if (dst
>= src
&& dst
< (src
+ (count
<< esize
))) {
730 dst
= dst
+ ((count
- 1) << esize
);
731 src
= src
+ ((count
- 1) << esize
);
738 tmp
= ldub_phys(cs
->as
, src
);
741 tmp
= lduw_phys(cs
->as
, src
);
744 tmp
= ldl_phys(cs
->as
, src
);
747 tmp
= ldq_phys(cs
->as
, src
);
757 stb_phys(cs
->as
, dst
, tmp
);
760 stw_phys(cs
->as
, dst
, tmp
);
763 stl_phys(cs
->as
, dst
, tmp
);
766 stq_phys(cs
->as
, dst
, tmp
);
776 static target_ulong
h_logical_icbi(PowerPCCPU
*cpu
, SpaprMachineState
*spapr
,
777 target_ulong opcode
, target_ulong
*args
)
779 /* Nothing to do on emulation, KVM will trap this in the kernel */
783 static target_ulong
h_logical_dcbf(PowerPCCPU
*cpu
, SpaprMachineState
*spapr
,
784 target_ulong opcode
, target_ulong
*args
)
786 /* Nothing to do on emulation, KVM will trap this in the kernel */
790 static target_ulong
h_set_mode_resource_le(PowerPCCPU
*cpu
,
791 SpaprMachineState
*spapr
,
804 case H_SET_MODE_ENDIAN_BIG
:
805 spapr_set_all_lpcrs(0, LPCR_ILE
);
806 spapr_pci_switch_vga(spapr
, true);
809 case H_SET_MODE_ENDIAN_LITTLE
:
810 spapr_set_all_lpcrs(LPCR_ILE
, LPCR_ILE
);
811 spapr_pci_switch_vga(spapr
, false);
815 return H_UNSUPPORTED_FLAG
;
818 static target_ulong
h_set_mode_resource_addr_trans_mode(PowerPCCPU
*cpu
,
823 PowerPCCPUClass
*pcc
= POWERPC_CPU_GET_CLASS(cpu
);
825 if (!(pcc
->insns_flags2
& PPC2_ISA207S
)) {
836 /* AIL=1 is reserved in POWER8/POWER9/POWER10 */
837 return H_UNSUPPORTED_FLAG
;
840 if (mflags
== 2 && (pcc
->insns_flags2
& PPC2_ISA310
)) {
841 /* AIL=2 is reserved in POWER10 (ISA v3.1) */
842 return H_UNSUPPORTED_FLAG
;
845 spapr_set_all_lpcrs(mflags
<< LPCR_AIL_SHIFT
, LPCR_AIL
);
850 static target_ulong
h_set_mode(PowerPCCPU
*cpu
, SpaprMachineState
*spapr
,
851 target_ulong opcode
, target_ulong
*args
)
853 target_ulong resource
= args
[1];
854 target_ulong ret
= H_P2
;
857 case H_SET_MODE_RESOURCE_LE
:
858 ret
= h_set_mode_resource_le(cpu
, spapr
, args
[0], args
[2], args
[3]);
860 case H_SET_MODE_RESOURCE_ADDR_TRANS_MODE
:
861 ret
= h_set_mode_resource_addr_trans_mode(cpu
, args
[0],
869 static target_ulong
h_clean_slb(PowerPCCPU
*cpu
, SpaprMachineState
*spapr
,
870 target_ulong opcode
, target_ulong
*args
)
872 qemu_log_mask(LOG_UNIMP
, "Unimplemented SPAPR hcall 0x"TARGET_FMT_lx
"%s\n",
873 opcode
, " (H_CLEAN_SLB)");
877 static target_ulong
h_invalidate_pid(PowerPCCPU
*cpu
, SpaprMachineState
*spapr
,
878 target_ulong opcode
, target_ulong
*args
)
880 qemu_log_mask(LOG_UNIMP
, "Unimplemented SPAPR hcall 0x"TARGET_FMT_lx
"%s\n",
881 opcode
, " (H_INVALIDATE_PID)");
885 static void spapr_check_setup_free_hpt(SpaprMachineState
*spapr
,
886 uint64_t patbe_old
, uint64_t patbe_new
)
890 * HASH->HASH || RADIX->RADIX || NOTHING->RADIX : Do Nothing
891 * HASH->RADIX : Free HPT
892 * RADIX->HASH : Allocate HPT
893 * NOTHING->HASH : Allocate HPT
894 * Note: NOTHING implies the case where we said the guest could choose
895 * later and so assumed radix and now it's called H_REG_PROC_TBL
898 if ((patbe_old
& PATE1_GR
) == (patbe_new
& PATE1_GR
)) {
899 /* We assume RADIX, so this catches all the "Do Nothing" cases */
900 } else if (!(patbe_old
& PATE1_GR
)) {
901 /* HASH->RADIX : Free HPT */
902 spapr_free_hpt(spapr
);
903 } else if (!(patbe_new
& PATE1_GR
)) {
904 /* RADIX->HASH || NOTHING->HASH : Allocate HPT */
905 spapr_setup_hpt(spapr
);
/*
 * Flag bits accepted in args[0] of h_register_process_table().
 * FLAGS_MASK is the union of the five individual flags; any bit outside
 * it is treated by the handler as a reserved bit.
 */
#define FLAGS_MASK              0x01FULL
#define FLAG_MODIFY             0x10
#define FLAG_REGISTER           0x08
#define FLAG_RADIX              0x04
#define FLAG_HASH_PROC_TBL      0x02
#define FLAG_GTSE               0x01
917 static target_ulong
h_register_process_table(PowerPCCPU
*cpu
,
918 SpaprMachineState
*spapr
,
922 target_ulong flags
= args
[0];
923 target_ulong proc_tbl
= args
[1];
924 target_ulong page_size
= args
[2];
925 target_ulong table_size
= args
[3];
926 target_ulong update_lpcr
= 0;
927 target_ulong table_byte_size
;
930 if (flags
& ~FLAGS_MASK
) { /* Check no reserved bits are set */
933 if (flags
& FLAG_MODIFY
) {
934 if (flags
& FLAG_REGISTER
) {
935 /* Check process table alignment */
936 table_byte_size
= 1ULL << (table_size
+ 12);
937 if (proc_tbl
& (table_byte_size
- 1)) {
938 qemu_log_mask(LOG_GUEST_ERROR
,
939 "%s: process table not properly aligned: proc_tbl 0x"
940 TARGET_FMT_lx
" proc_tbl_size 0x"TARGET_FMT_lx
"\n",
941 __func__
, proc_tbl
, table_byte_size
);
943 if (flags
& FLAG_RADIX
) { /* Register new RADIX process table */
944 if (proc_tbl
& 0xfff || proc_tbl
>> 60) {
946 } else if (page_size
) {
948 } else if (table_size
> 24) {
951 cproc
= PATE1_GR
| proc_tbl
| table_size
;
952 } else { /* Register new HPT process table */
953 if (flags
& FLAG_HASH_PROC_TBL
) { /* Hash with Segment Tables */
954 /* TODO - Not Supported */
955 /* Technically caused by flag bits => H_PARAMETER */
957 } else { /* Hash with SLB */
958 if (proc_tbl
>> 38) {
960 } else if (page_size
& ~0x7) {
962 } else if (table_size
> 24) {
966 cproc
= (proc_tbl
<< 25) | page_size
<< 5 | table_size
;
969 } else { /* Deregister current process table */
971 * Set to benign value: (current GR) | 0. This allows
972 * deregistration in KVM to succeed even if the radix bit
973 * in flags doesn't match the radix bit in the old PATE.
975 cproc
= spapr
->patb_entry
& PATE1_GR
;
977 } else { /* Maintain current registration */
978 if (!(flags
& FLAG_RADIX
) != !(spapr
->patb_entry
& PATE1_GR
)) {
979 /* Technically caused by flag bits => H_PARAMETER */
980 return H_PARAMETER
; /* Existing Process Table Mismatch */
982 cproc
= spapr
->patb_entry
;
985 /* Check if we need to setup OR free the hpt */
986 spapr_check_setup_free_hpt(spapr
, spapr
->patb_entry
, cproc
);
988 spapr
->patb_entry
= cproc
; /* Save new process table */
990 /* Update the UPRT, HR and GTSE bits in the LPCR for all cpus */
991 if (flags
& FLAG_RADIX
) /* Radix must use process tables, also set HR */
992 update_lpcr
|= (LPCR_UPRT
| LPCR_HR
);
993 else if (flags
& FLAG_HASH_PROC_TBL
) /* Hash with process tables */
994 update_lpcr
|= LPCR_UPRT
;
995 if (flags
& FLAG_GTSE
) /* Guest translation shootdown enable */
996 update_lpcr
|= LPCR_GTSE
;
998 spapr_set_all_lpcrs(update_lpcr
, LPCR_UPRT
| LPCR_HR
| LPCR_GTSE
);
1000 if (kvm_enabled()) {
1001 return kvmppc_configure_v3_mmu(cpu
, flags
& FLAG_RADIX
,
1002 flags
& FLAG_GTSE
, cproc
);
/*
 * Special (negative) target values understood by h_signal_sys_reset();
 * that handler rejects any target below H_SIGNAL_SYS_RESET_ALLBUTSELF.
 * The values are parenthesized so the macros expand safely inside larger
 * expressions.
 */
#define H_SIGNAL_SYS_RESET_ALL         (-1)
#define H_SIGNAL_SYS_RESET_ALLBUTSELF  (-2)
1010 static target_ulong
h_signal_sys_reset(PowerPCCPU
*cpu
,
1011 SpaprMachineState
*spapr
,
1012 target_ulong opcode
, target_ulong
*args
)
1014 target_long target
= args
[0];
1019 if (target
< H_SIGNAL_SYS_RESET_ALLBUTSELF
) {
1024 PowerPCCPU
*c
= POWERPC_CPU(cs
);
1026 if (target
== H_SIGNAL_SYS_RESET_ALLBUTSELF
) {
1031 run_on_cpu(cs
, spapr_do_system_reset_on_cpu
, RUN_ON_CPU_NULL
);
1037 cs
= CPU(spapr_find_cpu(target
));
1039 run_on_cpu(cs
, spapr_do_system_reset_on_cpu
, RUN_ON_CPU_NULL
);
1046 /* Returns either a logical PVR or zero if none was found */
1047 static uint32_t cas_check_pvr(PowerPCCPU
*cpu
, uint32_t max_compat
,
1048 target_ulong
*addr
, bool *raw_mode_supported
)
1050 bool explicit_match
= false; /* Matched the CPU's real PVR */
1051 uint32_t best_compat
= 0;
1055 * We scan the supplied table of PVRs looking for two things
1056 * 1. Is our real CPU PVR in the list?
1057 * 2. What's the "best" listed logical PVR
1059 for (i
= 0; i
< 512; ++i
) {
1060 uint32_t pvr
, pvr_mask
;
1062 pvr_mask
= ldl_be_phys(&address_space_memory
, *addr
);
1063 pvr
= ldl_be_phys(&address_space_memory
, *addr
+ 4);
1066 if (~pvr_mask
& pvr
) {
1067 break; /* Terminator record */
1070 if ((cpu
->env
.spr
[SPR_PVR
] & pvr_mask
) == (pvr
& pvr_mask
)) {
1071 explicit_match
= true;
1073 if (ppc_check_compat(cpu
, pvr
, best_compat
, max_compat
)) {
1079 *raw_mode_supported
= explicit_match
;
1081 /* Parsing finished */
1082 trace_spapr_cas_pvr(cpu
->compat_pvr
, explicit_match
, best_compat
);
1088 target_ulong
do_client_architecture_support(PowerPCCPU
*cpu
,
1089 SpaprMachineState
*spapr
,
1091 target_ulong fdt_bufsize
)
1093 target_ulong ov_table
; /* Working address in data buffer */
1095 SpaprOptionVector
*ov1_guest
, *ov5_guest
;
1097 bool raw_mode_supported
= false;
1101 uint32_t max_compat
= spapr
->max_compat_pvr
;
1103 /* CAS is supposed to be called early when only the boot vCPU is active. */
1105 if (cs
== CPU(cpu
)) {
1109 warn_report("guest has multiple active vCPUs at CAS, which is not allowed");
1110 return H_MULTI_THREADS_ACTIVE
;
1114 cas_pvr
= cas_check_pvr(cpu
, max_compat
, &vec
, &raw_mode_supported
);
1115 if (!cas_pvr
&& (!raw_mode_supported
|| max_compat
)) {
1117 * We couldn't find a suitable compatibility mode, and either
1118 * the guest doesn't support "raw" mode for this CPU, or "raw"
1119 * mode is disabled because a maximum compat mode is set.
1121 error_report("Couldn't negotiate a suitable PVR during CAS");
1126 if (cpu
->compat_pvr
!= cas_pvr
) {
1127 Error
*local_err
= NULL
;
1129 if (ppc_set_compat_all(cas_pvr
, &local_err
) < 0) {
1130 /* We fail to set compat mode (likely because running with KVM PR),
1131 * but maybe we can fallback to raw mode if the guest supports it.
1133 if (!raw_mode_supported
) {
1134 error_report_err(local_err
);
1137 error_free(local_err
);
1141 /* For the future use: here @ov_table points to the first option vector */
1144 ov1_guest
= spapr_ovec_parse_vector(ov_table
, 1);
1146 warn_report("guest didn't provide option vector 1");
1149 ov5_guest
= spapr_ovec_parse_vector(ov_table
, 5);
1151 spapr_ovec_cleanup(ov1_guest
);
1152 warn_report("guest didn't provide option vector 5");
1155 if (spapr_ovec_test(ov5_guest
, OV5_MMU_BOTH
)) {
1156 error_report("guest requested hash and radix MMU, which is invalid.");
1159 if (spapr_ovec_test(ov5_guest
, OV5_XIVE_BOTH
)) {
1160 error_report("guest requested an invalid interrupt mode");
1164 guest_radix
= spapr_ovec_test(ov5_guest
, OV5_MMU_RADIX_300
);
1166 guest_xive
= spapr_ovec_test(ov5_guest
, OV5_XIVE_EXPLOIT
);
1169 * HPT resizing is a bit of a special case, because when enabled
1170 * we assume an HPT guest will support it until it says it
1171 * doesn't, instead of assuming it won't support it until it says
1172 * it does. Strictly speaking that approach could break for
1173 * guests which don't make a CAS call, but those are so old we
1174 * don't care about them. Without that assumption we'd have to
1175 * make at least a temporary allocation of an HPT sized for max
1176 * memory, which could be impossibly difficult under KVM HV if
1179 if (!guest_radix
&& !spapr_ovec_test(ov5_guest
, OV5_HPT_RESIZE
)) {
1180 int maxshift
= spapr_hpt_shift_for_ramsize(MACHINE(spapr
)->maxram_size
);
1182 if (spapr
->resize_hpt
== SPAPR_RESIZE_HPT_REQUIRED
) {
1184 "h_client_architecture_support: Guest doesn't support HPT resizing, but resize-hpt=required");
1188 if (spapr
->htab_shift
< maxshift
) {
1189 /* Guest doesn't know about HPT resizing, so we
1190 * pre-emptively resize for the maximum permitted RAM. At
1191 * the point this is called, nothing should have been
1192 * entered into the existing HPT */
1193 spapr_reallocate_hpt(spapr
, maxshift
, &error_fatal
);
1194 push_sregs_to_kvm_pr(spapr
);
1198 /* NOTE: there are actually a number of ov5 bits where input from the
1199 * guest is always zero, and the platform/QEMU enables them independently
1200 * of guest input. To model these properly we'd want some sort of mask,
1201 * but since they only currently apply to memory migration as defined
1202 * by LoPAPR 1.1, 14.5.4.8, which QEMU doesn't implement, we don't need
1203 * to worry about this for now.
1206 /* full range of negotiated ov5 capabilities */
1207 spapr_ovec_intersect(spapr
->ov5_cas
, spapr
->ov5
, ov5_guest
);
1208 spapr_ovec_cleanup(ov5_guest
);
1210 spapr_check_mmu_mode(guest_radix
);
1212 spapr
->cas_pre_isa3_guest
= !spapr_ovec_test(ov1_guest
, OV1_PPC_3_00
);
1213 spapr_ovec_cleanup(ov1_guest
);
1216 * Check for NUMA affinity conditions now that we know which NUMA
1217 * affinity the guest will use.
1219 spapr_numa_associativity_check(spapr
);
1222 * Ensure the guest asks for an interrupt mode we support;
1223 * otherwise terminate the boot.
1226 if (!spapr
->irq
->xive
) {
1228 "Guest requested unavailable interrupt mode (XIVE), try the ic-mode=xive or ic-mode=dual machine property");
1232 if (!spapr
->irq
->xics
) {
1234 "Guest requested unavailable interrupt mode (XICS), either don't set the ic-mode machine property or try ic-mode=xics or ic-mode=dual");
1239 spapr_irq_update_active_intc(spapr
);
1242 * Process all pending hot-plug/unplug requests now. An updated full
1243 * rendered FDT will be returned to the guest.
1245 spapr_drc_reset_all(spapr
);
1246 spapr_clear_pending_hotplug_events(spapr
);
1249 * If spapr_machine_reset() did not set up a HPT but one is necessary
1250 * (because the guest isn't going to use radix) then set it up here.
1252 if ((spapr
->patb_entry
& PATE1_GR
) && !guest_radix
) {
1253 /* legacy hash or new hash: */
1254 spapr_setup_hpt(spapr
);
1257 fdt
= spapr_build_fdt(spapr
, spapr
->vof
!= NULL
, fdt_bufsize
);
1258 g_free(spapr
->fdt_blob
);
1259 spapr
->fdt_size
= fdt_totalsize(fdt
);
1260 spapr
->fdt_initial_size
= spapr
->fdt_size
;
1261 spapr
->fdt_blob
= fdt
;
1264 * Set the machine->fdt pointer again since we just freed
1265 * it above (by freeing spapr->fdt_blob). We set this
1266 * pointer to enable support for the 'dumpdtb' QMP/HMP
1269 MACHINE(spapr
)->fdt
= fdt
;
1274 static target_ulong
h_client_architecture_support(PowerPCCPU
*cpu
,
1275 SpaprMachineState
*spapr
,
1276 target_ulong opcode
,
1279 target_ulong vec
= ppc64_phys_to_real(args
[0]);
1280 target_ulong fdt_buf
= args
[1];
1281 target_ulong fdt_bufsize
= args
[2];
1283 SpaprDeviceTreeUpdateHeader hdr
= { .version_id
= 1 };
1285 if (fdt_bufsize
< sizeof(hdr
)) {
1286 error_report("SLOF provided insufficient CAS buffer "
1287 TARGET_FMT_lu
" (min: %zu)", fdt_bufsize
, sizeof(hdr
));
1291 fdt_bufsize
-= sizeof(hdr
);
1293 ret
= do_client_architecture_support(cpu
, spapr
, vec
, fdt_bufsize
);
1294 if (ret
== H_SUCCESS
) {
1295 _FDT((fdt_pack(spapr
->fdt_blob
)));
1296 spapr
->fdt_size
= fdt_totalsize(spapr
->fdt_blob
);
1297 spapr
->fdt_initial_size
= spapr
->fdt_size
;
1299 cpu_physical_memory_write(fdt_buf
, &hdr
, sizeof(hdr
));
1300 cpu_physical_memory_write(fdt_buf
+ sizeof(hdr
), spapr
->fdt_blob
,
1302 trace_spapr_cas_continue(spapr
->fdt_size
+ sizeof(hdr
));
1308 target_ulong
spapr_vof_client_architecture_support(MachineState
*ms
,
1310 target_ulong ovec_addr
)
1312 SpaprMachineState
*spapr
= SPAPR_MACHINE(ms
);
1314 target_ulong ret
= do_client_architecture_support(POWERPC_CPU(cs
), spapr
,
1315 ovec_addr
, FDT_MAX_SIZE
);
1318 * This adds stdout and generates phandles for boottime and CAS FDTs.
1319 * It is alright to update the FDT here as do_client_architecture_support()
1322 spapr_vof_client_dt_finalize(spapr
, spapr
->fdt_blob
);
1327 static target_ulong
h_get_cpu_characteristics(PowerPCCPU
*cpu
,
1328 SpaprMachineState
*spapr
,
1329 target_ulong opcode
,
1332 uint64_t characteristics
= H_CPU_CHAR_HON_BRANCH_HINTS
&
1333 ~H_CPU_CHAR_THR_RECONF_TRIG
;
1334 uint64_t behaviour
= H_CPU_BEHAV_FAVOUR_SECURITY
;
1335 uint8_t safe_cache
= spapr_get_cap(spapr
, SPAPR_CAP_CFPC
);
1336 uint8_t safe_bounds_check
= spapr_get_cap(spapr
, SPAPR_CAP_SBBC
);
1337 uint8_t safe_indirect_branch
= spapr_get_cap(spapr
, SPAPR_CAP_IBS
);
1338 uint8_t count_cache_flush_assist
= spapr_get_cap(spapr
,
1339 SPAPR_CAP_CCF_ASSIST
);
1341 switch (safe_cache
) {
1342 case SPAPR_CAP_WORKAROUND
:
1343 characteristics
|= H_CPU_CHAR_L1D_FLUSH_ORI30
;
1344 characteristics
|= H_CPU_CHAR_L1D_FLUSH_TRIG2
;
1345 characteristics
|= H_CPU_CHAR_L1D_THREAD_PRIV
;
1346 behaviour
|= H_CPU_BEHAV_L1D_FLUSH_PR
;
1348 case SPAPR_CAP_FIXED
:
1349 behaviour
|= H_CPU_BEHAV_NO_L1D_FLUSH_ENTRY
;
1350 behaviour
|= H_CPU_BEHAV_NO_L1D_FLUSH_UACCESS
;
1352 default: /* broken */
1353 assert(safe_cache
== SPAPR_CAP_BROKEN
);
1354 behaviour
|= H_CPU_BEHAV_L1D_FLUSH_PR
;
1358 switch (safe_bounds_check
) {
1359 case SPAPR_CAP_WORKAROUND
:
1360 characteristics
|= H_CPU_CHAR_SPEC_BAR_ORI31
;
1361 behaviour
|= H_CPU_BEHAV_BNDS_CHK_SPEC_BAR
;
1363 case SPAPR_CAP_FIXED
:
1365 default: /* broken */
1366 assert(safe_bounds_check
== SPAPR_CAP_BROKEN
);
1367 behaviour
|= H_CPU_BEHAV_BNDS_CHK_SPEC_BAR
;
1371 switch (safe_indirect_branch
) {
1372 case SPAPR_CAP_FIXED_NA
:
1374 case SPAPR_CAP_FIXED_CCD
:
1375 characteristics
|= H_CPU_CHAR_CACHE_COUNT_DIS
;
1377 case SPAPR_CAP_FIXED_IBS
:
1378 characteristics
|= H_CPU_CHAR_BCCTRL_SERIALISED
;
1380 case SPAPR_CAP_WORKAROUND
:
1381 behaviour
|= H_CPU_BEHAV_FLUSH_COUNT_CACHE
;
1382 if (count_cache_flush_assist
) {
1383 characteristics
|= H_CPU_CHAR_BCCTR_FLUSH_ASSIST
;
1386 default: /* broken */
1387 assert(safe_indirect_branch
== SPAPR_CAP_BROKEN
);
1391 args
[0] = characteristics
;
1392 args
[1] = behaviour
;
1396 static target_ulong
h_update_dt(PowerPCCPU
*cpu
, SpaprMachineState
*spapr
,
1397 target_ulong opcode
, target_ulong
*args
)
1399 target_ulong dt
= ppc64_phys_to_real(args
[0]);
1400 struct fdt_header hdr
= { 0 };
1402 SpaprMachineClass
*smc
= SPAPR_MACHINE_GET_CLASS(spapr
);
1405 cpu_physical_memory_read(dt
, &hdr
, sizeof(hdr
));
1406 cb
= fdt32_to_cpu(hdr
.totalsize
);
1408 if (!smc
->update_dt_enabled
) {
1412 /* Check that the fdt did not grow out of proportion */
1413 if (cb
> spapr
->fdt_initial_size
* 2) {
1414 trace_spapr_update_dt_failed_size(spapr
->fdt_initial_size
, cb
,
1415 fdt32_to_cpu(hdr
.magic
));
1419 fdt
= g_malloc0(cb
);
1420 cpu_physical_memory_read(dt
, fdt
, cb
);
1422 /* Check the fdt consistency */
1423 if (fdt_check_full(fdt
, cb
)) {
1424 trace_spapr_update_dt_failed_check(spapr
->fdt_initial_size
, cb
,
1425 fdt32_to_cpu(hdr
.magic
));
1429 g_free(spapr
->fdt_blob
);
1430 spapr
->fdt_size
= cb
;
1431 spapr
->fdt_blob
= fdt
;
1432 trace_spapr_update_dt(cb
);
1437 static spapr_hcall_fn papr_hypercall_table
[(MAX_HCALL_OPCODE
/ 4) + 1];
1438 static spapr_hcall_fn kvmppc_hypercall_table
[KVMPPC_HCALL_MAX
- KVMPPC_HCALL_BASE
+ 1];
1439 static spapr_hcall_fn svm_hypercall_table
[(SVM_HCALL_MAX
- SVM_HCALL_BASE
) / 4 + 1];
1441 void spapr_register_hypercall(target_ulong opcode
, spapr_hcall_fn fn
)
1443 spapr_hcall_fn
*slot
;
1445 if (opcode
<= MAX_HCALL_OPCODE
) {
1446 assert((opcode
& 0x3) == 0);
1448 slot
= &papr_hypercall_table
[opcode
/ 4];
1449 } else if (opcode
>= SVM_HCALL_BASE
&& opcode
<= SVM_HCALL_MAX
) {
1450 /* we only have SVM-related hcall numbers assigned in multiples of 4 */
1451 assert((opcode
& 0x3) == 0);
1453 slot
= &svm_hypercall_table
[(opcode
- SVM_HCALL_BASE
) / 4];
1455 assert((opcode
>= KVMPPC_HCALL_BASE
) && (opcode
<= KVMPPC_HCALL_MAX
));
1457 slot
= &kvmppc_hypercall_table
[opcode
- KVMPPC_HCALL_BASE
];
1464 target_ulong
spapr_hypercall(PowerPCCPU
*cpu
, target_ulong opcode
,
1467 SpaprMachineState
*spapr
= SPAPR_MACHINE(qdev_get_machine());
1469 if ((opcode
<= MAX_HCALL_OPCODE
)
1470 && ((opcode
& 0x3) == 0)) {
1471 spapr_hcall_fn fn
= papr_hypercall_table
[opcode
/ 4];
1474 return fn(cpu
, spapr
, opcode
, args
);
1476 } else if ((opcode
>= SVM_HCALL_BASE
) &&
1477 (opcode
<= SVM_HCALL_MAX
)) {
1478 spapr_hcall_fn fn
= svm_hypercall_table
[(opcode
- SVM_HCALL_BASE
) / 4];
1481 return fn(cpu
, spapr
, opcode
, args
);
1483 } else if ((opcode
>= KVMPPC_HCALL_BASE
) &&
1484 (opcode
<= KVMPPC_HCALL_MAX
)) {
1485 spapr_hcall_fn fn
= kvmppc_hypercall_table
[opcode
- KVMPPC_HCALL_BASE
];
1488 return fn(cpu
, spapr
, opcode
, args
);
1492 qemu_log_mask(LOG_UNIMP
, "Unimplemented SPAPR hcall 0x" TARGET_FMT_lx
"\n",
1498 #define PRTS_MASK 0x1f
1500 static target_ulong
h_set_ptbl(PowerPCCPU
*cpu
,
1501 SpaprMachineState
*spapr
,
1502 target_ulong opcode
,
1505 target_ulong ptcr
= args
[0];
1507 if (!spapr_get_cap(spapr
, SPAPR_CAP_NESTED_KVM_HV
)) {
1511 if ((ptcr
& PRTS_MASK
) + 12 - 4 > 12) {
1515 spapr
->nested_ptcr
= ptcr
; /* Save new partition table */
1520 static target_ulong
h_tlb_invalidate(PowerPCCPU
*cpu
,
1521 SpaprMachineState
*spapr
,
1522 target_ulong opcode
,
1526 * The spapr virtual hypervisor nested HV implementation retains no L2
1527 * translation state except for TLB. And the TLB is always invalidated
1528 * across L1<->L2 transitions, so nothing is required here.
1534 static target_ulong
h_copy_tofrom_guest(PowerPCCPU
*cpu
,
1535 SpaprMachineState
*spapr
,
1536 target_ulong opcode
,
1540 * This HCALL is not required, L1 KVM will take a slow path and walk the
1541 * page tables manually to do the data copy.
1547 * When this handler returns, the environment is switched to the L2 guest
1548 * and TCG begins running that. spapr_exit_nested() performs the switch from
1549 * L2 back to L1 and returns from the H_ENTER_NESTED hcall.
1551 static target_ulong
h_enter_nested(PowerPCCPU
*cpu
,
1552 SpaprMachineState
*spapr
,
1553 target_ulong opcode
,
1556 PowerPCCPUClass
*pcc
= POWERPC_CPU_GET_CLASS(cpu
);
1557 CPUState
*cs
= CPU(cpu
);
1558 CPUPPCState
*env
= &cpu
->env
;
1559 SpaprCpuState
*spapr_cpu
= spapr_cpu_state(cpu
);
1560 target_ulong hv_ptr
= args
[0];
1561 target_ulong regs_ptr
= args
[1];
1562 target_ulong hdec
, now
= cpu_ppc_load_tbl(env
);
1563 target_ulong lpcr
, lpcr_mask
;
1564 struct kvmppc_hv_guest_state
*hvstate
;
1565 struct kvmppc_hv_guest_state hv_state
;
1566 struct kvmppc_pt_regs
*regs
;
1571 if (spapr
->nested_ptcr
== 0) {
1572 return H_NOT_AVAILABLE
;
1575 len
= sizeof(*hvstate
);
1576 hvstate
= address_space_map(CPU(cpu
)->as
, hv_ptr
, &len
, false,
1577 MEMTXATTRS_UNSPECIFIED
);
1578 if (len
!= sizeof(*hvstate
)) {
1579 address_space_unmap(CPU(cpu
)->as
, hvstate
, len
, 0, false);
1583 memcpy(&hv_state
, hvstate
, len
);
1585 address_space_unmap(CPU(cpu
)->as
, hvstate
, len
, len
, false);
1588 * We accept versions 1 and 2. Version 2 fields are unused because TCG
1589 * does not implement DAWR*.
1591 if (hv_state
.version
> HV_GUEST_STATE_VERSION
) {
1595 spapr_cpu
->nested_host_state
= g_try_new(CPUPPCState
, 1);
1596 if (!spapr_cpu
->nested_host_state
) {
1600 memcpy(spapr_cpu
->nested_host_state
, env
, sizeof(CPUPPCState
));
1602 len
= sizeof(*regs
);
1603 regs
= address_space_map(CPU(cpu
)->as
, regs_ptr
, &len
, false,
1604 MEMTXATTRS_UNSPECIFIED
);
1605 if (!regs
|| len
!= sizeof(*regs
)) {
1606 address_space_unmap(CPU(cpu
)->as
, regs
, len
, 0, false);
1607 g_free(spapr_cpu
->nested_host_state
);
1611 len
= sizeof(env
->gpr
);
1612 assert(len
== sizeof(regs
->gpr
));
1613 memcpy(env
->gpr
, regs
->gpr
, len
);
1615 env
->lr
= regs
->link
;
1616 env
->ctr
= regs
->ctr
;
1617 cpu_write_xer(env
, regs
->xer
);
1620 for (i
= 7; i
>= 0; i
--) {
1621 env
->crf
[i
] = cr
& 15;
1625 env
->msr
= regs
->msr
;
1626 env
->nip
= regs
->nip
;
1628 address_space_unmap(CPU(cpu
)->as
, regs
, len
, len
, false);
1630 env
->cfar
= hv_state
.cfar
;
1632 assert(env
->spr
[SPR_LPIDR
] == 0);
1633 env
->spr
[SPR_LPIDR
] = hv_state
.lpid
;
1635 lpcr_mask
= LPCR_DPFD
| LPCR_ILE
| LPCR_AIL
| LPCR_LD
| LPCR_MER
;
1636 lpcr
= (env
->spr
[SPR_LPCR
] & ~lpcr_mask
) | (hv_state
.lpcr
& lpcr_mask
);
1637 lpcr
|= LPCR_HR
| LPCR_UPRT
| LPCR_GTSE
| LPCR_HVICE
| LPCR_HDICE
;
1638 lpcr
&= ~LPCR_LPES0
;
1639 env
->spr
[SPR_LPCR
] = lpcr
& pcc
->lpcr_mask
;
1641 env
->spr
[SPR_PCR
] = hv_state
.pcr
;
1642 /* hv_state.amor is not used */
1643 env
->spr
[SPR_DPDES
] = hv_state
.dpdes
;
1644 env
->spr
[SPR_HFSCR
] = hv_state
.hfscr
;
1645 hdec
= hv_state
.hdec_expiry
- now
;
1646 spapr_cpu
->nested_tb_offset
= hv_state
.tb_offset
;
1647 /* TCG does not implement DAWR*, CIABR, PURR, SPURR, IC, VTB, HEIR SPRs*/
1648 env
->spr
[SPR_SRR0
] = hv_state
.srr0
;
1649 env
->spr
[SPR_SRR1
] = hv_state
.srr1
;
1650 env
->spr
[SPR_SPRG0
] = hv_state
.sprg
[0];
1651 env
->spr
[SPR_SPRG1
] = hv_state
.sprg
[1];
1652 env
->spr
[SPR_SPRG2
] = hv_state
.sprg
[2];
1653 env
->spr
[SPR_SPRG3
] = hv_state
.sprg
[3];
1654 env
->spr
[SPR_BOOKS_PID
] = hv_state
.pidr
;
1655 env
->spr
[SPR_PPR
] = hv_state
.ppr
;
1657 cpu_ppc_hdecr_init(env
);
1658 cpu_ppc_store_hdecr(env
, hdec
);
1661 * The hv_state.vcpu_token is not needed. It is used by the KVM
1662 * implementation to remember which L2 vCPU last ran on which physical
1663 * CPU so as to invalidate process scope translations if it is moved
1664 * between physical CPUs. For now TLBs are always flushed on L1<->L2
1665 * transitions so this is not a problem.
1667 * Could validate that the same vcpu_token does not attempt to run on
1668 * different L1 vCPUs at the same time, but that would be a L1 KVM bug
1669 * and it's not obviously worth a new data structure to do it.
1672 env
->tb_env
->tb_offset
+= spapr_cpu
->nested_tb_offset
;
1673 spapr_cpu
->in_nested
= true;
1675 hreg_compute_hflags(env
);
1676 ppc_maybe_interrupt(env
);
1678 env
->reserve_addr
= -1; /* Reset the reservation */
1681 * The spapr hcall helper sets env->gpr[3] to the return value, but at
1682 * this point the L1 is not returning from the hcall but rather we
1683 * start running the L2, so r3 must not be clobbered, so return env->gpr[3]
1684 * to leave it unchanged.
1689 void spapr_exit_nested(PowerPCCPU
*cpu
, int excp
)
1691 CPUState
*cs
= CPU(cpu
);
1692 CPUPPCState
*env
= &cpu
->env
;
1693 SpaprCpuState
*spapr_cpu
= spapr_cpu_state(cpu
);
1694 target_ulong r3_return
= env
->excp_vectors
[excp
]; /* hcall return value */
1695 target_ulong hv_ptr
= spapr_cpu
->nested_host_state
->gpr
[4];
1696 target_ulong regs_ptr
= spapr_cpu
->nested_host_state
->gpr
[5];
1697 struct kvmppc_hv_guest_state
*hvstate
;
1698 struct kvmppc_pt_regs
*regs
;
1703 assert(spapr_cpu
->in_nested
);
1705 cpu_ppc_hdecr_exit(env
);
1707 len
= sizeof(*hvstate
);
1708 hvstate
= address_space_map(CPU(cpu
)->as
, hv_ptr
, &len
, true,
1709 MEMTXATTRS_UNSPECIFIED
);
1710 if (len
!= sizeof(*hvstate
)) {
1711 address_space_unmap(CPU(cpu
)->as
, hvstate
, len
, 0, true);
1712 r3_return
= H_PARAMETER
;
1713 goto out_restore_l1
;
1716 hvstate
->cfar
= env
->cfar
;
1717 hvstate
->lpcr
= env
->spr
[SPR_LPCR
];
1718 hvstate
->pcr
= env
->spr
[SPR_PCR
];
1719 hvstate
->dpdes
= env
->spr
[SPR_DPDES
];
1720 hvstate
->hfscr
= env
->spr
[SPR_HFSCR
];
1722 if (excp
== POWERPC_EXCP_HDSI
) {
1723 hvstate
->hdar
= env
->spr
[SPR_HDAR
];
1724 hvstate
->hdsisr
= env
->spr
[SPR_HDSISR
];
1725 hvstate
->asdr
= env
->spr
[SPR_ASDR
];
1726 } else if (excp
== POWERPC_EXCP_HISI
) {
1727 hvstate
->asdr
= env
->spr
[SPR_ASDR
];
1730 /* HEIR should be implemented for HV mode and saved here. */
1731 hvstate
->srr0
= env
->spr
[SPR_SRR0
];
1732 hvstate
->srr1
= env
->spr
[SPR_SRR1
];
1733 hvstate
->sprg
[0] = env
->spr
[SPR_SPRG0
];
1734 hvstate
->sprg
[1] = env
->spr
[SPR_SPRG1
];
1735 hvstate
->sprg
[2] = env
->spr
[SPR_SPRG2
];
1736 hvstate
->sprg
[3] = env
->spr
[SPR_SPRG3
];
1737 hvstate
->pidr
= env
->spr
[SPR_BOOKS_PID
];
1738 hvstate
->ppr
= env
->spr
[SPR_PPR
];
1740 /* Is it okay to specify write length larger than actual data written? */
1741 address_space_unmap(CPU(cpu
)->as
, hvstate
, len
, len
, true);
1743 len
= sizeof(*regs
);
1744 regs
= address_space_map(CPU(cpu
)->as
, regs_ptr
, &len
, true,
1745 MEMTXATTRS_UNSPECIFIED
);
1746 if (!regs
|| len
!= sizeof(*regs
)) {
1747 address_space_unmap(CPU(cpu
)->as
, regs
, len
, 0, true);
1749 goto out_restore_l1
;
1752 len
= sizeof(env
->gpr
);
1753 assert(len
== sizeof(regs
->gpr
));
1754 memcpy(regs
->gpr
, env
->gpr
, len
);
1756 regs
->link
= env
->lr
;
1757 regs
->ctr
= env
->ctr
;
1758 regs
->xer
= cpu_read_xer(env
);
1761 for (i
= 0; i
< 8; i
++) {
1762 cr
|= (env
->crf
[i
] & 15) << (4 * (7 - i
));
1766 if (excp
== POWERPC_EXCP_MCHECK
||
1767 excp
== POWERPC_EXCP_RESET
||
1768 excp
== POWERPC_EXCP_SYSCALL
) {
1769 regs
->nip
= env
->spr
[SPR_SRR0
];
1770 regs
->msr
= env
->spr
[SPR_SRR1
] & env
->msr_mask
;
1772 regs
->nip
= env
->spr
[SPR_HSRR0
];
1773 regs
->msr
= env
->spr
[SPR_HSRR1
] & env
->msr_mask
;
1776 /* Is it okay to specify write length larger than actual data written? */
1777 address_space_unmap(CPU(cpu
)->as
, regs
, len
, len
, true);
1780 memcpy(env
->gpr
, spapr_cpu
->nested_host_state
->gpr
, sizeof(env
->gpr
));
1781 env
->lr
= spapr_cpu
->nested_host_state
->lr
;
1782 env
->ctr
= spapr_cpu
->nested_host_state
->ctr
;
1783 memcpy(env
->crf
, spapr_cpu
->nested_host_state
->crf
, sizeof(env
->crf
));
1784 env
->cfar
= spapr_cpu
->nested_host_state
->cfar
;
1785 env
->xer
= spapr_cpu
->nested_host_state
->xer
;
1786 env
->so
= spapr_cpu
->nested_host_state
->so
;
1787 env
->ov
= spapr_cpu
->nested_host_state
->ov
;
1788 env
->ov32
= spapr_cpu
->nested_host_state
->ov32
;
1789 env
->ca32
= spapr_cpu
->nested_host_state
->ca32
;
1790 env
->msr
= spapr_cpu
->nested_host_state
->msr
;
1791 env
->nip
= spapr_cpu
->nested_host_state
->nip
;
1793 assert(env
->spr
[SPR_LPIDR
] != 0);
1794 env
->spr
[SPR_LPCR
] = spapr_cpu
->nested_host_state
->spr
[SPR_LPCR
];
1795 env
->spr
[SPR_LPIDR
] = spapr_cpu
->nested_host_state
->spr
[SPR_LPIDR
];
1796 env
->spr
[SPR_PCR
] = spapr_cpu
->nested_host_state
->spr
[SPR_PCR
];
1797 env
->spr
[SPR_DPDES
] = 0;
1798 env
->spr
[SPR_HFSCR
] = spapr_cpu
->nested_host_state
->spr
[SPR_HFSCR
];
1799 env
->spr
[SPR_SRR0
] = spapr_cpu
->nested_host_state
->spr
[SPR_SRR0
];
1800 env
->spr
[SPR_SRR1
] = spapr_cpu
->nested_host_state
->spr
[SPR_SRR1
];
1801 env
->spr
[SPR_SPRG0
] = spapr_cpu
->nested_host_state
->spr
[SPR_SPRG0
];
1802 env
->spr
[SPR_SPRG1
] = spapr_cpu
->nested_host_state
->spr
[SPR_SPRG1
];
1803 env
->spr
[SPR_SPRG2
] = spapr_cpu
->nested_host_state
->spr
[SPR_SPRG2
];
1804 env
->spr
[SPR_SPRG3
] = spapr_cpu
->nested_host_state
->spr
[SPR_SPRG3
];
1805 env
->spr
[SPR_BOOKS_PID
] = spapr_cpu
->nested_host_state
->spr
[SPR_BOOKS_PID
];
1806 env
->spr
[SPR_PPR
] = spapr_cpu
->nested_host_state
->spr
[SPR_PPR
];
1809 * Return the interrupt vector address from H_ENTER_NESTED to the L1
1812 env
->gpr
[3] = r3_return
;
1814 env
->tb_env
->tb_offset
-= spapr_cpu
->nested_tb_offset
;
1815 spapr_cpu
->in_nested
= false;
1817 hreg_compute_hflags(env
);
1818 ppc_maybe_interrupt(env
);
1820 env
->reserve_addr
= -1; /* Reset the reservation */
1822 g_free(spapr_cpu
->nested_host_state
);
1823 spapr_cpu
->nested_host_state
= NULL
;
1826 static void hypercall_register_nested(void)
1828 spapr_register_hypercall(KVMPPC_H_SET_PARTITION_TABLE
, h_set_ptbl
);
1829 spapr_register_hypercall(KVMPPC_H_ENTER_NESTED
, h_enter_nested
);
1830 spapr_register_hypercall(KVMPPC_H_TLB_INVALIDATE
, h_tlb_invalidate
);
1831 spapr_register_hypercall(KVMPPC_H_COPY_TOFROM_GUEST
, h_copy_tofrom_guest
);
/*
 * Variant of hypercall_register_softmmu() that registers nothing.
 * NOTE(review): presumably the build where these hcalls are handled
 * elsewhere (the other variant is selected by conditional compilation) -
 * confirm against the surrounding preprocessor guards.
 */
static void hypercall_register_softmmu(void)
{
    /* DO NOTHING */
}
1839 void spapr_exit_nested(PowerPCCPU
*cpu
, int excp
)
1841 g_assert_not_reached();
1844 static target_ulong
h_softmmu(PowerPCCPU
*cpu
, SpaprMachineState
*spapr
,
1845 target_ulong opcode
, target_ulong
*args
)
1847 g_assert_not_reached();
/*
 * Variant of hypercall_register_nested() for builds without the nested-HV
 * implementation: there are no nested hcalls to register.
 */
static void hypercall_register_nested(void)
{
    /* DO NOTHING */
}
1855 static void hypercall_register_softmmu(void)
1858 spapr_register_hypercall(H_ENTER
, h_softmmu
);
1859 spapr_register_hypercall(H_REMOVE
, h_softmmu
);
1860 spapr_register_hypercall(H_PROTECT
, h_softmmu
);
1861 spapr_register_hypercall(H_READ
, h_softmmu
);
1864 spapr_register_hypercall(H_BULK_REMOVE
, h_softmmu
);
1868 static void hypercall_register_types(void)
1870 hypercall_register_softmmu();
1872 /* hcall-hpt-resize */
1873 spapr_register_hypercall(H_RESIZE_HPT_PREPARE
, h_resize_hpt_prepare
);
1874 spapr_register_hypercall(H_RESIZE_HPT_COMMIT
, h_resize_hpt_commit
);
1877 spapr_register_hypercall(H_REGISTER_VPA
, h_register_vpa
);
1878 spapr_register_hypercall(H_CEDE
, h_cede
);
1879 spapr_register_hypercall(H_CONFER
, h_confer
);
1880 spapr_register_hypercall(H_PROD
, h_prod
);
1883 spapr_register_hypercall(H_JOIN
, h_join
);
1885 spapr_register_hypercall(H_SIGNAL_SYS_RESET
, h_signal_sys_reset
);
1887 /* processor register resource access h-calls */
1888 spapr_register_hypercall(H_SET_SPRG0
, h_set_sprg0
);
1889 spapr_register_hypercall(H_SET_DABR
, h_set_dabr
);
1890 spapr_register_hypercall(H_SET_XDABR
, h_set_xdabr
);
1891 spapr_register_hypercall(H_PAGE_INIT
, h_page_init
);
1892 spapr_register_hypercall(H_SET_MODE
, h_set_mode
);
1894 /* In Memory Table MMU h-calls */
1895 spapr_register_hypercall(H_CLEAN_SLB
, h_clean_slb
);
1896 spapr_register_hypercall(H_INVALIDATE_PID
, h_invalidate_pid
);
1897 spapr_register_hypercall(H_REGISTER_PROC_TBL
, h_register_process_table
);
1899 /* hcall-get-cpu-characteristics */
1900 spapr_register_hypercall(H_GET_CPU_CHARACTERISTICS
,
1901 h_get_cpu_characteristics
);
1903 /* "debugger" hcalls (also used by SLOF). Note: We do -not- differenciate
1904 * here between the "CI" and the "CACHE" variants, they will use whatever
1905 * mapping attributes qemu is using. When using KVM, the kernel will
1906 * enforce the attributes more strongly
1908 spapr_register_hypercall(H_LOGICAL_CI_LOAD
, h_logical_load
);
1909 spapr_register_hypercall(H_LOGICAL_CI_STORE
, h_logical_store
);
1910 spapr_register_hypercall(H_LOGICAL_CACHE_LOAD
, h_logical_load
);
1911 spapr_register_hypercall(H_LOGICAL_CACHE_STORE
, h_logical_store
);
1912 spapr_register_hypercall(H_LOGICAL_ICBI
, h_logical_icbi
);
1913 spapr_register_hypercall(H_LOGICAL_DCBF
, h_logical_dcbf
);
1914 spapr_register_hypercall(KVMPPC_H_LOGICAL_MEMOP
, h_logical_memop
);
1916 /* qemu/KVM-PPC specific hcalls */
1917 spapr_register_hypercall(KVMPPC_H_RTAS
, h_rtas
);
1919 /* ibm,client-architecture-support support */
1920 spapr_register_hypercall(KVMPPC_H_CAS
, h_client_architecture_support
);
1922 spapr_register_hypercall(KVMPPC_H_UPDATE_DT
, h_update_dt
);
1924 hypercall_register_nested();
1927 type_init(hypercall_register_types
)