block/copy-before-write: relax permission requirements when no parents
[qemu.git] / target / ppc / mmu-radix64.c
blob5b0e62e676dc75d310d67bc58c9965d02e5cfd37
1 /*
2 * PowerPC Radix MMU emulation helpers for QEMU.
4 * Copyright (c) 2016 Suraj Jitindar Singh, IBM Corporation
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
20 #include "qemu/osdep.h"
21 #include "cpu.h"
22 #include "exec/exec-all.h"
23 #include "qemu/error-report.h"
24 #include "sysemu/kvm.h"
25 #include "kvm_ppc.h"
26 #include "exec/log.h"
27 #include "internal.h"
28 #include "mmu-radix64.h"
29 #include "mmu-book3s-v3.h"
31 static bool ppc_radix64_get_fully_qualified_addr(const CPUPPCState *env,
32 vaddr eaddr,
33 uint64_t *lpid, uint64_t *pid)
35 if (msr_hv) { /* MSR[HV] -> Hypervisor/bare metal */
36 switch (eaddr & R_EADDR_QUADRANT) {
37 case R_EADDR_QUADRANT0:
38 *lpid = 0;
39 *pid = env->spr[SPR_BOOKS_PID];
40 break;
41 case R_EADDR_QUADRANT1:
42 *lpid = env->spr[SPR_LPIDR];
43 *pid = env->spr[SPR_BOOKS_PID];
44 break;
45 case R_EADDR_QUADRANT2:
46 *lpid = env->spr[SPR_LPIDR];
47 *pid = 0;
48 break;
49 case R_EADDR_QUADRANT3:
50 *lpid = 0;
51 *pid = 0;
52 break;
53 default:
54 g_assert_not_reached();
56 } else { /* !MSR[HV] -> Guest */
57 switch (eaddr & R_EADDR_QUADRANT) {
58 case R_EADDR_QUADRANT0: /* Guest application */
59 *lpid = env->spr[SPR_LPIDR];
60 *pid = env->spr[SPR_BOOKS_PID];
61 break;
62 case R_EADDR_QUADRANT1: /* Illegal */
63 case R_EADDR_QUADRANT2:
64 return false;
65 case R_EADDR_QUADRANT3: /* Guest OS */
66 *lpid = env->spr[SPR_LPIDR];
67 *pid = 0; /* pid set to 0 -> addresses guest operating system */
68 break;
69 default:
70 g_assert_not_reached();
74 return true;
77 static void ppc_radix64_raise_segi(PowerPCCPU *cpu, MMUAccessType access_type,
78 vaddr eaddr)
80 CPUState *cs = CPU(cpu);
81 CPUPPCState *env = &cpu->env;
83 switch (access_type) {
84 case MMU_INST_FETCH:
85 /* Instruction Segment Interrupt */
86 cs->exception_index = POWERPC_EXCP_ISEG;
87 break;
88 case MMU_DATA_STORE:
89 case MMU_DATA_LOAD:
90 /* Data Segment Interrupt */
91 cs->exception_index = POWERPC_EXCP_DSEG;
92 env->spr[SPR_DAR] = eaddr;
93 break;
94 default:
95 g_assert_not_reached();
97 env->error_code = 0;
100 static void ppc_radix64_raise_si(PowerPCCPU *cpu, MMUAccessType access_type,
101 vaddr eaddr, uint32_t cause)
103 CPUState *cs = CPU(cpu);
104 CPUPPCState *env = &cpu->env;
106 switch (access_type) {
107 case MMU_INST_FETCH:
108 /* Instruction Storage Interrupt */
109 cs->exception_index = POWERPC_EXCP_ISI;
110 env->error_code = cause;
111 break;
112 case MMU_DATA_STORE:
113 cause |= DSISR_ISSTORE;
114 /* fall through */
115 case MMU_DATA_LOAD:
116 /* Data Storage Interrupt */
117 cs->exception_index = POWERPC_EXCP_DSI;
118 env->spr[SPR_DSISR] = cause;
119 env->spr[SPR_DAR] = eaddr;
120 env->error_code = 0;
121 break;
122 default:
123 g_assert_not_reached();
127 static void ppc_radix64_raise_hsi(PowerPCCPU *cpu, MMUAccessType access_type,
128 vaddr eaddr, hwaddr g_raddr, uint32_t cause)
130 CPUState *cs = CPU(cpu);
131 CPUPPCState *env = &cpu->env;
133 switch (access_type) {
134 case MMU_INST_FETCH:
135 /* H Instruction Storage Interrupt */
136 cs->exception_index = POWERPC_EXCP_HISI;
137 env->spr[SPR_ASDR] = g_raddr;
138 env->error_code = cause;
139 break;
140 case MMU_DATA_STORE:
141 cause |= DSISR_ISSTORE;
142 /* fall through */
143 case MMU_DATA_LOAD:
144 /* H Data Storage Interrupt */
145 cs->exception_index = POWERPC_EXCP_HDSI;
146 env->spr[SPR_HDSISR] = cause;
147 env->spr[SPR_HDAR] = eaddr;
148 env->spr[SPR_ASDR] = g_raddr;
149 env->error_code = 0;
150 break;
151 default:
152 g_assert_not_reached();
156 static bool ppc_radix64_check_prot(PowerPCCPU *cpu, MMUAccessType access_type,
157 uint64_t pte, int *fault_cause, int *prot,
158 int mmu_idx, bool partition_scoped)
160 CPUPPCState *env = &cpu->env;
161 int need_prot;
163 /* Check Page Attributes (pte58:59) */
164 if ((pte & R_PTE_ATT) == R_PTE_ATT_NI_IO && access_type == MMU_INST_FETCH) {
166 * Radix PTE entries with the non-idempotent I/O attribute are treated
167 * as guarded storage
169 *fault_cause |= SRR1_NOEXEC_GUARD;
170 return true;
173 /* Determine permissions allowed by Encoded Access Authority */
174 if (!partition_scoped && (pte & R_PTE_EAA_PRIV) && msr_pr) {
175 *prot = 0;
176 } else if (mmuidx_pr(mmu_idx) || (pte & R_PTE_EAA_PRIV) ||
177 partition_scoped) {
178 *prot = ppc_radix64_get_prot_eaa(pte);
179 } else { /* !msr_pr && !(pte & R_PTE_EAA_PRIV) && !partition_scoped */
180 *prot = ppc_radix64_get_prot_eaa(pte);
181 *prot &= ppc_radix64_get_prot_amr(cpu); /* Least combined permissions */
184 /* Check if requested access type is allowed */
185 need_prot = prot_for_access_type(access_type);
186 if (need_prot & ~*prot) { /* Page Protected for that Access */
187 *fault_cause |= DSISR_PROTFAULT;
188 return true;
191 return false;
194 static void ppc_radix64_set_rc(PowerPCCPU *cpu, MMUAccessType access_type,
195 uint64_t pte, hwaddr pte_addr, int *prot)
197 CPUState *cs = CPU(cpu);
198 uint64_t npte;
200 npte = pte | R_PTE_R; /* Always set reference bit */
202 if (access_type == MMU_DATA_STORE) { /* Store/Write */
203 npte |= R_PTE_C; /* Set change bit */
204 } else {
206 * Treat the page as read-only for now, so that a later write
207 * will pass through this function again to set the C bit.
209 *prot &= ~PAGE_WRITE;
212 if (pte ^ npte) { /* If pte has changed then write it back */
213 stq_phys(cs->as, pte_addr, npte);
217 static int ppc_radix64_next_level(AddressSpace *as, vaddr eaddr,
218 uint64_t *pte_addr, uint64_t *nls,
219 int *psize, uint64_t *pte, int *fault_cause)
221 uint64_t index, pde;
223 if (*nls < 5) { /* Directory maps less than 2**5 entries */
224 *fault_cause |= DSISR_R_BADCONFIG;
225 return 1;
228 /* Read page <directory/table> entry from guest address space */
229 pde = ldq_phys(as, *pte_addr);
230 if (!(pde & R_PTE_VALID)) { /* Invalid Entry */
231 *fault_cause |= DSISR_NOPTE;
232 return 1;
235 *pte = pde;
236 *psize -= *nls;
237 if (!(pde & R_PTE_LEAF)) { /* Prepare for next iteration */
238 *nls = pde & R_PDE_NLS;
239 index = eaddr >> (*psize - *nls); /* Shift */
240 index &= ((1UL << *nls) - 1); /* Mask */
241 *pte_addr = (pde & R_PDE_NLB) + (index * sizeof(pde));
243 return 0;
246 static int ppc_radix64_walk_tree(AddressSpace *as, vaddr eaddr,
247 uint64_t base_addr, uint64_t nls,
248 hwaddr *raddr, int *psize, uint64_t *pte,
249 int *fault_cause, hwaddr *pte_addr)
251 uint64_t index, pde, rpn , mask;
253 if (nls < 5) { /* Directory maps less than 2**5 entries */
254 *fault_cause |= DSISR_R_BADCONFIG;
255 return 1;
258 index = eaddr >> (*psize - nls); /* Shift */
259 index &= ((1UL << nls) - 1); /* Mask */
260 *pte_addr = base_addr + (index * sizeof(pde));
261 do {
262 int ret;
264 ret = ppc_radix64_next_level(as, eaddr, pte_addr, &nls, psize, &pde,
265 fault_cause);
266 if (ret) {
267 return ret;
269 } while (!(pde & R_PTE_LEAF));
271 *pte = pde;
272 rpn = pde & R_PTE_RPN;
273 mask = (1UL << *psize) - 1;
275 /* Or high bits of rpn and low bits to ea to form whole real addr */
276 *raddr = (rpn & ~mask) | (eaddr & mask);
277 return 0;
280 static bool validate_pate(PowerPCCPU *cpu, uint64_t lpid, ppc_v3_pate_t *pate)
282 CPUPPCState *env = &cpu->env;
284 if (!(pate->dw0 & PATE0_HR)) {
285 return false;
287 if (lpid == 0 && !msr_hv) {
288 return false;
290 if ((pate->dw0 & PATE1_R_PRTS) < 5) {
291 return false;
293 /* More checks ... */
294 return true;
297 static int ppc_radix64_partition_scoped_xlate(PowerPCCPU *cpu,
298 MMUAccessType access_type,
299 vaddr eaddr, hwaddr g_raddr,
300 ppc_v3_pate_t pate,
301 hwaddr *h_raddr, int *h_prot,
302 int *h_page_size, bool pde_addr,
303 int mmu_idx, bool guest_visible)
305 int fault_cause = 0;
306 hwaddr pte_addr;
307 uint64_t pte;
309 *h_page_size = PRTBE_R_GET_RTS(pate.dw0);
310 /* No valid pte or access denied due to protection */
311 if (ppc_radix64_walk_tree(CPU(cpu)->as, g_raddr, pate.dw0 & PRTBE_R_RPDB,
312 pate.dw0 & PRTBE_R_RPDS, h_raddr, h_page_size,
313 &pte, &fault_cause, &pte_addr) ||
314 ppc_radix64_check_prot(cpu, access_type, pte,
315 &fault_cause, h_prot, mmu_idx, true)) {
316 if (pde_addr) { /* address being translated was that of a guest pde */
317 fault_cause |= DSISR_PRTABLE_FAULT;
319 if (guest_visible) {
320 ppc_radix64_raise_hsi(cpu, access_type, eaddr, g_raddr, fault_cause);
322 return 1;
325 if (guest_visible) {
326 ppc_radix64_set_rc(cpu, access_type, pte, pte_addr, h_prot);
329 return 0;
332 static int ppc_radix64_process_scoped_xlate(PowerPCCPU *cpu,
333 MMUAccessType access_type,
334 vaddr eaddr, uint64_t pid,
335 ppc_v3_pate_t pate, hwaddr *g_raddr,
336 int *g_prot, int *g_page_size,
337 int mmu_idx, bool guest_visible)
339 CPUState *cs = CPU(cpu);
340 CPUPPCState *env = &cpu->env;
341 uint64_t offset, size, prtbe_addr, prtbe0, base_addr, nls, index, pte;
342 int fault_cause = 0, h_page_size, h_prot;
343 hwaddr h_raddr, pte_addr;
344 int ret;
346 /* Index Process Table by PID to Find Corresponding Process Table Entry */
347 offset = pid * sizeof(struct prtb_entry);
348 size = 1ULL << ((pate.dw1 & PATE1_R_PRTS) + 12);
349 if (offset >= size) {
350 /* offset exceeds size of the process table */
351 if (guest_visible) {
352 ppc_radix64_raise_si(cpu, access_type, eaddr, DSISR_NOPTE);
354 return 1;
356 prtbe_addr = (pate.dw1 & PATE1_R_PRTB) + offset;
358 if (cpu->vhyp) {
359 prtbe0 = ldq_phys(cs->as, prtbe_addr);
360 } else {
362 * Process table addresses are subject to partition-scoped
363 * translation
365 * On a Radix host, the partition-scoped page table for LPID=0
366 * is only used to translate the effective addresses of the
367 * process table entries.
369 ret = ppc_radix64_partition_scoped_xlate(cpu, 0, eaddr, prtbe_addr,
370 pate, &h_raddr, &h_prot,
371 &h_page_size, true,
372 /* mmu_idx is 5 because we're translating from hypervisor scope */
373 5, guest_visible);
374 if (ret) {
375 return ret;
377 prtbe0 = ldq_phys(cs->as, h_raddr);
380 /* Walk Radix Tree from Process Table Entry to Convert EA to RA */
381 *g_page_size = PRTBE_R_GET_RTS(prtbe0);
382 base_addr = prtbe0 & PRTBE_R_RPDB;
383 nls = prtbe0 & PRTBE_R_RPDS;
384 if (msr_hv || cpu->vhyp) {
386 * Can treat process table addresses as real addresses
388 ret = ppc_radix64_walk_tree(cs->as, eaddr & R_EADDR_MASK, base_addr,
389 nls, g_raddr, g_page_size, &pte,
390 &fault_cause, &pte_addr);
391 if (ret) {
392 /* No valid PTE */
393 if (guest_visible) {
394 ppc_radix64_raise_si(cpu, access_type, eaddr, fault_cause);
396 return ret;
398 } else {
399 uint64_t rpn, mask;
401 index = (eaddr & R_EADDR_MASK) >> (*g_page_size - nls); /* Shift */
402 index &= ((1UL << nls) - 1); /* Mask */
403 pte_addr = base_addr + (index * sizeof(pte));
406 * Each process table address is subject to a partition-scoped
407 * translation
409 do {
410 ret = ppc_radix64_partition_scoped_xlate(cpu, 0, eaddr, pte_addr,
411 pate, &h_raddr, &h_prot,
412 &h_page_size, true,
413 /* mmu_idx is 5 because we're translating from hypervisor scope */
414 5, guest_visible);
415 if (ret) {
416 return ret;
419 ret = ppc_radix64_next_level(cs->as, eaddr & R_EADDR_MASK, &h_raddr,
420 &nls, g_page_size, &pte, &fault_cause);
421 if (ret) {
422 /* No valid pte */
423 if (guest_visible) {
424 ppc_radix64_raise_si(cpu, access_type, eaddr, fault_cause);
426 return ret;
428 pte_addr = h_raddr;
429 } while (!(pte & R_PTE_LEAF));
431 rpn = pte & R_PTE_RPN;
432 mask = (1UL << *g_page_size) - 1;
434 /* Or high bits of rpn and low bits to ea to form whole real addr */
435 *g_raddr = (rpn & ~mask) | (eaddr & mask);
438 if (ppc_radix64_check_prot(cpu, access_type, pte, &fault_cause,
439 g_prot, mmu_idx, false)) {
440 /* Access denied due to protection */
441 if (guest_visible) {
442 ppc_radix64_raise_si(cpu, access_type, eaddr, fault_cause);
444 return 1;
447 if (guest_visible) {
448 ppc_radix64_set_rc(cpu, access_type, pte, pte_addr, g_prot);
451 return 0;
455 * Radix tree translation is a 2 steps translation process:
457 * 1. Process-scoped translation: Guest Eff Addr -> Guest Real Addr
458 * 2. Partition-scoped translation: Guest Real Addr -> Host Real Addr
460 * MSR[HV]
461 * +-------------+----------------+---------------+
462 * | | HV = 0 | HV = 1 |
463 * +-------------+----------------+---------------+
464 * | Relocation | Partition | No |
465 * | = Off | Scoped | Translation |
466 * Relocation +-------------+----------------+---------------+
467 * | Relocation | Partition & | Process |
468 * | = On | Process Scoped | Scoped |
469 * +-------------+----------------+---------------+
471 bool ppc_radix64_xlate(PowerPCCPU *cpu, vaddr eaddr, MMUAccessType access_type,
472 hwaddr *raddr, int *psizep, int *protp, int mmu_idx,
473 bool guest_visible)
475 CPUPPCState *env = &cpu->env;
476 uint64_t lpid, pid;
477 ppc_v3_pate_t pate;
478 int psize, prot;
479 hwaddr g_raddr;
480 bool relocation;
482 assert(!(mmuidx_hv(mmu_idx) && cpu->vhyp));
484 relocation = !mmuidx_real(mmu_idx);
486 /* HV or virtual hypervisor Real Mode Access */
487 if (!relocation && (mmuidx_hv(mmu_idx) || cpu->vhyp)) {
488 /* In real mode top 4 effective addr bits (mostly) ignored */
489 *raddr = eaddr & 0x0FFFFFFFFFFFFFFFULL;
491 /* In HV mode, add HRMOR if top EA bit is clear */
492 if (mmuidx_hv(mmu_idx) || !env->has_hv_mode) {
493 if (!(eaddr >> 63)) {
494 *raddr |= env->spr[SPR_HRMOR];
497 *protp = PAGE_READ | PAGE_WRITE | PAGE_EXEC;
498 *psizep = TARGET_PAGE_BITS;
499 return true;
503 * Check UPRT (we avoid the check in real mode to deal with
504 * transitional states during kexec.
506 if (guest_visible && !ppc64_use_proc_tbl(cpu)) {
507 qemu_log_mask(LOG_GUEST_ERROR,
508 "LPCR:UPRT not set in radix mode ! LPCR="
509 TARGET_FMT_lx "\n", env->spr[SPR_LPCR]);
512 /* Virtual Mode Access - get the fully qualified address */
513 if (!ppc_radix64_get_fully_qualified_addr(&cpu->env, eaddr, &lpid, &pid)) {
514 if (guest_visible) {
515 ppc_radix64_raise_segi(cpu, access_type, eaddr);
517 return false;
520 /* Get Process Table */
521 if (cpu->vhyp) {
522 PPCVirtualHypervisorClass *vhc;
523 vhc = PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
524 vhc->get_pate(cpu->vhyp, &pate);
525 } else {
526 if (!ppc64_v3_get_pate(cpu, lpid, &pate)) {
527 if (guest_visible) {
528 ppc_radix64_raise_si(cpu, access_type, eaddr, DSISR_NOPTE);
530 return false;
532 if (!validate_pate(cpu, lpid, &pate)) {
533 if (guest_visible) {
534 ppc_radix64_raise_si(cpu, access_type, eaddr, DSISR_R_BADCONFIG);
536 return false;
540 *psizep = INT_MAX;
541 *protp = PAGE_READ | PAGE_WRITE | PAGE_EXEC;
544 * Perform process-scoped translation if relocation enabled.
546 * - Translates an effective address to a host real address in
547 * quadrants 0 and 3 when HV=1.
549 * - Translates an effective address to a guest real address.
551 if (relocation) {
552 int ret = ppc_radix64_process_scoped_xlate(cpu, access_type, eaddr, pid,
553 pate, &g_raddr, &prot,
554 &psize, mmu_idx, guest_visible);
555 if (ret) {
556 return false;
558 *psizep = MIN(*psizep, psize);
559 *protp &= prot;
560 } else {
561 g_raddr = eaddr & R_EADDR_MASK;
564 if (cpu->vhyp) {
565 *raddr = g_raddr;
566 } else {
568 * Perform partition-scoped translation if !HV or HV access to
569 * quadrants 1 or 2. Translates a guest real address to a host
570 * real address.
572 if (lpid || !mmuidx_hv(mmu_idx)) {
573 int ret;
575 ret = ppc_radix64_partition_scoped_xlate(cpu, access_type, eaddr,
576 g_raddr, pate, raddr,
577 &prot, &psize, false,
578 mmu_idx, guest_visible);
579 if (ret) {
580 return false;
582 *psizep = MIN(*psizep, psize);
583 *protp &= prot;
584 } else {
585 *raddr = g_raddr;
589 return true;