6198 Let's EOL cachefs
[illumos-gate.git] / usr / src / uts / common / os / urw.c
blobef70ccac453a37cbd3868a52ad5b381af3b94a57
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
19 * CDDL HEADER END
22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
26 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
27 /* All Rights Reserved */
29 #pragma ident "%Z%%M% %I% %E% SMI"
31 #include <sys/atomic.h>
32 #include <sys/errno.h>
33 #include <sys/stat.h>
34 #include <sys/modctl.h>
35 #include <sys/conf.h>
36 #include <sys/systm.h>
37 #include <sys/ddi.h>
38 #include <sys/sunddi.h>
39 #include <sys/cpuvar.h>
40 #include <sys/kmem.h>
41 #include <sys/strsubr.h>
42 #include <sys/sysmacros.h>
43 #include <sys/frame.h>
44 #include <sys/stack.h>
45 #include <sys/proc.h>
46 #include <sys/priv.h>
47 #include <sys/policy.h>
48 #include <sys/ontrap.h>
49 #include <sys/vmsystm.h>
50 #include <sys/prsystm.h>
52 #include <vm/as.h>
53 #include <vm/seg.h>
54 #include <vm/seg_dev.h>
55 #include <vm/seg_vn.h>
56 #include <vm/seg_spt.h>
57 #include <vm/seg_kmem.h>
59 extern struct seg_ops segdev_ops; /* needs a header file */
60 extern struct seg_ops segspt_shmops; /* needs a header file */
62 static int
63 page_valid(struct seg *seg, caddr_t addr)
65 struct segvn_data *svd;
66 vnode_t *vp;
67 vattr_t vattr;
70 * Fail if the page doesn't map to a page in the underlying
71 * mapped file, if an underlying mapped file exists.
73 vattr.va_mask = AT_SIZE;
74 if (seg->s_ops == &segvn_ops &&
75 SEGOP_GETVP(seg, addr, &vp) == 0 &&
76 vp != NULL && vp->v_type == VREG &&
77 VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {
78 u_offset_t size = roundup(vattr.va_size, (u_offset_t)PAGESIZE);
79 u_offset_t offset = SEGOP_GETOFFSET(seg, addr);
81 if (offset >= size)
82 return (0);
86 * Fail if this is an ISM shared segment and the address is
87 * not within the real size of the spt segment that backs it.
89 if (seg->s_ops == &segspt_shmops &&
90 addr >= seg->s_base + spt_realsize(seg))
91 return (0);
94 * Fail if the segment is mapped from /dev/null.
95 * The key is that the mapping comes from segdev and the
96 * type is neither MAP_SHARED nor MAP_PRIVATE.
98 if (seg->s_ops == &segdev_ops &&
99 ((SEGOP_GETTYPE(seg, addr) & (MAP_SHARED | MAP_PRIVATE)) == 0))
100 return (0);
103 * Fail if the page is a MAP_NORESERVE page that has
104 * not actually materialized.
105 * We cheat by knowing that segvn is the only segment
106 * driver that supports MAP_NORESERVE.
108 if (seg->s_ops == &segvn_ops &&
109 (svd = (struct segvn_data *)seg->s_data) != NULL &&
110 (svd->vp == NULL || svd->vp->v_type != VREG) &&
111 (svd->flags & MAP_NORESERVE)) {
113 * Guilty knowledge here. We know that
114 * segvn_incore returns more than just the
115 * low-order bit that indicates the page is
116 * actually in memory. If any bits are set,
117 * then there is backing store for the page.
119 char incore = 0;
120 (void) SEGOP_INCORE(seg, addr, PAGESIZE, &incore);
121 if (incore == 0)
122 return (0);
124 return (1);
128 * Map address "addr" in address space "as" into a kernel virtual address.
129 * The memory is guaranteed to be resident and locked down.
131 static caddr_t
132 mapin(struct as *as, caddr_t addr, int writing)
134 page_t *pp;
135 caddr_t kaddr;
136 pfn_t pfnum;
139 * NB: Because of past mistakes, we have bits being returned
140 * by getpfnum that are actually the page type bits of the pte.
141 * When the object we are trying to map is a memory page with
142 * a page structure everything is ok and we can use the optimal
143 * method, ppmapin. Otherwise, we have to do something special.
145 pfnum = hat_getpfnum(as->a_hat, addr);
146 if (pf_is_memory(pfnum)) {
147 pp = page_numtopp_nolock(pfnum);
148 if (pp != NULL) {
149 ASSERT(PAGE_LOCKED(pp));
150 kaddr = ppmapin(pp, writing ?
151 (PROT_READ | PROT_WRITE) : PROT_READ,
152 (caddr_t)-1);
153 return (kaddr + ((uintptr_t)addr & PAGEOFFSET));
158 * Oh well, we didn't have a page struct for the object we were
159 * trying to map in; ppmapin doesn't handle devices, but allocating a
160 * heap address allows ppmapout to free virutal space when done.
162 kaddr = vmem_alloc(heap_arena, PAGESIZE, VM_SLEEP);
164 hat_devload(kas.a_hat, kaddr, PAGESIZE, pfnum,
165 writing ? (PROT_READ | PROT_WRITE) : PROT_READ, HAT_LOAD_LOCK);
167 return (kaddr + ((uintptr_t)addr & PAGEOFFSET));
170 /*ARGSUSED*/
171 static void
172 mapout(struct as *as, caddr_t addr, caddr_t vaddr, int writing)
174 vaddr = (caddr_t)(uintptr_t)((uintptr_t)vaddr & PAGEMASK);
175 ppmapout(vaddr);
179 * Perform I/O to a given process. This will return EIO if we detect
180 * corrupt memory and ENXIO if there is no such mapped address in the
181 * user process's address space.
183 static int
184 urw(proc_t *p, int writing, void *buf, size_t len, uintptr_t a)
186 caddr_t addr = (caddr_t)a;
187 caddr_t page;
188 caddr_t vaddr;
189 struct seg *seg;
190 int error = 0;
191 int err = 0;
192 uint_t prot;
193 uint_t prot_rw = writing ? PROT_WRITE : PROT_READ;
194 int protchanged;
195 on_trap_data_t otd;
196 int retrycnt;
197 struct as *as = p->p_as;
198 enum seg_rw rw;
201 * Locate segment containing address of interest.
203 page = (caddr_t)(uintptr_t)((uintptr_t)addr & PAGEMASK);
204 retrycnt = 0;
205 AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
206 retry:
207 if ((seg = as_segat(as, page)) == NULL ||
208 !page_valid(seg, page)) {
209 AS_LOCK_EXIT(as, &as->a_lock);
210 return (ENXIO);
212 SEGOP_GETPROT(seg, page, 0, &prot);
214 protchanged = 0;
215 if ((prot & prot_rw) == 0) {
216 protchanged = 1;
217 err = SEGOP_SETPROT(seg, page, PAGESIZE, prot | prot_rw);
219 if (err == IE_RETRY) {
220 protchanged = 0;
221 ASSERT(retrycnt == 0);
222 retrycnt++;
223 goto retry;
226 if (err != 0) {
227 AS_LOCK_EXIT(as, &as->a_lock);
228 return (ENXIO);
233 * segvn may do a copy-on-write for F_SOFTLOCK/S_READ case to break
234 * sharing to avoid a copy on write of a softlocked page by another
235 * thread. But since we locked the address space as a writer no other
236 * thread can cause a copy on write. S_READ_NOCOW is passed as the
237 * access type to tell segvn that it's ok not to do a copy-on-write
238 * for this SOFTLOCK fault.
240 if (writing)
241 rw = S_WRITE;
242 else if (seg->s_ops == &segvn_ops)
243 rw = S_READ_NOCOW;
244 else
245 rw = S_READ;
247 if (SEGOP_FAULT(as->a_hat, seg, page, PAGESIZE, F_SOFTLOCK, rw)) {
248 if (protchanged)
249 (void) SEGOP_SETPROT(seg, page, PAGESIZE, prot);
250 AS_LOCK_EXIT(as, &as->a_lock);
251 return (ENXIO);
253 CPU_STATS_ADD_K(vm, softlock, 1);
256 * Make sure we're not trying to read or write off the end of the page.
258 ASSERT(len <= page + PAGESIZE - addr);
261 * Map in the locked page, copy to our local buffer,
262 * then map the page out and unlock it.
264 vaddr = mapin(as, addr, writing);
267 * Since we are copying memory on behalf of the user process,
268 * protect against memory error correction faults.
270 if (!on_trap(&otd, OT_DATA_EC)) {
271 if (seg->s_ops == &segdev_ops) {
273 * Device memory can behave strangely; invoke
274 * a segdev-specific copy operation instead.
276 if (writing) {
277 if (segdev_copyto(seg, addr, buf, vaddr, len))
278 error = ENXIO;
279 } else {
280 if (segdev_copyfrom(seg, addr, vaddr, buf, len))
281 error = ENXIO;
283 } else {
284 if (writing)
285 bcopy(buf, vaddr, len);
286 else
287 bcopy(vaddr, buf, len);
289 } else {
290 error = EIO;
292 no_trap();
295 * If we're writing to an executable page, we may need to sychronize
296 * the I$ with the modifications we made through the D$.
298 if (writing && (prot & PROT_EXEC))
299 sync_icache(vaddr, (uint_t)len);
301 mapout(as, addr, vaddr, writing);
303 if (rw == S_READ_NOCOW)
304 rw = S_READ;
306 (void) SEGOP_FAULT(as->a_hat, seg, page, PAGESIZE, F_SOFTUNLOCK, rw);
308 if (protchanged)
309 (void) SEGOP_SETPROT(seg, page, PAGESIZE, prot);
311 AS_LOCK_EXIT(as, &as->a_lock);
313 return (error);
317 uread(proc_t *p, void *buf, size_t len, uintptr_t a)
319 return (urw(p, 0, buf, len, a));
323 uwrite(proc_t *p, void *buf, size_t len, uintptr_t a)
325 return (urw(p, 1, buf, len, a));