1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
19 * CDDL HEADER END
23 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 * Copyright 2018 Joyent, Inc.
28 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
29 /* All Rights Reserved */
32 * University Copyright- Copyright (c) 1982, 1986, 1988
33 * The Regents of the University of California
34 * All Rights Reserved
36 * University Acknowledgment- Portions of this document are derived from
37 * software developed by the University of California, Berkeley, and its
38 * contributors.
42 * VM - segment of a mapped device.
44 * This segment driver is used when mapping character special devices.
47 #include <sys/types.h>
48 #include <sys/t_lock.h>
49 #include <sys/sysmacros.h>
50 #include <sys/vtrace.h>
51 #include <sys/systm.h>
52 #include <sys/vmsystm.h>
53 #include <sys/mman.h>
54 #include <sys/errno.h>
55 #include <sys/kmem.h>
56 #include <sys/cmn_err.h>
57 #include <sys/vnode.h>
58 #include <sys/proc.h>
59 #include <sys/conf.h>
60 #include <sys/debug.h>
61 #include <sys/ddidevmap.h>
62 #include <sys/ddi_implfuncs.h>
63 #include <sys/lgrp.h>
65 #include <vm/page.h>
66 #include <vm/hat.h>
67 #include <vm/as.h>
68 #include <vm/seg.h>
69 #include <vm/seg_dev.h>
70 #include <vm/seg_kp.h>
71 #include <vm/seg_kmem.h>
72 #include <vm/vpage.h>
74 #include <sys/sunddi.h>
75 #include <sys/esunddi.h>
76 #include <sys/fs/snode.h>
79 #if DEBUG
80 int segdev_debug;
81 #define DEBUGF(level, args) { if (segdev_debug >= (level)) cmn_err args; }
82 #else
83 #define DEBUGF(level, args)
84 #endif
86 /* Default timeout for devmap context management */
87 #define CTX_TIMEOUT_VALUE 0
89 #define HOLD_DHP_LOCK(dhp) if (dhp->dh_flags & DEVMAP_ALLOW_REMAP) \
90 { mutex_enter(&dhp->dh_lock); }
92 #define RELE_DHP_LOCK(dhp) if (dhp->dh_flags & DEVMAP_ALLOW_REMAP) \
93 { mutex_exit(&dhp->dh_lock); }
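/*
 * dh_lock is only taken when the handle was created with
 * DEVMAP_ALLOW_REMAP; it protects the dhp fields against a concurrent
 * devmap_*_remap, so for handles that cannot be remapped these macros
 * are no-ops.
 */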
95 #define round_down_p2(a, s) ((a) & ~((s) - 1))
96 #define round_up_p2(a, s) (((a) + (s) - 1) & ~((s) - 1))
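/*
 * For example, with s a power of two: round_down_p2(0x1234, 0x1000) is
 * 0x1000 and round_up_p2(0x1234, 0x1000) is 0x2000; values already on
 * an s boundary are returned unchanged by both.
 */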
99 * VA_PA_ALIGNED checks to see if both VA and PA are on a pgsize boundary
100 * VA_PA_PGSIZE_ALIGNED checks to see if VA is aligned with PA w.r.t. pgsize
102 #define VA_PA_ALIGNED(uvaddr, paddr, pgsize) \
103 (((uvaddr | paddr) & (pgsize - 1)) == 0)
104 #define VA_PA_PGSIZE_ALIGNED(uvaddr, paddr, pgsize) \
105 (((uvaddr ^ paddr) & (pgsize - 1)) == 0)
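/*
 * For example, with pgsize 0x200000: uvaddr 0x400000 / paddr 0x600000
 * satisfy both checks, while uvaddr 0x412000 / paddr 0x612000 satisfy
 * only VA_PA_PGSIZE_ALIGNED (same offset within a large page, but
 * neither address is on a large-page boundary).
 */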
107 #define vpgtob(n) ((n) * sizeof (struct vpage)) /* For brevity */
109 #define VTOCVP(vp) (VTOS(vp)->s_commonvp) /* we "know" it's an snode */
111 static struct devmap_ctx *devmapctx_list = NULL;
112 static struct devmap_softlock *devmap_slist = NULL;
115 * mutex, vnode and page for the page of zeros we use for the trash mappings.
116 * One trash page is allocated on the first ddi_umem_setup call that uses it
117 * XXX Eventually, we may want to combine this with what segnf does when all
118 * hat layers implement HAT_NOFAULT.
120 * The trash page is used when the backing store for a userland mapping is
121 * removed but the application semantics do not take kindly to a SIGBUS.
122 * In that scenario, the application's pages are mapped to some dummy page
123 * which returns garbage on reads, and writes go into a common place.
124 * (Perfect for NO_FAULT semantics)
125 * The device driver is responsible for communicating to the app, via some
126 * other mechanism, that such a remapping has happened and that the app
127 * should take corrective action.
128 * We can also use an anonymous memory page, as there is no requirement to
129 * keep the page locked; however, this complicates the fault code. RFE.
131 static struct vnode trashvp;
132 static struct page *trashpp;
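/*
 * When such a mapping faults, segdev_faultpage() resolves UMEM_TRASH
 * cookies to trashpp, so every torn-down mapping ends up sharing this
 * single page.
 */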
134 /* Non-pageable kernel memory is allocated from the umem_np_arena. */
135 static vmem_t *umem_np_arena;
137 /* Set the cookie to a value we know will never be a valid umem_cookie */
138 #define DEVMAP_DEVMEM_COOKIE ((ddi_umem_cookie_t)0x1)
141 * Macros to check the type of a devmap handle
143 #define cookie_is_devmem(c) \
144 ((c) == (struct ddi_umem_cookie *)DEVMAP_DEVMEM_COOKIE)
146 #define cookie_is_pmem(c) \
147 ((c) == (struct ddi_umem_cookie *)DEVMAP_PMEM_COOKIE)
149 #define cookie_is_kpmem(c) (!cookie_is_devmem(c) && !cookie_is_pmem(c) &&\
150 ((c)->type == KMEM_PAGEABLE))
152 #define dhp_is_devmem(dhp) \
153 (cookie_is_devmem((struct ddi_umem_cookie *)((dhp)->dh_cookie)))
155 #define dhp_is_pmem(dhp) \
156 (cookie_is_pmem((struct ddi_umem_cookie *)((dhp)->dh_cookie)))
158 #define dhp_is_kpmem(dhp) \
159 (cookie_is_kpmem((struct ddi_umem_cookie *)((dhp)->dh_cookie)))
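/*
 * DEVMAP_DEVMEM_COOKIE above (like the pmem cookie it is compared
 * against) is a sentinel value rather than a real pointer, so
 * cookie_is_devmem()/cookie_is_pmem() are pure pointer compares; only
 * when neither matches is the cookie dereferenced to check whether it
 * refers to pageable kernel memory.
 */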
162 * Private seg op routines.
164 static int segdev_dup(struct seg *, struct seg *);
165 static int segdev_unmap(struct seg *, caddr_t, size_t);
166 static void segdev_free(struct seg *);
167 static faultcode_t segdev_fault(struct hat *, struct seg *, caddr_t, size_t,
168 enum fault_type, enum seg_rw);
169 static faultcode_t segdev_faulta(struct seg *, caddr_t);
170 static int segdev_setprot(struct seg *, caddr_t, size_t, uint_t);
171 static int segdev_checkprot(struct seg *, caddr_t, size_t, uint_t);
172 static void segdev_badop(void);
173 static int segdev_sync(struct seg *, caddr_t, size_t, int, uint_t);
174 static size_t segdev_incore(struct seg *, caddr_t, size_t, char *);
175 static int segdev_lockop(struct seg *, caddr_t, size_t, int, int,
176 ulong_t *, size_t);
177 static int segdev_getprot(struct seg *, caddr_t, size_t, uint_t *);
178 static uoff_t segdev_getoffset(struct seg *, caddr_t);
179 static int segdev_gettype(struct seg *, caddr_t);
180 static int segdev_getvp(struct seg *, caddr_t, struct vnode **);
181 static int segdev_advise(struct seg *, caddr_t, size_t, uint_t);
182 static int segdev_pagelock(struct seg *, caddr_t, size_t,
183 struct page ***, enum lock_type, enum seg_rw);
184 static int segdev_getmemid(struct seg *, caddr_t, memid_t *);
187 * XXX this struct is used by rootnex_map_fault to identify
188 * the segment it has been passed. So if you make it
189 * "static" you'll need to fix rootnex_map_fault.
191 const struct seg_ops segdev_ops = {
192 .dup = segdev_dup,
193 .unmap = segdev_unmap,
194 .free = segdev_free,
195 .fault = segdev_fault,
196 .faulta = segdev_faulta,
197 .setprot = segdev_setprot,
198 .checkprot = segdev_checkprot,
199 .kluster = (int (*)())segdev_badop,
200 .sync = segdev_sync,
201 .incore = segdev_incore,
202 .lockop = segdev_lockop,
203 .getprot = segdev_getprot,
204 .getoffset = segdev_getoffset,
205 .gettype = segdev_gettype,
206 .getvp = segdev_getvp,
207 .advise = segdev_advise,
208 .pagelock = segdev_pagelock,
209 .getmemid = segdev_getmemid,
213 * Private segdev support routines
215 static struct segdev_data *sdp_alloc(void);
217 static void segdev_softunlock(struct hat *, struct seg *, caddr_t,
218 size_t, enum seg_rw);
220 static faultcode_t segdev_faultpage(struct hat *, struct seg *, caddr_t,
221 struct vpage *, enum fault_type, enum seg_rw, devmap_handle_t *);
223 static faultcode_t segdev_faultpages(struct hat *, struct seg *, caddr_t,
224 size_t, enum fault_type, enum seg_rw, devmap_handle_t *);
226 static struct devmap_ctx *devmap_ctxinit(dev_t, ulong_t);
227 static struct devmap_softlock *devmap_softlock_init(dev_t, ulong_t);
228 static void devmap_softlock_rele(devmap_handle_t *);
229 static void devmap_ctx_rele(devmap_handle_t *);
231 static void devmap_ctxto(void *);
233 static devmap_handle_t *devmap_find_handle(devmap_handle_t *dhp_head,
234 caddr_t addr);
236 static ulong_t devmap_roundup(devmap_handle_t *dhp, ulong_t offset, size_t len,
237 ulong_t *opfn, ulong_t *pagesize);
239 static void free_devmap_handle(devmap_handle_t *dhp);
241 static int devmap_handle_dup(devmap_handle_t *dhp, devmap_handle_t **new_dhp,
242 struct seg *newseg);
244 static devmap_handle_t *devmap_handle_unmap(devmap_handle_t *dhp);
246 static void devmap_handle_unmap_head(devmap_handle_t *dhp, size_t len);
248 static void devmap_handle_unmap_tail(devmap_handle_t *dhp, caddr_t addr);
250 static int devmap_device(devmap_handle_t *dhp, struct as *as, caddr_t *addr,
251 offset_t off, size_t len, uint_t flags);
253 static void devmap_get_large_pgsize(devmap_handle_t *dhp, size_t len,
254 caddr_t addr, size_t *llen, caddr_t *laddr);
256 static void devmap_handle_reduce_len(devmap_handle_t *dhp, size_t len);
258 static void *devmap_alloc_pages(vmem_t *vmp, size_t size, int vmflag);
259 static void devmap_free_pages(vmem_t *vmp, void *inaddr, size_t size);
261 static void *devmap_umem_alloc_np(size_t size, size_t flags);
262 static void devmap_umem_free_np(void *addr, size_t size);
265 * routines to lock and unlock underlying segkp segment for
266 * KMEM_PAGEABLE type cookies.
268 static faultcode_t acquire_kpmem_lock(struct ddi_umem_cookie *, size_t);
269 static void release_kpmem_lock(struct ddi_umem_cookie *, size_t);
272 * Routines to synchronize F_SOFTLOCK and F_INVAL faults for
273 * drivers with devmap_access callbacks
275 static int devmap_softlock_enter(struct devmap_softlock *, size_t,
276 enum fault_type);
277 static void devmap_softlock_exit(struct devmap_softlock *, size_t,
278 enum fault_type);
280 static kmutex_t devmapctx_lock;
282 static kmutex_t devmap_slock;
285 * Initialize the thread callbacks and thread private data.
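/*
 * The devmap_ctx list is keyed by (dip, id) and refcounted.  A zeroed
 * entry is preallocated before devmapctx_lock is taken so that the
 * allocation never sleeps while the mutex is held; if a matching entry
 * already exists, the preallocated one is simply freed.
 */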
287 static struct devmap_ctx *
288 devmap_ctxinit(dev_t dev, ulong_t id)
290 struct devmap_ctx *devctx;
291 struct devmap_ctx *tmp;
292 dev_info_t *dip;
294 tmp = kmem_zalloc(sizeof (struct devmap_ctx), KM_SLEEP);
296 mutex_enter(&devmapctx_lock);
298 dip = e_ddi_hold_devi_by_dev(dev, 0);
299 ASSERT(dip != NULL);
300 ddi_release_devi(dip);
302 for (devctx = devmapctx_list; devctx != NULL; devctx = devctx->next)
303 if ((devctx->dip == dip) && (devctx->id == id))
304 break;
306 if (devctx == NULL) {
307 devctx = tmp;
308 devctx->dip = dip;
309 devctx->id = id;
310 mutex_init(&devctx->lock, NULL, MUTEX_DEFAULT, NULL);
311 cv_init(&devctx->cv, NULL, CV_DEFAULT, NULL);
312 devctx->next = devmapctx_list;
313 devmapctx_list = devctx;
314 } else
315 kmem_free(tmp, sizeof (struct devmap_ctx));
317 mutex_enter(&devctx->lock);
318 devctx->refcnt++;
319 mutex_exit(&devctx->lock);
320 mutex_exit(&devmapctx_lock);
322 return (devctx);
326 * Timeout callback called if a CPU has not given up the device context
327 * within dhp->dh_timeout_length ticks
329 static void
330 devmap_ctxto(void *data)
332 struct devmap_ctx *devctx = data;
334 mutex_enter(&devctx->lock);
336 * Set oncpu = 0 so the next mapping trying to get the device context
337 * can.
339 devctx->oncpu = 0;
340 devctx->timeout = 0;
341 cv_signal(&devctx->cv);
342 mutex_exit(&devctx->lock);
346 * Create a device segment.
349 segdev_create(struct seg **segpp, void *argsp)
351 struct seg *seg = *segpp;
352 struct segdev_data *sdp;
353 struct segdev_crargs *a = (struct segdev_crargs *)argsp;
354 devmap_handle_t *dhp = (devmap_handle_t *)a->devmap_data;
355 int error;
358 * Since the address space is "write" locked, we
359 * don't need the segment lock to protect "segdev" data.
361 ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as));
363 hat_map(seg->s_as->a_hat, seg->s_base, seg->s_size, HAT_MAP);
365 sdp = sdp_alloc();
367 sdp->mapfunc = a->mapfunc;
368 sdp->offset = a->offset;
369 sdp->prot = a->prot;
370 sdp->maxprot = a->maxprot;
371 sdp->type = a->type;
372 sdp->pageprot = 0;
373 sdp->softlockcnt = 0;
374 sdp->vpage = NULL;
376 if (sdp->mapfunc == NULL)
377 sdp->devmap_data = dhp;
378 else
379 sdp->devmap_data = dhp = NULL;
381 sdp->hat_flags = a->hat_flags;
382 sdp->hat_attr = a->hat_attr;
385 * Currently, hat_flags supports only HAT_LOAD_NOCONSIST
387 ASSERT(!(sdp->hat_flags & ~HAT_LOAD_NOCONSIST));
390 * Hold shadow vnode -- segdev only deals with
391 * character (VCHR) devices. We use the common
392 * vp to hang pages on.
394 sdp->vp = specfind(a->dev, VCHR);
395 ASSERT(sdp->vp != NULL);
397 seg->s_ops = &segdev_ops;
398 seg->s_data = sdp;
400 while (dhp != NULL) {
401 dhp->dh_seg = seg;
402 dhp = dhp->dh_next;
406 * Inform the vnode of the new mapping.
409 * It is ok to pass sdp->maxprot to ADDMAP rather than to use the
410 * dhp-specific maxprot because spec_addmap does not use maxprot.
412 error = fop_addmap(VTOCVP(sdp->vp), sdp->offset,
413 seg->s_as, seg->s_base, seg->s_size,
414 sdp->prot, sdp->maxprot, sdp->type, CRED(), NULL);
416 if (error != 0) {
417 sdp->devmap_data = NULL;
418 hat_unload(seg->s_as->a_hat, seg->s_base, seg->s_size,
419 HAT_UNLOAD_UNMAP);
420 } else {
422 * Mappings of /dev/null don't count towards the VSZ of a
423 * process. Mappings of /dev/null have no mapping type.
425 if ((segop_gettype(seg, seg->s_base) & (MAP_SHARED |
426 MAP_PRIVATE)) == 0) {
427 seg->s_as->a_resvsize -= seg->s_size;
431 return (error);
434 static struct segdev_data *
435 sdp_alloc(void)
437 struct segdev_data *sdp;
439 sdp = kmem_zalloc(sizeof (struct segdev_data), KM_SLEEP);
440 rw_init(&sdp->lock, NULL, RW_DEFAULT, NULL);
442 return (sdp);
446 * Duplicate seg and return new segment in newseg.
448 static int
449 segdev_dup(struct seg *seg, struct seg *newseg)
451 struct segdev_data *sdp = (struct segdev_data *)seg->s_data;
452 struct segdev_data *newsdp;
453 devmap_handle_t *dhp = (devmap_handle_t *)sdp->devmap_data;
454 size_t npages;
455 int ret;
457 DEBUGF(3, (CE_CONT, "segdev_dup: dhp %p seg %p\n",
458 (void *)dhp, (void *)seg));
461 * Since the address space is "write" locked, we
462 * don't need the segment lock to protect "segdev" data.
464 ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as));
466 newsdp = sdp_alloc();
468 newseg->s_ops = seg->s_ops;
469 newseg->s_data = (void *)newsdp;
471 VN_HOLD(sdp->vp);
472 newsdp->vp = sdp->vp;
473 newsdp->mapfunc = sdp->mapfunc;
474 newsdp->offset = sdp->offset;
475 newsdp->pageprot = sdp->pageprot;
476 newsdp->prot = sdp->prot;
477 newsdp->maxprot = sdp->maxprot;
478 newsdp->type = sdp->type;
479 newsdp->hat_attr = sdp->hat_attr;
480 newsdp->hat_flags = sdp->hat_flags;
481 newsdp->softlockcnt = 0;
484 * Initialize per page data if the segment we are
485 * dup'ing has per page information.
487 npages = seg_pages(newseg);
489 if (sdp->vpage != NULL) {
490 size_t nbytes = vpgtob(npages);
492 newsdp->vpage = kmem_zalloc(nbytes, KM_SLEEP);
493 bcopy(sdp->vpage, newsdp->vpage, nbytes);
494 } else
495 newsdp->vpage = NULL;
498 * duplicate devmap handles
500 if (dhp != NULL) {
501 ret = devmap_handle_dup(dhp,
502 (devmap_handle_t **)&newsdp->devmap_data, newseg);
503 if (ret != 0) {
504 DEBUGF(1, (CE_CONT,
505 "segdev_dup: ret %x dhp %p seg %p\n",
506 ret, (void *)dhp, (void *)seg));
507 return (ret);
512 * Inform the common vnode of the new mapping.
514 return (fop_addmap(VTOCVP(newsdp->vp),
515 newsdp->offset, newseg->s_as,
516 newseg->s_base, newseg->s_size, newsdp->prot,
517 newsdp->maxprot, sdp->type, CRED(), NULL));
521 * duplicate devmap handles
523 static int
524 devmap_handle_dup(devmap_handle_t *dhp, devmap_handle_t **new_dhp,
525 struct seg *newseg)
527 devmap_handle_t *newdhp_save = NULL;
528 devmap_handle_t *newdhp = NULL;
529 struct devmap_callback_ctl *callbackops;
531 while (dhp != NULL) {
532 newdhp = kmem_alloc(sizeof (devmap_handle_t), KM_SLEEP);
534 /* Need to lock the original dhp while copying if REMAP */
535 HOLD_DHP_LOCK(dhp);
536 bcopy(dhp, newdhp, sizeof (devmap_handle_t));
537 RELE_DHP_LOCK(dhp);
538 newdhp->dh_seg = newseg;
539 newdhp->dh_next = NULL;
540 if (newdhp_save != NULL)
541 newdhp_save->dh_next = newdhp;
542 else
543 *new_dhp = newdhp;
544 newdhp_save = newdhp;
546 callbackops = &newdhp->dh_callbackops;
548 if (dhp->dh_softlock != NULL)
549 newdhp->dh_softlock = devmap_softlock_init(
550 newdhp->dh_dev,
551 (ulong_t)callbackops->devmap_access);
552 if (dhp->dh_ctx != NULL)
553 newdhp->dh_ctx = devmap_ctxinit(newdhp->dh_dev,
554 (ulong_t)callbackops->devmap_access);
557 * Initialize dh_lock if we want to do remap.
559 if (newdhp->dh_flags & DEVMAP_ALLOW_REMAP) {
560 mutex_init(&newdhp->dh_lock, NULL, MUTEX_DEFAULT, NULL);
561 newdhp->dh_flags |= DEVMAP_LOCK_INITED;
564 if (callbackops->devmap_dup != NULL) {
565 int ret;
568 * Call the dup callback so that the driver can
569 * duplicate its private data.
571 ret = (*callbackops->devmap_dup)(dhp, dhp->dh_pvtp,
572 (devmap_cookie_t *)newdhp, &newdhp->dh_pvtp);
574 if (ret != 0) {
576 * We want to free up this segment as the driver
577 * has indicated that we can't dup it. But we
578 * don't want to call the drivers, devmap_unmap,
579 * callback function as the driver does not
580 * think this segment exists. The caller of
581 * devmap_dup will call seg_free on newseg
582 * as it was the caller that allocated the
583 * segment.
585 DEBUGF(1, (CE_CONT, "devmap_handle_dup ERROR: "
586 "newdhp %p dhp %p\n", (void *)newdhp,
587 (void *)dhp));
588 callbackops->devmap_unmap = NULL;
589 return (ret);
593 dhp = dhp->dh_next;
596 return (0);
600 * Split a segment at addr for length len.
602 /*ARGSUSED*/
603 static int
604 segdev_unmap(struct seg *seg, caddr_t addr, size_t len)
606 register struct segdev_data *sdp = (struct segdev_data *)seg->s_data;
607 register struct segdev_data *nsdp;
608 register struct seg *nseg;
609 register size_t opages; /* old segment size in pages */
610 register size_t npages; /* new segment size in pages */
611 register size_t dpages; /* pages being deleted (unmapped) */
612 register size_t nbytes;
613 devmap_handle_t *dhp = (devmap_handle_t *)sdp->devmap_data;
614 devmap_handle_t *dhpp;
615 devmap_handle_t *newdhp;
616 struct devmap_callback_ctl *callbackops;
617 caddr_t nbase;
618 offset_t off;
619 ulong_t nsize;
620 size_t mlen, sz;
622 DEBUGF(3, (CE_CONT, "segdev_unmap: dhp %p seg %p addr %p len %lx\n",
623 (void *)dhp, (void *)seg, (void *)addr, len));
626 * Since the address space is "write" locked, we
627 * don't need the segment lock to protect "segdev" data.
629 ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as));
631 if ((sz = sdp->softlockcnt) > 0) {
633 * Fail the unmap if pages are SOFTLOCKed through this mapping.
634 * softlockcnt is protected from change by the as write lock.
636 DEBUGF(1, (CE_CONT, "segdev_unmap: softlockcnt %ld\n", sz));
637 return (EAGAIN);
641 * Check for bad sizes
643 if (addr < seg->s_base || addr + len > seg->s_base + seg->s_size ||
644 (len & PAGEOFFSET) || ((uintptr_t)addr & PAGEOFFSET))
645 panic("segdev_unmap");
647 if (dhp != NULL) {
648 devmap_handle_t *tdhp;
650 * If large page size was used in hat_devload(),
651 * the same page size must be used in hat_unload().
653 dhpp = tdhp = devmap_find_handle(dhp, addr);
654 while (tdhp != NULL) {
655 if (tdhp->dh_flags & DEVMAP_FLAG_LARGE) {
656 break;
658 tdhp = tdhp->dh_next;
660 if (tdhp != NULL) { /* found a dhp using large pages */
661 size_t slen = len;
662 size_t mlen;
663 size_t soff;
665 soff = (ulong_t)(addr - dhpp->dh_uvaddr);
666 while (slen != 0) {
667 mlen = MIN(slen, (dhpp->dh_len - soff));
668 hat_unload(seg->s_as->a_hat, dhpp->dh_uvaddr,
669 dhpp->dh_len, HAT_UNLOAD_UNMAP);
670 dhpp = dhpp->dh_next;
671 ASSERT(slen >= mlen);
672 slen -= mlen;
673 soff = 0;
675 } else
676 hat_unload(seg->s_as->a_hat, addr, len,
677 HAT_UNLOAD_UNMAP);
678 } else {
680 * Unload any hardware translations in the range
681 * to be taken out.
683 hat_unload(seg->s_as->a_hat, addr, len, HAT_UNLOAD_UNMAP);
687 * get the user offset which will be used in the driver callbacks
689 off = sdp->offset + (offset_t)(addr - seg->s_base);
692 * Inform the vnode of the unmapping.
694 ASSERT(sdp->vp != NULL);
695 (void) fop_delmap(VTOCVP(sdp->vp), off, seg->s_as, addr, len,
696 sdp->prot, sdp->maxprot, sdp->type, CRED(), NULL);
699 * Check for entire segment
701 if (addr == seg->s_base && len == seg->s_size) {
702 seg_free(seg);
703 return (0);
706 opages = seg_pages(seg);
707 dpages = btop(len);
708 npages = opages - dpages;
711 * Check for beginning of segment
713 if (addr == seg->s_base) {
714 if (sdp->vpage != NULL) {
715 register struct vpage *ovpage;
717 ovpage = sdp->vpage; /* keep pointer to vpage */
719 nbytes = vpgtob(npages);
720 sdp->vpage = kmem_alloc(nbytes, KM_SLEEP);
721 bcopy(&ovpage[dpages], sdp->vpage, nbytes);
723 /* free up old vpage */
724 kmem_free(ovpage, vpgtob(opages));
728 * free devmap handles from the beginning of the mapping.
730 if (dhp != NULL)
731 devmap_handle_unmap_head(dhp, len);
733 sdp->offset += (offset_t)len;
735 seg->s_base += len;
736 seg->s_size -= len;
738 return (0);
742 * Check for end of segment
744 if (addr + len == seg->s_base + seg->s_size) {
745 if (sdp->vpage != NULL) {
746 register struct vpage *ovpage;
748 ovpage = sdp->vpage; /* keep pointer to vpage */
750 nbytes = vpgtob(npages);
751 sdp->vpage = kmem_alloc(nbytes, KM_SLEEP);
752 bcopy(ovpage, sdp->vpage, nbytes);
754 /* free up old vpage */
755 kmem_free(ovpage, vpgtob(opages));
757 seg->s_size -= len;
760 * free devmap handles from addr to the end of the mapping.
762 if (dhp != NULL)
763 devmap_handle_unmap_tail(dhp, addr);
765 return (0);
769 * The section to go is in the middle of the segment,
770 * so we have to make it into two segments: nseg is made for
771 * the high end while seg is cut down at the low end.
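/*
 * Roughly:
 *
 *	before:	seg  [s_base ........................ s_base + s_size)
 *	unmap:	             [addr ..... addr + len)
 *	after:	seg  [s_base ... addr)      nseg [addr + len ... old end)
 */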
773 nbase = addr + len; /* new seg base */
774 nsize = (seg->s_base + seg->s_size) - nbase; /* new seg size */
775 seg->s_size = addr - seg->s_base; /* shrink old seg */
776 nseg = seg_alloc(seg->s_as, nbase, nsize);
777 if (nseg == NULL)
778 panic("segdev_unmap seg_alloc");
780 DEBUGF(3, (CE_CONT, "segdev_unmap: segdev_dup seg %p nseg %p\n",
781 (void *)seg, (void *)nseg));
782 nsdp = sdp_alloc();
784 nseg->s_ops = seg->s_ops;
785 nseg->s_data = (void *)nsdp;
787 VN_HOLD(sdp->vp);
788 nsdp->mapfunc = sdp->mapfunc;
789 nsdp->offset = sdp->offset + (offset_t)(nseg->s_base - seg->s_base);
790 nsdp->vp = sdp->vp;
791 nsdp->pageprot = sdp->pageprot;
792 nsdp->prot = sdp->prot;
793 nsdp->maxprot = sdp->maxprot;
794 nsdp->type = sdp->type;
795 nsdp->hat_attr = sdp->hat_attr;
796 nsdp->hat_flags = sdp->hat_flags;
797 nsdp->softlockcnt = 0;
800 * Initialize per page data if the segment we are
801 * dup'ing has per page information.
803 if (sdp->vpage != NULL) {
804 /* need to split vpage into two arrays */
805 register size_t nnbytes;
806 register size_t nnpages;
807 register struct vpage *ovpage;
809 ovpage = sdp->vpage; /* keep pointer to vpage */
811 npages = seg_pages(seg); /* seg has shrunk */
812 nbytes = vpgtob(npages);
813 nnpages = seg_pages(nseg);
814 nnbytes = vpgtob(nnpages);
816 sdp->vpage = kmem_alloc(nbytes, KM_SLEEP);
817 bcopy(ovpage, sdp->vpage, nbytes);
819 nsdp->vpage = kmem_alloc(nnbytes, KM_SLEEP);
820 bcopy(&ovpage[npages + dpages], nsdp->vpage, nnbytes);
822 /* free up old vpage */
823 kmem_free(ovpage, vpgtob(opages));
824 } else
825 nsdp->vpage = NULL;
828 * unmap dhps.
830 if (dhp == NULL) {
831 nsdp->devmap_data = NULL;
832 return (0);
834 while (dhp != NULL) {
835 callbackops = &dhp->dh_callbackops;
836 DEBUGF(3, (CE_CONT, "unmap: dhp %p addr %p uvaddr %p len %lx\n",
837 (void *)dhp, (void *)addr,
838 (void *)dhp->dh_uvaddr, dhp->dh_len));
840 if (addr == (dhp->dh_uvaddr + dhp->dh_len)) {
841 dhpp = dhp->dh_next;
842 dhp->dh_next = NULL;
843 dhp = dhpp;
844 } else if (addr > (dhp->dh_uvaddr + dhp->dh_len)) {
845 dhp = dhp->dh_next;
846 } else if (addr > dhp->dh_uvaddr &&
847 (addr + len) < (dhp->dh_uvaddr + dhp->dh_len)) {
849 * <addr, addr+len> is enclosed by dhp.
850 * create a newdhp that begins at addr+len and
851 * ends at dhp->dh_uvaddr+dhp->dh_len.
853 newdhp = kmem_alloc(sizeof (devmap_handle_t), KM_SLEEP);
854 HOLD_DHP_LOCK(dhp);
855 bcopy(dhp, newdhp, sizeof (devmap_handle_t));
856 RELE_DHP_LOCK(dhp);
857 newdhp->dh_seg = nseg;
858 newdhp->dh_next = dhp->dh_next;
859 if (dhp->dh_softlock != NULL)
860 newdhp->dh_softlock = devmap_softlock_init(
861 newdhp->dh_dev,
862 (ulong_t)callbackops->devmap_access);
863 if (dhp->dh_ctx != NULL)
864 newdhp->dh_ctx = devmap_ctxinit(newdhp->dh_dev,
865 (ulong_t)callbackops->devmap_access);
866 if (newdhp->dh_flags & DEVMAP_LOCK_INITED) {
867 mutex_init(&newdhp->dh_lock,
868 NULL, MUTEX_DEFAULT, NULL);
870 if (callbackops->devmap_unmap != NULL)
871 (*callbackops->devmap_unmap)(dhp, dhp->dh_pvtp,
872 off, len, dhp, &dhp->dh_pvtp,
873 newdhp, &newdhp->dh_pvtp);
874 mlen = len + (addr - dhp->dh_uvaddr);
875 devmap_handle_reduce_len(newdhp, mlen);
876 nsdp->devmap_data = newdhp;
877 /* XX Changing len should recalculate LARGE flag */
878 dhp->dh_len = addr - dhp->dh_uvaddr;
879 dhpp = dhp->dh_next;
880 dhp->dh_next = NULL;
881 dhp = dhpp;
882 } else if ((addr > dhp->dh_uvaddr) &&
883 ((addr + len) >= (dhp->dh_uvaddr + dhp->dh_len))) {
884 mlen = dhp->dh_len + dhp->dh_uvaddr - addr;
886 * <addr, addr+len> spans over dhps.
888 if (callbackops->devmap_unmap != NULL)
889 (*callbackops->devmap_unmap)(dhp, dhp->dh_pvtp,
890 off, mlen, (devmap_cookie_t *)dhp,
891 &dhp->dh_pvtp, NULL, NULL);
892 /* XX Changing len should recalculate LARGE flag */
893 dhp->dh_len = addr - dhp->dh_uvaddr;
894 dhpp = dhp->dh_next;
895 dhp->dh_next = NULL;
896 dhp = dhpp;
897 nsdp->devmap_data = dhp;
898 } else if ((addr + len) >= (dhp->dh_uvaddr + dhp->dh_len)) {
900 * dhp is enclosed by <addr, addr+len>.
902 dhp->dh_seg = nseg;
903 nsdp->devmap_data = dhp;
904 dhp = devmap_handle_unmap(dhp);
905 nsdp->devmap_data = dhp; /* XX redundant? */
906 } else if (((addr + len) > dhp->dh_uvaddr) &&
907 ((addr + len) < (dhp->dh_uvaddr + dhp->dh_len))) {
908 mlen = addr + len - dhp->dh_uvaddr;
909 if (callbackops->devmap_unmap != NULL)
910 (*callbackops->devmap_unmap)(dhp, dhp->dh_pvtp,
911 dhp->dh_uoff, mlen, NULL,
912 NULL, dhp, &dhp->dh_pvtp);
913 devmap_handle_reduce_len(dhp, mlen);
914 nsdp->devmap_data = dhp;
915 dhp->dh_seg = nseg;
916 dhp = dhp->dh_next;
917 } else {
918 dhp->dh_seg = nseg;
919 dhp = dhp->dh_next;
922 return (0);
926 * Utility function that handles reducing the length of a devmap handle during unmap.
927 * Note that it is only used for unmapping the front portion of the handle,
928 * i.e., we are bumping the offset/pfn etc. up by len.
929 * Do not use it if reducing the length at the tail.
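/*
 * For example (with 4K pages): unmapping the first 0x4000 bytes of a
 * handle shrinks dh_len by 0x4000, bumps dh_uoff, dh_roff and dh_uvaddr
 * by 0x4000, and advances dh_pfn by btop(0x4000) == 4 for device memory,
 * or dh_cvaddr by 0x4000 for a kernel-memory cookie.
 */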
931 static void
932 devmap_handle_reduce_len(devmap_handle_t *dhp, size_t len)
934 struct ddi_umem_cookie *cp;
935 struct devmap_pmem_cookie *pcp;
937 * adjust devmap handle fields
939 ASSERT(len < dhp->dh_len);
941 /* Make sure only page-aligned changes are done */
942 ASSERT((len & PAGEOFFSET) == 0);
944 dhp->dh_len -= len;
945 dhp->dh_uoff += (offset_t)len;
946 dhp->dh_roff += (offset_t)len;
947 dhp->dh_uvaddr += len;
948 /* Need to grab dhp lock if REMAP */
949 HOLD_DHP_LOCK(dhp);
950 cp = dhp->dh_cookie;
951 if (!(dhp->dh_flags & DEVMAP_MAPPING_INVALID)) {
952 if (cookie_is_devmem(cp)) {
953 dhp->dh_pfn += btop(len);
954 } else if (cookie_is_pmem(cp)) {
955 pcp = (struct devmap_pmem_cookie *)dhp->dh_pcookie;
956 ASSERT((dhp->dh_roff & PAGEOFFSET) == 0 &&
957 dhp->dh_roff < ptob(pcp->dp_npages));
958 } else {
959 ASSERT(dhp->dh_roff < cp->size);
960 ASSERT(dhp->dh_cvaddr >= cp->cvaddr &&
961 dhp->dh_cvaddr < (cp->cvaddr + cp->size));
962 ASSERT((dhp->dh_cvaddr + len) <=
963 (cp->cvaddr + cp->size));
965 dhp->dh_cvaddr += len;
968 /* XXX - Should recalculate the DEVMAP_FLAG_LARGE after changes */
969 RELE_DHP_LOCK(dhp);
973 * Free devmap handle, dhp.
974 * Return the next devmap handle on the linked list.
976 static devmap_handle_t *
977 devmap_handle_unmap(devmap_handle_t *dhp)
979 struct devmap_callback_ctl *callbackops = &dhp->dh_callbackops;
980 struct segdev_data *sdp = (struct segdev_data *)dhp->dh_seg->s_data;
981 devmap_handle_t *dhpp = (devmap_handle_t *)sdp->devmap_data;
983 ASSERT(dhp != NULL);
986 * before we free up dhp, call the driver's devmap_unmap entry point
987 * to free resources allocated for this dhp.
989 if (callbackops->devmap_unmap != NULL) {
990 (*callbackops->devmap_unmap)(dhp, dhp->dh_pvtp, dhp->dh_uoff,
991 dhp->dh_len, NULL, NULL, NULL, NULL);
994 if (dhpp == dhp) { /* releasing first dhp, change sdp data */
995 sdp->devmap_data = dhp->dh_next;
996 } else {
997 while (dhpp->dh_next != dhp) {
998 dhpp = dhpp->dh_next;
1000 dhpp->dh_next = dhp->dh_next;
1002 dhpp = dhp->dh_next; /* return value is next dhp in chain */
1004 if (dhp->dh_softlock != NULL)
1005 devmap_softlock_rele(dhp);
1007 if (dhp->dh_ctx != NULL)
1008 devmap_ctx_rele(dhp);
1010 if (dhp->dh_flags & DEVMAP_LOCK_INITED) {
1011 mutex_destroy(&dhp->dh_lock);
1013 kmem_free(dhp, sizeof (devmap_handle_t));
1015 return (dhpp);
1019 * Free complete devmap handles from dhp for len bytes
1020 * dhp can be either the first handle or a subsequent handle
1022 static void
1023 devmap_handle_unmap_head(devmap_handle_t *dhp, size_t len)
1025 struct devmap_callback_ctl *callbackops;
1028 * free the devmap handles covered by len.
1030 while (len >= dhp->dh_len) {
1031 len -= dhp->dh_len;
1032 dhp = devmap_handle_unmap(dhp);
1034 if (len != 0) { /* partial unmap at head of first remaining dhp */
1035 callbackops = &dhp->dh_callbackops;
1038 * Call the unmap callback so the driver can make
1039 * adjustments to its private data.
1041 if (callbackops->devmap_unmap != NULL)
1042 (*callbackops->devmap_unmap)(dhp, dhp->dh_pvtp,
1043 dhp->dh_uoff, len, NULL, NULL, dhp, &dhp->dh_pvtp);
1044 devmap_handle_reduce_len(dhp, len);
1049 * Free devmap handles to truncate the mapping after addr.
1050 * RFE: Simpler to pass in a dhp pointing at the correct dhp (avoid the find
1051 * again); could then also use this routine in the middle unmap case.
1053 static void
1054 devmap_handle_unmap_tail(devmap_handle_t *dhp, caddr_t addr)
1056 register struct seg *seg = dhp->dh_seg;
1057 register struct segdev_data *sdp = (struct segdev_data *)seg->s_data;
1058 register devmap_handle_t *dhph = (devmap_handle_t *)sdp->devmap_data;
1059 struct devmap_callback_ctl *callbackops;
1060 register devmap_handle_t *dhpp;
1061 size_t maplen;
1062 ulong_t off;
1063 size_t len;
1065 maplen = (size_t)(addr - dhp->dh_uvaddr);
1066 dhph = devmap_find_handle(dhph, addr);
1068 while (dhph != NULL) {
1069 if (maplen == 0) {
1070 dhph = devmap_handle_unmap(dhph);
1071 } else {
1072 callbackops = &dhph->dh_callbackops;
1073 len = dhph->dh_len - maplen;
1074 off = (ulong_t)sdp->offset + (addr - seg->s_base);
1076 * Call the unmap callback so the driver
1077 * can make adjustments to its private data.
1079 if (callbackops->devmap_unmap != NULL)
1080 (*callbackops->devmap_unmap)(dhph,
1081 dhph->dh_pvtp, off, len,
1082 (devmap_cookie_t *)dhph,
1083 &dhph->dh_pvtp, NULL, NULL);
1084 /* XXX Reducing len needs to recalculate LARGE flag */
1085 dhph->dh_len = maplen;
1086 maplen = 0;
1087 dhpp = dhph->dh_next;
1088 dhph->dh_next = NULL;
1089 dhph = dhpp;
1091 } /* end while */
1095 * Free a segment.
1097 static void
1098 segdev_free(struct seg *seg)
1100 register struct segdev_data *sdp = (struct segdev_data *)seg->s_data;
1101 devmap_handle_t *dhp = (devmap_handle_t *)sdp->devmap_data;
1103 DEBUGF(3, (CE_CONT, "segdev_free: dhp %p seg %p\n",
1104 (void *)dhp, (void *)seg));
1107 * Since the address space is "write" locked, we
1108 * don't need the segment lock to protect "segdev" data.
1110 ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as));
1112 while (dhp != NULL)
1113 dhp = devmap_handle_unmap(dhp);
1115 VN_RELE(sdp->vp);
1116 if (sdp->vpage != NULL)
1117 kmem_free(sdp->vpage, vpgtob(seg_pages(seg)));
1119 rw_destroy(&sdp->lock);
1120 kmem_free(sdp, sizeof (*sdp));
1123 static void
1124 free_devmap_handle(devmap_handle_t *dhp)
1126 register devmap_handle_t *dhpp;
1129 * free up devmap handle
1131 while (dhp != NULL) {
1132 dhpp = dhp->dh_next;
1133 if (dhp->dh_flags & DEVMAP_LOCK_INITED) {
1134 mutex_destroy(&dhp->dh_lock);
1137 if (dhp->dh_softlock != NULL)
1138 devmap_softlock_rele(dhp);
1140 if (dhp->dh_ctx != NULL)
1141 devmap_ctx_rele(dhp);
1143 kmem_free(dhp, sizeof (devmap_handle_t));
1144 dhp = dhpp;
1149 * Routines to lock and unlock the underlying segkp segment for
1150 * KMEM_PAGEABLE type cookies.
1151 * segkp only allows a single pending F_SOFTLOCK;
1152 * we keep track of the number of locks in the cookie so we can
1153 * have multiple pending faults and manage the calls to segkp.
1154 * RFE: if segkp supports either pagelock or multiple
1155 * calls to F_SOFTLOCK, then these routines can go away.
1156 * With pagelock, segdev_faultpage could fault on a page-by-page basis,
1157 * which simplifies the code quite a bit.
1158 * If multiple calls were allowed but not partial ranges, then the need for
1159 * cookie->lock and the locked count goes away; the code can call as_fault directly.
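/*
 * In sketch form: the first F_SOFTLOCK caller does a single as_fault
 * (F_SOFTLOCK) over the entire cookie range [cvaddr, cvaddr + size);
 * every caller adds its page count to cookie->locked, and the last
 * release (locked dropping back to 0) issues the matching F_SOFTUNLOCK
 * over the same range.
 */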
1161 static faultcode_t
1162 acquire_kpmem_lock(struct ddi_umem_cookie *cookie, size_t npages)
1164 int err = 0;
1165 ASSERT(cookie_is_kpmem(cookie));
1167 * Fault in pages in segkp with F_SOFTLOCK.
1168 * We want to hold the lock until all pages have been loaded.
1169 * segkp only allows a single caller to hold SOFTLOCK, so the cookie
1170 * holds a count so we don't call into segkp multiple times
1172 mutex_enter(&cookie->lock);
1175 * Check for overflow in locked field
1177 if ((UINT32_MAX - cookie->locked) < npages) {
1178 err = FC_MAKE_ERR(ENOMEM);
1179 } else if (cookie->locked == 0) {
1180 /* First time locking */
1181 err = as_fault(kas.a_hat, &kas, cookie->cvaddr,
1182 cookie->size, F_SOFTLOCK, PROT_READ|PROT_WRITE);
1184 if (!err) {
1185 cookie->locked += npages;
1187 mutex_exit(&cookie->lock);
1188 return (err);
1191 static void
1192 release_kpmem_lock(struct ddi_umem_cookie *cookie, size_t npages)
1194 mutex_enter(&cookie->lock);
1195 ASSERT(cookie_is_kpmem(cookie));
1196 ASSERT(cookie->locked >= npages);
1197 cookie->locked -= (uint_t)npages;
1198 if (cookie->locked == 0) {
1199 /* Last unlock */
1200 if (as_fault(kas.a_hat, &kas, cookie->cvaddr,
1201 cookie->size, F_SOFTUNLOCK, PROT_READ|PROT_WRITE))
1202 panic("segdev releasing kpmem lock %p", (void *)cookie);
1204 mutex_exit(&cookie->lock);
1208 * Routines to synchronize F_SOFTLOCK and F_INVAL faults for
1209 * drivers with devmap_access callbacks
1210 * slock->softlocked basically works like a rw lock
1211 * -ve counts => F_SOFTLOCK in progress
1212 * +ve counts => F_INVAL/F_PROT in progress
1213 * We allow only one F_SOFTLOCK at a time
1214 * but can have multiple pending F_INVAL/F_PROT calls
1216 * This routine waits using cv_wait_sig so killing processes is more graceful
1217 * Returns EINTR if coming out of this routine due to a signal, 0 otherwise
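/*
 * For example: an F_SOFTLOCK of 4 pages waits until softlocked is 0 and
 * then drives it to -4; while it is negative, F_INVAL/F_PROT callers
 * block.  Two concurrent 2-page F_INVALs can coexist (softlocked == +4),
 * and each devmap_softlock_exit() undoes its caller's contribution,
 * signalling a waiter when the count returns to 0.
 */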
1219 static int devmap_softlock_enter(
1220 struct devmap_softlock *slock,
1221 size_t npages,
1222 enum fault_type type)
1224 if (npages == 0)
1225 return (0);
1226 mutex_enter(&(slock->lock));
1227 switch (type) {
1228 case F_SOFTLOCK :
1229 while (slock->softlocked) {
1230 if (cv_wait_sig(&(slock)->cv, &(slock)->lock) == 0) {
1231 /* signalled */
1232 mutex_exit(&(slock->lock));
1233 return (EINTR);
1236 slock->softlocked -= npages; /* -ve count => locked */
1237 break;
1238 case F_INVAL :
1239 case F_PROT :
1240 while (slock->softlocked < 0)
1241 if (cv_wait_sig(&(slock)->cv, &(slock)->lock) == 0) {
1242 /* signalled */
1243 mutex_exit(&(slock->lock));
1244 return (EINTR);
1246 slock->softlocked += npages; /* +ve count => f_invals */
1247 break;
1248 default:
1249 ASSERT(0);
1251 mutex_exit(&(slock->lock));
1252 return (0);
1255 static void devmap_softlock_exit(
1256 struct devmap_softlock *slock,
1257 size_t npages,
1258 enum fault_type type)
1260 if (slock == NULL)
1261 return;
1262 mutex_enter(&(slock->lock));
1263 switch (type) {
1264 case F_SOFTLOCK :
1265 ASSERT(-slock->softlocked >= npages);
1266 slock->softlocked += npages; /* -ve count is softlocked */
1267 if (slock->softlocked == 0)
1268 cv_signal(&slock->cv);
1269 break;
1270 case F_INVAL :
1271 case F_PROT:
1272 ASSERT(slock->softlocked >= npages);
1273 slock->softlocked -= npages;
1274 if (slock->softlocked == 0)
1275 cv_signal(&slock->cv);
1276 break;
1277 default:
1278 ASSERT(0);
1280 mutex_exit(&(slock->lock));
1284 * Do a F_SOFTUNLOCK call over the range requested.
1285 * The range must have already been F_SOFTLOCK'ed.
1286 * The segment lock should be held (but not the segment private lock?).
1287 * The softunlock code below does not adjust for large page sizes; it
1288 * assumes the caller already did any addr/len adjustments for
1289 * pagesize mappings before calling.
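/*
 * Roughly: hat_unlock() the whole range, then walk the covered dhps
 * undoing the kpmem locking and the devmap_softlock accounting (both in
 * btopr() page units), and finally drop sdp->softlockcnt, waking any
 * unmapper waiting in AS_ISUNMAPWAIT once the count reaches zero.
 */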
1291 /*ARGSUSED*/
1292 static void
1293 segdev_softunlock(
1294 struct hat *hat, /* the hat */
1295 struct seg *seg, /* seg_dev of interest */
1296 caddr_t addr, /* base address of range */
1297 size_t len, /* number of bytes */
1298 enum seg_rw rw) /* type of access at fault */
1300 struct segdev_data *sdp = (struct segdev_data *)seg->s_data;
1301 devmap_handle_t *dhp_head = (devmap_handle_t *)sdp->devmap_data;
1303 DEBUGF(3, (CE_CONT, "segdev_softunlock: dhp %p lockcnt %lx "
1304 "addr %p len %lx\n",
1305 (void *)dhp_head, sdp->softlockcnt, (void *)addr, len));
1307 hat_unlock(hat, addr, len);
1309 if (dhp_head != NULL) {
1310 devmap_handle_t *dhp;
1311 size_t mlen;
1312 size_t tlen = len;
1313 ulong_t off;
1315 dhp = devmap_find_handle(dhp_head, addr);
1316 ASSERT(dhp != NULL);
1318 off = (ulong_t)(addr - dhp->dh_uvaddr);
1319 while (tlen != 0) {
1320 mlen = MIN(tlen, (dhp->dh_len - off));
1323 * unlock segkp memory, locked during F_SOFTLOCK
1325 if (dhp_is_kpmem(dhp)) {
1326 release_kpmem_lock(
1327 (struct ddi_umem_cookie *)dhp->dh_cookie,
1328 btopr(mlen));
1332 * Do the softlock accounting for devmap_access
1334 if (dhp->dh_callbackops.devmap_access != NULL) {
1335 devmap_softlock_exit(dhp->dh_softlock,
1336 btopr(mlen), F_SOFTLOCK);
1339 tlen -= mlen;
1340 dhp = dhp->dh_next;
1341 off = 0;
1345 mutex_enter(&freemem_lock);
1346 ASSERT(sdp->softlockcnt >= btopr(len));
1347 sdp->softlockcnt -= btopr(len);
1348 mutex_exit(&freemem_lock);
1349 if (sdp->softlockcnt == 0) {
1351 * All SOFTLOCKS are gone. Wakeup any waiting
1352 * unmappers so they can try again to unmap.
1353 * Check for waiters first without the mutex
1354 * held so we don't always grab the mutex on
1355 * softunlocks.
1357 if (AS_ISUNMAPWAIT(seg->s_as)) {
1358 mutex_enter(&seg->s_as->a_contents);
1359 if (AS_ISUNMAPWAIT(seg->s_as)) {
1360 AS_CLRUNMAPWAIT(seg->s_as);
1361 cv_broadcast(&seg->s_as->a_cv);
1363 mutex_exit(&seg->s_as->a_contents);
1370 * Handle fault for a single page.
1371 * Done in a separate routine so we can handle errors more easily.
1372 * This routine is called only from segdev_faultpages()
1373 * when looping over the range of addresses requested. The segment lock is held.
1375 static faultcode_t
1376 segdev_faultpage(
1377 struct hat *hat, /* the hat */
1378 struct seg *seg, /* seg_dev of interest */
1379 caddr_t addr, /* address in as */
1380 struct vpage *vpage, /* pointer to vpage for seg, addr */
1381 enum fault_type type, /* type of fault */
1382 enum seg_rw rw, /* type of access at fault */
1383 devmap_handle_t *dhp) /* devmap handle if any for this page */
1385 struct segdev_data *sdp = (struct segdev_data *)seg->s_data;
1386 uint_t prot;
1387 pfn_t pfnum = PFN_INVALID;
1388 uoff_t offset;
1389 uint_t hat_flags;
1390 dev_info_t *dip;
1392 DEBUGF(8, (CE_CONT, "segdev_faultpage: dhp %p seg %p addr %p \n",
1393 (void *)dhp, (void *)seg, (void *)addr));
1396 * Initialize protection value for this page.
1397 * If we have per page protection values check it now.
1399 if (sdp->pageprot) {
1400 uint_t protchk;
1402 switch (rw) {
1403 case S_READ:
1404 protchk = PROT_READ;
1405 break;
1406 case S_WRITE:
1407 protchk = PROT_WRITE;
1408 break;
1409 case S_EXEC:
1410 protchk = PROT_EXEC;
1411 break;
1412 case S_OTHER:
1413 default:
1414 protchk = PROT_READ | PROT_WRITE | PROT_EXEC;
1415 break;
1418 prot = VPP_PROT(vpage);
1419 if ((prot & protchk) == 0)
1420 return (FC_PROT); /* illegal access type */
1421 } else {
1422 prot = sdp->prot;
1423 /* caller has already done segment level protection check */
1426 if (type == F_SOFTLOCK) {
1427 mutex_enter(&freemem_lock);
1428 sdp->softlockcnt++;
1429 mutex_exit(&freemem_lock);
1432 hat_flags = ((type == F_SOFTLOCK) ? HAT_LOAD_LOCK : HAT_LOAD);
1433 offset = sdp->offset + (uoff_t)(addr - seg->s_base);
1435 * In the devmap framework, sdp->mapfunc is set to NULL. We can get
1436 * pfnum from dhp->dh_pfn (at beginning of segment) and offset from
1437 * seg->s_base.
1439 if (dhp == NULL) {
1440 /* If segment has devmap_data, then dhp should be non-NULL */
1441 ASSERT(sdp->devmap_data == NULL);
1442 pfnum = (pfn_t)cdev_mmap(sdp->mapfunc, sdp->vp->v_rdev,
1443 (off_t)offset, prot);
1444 prot |= sdp->hat_attr;
1445 } else {
1446 ulong_t off;
1447 struct ddi_umem_cookie *cp;
1448 struct devmap_pmem_cookie *pcp;
1450 /* ensure the dhp passed in contains addr. */
1451 ASSERT(dhp == devmap_find_handle(
1452 (devmap_handle_t *)sdp->devmap_data, addr));
1454 off = addr - dhp->dh_uvaddr;
1457 * This routine assumes that the caller makes sure that the
1458 * fields in dhp used below are unchanged due to remap during
1459 * this call. Caller does HOLD_DHP_LOCK if needed.
1461 cp = dhp->dh_cookie;
1462 if (dhp->dh_flags & DEVMAP_MAPPING_INVALID) {
1463 pfnum = PFN_INVALID;
1464 } else if (cookie_is_devmem(cp)) {
1465 pfnum = dhp->dh_pfn + btop(off);
1466 } else if (cookie_is_pmem(cp)) {
1467 pcp = (struct devmap_pmem_cookie *)dhp->dh_pcookie;
1468 ASSERT((dhp->dh_roff & PAGEOFFSET) == 0 &&
1469 dhp->dh_roff < ptob(pcp->dp_npages));
1470 pfnum = page_pptonum(
1471 pcp->dp_pparray[btop(off + dhp->dh_roff)]);
1472 } else {
1473 ASSERT(dhp->dh_roff < cp->size);
1474 ASSERT(dhp->dh_cvaddr >= cp->cvaddr &&
1475 dhp->dh_cvaddr < (cp->cvaddr + cp->size));
1476 ASSERT((dhp->dh_cvaddr + off) <=
1477 (cp->cvaddr + cp->size));
1478 ASSERT((dhp->dh_cvaddr + off + PAGESIZE) <=
1479 (cp->cvaddr + cp->size));
1481 switch (cp->type) {
1482 case UMEM_LOCKED :
1483 if (cp->pparray != NULL) {
1484 ASSERT((dhp->dh_roff &
1485 PAGEOFFSET) == 0);
1486 pfnum = page_pptonum(
1487 cp->pparray[btop(off +
1488 dhp->dh_roff)]);
1489 } else {
1490 pfnum = hat_getpfnum(
1491 ((proc_t *)cp->procp)->p_as->a_hat,
1492 cp->cvaddr + off);
1494 break;
1495 case UMEM_TRASH :
1496 pfnum = page_pptonum(trashpp);
1498 * We should set hat_flags to HAT_NOFAULT also
1499 * However, not all hat layers implement this
1501 break;
1502 case KMEM_PAGEABLE:
1503 case KMEM_NON_PAGEABLE:
1504 pfnum = hat_getpfnum(kas.a_hat,
1505 dhp->dh_cvaddr + off);
1506 break;
1507 default :
1508 pfnum = PFN_INVALID;
1509 break;
1512 prot |= dhp->dh_hat_attr;
1514 if (pfnum == PFN_INVALID) {
1515 return (FC_MAKE_ERR(EFAULT));
1517 /* prot should already be OR'ed in with hat_attributes if needed */
1519 DEBUGF(9, (CE_CONT, "segdev_faultpage: pfnum %lx memory %x "
1520 "prot %x flags %x\n", pfnum, pf_is_memory(pfnum), prot, hat_flags));
1522 if (pf_is_memory(pfnum) || (dhp != NULL)) {
1524 * It's not _really_ required here to pass sdp->hat_flags
1525 * to hat_devload even though we do it.
1526 * This is because the hat figures out that DEVMEM mappings
1527 * are non-consistent, anyway.
1529 hat_devload(hat, addr, PAGESIZE, pfnum,
1530 prot, hat_flags | sdp->hat_flags);
1531 return (0);
1535 * Fall through to the case where devmap is not used and need to call
1536 * up the device tree to set up the mapping
1539 dip = VTOS(VTOCVP(sdp->vp))->s_dip;
1540 ASSERT(dip);
1543 * When calling ddi_map_fault, we do not OR in sdp->hat_attr.
1544 * This is because it calls drivers which may not expect
1545 * prot to have any values other than PROT_ALL.
1546 * The root nexus driver has a hack to peek into the segment
1547 * structure and then OR in sdp->hat_attr.
1548 * XX In case the bus_ops interfaces are ever revisited,
1549 * we need to fix this. prot should include other hat attributes.
1551 if (ddi_map_fault(dip, hat, seg, addr, NULL, pfnum, prot & PROT_ALL,
1552 (uint_t)(type == F_SOFTLOCK)) != DDI_SUCCESS) {
1553 return (FC_MAKE_ERR(EFAULT));
1555 return (0);
1558 static faultcode_t
1559 segdev_fault(
1560 struct hat *hat, /* the hat */
1561 struct seg *seg, /* the seg_dev of interest */
1562 caddr_t addr, /* the address of the fault */
1563 size_t len, /* the length of the range */
1564 enum fault_type type, /* type of fault */
1565 enum seg_rw rw) /* type of access at fault */
1567 struct segdev_data *sdp = (struct segdev_data *)seg->s_data;
1568 devmap_handle_t *dhp_head = (devmap_handle_t *)sdp->devmap_data;
1569 devmap_handle_t *dhp;
1570 struct devmap_softlock *slock = NULL;
1571 ulong_t slpage = 0;
1572 ulong_t off;
1573 caddr_t maddr = addr;
1574 int err;
1575 int err_is_faultcode = 0;
1577 DEBUGF(7, (CE_CONT, "segdev_fault: dhp_head %p seg %p "
1578 "addr %p len %lx type %x\n",
1579 (void *)dhp_head, (void *)seg, (void *)addr, len, type));
1581 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
1583 /* Handle non-devmap case */
1584 if (dhp_head == NULL)
1585 return (segdev_faultpages(hat, seg, addr, len, type, rw, NULL));
1587 /* Find devmap handle */
1588 if ((dhp = devmap_find_handle(dhp_head, addr)) == NULL)
1589 return (FC_NOMAP);
1592 * The seg_dev driver does not implement copy-on-write,
1593 * and always loads translations with the maximal allowed permissions,
1594 * but we got a fault trying to access the device.
1595 * Servicing the fault is not going to result in any better outcome.
1596 * RFE: If we want devmap_access callbacks to be involved in F_PROT
1597 * faults, then the code below is written for that
1598 * Pending resolution of the following:
1599 * - determine if the F_INVAL/F_SOFTLOCK syncing
1600 * is needed for F_PROT also or not. The code below assumes it does
1601 * - If driver sees F_PROT and calls devmap_load with same type,
1602 * then segdev_faultpages will fail with FC_PROT anyway, need to
1603 * change that so calls from devmap_load to segdev_faultpages for
1604 * F_PROT type are retagged to F_INVAL.
1605 * RFE: Today we don't have drivers that use devmap and want to handle
1606 * F_PROT calls. The code in segdev_fault* is written to allow
1607 * this case but is not tested. A driver that needs this capability
1608 * should be able to remove the short-circuit case; resolve the
1609 * above issues and "should" work.
1611 if (type == F_PROT) {
1612 return (FC_PROT);
1616 * Loop through dhp list calling devmap_access or segdev_faultpages for
1617 * each devmap handle.
1618 * drivers which implement devmap_access can interpose on faults and do
1619 * device-appropriate special actions before calling devmap_load.
1623 * Unfortunately, this simple loop has turned out to expose a variety
1624 * of complex problems which result in the following convoluted code.
1626 * First, a desire to handle a serialization of F_SOFTLOCK calls
1627 * to the driver within the framework.
1628 * This results in a dh_softlock structure that is on a per device
1629 * (or device instance) basis and serializes devmap_access calls.
1630 * Ideally we would need to do this for underlying
1631 * memory/device regions that are being faulted on
1632 * but that is hard to identify and with REMAP, harder
1633 * Second, a desire to serialize F_INVAL(and F_PROT) calls w.r.t.
1634 * to F_SOFTLOCK calls to the driver.
1635 * These serializations are to simplify the driver programmer model.
1636 * To support these two features, the code first goes through the
1637 * devmap handles and counts the pages (slpage) that are covered
1638 * by devmap_access callbacks.
1639 * This part ends with a devmap_softlock_enter call
1640 * which allows only one F_SOFTLOCK active on a device instance,
1641 * but multiple F_INVAL/F_PROTs can be active except when a
1642 * F_SOFTLOCK is active
1644 * Next, we don't short-circuit the fault code upfront to call
1645 * segdev_softunlock for F_SOFTUNLOCK, because we must use
1646 * the same length when we softlock and softunlock.
1648 * -Hat layers may not support softunlocking lengths less than the
1649 * original length when there is large page support.
1650 * -kpmem locking is dependent on keeping the lengths same.
1651 * -if drivers handled F_SOFTLOCK, they probably also expect to
1652 * see an F_SOFTUNLOCK of the same length
1653 * Hence, if extending lengths during softlock,
1654 * softunlock has to make the same adjustments and goes through
1655 * the same loop calling segdev_faultpages/segdev_softunlock
1656 * But some of the synchronization and error handling is different
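/*
 * In outline: for everything but F_SOFTUNLOCK we first walk the dhps
 * covering [addr, addr + len) and count (slpage) the pages covered by
 * handles that have a devmap_access callback, using the extended length
 * where DEVMAP_FLAG_LARGE applies, then make a single
 * devmap_softlock_enter() call.  The second walk services the fault per
 * handle, via the driver's devmap_access if present or
 * segdev_faultpages() otherwise; on an F_SOFTLOCK error the
 * already-locked prefix is unwound with a recursive
 * segdev_fault(F_SOFTUNLOCK).
 */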
1659 if (type != F_SOFTUNLOCK) {
1660 devmap_handle_t *dhpp = dhp;
1661 size_t slen = len;
1664 * Calculate count of pages that are :
1665 * a) within the (potentially extended) fault region
1666 * b) AND covered by devmap handle with devmap_access
1668 off = (ulong_t)(addr - dhpp->dh_uvaddr);
1669 while (slen != 0) {
1670 size_t mlen;
1673 * Softlocking on a region that allows remap is
1674 * unsupported due to unresolved locking issues
1675 * XXX: unclear what these are?
1676 * One potential is that if there is a pending
1677 * softlock, then a remap should not be allowed
1678 * until the unlock is done. This is easily
1679 * fixed by returning error in devmap*remap on
1680 * checking the dh->dh_softlock->softlocked value
1682 if ((type == F_SOFTLOCK) &&
1683 (dhpp->dh_flags & DEVMAP_ALLOW_REMAP)) {
1684 return (FC_NOSUPPORT);
1687 mlen = MIN(slen, (dhpp->dh_len - off));
1688 if (dhpp->dh_callbackops.devmap_access) {
1689 size_t llen;
1690 caddr_t laddr;
1692 * use extended length for large page mappings
1694 HOLD_DHP_LOCK(dhpp);
1695 if ((sdp->pageprot == 0) &&
1696 (dhpp->dh_flags & DEVMAP_FLAG_LARGE)) {
1697 devmap_get_large_pgsize(dhpp,
1698 mlen, maddr, &llen, &laddr);
1699 } else {
1700 llen = mlen;
1702 RELE_DHP_LOCK(dhpp);
1704 slpage += btopr(llen);
1705 slock = dhpp->dh_softlock;
1707 maddr += mlen;
1708 ASSERT(slen >= mlen);
1709 slen -= mlen;
1710 dhpp = dhpp->dh_next;
1711 off = 0;
1714 * synchronize with other faulting threads and wait until safe
1715 * devmap_softlock_enter might return due to signal in cv_wait
1717 * devmap_softlock_enter has to be called outside of while loop
1718 * to prevent a deadlock if len spans over multiple dhps.
1719 * dh_softlock is based on device instance and if multiple dhps
1720 * use the same device instance, the second dhp's LOCK call
1721 * will hang waiting on the first to complete.
1722 * devmap_setup verifies that slocks in a dhp_chain are same.
1723 * RFE: this deadlock only hold true for F_SOFTLOCK. For
1724 * F_INVAL/F_PROT, since we now allow multiple in parallel,
1725 * we could have done the softlock_enter inside the loop
1726 * and supported multi-dhp mappings with dissimilar devices
1728 if (err = devmap_softlock_enter(slock, slpage, type))
1729 return (FC_MAKE_ERR(err));
1732 /* reset 'maddr' to the start addr of the range of fault. */
1733 maddr = addr;
1735 /* calculate the offset corresponding to 'addr' in the first dhp. */
1736 off = (ulong_t)(addr - dhp->dh_uvaddr);
1739 * The fault length may span over multiple dhps.
1740 * Loop until the total length is satisfied.
1742 while (len != 0) {
1743 size_t llen;
1744 size_t mlen;
1745 caddr_t laddr;
1748 * mlen is the smaller of 'len' and the length
1749 * from addr to the end of mapping defined by dhp.
1751 mlen = MIN(len, (dhp->dh_len - off));
1753 HOLD_DHP_LOCK(dhp);
1755 * Pass the extended length and address to devmap_access
1756 * if large pagesize is used for loading address translations.
1758 if ((sdp->pageprot == 0) &&
1759 (dhp->dh_flags & DEVMAP_FLAG_LARGE)) {
1760 devmap_get_large_pgsize(dhp, mlen, maddr,
1761 &llen, &laddr);
1762 ASSERT(maddr == addr || laddr == maddr);
1763 } else {
1764 llen = mlen;
1765 laddr = maddr;
1768 if (dhp->dh_callbackops.devmap_access != NULL) {
1769 offset_t aoff;
1771 aoff = sdp->offset + (offset_t)(laddr - seg->s_base);
1774 * call driver's devmap_access entry point which will
1775 * call devmap_load/contextmgmt to load the translations
1777 * We drop the dhp_lock before calling access so
1778 * drivers can call devmap_*_remap within access
1780 RELE_DHP_LOCK(dhp);
1782 err = (*dhp->dh_callbackops.devmap_access)(
1783 dhp, (void *)dhp->dh_pvtp, aoff, llen, type, rw);
1784 } else {
1786 * If no devmap_access entry point, then load mappings
1787 * hold dhp_lock across faultpages if REMAP
1789 err = segdev_faultpages(hat, seg, laddr, llen,
1790 type, rw, dhp);
1791 err_is_faultcode = 1;
1792 RELE_DHP_LOCK(dhp);
1795 if (err) {
1796 if ((type == F_SOFTLOCK) && (maddr > addr)) {
1798 * If not first dhp, use
1799 * segdev_fault(F_SOFTUNLOCK) for prior dhps
1800 * While this is recursion, it is incorrect to
1801 * call just segdev_softunlock
1802 * if we are using either large pages
1803 * or devmap_access. It will be more right
1804 * to go through the same loop as above
1805 * rather than call segdev_softunlock directly
1806 * It will use the right lengths as well as
1807 * call into the driver devmap_access routines.
1809 size_t done = (size_t)(maddr - addr);
1810 (void) segdev_fault(hat, seg, addr, done,
1811 F_SOFTUNLOCK, S_OTHER);
1813 * reduce slpage by number of pages
1814 * released by segdev_softunlock
1816 ASSERT(slpage >= btopr(done));
1817 devmap_softlock_exit(slock,
1818 slpage - btopr(done), type);
1819 } else {
1820 devmap_softlock_exit(slock, slpage, type);
1825 * Segdev_faultpages() already returns a faultcode,
1826 * hence, result from segdev_faultpages() should be
1827 * returned directly.
1829 if (err_is_faultcode)
1830 return (err);
1831 return (FC_MAKE_ERR(err));
1834 maddr += mlen;
1835 ASSERT(len >= mlen);
1836 len -= mlen;
1837 dhp = dhp->dh_next;
1838 off = 0;
1840 ASSERT(!dhp || len == 0 || maddr == dhp->dh_uvaddr);
1843 * release the softlock count at end of fault
1844 * For F_SOFTLOCK this is done in the later F_SOFTUNLOCK
1846 if ((type == F_INVAL) || (type == F_PROT))
1847 devmap_softlock_exit(slock, slpage, type);
1848 return (0);
1852 * segdev_faultpages
1854 * Used to fault in seg_dev segment pages. Called by segdev_fault or devmap_load.
1855 * This routine assumes that the caller makes sure that the fields
1856 * in dhp used below are not changed due to remap during this call.
1857 * Caller does HOLD_DHP_LOCK if needed.
1858 * This routine returns a faultcode_t as a return value for segdev_fault.
1860 static faultcode_t
1861 segdev_faultpages(
1862 struct hat *hat, /* the hat */
1863 struct seg *seg, /* the seg_dev of interest */
1864 caddr_t addr, /* the address of the fault */
1865 size_t len, /* the length of the range */
1866 enum fault_type type, /* type of fault */
1867 enum seg_rw rw, /* type of access at fault */
1868 devmap_handle_t *dhp) /* devmap handle */
1870 register struct segdev_data *sdp = (struct segdev_data *)seg->s_data;
1871 register caddr_t a;
1872 struct vpage *vpage;
1873 struct ddi_umem_cookie *kpmem_cookie = NULL;
1874 int err;
1876 DEBUGF(5, (CE_CONT, "segdev_faultpages: "
1877 "dhp %p seg %p addr %p len %lx\n",
1878 (void *)dhp, (void *)seg, (void *)addr, len));
1881 * The seg_dev driver does not implement copy-on-write,
1882 * and always loads translations with maximal allowed permissions
1883 * but we got a fault trying to access the device.
1884 * Servicing the fault is not going to result in any better outcome.
1885 * XXX: If we want to allow devmap_access to handle F_PROT calls,
1886 * This code should be removed and let the normal fault handling
1887 * take care of finding the error
1889 if (type == F_PROT) {
1890 return (FC_PROT);
1893 if (type == F_SOFTUNLOCK) {
1894 segdev_softunlock(hat, seg, addr, len, rw);
1895 return (0);
1899 * For kernel pageable memory, fault/lock segkp pages
1900 * We hold this until the completion of this
1901 * fault (INVAL/PROT) or till unlock (SOFTLOCK).
1903 if ((dhp != NULL) && dhp_is_kpmem(dhp)) {
1904 kpmem_cookie = (struct ddi_umem_cookie *)dhp->dh_cookie;
1905 if (err = acquire_kpmem_lock(kpmem_cookie, btopr(len)))
1906 return (err);
1910 * If we have the same protections for the entire segment,
1911 * ensure that the access being attempted is legitimate.
1913 rw_enter(&sdp->lock, RW_READER);
1914 if (sdp->pageprot == 0) {
1915 uint_t protchk;
1917 switch (rw) {
1918 case S_READ:
1919 protchk = PROT_READ;
1920 break;
1921 case S_WRITE:
1922 protchk = PROT_WRITE;
1923 break;
1924 case S_EXEC:
1925 protchk = PROT_EXEC;
1926 break;
1927 case S_OTHER:
1928 default:
1929 protchk = PROT_READ | PROT_WRITE | PROT_EXEC;
1930 break;
1933 if ((sdp->prot & protchk) == 0) {
1934 rw_exit(&sdp->lock);
1935 /* undo kpmem locking */
1936 if (kpmem_cookie != NULL) {
1937 release_kpmem_lock(kpmem_cookie, btopr(len));
1939 return (FC_PROT); /* illegal access type */
1944 * We do a single hat_devload for the range if
1945 * - the devmap framework is in use (dhp is not NULL),
1946 * - pageprot == 0, i.e., no per-page protection is set, and
1947 * - the pages are device pages, irrespective of whether large pages are used
1949 if ((sdp->pageprot == 0) && (dhp != NULL) && dhp_is_devmem(dhp)) {
1950 pfn_t pfnum;
1951 uint_t hat_flags;
1953 if (dhp->dh_flags & DEVMAP_MAPPING_INVALID) {
1954 rw_exit(&sdp->lock);
1955 return (FC_NOMAP);
1958 if (type == F_SOFTLOCK) {
1959 mutex_enter(&freemem_lock);
1960 sdp->softlockcnt += btopr(len);
1961 mutex_exit(&freemem_lock);
1964 hat_flags = ((type == F_SOFTLOCK) ? HAT_LOAD_LOCK : HAT_LOAD);
1965 pfnum = dhp->dh_pfn + btop((uintptr_t)(addr - dhp->dh_uvaddr));
1966 ASSERT(!pf_is_memory(pfnum));
1968 hat_devload(hat, addr, len, pfnum, sdp->prot | dhp->dh_hat_attr,
1969 hat_flags | sdp->hat_flags);
1970 rw_exit(&sdp->lock);
1971 return (0);
1974 /* Handle cases where we have to loop through fault handling per-page */
1976 if (sdp->vpage == NULL)
1977 vpage = NULL;
1978 else
1979 vpage = &sdp->vpage[seg_page(seg, addr)];
1981 /* loop over the address range handling each fault */
1982 for (a = addr; a < addr + len; a += PAGESIZE) {
1983 if (err = segdev_faultpage(hat, seg, a, vpage, type, rw, dhp)) {
1984 break;
1986 if (vpage != NULL)
1987 vpage++;
1989 rw_exit(&sdp->lock);
1990 if (err && (type == F_SOFTLOCK)) { /* error handling for F_SOFTLOCK */
1991 size_t done = (size_t)(a - addr); /* range that faulted successfully */
1992 if (done > 0) {
1993 /* use softunlock for those pages */
1994 segdev_softunlock(hat, seg, addr, done, S_OTHER);
1996 if (kpmem_cookie != NULL) {
1997 /* release kpmem lock for rest of pages */
1998 ASSERT(len >= done);
1999 release_kpmem_lock(kpmem_cookie, btopr(len - done));
2001 } else if ((kpmem_cookie != NULL) && (type != F_SOFTLOCK)) {
2002 /* for non-SOFTLOCK cases, release kpmem */
2003 release_kpmem_lock(kpmem_cookie, btopr(len));
2005 return (err);
2009 * Asynchronous page fault. We simply do nothing since this
2010 * entry point is not supposed to load up the translation.
2012 /*ARGSUSED*/
2013 static faultcode_t
2014 segdev_faulta(struct seg *seg, caddr_t addr)
2016 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
2018 return (0);
2021 static int
2022 segdev_setprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot)
2024 register struct segdev_data *sdp = (struct segdev_data *)seg->s_data;
2025 register devmap_handle_t *dhp;
2026 register struct vpage *vp, *evp;
2027 devmap_handle_t *dhp_head = (devmap_handle_t *)sdp->devmap_data;
2028 ulong_t off;
2029 size_t mlen, sz;
2031 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
2033 if ((sz = sdp->softlockcnt) > 0 && dhp_head != NULL) {
2035 * Fail the setprot if pages are SOFTLOCKed through this
2036 * mapping.
2037 * Softlockcnt is protected from change by the as read lock.
2039 DEBUGF(1, (CE_CONT, "segdev_setprot: softlockcnt %ld\n", sz));
2040 return (EAGAIN);
2043 if (dhp_head != NULL) {
2044 if ((dhp = devmap_find_handle(dhp_head, addr)) == NULL)
2045 return (EINVAL);
2048 * Check whether the request violates maxprot.
2050 off = (ulong_t)(addr - dhp->dh_uvaddr);
2051 mlen = len;
2052 while (dhp) {
2053 if ((dhp->dh_maxprot & prot) != prot)
2054 return (EACCES); /* violated maxprot */
2056 if (mlen > (dhp->dh_len - off)) {
2057 mlen -= dhp->dh_len - off;
2058 dhp = dhp->dh_next;
2059 off = 0;
2060 } else
2061 break;
2063 } else {
2064 if ((sdp->maxprot & prot) != prot)
2065 return (EACCES);
2068 rw_enter(&sdp->lock, RW_WRITER);
2069 if (addr == seg->s_base && len == seg->s_size && sdp->pageprot == 0) {
2070 if (sdp->prot == prot) {
2071 rw_exit(&sdp->lock);
2072 return (0); /* all done */
2074 sdp->prot = (uchar_t)prot;
2075 } else {
2076 sdp->pageprot = 1;
2077 if (sdp->vpage == NULL) {
2079 * First time through setting per page permissions,
2080 * initialize all the vpage structures to prot
2082 sdp->vpage = kmem_zalloc(vpgtob(seg_pages(seg)),
2083 KM_SLEEP);
2084 evp = &sdp->vpage[seg_pages(seg)];
2085 for (vp = sdp->vpage; vp < evp; vp++)
2086 VPP_SETPROT(vp, sdp->prot);
2089 * Now go change the needed vpages protections.
2091 evp = &sdp->vpage[seg_page(seg, addr + len)];
2092 for (vp = &sdp->vpage[seg_page(seg, addr)]; vp < evp; vp++)
2093 VPP_SETPROT(vp, prot);
2095 rw_exit(&sdp->lock);
2097 if (dhp_head != NULL) {
2098 devmap_handle_t *tdhp;
2100 * If large page size was used in hat_devload(),
2101 * the same page size must be used in hat_unload().
2103 dhp = tdhp = devmap_find_handle(dhp_head, addr);
2104 while (tdhp != NULL) {
2105 if (tdhp->dh_flags & DEVMAP_FLAG_LARGE) {
2106 break;
2108 tdhp = tdhp->dh_next;
2110 if (tdhp) {
2111 size_t slen = len;
2112 size_t mlen;
2113 size_t soff;
2115 soff = (ulong_t)(addr - dhp->dh_uvaddr);
2116 while (slen != 0) {
2117 mlen = MIN(slen, (dhp->dh_len - soff));
2118 hat_unload(seg->s_as->a_hat, dhp->dh_uvaddr,
2119 dhp->dh_len, HAT_UNLOAD);
2120 dhp = dhp->dh_next;
2121 ASSERT(slen >= mlen);
2122 slen -= mlen;
2123 soff = 0;
2125 return (0);
2129 if ((prot & ~PROT_USER) == PROT_NONE) {
2130 hat_unload(seg->s_as->a_hat, addr, len, HAT_UNLOAD);
2131 } else {
2133 * RFE: the segment should keep track of all attributes
2134 * allowing us to remove the deprecated hat_chgprot
2135 * and use hat_chgattr.
2137 hat_chgprot(seg->s_as->a_hat, addr, len, prot);
2140 return (0);
2143 static int
2144 segdev_checkprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot)
2146 struct segdev_data *sdp = (struct segdev_data *)seg->s_data;
2147 struct vpage *vp, *evp;
2149 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
2152 * If segment protection can be used, simply check against them
2154 rw_enter(&sdp->lock, RW_READER);
2155 if (sdp->pageprot == 0) {
2156 register int err;
2158 err = ((sdp->prot & prot) != prot) ? EACCES : 0;
2159 rw_exit(&sdp->lock);
2160 return (err);
2164 * Have to check down to the vpage level
2166 evp = &sdp->vpage[seg_page(seg, addr + len)];
2167 for (vp = &sdp->vpage[seg_page(seg, addr)]; vp < evp; vp++) {
2168 if ((VPP_PROT(vp) & prot) != prot) {
2169 rw_exit(&sdp->lock);
2170 return (EACCES);
2173 rw_exit(&sdp->lock);
2174 return (0);
2177 static int
2178 segdev_getprot(struct seg *seg, caddr_t addr, size_t len, uint_t *protv)
2180 struct segdev_data *sdp = (struct segdev_data *)seg->s_data;
2181 size_t pgno;
2183 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
2185 pgno = seg_page(seg, addr + len) - seg_page(seg, addr) + 1;
2186 if (pgno != 0) {
2187 rw_enter(&sdp->lock, RW_READER);
2188 if (sdp->pageprot == 0) {
2189 do {
2190 protv[--pgno] = sdp->prot;
2191 } while (pgno != 0);
2192 } else {
2193 size_t pgoff = seg_page(seg, addr);
2195 do {
2196 pgno--;
2197 protv[pgno] =
2198 VPP_PROT(&sdp->vpage[pgno + pgoff]);
2199 } while (pgno != 0);
2201 rw_exit(&sdp->lock);
2203 return (0);
2206 static uoff_t
2207 segdev_getoffset(register struct seg *seg, caddr_t addr)
2209 register struct segdev_data *sdp = (struct segdev_data *)seg->s_data;
2211 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
2213 return ((uoff_t)sdp->offset + (addr - seg->s_base));
2216 /*ARGSUSED*/
2217 static int
2218 segdev_gettype(register struct seg *seg, caddr_t addr)
2220 register struct segdev_data *sdp = (struct segdev_data *)seg->s_data;
2222 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
2224 return (sdp->type);
2228 /*ARGSUSED*/
2229 static int
2230 segdev_getvp(register struct seg *seg, caddr_t addr, struct vnode **vpp)
2232 register struct segdev_data *sdp = (struct segdev_data *)seg->s_data;
2234 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
2237 * Note that this vp is the common_vp of the device, where the
2238 * pages are hung ..
2240 *vpp = VTOCVP(sdp->vp);
2242 return (0);
2245 static void
2246 segdev_badop(void)
2248 panic("segdev_badop");
2249 /*NOTREACHED*/
2253 * segdev pages are not in the cache, and thus can't really be controlled.
2254 * Hence, syncs are simply always successful.
2256 /*ARGSUSED*/
2257 static int
2258 segdev_sync(struct seg *seg, caddr_t addr, size_t len, int attr, uint_t flags)
2260 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
2262 return (0);
2266 * segdev pages are always "in core".
2268 /*ARGSUSED*/
2269 static size_t
2270 segdev_incore(struct seg *seg, caddr_t addr, size_t len, char *vec)
2272 size_t v = 0;
2274 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
2276 for (len = (len + PAGEOFFSET) & PAGEMASK; len; len -= PAGESIZE,
2277 v += PAGESIZE)
2278 *vec++ = 1;
2279 return (v);
2283 * segdev pages are not in the cache, and thus can't really be controlled.
2284 * Hence, locks are simply always successful.
2286 /*ARGSUSED*/
2287 static int
2288 segdev_lockop(struct seg *seg, caddr_t addr,
2289 size_t len, int attr, int op, ulong_t *lockmap, size_t pos)
2291 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
2293 return (0);
2297 * segdev pages are not in the cache, and thus can't really be controlled.
2298 * Hence, advise is simply always successful.
2300 /*ARGSUSED*/
2301 static int
2302 segdev_advise(struct seg *seg, caddr_t addr, size_t len, uint_t behav)
2304 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
2306 return (0);
2310 * ddi_segmap_setup: Used by drivers that wish to specify mapping attributes
2311 * for a segment. Called from a driver's segmap(9E)
2312 * routine.
2314 /*ARGSUSED*/
2316 ddi_segmap_setup(dev_t dev, off_t offset, struct as *as, caddr_t *addrp,
2317 off_t len, uint_t prot, uint_t maxprot, uint_t flags, cred_t *cred,
2318 ddi_device_acc_attr_t *accattrp, uint_t rnumber)
2320 struct segdev_crargs dev_a;
2321 int (*mapfunc)(dev_t dev, off_t off, int prot);
2322 uint_t hat_attr;
2323 pfn_t pfn;
2324 int error, i;
2326 if ((mapfunc = devopsp[getmajor(dev)]->devo_cb_ops->cb_mmap) == nodev)
2327 return (ENODEV);
2330 * Character devices that support the d_mmap
2331 * interface can only be mmap'ed shared.
2333 if ((flags & MAP_TYPE) != MAP_SHARED)
2334 return (EINVAL);
2337 * Check that this region is indeed mappable on this platform.
2338 * Use the mapping function.
2340 if (ddi_device_mapping_check(dev, accattrp, rnumber, &hat_attr) == -1)
2341 return (ENXIO);
2344 * Check to ensure that the entire range is
2345 * legal and we are not trying to map in
2346 * more than the device will let us.
2348 for (i = 0; i < len; i += PAGESIZE) {
2349 if (i == 0) {
2351 * Save the pfn at offset here. This pfn will be
2352 * used later to get user address.
2354 if ((pfn = (pfn_t)cdev_mmap(mapfunc, dev, offset,
2355 maxprot)) == PFN_INVALID)
2356 return (ENXIO);
2357 } else {
2358 if (cdev_mmap(mapfunc, dev, offset + i, maxprot) ==
2359 PFN_INVALID)
2360 return (ENXIO);
2364 as_rangelock(as);
2365 /* Pick an address w/o worrying about any vac alignment constraints. */
2366 error = choose_addr(as, addrp, len, ptob(pfn), ADDR_NOVACALIGN, flags);
2367 if (error != 0) {
2368 as_rangeunlock(as);
2369 return (error);
2372 dev_a.mapfunc = mapfunc;
2373 dev_a.dev = dev;
2374 dev_a.offset = (offset_t)offset;
2375 dev_a.type = flags & MAP_TYPE;
2376 dev_a.prot = (uchar_t)prot;
2377 dev_a.maxprot = (uchar_t)maxprot;
2378 dev_a.hat_attr = hat_attr;
2379 dev_a.hat_flags = 0;
2380 dev_a.devmap_data = NULL;
2382 error = as_map(as, *addrp, len, segdev_create, &dev_a);
2383 as_rangeunlock(as);
2384 return (error);
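
/*
 * Editorial sketch (not part of the original file): a minimal segmap(9E)
 * entry point for a hypothetical "xx" driver that simply delegates to
 * ddi_segmap_setup() above.  The access attributes and register number (0)
 * are illustrative assumptions only.
 */
static int
xx_segmap(dev_t dev, off_t off, struct as *as, caddr_t *addrp, off_t len,
    uint_t prot, uint_t maxprot, uint_t flags, cred_t *credp)
{
	ddi_device_acc_attr_t attr;

	attr.devacc_attr_version = DDI_DEVICE_ATTR_V0;
	attr.devacc_attr_endian_flags = DDI_NEVERSWAP_ACC;
	attr.devacc_attr_dataorder = DDI_STRICTORDER_ACC;

	/* Map register set 0 of this instance with the caller's protections */
	return (ddi_segmap_setup(dev, off, as, addrp, len, prot, maxprot,
	    flags, credp, &attr, 0));
}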
2388 /*ARGSUSED*/
2389 static int
2390 segdev_pagelock(struct seg *seg, caddr_t addr, size_t len,
2391 struct page ***ppp, enum lock_type type, enum seg_rw rw)
2393 return (ENOTSUP);
2397 * devmap_device: Used by the devmap framework to establish a mapping;
2398 * called by devmap_setup(9F) during map setup time.
2400 /*ARGSUSED*/
2401 static int
2402 devmap_device(devmap_handle_t *dhp, struct as *as, caddr_t *addr,
2403 offset_t off, size_t len, uint_t flags)
2405 devmap_handle_t *rdhp, *maxdhp;
2406 struct segdev_crargs dev_a;
2407 int err;
2408 uint_t maxprot = PROT_ALL;
2409 offset_t offset = 0;
2410 pfn_t pfn;
2411 struct devmap_pmem_cookie *pcp;
2413 DEBUGF(2, (CE_CONT, "devmap_device: dhp %p addr %p off %llx len %lx\n",
2414 (void *)dhp, (void *)addr, off, len));
2416 as_rangelock(as);
2417 if ((flags & MAP_FIXED) == 0) {
2418 offset_t aligned_off;
2420 rdhp = maxdhp = dhp;
2421 while (rdhp != NULL) {
2422 maxdhp = (maxdhp->dh_len > rdhp->dh_len) ?
2423 maxdhp : rdhp;
2424 rdhp = rdhp->dh_next;
2425 maxprot |= dhp->dh_maxprot;
2427 offset = maxdhp->dh_uoff - dhp->dh_uoff;
2430 * Use the dhp that has the
2431 * largest len to get user address.
2434 * If MAPPING_INVALID, cannot use dh_pfn/dh_cvaddr,
2435 * use 0 which is as good as any other.
2437 if (maxdhp->dh_flags & DEVMAP_MAPPING_INVALID) {
2438 aligned_off = 0;
2439 } else if (dhp_is_devmem(maxdhp)) {
2440 aligned_off = (offset_t)ptob(maxdhp->dh_pfn) - offset;
2441 } else if (dhp_is_pmem(maxdhp)) {
2442 pcp = (struct devmap_pmem_cookie *)maxdhp->dh_pcookie;
2443 pfn = page_pptonum(
2444 pcp->dp_pparray[btop(maxdhp->dh_roff)]);
2445 aligned_off = (offset_t)ptob(pfn) - offset;
2446 } else {
2447 aligned_off = (offset_t)(uintptr_t)maxdhp->dh_cvaddr -
2448 offset;
2452 * Pick an address aligned to dh_cookie.
2453 * for kernel memory/user memory, cookie is cvaddr.
2454 * for device memory, cookie is physical address.
2456 map_addr(addr, len, aligned_off, 1, flags);
2457 if (*addr == NULL) {
2458 as_rangeunlock(as);
2459 return (ENOMEM);
2461 } else {
2463 * User-specified address; blow away any previous mappings.
2465 (void) as_unmap(as, *addr, len);
2468 dev_a.mapfunc = NULL;
2469 dev_a.dev = dhp->dh_dev;
2470 dev_a.type = flags & MAP_TYPE;
2471 dev_a.offset = off;
2473 * sdp->maxprot has the least restrictive protection of all dhps.
2475 dev_a.maxprot = maxprot;
2476 dev_a.prot = dhp->dh_prot;
2478 * devmap uses dhp->dh_hat_attr for hat.
2480 dev_a.hat_flags = 0;
2481 dev_a.hat_attr = 0;
2482 dev_a.devmap_data = (void *)dhp;
2484 err = as_map(as, *addr, len, segdev_create, &dev_a);
2485 as_rangeunlock(as);
2486 return (err);
2490 devmap_do_ctxmgt(devmap_cookie_t dhc, void *pvtp, offset_t off, size_t len,
2491 uint_t type, uint_t rw, int (*ctxmgt)(devmap_cookie_t, void *, offset_t,
2492 size_t, uint_t, uint_t))
2494 register devmap_handle_t *dhp = (devmap_handle_t *)dhc;
2495 struct devmap_ctx *devctx;
2496 int do_timeout = 0;
2497 int ret;
2500 DEBUGF(7, (CE_CONT, "devmap_do_ctxmgt: dhp %p off %llx len %lx\n",
2501 (void *)dhp, off, len));
2503 if (ctxmgt == NULL)
2504 return (FC_HWERR);
2506 devctx = dhp->dh_ctx;
2509 * If we are on an MP system with more than one cpu running
2510 * and if a thread on some CPU already has the context, wait
2511 * for it to finish if there is a hysteresis timeout.
2513 * We call cv_wait() instead of cv_wait_sig() because
2514 * it does not matter much if it returned due to a signal
2515 * or due to a cv_signal() or cv_broadcast(). In either event
2516 * we need to complete the mapping otherwise the processes
2517 * will die with a SEGV.
2519 if ((dhp->dh_timeout_length > 0) && (ncpus > 1)) {
2520 do_timeout = 1;
2521 mutex_enter(&devctx->lock);
2522 while (devctx->oncpu)
2523 cv_wait(&devctx->cv, &devctx->lock);
2524 devctx->oncpu = 1;
2525 mutex_exit(&devctx->lock);
2529 * Call the contextmgt callback so that the driver can handle
2530 * the fault.
2532 ret = (*ctxmgt)(dhp, dhp->dh_pvtp, off, len, type, rw);
2535 * If devmap_access() returned -1, then there was a hardware
2536 * error so we need to convert the return value to something
2537 * that trap() will understand. Otherwise, the return value
2538 * is already a fault code generated by devmap_unload()
2539 * or devmap_load().
2541 if (ret) {
2542 DEBUGF(1, (CE_CONT, "devmap_do_ctxmgt: ret %x dhp %p\n",
2543 ret, (void *)dhp));
2544 if (devctx->oncpu) {
2545 mutex_enter(&devctx->lock);
2546 devctx->oncpu = 0;
2547 cv_signal(&devctx->cv);
2548 mutex_exit(&devctx->lock);
2550 return (FC_HWERR);
2554 * Setup the timeout if we need to
2556 if (do_timeout) {
2557 mutex_enter(&devctx->lock);
2558 if (dhp->dh_timeout_length > 0) {
2559 devctx->timeout = timeout(devmap_ctxto,
2560 devctx, dhp->dh_timeout_length);
2561 } else {
2563 * We don't want to wait so set oncpu to
2564 * 0 and wake up anyone waiting.
2566 devctx->oncpu = 0;
2567 cv_signal(&devctx->cv);
2569 mutex_exit(&devctx->lock);
2572 return (DDI_SUCCESS);
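
/*
 * Editorial sketch (not part of the original file): the usual consumer of
 * devmap_do_ctxmgt() is a driver's devmap_access() callback, which hands
 * the fault to a driver-supplied context-management function.  Both xx_*
 * functions are hypothetical; here the context-management step simply
 * validates the faulting range with devmap_load().
 */
/*ARGSUSED*/
static int
xx_ctxmgt(devmap_cookie_t dhp, void *pvtp, offset_t off, size_t len,
    uint_t type, uint_t rw)
{
	/* Load (and mark nointercept) translations for the faulting range */
	return (devmap_load(dhp, off, len, type, rw));
}

static int
xx_devmap_access(devmap_cookie_t dhp, void *pvtp, offset_t off, size_t len,
    uint_t type, uint_t rw)
{
	/* Serialize the device context and run xx_ctxmgt() for this fault */
	return (devmap_do_ctxmgt(dhp, pvtp, off, len, type, rw, xx_ctxmgt));
}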
2576 *                                           end of mapping
2577 *                    poff  fault_offset            |
2578 *             base     |        |                  |
2579 *              |       |        |                  |
2580 *              V       V        V                  V
2581 *   +-----------+---------------+-------+----------+-------+
2582 *               ^               ^       ^          ^
2583 *               |<--- offset--->|<-len->|          |
2584 *               |<--- dh_len(size of mapping) --->|
2585 *                      |<-- pg -->|
2586 *  -->|rlen|<--
2588 static ulong_t
2589 devmap_roundup(devmap_handle_t *dhp, ulong_t offset, size_t len,
2590 ulong_t *opfn, ulong_t *pagesize)
2592 register int level;
2593 ulong_t pg;
2594 ulong_t poff;
2595 ulong_t base;
2596 caddr_t uvaddr;
2597 long rlen;
2599 DEBUGF(2, (CE_CONT, "devmap_roundup: dhp %p off %lx len %lx\n",
2600 (void *)dhp, offset, len));
2603 * get the max. pagesize that is aligned within the range
2604 * <dh_pfn, dh_pfn+offset>.
2606 * The calculations below use the physical address to determine
2607 * the page size to use. The same calculations can use the
2608 * virtual address to determine the page size.
2610 base = (ulong_t)ptob(dhp->dh_pfn);
2611 for (level = dhp->dh_mmulevel; level >= 0; level--) {
2612 pg = page_get_pagesize(level);
2613 poff = ((base + offset) & ~(pg - 1));
2614 uvaddr = dhp->dh_uvaddr + (poff - base);
2615 if ((poff >= base) &&
2616 ((poff + pg) <= (base + dhp->dh_len)) &&
2617 VA_PA_ALIGNED((uintptr_t)uvaddr, poff, pg))
2618 break;
2621 DEBUGF(2, (CE_CONT, "devmap_roundup: base %lx poff %lx pfn %lx\n",
2622 base, poff, dhp->dh_pfn));
2624 ASSERT(VA_PA_ALIGNED((uintptr_t)uvaddr, poff, pg));
2625 ASSERT(level >= 0);
2627 *pagesize = pg;
2628 *opfn = dhp->dh_pfn + btop(poff - base);
2630 rlen = len + offset - (poff - base + pg);
2632 ASSERT(rlen < (long)len);
2634 DEBUGF(1, (CE_CONT, "devmap_roundup: dhp %p "
2635 "level %x rlen %lx psize %lx opfn %lx\n",
2636 (void *)dhp, level, rlen, *pagesize, *opfn));
2638 return ((ulong_t)((rlen > 0) ? rlen : 0));
2642 * find the dhp that contains addr.
2644 static devmap_handle_t *
2645 devmap_find_handle(devmap_handle_t *dhp_head, caddr_t addr)
2647 devmap_handle_t *dhp;
2649 dhp = dhp_head;
2650 while (dhp) {
2651 if (addr >= dhp->dh_uvaddr &&
2652 addr < (dhp->dh_uvaddr + dhp->dh_len))
2653 return (dhp);
2654 dhp = dhp->dh_next;
2657 return ((devmap_handle_t *)NULL);
2661 * devmap_unload:
2662 * Marks a segdev segment (or just the pages, if offset->offset+len
2663 * is not the entire segment) as intercept and unloads the
2664 * pages in the range offset -> offset+len.
2667 devmap_unload(devmap_cookie_t dhc, offset_t offset, size_t len)
2669 register devmap_handle_t *dhp = (devmap_handle_t *)dhc;
2670 caddr_t addr;
2671 ulong_t size;
2672 ssize_t soff;
2674 DEBUGF(7, (CE_CONT, "devmap_unload: dhp %p offset %llx len %lx\n",
2675 (void *)dhp, offset, len));
2677 soff = (ssize_t)(offset - dhp->dh_uoff);
2678 soff = round_down_p2(soff, PAGESIZE);
2679 if (soff < 0 || soff >= dhp->dh_len)
2680 return (FC_MAKE_ERR(EINVAL));
2683 * Address and size must be page aligned. Len is set to the
2684 * number of bytes in the number of pages that are required to
2685 * support len. Offset is set to the byte offset of the first byte
2686 * of the page that contains offset.
2688 len = round_up_p2(len, PAGESIZE);
2691 * If len is == 0, then calculate the size by getting
2692 * the number of bytes from offset to the end of the segment.
2694 if (len == 0)
2695 size = dhp->dh_len - soff;
2696 else {
2697 size = len;
2698 if ((soff + size) > dhp->dh_len)
2699 return (FC_MAKE_ERR(EINVAL));
2703 * The address is offset bytes from the base address of
2704 * the dhp.
2706 addr = (caddr_t)(soff + dhp->dh_uvaddr);
2709 * If large page size was used in hat_devload(),
2710 * the same page size must be used in hat_unload().
2712 if (dhp->dh_flags & DEVMAP_FLAG_LARGE) {
2713 hat_unload(dhp->dh_seg->s_as->a_hat, dhp->dh_uvaddr,
2714 dhp->dh_len, HAT_UNLOAD|HAT_UNLOAD_OTHER);
2715 } else {
2716 hat_unload(dhp->dh_seg->s_as->a_hat, addr, size,
2717 HAT_UNLOAD|HAT_UNLOAD_OTHER);
2720 return (0);
2724 * calculates the optimal page size that will be used for hat_devload().
2726 static void
2727 devmap_get_large_pgsize(devmap_handle_t *dhp, size_t len, caddr_t addr,
2728 size_t *llen, caddr_t *laddr)
2730 ulong_t off;
2731 ulong_t pfn;
2732 ulong_t pgsize;
2733 uint_t first = 1;
2736 * RFE - Code only supports large page mappings for devmem
2737 * This code could be changed in future if we want to support
2738 * large page mappings for kernel exported memory.
2740 ASSERT(dhp_is_devmem(dhp));
2741 ASSERT(!(dhp->dh_flags & DEVMAP_MAPPING_INVALID));
2743 *llen = 0;
2744 off = (ulong_t)(addr - dhp->dh_uvaddr);
2745 while ((long)len > 0) {
2747 * get the optimal pfn to minimize address translations.
2748 * devmap_roundup() returns residue bytes for next round
2749 * calculations.
2751 len = devmap_roundup(dhp, off, len, &pfn, &pgsize);
2753 if (first) {
2754 *laddr = dhp->dh_uvaddr + ptob(pfn - dhp->dh_pfn);
2755 first = 0;
2758 *llen += pgsize;
2759 off = ptob(pfn - dhp->dh_pfn) + pgsize;
2761 /* Large page mapping len/addr cover more range than original fault */
2762 ASSERT(*llen >= len && *laddr <= addr);
2763 ASSERT((*laddr + *llen) >= (addr + len));
2767 * Initialize the devmap_softlock structure.
2769 static struct devmap_softlock *
2770 devmap_softlock_init(dev_t dev, ulong_t id)
2772 struct devmap_softlock *slock;
2773 struct devmap_softlock *tmp;
2775 tmp = kmem_zalloc(sizeof (struct devmap_softlock), KM_SLEEP);
2776 mutex_enter(&devmap_slock);
2778 for (slock = devmap_slist; slock != NULL; slock = slock->next)
2779 if ((slock->dev == dev) && (slock->id == id))
2780 break;
2782 if (slock == NULL) {
2783 slock = tmp;
2784 slock->dev = dev;
2785 slock->id = id;
2786 mutex_init(&slock->lock, NULL, MUTEX_DEFAULT, NULL);
2787 cv_init(&slock->cv, NULL, CV_DEFAULT, NULL);
2788 slock->next = devmap_slist;
2789 devmap_slist = slock;
2790 } else
2791 kmem_free(tmp, sizeof (struct devmap_softlock));
2793 mutex_enter(&slock->lock);
2794 slock->refcnt++;
2795 mutex_exit(&slock->lock);
2796 mutex_exit(&devmap_slock);
2798 return (slock);
2802 * Wake up processes that sleep on softlocked.
2803 * Free dh_softlock if refcnt is 0.
2805 static void
2806 devmap_softlock_rele(devmap_handle_t *dhp)
2808 struct devmap_softlock *slock = dhp->dh_softlock;
2809 struct devmap_softlock *tmp;
2810 struct devmap_softlock *parent;
2812 mutex_enter(&devmap_slock);
2813 mutex_enter(&slock->lock);
2815 ASSERT(slock->refcnt > 0);
2817 slock->refcnt--;
2820 * If no one is using the device, free up the slock data.
2822 if (slock->refcnt == 0) {
2823 slock->softlocked = 0;
2824 cv_signal(&slock->cv);
2826 if (devmap_slist == slock)
2827 devmap_slist = slock->next;
2828 else {
2829 parent = devmap_slist;
2830 for (tmp = devmap_slist->next; tmp != NULL;
2831 tmp = tmp->next) {
2832 if (tmp == slock) {
2833 parent->next = tmp->next;
2834 break;
2836 parent = tmp;
2839 mutex_exit(&slock->lock);
2840 mutex_destroy(&slock->lock);
2841 cv_destroy(&slock->cv);
2842 kmem_free(slock, sizeof (struct devmap_softlock));
2843 } else
2844 mutex_exit(&slock->lock);
2846 mutex_exit(&devmap_slock);
2850 * Wake up processes that sleep on dh_ctx->locked.
2851 * Free dh_ctx if refcnt is 0.
2853 static void
2854 devmap_ctx_rele(devmap_handle_t *dhp)
2856 struct devmap_ctx *devctx = dhp->dh_ctx;
2857 struct devmap_ctx *tmp;
2858 struct devmap_ctx *parent;
2859 timeout_id_t tid;
2861 mutex_enter(&devmapctx_lock);
2862 mutex_enter(&devctx->lock);
2864 ASSERT(devctx->refcnt > 0);
2866 devctx->refcnt--;
2869 * If no one is using the device, free up the devctx data.
2871 if (devctx->refcnt == 0) {
2873 * Untimeout any threads using this mapping as they are about
2874 * to go away.
2876 if (devctx->timeout != 0) {
2877 tid = devctx->timeout;
2878 mutex_exit(&devctx->lock);
2879 (void) untimeout(tid);
2880 mutex_enter(&devctx->lock);
2883 devctx->oncpu = 0;
2884 cv_signal(&devctx->cv);
2886 if (devmapctx_list == devctx)
2887 devmapctx_list = devctx->next;
2888 else {
2889 parent = devmapctx_list;
2890 for (tmp = devmapctx_list->next; tmp != NULL;
2891 tmp = tmp->next) {
2892 if (tmp == devctx) {
2893 parent->next = tmp->next;
2894 break;
2896 parent = tmp;
2899 mutex_exit(&devctx->lock);
2900 mutex_destroy(&devctx->lock);
2901 cv_destroy(&devctx->cv);
2902 kmem_free(devctx, sizeof (struct devmap_ctx));
2903 } else
2904 mutex_exit(&devctx->lock);
2906 mutex_exit(&devmapctx_lock);
2910 * devmap_load:
2911 * Marks a segdev segment (or just the pages, if offset->offset+len
2912 * is not the entire segment) as nointercept and faults in
2913 * the pages in the range offset -> offset+len.
2916 devmap_load(devmap_cookie_t dhc, offset_t offset, size_t len, uint_t type,
2917 uint_t rw)
2919 devmap_handle_t *dhp = (devmap_handle_t *)dhc;
2920 struct as *asp = dhp->dh_seg->s_as;
2921 caddr_t addr;
2922 ulong_t size;
2923 ssize_t soff; /* offset from the beginning of the segment */
2924 int rc;
2926 DEBUGF(7, (CE_CONT, "devmap_load: dhp %p offset %llx len %lx\n",
2927 (void *)dhp, offset, len));
2930 * The hat layer only supports devload to a process's context for which
2931 * the as lock is held. Verify that here and return an error if a driver
2932 * inadvertently calls devmap_load on a wrong devmap handle.
2934 if ((asp != &kas) && !AS_LOCK_HELD(asp))
2935 return (FC_MAKE_ERR(EINVAL));
2937 soff = (ssize_t)(offset - dhp->dh_uoff);
2938 soff = round_down_p2(soff, PAGESIZE);
2939 if (soff < 0 || soff >= dhp->dh_len)
2940 return (FC_MAKE_ERR(EINVAL));
2943 * Address and size must be page aligned. Len is set to the
2944 * number of bytes in the number of pages that are required to
2945 * support len. Offset is set to the byte offset of the first byte
2946 * of the page that contains offset.
2948 len = round_up_p2(len, PAGESIZE);
2951 * If len == 0, then calculate the size by getting
2952 * the number of bytes from offset to the end of the segment.
2954 if (len == 0)
2955 size = dhp->dh_len - soff;
2956 else {
2957 size = len;
2958 if ((soff + size) > dhp->dh_len)
2959 return (FC_MAKE_ERR(EINVAL));
2963 * The address is offset bytes from the base address of
2964 * the segment.
2966 addr = (caddr_t)(soff + dhp->dh_uvaddr);
2968 HOLD_DHP_LOCK(dhp);
2969 rc = segdev_faultpages(asp->a_hat,
2970 dhp->dh_seg, addr, size, type, rw, dhp);
2971 RELE_DHP_LOCK(dhp);
2972 return (rc);
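
/*
 * Editorial sketch (not part of the original file): a context-management
 * function for a hypothetical device that can expose only one mapping
 * context at a time.  It intercepts the mapping that previously owned the
 * context with devmap_unload() and then faults in the requested range with
 * devmap_load().  The xx_onectx_* state is illustrative and is assumed to
 * be initialized (mutex_init and friends) at attach(9E) time.
 */
static kmutex_t xx_onectx_lock;
static devmap_cookie_t xx_onectx_dhp;	/* mapping that owns the hw context */
static offset_t xx_onectx_off;
static size_t xx_onectx_len;

/*ARGSUSED*/
static int
xx_ctxmgt_onectx(devmap_cookie_t dhp, void *pvtp, offset_t off, size_t len,
    uint_t type, uint_t rw)
{
	int err;

	mutex_enter(&xx_onectx_lock);
	if (xx_onectx_dhp != NULL && xx_onectx_dhp != dhp) {
		/* Intercept further accesses through the previous mapping */
		err = devmap_unload(xx_onectx_dhp, xx_onectx_off,
		    xx_onectx_len);
		if (err != 0) {
			mutex_exit(&xx_onectx_lock);
			return (err);
		}
	}
	xx_onectx_dhp = dhp;
	xx_onectx_off = off;
	xx_onectx_len = len;
	mutex_exit(&xx_onectx_lock);

	/* Fault in (and mark nointercept) the range that triggered the call */
	return (devmap_load(dhp, off, len, type, rw));
}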
2976 devmap_setup(dev_t dev, offset_t off, struct as *as, caddr_t *addrp,
2977 size_t len, uint_t prot, uint_t maxprot, uint_t flags, struct cred *cred)
2979 register devmap_handle_t *dhp;
2980 int (*devmap)(dev_t, devmap_cookie_t, offset_t, size_t,
2981 size_t *, uint_t);
2982 int (*mmap)(dev_t, off_t, int);
2983 struct devmap_callback_ctl *callbackops;
2984 devmap_handle_t *dhp_head = NULL;
2985 devmap_handle_t *dhp_prev = NULL;
2986 devmap_handle_t *dhp_curr;
2987 caddr_t addr;
2988 int map_flag;
2989 int ret;
2990 ulong_t total_len;
2991 size_t map_len;
2992 size_t resid_len = len;
2993 offset_t map_off = off;
2994 struct devmap_softlock *slock = NULL;
2997 DEBUGF(3, (CE_CONT, "devmap_setup: off %llx len %lx\n",
2998 off, len));
3000 devmap = devopsp[getmajor(dev)]->devo_cb_ops->cb_devmap;
3001 mmap = devopsp[getmajor(dev)]->devo_cb_ops->cb_mmap;
3004 * driver must provide devmap(9E) entry point in cb_ops to use the
3005 * devmap framework.
3007 if (devmap == NULL || devmap == nulldev || devmap == nodev)
3008 return (EINVAL);
3011 * To protect from an inadvertent entry because the devmap entry point
3012 * is not NULL, return error if D_DEVMAP bit is not set in cb_flag and
3013 * mmap is NULL.
3015 map_flag = devopsp[getmajor(dev)]->devo_cb_ops->cb_flag;
3016 if ((map_flag & D_DEVMAP) == 0 && (mmap == NULL || mmap == nulldev))
3017 return (EINVAL);
3020 * devmap allows mmap(2) to map multiple registers.
3021 * One devmap_handle is created for each register mapped.
3023 for (total_len = 0; total_len < len; total_len += map_len) {
3024 dhp = kmem_zalloc(sizeof (devmap_handle_t), KM_SLEEP);
3026 if (dhp_prev != NULL)
3027 dhp_prev->dh_next = dhp;
3028 else
3029 dhp_head = dhp;
3030 dhp_prev = dhp;
3032 dhp->dh_prot = prot;
3033 dhp->dh_orig_maxprot = dhp->dh_maxprot = maxprot;
3034 dhp->dh_dev = dev;
3035 dhp->dh_timeout_length = CTX_TIMEOUT_VALUE;
3036 dhp->dh_uoff = map_off;
3039 * Get mapping specific info from
3040 * the driver, such as rnumber, roff, len, callbackops,
3041 * accattrp and, if the mapping is for kernel memory,
3042 * ddi_umem_cookie.
3044 if ((ret = cdev_devmap(dev, dhp, map_off,
3045 resid_len, &map_len, get_udatamodel())) != 0) {
3046 free_devmap_handle(dhp_head);
3047 return (ENXIO);
3050 if (map_len & PAGEOFFSET) {
3051 free_devmap_handle(dhp_head);
3052 return (EINVAL);
3055 callbackops = &dhp->dh_callbackops;
3057 if ((callbackops->devmap_access == NULL) ||
3058 (callbackops->devmap_access == nulldev) ||
3059 (callbackops->devmap_access == nodev)) {
3061 * Normally devmap does not support MAP_PRIVATE unless
3062 * the drivers provide a valid devmap_access routine.
3064 if ((flags & MAP_PRIVATE) != 0) {
3065 free_devmap_handle(dhp_head);
3066 return (EINVAL);
3068 } else {
3070 * Initialize dhp_softlock and dh_ctx if the drivers
3071 * provide devmap_access.
3073 dhp->dh_softlock = devmap_softlock_init(dev,
3074 (ulong_t)callbackops->devmap_access);
3075 dhp->dh_ctx = devmap_ctxinit(dev,
3076 (ulong_t)callbackops->devmap_access);
3079 * segdev_fault can only work when all
3080 * dh_softlock in a multi-dhp mapping
3081 * are the same; see the comments in segdev_fault.
3082 * This code keeps track of the first
3083 * dh_softlock allocated in slock and
3084 * compares all later allocations against it;
3085 * if they differ, it returns an error.
3087 if (slock == NULL)
3088 slock = dhp->dh_softlock;
3089 if (slock != dhp->dh_softlock) {
3090 free_devmap_handle(dhp_head);
3091 return (ENOTSUP);
3095 map_off += map_len;
3096 resid_len -= map_len;
3100 * get the user virtual address and establish the mapping between
3101 * uvaddr and device physical address.
3103 if ((ret = devmap_device(dhp_head, as, addrp, off, len, flags))
3104 != 0) {
3106 * free devmap handles if error during the mapping.
3108 free_devmap_handle(dhp_head);
3110 return (ret);
3114 * call the driver's devmap_map callback to do more after the mapping,
3115 * such as to allocate driver private data for context management.
3117 dhp = dhp_head;
3118 map_off = off;
3119 addr = *addrp;
3120 while (dhp != NULL) {
3121 callbackops = &dhp->dh_callbackops;
3122 dhp->dh_uvaddr = addr;
3123 dhp_curr = dhp;
3124 if (callbackops->devmap_map != NULL) {
3125 ret = (*callbackops->devmap_map)((devmap_cookie_t)dhp,
3126 dev, flags, map_off,
3127 dhp->dh_len, &dhp->dh_pvtp);
3128 if (ret != 0) {
3129 struct segdev_data *sdp;
3132 * call driver's devmap_unmap entry point
3133 * to free driver resources.
3135 dhp = dhp_head;
3136 map_off = off;
3137 while (dhp != dhp_curr) {
3138 callbackops = &dhp->dh_callbackops;
3139 if (callbackops->devmap_unmap != NULL) {
3140 (*callbackops->devmap_unmap)(
3141 dhp, dhp->dh_pvtp,
3142 map_off, dhp->dh_len,
3143 NULL, NULL, NULL, NULL);
3145 map_off += dhp->dh_len;
3146 dhp = dhp->dh_next;
3148 sdp = dhp_head->dh_seg->s_data;
3149 sdp->devmap_data = NULL;
3150 free_devmap_handle(dhp_head);
3151 return (ENXIO);
3154 map_off += dhp->dh_len;
3155 addr += dhp->dh_len;
3156 dhp = dhp->dh_next;
3159 return (0);
3163 ddi_devmap_segmap(dev_t dev, off_t off, ddi_as_handle_t as, caddr_t *addrp,
3164 off_t len, uint_t prot, uint_t maxprot, uint_t flags, struct cred *cred)
3166 return (devmap_setup(dev, (offset_t)off, (struct as *)as, addrp,
3167 (size_t)len, prot, maxprot, flags, cred));
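
/*
 * Editorial sketch (not part of the original file): drivers that implement
 * devmap(9E) usually route their segmap(9E) entry point straight through
 * ddi_devmap_segmap(), letting devmap_setup() above invoke the per-register
 * devmap(9E) callbacks.  xx_segmap_devmap is a hypothetical name.
 */
static int
xx_segmap_devmap(dev_t dev, off_t off, struct as *as, caddr_t *addrp,
    off_t len, uint_t prot, uint_t maxprot, uint_t flags, cred_t *credp)
{
	return (ddi_devmap_segmap(dev, off, (ddi_as_handle_t)as, addrp, len,
	    prot, maxprot, flags, credp));
}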
3171 * Called from devmap_devmem_setup/remap to see if large pages can be used
3172 * for this device mapping.
3173 * Also calculates the max. page size for this mapping.
3174 * This page size will be used in the fault routine for
3175 * optimal page size calculations.
3177 static void
3178 devmap_devmem_large_page_setup(devmap_handle_t *dhp)
3180 ASSERT(dhp_is_devmem(dhp));
3181 dhp->dh_mmulevel = 0;
3184 * use large page size only if:
3185 * 1. device memory.
3186 * 2. mmu supports multiple page sizes,
3187 * 3. Driver did not disallow it
3188 * 4. dhp length is at least as big as the large pagesize
3189 * 5. the uvaddr and pfn are large pagesize aligned
3191 if (page_num_pagesizes() > 1 &&
3192 !(dhp->dh_flags & (DEVMAP_USE_PAGESIZE | DEVMAP_MAPPING_INVALID))) {
3193 ulong_t base;
3194 int level;
3196 base = (ulong_t)ptob(dhp->dh_pfn);
3197 for (level = 1; level < page_num_pagesizes(); level++) {
3198 size_t pgsize = page_get_pagesize(level);
3199 if ((dhp->dh_len < pgsize) ||
3200 (!VA_PA_PGSIZE_ALIGNED((uintptr_t)dhp->dh_uvaddr,
3201 base, pgsize))) {
3202 break;
3205 dhp->dh_mmulevel = level - 1;
3207 if (dhp->dh_mmulevel > 0) {
3208 dhp->dh_flags |= DEVMAP_FLAG_LARGE;
3209 } else {
3210 dhp->dh_flags &= ~DEVMAP_FLAG_LARGE;
3215 * Called by a driver's devmap routine to pass device-specific info to
3216 * the framework. Used for device memory mappings only.
3219 devmap_devmem_setup(devmap_cookie_t dhc, dev_info_t *dip,
3220 struct devmap_callback_ctl *callbackops, uint_t rnumber, offset_t roff,
3221 size_t len, uint_t maxprot, uint_t flags, ddi_device_acc_attr_t *accattrp)
3223 devmap_handle_t *dhp = (devmap_handle_t *)dhc;
3224 ddi_acc_handle_t handle;
3225 ddi_map_req_t mr;
3226 ddi_acc_hdl_t *hp;
3227 int err;
3229 DEBUGF(2, (CE_CONT, "devmap_devmem_setup: dhp %p offset %llx "
3230 "rnum %d len %lx\n", (void *)dhp, roff, rnumber, len));
3233 * First, check whether this function has already been called for this dhp.
3235 if (dhp->dh_flags & DEVMAP_SETUP_DONE)
3236 return (DDI_FAILURE);
3238 if ((dhp->dh_prot & dhp->dh_orig_maxprot & maxprot) != dhp->dh_prot)
3239 return (DDI_FAILURE);
3241 if (flags & DEVMAP_MAPPING_INVALID) {
3243 * Don't go up the tree to get pfn if the driver specifies
3244 * DEVMAP_MAPPING_INVALID in flags.
3246 * If DEVMAP_MAPPING_INVALID is specified, we have to grant
3247 * remap permission.
3249 if (!(flags & DEVMAP_ALLOW_REMAP)) {
3250 return (DDI_FAILURE);
3252 dhp->dh_pfn = PFN_INVALID;
3253 } else {
3254 handle = impl_acc_hdl_alloc(KM_SLEEP, NULL);
3255 if (handle == NULL)
3256 return (DDI_FAILURE);
3258 hp = impl_acc_hdl_get(handle);
3259 hp->ah_vers = VERS_ACCHDL;
3260 hp->ah_dip = dip;
3261 hp->ah_rnumber = rnumber;
3262 hp->ah_offset = roff;
3263 hp->ah_len = len;
3264 if (accattrp != NULL)
3265 hp->ah_acc = *accattrp;
3267 mr.map_op = DDI_MO_MAP_LOCKED;
3268 mr.map_type = DDI_MT_RNUMBER;
3269 mr.map_obj.rnumber = rnumber;
3270 mr.map_prot = maxprot & dhp->dh_orig_maxprot;
3271 mr.map_flags = DDI_MF_DEVICE_MAPPING;
3272 mr.map_handlep = hp;
3273 mr.map_vers = DDI_MAP_VERSION;
3276 * Go up the device tree to get the pfn.
3277 * The rootnex_map_regspec() routine in nexus drivers has been
3278 * modified to return pfn if map_flags is DDI_MF_DEVICE_MAPPING.
3280 err = ddi_map(dip, &mr, roff, len, (caddr_t *)&dhp->dh_pfn);
3281 dhp->dh_hat_attr = hp->ah_hat_flags;
3282 impl_acc_hdl_free(handle);
3284 if (err)
3285 return (DDI_FAILURE);
3287 /* Should not be using devmem setup for memory pages */
3288 ASSERT(!pf_is_memory(dhp->dh_pfn));
3290 /* Only some of the flags bits are settable by the driver */
3291 dhp->dh_flags |= (flags & DEVMAP_SETUP_FLAGS);
3292 dhp->dh_len = ptob(btopr(len));
3294 dhp->dh_cookie = DEVMAP_DEVMEM_COOKIE;
3295 dhp->dh_roff = ptob(btop(roff));
3297 /* setup the dh_mmulevel and DEVMAP_FLAG_LARGE */
3298 devmap_devmem_large_page_setup(dhp);
3299 dhp->dh_maxprot = maxprot & dhp->dh_orig_maxprot;
3300 ASSERT((dhp->dh_prot & dhp->dh_orig_maxprot & maxprot) == dhp->dh_prot);
3303 if (callbackops != NULL) {
3304 bcopy(callbackops, &dhp->dh_callbackops,
3305 sizeof (struct devmap_callback_ctl));
3309 * Initialize dh_lock if we want to do remap.
3311 if (dhp->dh_flags & DEVMAP_ALLOW_REMAP) {
3312 mutex_init(&dhp->dh_lock, NULL, MUTEX_DEFAULT, NULL);
3313 dhp->dh_flags |= DEVMAP_LOCK_INITED;
3316 dhp->dh_flags |= DEVMAP_SETUP_DONE;
3318 return (DDI_SUCCESS);
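
/*
 * Editorial sketch (not part of the original file): a hypothetical devmap(9E)
 * entry point that exports register set 1 of a device through
 * devmap_devmem_setup().  xx_dip stands in for the dev_info pointer a real
 * driver would fetch from its soft state at attach(9E) time.
 */
static dev_info_t *xx_dip;		/* assumed set at attach(9E) time */

/*ARGSUSED*/
static int
xx_devmap(dev_t dev, devmap_cookie_t dhp, offset_t off, size_t len,
    size_t *maplen, uint_t model)
{
	ddi_device_acc_attr_t attr;
	size_t length = ptob(btopr(len));	/* round to a page multiple */

	attr.devacc_attr_version = DDI_DEVICE_ATTR_V0;
	attr.devacc_attr_endian_flags = DDI_STRUCTURE_LE_ACC;
	attr.devacc_attr_dataorder = DDI_STRICTORDER_ACC;

	/* No callbacks: the default fault handling above is sufficient */
	if (devmap_devmem_setup(dhp, xx_dip, NULL, 1, off, length,
	    PROT_ALL, 0, &attr) != DDI_SUCCESS)
		return (ENXIO);

	*maplen = length;
	return (0);
}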
3322 devmap_devmem_remap(devmap_cookie_t dhc, dev_info_t *dip,
3323 uint_t rnumber, offset_t roff, size_t len, uint_t maxprot,
3324 uint_t flags, ddi_device_acc_attr_t *accattrp)
3326 devmap_handle_t *dhp = (devmap_handle_t *)dhc;
3327 ddi_acc_handle_t handle;
3328 ddi_map_req_t mr;
3329 ddi_acc_hdl_t *hp;
3330 pfn_t pfn;
3331 uint_t hat_flags;
3332 int err;
3334 DEBUGF(2, (CE_CONT, "devmap_devmem_remap: dhp %p offset %llx "
3335 "rnum %d len %lx\n", (void *)dhp, roff, rnumber, len));
3338 * Return failure if setup has not been done or no remap permission
3339 * has been granted during the setup.
3341 if ((dhp->dh_flags & DEVMAP_SETUP_DONE) == 0 ||
3342 (dhp->dh_flags & DEVMAP_ALLOW_REMAP) == 0)
3343 return (DDI_FAILURE);
3345 /* Only DEVMAP_MAPPING_INVALID flag supported for remap */
3346 if ((flags != 0) && (flags != DEVMAP_MAPPING_INVALID))
3347 return (DDI_FAILURE);
3349 if ((dhp->dh_prot & dhp->dh_orig_maxprot & maxprot) != dhp->dh_prot)
3350 return (DDI_FAILURE);
3352 if (!(flags & DEVMAP_MAPPING_INVALID)) {
3353 handle = impl_acc_hdl_alloc(KM_SLEEP, NULL);
3354 if (handle == NULL)
3355 return (DDI_FAILURE);
3358 HOLD_DHP_LOCK(dhp);
3361 * Unload the old mapping, so the next fault will set up the new mappings.
3362 * Do this while holding the dhp lock so other faults don't reestablish
3363 * the mappings.
3365 hat_unload(dhp->dh_seg->s_as->a_hat, dhp->dh_uvaddr,
3366 dhp->dh_len, HAT_UNLOAD|HAT_UNLOAD_OTHER);
3368 if (flags & DEVMAP_MAPPING_INVALID) {
3369 dhp->dh_flags |= DEVMAP_MAPPING_INVALID;
3370 dhp->dh_pfn = PFN_INVALID;
3371 } else {
3372 /* clear any prior DEVMAP_MAPPING_INVALID flag */
3373 dhp->dh_flags &= ~DEVMAP_MAPPING_INVALID;
3374 hp = impl_acc_hdl_get(handle);
3375 hp->ah_vers = VERS_ACCHDL;
3376 hp->ah_dip = dip;
3377 hp->ah_rnumber = rnumber;
3378 hp->ah_offset = roff;
3379 hp->ah_len = len;
3380 if (accattrp != NULL)
3381 hp->ah_acc = *accattrp;
3383 mr.map_op = DDI_MO_MAP_LOCKED;
3384 mr.map_type = DDI_MT_RNUMBER;
3385 mr.map_obj.rnumber = rnumber;
3386 mr.map_prot = maxprot & dhp->dh_orig_maxprot;
3387 mr.map_flags = DDI_MF_DEVICE_MAPPING;
3388 mr.map_handlep = hp;
3389 mr.map_vers = DDI_MAP_VERSION;
3392 * Go up the device tree to get the pfn.
3393 * The rootnex_map_regspec() routine in nexus drivers has been
3394 * modified to return pfn if map_flags is DDI_MF_DEVICE_MAPPING.
3396 err = ddi_map(dip, &mr, roff, len, (caddr_t *)&pfn);
3397 hat_flags = hp->ah_hat_flags;
3398 impl_acc_hdl_free(handle);
3399 if (err) {
3400 RELE_DHP_LOCK(dhp);
3401 return (DDI_FAILURE);
3404 * Store result of ddi_map first in local variables, as we do
3405 * not want to overwrite the existing dhp with wrong data.
3407 dhp->dh_pfn = pfn;
3408 dhp->dh_hat_attr = hat_flags;
3411 /* clear the large page size flag */
3412 dhp->dh_flags &= ~DEVMAP_FLAG_LARGE;
3414 dhp->dh_cookie = DEVMAP_DEVMEM_COOKIE;
3415 dhp->dh_roff = ptob(btop(roff));
3417 /* setup the dh_mmulevel and DEVMAP_FLAG_LARGE */
3418 devmap_devmem_large_page_setup(dhp);
3419 dhp->dh_maxprot = maxprot & dhp->dh_orig_maxprot;
3420 ASSERT((dhp->dh_prot & dhp->dh_orig_maxprot & maxprot) == dhp->dh_prot);
3422 RELE_DHP_LOCK(dhp);
3423 return (DDI_SUCCESS);
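
/*
 * Editorial sketch (not part of the original file): a driver that passed
 * DEVMAP_ALLOW_REMAP at setup time can later retarget an existing user
 * mapping, e.g. from its devmap_access() callback, by remapping the cookie
 * onto a different register set.  Register set 2 and the xx_* names are
 * illustrative assumptions.
 */
static int
xx_remap_to_regset2(devmap_cookie_t dhp, dev_info_t *dip, size_t len,
    ddi_device_acc_attr_t *attrp)
{
	/* Point the existing mapping at register set 2, register offset 0 */
	return (devmap_devmem_remap(dhp, dip, 2, 0, len, PROT_ALL, 0, attrp));
}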
3427 * Called by a driver's devmap routine to pass kernel virtual address mapping
3428 * info to the framework. Used only for kernel memory
3429 * allocated from ddi_umem_alloc().
3432 devmap_umem_setup(devmap_cookie_t dhc, dev_info_t *dip,
3433 struct devmap_callback_ctl *callbackops, ddi_umem_cookie_t cookie,
3434 offset_t off, size_t len, uint_t maxprot, uint_t flags,
3435 ddi_device_acc_attr_t *accattrp)
3437 devmap_handle_t *dhp = (devmap_handle_t *)dhc;
3438 struct ddi_umem_cookie *cp = (struct ddi_umem_cookie *)cookie;
3441 DEBUGF(2, (CE_CONT, "devmap_umem_setup: dhp %p offset %llx "
3442 "cookie %p len %lx\n", (void *)dhp, off, (void *)cookie, len));
3444 if (cookie == NULL)
3445 return (DDI_FAILURE);
3447 /* For UMEM_TRASH, this restriction is not needed */
3448 if ((off + len) > cp->size)
3449 return (DDI_FAILURE);
3451 /* check if the cache attributes are supported */
3452 if (i_ddi_check_cache_attr(flags) == B_FALSE)
3453 return (DDI_FAILURE);
3456 * First, check whether this function has already been called for this dhp.
3458 if (dhp->dh_flags & DEVMAP_SETUP_DONE)
3459 return (DDI_FAILURE);
3461 if ((dhp->dh_prot & dhp->dh_orig_maxprot & maxprot) != dhp->dh_prot)
3462 return (DDI_FAILURE);
3464 if (flags & DEVMAP_MAPPING_INVALID) {
3466 * If DEVMAP_MAPPING_INVALID is specified, we have to grant
3467 * remap permission.
3469 if (!(flags & DEVMAP_ALLOW_REMAP)) {
3470 return (DDI_FAILURE);
3472 } else {
3473 dhp->dh_cookie = cookie;
3474 dhp->dh_roff = ptob(btop(off));
3475 dhp->dh_cvaddr = cp->cvaddr + dhp->dh_roff;
3476 /* set HAT cache attributes */
3477 i_ddi_cacheattr_to_hatacc(flags, &dhp->dh_hat_attr);
3478 /* set HAT endianness attributes */
3479 i_ddi_devacc_to_hatacc(accattrp, &dhp->dh_hat_attr);
3483 * The default is _not_ to pass HAT_LOAD_NOCONSIST to hat_devload();
3484 * we pass HAT_LOAD_NOCONSIST _only_ in cases where hat tries to
3485 * create consistent mappings but our intention was to create
3486 * non-consistent mappings.
3488 * DEVMEM: hat figures out that it's DEVMEM and creates non-consistent
3489 * mappings.
3491 * kernel exported memory: hat figures out that it's memory and always
3492 * creates consistent mappings.
3494 * /dev/mem: non-consistent mappings. See comments in common/io/mem.c
3496 * /dev/kmem: consistent mappings are created unless they are
3497 * MAP_FIXED. We _explicitly_ tell hat to create non-consistent
3498 * mappings by passing HAT_LOAD_NOCONSIST in case of MAP_FIXED
3499 * mappings of /dev/kmem. See common/io/mem.c
3502 /* Only some of the flags bits are settable by the driver */
3503 dhp->dh_flags |= (flags & DEVMAP_SETUP_FLAGS);
3505 dhp->dh_len = ptob(btopr(len));
3506 dhp->dh_maxprot = maxprot & dhp->dh_orig_maxprot;
3507 ASSERT((dhp->dh_prot & dhp->dh_orig_maxprot & maxprot) == dhp->dh_prot);
3509 if (callbackops != NULL) {
3510 bcopy(callbackops, &dhp->dh_callbackops,
3511 sizeof (struct devmap_callback_ctl));
3514 * Initialize dh_lock if we want to do remap.
3516 if (dhp->dh_flags & DEVMAP_ALLOW_REMAP) {
3517 mutex_init(&dhp->dh_lock, NULL, MUTEX_DEFAULT, NULL);
3518 dhp->dh_flags |= DEVMAP_LOCK_INITED;
3521 dhp->dh_flags |= DEVMAP_SETUP_DONE;
3523 return (DDI_SUCCESS);
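
/*
 * Editorial sketch (not part of the original file): a hypothetical devmap(9E)
 * entry point that exports a kernel buffer previously obtained from
 * ddi_umem_alloc().  xx_umem_cookie and xx_umem_size stand in for driver
 * soft-state fields filled in at attach(9E) time, and xx_dip is the same
 * hypothetical dev_info pointer used in the earlier sketches.
 */
static ddi_umem_cookie_t xx_umem_cookie;
static size_t xx_umem_size;

/*ARGSUSED*/
static int
xx_umem_devmap(dev_t dev, devmap_cookie_t dhp, offset_t off, size_t len,
    size_t *maplen, uint_t model)
{
	ddi_device_acc_attr_t attr;
	size_t length = ptob(btopr(len));

	if (off + length > xx_umem_size)
		return (EINVAL);

	attr.devacc_attr_version = DDI_DEVICE_ATTR_V0;
	attr.devacc_attr_endian_flags = DDI_NEVERSWAP_ACC;
	attr.devacc_attr_dataorder = DDI_STRICTORDER_ACC;

	/* Export the kernel memory; no private callbacks are needed */
	if (devmap_umem_setup(dhp, xx_dip, NULL, xx_umem_cookie, off, length,
	    PROT_ALL, 0, &attr) != DDI_SUCCESS)
		return (ENXIO);

	*maplen = length;
	return (0);
}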
3527 devmap_umem_remap(devmap_cookie_t dhc, dev_info_t *dip,
3528 ddi_umem_cookie_t cookie, offset_t off, size_t len, uint_t maxprot,
3529 uint_t flags, ddi_device_acc_attr_t *accattrp)
3531 devmap_handle_t *dhp = (devmap_handle_t *)dhc;
3532 struct ddi_umem_cookie *cp = (struct ddi_umem_cookie *)cookie;
3534 DEBUGF(2, (CE_CONT, "devmap_umem_remap: dhp %p offset %llx "
3535 "cookie %p len %lx\n", (void *)dhp, off, (void *)cookie, len));
3538 * Return failure if setup has not been done or no remap permission
3539 * has been granted during the setup.
3541 if ((dhp->dh_flags & DEVMAP_SETUP_DONE) == 0 ||
3542 (dhp->dh_flags & DEVMAP_ALLOW_REMAP) == 0)
3543 return (DDI_FAILURE);
3545 /* No flags supported for remap yet */
3546 if (flags != 0)
3547 return (DDI_FAILURE);
3549 /* check if the cache attributes are supported */
3550 if (i_ddi_check_cache_attr(flags) == B_FALSE)
3551 return (DDI_FAILURE);
3553 if ((dhp->dh_prot & dhp->dh_orig_maxprot & maxprot) != dhp->dh_prot)
3554 return (DDI_FAILURE);
3556 /* For UMEM_TRASH, this restriction is not needed */
3557 if ((off + len) > cp->size)
3558 return (DDI_FAILURE);
3560 HOLD_DHP_LOCK(dhp);
3562 * Unload the old mapping, so the next fault will set up the new mappings.
3563 * Do this while holding the dhp lock so other faults don't reestablish
3564 * the mappings.
3566 hat_unload(dhp->dh_seg->s_as->a_hat, dhp->dh_uvaddr,
3567 dhp->dh_len, HAT_UNLOAD|HAT_UNLOAD_OTHER);
3569 dhp->dh_cookie = cookie;
3570 dhp->dh_roff = ptob(btop(off));
3571 dhp->dh_cvaddr = cp->cvaddr + dhp->dh_roff;
3572 /* set HAT cache attributes */
3573 i_ddi_cacheattr_to_hatacc(flags, &dhp->dh_hat_attr);
3574 /* set HAT endianness attributes */
3575 i_ddi_devacc_to_hatacc(accattrp, &dhp->dh_hat_attr);
3577 /* clear the large page size flag */
3578 dhp->dh_flags &= ~DEVMAP_FLAG_LARGE;
3580 dhp->dh_maxprot = maxprot & dhp->dh_orig_maxprot;
3581 ASSERT((dhp->dh_prot & dhp->dh_orig_maxprot & maxprot) == dhp->dh_prot);
3582 RELE_DHP_LOCK(dhp);
3583 return (DDI_SUCCESS);
3587 * Set the timeout value for the driver's context management callback, e.g.
3588 * devmap_access().
3590 void
3591 devmap_set_ctx_timeout(devmap_cookie_t dhc, clock_t ticks)
3593 devmap_handle_t *dhp = (devmap_handle_t *)dhc;
3595 dhp->dh_timeout_length = ticks;
3599 devmap_default_access(devmap_cookie_t dhp, void *pvtp, offset_t off,
3600 size_t len, uint_t type, uint_t rw)
3603 return (devmap_load(dhp, off, len, type, rw));
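
/*
 * Editorial sketch (not part of the original file): a driver that needs no
 * special context handling can point devmap_access at
 * devmap_default_access() in its devmap_callback_ctl, and may still supply
 * a devmap_map callback that sets a context timeout.  All xx_* names are
 * hypothetical; the 10ms timeout is an arbitrary example value.
 */
/*ARGSUSED*/
static int
xx_devmap_map(devmap_cookie_t dhp, dev_t dev, uint_t flags, offset_t off,
    size_t len, void **pvtpp)
{
	/* Give the context-management hysteresis a 10ms timeout */
	devmap_set_ctx_timeout(dhp, drv_usectohz(10000));
	*pvtpp = NULL;
	return (0);
}

static struct devmap_callback_ctl xx_devmap_ops = {
	DEVMAP_OPS_REV,			/* devmap_rev */
	xx_devmap_map,			/* devmap_map */
	devmap_default_access,		/* devmap_access */
	NULL,				/* devmap_dup */
	NULL,				/* devmap_unmap */
};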
3607 * segkmem_alloc() wrapper to allocate memory which is both
3608 * non-relocatable (for DR) and sharelocked, since the rest
3609 * of this segment driver requires it.
3611 static void *
3612 devmap_alloc_pages(vmem_t *vmp, size_t size, int vmflag)
3614 ASSERT(vmp != NULL);
3615 ASSERT(kvseg.s_base != NULL);
3616 vmflag |= (VM_NORELOC | SEGKMEM_SHARELOCKED);
3617 return (segkmem_alloc(vmp, size, vmflag));
3621 * This is where things are a bit incestuous with seg_kmem: unlike
3622 * seg_kp, seg_kmem does not keep its pages long-term sharelocked, so
3623 * we need to do a bit of a dance around that to prevent duplication of
3624 * code until we decide to bite the bullet and implement a new kernel
3625 * segment for driver-allocated memory that is exported to user space.
3627 static void
3628 devmap_free_pages(vmem_t *vmp, void *inaddr, size_t size)
3630 page_t *pp;
3631 caddr_t addr = inaddr;
3632 caddr_t eaddr;
3633 pgcnt_t npages = btopr(size);
3635 ASSERT(vmp != NULL);
3636 ASSERT(kvseg.s_base != NULL);
3637 ASSERT(((uintptr_t)addr & PAGEOFFSET) == 0);
3639 hat_unload(kas.a_hat, addr, size, HAT_UNLOAD_UNLOCK);
3641 for (eaddr = addr + size; addr < eaddr; addr += PAGESIZE) {
3643 * Use page_find() instead of page_lookup() to find the page
3644 * since we know that it is hashed and has a shared lock.
3646 pp = page_find(&kvp.v_object, (uoff_t)(uintptr_t)addr);
3648 if (pp == NULL)
3649 panic("devmap_free_pages: page not found");
3650 if (!page_tryupgrade(pp)) {
3651 page_unlock(pp);
3652 pp = page_lookup(&kvp.v_object,
3653 (uoff_t)(uintptr_t)addr, SE_EXCL);
3654 if (pp == NULL)
3655 panic("devmap_free_pages: page already freed");
3657 /* Clear p_lckcnt so page_destroy() doesn't update availrmem */
3658 pp->p_lckcnt = 0;
3659 page_destroy(pp, 0);
3661 page_unresv(npages);
3663 if (vmp != NULL)
3664 vmem_free(vmp, inaddr, size);
3668 * devmap_umem_alloc_np() replaces kmem_zalloc() as the method for
3669 * allocating non-pageable kmem in response to a ddi_umem_alloc()
3670 * default request. For now we allocate our own pages and we keep
3671 * them long-term sharelocked, since: A) the fault routines expect the
3672 * memory to already be locked; B) pageable umem is already long-term
3673 * locked; C) it's a lot of work to make it otherwise, particularly
3674 * since the nexus layer expects the pages to never fault. An RFE is to
3675 * not keep the pages long-term locked, but instead to be able to
3676 * take faults on them and simply look them up in kvp in case we
3677 * fault on them. Even then, we must take care not to let pageout
3678 * steal them from us since the data must remain resident; if we
3679 * do this we must come up with some way to pin the pages to prevent
3680 * faults while a driver is doing DMA to/from them.
3682 static void *
3683 devmap_umem_alloc_np(size_t size, size_t flags)
3685 void *buf;
3686 int vmflags = (flags & DDI_UMEM_NOSLEEP)? VM_NOSLEEP : VM_SLEEP;
3688 buf = vmem_alloc(umem_np_arena, size, vmflags);
3689 if (buf != NULL)
3690 bzero(buf, size);
3691 return (buf);
3694 static void
3695 devmap_umem_free_np(void *addr, size_t size)
3697 vmem_free(umem_np_arena, addr, size);
3701 * Allocate page-aligned kernel memory for exporting to user land.
3702 * The devmap framework will use the cookie allocated by ddi_umem_alloc()
3703 * to find a user virtual address that is in same color as the address
3704 * allocated here.
3706 void *
3707 ddi_umem_alloc(size_t size, int flags, ddi_umem_cookie_t *cookie)
3709 register size_t len = ptob(btopr(size));
3710 void *buf = NULL;
3711 struct ddi_umem_cookie *cp;
3712 int iflags = 0;
3714 *cookie = NULL;
3716 if (len == 0)
3717 return (NULL);
3720 * allocate cookie
3722 if ((cp = kmem_zalloc(sizeof (struct ddi_umem_cookie),
3723 flags & DDI_UMEM_NOSLEEP ? KM_NOSLEEP : KM_SLEEP)) == NULL) {
3724 ASSERT(flags & DDI_UMEM_NOSLEEP);
3725 return (NULL);
3728 if (flags & DDI_UMEM_PAGEABLE) {
3729 /* Only one of the flags is allowed */
3730 ASSERT(!(flags & DDI_UMEM_TRASH));
3731 /* initialize resource with 0 */
3732 iflags = KPD_ZERO;
3735 * to allocate unlocked pageable memory, use segkp_get() to
3736 * create a segkp segment. Since segkp can only service kas,
3737 * other segment drivers such as segdev have to do
3738 * as_fault(segkp, SOFTLOCK) in their fault routines.
3740 if (flags & DDI_UMEM_NOSLEEP)
3741 iflags |= KPD_NOWAIT;
3743 if ((buf = segkp_get(segkp, len, iflags)) == NULL) {
3744 kmem_free(cp, sizeof (struct ddi_umem_cookie));
3745 return (NULL);
3747 cp->type = KMEM_PAGEABLE;
3748 mutex_init(&cp->lock, NULL, MUTEX_DEFAULT, NULL);
3749 cp->locked = 0;
3750 } else if (flags & DDI_UMEM_TRASH) {
3751 /* Only one of the flags is allowed */
3752 ASSERT(!(flags & DDI_UMEM_PAGEABLE));
3753 cp->type = UMEM_TRASH;
3754 buf = NULL;
3755 } else {
3756 if ((buf = devmap_umem_alloc_np(len, flags)) == NULL) {
3757 kmem_free(cp, sizeof (struct ddi_umem_cookie));
3758 return (NULL);
3761 cp->type = KMEM_NON_PAGEABLE;
3765 * need to save size here. size will be used when
3766 * we do kmem_free.
3768 cp->size = len;
3769 cp->cvaddr = (caddr_t)buf;
3771 *cookie = (void *)cp;
3772 return (buf);
3775 void
3776 ddi_umem_free(ddi_umem_cookie_t cookie)
3778 struct ddi_umem_cookie *cp;
3781 * if cookie is NULL, no effects on the system
3783 if (cookie == NULL)
3784 return;
3786 cp = (struct ddi_umem_cookie *)cookie;
3788 switch (cp->type) {
3789 case KMEM_PAGEABLE :
3790 ASSERT(cp->cvaddr != NULL && cp->size != 0);
3792 * Check if there are still any pending faults on the cookie
3793 * while the driver is deleting it,
3794 * XXX - could change to an ASSERT but won't catch errant drivers
3796 mutex_enter(&cp->lock);
3797 if (cp->locked) {
3798 mutex_exit(&cp->lock);
3799 panic("ddi_umem_free for cookie with pending faults %p",
3800 (void *)cp);
3801 return;
3804 segkp_release(segkp, cp->cvaddr);
3807 * release mutex associated with this cookie.
3809 mutex_destroy(&cp->lock);
3810 break;
3811 case KMEM_NON_PAGEABLE :
3812 ASSERT(cp->cvaddr != NULL && cp->size != 0);
3813 devmap_umem_free_np(cp->cvaddr, cp->size);
3814 break;
3815 case UMEM_TRASH :
3816 break;
3817 case UMEM_LOCKED :
3818 /* Callers should use ddi_umem_unlock for this type */
3819 ddi_umem_unlock(cookie);
3820 /* Frees the cookie too */
3821 return;
3822 default:
3823 /* panic so we can diagnose the underlying cause */
3824 panic("ddi_umem_free: illegal cookie type 0x%x\n",
3825 cp->type);
3828 kmem_free(cookie, sizeof (struct ddi_umem_cookie));
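
/*
 * Editorial sketch (not part of the original file): typical driver usage of
 * the allocator above, e.g. from attach(9E)/detach(9E).  The 64K size and
 * the xx_* names are illustrative assumptions.
 */
static ddi_umem_cookie_t xx_buf_cookie;
static void *xx_buf;

static int
xx_alloc_export_buf(void)
{
	/* Page-aligned, non-pageable kernel memory, zeroed by the allocator */
	xx_buf = ddi_umem_alloc(64 * 1024, DDI_UMEM_SLEEP, &xx_buf_cookie);
	return (xx_buf == NULL ? ENOMEM : 0);
}

static void
xx_free_export_buf(void)
{
	ddi_umem_free(xx_buf_cookie);	/* a NULL cookie is a no-op */
	xx_buf_cookie = NULL;
	xx_buf = NULL;
}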
3832 static int
3833 segdev_getmemid(struct seg *seg, caddr_t addr, memid_t *memidp)
3835 struct segdev_data *sdp = (struct segdev_data *)seg->s_data;
3838 * It looks as if it is always mapped shared
3840 memidp->val[0] = (uintptr_t)VTOCVP(sdp->vp);
3841 memidp->val[1] = sdp->offset + (uintptr_t)(addr - seg->s_base);
3842 return (0);
3846 * ddi_umem_alloc() non-pageable quantum cache max size.
3847 * This is just a SWAG.
3849 #define DEVMAP_UMEM_QUANTUM (8*PAGESIZE)
3852 * Initialize seg_dev from boot. This routine sets up the trash page
3853 * and creates the umem_np_arena used to back non-pageable memory
3854 * requests.
3856 void
3857 segdev_init(void)
3859 struct seg kseg;
3861 vmobject_init(&trashvp.v_object, &trashvp);
3863 umem_np_arena = vmem_create("umem_np", NULL, 0, PAGESIZE,
3864 devmap_alloc_pages, devmap_free_pages, heap_arena,
3865 DEVMAP_UMEM_QUANTUM, VM_SLEEP);
3867 kseg.s_as = &kas;
3868 trashpp = page_create_va(&trashvp.v_object, 0, PAGESIZE,
3869 PG_NORELOC | PG_EXCL | PG_WAIT, &kseg, NULL);
3870 if (trashpp == NULL)
3871 panic("segdev_init: failed to create trash page");
3872 pagezero(trashpp, 0, PAGESIZE);
3873 page_downgrade(trashpp);
3877 * Invoke platform-dependent support routines so that /proc can have
3878 * the platform code deal with curious hardware.
3881 segdev_copyfrom(struct seg *seg,
3882 caddr_t uaddr, const void *devaddr, void *kaddr, size_t len)
3884 struct segdev_data *sdp = (struct segdev_data *)seg->s_data;
3885 struct snode *sp = VTOS(VTOCVP(sdp->vp));
3887 return (e_ddi_copyfromdev(sp->s_dip,
3888 (off_t)(uaddr - seg->s_base), devaddr, kaddr, len));
3892 segdev_copyto(struct seg *seg,
3893 caddr_t uaddr, const void *kaddr, void *devaddr, size_t len)
3895 struct segdev_data *sdp = (struct segdev_data *)seg->s_data;
3896 struct snode *sp = VTOS(VTOCVP(sdp->vp));
3898 return (e_ddi_copytodev(sp->s_dip,
3899 (off_t)(uaddr - seg->s_base), kaddr, devaddr, len));