/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source.  A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */

/*
 * Copyright 2018 Joyent, Inc.
 */

/*
 * VM - Kernel-to-user mapping segment
 *
 * The umap segment driver was primarily designed to facilitate the comm page:
 * a portion of kernel memory shared with userspace so that certain (namely
 * clock-related) actions could operate without making an expensive trip into
 * the kernel.
 *
 * Since the initial requirements for the comm page are slim, advanced features
 * of the segment driver such as per-page protection have been left
 * unimplemented at this time.
 */
30 #include <sys/types.h>
31 #include <sys/param.h>
32 #include <sys/errno.h>
41 #include <vm/seg_kmem.h>
42 #include <vm/seg_umap.h>
45 static boolean_t
segumap_verify_safe(caddr_t
, size_t);
46 static int segumap_dup(struct seg
*, struct seg
*);
47 static int segumap_unmap(struct seg
*, caddr_t
, size_t);
48 static void segumap_free(struct seg
*);
49 static faultcode_t
segumap_fault(struct hat
*, struct seg
*, caddr_t
, size_t,
50 enum fault_type
, enum seg_rw
);
51 static faultcode_t
segumap_faulta(struct seg
*, caddr_t
);
52 static int segumap_setprot(struct seg
*, caddr_t
, size_t, uint_t
);
53 static int segumap_checkprot(struct seg
*, caddr_t
, size_t, uint_t
);
54 static int segumap_sync(struct seg
*, caddr_t
, size_t, int, uint_t
);
55 static size_t segumap_incore(struct seg
*, caddr_t
, size_t, char *);
56 static int segumap_lockop(struct seg
*, caddr_t
, size_t, int, int, ulong_t
*,
58 static int segumap_getprot(struct seg
*, caddr_t
, size_t, uint_t
*);
59 static uoff_t
segumap_getoffset(struct seg
*, caddr_t
);
60 static int segumap_gettype(struct seg
*, caddr_t
);
61 static int segumap_getvp(struct seg
*, caddr_t
, struct vnode
**);
62 static int segumap_advise(struct seg
*, caddr_t
, size_t, uint_t
);
63 static void segumap_dump(struct seg
*);
64 static int segumap_pagelock(struct seg
*, caddr_t
, size_t, struct page
***,
65 enum lock_type
, enum seg_rw
);
66 static int segumap_setpagesize(struct seg
*, caddr_t
, size_t, uint_t
);
67 static int segumap_getmemid(struct seg
*, caddr_t
, memid_t
*);
68 static int segumap_capable(struct seg
*, segcapability_t
);
70 static struct seg_ops segumap_ops
= {
72 .unmap
= segumap_unmap
,
74 .fault
= segumap_fault
,
75 .faulta
= segumap_faulta
,
76 .setprot
= segumap_setprot
,
77 .checkprot
= segumap_checkprot
,
79 .incore
= segumap_incore
,
80 .lockop
= segumap_lockop
,
81 .getprot
= segumap_getprot
,
82 .getoffset
= segumap_getoffset
,
83 .gettype
= segumap_gettype
,
84 .getvp
= segumap_getvp
,
85 .advise
= segumap_advise
,
87 .pagelock
= segumap_pagelock
,
88 .setpagesize
= segumap_setpagesize
,
89 .getmemid
= segumap_getmemid
,
90 .capable
= segumap_capable
,
95 * Create a kernel/user-mapped segment.
98 segumap_create(struct seg
**segpp
, void *argsp
)
100 struct seg
*seg
= *segpp
;
101 segumap_crargs_t
*a
= (struct segumap_crargs
*)argsp
;
102 segumap_data_t
*data
;
104 ASSERT((uintptr_t)a
->kaddr
> _userlimit
);
107 * Check several aspects of the mapping request to ensure validity:
108 * - kernel pages must reside entirely in kernel space
109 * - target protection must be user-accessible
110 * - kernel address must be page-aligned
111 * - kernel address must reside inside a "safe" segment
113 if ((uintptr_t)a
->kaddr
<= _userlimit
||
114 ((uintptr_t)a
->kaddr
+ seg
->s_size
) < (uintptr_t)a
->kaddr
||
115 (a
->prot
& PROT_USER
) == 0 ||
116 ((uintptr_t)a
->kaddr
& PAGEOFFSET
) != 0 ||
117 !segumap_verify_safe(a
->kaddr
, seg
->s_size
)) {
121 data
= kmem_zalloc(sizeof (*data
), KM_SLEEP
);
122 rw_init(&data
->sud_lock
, NULL
, RW_DEFAULT
, NULL
);
123 data
->sud_kaddr
= a
->kaddr
;
124 data
->sud_prot
= a
->prot
;
126 seg
->s_ops
= &segumap_ops
;
132 segumap_verify_safe(caddr_t kaddr
, size_t len
)
137 * Presently, only pages which are backed by segkmem are allowed to be
138 * shared with userspace. This prevents nasty paging behavior with
139 * other drivers such as seg_kp. Furthermore, the backing kernel
140 * segment must completely contain the region to be mapped.
142 * Failing these checks is fatal for now since such mappings are done
143 * in a very limited context from the kernel.
145 AS_LOCK_ENTER(&kas
, RW_READER
);
146 seg
= as_segat(&kas
, kaddr
);
148 VERIFY(seg
->s_base
+ seg
->s_size
>= kaddr
+ len
);
149 VERIFY(seg
->s_ops
== &segkmem_ops
);
156 segumap_dup(struct seg
*seg
, struct seg
*newseg
)
158 segumap_data_t
*sud
= (segumap_data_t
*)seg
->s_data
;
159 segumap_data_t
*newsud
;
161 ASSERT(seg
->s_as
&& AS_WRITE_HELD(seg
->s_as
));
163 newsud
= kmem_zalloc(sizeof (segumap_data_t
), KM_SLEEP
);
164 rw_init(&newsud
->sud_lock
, NULL
, RW_DEFAULT
, NULL
);
165 newsud
->sud_kaddr
= sud
->sud_kaddr
;
166 newsud
->sud_prot
= sud
->sud_prot
;
168 newseg
->s_ops
= seg
->s_ops
;
169 newseg
->s_data
= newsud
;
174 segumap_unmap(struct seg
*seg
, caddr_t addr
, size_t len
)
176 segumap_data_t
*sud
= (segumap_data_t
*)seg
->s_data
;
178 ASSERT(seg
->s_as
&& AS_WRITE_HELD(seg
->s_as
));
180 /* Only allow unmap of entire segment */
181 if (addr
!= seg
->s_base
|| len
!= seg
->s_size
) {
184 if (sud
->sud_softlockcnt
!= 0) {
189 * Unconditionally unload the entire segment range.
191 hat_unload(seg
->s_as
->a_hat
, addr
, len
, HAT_UNLOAD_UNMAP
);
198 segumap_free(struct seg
*seg
)
200 segumap_data_t
*data
= (segumap_data_t
*)seg
->s_data
;
202 ASSERT(data
!= NULL
);
204 rw_destroy(&data
->sud_lock
);
205 VERIFY(data
->sud_softlockcnt
== 0);
206 kmem_free(data
, sizeof (*data
));
212 segumap_fault(struct hat
*hat
, struct seg
*seg
, caddr_t addr
, size_t len
,
213 enum fault_type type
, enum seg_rw tw
)
215 segumap_data_t
*sud
= (segumap_data_t
*)seg
->s_data
;
217 ASSERT(seg
->s_as
&& AS_LOCK_HELD(seg
->s_as
));
219 if (type
== F_PROT
) {
221 * Since protection on the segment is fixed, there is nothing
222 * to do but report an error for protection faults.
225 } else if (type
== F_SOFTUNLOCK
) {
226 size_t plen
= btop(len
);
228 rw_enter(&sud
->sud_lock
, RW_WRITER
);
229 VERIFY(sud
->sud_softlockcnt
>= plen
);
230 sud
->sud_softlockcnt
-= plen
;
231 rw_exit(&sud
->sud_lock
);
235 ASSERT(type
== F_INVAL
|| type
== F_SOFTLOCK
);
236 rw_enter(&sud
->sud_lock
, RW_WRITER
);
238 if (type
== F_INVAL
||
239 (type
== F_SOFTLOCK
&& sud
->sud_softlockcnt
== 0)) {
241 * Load the (entire) segment into the HAT.
243 * It's possible that threads racing into as_fault will cause
244 * seg_umap to load the same range multiple times in quick
245 * succession. Redundant hat_devload operations are safe.
247 for (uintptr_t i
= 0; i
< seg
->s_size
; i
+= PAGESIZE
) {
250 pfn
= hat_getpfnum(kas
.a_hat
, sud
->sud_kaddr
+ i
);
251 VERIFY(pfn
!= PFN_INVALID
);
252 hat_devload(seg
->s_as
->a_hat
, seg
->s_base
+ i
,
253 PAGESIZE
, pfn
, sud
->sud_prot
, HAT_LOAD
);
256 if (type
== F_SOFTLOCK
) {
257 size_t nval
= sud
->sud_softlockcnt
+ btop(len
);
259 if (sud
->sud_softlockcnt
>= nval
) {
260 rw_exit(&sud
->sud_lock
);
261 return (FC_MAKE_ERR(EOVERFLOW
));
263 sud
->sud_softlockcnt
= nval
;
266 rw_exit(&sud
->sud_lock
);
272 segumap_faulta(struct seg
*seg
, caddr_t addr
)
274 /* Do nothing since asynch pagefault should not load translation. */
280 segumap_setprot(struct seg
*seg
, caddr_t addr
, size_t len
, uint_t prot
)
282 ASSERT(seg
->s_as
&& AS_LOCK_HELD(seg
->s_as
));
285 * The seg_umap driver does not yet allow protection to be changed.
292 segumap_checkprot(struct seg
*seg
, caddr_t addr
, size_t len
, uint_t prot
)
294 segumap_data_t
*sud
= (segumap_data_t
*)seg
->s_data
;
297 ASSERT(seg
->s_as
&& AS_LOCK_HELD(seg
->s_as
));
299 rw_enter(&sud
->sud_lock
, RW_READER
);
300 if ((sud
->sud_prot
& prot
) != prot
) {
303 rw_exit(&sud
->sud_lock
);
309 segumap_sync(struct seg
*seg
, caddr_t addr
, size_t len
, int attr
, uint_t flags
)
311 /* Always succeed since there are no backing store to sync */
317 segumap_incore(struct seg
*seg
, caddr_t addr
, size_t len
, char *vec
)
321 ASSERT(seg
->s_as
&& AS_LOCK_HELD(seg
->s_as
));
323 len
= (len
+ PAGEOFFSET
) & PAGEMASK
;
335 segumap_lockop(struct seg
*seg
, caddr_t addr
, size_t len
, int attr
, int op
,
336 ulong_t
*lockmap
, size_t pos
)
338 /* Report success since kernel pages are always in memory. */
343 segumap_getprot(struct seg
*seg
, caddr_t addr
, size_t len
, uint_t
*protv
)
345 segumap_data_t
*sud
= (segumap_data_t
*)seg
->s_data
;
349 ASSERT(seg
->s_as
&& AS_LOCK_HELD(seg
->s_as
));
351 rw_enter(&sud
->sud_lock
, RW_READER
);
352 prot
= sud
->sud_prot
;
353 rw_exit(&sud
->sud_lock
);
356 * Reporting protection is simple since it is not tracked per-page.
358 pgno
= seg_page(seg
, addr
+ len
) - seg_page(seg
, addr
) + 1;
360 protv
[--pgno
] = prot
;
367 segumap_getoffset(struct seg
*seg
, caddr_t addr
)
370 * To avoid leaking information about the layout of the kernel address
371 * space, always report '0' as the offset.
378 segumap_gettype(struct seg
*seg
, caddr_t addr
)
381 * Since already-existing kernel pages are being mapped into userspace,
382 * always report the segment type as shared.
389 segumap_getvp(struct seg
*seg
, caddr_t addr
, struct vnode
**vpp
)
391 ASSERT(seg
->s_as
&& AS_LOCK_HELD(seg
->s_as
));
399 segumap_advise(struct seg
*seg
, caddr_t addr
, size_t len
, uint_t behav
)
401 if (behav
== MADV_PURGE
) {
402 /* Purge does not make sense for this mapping */
405 /* Indicate success for everything else. */
/* ARGSUSED */
static void
segumap_dump(struct seg *seg)
{
	/*
	 * Since this is a mapping to share kernel data with userspace, nothing
	 * additional should be dumped.
	 */
}
421 segumap_pagelock(struct seg
*seg
, caddr_t addr
, size_t len
, struct page
***ppp
,
422 enum lock_type type
, enum seg_rw rw
)
429 segumap_setpagesize(struct seg
*seg
, caddr_t addr
, size_t len
, uint_t szc
)
435 segumap_getmemid(struct seg
*seg
, caddr_t addr
, memid_t
*memidp
)
437 segumap_data_t
*sud
= (segumap_data_t
*)seg
->s_data
;
439 memidp
->val
[0] = (uintptr_t)sud
->sud_kaddr
;
440 memidp
->val
[1] = (uintptr_t)(addr
- seg
->s_base
);
446 segumap_capable(struct seg
*seg
, segcapability_t capability
)
448 /* no special capablities */