Merge illumos-gate
[unleashed.git] / kernel / vm / seg_umap.c
blob142950c17f46b70409e9f69e945c65a4848b19d7
1 /*
2 * This file and its contents are supplied under the terms of the
3 * Common Development and Distribution License ("CDDL"), version 1.0.
4 * You may only use this file in accordance with the terms of version
5 * 1.0 of the CDDL.
7 * A full copy of the text of the CDDL should have accompanied this
8 * source. A copy of the CDDL is also available via the Internet at
9 * http://www.illumos.org/license/CDDL.
13 * Copyright 2018 Joyent, Inc.
17 * VM - Kernel-to-user mapping segment
19 * The umap segment driver was primarily designed to facilitate the comm page:
20 * a portion of kernel memory shared with userspace so that certain (namely
21 * clock-related) actions could operate without making an expensive trip into
22 * the kernel.
24 * Since the initial requirements for the comm page are slim, advanced features
25 * of the segment driver such as per-page protection have been left
26 * unimplemented at this time.
30 #include <sys/types.h>
31 #include <sys/param.h>
32 #include <sys/errno.h>
33 #include <sys/cred.h>
34 #include <sys/kmem.h>
35 #include <sys/lgrp.h>
36 #include <sys/mman.h>
38 #include <vm/hat.h>
39 #include <vm/as.h>
40 #include <vm/seg.h>
41 #include <vm/seg_kmem.h>
42 #include <vm/seg_umap.h>
/*
 * Forward declarations for the seg_umap entry points wired into the
 * segumap_ops vector below.  All are file-local (static).
 */
static boolean_t segumap_verify_safe(caddr_t, size_t);
static int segumap_dup(struct seg *, struct seg *);
static int segumap_unmap(struct seg *, caddr_t, size_t);
static void segumap_free(struct seg *);
static faultcode_t segumap_fault(struct hat *, struct seg *, caddr_t, size_t,
    enum fault_type, enum seg_rw);
static faultcode_t segumap_faulta(struct seg *, caddr_t);
static int segumap_setprot(struct seg *, caddr_t, size_t, uint_t);
static int segumap_checkprot(struct seg *, caddr_t, size_t, uint_t);
static int segumap_sync(struct seg *, caddr_t, size_t, int, uint_t);
static size_t segumap_incore(struct seg *, caddr_t, size_t, char *);
static int segumap_lockop(struct seg *, caddr_t, size_t, int, int, ulong_t *,
    size_t);
static int segumap_getprot(struct seg *, caddr_t, size_t, uint_t *);
static uoff_t segumap_getoffset(struct seg *, caddr_t);
static int segumap_gettype(struct seg *, caddr_t);
static int segumap_getvp(struct seg *, caddr_t, struct vnode **);
static int segumap_advise(struct seg *, caddr_t, size_t, uint_t);
static void segumap_dump(struct seg *);
static int segumap_pagelock(struct seg *, caddr_t, size_t, struct page ***,
    enum lock_type, enum seg_rw);
static int segumap_setpagesize(struct seg *, caddr_t, size_t, uint_t);
static int segumap_getmemid(struct seg *, caddr_t, memid_t *);
static int segumap_capable(struct seg *, segcapability_t);
/*
 * Segment-driver operations vector installed on every umap segment by
 * segumap_create().  Uses designated initializers, so the field order here
 * need not match struct seg_ops.
 */
static struct seg_ops segumap_ops = {
	.dup		= segumap_dup,
	.unmap		= segumap_unmap,
	.free		= segumap_free,
	.fault		= segumap_fault,
	.faulta		= segumap_faulta,
	.setprot	= segumap_setprot,
	.checkprot	= segumap_checkprot,
	.sync		= segumap_sync,
	.incore		= segumap_incore,
	.lockop		= segumap_lockop,
	.getprot	= segumap_getprot,
	.getoffset	= segumap_getoffset,
	.gettype	= segumap_gettype,
	.getvp		= segumap_getvp,
	.advise		= segumap_advise,
	.dump		= segumap_dump,
	.pagelock	= segumap_pagelock,
	.setpagesize	= segumap_setpagesize,
	.getmemid	= segumap_getmemid,
	.capable	= segumap_capable,
};
95 * Create a kernel/user-mapped segment.
97 int
98 segumap_create(struct seg **segpp, void *argsp)
100 struct seg *seg = *segpp;
101 segumap_crargs_t *a = (struct segumap_crargs *)argsp;
102 segumap_data_t *data;
104 ASSERT((uintptr_t)a->kaddr > _userlimit);
107 * Check several aspects of the mapping request to ensure validity:
108 * - kernel pages must reside entirely in kernel space
109 * - target protection must be user-accessible
110 * - kernel address must be page-aligned
111 * - kernel address must reside inside a "safe" segment
113 if ((uintptr_t)a->kaddr <= _userlimit ||
114 ((uintptr_t)a->kaddr + seg->s_size) < (uintptr_t)a->kaddr ||
115 (a->prot & PROT_USER) == 0 ||
116 ((uintptr_t)a->kaddr & PAGEOFFSET) != 0 ||
117 !segumap_verify_safe(a->kaddr, seg->s_size)) {
118 return (EINVAL);
121 data = kmem_zalloc(sizeof (*data), KM_SLEEP);
122 rw_init(&data->sud_lock, NULL, RW_DEFAULT, NULL);
123 data->sud_kaddr = a->kaddr;
124 data->sud_prot = a->prot;
126 seg->s_ops = &segumap_ops;
127 seg->s_data = data;
128 return (0);
131 static boolean_t
132 segumap_verify_safe(caddr_t kaddr, size_t len)
134 struct seg *seg;
137 * Presently, only pages which are backed by segkmem are allowed to be
138 * shared with userspace. This prevents nasty paging behavior with
139 * other drivers such as seg_kp. Furthermore, the backing kernel
140 * segment must completely contain the region to be mapped.
142 * Failing these checks is fatal for now since such mappings are done
143 * in a very limited context from the kernel.
145 AS_LOCK_ENTER(&kas, RW_READER);
146 seg = as_segat(&kas, kaddr);
147 VERIFY(seg != NULL);
148 VERIFY(seg->s_base + seg->s_size >= kaddr + len);
149 VERIFY(seg->s_ops == &segkmem_ops);
150 AS_LOCK_EXIT(&kas);
152 return (B_TRUE);
155 static int
156 segumap_dup(struct seg *seg, struct seg *newseg)
158 segumap_data_t *sud = (segumap_data_t *)seg->s_data;
159 segumap_data_t *newsud;
161 ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as));
163 newsud = kmem_zalloc(sizeof (segumap_data_t), KM_SLEEP);
164 rw_init(&newsud->sud_lock, NULL, RW_DEFAULT, NULL);
165 newsud->sud_kaddr = sud->sud_kaddr;
166 newsud->sud_prot = sud->sud_prot;
168 newseg->s_ops = seg->s_ops;
169 newseg->s_data = newsud;
170 return (0);
173 static int
174 segumap_unmap(struct seg *seg, caddr_t addr, size_t len)
176 segumap_data_t *sud = (segumap_data_t *)seg->s_data;
178 ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as));
180 /* Only allow unmap of entire segment */
181 if (addr != seg->s_base || len != seg->s_size) {
182 return (EINVAL);
184 if (sud->sud_softlockcnt != 0) {
185 return (EAGAIN);
189 * Unconditionally unload the entire segment range.
191 hat_unload(seg->s_as->a_hat, addr, len, HAT_UNLOAD_UNMAP);
193 seg_free(seg);
194 return (0);
197 static void
198 segumap_free(struct seg *seg)
200 segumap_data_t *data = (segumap_data_t *)seg->s_data;
202 ASSERT(data != NULL);
204 rw_destroy(&data->sud_lock);
205 VERIFY(data->sud_softlockcnt == 0);
206 kmem_free(data, sizeof (*data));
207 seg->s_data = NULL;
/*
 * Handle a fault on the segment.
 *
 * F_PROT faults always fail (protection is fixed).  F_SOFTUNLOCK drops the
 * softlock count by the number of pages in 'len'.  F_INVAL and first-time
 * F_SOFTLOCK faults load the ENTIRE segment's translations into the HAT;
 * F_SOFTLOCK additionally bumps the softlock count.
 */
/* ARGSUSED */
static faultcode_t
segumap_fault(struct hat *hat, struct seg *seg, caddr_t addr, size_t len,
    enum fault_type type, enum seg_rw tw)
{
	segumap_data_t *sud = (segumap_data_t *)seg->s_data;

	ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));

	if (type == F_PROT) {
		/*
		 * Since protection on the segment is fixed, there is nothing
		 * to do but report an error for protection faults.
		 */
		return (FC_PROT);
	} else if (type == F_SOFTUNLOCK) {
		size_t plen = btop(len);

		/* Writer lock: the count is mutated below. */
		rw_enter(&sud->sud_lock, RW_WRITER);
		VERIFY(sud->sud_softlockcnt >= plen);
		sud->sud_softlockcnt -= plen;
		rw_exit(&sud->sud_lock);
		return (0);
	}

	ASSERT(type == F_INVAL || type == F_SOFTLOCK);
	rw_enter(&sud->sud_lock, RW_WRITER);

	/* Skip the reload for repeat F_SOFTLOCKs; the HAT is already loaded. */
	if (type == F_INVAL ||
	    (type == F_SOFTLOCK && sud->sud_softlockcnt == 0)) {
		/*
		 * Load the (entire) segment into the HAT.
		 *
		 * It's possible that threads racing into as_fault will cause
		 * seg_umap to load the same range multiple times in quick
		 * succession.  Redundant hat_devload operations are safe.
		 */
		for (uintptr_t i = 0; i < seg->s_size; i += PAGESIZE) {
			pfn_t pfn;

			/* Resolve each kernel page to its physical frame. */
			pfn = hat_getpfnum(kas.a_hat, sud->sud_kaddr + i);
			VERIFY(pfn != PFN_INVALID);
			hat_devload(seg->s_as->a_hat, seg->s_base + i,
			    PAGESIZE, pfn, sud->sud_prot, HAT_LOAD);
		}
	}
	if (type == F_SOFTLOCK) {
		size_t nval = sud->sud_softlockcnt + btop(len);

		/*
		 * Catch wrap-around of the softlock count.  NOTE(review):
		 * btop(len) == 0 (len < PAGESIZE) also trips this check —
		 * presumably callers always pass page-multiple lengths;
		 * confirm against as_fault usage.
		 */
		if (sud->sud_softlockcnt >= nval) {
			rw_exit(&sud->sud_lock);
			return (FC_MAKE_ERR(EOVERFLOW));
		}
		sud->sud_softlockcnt = nval;
	}

	rw_exit(&sud->sud_lock);
	return (0);
}
270 /* ARGSUSED */
271 static faultcode_t
272 segumap_faulta(struct seg *seg, caddr_t addr)
274 /* Do nothing since asynch pagefault should not load translation. */
275 return (0);
278 /* ARGSUSED */
279 static int
280 segumap_setprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot)
282 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
285 * The seg_umap driver does not yet allow protection to be changed.
287 return (EACCES);
290 /* ARGSUSED */
291 static int
292 segumap_checkprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot)
294 segumap_data_t *sud = (segumap_data_t *)seg->s_data;
295 int error = 0;
297 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
299 rw_enter(&sud->sud_lock, RW_READER);
300 if ((sud->sud_prot & prot) != prot) {
301 error = EACCES;
303 rw_exit(&sud->sud_lock);
304 return (error);
307 /* ARGSUSED */
308 static int
309 segumap_sync(struct seg *seg, caddr_t addr, size_t len, int attr, uint_t flags)
311 /* Always succeed since there are no backing store to sync */
312 return (0);
315 /* ARGSUSED */
316 static size_t
317 segumap_incore(struct seg *seg, caddr_t addr, size_t len, char *vec)
319 size_t sz = 0;
321 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
323 len = (len + PAGEOFFSET) & PAGEMASK;
324 while (len > 0) {
325 *vec = 1;
326 sz += PAGESIZE;
327 vec++;
328 len -= PAGESIZE;
330 return (sz);
333 /* ARGSUSED */
334 static int
335 segumap_lockop(struct seg *seg, caddr_t addr, size_t len, int attr, int op,
336 ulong_t *lockmap, size_t pos)
338 /* Report success since kernel pages are always in memory. */
339 return (0);
342 static int
343 segumap_getprot(struct seg *seg, caddr_t addr, size_t len, uint_t *protv)
345 segumap_data_t *sud = (segumap_data_t *)seg->s_data;
346 size_t pgno;
347 uint_t prot;
349 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
351 rw_enter(&sud->sud_lock, RW_READER);
352 prot = sud->sud_prot;
353 rw_exit(&sud->sud_lock);
356 * Reporting protection is simple since it is not tracked per-page.
358 pgno = seg_page(seg, addr + len) - seg_page(seg, addr) + 1;
359 while (pgno > 0) {
360 protv[--pgno] = prot;
362 return (0);
365 /* ARGSUSED */
366 static uoff_t
367 segumap_getoffset(struct seg *seg, caddr_t addr)
370 * To avoid leaking information about the layout of the kernel address
371 * space, always report '0' as the offset.
373 return (0);
376 /* ARGSUSED */
377 static int
378 segumap_gettype(struct seg *seg, caddr_t addr)
381 * Since already-existing kernel pages are being mapped into userspace,
382 * always report the segment type as shared.
384 return (MAP_SHARED);
387 /* ARGSUSED */
388 static int
389 segumap_getvp(struct seg *seg, caddr_t addr, struct vnode **vpp)
391 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
393 *vpp = NULL;
394 return (0);
397 /* ARGSUSED */
398 static int
399 segumap_advise(struct seg *seg, caddr_t addr, size_t len, uint_t behav)
401 if (behav == MADV_PURGE) {
402 /* Purge does not make sense for this mapping */
403 return (EINVAL);
405 /* Indicate success for everything else. */
406 return (0);
/* ARGSUSED */
static void
segumap_dump(struct seg *seg)
{
	/*
	 * This mapping merely shares kernel data with userspace; there is
	 * nothing additional to include in a dump.
	 */
}
419 /* ARGSUSED */
420 static int
421 segumap_pagelock(struct seg *seg, caddr_t addr, size_t len, struct page ***ppp,
422 enum lock_type type, enum seg_rw rw)
424 return (ENOTSUP);
427 /* ARGSUSED */
428 static int
429 segumap_setpagesize(struct seg *seg, caddr_t addr, size_t len, uint_t szc)
431 return (ENOTSUP);
434 static int
435 segumap_getmemid(struct seg *seg, caddr_t addr, memid_t *memidp)
437 segumap_data_t *sud = (segumap_data_t *)seg->s_data;
439 memidp->val[0] = (uintptr_t)sud->sud_kaddr;
440 memidp->val[1] = (uintptr_t)(addr - seg->s_base);
441 return (0);
444 /* ARGSUSED */
445 static int
446 segumap_capable(struct seg *seg, segcapability_t capability)
448 /* no special capablities */
449 return (0);