Merge commit '7e934d3acc051b7ee3ef0d11571fd1225800a607'
[unleashed.git] / kernel / os / subr.c
blob784d1add80a94222f6a17e005ccd71ef5a0b86f6
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
19 * CDDL HEADER END
22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
26 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
27 /* All Rights Reserved */
29 #include <sys/types.h>
30 #include <sys/sysmacros.h>
31 #include <sys/param.h>
32 #include <sys/vmparam.h>
33 #include <sys/systm.h>
34 #include <sys/cred.h>
35 #include <sys/user.h>
36 #include <sys/proc.h>
37 #include <sys/conf.h>
38 #include <sys/tuneable.h>
39 #include <sys/cpuvar.h>
40 #include <sys/archsystm.h>
41 #include <sys/vmem.h>
42 #include <vm/seg_kmem.h>
43 #include <sys/errno.h>
44 #include <sys/cmn_err.h>
45 #include <sys/debug.h>
46 #include <sys/atomic.h>
47 #include <sys/model.h>
48 #include <sys/kmem.h>
49 #include <sys/memlist.h>
50 #include <sys/autoconf.h>
51 #include <sys/ontrap.h>
52 #include <sys/utsname.h>
53 #include <sys/zone.h>
57 * Routine which sets a user error; placed in
58 * illegal entries in the bdevsw and cdevsw tables.
61 int
62 nodev()
64 return (curthread->t_lwp ?
65 ttolwp(curthread)->lwp_error = ENXIO : ENXIO);
/*
 * Do-nothing stub placed in insignificant entries of the bdevsw and
 * cdevsw tables.  Always reports success by returning 0.
 */
int
nulldev()
{
	return (0);
}
79 static kmutex_t udevlock;
82 * Generate an unused major device number.
84 major_t
85 getudev()
87 static major_t next = 0;
88 major_t ret;
91 * Ensure that we start allocating major numbers above the 'devcnt'
92 * count. The only limit we place on the number is that it should be a
93 * legal 32-bit SVR4 major number and be greater than or equal to devcnt
94 * in the current system).
96 mutex_enter(&udevlock);
97 if (next == 0)
98 next = devcnt;
99 if (next <= L_MAXMAJ32 && next >= devcnt)
100 ret = next++;
101 else {
103 * If we fail to allocate a major number because devcnt has
104 * reached L_MAXMAJ32, we may be the victim of a sparsely
105 * populated devnames array. We scan the array backwards
106 * looking for an empty slot; if we find one, mark it as
107 * DN_GETUDEV so it doesn't get taken by subsequent consumers
108 * users of the devnames array, and issue a warning.
109 * It is vital for this routine to take drastic measures to
110 * succeed, since the kernel really needs it to boot.
112 int i;
113 for (i = devcnt - 1; i >= 0; i--) {
114 LOCK_DEV_OPS(&devnamesp[i].dn_lock);
115 if (devnamesp[i].dn_name == NULL &&
116 ((devnamesp[i].dn_flags & DN_TAKEN_GETUDEV) == 0))
117 break;
118 UNLOCK_DEV_OPS(&devnamesp[i].dn_lock);
120 if (i != -1) {
121 cmn_err(CE_WARN, "Reusing device major number %d.", i);
122 ASSERT(i >= 0 && i < devcnt);
123 devnamesp[i].dn_flags |= DN_TAKEN_GETUDEV;
124 UNLOCK_DEV_OPS(&devnamesp[i].dn_lock);
125 ret = (major_t)i;
126 } else {
127 ret = DDI_MAJOR_T_NONE;
130 mutex_exit(&udevlock);
131 return (ret);
136 * Compress 'long' device number encoding to 32-bit device number
137 * encoding. If it won't fit, we return failure, but set the
138 * device number to 32-bit NODEV for the sake of our callers.
141 cmpldev(dev32_t *dst, dev_t dev)
143 #if defined(_LP64)
144 if (dev == NODEV) {
145 *dst = NODEV32;
146 } else {
147 major_t major = dev >> L_BITSMINOR;
148 minor_t minor = dev & L_MAXMIN;
150 if (major > L_MAXMAJ32 || minor > L_MAXMIN32) {
151 *dst = NODEV32;
152 return (0);
155 *dst = (dev32_t)((major << L_BITSMINOR32) | minor);
157 #else
158 *dst = (dev32_t)dev;
159 #endif
160 return (1);
164 * Expand 32-bit dev_t's to long dev_t's. Expansion always "fits"
165 * into the return type, but we're careful to expand NODEV explicitly.
167 dev_t
168 expldev(dev32_t dev32)
170 #ifdef _LP64
171 if (dev32 == NODEV32)
172 return (NODEV);
173 return (makedevice((dev32 >> L_BITSMINOR32) & L_MAXMAJ32,
174 dev32 & L_MAXMIN32));
175 #else
176 return ((dev_t)dev32);
177 #endif
180 #ifndef _LP64
/*
 * Keep these entry points for 32-bit systems but enforce the use
 * of MIN/MAX macros on 64-bit systems.  The DDI header files already
 * define min/max as macros so drivers shouldn't need these functions.
 */
/*
 * Return the smaller of two signed integers.
 */
int
min(int a, int b)
{
	if (b < a)
		return (b);
	return (a);
}
/*
 * Return the larger of two signed integers.
 */
int
max(int a, int b)
{
	if (a < b)
		return (b);
	return (a);
}
199 uint_t
200 umin(uint_t a, uint_t b)
202 return (a < b ? a : b);
205 uint_t
206 umax(uint_t a, uint_t b)
208 return (a > b ? a : b);
211 #endif /* !_LP64 */
/*
 * Parse suboptions from a string.
 * Same as getsubopt(3C).
 *
 * On entry *optionsp points at a comma-separated list of "name" or
 * "name=value" items.  The first item is split off in place (its
 * trailing ',' is overwritten with a null byte) and *optionsp is
 * advanced to the start of the following item, or to the terminating
 * null byte if there is none.
 *
 * Returns the index in 'tokens' (a NULL-terminated array) whose string
 * equals the item's name, with *valuep pointing at the text after '='
 * or NULL when the item has no '='.  Returns -1 with *valuep == NULL
 * when the list is empty, and -1 with *valuep pointing at the whole
 * item when the name matches no token.
 */
int
getsubopt(char **optionsp, char * const *tokens, char **valuep)
{
	char *item = *optionsp;
	char *sep;
	size_t namelen;
	int idx;

	*valuep = NULL;
	if (*item == '\0')
		return (-1);

	/* Cut this item off from the rest of the list. */
	sep = strchr(item, ',');
	if (sep != NULL)
		*sep++ = '\0';		/* mark end and point to next */
	else
		sep = item + strlen(item);
	*optionsp = sep;		/* point to next option */

	/* Split the item into name and (optional) value. */
	sep = strchr(item, '=');
	if (sep != NULL) {
		namelen = sep - item;
		*valuep = sep + 1;
	} else {
		namelen = strlen(item);
		*valuep = NULL;
	}

	/* The '=' is left in place, so compare by explicit length. */
	for (idx = 0; tokens[idx] != NULL; idx++) {
		if (strlen(tokens[idx]) == namelen &&
		    strncmp(item, tokens[idx], namelen) == 0)
			return (idx);
	}

	/* no match, point value at option and return error */
	*valuep = item;
	return (-1);
}
/*
 * Append the suboption string 'opt' starting at the position 'str'
 * within the buffer defined by 'buf' and 'len'.  If 'buf' already holds
 * a non-empty string, a comma is appended first.
 *
 * Return a pointer to the end of the resulting string (the null byte).
 * Return NULL, writing nothing, if there isn't enough space left to
 * append 'opt'.
 */
char *
append_subopt(const char *buf, size_t len, char *str, const char *opt)
{
	const size_t optlen = strlen(opt);
	const int need_comma = (buf[0] != '\0');

	/*
	 * Space needed: what is already in the buffer, the separating
	 * ',' if this is not the first option, the option itself and
	 * the null byte.
	 */
	if (strlen(buf) + need_comma + optlen + 1 > len)
		return (NULL);

	if (need_comma)
		*str++ = ',';
	(void) strcpy(str, opt);

	return (str + optlen);
}
/*
 * Tables to convert a single byte to/from binary-coded decimal (BCD).
 *
 * byte_to_bcd[n] is the BCD encoding of n for n in 0..99: tens digit in
 * the high nibble, units digit in the low nibble.  Only those first 100
 * entries are listed; the remaining entries of the 256-element array
 * have no explicit initializer.
 */
uchar_t byte_to_bcd[256] = {
	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09,
	0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19,
	0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29,
	0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39,
	0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49,
	0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59,
	0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69,
	0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79,
	0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89,
	0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
};
/*
 * Inverse table, indexed by a BCD-encoded byte.  Each row covers one
 * value of the high nibble (the tens digit); the first ten columns are
 * the valid units digits 0-9 and the last six columns (low nibble
 * 0xA-0xF, not valid BCD) are 0.  Entries past index 0x99 have no
 * explicit initializer.
 */
uchar_t bcd_to_byte[256] = {		/* CSTYLED */
	 0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  0,  0,  0,  0,  0,  0,
	10, 11, 12, 13, 14, 15, 16, 17, 18, 19,  0,  0,  0,  0,  0,  0,
	20, 21, 22, 23, 24, 25, 26, 27, 28, 29,  0,  0,  0,  0,  0,  0,
	30, 31, 32, 33, 34, 35, 36, 37, 38, 39,  0,  0,  0,  0,  0,  0,
	40, 41, 42, 43, 44, 45, 46, 47, 48, 49,  0,  0,  0,  0,  0,  0,
	50, 51, 52, 53, 54, 55, 56, 57, 58, 59,  0,  0,  0,  0,  0,  0,
	60, 61, 62, 63, 64, 65, 66, 67, 68, 69,  0,  0,  0,  0,  0,  0,
	70, 71, 72, 73, 74, 75, 76, 77, 78, 79,  0,  0,  0,  0,  0,  0,
	80, 81, 82, 83, 84, 85, 86, 87, 88, 89,  0,  0,  0,  0,  0,  0,
	90, 91, 92, 93, 94, 95, 96, 97, 98, 99,
};
/*
 * Hot-patch a single instruction in the kernel's text.
 * If you want to patch multiple instructions you must
 * arrange to do it so that all intermediate stages are
 * sane -- we don't stop other cpus while doing this.
 * Size must be 1, 2, or 4 bytes with iaddr aligned accordingly.
 *
 * iaddr	address of the instruction to overwrite
 * new_instr	new instruction bits; only the low 'size' bytes are stored
 * size		width of the store in bytes; any value other than
 *		1, 2 or 4 panics
 *
 * The store is not made through iaddr itself.  A scratch page of
 * virtual address space is allocated, the page frame behind iaddr is
 * mapped there read/write, and the store goes through that mapping.
 */
void
hot_patch_kernel_text(caddr_t iaddr, uint32_t new_instr, uint_t size)
{
	caddr_t vaddr;
	page_t **ppp;
	/* iaddr masked with PAGEOFFSET; iaddr - off is used as the page base */
	uintptr_t off = (uintptr_t)iaddr & PAGEOFFSET;

	/* Scratch virtual address range for the temporary mapping. */
	vaddr = vmem_alloc(heap_arena, PAGESIZE, VM_SLEEP);

	/* The result of as_pagelock() is explicitly discarded here. */
	(void) as_pagelock(&kas, &ppp, iaddr - off, PAGESIZE, S_WRITE);

	/* Map the page frame backing iaddr at vaddr, readable and writable. */
	hat_devload(kas.a_hat, vaddr, PAGESIZE,
	    hat_getpfnum(kas.a_hat, iaddr - off),
	    PROT_READ | PROT_WRITE, HAT_LOAD_LOCK | HAT_LOAD_NOCONSIST);

	/* One store of exactly 'size' bytes at the same in-page offset. */
	switch (size) {
	case 1:
		*(uint8_t *)(vaddr + off) = new_instr;
		break;
	case 2:
		*(uint16_t *)(vaddr + off) = new_instr;
		break;
	case 4:
		*(uint32_t *)(vaddr + off) = new_instr;
		break;
	default:
		panic("illegal hot-patch");
	}

	/*
	 * sync_icache() is called on both the scratch alias and the
	 * original text address before the mapping is torn down.
	 */
	membar_enter();
	sync_icache(vaddr + off, size);
	sync_icache(iaddr, size);
	/* Undo the setup: unlock the page, drop the mapping, free the VA. */
	as_pageunlock(&kas, ppp, iaddr - off, PAGESIZE, S_WRITE);
	hat_unload(kas.a_hat, vaddr, PAGESIZE, HAT_UNLOAD_UNLOCK);
	vmem_free(heap_arena, vaddr, PAGESIZE);
}
351 * Routine to report an attempt to execute non-executable data. If the
352 * address executed lies in the stack, explicitly say so.
354 void
355 report_stack_exec(proc_t *p, caddr_t addr)
357 if (!noexec_user_stack_log)
358 return;
360 if (addr < p->p_usrstack && addr >= (p->p_usrstack - p->p_stksize)) {
361 cmn_err(CE_NOTE, "%s[%d] attempt to execute code "
362 "on stack by uid %d", p->p_user.u_comm,
363 p->p_pid, crgetruid(p->p_cred));
364 } else {
365 cmn_err(CE_NOTE, "%s[%d] attempt to execute non-executable "
366 "data at 0x%p by uid %d", p->p_user.u_comm,
367 p->p_pid, (void *) addr, crgetruid(p->p_cred));
370 ddi_msleep(20);
374 * Determine whether the address range [addr, addr + len) is in memlist mp.
377 address_in_memlist(struct memlist *mp, uint64_t addr, size_t len)
379 while (mp != 0) {
380 if ((addr >= mp->ml_address) &&
381 (addr + len <= mp->ml_address + mp->ml_size))
382 return (1); /* TRUE */
383 mp = mp->ml_next;
385 return (0); /* FALSE */
389 * Pop the topmost element from the t_ontrap stack, removing the current set of
390 * on_trap() protections. Refer to <sys/ontrap.h> for more info. If the
391 * stack is already empty, no_trap() just returns.
393 void
394 no_trap(void)
396 if (curthread->t_ontrap != NULL) {
397 curthread->t_ontrap = curthread->t_ontrap->ot_prev;
402 * Return utsname.nodename outside a zone, or the zone name within.
404 char *
405 uts_nodename(void)
407 if (curproc == NULL)
408 return (utsname.nodename);
409 return (curproc->p_zone->zone_nodename);