kernel - Fix some rare pmap races in i386 and x86_64.
[dragonfly.git] / sys / platform / pc32 / i386 / sys_machdep.c
blobc9d4988afe6d816c235dd7ab7b52598fa9f7d7c2
1 /*-
2 * Copyright (c) 1990 The Regents of the University of California.
3 * All rights reserved.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 * must display the following acknowledgement:
15 * This product includes software developed by the University of
16 * California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
33 * from: @(#)sys_machdep.c 5.5 (Berkeley) 1/19/91
34 * $FreeBSD: src/sys/i386/i386/sys_machdep.c,v 1.47.2.3 2002/10/07 17:20:00 jhb Exp $
35 * $DragonFly: src/sys/platform/pc32/i386/sys_machdep.c,v 1.32 2008/01/06 16:55:53 swildner Exp $
39 #include <sys/param.h>
40 #include <sys/systm.h>
41 #include <sys/sysproto.h>
42 #include <sys/malloc.h>
43 #include <sys/thread.h>
44 #include <sys/proc.h>
45 #include <sys/priv.h>
46 #include <sys/thread.h>
47 #include <sys/memrange.h>
49 #include <vm/vm.h>
50 #include <sys/lock.h>
51 #include <vm/pmap.h>
52 #include <vm/vm_map.h>
53 #include <vm/vm_extern.h>
55 #include <sys/user.h>
57 #include <machine/cpu.h>
58 #include <machine/pcb_ext.h> /* pcb.h included by sys/user.h */
59 #include <machine/sysarch.h>
60 #include <machine/smp.h>
61 #include <machine/globaldata.h> /* mdcpu */
63 #include <vm/vm_kern.h> /* for kernel_map */
65 #include <sys/thread2.h>
66 #include <sys/mplock2.h>
/*
 * LDT sizing helpers.
 *
 * MAX_LD	- upper bound on LDT slots accepted by ki386_set_ldt().
 * LD_PER_PAGE	- allocation granule, in descriptors.
 * NEW_MAX_LD	- given the largest descriptor index that must be usable,
 *		  yields the smallest multiple of LD_PER_PAGE that is
 *		  strictly greater than that index (i.e. the slot count).
 * SIZE_FROM_LARGEST_LD - the same count scaled by 8 (shift by 3),
 *		  presumably sizeof(union descriptor).
 *
 * The macro argument is parenthesized so that expressions with
 * low-precedence operators (?:, |, comparisons, ...) expand correctly.
 */
#define MAX_LD 8192
#define LD_PER_PAGE 512
#define NEW_MAX_LD(num)  (((num) + LD_PER_PAGE) & ~(LD_PER_PAGE - 1))
#define SIZE_FROM_LARGEST_LD(num) (NEW_MAX_LD(num) << 3)
/* sysarch() sub-operation handlers and helpers defined later in this file. */
static int ki386_get_ldt(struct lwp *lwp, char *args, int *res);
static int ki386_set_ldt(struct lwp *lp, char *args, int *res);
static int ki386_get_ioperm(struct lwp *lp, char *args);
static int ki386_set_ioperm(struct lwp *lp, char *args);
static int check_descs(union descriptor *descs, int num);
int i386_extend_pcb(struct lwp *lp);
83 * sysarch_args(int op, char *params)
85 * MPALMOSTSAFE
87 int
88 sys_sysarch(struct sysarch_args *uap)
90 struct lwp *lp = curthread->td_lwp;
91 int error = 0;
93 get_mplock();
95 switch(uap->op) {
96 case I386_GET_LDT:
97 error = ki386_get_ldt(lp, uap->parms, &uap->sysmsg_result);
98 break;
99 case I386_SET_LDT:
100 error = ki386_set_ldt(lp, uap->parms, &uap->sysmsg_result);
101 break;
102 case I386_GET_IOPERM:
103 error = ki386_get_ioperm(lp, uap->parms);
104 break;
105 case I386_SET_IOPERM:
106 error = ki386_set_ioperm(lp, uap->parms);
107 break;
108 case I386_VM86:
109 error = vm86_sysarch(lp, uap->parms);
110 break;
111 default:
112 error = EOPNOTSUPP;
113 break;
115 rel_mplock();
116 return (error);
120 i386_extend_pcb(struct lwp *lp)
122 int i, offset;
123 u_long *addr;
124 struct pcb_ext *ext;
125 struct soft_segment_descriptor ssd = {
126 0, /* segment base address (overwritten) */
127 ctob(IOPAGES + 1) - 1, /* length */
128 SDT_SYS386TSS, /* segment type */
129 0, /* priority level */
130 1, /* descriptor present */
131 0, 0,
132 0, /* default 32 size */
133 0 /* granularity */
136 ext = (struct pcb_ext *)kmem_alloc(&kernel_map, ctob(IOPAGES+1));
137 if (ext == NULL)
138 return (ENOMEM);
139 bzero(ext, sizeof(struct pcb_ext));
140 ext->ext_tss.tss_esp0 = (unsigned)((char *)lp->lwp_thread->td_pcb - 16);
141 ext->ext_tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL);
143 * The last byte of the i/o map must be followed by an 0xff byte.
144 * We arbitrarily allocate 16 bytes here, to keep the starting
145 * address on a doubleword boundary.
147 offset = PAGE_SIZE - 16;
148 ext->ext_tss.tss_ioopt =
149 (offset - ((unsigned)&ext->ext_tss - (unsigned)ext)) << 16;
150 ext->ext_iomap = (caddr_t)ext + offset;
151 ext->ext_vm86.vm86_intmap = (caddr_t)ext + offset - 32;
153 addr = (u_long *)ext->ext_vm86.vm86_intmap;
154 for (i = 0; i < (ctob(IOPAGES) + 32 + 16) / sizeof(u_long); i++)
155 *addr++ = ~0;
157 ssd.ssd_base = (unsigned)&ext->ext_tss;
158 ssd.ssd_limit -= ((unsigned)&ext->ext_tss - (unsigned)ext);
159 ssdtosd(&ssd, &ext->ext_tssd);
162 * Put the new TSS where the switch code can find it. Do
163 * a forced switch to ourself to activate it.
165 crit_enter();
166 lp->lwp_thread->td_pcb->pcb_ext = ext;
167 lp->lwp_thread->td_switch(lp->lwp_thread);
168 crit_exit();
170 return 0;
173 static int
174 ki386_set_ioperm(struct lwp *lp, char *args)
176 int i, error;
177 struct i386_ioperm_args ua;
178 char *iomap;
180 if ((error = copyin(args, &ua, sizeof(struct i386_ioperm_args))) != 0)
181 return (error);
183 if ((error = priv_check_cred(lp->lwp_thread->td_ucred, PRIV_ROOT, 0)) != 0)
184 return (error);
185 if (securelevel > 0)
186 return (EPERM);
188 * XXX
189 * While this is restricted to root, we should probably figure out
190 * whether any other driver is using this i/o address, as so not to
191 * cause confusion. This probably requires a global 'usage registry'.
194 if (lp->lwp_thread->td_pcb->pcb_ext == 0)
195 if ((error = i386_extend_pcb(lp)) != 0)
196 return (error);
197 iomap = (char *)lp->lwp_thread->td_pcb->pcb_ext->ext_iomap;
199 if (ua.start + ua.length > IOPAGES * PAGE_SIZE * NBBY)
200 return (EINVAL);
202 for (i = ua.start; i < ua.start + ua.length; i++) {
203 if (ua.enable)
204 iomap[i >> 3] &= ~(1 << (i & 7));
205 else
206 iomap[i >> 3] |= (1 << (i & 7));
208 return (error);
211 static int
212 ki386_get_ioperm(struct lwp *lp, char *args)
214 int i, state, error;
215 struct i386_ioperm_args ua;
216 char *iomap;
218 if ((error = copyin(args, &ua, sizeof(struct i386_ioperm_args))) != 0)
219 return (error);
220 if (ua.start >= IOPAGES * PAGE_SIZE * NBBY)
221 return (EINVAL);
223 if (lp->lwp_thread->td_pcb->pcb_ext == 0) {
224 ua.length = 0;
225 goto done;
228 iomap = (char *)lp->lwp_thread->td_pcb->pcb_ext->ext_iomap;
230 i = ua.start;
231 state = (iomap[i >> 3] >> (i & 7)) & 1;
232 ua.enable = !state;
233 ua.length = 1;
235 for (i = ua.start + 1; i < IOPAGES * PAGE_SIZE * NBBY; i++) {
236 if (state != ((iomap[i >> 3] >> (i & 7)) & 1))
237 break;
238 ua.length++;
241 done:
242 error = copyout(&ua, args, sizeof(struct i386_ioperm_args));
243 return (error);
247 * Update the TLS entries for the process. Used by assembly, do not staticize.
249 * Must be called from a critical section (else an interrupt thread preemption
250 * may cause %gs to fault). Normally called from the low level swtch.s code.
252 * MPSAFE
254 void
255 set_user_TLS(void)
257 struct thread *td = curthread;
258 int i;
259 #ifdef SMP
260 int off = GTLS_START + mycpu->gd_cpuid * NGDT;
261 #else
262 const int off = GTLS_START;
263 #endif
264 for (i = 0; i < NGTLS; ++i)
265 gdt[off + i].sd = td->td_tls.tls[i];
#ifdef SMP
/*
 * cpusync callback: hand the pcb stored in cmd->cs_data to set_user_ldt().
 */
static void
set_user_ldt_cpusync(struct lwkt_cpusync *cmd)
{
	struct pcb *target = cmd->cs_data;

	set_user_ldt(target);
}
#endif
278 * Update the GDT entry pointing to the LDT to point to the LDT of the
279 * current process. Used by assembly, do not staticize.
281 * Must be called from a critical section (else an interrupt thread preemption
282 * may cause %gs to fault). Normally called from the low level swtch.s code.
284 void
285 set_user_ldt(struct pcb *pcb)
287 struct pcb_ldt *pcb_ldt;
289 if (pcb != curthread->td_pcb)
290 return;
292 pcb_ldt = pcb->pcb_ldt;
293 #ifdef SMP
294 gdt[mycpu->gd_cpuid * NGDT + GUSERLDT_SEL].sd = pcb_ldt->ldt_sd;
295 #else
296 gdt[GUSERLDT_SEL].sd = pcb_ldt->ldt_sd;
297 #endif
298 lldt(GSEL(GUSERLDT_SEL, SEL_KPL));
299 mdcpu->gd_currentldt = GSEL(GUSERLDT_SEL, SEL_KPL);
302 struct pcb_ldt *
303 user_ldt_alloc(struct pcb *pcb, int len)
305 struct pcb_ldt *pcb_ldt, *new_ldt;
307 MALLOC(new_ldt, struct pcb_ldt *, sizeof(struct pcb_ldt),
308 M_SUBPROC, M_WAITOK);
310 new_ldt->ldt_len = len = NEW_MAX_LD(len);
311 new_ldt->ldt_base = (caddr_t)kmem_alloc(&kernel_map,
312 len * sizeof(union descriptor));
313 if (new_ldt->ldt_base == NULL) {
314 FREE(new_ldt, M_SUBPROC);
315 return NULL;
317 new_ldt->ldt_refcnt = 1;
318 new_ldt->ldt_active = 0;
320 gdt_segs[GUSERLDT_SEL].ssd_base = (unsigned)new_ldt->ldt_base;
321 gdt_segs[GUSERLDT_SEL].ssd_limit = len * sizeof(union descriptor) - 1;
322 ssdtosd(&gdt_segs[GUSERLDT_SEL], &new_ldt->ldt_sd);
324 if ((pcb_ldt = pcb->pcb_ldt)) {
325 if (len > pcb_ldt->ldt_len)
326 len = pcb_ldt->ldt_len;
327 bcopy(pcb_ldt->ldt_base, new_ldt->ldt_base,
328 len * sizeof(union descriptor));
329 } else {
330 bcopy(ldt, new_ldt->ldt_base, sizeof(ldt));
332 return new_ldt;
335 void
336 user_ldt_free(struct pcb *pcb)
338 struct pcb_ldt *pcb_ldt = pcb->pcb_ldt;
340 if (pcb_ldt == NULL)
341 return;
343 crit_enter();
344 if (pcb == curthread->td_pcb) {
345 lldt(_default_ldt);
346 mdcpu->gd_currentldt = _default_ldt;
348 pcb->pcb_ldt = NULL;
349 crit_exit();
351 if (--pcb_ldt->ldt_refcnt == 0) {
352 kmem_free(&kernel_map, (vm_offset_t)pcb_ldt->ldt_base,
353 pcb_ldt->ldt_len * sizeof(union descriptor));
354 FREE(pcb_ldt, M_SUBPROC);
358 static int
359 ki386_get_ldt(struct lwp *lwp, char *args, int *res)
361 int error = 0;
362 struct pcb *pcb = lwp->lwp_thread->td_pcb;
363 struct pcb_ldt *pcb_ldt = pcb->pcb_ldt;
364 unsigned int nldt, num;
365 union descriptor *lp;
366 struct i386_ldt_args ua, *uap = &ua;
368 if ((error = copyin(args, uap, sizeof(struct i386_ldt_args))) < 0)
369 return(error);
371 #ifdef DEBUG
372 kprintf("ki386_get_ldt: start=%d num=%d descs=%p\n",
373 uap->start, uap->num, (void *)uap->descs);
374 #endif
376 crit_enter();
378 if (pcb_ldt) {
379 nldt = (unsigned int)pcb_ldt->ldt_len;
380 num = min(uap->num, nldt);
381 lp = &((union descriptor *)(pcb_ldt->ldt_base))[uap->start];
382 } else {
383 nldt = (unsigned int)(sizeof(ldt) / sizeof(ldt[0]));
384 num = min(uap->num, nldt);
385 lp = &ldt[uap->start];
389 * note: uap->(args), num, and nldt are unsigned. nldt and num
390 * are limited in scope, but uap->start can be anything.
392 if (uap->start > nldt || uap->start + num > nldt) {
393 crit_exit();
394 return(EINVAL);
397 error = copyout(lp, uap->descs, num * sizeof(union descriptor));
398 if (!error)
399 *res = num;
400 crit_exit();
401 return(error);
404 static int
405 ki386_set_ldt(struct lwp *lp, char *args, int *res)
407 int error = 0;
408 int largest_ld;
409 struct pcb *pcb = lp->lwp_thread->td_pcb;
410 struct pcb_ldt *pcb_ldt = pcb->pcb_ldt;
411 union descriptor *descs;
412 int descs_size;
413 struct i386_ldt_args ua, *uap = &ua;
415 if ((error = copyin(args, uap, sizeof(struct i386_ldt_args))) < 0)
416 return(error);
418 #ifdef DEBUG
419 kprintf("ki386_set_ldt: start=%d num=%d descs=%p\n",
420 uap->start, uap->num, (void *)uap->descs);
421 #endif
423 /* verify range of descriptors to modify */
424 if ((uap->start < 0) || (uap->start >= MAX_LD) || (uap->num < 0) ||
425 (uap->num > MAX_LD))
427 return(EINVAL);
429 largest_ld = uap->start + uap->num - 1;
430 if (largest_ld >= MAX_LD)
431 return(EINVAL);
433 /* allocate user ldt */
434 if (!pcb_ldt || largest_ld >= pcb_ldt->ldt_len) {
435 struct pcb_ldt *new_ldt = user_ldt_alloc(pcb, largest_ld);
436 if (new_ldt == NULL)
437 return ENOMEM;
438 if (pcb_ldt) {
439 pcb_ldt->ldt_sd = new_ldt->ldt_sd;
440 kmem_free(&kernel_map, (vm_offset_t)pcb_ldt->ldt_base,
441 pcb_ldt->ldt_len * sizeof(union descriptor));
442 pcb_ldt->ldt_base = new_ldt->ldt_base;
443 pcb_ldt->ldt_len = new_ldt->ldt_len;
444 FREE(new_ldt, M_SUBPROC);
445 } else {
446 pcb->pcb_ldt = pcb_ldt = new_ldt;
449 * Since the LDT may be shared, we must signal other cpus to
450 * reload it. XXX we need to track which cpus might be
451 * using the shared ldt and only signal those.
453 #ifdef SMP
454 lwkt_cpusync_simple(-1, set_user_ldt_cpusync, pcb);
455 #else
456 set_user_ldt(pcb);
457 #endif
460 descs_size = uap->num * sizeof(union descriptor);
461 descs = (union descriptor *)kmem_alloc(&kernel_map, descs_size);
462 if (descs == NULL)
463 return (ENOMEM);
464 error = copyin(&uap->descs[0], descs, descs_size);
465 if (error) {
466 kmem_free(&kernel_map, (vm_offset_t)descs, descs_size);
467 return (error);
469 /* Check descriptors for access violations */
470 error = check_descs(descs, uap->num);
471 if (error) {
472 kmem_free(&kernel_map, (vm_offset_t)descs, descs_size);
473 return (error);
477 * Fill in the actual ldt entries. Since %fs or %gs might point to
478 * one of these entries a critical section is required to prevent an
479 * interrupt thread from preempting us, switch back, and faulting
480 * on the load of %fs due to a half-formed descriptor.
482 crit_enter();
483 bcopy(descs,
484 &((union descriptor *)(pcb_ldt->ldt_base))[uap->start],
485 uap->num * sizeof(union descriptor));
486 *res = uap->start;
488 crit_exit();
489 kmem_free(&kernel_map, (vm_offset_t)descs, descs_size);
490 return (0);
493 static int
494 check_descs(union descriptor *descs, int num)
496 int i;
498 /* Check descriptors for access violations */
499 for (i = 0; i < num; i++) {
500 union descriptor *dp;
501 dp = &descs[i];
503 switch (dp->sd.sd_type) {
504 case SDT_SYSNULL: /* system null */
505 dp->sd.sd_p = 0;
506 break;
507 case SDT_SYS286TSS: /* system 286 TSS available */
508 case SDT_SYSLDT: /* system local descriptor table */
509 case SDT_SYS286BSY: /* system 286 TSS busy */
510 case SDT_SYSTASKGT: /* system task gate */
511 case SDT_SYS286IGT: /* system 286 interrupt gate */
512 case SDT_SYS286TGT: /* system 286 trap gate */
513 case SDT_SYSNULL2: /* undefined by Intel */
514 case SDT_SYS386TSS: /* system 386 TSS available */
515 case SDT_SYSNULL3: /* undefined by Intel */
516 case SDT_SYS386BSY: /* system 386 TSS busy */
517 case SDT_SYSNULL4: /* undefined by Intel */
518 case SDT_SYS386IGT: /* system 386 interrupt gate */
519 case SDT_SYS386TGT: /* system 386 trap gate */
520 case SDT_SYS286CGT: /* system 286 call gate */
521 case SDT_SYS386CGT: /* system 386 call gate */
522 /* I can't think of any reason to allow a user proc
523 * to create a segment of these types. They are
524 * for OS use only.
526 return EACCES;
528 /* memory segment types */
529 case SDT_MEMEC: /* memory execute only conforming */
530 case SDT_MEMEAC: /* memory execute only accessed conforming */
531 case SDT_MEMERC: /* memory execute read conforming */
532 case SDT_MEMERAC: /* memory execute read accessed conforming */
533 /* Must be "present" if executable and conforming. */
534 if (dp->sd.sd_p == 0)
535 return (EACCES);
536 break;
537 case SDT_MEMRO: /* memory read only */
538 case SDT_MEMROA: /* memory read only accessed */
539 case SDT_MEMRW: /* memory read write */
540 case SDT_MEMRWA: /* memory read write accessed */
541 case SDT_MEMROD: /* memory read only expand dwn limit */
542 case SDT_MEMRODA: /* memory read only expand dwn lim accessed */
543 case SDT_MEMRWD: /* memory read write expand dwn limit */
544 case SDT_MEMRWDA: /* memory read write expand dwn lim acessed */
545 case SDT_MEME: /* memory execute only */
546 case SDT_MEMEA: /* memory execute only accessed */
547 case SDT_MEMER: /* memory execute read */
548 case SDT_MEMERA: /* memory execute read accessed */
549 break;
550 default:
551 return(EINVAL);
552 /*NOTREACHED*/
555 /* Only user (ring-3) descriptors may be present. */
556 if ((dp->sd.sd_p != 0) && (dp->sd.sd_dpl != SEL_UPL))
557 return (EACCES);
559 return (0);
563 * Called when /dev/io is opened
566 cpu_set_iopl(void)
568 curthread->td_lwp->lwp_md.md_regs->tf_eflags |= PSL_IOPL;
569 return(0);
573 * Called when /dev/io is closed
576 cpu_clr_iopl(void)
578 curthread->td_lwp->lwp_md.md_regs->tf_eflags &= ~PSL_IOPL;
579 return(0);