2 * Copyright (c) 1990 The Regents of the University of California.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 * must display the following acknowledgement:
15 * This product includes software developed by the University of
16 * California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * from: @(#)sys_machdep.c 5.5 (Berkeley) 1/19/91
34 * $FreeBSD: src/sys/i386/i386/sys_machdep.c,v 1.47.2.3 2002/10/07 17:20:00 jhb Exp $
35 * $DragonFly: src/sys/platform/pc32/i386/sys_machdep.c,v 1.32 2008/01/06 16:55:53 swildner Exp $
39 #include <sys/param.h>
40 #include <sys/systm.h>
41 #include <sys/sysproto.h>
42 #include <sys/malloc.h>
43 #include <sys/thread.h>
46 #include <sys/thread.h>
47 #include <sys/memrange.h>
52 #include <vm/vm_map.h>
53 #include <vm/vm_extern.h>
57 #include <machine/cpu.h>
58 #include <machine/pcb_ext.h> /* pcb.h included by sys/user.h */
59 #include <machine/sysarch.h>
60 #include <machine/smp.h>
61 #include <machine/globaldata.h> /* mdcpu */
63 #include <vm/vm_kern.h> /* for kernel_map */
64 #include <sys/thread2.h>
/*
 * LDT sizing helpers.
 *
 * LD_PER_PAGE           - allocation granule, in descriptors.
 * NEW_MAX_LD(num)       - smallest multiple of LD_PER_PAGE that is strictly
 *                         greater than num (so an index of 512 yields 1024).
 * SIZE_FROM_LARGEST_LD  - NEW_MAX_LD(num) scaled by 8 (<< 3).
 *
 * The macro argument is parenthesized so that argument expressions using
 * operators of lower precedence than '+' expand correctly.
 */
#define LD_PER_PAGE 512
#define NEW_MAX_LD(num) (((num) + LD_PER_PAGE) & ~(LD_PER_PAGE - 1))
#define SIZE_FROM_LARGEST_LD(num) (NEW_MAX_LD(num) << 3)
/*
 * Forward declarations for the sysarch helpers defined later in this file.
 * Parameter names mirror the definitions.  i386_extend_pcb() is the only
 * one declared with external linkage.
 */
static int ki386_get_ldt(struct lwp *lwp, char *args, int *res);
static int ki386_set_ldt(struct lwp *lp, char *args, int *res);
static int ki386_get_ioperm(struct lwp *lp, char *args);
static int ki386_set_ioperm(struct lwp *lp, char *args);
static int check_descs(union descriptor *descs, int num);
int i386_extend_pcb(struct lwp *lp);
81 * sysarch_args(int op, char *params)
/*
 * sysarch entry point (fragment).  Each visible statement forwards
 * uap->parms to one per-operation handler and stores the result in error;
 * the two LDT handlers additionally receive &uap->sysmsg_result.
 * NOTE(review): the enclosing switch, every case label except
 * I386_SET_IOPERM, and the return path are missing from this listing.
 */
85 sys_sysarch(struct sysarch_args
*uap
)
/* All handlers operate on the lwp of the calling thread. */
87 struct lwp
*lp
= curthread
->td_lwp
;
/* Read LDT entries. */
92 error
= ki386_get_ldt(lp
, uap
->parms
, &uap
->sysmsg_result
);
/* Write LDT entries. */
95 error
= ki386_set_ldt(lp
, uap
->parms
, &uap
->sysmsg_result
);
/* Query the I/O permission bitmap. */
98 error
= ki386_get_ioperm(lp
, uap
->parms
);
100 case I386_SET_IOPERM
:
101 error
= ki386_set_ioperm(lp
, uap
->parms
);
/* vm86 requests are delegated to vm86_sysarch(). */
104 error
= vm86_sysarch(lp
, uap
->parms
);
/*
 * Allocate a pcb extension of ctob(IOPAGES + 1) bytes for lp's thread,
 * fill in its TSS fields, I/O map and vm86 interrupt map pointers, build
 * the TSS descriptor in ext_tssd, attach the extension to the thread's pcb
 * and call the thread's td_switch function.
 * NOTE(review): local declarations, any allocation-failure check, the body
 * of the for loop and the return statement are not visible in this listing.
 */
114 i386_extend_pcb(struct lwp
*lp
)
/* Descriptor template; base and limit are adjusted below before conversion. */
119 struct soft_segment_descriptor ssd
= {
120 0, /* segment base address (overwritten) */
121 ctob(IOPAGES
+ 1) - 1, /* length */
122 SDT_SYS386TSS
, /* segment type */
123 0, /* priority level */
124 1, /* descriptor present */
126 0, /* default 32 size */
130 ext
= (struct pcb_ext
*)kmem_alloc(&kernel_map
, ctob(IOPAGES
+1));
/* Only the pcb_ext header is zeroed here, not the whole allocation. */
133 bzero(ext
, sizeof(struct pcb_ext
));
/* tss_esp0 is placed 16 bytes below the thread's pcb. */
134 ext
->ext_tss
.tss_esp0
= (unsigned)((char *)lp
->lwp_thread
->td_pcb
- 16);
135 ext
->ext_tss
.tss_ss0
= GSEL(GDATA_SEL
, SEL_KPL
);
137 * The last byte of the i/o map must be followed by an 0xff byte.
138 * We arbitrarily allocate 16 bytes here, to keep the starting
139 * address on a doubleword boundary.
141 offset
= PAGE_SIZE
- 16;
/*
 * tss_ioopt receives the I/O map offset measured from the start of ext_tss
 * (not from the start of the allocation), shifted into the upper 16 bits.
 */
142 ext
->ext_tss
.tss_ioopt
=
143 (offset
- ((unsigned)&ext
->ext_tss
- (unsigned)ext
)) << 16;
144 ext
->ext_iomap
= (caddr_t
)ext
+ offset
;
/* The vm86 interrupt map occupies the 32 bytes just below the I/O map. */
145 ext
->ext_vm86
.vm86_intmap
= (caddr_t
)ext
+ offset
- 32;
/*
 * Walk, in u_long units, from the interrupt map to the end of the
 * allocation: 32 bytes of intmap + ctob(IOPAGES) of I/O map + 16 trailing
 * bytes.  NOTE(review): the loop body is not visible in this listing.
 */
147 addr
= (u_long
*)ext
->ext_vm86
.vm86_intmap
;
148 for (i
= 0; i
< (ctob(IOPAGES
) + 32 + 16) / sizeof(u_long
); i
++)
/* The segment starts at ext_tss, so the limit shrinks by ext_tss's offset. */
151 ssd
.ssd_base
= (unsigned)&ext
->ext_tss
;
152 ssd
.ssd_limit
-= ((unsigned)&ext
->ext_tss
- (unsigned)ext
);
153 ssdtosd(&ssd
, &ext
->ext_tssd
);
156 * Put the new TSS where the switch code can find it. Do
157 * a forced switch to ourself to activate it.
160 lp
->lwp_thread
->td_pcb
->pcb_ext
= ext
;
161 lp
->lwp_thread
->td_switch(lp
->lwp_thread
);
/*
 * Set or clear a range of bits in the calling lwp's I/O permission bitmap.
 * Copies a struct i386_ioperm_args in from args, requires PRIV_ROOT,
 * creates the pcb extension on first use, range-checks the request and
 * then updates one bit per port in [ua.start, ua.start + ua.length).
 * NOTE(review): local declarations, the return statements and the test
 * that chooses between clearing and setting a bit are not visible here.
 */
168 ki386_set_ioperm(struct lwp
*lp
, char *args
)
171 struct i386_ioperm_args ua
;
174 if ((error
= copyin(args
, &ua
, sizeof(struct i386_ioperm_args
))) != 0)
/* Privilege check against the credentials of the owning process. */
177 if ((error
= priv_check_cred(lp
->lwp_proc
->p_ucred
, PRIV_ROOT
, 0)) != 0)
183 * While this is restricted to root, we should probably figure out
184 * whether any other driver is using this i/o address, as so not to
185 * cause confusion. This probably requires a global 'usage registry'.
/* Create the pcb extension lazily, the first time it is needed. */
188 if (lp
->lwp_thread
->td_pcb
->pcb_ext
== 0)
189 if ((error
= i386_extend_pcb(lp
)) != 0)
191 iomap
= (char *)lp
->lwp_thread
->td_pcb
->pcb_ext
->ext_iomap
;
/*
 * The bitmap holds IOPAGES * PAGE_SIZE * NBBY bits, one per port.
 * NOTE(review): the sum ua.start + ua.length is formed before the
 * comparison; if the fields are unsigned it can wrap -- field types are
 * not visible here, confirm.
 */
193 if (ua
.start
+ ua
.length
> IOPAGES
* PAGE_SIZE
* NBBY
)
196 for (i
= ua
.start
; i
< ua
.start
+ ua
.length
; i
++) {
/* Byte index is i >> 3, bit within the byte is i & 7: clear the bit. */
198 iomap
[i
>> 3] &= ~(1 << (i
& 7));
/* Same addressing: set the bit. */
200 iomap
[i
>> 3] |= (1 << (i
& 7));
/*
 * Report on the calling lwp's I/O permission bitmap.  Copies a struct
 * i386_ioperm_args in from args, rejects a start beyond the bitmap, samples
 * one bit into state, scans forward from ua.start + 1 for the first bit
 * whose value differs from state, and copies ua back out to args.
 * NOTE(review): local declarations, return statements, the handling of a
 * missing pcb extension and the assignments to ua's result fields are not
 * visible in this listing.
 */
206 ki386_get_ioperm(struct lwp
*lp
, char *args
)
209 struct i386_ioperm_args ua
;
212 if ((error
= copyin(args
, &ua
, sizeof(struct i386_ioperm_args
))) != 0)
/* The bitmap holds IOPAGES * PAGE_SIZE * NBBY bits. */
214 if (ua
.start
>= IOPAGES
* PAGE_SIZE
* NBBY
)
/* No pcb extension allocated yet; the branch body is not visible here. */
217 if (lp
->lwp_thread
->td_pcb
->pcb_ext
== 0) {
222 iomap
= (char *)lp
->lwp_thread
->td_pcb
->pcb_ext
->ext_iomap
;
/* Value (0 or 1) of bit i; presumably i == ua.start here -- TODO confirm. */
225 state
= (iomap
[i
>> 3] >> (i
& 7)) & 1;
/* Find where the run of identical bits that starts at ua.start ends. */
229 for (i
= ua
.start
+ 1; i
< IOPAGES
* PAGE_SIZE
* NBBY
; i
++) {
230 if (state
!= ((iomap
[i
>> 3] >> (i
& 7)) & 1))
236 error
= copyout(&ua
, args
, sizeof(struct i386_ioperm_args
));
241 * Update the TLS entries for the process. Used by assembly, do not staticize.
243 * Must be called from a critical section (else an interrupt thread preemption
244 * may cause %gs to fault). Normally called from the low level swtch.s code.
249 struct thread
*td
= curthread
;
252 int off
= GTLS_START
+ mycpu
->gd_cpuid
* NGDT
;
254 const int off
= GTLS_START
;
256 for (i
= 0; i
< NGTLS
; ++i
)
257 gdt
[off
+ i
].sd
= td
->td_tls
.tls
[i
];
/*
 * Adapter with a struct lwkt_cpusync argument: forwards the command's
 * cs_data field to set_user_ldt().
 */
263 set_user_ldt_cpusync(struct lwkt_cpusync
*cmd
)
265 set_user_ldt(cmd
->cs_data
);
270 * Update the GDT entry pointing to the LDT to point to the LDT of the
271 * current process. Used by assembly, do not staticize.
273 * Must be called from a critical section (else an interrupt thread preemption
274 * may cause %gs to fault). Normally called from the low level swtch.s code.
/*
 * Copy pcb's LDT segment descriptor into the GUSERLDT_SEL slot of the GDT,
 * load it with lldt() and record the selector in mdcpu->gd_currentldt.
 * NOTE(review): the statement guarded by the pcb test and the preprocessor
 * lines that choose between the two gdt[] stores are not visible here.
 */
277 set_user_ldt(struct pcb
*pcb
)
279 struct pcb_ldt
*pcb_ldt
;
/* Compare pcb with the pcb of the thread currently running on this cpu. */
281 if (pcb
!= curthread
->td_pcb
)
284 pcb_ldt
= pcb
->pcb_ldt
;
/* Variant 1: GDT slot indexed by cpu id (presumably the SMP build). */
286 gdt
[mycpu
->gd_cpuid
* NGDT
+ GUSERLDT_SEL
].sd
= pcb_ldt
->ldt_sd
;
/* Variant 2: single GDT slot (presumably the non-SMP build). */
288 gdt
[GUSERLDT_SEL
].sd
= pcb_ldt
->ldt_sd
;
290 lldt(GSEL(GUSERLDT_SEL
, SEL_KPL
));
291 mdcpu
->gd_currentldt
= GSEL(GUSERLDT_SEL
, SEL_KPL
);
/*
 * Build a new struct pcb_ldt whose table holds NEW_MAX_LD(len) descriptors.
 * The table is seeded from pcb's current LDT when it has one (copying at
 * most the old table's length) and from the global ldt array in the other
 * visible copy.
 * NOTE(review): the return statements and the else line between the two
 * bcopy() calls are not visible in this listing.
 */
295 user_ldt_alloc(struct pcb
*pcb
, int len
)
297 struct pcb_ldt
*pcb_ldt
, *new_ldt
;
299 MALLOC(new_ldt
, struct pcb_ldt
*, sizeof(struct pcb_ldt
),
300 M_SUBPROC
, M_WAITOK
);
/* From here on len is the rounded-up descriptor count. */
302 new_ldt
->ldt_len
= len
= NEW_MAX_LD(len
);
303 new_ldt
->ldt_base
= (caddr_t
)kmem_alloc(&kernel_map
,
304 len
* sizeof(union descriptor
));
/* Table allocation failed: release the header again. */
305 if (new_ldt
->ldt_base
== NULL
) {
306 FREE(new_ldt
, M_SUBPROC
);
309 new_ldt
->ldt_refcnt
= 1;
310 new_ldt
->ldt_active
= 0;
/*
 * gdt_segs[GUSERLDT_SEL] is overwritten with this table's base and limit
 * and then converted into new_ldt->ldt_sd.
 */
312 gdt_segs
[GUSERLDT_SEL
].ssd_base
= (unsigned)new_ldt
->ldt_base
;
313 gdt_segs
[GUSERLDT_SEL
].ssd_limit
= len
* sizeof(union descriptor
) - 1;
314 ssdtosd(&gdt_segs
[GUSERLDT_SEL
], &new_ldt
->ldt_sd
);
/* Existing LDT: copy no more descriptors than the old table holds. */
316 if ((pcb_ldt
= pcb
->pcb_ldt
)) {
317 if (len
> pcb_ldt
->ldt_len
)
318 len
= pcb_ldt
->ldt_len
;
319 bcopy(pcb_ldt
->ldt_base
, new_ldt
->ldt_base
,
320 len
* sizeof(union descriptor
));
/* Presumably the no-existing-LDT arm: seed from the global ldt array. */
322 bcopy(ldt
, new_ldt
->ldt_base
, sizeof(ldt
));
/*
 * Drop pcb's reference on its LDT.  When pcb belongs to the current thread
 * the per-cpu gd_currentldt is reset to _default_ldt; when the reference
 * count reaches zero the descriptor table and the pcb_ldt header are freed.
 * NOTE(review): any NULL check on pcb_ldt, the remaining statements of the
 * current-thread branch and the clearing of pcb->pcb_ldt are not visible
 * in this listing.
 */
328 user_ldt_free(struct pcb
*pcb
)
330 struct pcb_ldt
*pcb_ldt
= pcb
->pcb_ldt
;
336 if (pcb
== curthread
->td_pcb
) {
338 mdcpu
->gd_currentldt
= _default_ldt
;
/* Last reference gone: release the table, then the header. */
343 if (--pcb_ldt
->ldt_refcnt
== 0) {
344 kmem_free(&kernel_map
, (vm_offset_t
)pcb_ldt
->ldt_base
,
345 pcb_ldt
->ldt_len
* sizeof(union descriptor
));
346 FREE(pcb_ldt
, M_SUBPROC
);
/*
 * Copy LDT descriptors out to user space.  Reads a struct i386_ldt_args
 * from args, picks the lwp's private LDT or the global ldt array as the
 * source, clamps the count to the table size, range-checks start and
 * copies num descriptors to uap->descs.
 * NOTE(review): the declaration of error, the return statements, the
 * if/else lines around the two source selections and any use of res are
 * not visible in this listing.
 */
351 ki386_get_ldt(struct lwp
*lwp
, char *args
, int *res
)
354 struct pcb
*pcb
= lwp
->lwp_thread
->td_pcb
;
355 struct pcb_ldt
*pcb_ldt
= pcb
->pcb_ldt
;
356 unsigned int nldt
, num
;
357 union descriptor
*lp
;
358 struct i386_ldt_args ua
, *uap
= &ua
;
/*
 * NOTE(review): the copyin result is tested with "< 0" here, whereas the
 * ioperm handlers in this file test "!= 0"; a positive error code would
 * not be caught by this test -- confirm which is intended.
 */
360 if ((error
= copyin(args
, uap
, sizeof(struct i386_ldt_args
))) < 0)
364 kprintf("ki386_get_ldt: start=%d num=%d descs=%p\n",
365 uap
->start
, uap
->num
, (void *)uap
->descs
);
/* Source 1: the private LDT; table size is taken from ldt_len. */
371 nldt
= (unsigned int)pcb_ldt
->ldt_len
;
372 num
= min(uap
->num
, nldt
);
373 lp
= &((union descriptor
*)(pcb_ldt
->ldt_base
))[uap
->start
];
/* Source 2: the global ldt array; table size is its element count. */
375 nldt
= (unsigned int)(sizeof(ldt
) / sizeof(ldt
[0]));
376 num
= min(uap
->num
, nldt
);
377 lp
= &ldt
[uap
->start
];
381 * note: uap->(args), num, and nldt are unsigned. nldt and num
382 * are limited in scope, but uap->start can be anything.
/*
 * lp was formed from uap->start above, before this range check; the
 * copyout that reads through lp comes after the check.
 */
384 if (uap
->start
> nldt
|| uap
->start
+ num
> nldt
) {
389 error
= copyout(lp
, uap
->descs
, num
* sizeof(union descriptor
));
/*
 * Install user-supplied descriptors into the lwp's LDT.  Reads a struct
 * i386_ldt_args from args, validates the target range against MAX_LD,
 * allocates or grows the private LDT when needed, copies the descriptors
 * into a temporary kernel buffer, validates them with check_descs() and
 * copies them into the table starting at uap->start.
 * NOTE(review): several declarations, the return statements, part of the
 * range test, the if/else lines around the grow-versus-install paths, the
 * critical-section calls and the name of the final copy call are not
 * visible in this listing.
 */
397 ki386_set_ldt(struct lwp
*lp
, char *args
, int *res
)
401 struct pcb
*pcb
= lp
->lwp_thread
->td_pcb
;
402 struct pcb_ldt
*pcb_ldt
= pcb
->pcb_ldt
;
403 union descriptor
*descs
;
405 struct i386_ldt_args ua
, *uap
= &ua
;
/*
 * NOTE(review): the copyin result is tested with "< 0" here, whereas the
 * ioperm handlers in this file test "!= 0"; a positive error code would
 * not be caught by this test -- confirm which is intended.
 */
407 if ((error
= copyin(args
, uap
, sizeof(struct i386_ldt_args
))) < 0)
411 kprintf("ki386_set_ldt: start=%d num=%d descs=%p\n",
412 uap
->start
, uap
->num
, (void *)uap
->descs
);
/*
 * NOTE(review): the "< 0" tests below only have an effect if start and num
 * are signed, while the note in ki386_get_ldt calls them unsigned -- confirm.
 */
415 /* verify range of descriptors to modify */
416 if ((uap
->start
< 0) || (uap
->start
>= MAX_LD
) || (uap
->num
< 0) ||
/* Index of the highest descriptor that will be written. */
421 largest_ld
= uap
->start
+ uap
->num
- 1;
422 if (largest_ld
>= MAX_LD
)
425 /* allocate user ldt */
426 if (!pcb_ldt
|| largest_ld
>= pcb_ldt
->ldt_len
) {
427 struct pcb_ldt
*new_ldt
= user_ldt_alloc(pcb
, largest_ld
);
/*
 * Grow path: keep the existing pcb_ldt header, adopt the new table,
 * descriptor and length, free the old table and the now-empty new header.
 */
431 pcb_ldt
->ldt_sd
= new_ldt
->ldt_sd
;
432 kmem_free(&kernel_map
, (vm_offset_t
)pcb_ldt
->ldt_base
,
433 pcb_ldt
->ldt_len
* sizeof(union descriptor
));
434 pcb_ldt
->ldt_base
= new_ldt
->ldt_base
;
435 pcb_ldt
->ldt_len
= new_ldt
->ldt_len
;
436 FREE(new_ldt
, M_SUBPROC
);
/* Install path: presumably taken when the pcb had no LDT before. */
438 pcb
->pcb_ldt
= pcb_ldt
= new_ldt
;
441 * Since the LDT may be shared, we must signal other cpus to
442 * reload it. XXX we need to track which cpus might be
443 * using the shared ldt and only signal those.
446 lwkt_cpusync_simple(-1, set_user_ldt_cpusync
, pcb
);
/* Stage the user descriptors in a temporary kernel buffer. */
452 descs_size
= uap
->num
* sizeof(union descriptor
);
453 descs
= (union descriptor
*)kmem_alloc(&kernel_map
, descs_size
);
456 error
= copyin(&uap
->descs
[0], descs
, descs_size
);
/* Buffer release following the copyin. */
458 kmem_free(&kernel_map
, (vm_offset_t
)descs
, descs_size
);
461 /* Check descriptors for access violations */
462 error
= check_descs(descs
, uap
->num
);
/* Buffer release following the validation. */
464 kmem_free(&kernel_map
, (vm_offset_t
)descs
, descs_size
);
469 * Fill in the actual ldt entries. Since %fs or %gs might point to
470 * one of these entries a critical section is required to prevent an
471 * interrupt thread from preempting us, switch back, and faulting
472 * on the load of %fs due to a half-formed descriptor.
476 &((union descriptor
*)(pcb_ldt
->ldt_base
))[uap
->start
],
477 uap
->num
* sizeof(union descriptor
));
/* Final release of the staging buffer. */
481 kmem_free(&kernel_map
, (vm_offset_t
)descs
, descs_size
);
/*
 * Validate num user-supplied descriptors.  Each descriptor is classified by
 * its sd_type: the system types are grouped together, the conforming code
 * types must have the present bit set, the remaining memory types are
 * grouped together, and a present descriptor must have DPL SEL_UPL.
 * NOTE(review): the declaration of i, the assignment of dp, the statements
 * taken for each case group and the return values are not visible in this
 * listing.
 */
486 check_descs(union descriptor
*descs
, int num
)
490 /* Check descriptors for access violations */
491 for (i
= 0; i
< num
; i
++) {
492 union descriptor
*dp
;
495 switch (dp
->sd
.sd_type
) {
496 case SDT_SYSNULL
: /* system null */
499 case SDT_SYS286TSS
: /* system 286 TSS available */
500 case SDT_SYSLDT
: /* system local descriptor table */
501 case SDT_SYS286BSY
: /* system 286 TSS busy */
502 case SDT_SYSTASKGT
: /* system task gate */
503 case SDT_SYS286IGT
: /* system 286 interrupt gate */
504 case SDT_SYS286TGT
: /* system 286 trap gate */
505 case SDT_SYSNULL2
: /* undefined by Intel */
506 case SDT_SYS386TSS
: /* system 386 TSS available */
507 case SDT_SYSNULL3
: /* undefined by Intel */
508 case SDT_SYS386BSY
: /* system 386 TSS busy */
509 case SDT_SYSNULL4
: /* undefined by Intel */
510 case SDT_SYS386IGT
: /* system 386 interrupt gate */
511 case SDT_SYS386TGT
: /* system 386 trap gate */
512 case SDT_SYS286CGT
: /* system 286 call gate */
513 case SDT_SYS386CGT
: /* system 386 call gate */
514 /* I can't think of any reason to allow a user proc
515 * to create a segment of these types. They are
520 /* memory segment types */
521 case SDT_MEMEC
: /* memory execute only conforming */
522 case SDT_MEMEAC
: /* memory execute only accessed conforming */
523 case SDT_MEMERC
: /* memory execute read conforming */
524 case SDT_MEMERAC
: /* memory execute read accessed conforming */
525 /* Must be "present" if executable and conforming. */
526 if (dp
->sd
.sd_p
== 0)
529 case SDT_MEMRO
: /* memory read only */
530 case SDT_MEMROA
: /* memory read only accessed */
531 case SDT_MEMRW
: /* memory read write */
532 case SDT_MEMRWA
: /* memory read write accessed */
533 case SDT_MEMROD
: /* memory read only expand dwn limit */
534 case SDT_MEMRODA
: /* memory read only expand dwn lim accessed */
535 case SDT_MEMRWD
: /* memory read write expand dwn limit */
536 case SDT_MEMRWDA
: /* memory read write expand dwn lim accessed */
537 case SDT_MEME
: /* memory execute only */
538 case SDT_MEMEA
: /* memory execute only accessed */
539 case SDT_MEMER
: /* memory execute read */
540 case SDT_MEMERA
: /* memory execute read accessed */
547 /* Only user (ring-3) descriptors may be present. */
548 if ((dp
->sd
.sd_p
!= 0) && (dp
->sd
.sd_dpl
!= SEL_UPL
))
555 * Called when /dev/io is opened
560 curthread
->td_lwp
->lwp_md
.md_regs
->tf_eflags
|= PSL_IOPL
;
565 * Called when /dev/io is closed
570 curthread
->td_lwp
->lwp_md
.md_regs
->tf_eflags
&= ~PSL_IOPL
;