2 * Copyright (c) 1982, 1986, 1991, 1993
3 * The Regents of the University of California. All rights reserved.
4 * (c) UNIX System Laboratories, Inc.
5 * All or some portions of this file are derived from material licensed
6 * to the University of California by American Telephone and Telegraph
7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8 * the permission of UNIX System Laboratories, Inc.
10 * Copyright (c) 2014 The FreeBSD Foundation
12 * Portions of this software were developed by Konstantin Belousov
13 * under sponsorship from the FreeBSD Foundation.
15 * Redistribution and use in source and binary forms, with or without
16 * modification, are permitted provided that the following conditions
18 * 1. Redistributions of source code must retain the above copyright
19 * notice, this list of conditions and the following disclaimer.
20 * 2. Redistributions in binary form must reproduce the above copyright
21 * notice, this list of conditions and the following disclaimer in the
22 * documentation and/or other materials provided with the distribution.
23 * 4. Neither the name of the University nor the names of its contributors
24 * may be used to endorse or promote products derived from this software
25 * without specific prior written permission.
27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
39 * @(#)kern_subr.c 8.3 (Berkeley) 1/21/94
42 #include <sys/cdefs.h>
43 __FBSDID("$FreeBSD$");
45 #include <sys/param.h>
46 #include <sys/systm.h>
47 #include <sys/kernel.h>
48 #include <sys/limits.h>
52 #include <sys/resourcevar.h>
53 #include <sys/rwlock.h>
54 #include <sys/sched.h>
55 #include <sys/sysctl.h>
56 #include <sys/vnode.h>
59 #include <vm/vm_param.h>
60 #include <vm/vm_extern.h>
61 #include <vm/vm_page.h>
62 #include <vm/vm_pageout.h>
63 #include <vm/vm_map.h>
65 #include <machine/bus.h>
67 SYSCTL_INT(_kern
, KERN_IOV_MAX
, iov_max
, CTLFLAG_RD
, SYSCTL_NULL_INT_PTR
, UIO_MAXIOV
,
68 "Maximum number of elements in an I/O vector; sysconf(_SC_IOV_MAX)");
70 static int uiomove_faultflag(void *cp
, int n
, struct uio
*uio
, int nofault
);
73 copyin_nofault(const void *udaddr
, void *kaddr
, size_t len
)
77 save
= vm_fault_disable_pagefaults();
78 error
= copyin(udaddr
, kaddr
, len
);
79 vm_fault_enable_pagefaults(save
);
84 copyout_nofault(const void *kaddr
, void *udaddr
, size_t len
)
88 save
= vm_fault_disable_pagefaults();
89 error
= copyout(kaddr
, udaddr
, len
);
90 vm_fault_enable_pagefaults(save
);
94 #define PHYS_PAGE_COUNT(len) (howmany(len, PAGE_SIZE) + 1)
97 physcopyin(void *src
, vm_paddr_t dst
, size_t len
)
99 vm_page_t m
[PHYS_PAGE_COUNT(len
)];
104 iov
[0].iov_base
= src
;
105 iov
[0].iov_len
= len
;
110 uio
.uio_segflg
= UIO_SYSSPACE
;
111 uio
.uio_rw
= UIO_WRITE
;
112 for (i
= 0; i
< PHYS_PAGE_COUNT(len
); i
++, dst
+= PAGE_SIZE
)
113 m
[i
] = PHYS_TO_VM_PAGE(dst
);
114 return (uiomove_fromphys(m
, dst
& PAGE_MASK
, len
, &uio
));
118 physcopyout(vm_paddr_t src
, void *dst
, size_t len
)
120 vm_page_t m
[PHYS_PAGE_COUNT(len
)];
125 iov
[0].iov_base
= dst
;
126 iov
[0].iov_len
= len
;
131 uio
.uio_segflg
= UIO_SYSSPACE
;
132 uio
.uio_rw
= UIO_READ
;
133 for (i
= 0; i
< PHYS_PAGE_COUNT(len
); i
++, src
+= PAGE_SIZE
)
134 m
[i
] = PHYS_TO_VM_PAGE(src
);
135 return (uiomove_fromphys(m
, src
& PAGE_MASK
, len
, &uio
));
138 #undef PHYS_PAGE_COUNT
141 physcopyin_vlist(bus_dma_segment_t
*src
, off_t offset
, vm_paddr_t dst
,
148 while (offset
>= src
->ds_len
) {
149 offset
-= src
->ds_len
;
153 while (len
> 0 && error
== 0) {
154 seg_len
= MIN(src
->ds_len
- offset
, len
);
155 error
= physcopyin((void *)(uintptr_t)(src
->ds_addr
+ offset
),
167 physcopyout_vlist(vm_paddr_t src
, bus_dma_segment_t
*dst
, off_t offset
,
174 while (offset
>= dst
->ds_len
) {
175 offset
-= dst
->ds_len
;
179 while (len
> 0 && error
== 0) {
180 seg_len
= MIN(dst
->ds_len
- offset
, len
);
181 error
= physcopyout(src
, (void *)(uintptr_t)(dst
->ds_addr
+
193 uiomove(void *cp
, int n
, struct uio
*uio
)
196 return (uiomove_faultflag(cp
, n
, uio
, 0));
200 uiomove_nofault(void *cp
, int n
, struct uio
*uio
)
203 return (uiomove_faultflag(cp
, n
, uio
, 1));
207 uiomove_faultflag(void *cp
, int n
, struct uio
*uio
, int nofault
)
212 int error
, newflags
, save
;
217 KASSERT(uio
->uio_rw
== UIO_READ
|| uio
->uio_rw
== UIO_WRITE
,
219 KASSERT(uio
->uio_segflg
!= UIO_USERSPACE
|| uio
->uio_td
== td
,
222 WITNESS_WARN(WARN_GIANTOK
| WARN_SLEEPOK
, NULL
,
223 "Calling uiomove()");
225 /* XXX does it make sense to set TDP_DEADLKTREAT for UIO_SYSSPACE? */
226 newflags
= TDP_DEADLKTREAT
;
227 if (uio
->uio_segflg
== UIO_USERSPACE
&& nofault
) {
229 * Fail if a non-spurious page fault occurs.
231 newflags
|= TDP_NOFAULTING
| TDP_RESETSPUR
;
233 save
= curthread_pflags_set(newflags
);
235 while (n
> 0 && uio
->uio_resid
) {
246 switch (uio
->uio_segflg
) {
250 if (uio
->uio_rw
== UIO_READ
)
251 error
= copyout(cp
, iov
->iov_base
, cnt
);
253 error
= copyin(iov
->iov_base
, cp
, cnt
);
259 if (uio
->uio_rw
== UIO_READ
)
260 bcopy(cp
, iov
->iov_base
, cnt
);
262 bcopy(iov
->iov_base
, cp
, cnt
);
267 iov
->iov_base
= (char *)iov
->iov_base
+ cnt
;
269 uio
->uio_resid
-= cnt
;
270 uio
->uio_offset
+= cnt
;
271 cp
= (char *)cp
+ cnt
;
275 curthread_pflags_restore(save
);
280 * Wrapper for uiomove() that validates the arguments against a known-good
281 * kernel buffer. Currently, uiomove accepts a signed (n) argument, which
282 * is almost definitely a bad thing, so we catch that here as well. We
283 * return a runtime failure, but it might be desirable to generate a runtime
284 * assertion failure instead.
287 uiomove_frombuf(void *buf
, int buflen
, struct uio
*uio
)
291 if (uio
->uio_offset
< 0 || uio
->uio_resid
< 0 ||
292 (offset
= uio
->uio_offset
) != uio
->uio_offset
)
294 if (buflen
<= 0 || offset
>= buflen
)
296 if ((n
= buflen
- offset
) > IOSIZE_MAX
)
298 return (uiomove((char *)buf
+ offset
, n
, uio
));
302 * Give next character to user as result of read.
305 ureadc(int c
, struct uio
*uio
)
310 WITNESS_WARN(WARN_GIANTOK
| WARN_SLEEPOK
, NULL
,
314 if (uio
->uio_iovcnt
== 0 || uio
->uio_resid
== 0)
317 if (iov
->iov_len
== 0) {
322 switch (uio
->uio_segflg
) {
325 if (subyte(iov
->iov_base
, c
) < 0)
330 iov_base
= iov
->iov_base
;
337 iov
->iov_base
= (char *)iov
->iov_base
+ 1;
345 copyinfrom(const void * __restrict src
, void * __restrict dst
, size_t len
,
352 error
= copyin(src
, dst
, len
);
355 bcopy(src
, dst
, len
);
358 panic("copyinfrom: bad seg %d\n", seg
);
364 copyinstrfrom(const void * __restrict src
, void * __restrict dst
, size_t len
,
365 size_t * __restrict copied
, int seg
)
371 error
= copyinstr(src
, dst
, len
, copied
);
374 error
= copystr(src
, dst
, len
, copied
);
377 panic("copyinstrfrom: bad seg %d\n", seg
);
383 copyiniov(const struct iovec
*iovp
, u_int iovcnt
, struct iovec
**iov
, int error
)
388 if (iovcnt
> UIO_MAXIOV
)
390 iovlen
= iovcnt
* sizeof (struct iovec
);
391 *iov
= malloc(iovlen
, M_IOV
, M_WAITOK
);
392 error
= copyin(iovp
, *iov
, iovlen
);
401 copyinuio(const struct iovec
*iovp
, u_int iovcnt
, struct uio
**uiop
)
409 if (iovcnt
> UIO_MAXIOV
)
411 iovlen
= iovcnt
* sizeof (struct iovec
);
412 uio
= malloc(iovlen
+ sizeof *uio
, M_IOV
, M_WAITOK
);
413 iov
= (struct iovec
*)(uio
+ 1);
414 error
= copyin(iovp
, iov
, iovlen
);
420 uio
->uio_iovcnt
= iovcnt
;
421 uio
->uio_segflg
= UIO_USERSPACE
;
422 uio
->uio_offset
= -1;
424 for (i
= 0; i
< iovcnt
; i
++) {
425 if (iov
->iov_len
> IOSIZE_MAX
- uio
->uio_resid
) {
429 uio
->uio_resid
+= iov
->iov_len
;
437 cloneuio(struct uio
*uiop
)
442 iovlen
= uiop
->uio_iovcnt
* sizeof (struct iovec
);
443 uio
= malloc(iovlen
+ sizeof *uio
, M_IOV
, M_WAITOK
);
445 uio
->uio_iov
= (struct iovec
*)(uio
+ 1);
446 bcopy(uiop
->uio_iov
, uio
->uio_iov
, iovlen
);
451 * Map some anonymous memory in user space of size sz, rounded up to the page boundary.
455 copyout_map(struct thread
*td
, vm_offset_t
*addr
, size_t sz
)
461 vms
= td
->td_proc
->p_vmspace
;
464 * Map somewhere after heap in process memory.
466 *addr
= round_page((vm_offset_t
)vms
->vm_daddr
+
467 lim_max(td
, RLIMIT_DATA
));
469 /* round size up to page boundary */
470 size
= (vm_size_t
)round_page(sz
);
472 error
= vm_mmap(&vms
->vm_map
, addr
, size
, VM_PROT_READ
| VM_PROT_WRITE
,
473 VM_PROT_ALL
, MAP_PRIVATE
| MAP_ANON
, OBJT_DEFAULT
, NULL
, 0);
479 * Unmap memory in user space.
482 copyout_unmap(struct thread
*td
, vm_offset_t addr
, size_t sz
)
490 map
= &td
->td_proc
->p_vmspace
->vm_map
;
491 size
= (vm_size_t
)round_page(sz
);
493 if (vm_map_remove(map
, addr
, addr
+ size
) != KERN_SUCCESS
)
501 * XXXKIB A temporary implementation of the fue*() functions, which do not
502 * handle a usermode value of -1 properly: it cannot be told apart from the
503 * fault code. Keep this until MD code is written. Currently sparc64 and
504 * mips do not have a proper implementation.
508 fueword(volatile const void *base
, long *val
)
520 fueword32(volatile const void *base
, int32_t *val
)
524 res
= fuword32(base
);
533 fueword64(volatile const void *base
, int64_t *val
)
537 res
= fuword64(base
);
546 casueword32(volatile uint32_t *base
, uint32_t oldval
, uint32_t *oldvalp
,
551 ov
= casuword32(base
, oldval
, newval
);
559 casueword(volatile u_long
*p
, u_long oldval
, u_long
*oldvalp
, u_long newval
)
563 ov
= casuword(p
, oldval
, newval
);
569 #else /* NO_FUEWORD */
571 fuword32(volatile const void *addr
)
576 rv
= fueword32(addr
, &val
);
577 return (rv
== -1 ? -1 : val
);
582 fuword64(volatile const void *addr
)
587 rv
= fueword64(addr
, &val
);
588 return (rv
== -1 ? -1 : val
);
593 fuword(volatile const void *addr
)
598 rv
= fueword(addr
, &val
);
599 return (rv
== -1 ? -1 : val
);
603 casuword32(volatile uint32_t *addr
, uint32_t old
, uint32_t new)
608 rv
= casueword32(addr
, old
, &val
, new);
609 return (rv
== -1 ? -1 : val
);
613 casuword(volatile u_long
*addr
, u_long old
, u_long
new)
618 rv
= casueword(addr
, old
, &val
, new);
619 return (rv
== -1 ? -1 : val
);
622 #endif /* NO_FUEWORD */