2 * Copyright (c) 1982, 1986, 1991, 1993
3 * The Regents of the University of California. All rights reserved.
4 * (c) UNIX System Laboratories, Inc.
5 * All or some portions of this file are derived from material licensed
6 * to the University of California by American Telephone and Telegraph
7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8 * the permission of UNIX System Laboratories, Inc.
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 * must display the following acknowledgement:
20 * This product includes software developed by the University of
21 * California, Berkeley and its contributors.
22 * 4. Neither the name of the University nor the names of its contributors
23 * may be used to endorse or promote products derived from this software
24 * without specific prior written permission.
26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
38 * @(#)kern_subr.c 8.3 (Berkeley) 1/21/94
39 * $FreeBSD: src/sys/kern/kern_subr.c,v 1.31.2.2 2002/04/21 08:09:37 bde Exp $
42 #include <sys/param.h>
43 #include <sys/systm.h>
44 #include <sys/kernel.h>
46 #include <sys/malloc.h>
48 #include <sys/resourcevar.h>
49 #include <sys/sysctl.h>
51 #include <sys/vnode.h>
52 #include <sys/thread2.h>
53 #include <machine/limits.h>
55 #include <cpu/lwbuf.h>
58 #include <vm/vm_page.h>
59 #include <vm/vm_map.h>
61 SYSCTL_INT(_kern
, KERN_IOV_MAX
, iov_max
, CTLFLAG_RD
, NULL
, UIO_MAXIOV
,
62 "Maximum number of elements in an I/O vector; sysconf(_SC_IOV_MAX)");
65 * UIO_READ: copy the kernelspace cp to the user or kernelspace UIO
66 * UIO_WRITE: copy the user or kernelspace UIO to the kernelspace cp
68 * For userspace UIO's, uio_td must be the current thread.
70 * The syscall interface is responsible for limiting the length to
71 * ssize_t for things like read() or write() which return the bytes
72 * read or written as ssize_t. These functions work with unsigned
76 uiomove(caddr_t cp
, size_t n
, struct uio
*uio
)
78 thread_t td
= curthread
;
85 KASSERT(uio
->uio_rw
== UIO_READ
|| uio
->uio_rw
== UIO_WRITE
,
87 KASSERT(uio
->uio_segflg
!= UIO_USERSPACE
|| uio
->uio_td
== td
,
91 save
= td
->td_flags
& TDF_DEADLKTREAT
;
92 td
->td_flags
|= TDF_DEADLKTREAT
;
97 while (n
> 0 && uio
->uio_resid
) {
109 switch (uio
->uio_segflg
) {
113 if (uio
->uio_rw
== UIO_READ
)
114 error
= copyout(cp
, iov
->iov_base
, cnt
);
116 error
= copyin(iov
->iov_base
, cp
, cnt
);
122 if (uio
->uio_rw
== UIO_READ
)
123 bcopy(cp
, iov
->iov_base
, cnt
);
125 bcopy(iov
->iov_base
, cp
, cnt
);
130 iov
->iov_base
= (char *)iov
->iov_base
+ cnt
;
132 uio
->uio_resid
-= cnt
;
133 uio
->uio_offset
+= cnt
;
138 td
->td_flags
= (td
->td_flags
& ~TDF_DEADLKTREAT
) | save
;
144 * This is the same as uiomove() except (cp, n) is within the bounds of
145 * the passed, locked buffer. Under certain circumstances a VM fault
146 * occuring with a locked buffer held can result in a deadlock or an
147 * attempt to recursively lock the buffer.
149 * This procedure deals with these cases.
151 * If the buffer represents a regular file, is B_CACHE, but the last VM page
152 * is not fully valid we fix-up the last VM page. This should handle the
153 * recursive lock issue.
155 * Deadlocks are another issue. We are holding the vp and the bp locked
156 * and could deadlock against a different vp and/or bp if another thread is
157 * trying to access us while we accessing it. The only solution here is
158 * to release the bp and vnode lock and do the uio to/from a system buffer,
159 * then regain the locks and copyback (if applicable). XXX TODO.
162 uiomovebp(struct buf
*bp
, caddr_t cp
, size_t n
, struct uio
*uio
)
167 if (bp
->b_vp
&& bp
->b_vp
->v_type
== VREG
&&
168 (bp
->b_flags
& B_CACHE
) &&
169 (count
= bp
->b_xio
.xio_npages
) != 0 &&
170 (m
= bp
->b_xio
.xio_pages
[count
-1])->valid
!= VM_PAGE_BITS_ALL
) {
171 vm_page_zero_invalid(m
, TRUE
);
173 return (uiomove(cp
, n
, uio
));
177 * Like uiomove() but copies zero-fill. Only allowed for UIO_READ,
178 * for obvious reasons.
181 uiomovez(size_t n
, struct uio
*uio
)
187 KASSERT(uio
->uio_rw
== UIO_READ
, ("uiomovez: mode"));
188 KASSERT(uio
->uio_segflg
!= UIO_USERSPACE
|| uio
->uio_td
== curthread
,
191 while (n
> 0 && uio
->uio_resid
) {
202 switch (uio
->uio_segflg
) {
204 error
= copyout(ZeroPage
, iov
->iov_base
, cnt
);
209 bzero(iov
->iov_base
, cnt
);
214 iov
->iov_base
= (char *)iov
->iov_base
+ cnt
;
216 uio
->uio_resid
-= cnt
;
217 uio
->uio_offset
+= cnt
;
224 * Wrapper for uiomove() that validates the arguments against a known-good
225 * kernel buffer. This function automatically indexes the buffer by
226 * uio_offset and handles all range checking.
229 uiomove_frombuf(void *buf
, size_t buflen
, struct uio
*uio
)
233 offset
= (size_t)uio
->uio_offset
;
234 if ((off_t
)offset
!= uio
->uio_offset
)
236 if (buflen
== 0 || offset
>= buflen
)
238 return (uiomove((char *)buf
+ offset
, buflen
- offset
, uio
));
242 * Give next character to user as result of read.
245 ureadc(int c
, struct uio
*uio
)
251 if (uio
->uio_iovcnt
== 0 || uio
->uio_resid
== 0)
254 if (iov
->iov_len
== 0) {
259 switch (uio
->uio_segflg
) {
262 if (subyte(iov
->iov_base
, c
) < 0)
267 iov_base
= iov
->iov_base
;
269 iov
->iov_base
= iov_base
;
275 iov
->iov_base
= (char *)iov
->iov_base
+ 1;
283 * General routine to allocate a hash table. Make the hash table size a
284 * power of 2 greater or equal to the number of elements requested, and
285 * store the masking value in *hashmask.
288 hashinit(int elements
, struct malloc_type
*type
, u_long
*hashmask
)
291 LIST_HEAD(generic
, generic
) *hashtbl
;
295 panic("hashinit: bad elements");
296 for (hashsize
= 2; hashsize
< elements
; hashsize
<<= 1)
298 hashtbl
= kmalloc((u_long
)hashsize
* sizeof(*hashtbl
), type
, M_WAITOK
);
299 for (i
= 0; i
< hashsize
; i
++)
300 LIST_INIT(&hashtbl
[i
]);
301 *hashmask
= hashsize
- 1;
306 hashdestroy(void *vhashtbl
, struct malloc_type
*type
, u_long hashmask
)
308 LIST_HEAD(generic
, generic
) *hashtbl
, *hp
;
311 for (hp
= hashtbl
; hp
<= &hashtbl
[hashmask
]; hp
++)
312 KASSERT(LIST_EMPTY(hp
), ("%s: hash not empty", __func__
));
313 kfree(hashtbl
, type
);
317 * This is a newer version which allocates a hash table of structures.
319 * The returned array will be zero'd. The caller is responsible for
320 * initializing the structures.
323 hashinit_ext(int elements
, size_t size
, struct malloc_type
*type
,
330 panic("hashinit: bad elements");
331 for (hashsize
= 2; hashsize
< elements
; hashsize
<<= 1)
333 hashtbl
= kmalloc((size_t)hashsize
* size
, type
, M_WAITOK
| M_ZERO
);
334 *hashmask
= hashsize
- 1;
/* Prime table used by phashinit()/phashinit_ext() for table sizing. */
static int primes[] = { 1, 13, 31, 61, 127, 251, 509, 761, 1021, 1531, 2039,
			2557, 3067, 3583, 4093, 4603, 5119, 5623, 6143, 6653,
			7159, 7673, 8191, 12281, 16381, 24571, 32749 };
#define NPRIMES NELEM(primes)
344 * General routine to allocate a prime number sized hash table.
347 phashinit(int elements
, struct malloc_type
*type
, u_long
*nentries
)
350 LIST_HEAD(generic
, generic
) *hashtbl
;
354 panic("phashinit: bad elements");
355 for (i
= 1, hashsize
= primes
[1]; hashsize
<= elements
;) {
359 hashsize
= primes
[i
];
361 hashsize
= primes
[i
- 1];
362 hashtbl
= kmalloc((u_long
)hashsize
* sizeof(*hashtbl
), type
, M_WAITOK
);
363 for (i
= 0; i
< hashsize
; i
++)
364 LIST_INIT(&hashtbl
[i
]);
365 *nentries
= hashsize
;
370 * This is a newer version which allocates a hash table of structures
371 * in a prime-number size.
373 * The returned array will be zero'd. The caller is responsible for
374 * initializing the structures.
377 phashinit_ext(int elements
, size_t size
, struct malloc_type
*type
,
385 panic("phashinit: bad elements");
386 for (i
= 1, hashsize
= primes
[1]; hashsize
<= elements
;) {
390 hashsize
= primes
[i
];
392 hashsize
= primes
[i
- 1];
393 hashtbl
= kmalloc((size_t)hashsize
* size
, type
, M_WAITOK
| M_ZERO
);
394 *nentries
= hashsize
;
399 * Copyin an iovec. If the iovec array fits, use the preallocated small
400 * iovec structure. If it is too big, dynamically allocate an iovec array
401 * of sufficient size.
406 iovec_copyin(struct iovec
*uiov
, struct iovec
**kiov
, struct iovec
*siov
,
407 size_t iov_cnt
, size_t *iov_len
)
413 if (iov_cnt
> UIO_MAXIOV
)
415 if (iov_cnt
> UIO_SMALLIOV
) {
416 *kiov
= kmalloc(sizeof(struct iovec
) * iov_cnt
, M_IOV
,
421 error
= copyin(uiov
, *kiov
, iov_cnt
* sizeof(struct iovec
));
424 for (i
= 0, iovp
= *kiov
; i
< iov_cnt
; i
++, iovp
++) {
426 * Check for both *iov_len overflows and out of
427 * range iovp->iov_len's. We limit to the
428 * capabilities of signed integers.
430 * GCC4 - overflow check opt requires assign/test.
432 len
= *iov_len
+ iovp
->iov_len
;
440 * From userland disallow iovec's which exceed the sized size
441 * limit as the system calls return ssize_t.
443 * NOTE: Internal kernel interfaces can handle the unsigned
446 if (error
== 0 && (ssize_t
)*iov_len
< 0)
450 iovec_free(kiov
, siov
);
456 * Copyright (c) 2004 Alan L. Cox <alc@cs.rice.edu>
457 * Copyright (c) 1982, 1986, 1991, 1993
458 * The Regents of the University of California. All rights reserved.
459 * (c) UNIX System Laboratories, Inc.
460 * All or some portions of this file are derived from material licensed
461 * to the University of California by American Telephone and Telegraph
462 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
463 * the permission of UNIX System Laboratories, Inc.
465 * Redistribution and use in source and binary forms, with or without
466 * modification, are permitted provided that the following conditions
468 * 1. Redistributions of source code must retain the above copyright
469 * notice, this list of conditions and the following disclaimer.
470 * 2. Redistributions in binary form must reproduce the above copyright
471 * notice, this list of conditions and the following disclaimer in the
472 * documentation and/or other materials provided with the distribution.
473 * 4. Neither the name of the University nor the names of its contributors
474 * may be used to endorse or promote products derived from this software
475 * without specific prior written permission.
477 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
478 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
479 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
480 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
481 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
482 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
483 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
484 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
485 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
486 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
489 * @(#)kern_subr.c 8.3 (Berkeley) 1/21/94
490 * $FreeBSD: src/sys/i386/i386/uio_machdep.c,v 1.1 2004/03/21 20:28:36 alc Exp $
494 * Implement uiomove(9) from physical memory using lwbuf's to reduce
495 * the creation and destruction of ephemeral mappings.
498 uiomove_fromphys(vm_page_t
*ma
, vm_offset_t offset
, size_t n
, struct uio
*uio
)
500 struct lwbuf lwb_cache
;
502 struct thread
*td
= curthread
;
505 vm_offset_t page_offset
;
511 KASSERT(uio
->uio_rw
== UIO_READ
|| uio
->uio_rw
== UIO_WRITE
,
512 ("uiomove_fromphys: mode"));
513 KASSERT(uio
->uio_segflg
!= UIO_USERSPACE
|| uio
->uio_td
== curthread
,
514 ("uiomove_fromphys proc"));
517 save
= td
->td_flags
& TDF_DEADLKTREAT
;
518 td
->td_flags
|= TDF_DEADLKTREAT
;
521 while (n
> 0 && uio
->uio_resid
) {
531 page_offset
= offset
& PAGE_MASK
;
532 cnt
= min(cnt
, PAGE_SIZE
- page_offset
);
533 m
= ma
[offset
>> PAGE_SHIFT
];
534 lwb
= lwbuf_alloc(m
, &lwb_cache
);
535 cp
= (char *)lwbuf_kva(lwb
) + page_offset
;
536 switch (uio
->uio_segflg
) {
539 * note: removed uioyield (it was the wrong place to
542 if (uio
->uio_rw
== UIO_READ
)
543 error
= copyout(cp
, iov
->iov_base
, cnt
);
545 error
= copyin(iov
->iov_base
, cp
, cnt
);
552 if (uio
->uio_rw
== UIO_READ
)
553 bcopy(cp
, iov
->iov_base
, cnt
);
555 bcopy(iov
->iov_base
, cp
, cnt
);
561 iov
->iov_base
= (char *)iov
->iov_base
+ cnt
;
563 uio
->uio_resid
-= cnt
;
564 uio
->uio_offset
+= cnt
;
571 td
->td_flags
&= ~TDF_DEADLKTREAT
;