2 * Copyright (c) 2004, 2005 The DragonFly Project. All rights reserved.
4 * This code is derived from software contributed to The DragonFly Project
5 * by Hiten Pandya <hmp@backplane.com> and Matthew Dillon
6 * <dillon@backplane.com>.
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in
16 * the documentation and/or other materials provided with the
18 * 3. Neither the name of The DragonFly Project nor the names of its
19 * contributors may be used to endorse or promote products derived
20 * from this software without specific, prior written permission.
22 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
25 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
26 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
27 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
28 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
29 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
30 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
31 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
32 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35 * The MSF_BUF API was augmented from the SFBUF API:
36 * Copyright (c) 1998 David Greenman. All rights reserved.
37 * src/sys/kern/kern_sfbuf.c,v 1.7 2004/05/13 19:46:18 dillon
39 * $DragonFly: src/sys/kern/kern_msfbuf.c,v 1.21 2007/06/29 21:54:08 dillon Exp $
42 * MSFBUFs cache linear multi-page ephemeral mappings and operate similarly
43 * to SFBUFs. MSFBUFs use XIOs internally to hold the page list and can
44 * be considered to be a KVA wrapper around an XIO.
46 * Like the SFBUF subsystem, the locking and validation of the page array
47 * is the responsibility of the caller. Also like the SFBUF subsystem,
48 * MSFBUFs are SMP-friendly, cache the mappings, and will avoid unnecessary
49 * page invalidations when possible.
51 * MSFBUFs are primarily designed to be used in subsystems that manipulate
52 * XIOs. The DEV and BUF subsystems are a good example.
55 * - Overload XIOs representative of smaller chunks of memory onto the
56 * same KVA space to efficiently cache smaller mappings (filesystem
57 * blocks / buffer cache related).
60 #include <sys/param.h>
61 #include <sys/systm.h>
62 #include <sys/globaldata.h>
63 #include <sys/kernel.h>
64 #include <sys/malloc.h>
65 #include <sys/queue.h>
67 #include <sys/sfbuf.h>
68 #include <sys/sysctl.h>
69 #include <sys/thread.h>
71 #include <sys/msfbuf.h>
76 #include <vm/vm_param.h>
77 #include <vm/vm_extern.h>
78 #include <vm/vm_kern.h>
79 #include <vm/vm_page.h>
80 #include <vm/vm_map.h>
83 #include <sys/thread2.h>
84 #include <vm/vm_page2.h>
/*
 * Malloc type for this subsystem; msf_buf_init() passes M_MSFBUF to
 * kmalloc() when it allocates the msf_bufs array.
 */
86 MALLOC_DEFINE(M_MSFBUF
, "MSFBUF", "direct-copy buffers");
88 /* lists and queues associated with msf_bufs */
89 LIST_HEAD(msf_buf_list
, msf_buf
);
/*
 * Queue of free msf_bufs.  Entries are linked through their free_list
 * field and carry the MSF_ONFREEQ flag while they sit on this queue
 * (see the TAILQ_INSERT_TAIL/TAILQ_REMOVE call sites in this file).
 */
91 TAILQ_HEAD(, msf_buf
) msf_buf_freelist
;
93 /* indicate shortage of available msf_bufs */
/*
 * Incremented by msf_alloc() right before it sleeps on the free list and
 * decremented again after the sleep returns; msf_buf_free() only issues
 * its wakeup_one() while this is greater than zero.
 */
94 static u_int msf_buf_alloc_want
;
96 /* base of the msf_buf map */
/*
 * Start of the KVA range obtained from kmem_alloc_nofault(); buffer i is
 * assigned the address msf_base + i * XIO_INTERNAL_SIZE.
 */
97 static vm_offset_t msf_base
;
/* Array of buffer headers allocated with kmalloc() in msf_buf_init(). */
98 static struct msf_buf
*msf_bufs
;
/*
 * Counters exported read-only through the sysctls defined in this file.
 * NOTE(review): the statements that update them are not visible in this
 * view of the file.
 */
99 static int msf_buf_hits
;
100 static int msf_buf_misses
;
/*
 * Number of buffers to create.  The default can be overridden by the
 * "kern.ipc.msfbufs" tunable, which msf_buf_init() fetches before it
 * sizes the KVA reservation and the msf_bufs array.
 */
102 static int msf_buf_count
= 256; /* magic value */
/*
 * Read-only (CTLFLAG_RD) integer sysctls registered under _kern_ipc with
 * the names msf_bufs, msf_hits and msf_misses.  They expose
 * msf_buf_count, msf_buf_hits and msf_buf_misses respectively.
 */
103 SYSCTL_INT(_kern_ipc
, OID_AUTO
, msf_bufs
, CTLFLAG_RD
, &msf_buf_count
,
104 0, "number of direct-copy buffers available");
105 SYSCTL_INT(_kern_ipc
, OID_AUTO
, msf_hits
, CTLFLAG_RD
, &msf_buf_hits
,
106 0, "direct-copy buffers cache hits");
107 SYSCTL_INT(_kern_ipc
, OID_AUTO
, msf_misses
, CTLFLAG_RD
, &msf_buf_misses
,
108 0, "direct-copy buffers cache misses");
/*
 * msf_buf_init: one-time setup of the msf_buf pool.
 *
 * Resets msf_buf_alloc_want, fetches the "kern.ipc.msfbufs" tunable into
 * msf_buf_count, initializes the free list, reserves
 * msf_buf_count * XIO_INTERNAL_SIZE bytes of KVA from kernel_map and
 * allocates the msf_bufs array.  Every array entry is then given its own
 * KVA slot, marked free and queued on msf_buf_freelist.
 *
 * __dummy is not referenced by any statement visible here.
 *
 * NOTE(review): the return type, local declarations, the remaining
 * kmalloc() arguments, the assignment of 'msf' inside the loop and the
 * closing braces are not visible in this view of the file.
 */
111 msf_buf_init(void *__dummy
)
116 msf_buf_alloc_want
= 0;
/* Let the loader tunable override the compiled-in buffer count. */
117 TUNABLE_INT_FETCH("kern.ipc.msfbufs", &msf_buf_count
);
119 TAILQ_INIT(&msf_buf_freelist
);
/* One XIO_INTERNAL_SIZE-sized KVA window per buffer. */
121 msf_base
= kmem_alloc_nofault(&kernel_map
,
122 msf_buf_count
* XIO_INTERNAL_SIZE
);
124 msf_bufs
= kmalloc(msf_buf_count
* sizeof(struct msf_buf
), M_MSFBUF
,
127 /* Initialize the free list with necessary information. */
128 for (i
= 0; i
< msf_buf_count
; i
++) {
/* Buffer i owns the i-th window of the reserved KVA range. */
130 msf
->ms_kva
= msf_base
+ i
* XIO_INTERNAL_SIZE
;
131 msf
->ms_flags
= MSF_ONFREEQ
;
132 msf
->ms_type
= MSF_TYPE_UNKNOWN
;
/* By default the buffer points at its own embedded XIO. */
133 msf
->ms_xio
= &msf
->ms_internal_xio
;
134 xio_init(&msf
->ms_internal_xio
);
135 TAILQ_INSERT_TAIL(&msf_buf_freelist
, &msf_bufs
[i
], free_list
);
/*
 * Register msf_buf_init() with the SYSINIT mechanism at
 * SI_BOOT2_MACHDEP / SI_ORDER_ANY; NULL is supplied as its argument.
 */
138 SYSINIT(msf_buf
, SI_BOOT2_MACHDEP
, SI_ORDER_ANY
, msf_buf_init
, NULL
);
141 * Get an msf_buf from the freelist; if none are available
142 * then it will block.
144 * If SFB_CATCH was specified in 'flags' then the sleep is
145 * interruptible by signals etc; this flag is normally
146 * used for system calls.
/*
 * msf_alloc: take an msf_buf off the free list.
 *
 * firstpage - optional page used as a cache hint; may be NULL.
 * flags     - SFB_* flags; only SFB_CATCH is examined by the code
 *             visible here.
 *
 * Two paths are visible:
 *  - Hint path: when firstpage is non-NULL and firstpage->msf_hint names
 *    a buffer that is still flagged MSF_ONFREEQ, that buffer is unlinked
 *    from the free list and reused.
 *  - General path: the head of msf_buf_freelist is unlinked and, in the
 *    visible code, recorded in firstpage->msf_hint.  When the list is
 *    empty the caller sleeps on &msf_buf_freelist, with PCATCH when
 *    SFB_CATCH was requested, and msf_buf_alloc_want is raised for the
 *    duration of the sleep.
 *
 * NOTE(review): local declarations, the enclosing loop, reference count
 * and statistics updates, any NULL guard around the msf_hint store, the
 * handling of the tsleep() result and the return statement are not
 * visible in this view of the file.
 */
149 static struct msf_buf
*
150 msf_alloc(vm_page_t firstpage
, int flags
)
157 if (firstpage
&& (msf
= firstpage
->msf_hint
) != NULL
&&
158 (msf
->ms_flags
& MSF_ONFREEQ
)
/* A buffer that is still on the free queue must have no references. */
160 KKASSERT(msf
->ms_refcnt
== 0);
161 msf
->ms_flags
&= ~MSF_ONFREEQ
;
163 TAILQ_REMOVE(&msf_buf_freelist
, msf
, free_list
);
168 * Get a buffer off the freelist. If the freelist is empty, we
169 * block until something becomes available; this happens quite
170 * quickly anyway because MSFBUFs are supposed to be temporary
173 * If the SFB_CATCH flag was provided, then we allow the sleep
174 * to be interruptible.
177 if ((msf
= TAILQ_FIRST(&msf_buf_freelist
)) != NULL
) {
178 KKASSERT(msf
->ms_refcnt
== 0);
180 TAILQ_REMOVE(&msf_buf_freelist
, msf
, free_list
);
181 msf
->ms_flags
&= ~MSF_ONFREEQ
;
/* Remember the buffer on the page so a later lookup can take the hint path. */
184 firstpage
->msf_hint
= msf
;
/* Nothing free: sleep until msf_buf_free() wakes the free list channel. */
187 pflags
= (flags
& SFB_CATCH
) ? PCATCH
: 0;
188 ++msf_buf_alloc_want
;
189 error
= tsleep(&msf_buf_freelist
, pflags
, "msfbuf", 0);
190 --msf_buf_alloc_want
;
/*
 * msf_map_msf: enter the pages described by msf->ms_xio into the kernel
 * pmap at the buffer's KVA (msf->ms_kva).
 *
 * msf   - buffer whose ms_xio page array is to be mapped.
 * flags - SFB_* flags; SFB_CPUPRIVATE selects pmap_qenter2(), which is
 *         additionally handed &msf->ms_cpumask.  Otherwise pmap_qenter()
 *         is used and ms_cpumask is set to all ones.
 *
 * NOTE(review): the return type, braces and whatever separates the final
 * pmap_qenter2() call from the code before it (presumably an else or a
 * preprocessor branch) are not visible in this view - confirm against
 * the full file.
 */
202 msf_map_msf(struct msf_buf
*msf
, int flags
)
205 if (flags
& SFB_CPUPRIVATE
) {
206 pmap_qenter2(msf
->ms_kva
, msf
->ms_xio
->xio_pages
,
207 msf
->ms_xio
->xio_npages
, &msf
->ms_cpumask
);
209 pmap_qenter(msf
->ms_kva
, msf
->ms_xio
->xio_pages
,
210 msf
->ms_xio
->xio_npages
);
/* All mask bits set; presumably marks the mapping valid on every cpu. */
211 msf
->ms_cpumask
= (cpumask_t
)-1;
214 pmap_qenter2(msf
->ms_kva
, msf
->ms_xio
->xio_pages
,
215 msf
->ms_xio
->xio_npages
, &msf
->ms_cpumask
);
/*
 * msf_map_pagelist: map an array of pages through an msf_buf.
 *
 * msfp   - output pointer for the resulting buffer (the store through it
 *          is not visible in this view).
 * list   - array of npages vm_page_t entries; list[0] is passed to
 *          msf_alloc() as the cache hint.
 * npages - number of pages; asserted to be non-zero and no larger than
 *          XIO_INTERNAL_PAGES.
 * flags  - SFB_* flags forwarded to msf_alloc() and msf_map_msf().
 *
 * On a successful allocation the pages are copied into the buffer's
 * embedded XIO, which is set up with offset 0 and a byte count of
 * npages << PAGE_SHIFT.  The buffer is then typed MSF_TYPE_PGLIST and
 * mapped.
 *
 * NOTE(review): the return type, local declarations, the failure branch
 * and the return value are not visible in this view of the file.
 */
220 msf_map_pagelist(struct msf_buf
**msfp
, vm_page_t
*list
, int npages
, int flags
)
225 KKASSERT(npages
!= 0 && npages
<= XIO_INTERNAL_PAGES
);
227 if ((msf
= msf_alloc(list
[0], flags
)) != NULL
) {
/* A freshly allocated buffer is expected to point at its embedded XIO. */
228 KKASSERT(msf
->ms_xio
== &msf
->ms_internal_xio
);
229 for (i
= 0; i
< npages
; ++i
)
230 msf
->ms_internal_xio
.xio_pages
[i
] = list
[i
];
231 msf
->ms_internal_xio
.xio_offset
= 0;
232 msf
->ms_internal_xio
.xio_npages
= npages
;
233 msf
->ms_internal_xio
.xio_bytes
= npages
<< PAGE_SHIFT
;
234 msf
->ms_type
= MSF_TYPE_PGLIST
;
235 msf_map_msf(msf
, flags
);
/*
 * msf_map_xio: map the pages of a caller-supplied XIO through an msf_buf.
 *
 * msfp  - output pointer for the resulting buffer (the store through it
 *         is not visible in this view).
 * xio   - XIO to map; asserted non-NULL with 1..XIO_INTERNAL_PAGES pages.
 *         Its first page is passed to msf_alloc() as the cache hint.
 * flags - SFB_* flags forwarded to msf_alloc() and msf_map_msf().
 *
 * NOTE(review): the return type, local declarations, the statement
 * between the type assignment and msf_map_msf() (presumably the one that
 * points ms_xio at the caller's XIO), the failure branch and the return
 * value are not visible in this view - confirm against the full file.
 */
245 msf_map_xio(struct msf_buf
**msfp
, struct xio
*xio
, int flags
)
249 KKASSERT(xio
!= NULL
&& xio
->xio_npages
> 0);
250 KKASSERT(xio
->xio_npages
<= XIO_INTERNAL_PAGES
);
252 if ((msf
= msf_alloc(xio
->xio_pages
[0], flags
)) != NULL
) {
253 msf
->ms_type
= MSF_TYPE_XIO
;
255 msf_map_msf(msf
, flags
);
/*
 * msf_map_ubuf: map a range of the current thread's user address space
 * through an msf_buf.
 *
 * msfp   - output pointer for the resulting buffer (the store through it
 *          is not visible in this view).
 * base   - start address of the range.
 * nbytes - length of the range; the page offset of base plus nbytes is
 *          checked against XIO_INTERNAL_SIZE.
 * flags  - SFB_* flags forwarded to msf_alloc() and msf_map_msf().
 *
 * The physical address behind base is looked up in the pmap of
 * curthread's lwp vmspace.  When the lookup yields a non-zero address the
 * corresponding vm_page is passed to msf_alloc() as the cache hint;
 * the other visible call passes NULL.  The buffer's embedded XIO is then
 * initialized from the range with xio_init_ubuf(), the buffer is mapped
 * and typed MSF_TYPE_UBUF.
 *
 * NOTE(review): the return type, local declarations, what happens when
 * the size check fails, the else keyword, the handling of a failed
 * msf_alloc() or xio_init_ubuf() and the return value are not visible in
 * this view of the file.
 */
265 msf_map_ubuf(struct msf_buf
**msfp
, void *base
, size_t nbytes
, int flags
)
271 if (((int)(intptr_t)base
& PAGE_MASK
) + nbytes
> XIO_INTERNAL_SIZE
) {
276 if ((paddr
= pmap_extract(&curthread
->td_lwp
->lwp_vmspace
->vm_pmap
,
277 (vm_offset_t
)base
)) != 0)
278 msf
= msf_alloc(PHYS_TO_VM_PAGE(paddr
), flags
);
280 msf
= msf_alloc(NULL
, flags
);
285 error
= xio_init_ubuf(&msf
->ms_internal_xio
, base
, nbytes
, 0);
287 KKASSERT(msf
->ms_xio
== &msf
->ms_internal_xio
);
288 msf_map_msf(msf
, flags
);
289 msf
->ms_type
= MSF_TYPE_UBUF
;
/*
 * msf_map_kbuf: map a range of kernel memory through an msf_buf.
 *
 * msfp   - output pointer for the resulting buffer (the store through it
 *          is not visible in this view).
 * base   - start address of the range.
 * nbytes - length of the range; the page offset of base plus nbytes is
 *          checked against XIO_INTERNAL_SIZE.
 * flags  - SFB_* flags forwarded to msf_alloc() and msf_map_msf().
 *
 * Mirrors msf_map_ubuf() but resolves the hint page with pmap_kextract()
 * and fills the embedded XIO with xio_init_kbuf(); the buffer is typed
 * MSF_TYPE_KBUF.
 *
 * NOTE(review): the return type, local declarations, what happens when
 * the size check fails, the else keyword, the handling of a failed
 * msf_alloc() or xio_init_kbuf() and the return value are not visible in
 * this view of the file.
 */
300 msf_map_kbuf(struct msf_buf
**msfp
, void *base
, size_t nbytes
, int flags
)
306 if (((int)(intptr_t)base
& PAGE_MASK
) + nbytes
> XIO_INTERNAL_SIZE
) {
311 if ((paddr
= pmap_kextract((vm_offset_t
)base
)) != 0)
312 msf
= msf_alloc(PHYS_TO_VM_PAGE(paddr
), flags
);
314 msf
= msf_alloc(NULL
, flags
);
319 error
= xio_init_kbuf(&msf
->ms_internal_xio
, base
, nbytes
);
321 KKASSERT(msf
->ms_xio
== &msf
->ms_internal_xio
);
322 msf_map_msf(msf
, flags
);
323 msf
->ms_type
= MSF_TYPE_KBUF
;
334 * Iterate through the specified uio calling the function with a kernel buffer
335 * containing the data until the uio has been exhausted. If the uio
336 * represents system space no mapping occurs. If the uio represents user
337 * space the data is mapped into system space in chunks. This function does
338 * not guarantee any particular alignment or minimum chunk size; it will
339 * depend on the limitations of MSF buffers and the breakdown of the UIO's
/*
 * msf_uio_iterate: feed the contents of a uio to a callback, one kernel
 * addressable chunk at a time.
 *
 * uio      - I/O descriptor; uio_segflg selects how its iovecs are
 *            walked.
 * callback - invoked as callback(info, buf, bytes) for every chunk.
 * info     - opaque pointer handed through to the callback.
 *
 * Two loops are visible:
 *  - The first maps each iovec through msf_map_ubuf() in pieces.  A piece
 *    is clamped so that the page offset of iov_base plus the piece length
 *    does not exceed XIO_INTERNAL_SIZE, and the callback receives
 *    msf_buf_kva(msf).  This loop stops advancing once error is non-zero.
 *  - The second hands iov_base/iov_len straight to the callback without
 *    any mapping, and tests for zero-length iovecs.
 *
 * NOTE(review): the return type, local declarations, the case labels that
 * tie each loop to a uio_segflg value, the initialization and advance of
 * 'offset', the release of the msf_buf after the callback, the action
 * taken for zero-length iovecs and the return value are not visible in
 * this view of the file.
 */
343 msf_uio_iterate(struct uio
*uio
,
344 int (*callback
)(void *info
, char *buf
, int bytes
), void *info
)
354 switch (uio
->uio_segflg
) {
357 for (i
= 0; i
< uio
->uio_iovcnt
&& error
== 0; ++i
) {
358 iov
= &uio
->uio_iov
[i
];
/* Offset of the iovec's base address within its page. */
360 pgoff
= (int)(intptr_t)iov
->iov_base
& PAGE_MASK
;
361 while (offset
< iov
->iov_len
) {
362 bytes
= iov
->iov_len
- offset
;
/* Clamp the piece so page offset + length fits one msf_buf window. */
363 if (bytes
+ pgoff
> XIO_INTERNAL_SIZE
)
364 bytes
= XIO_INTERNAL_SIZE
- pgoff
;
365 error
= msf_map_ubuf(&msf
, iov
->iov_base
+ offset
, bytes
, 0);
368 error
= callback(info
, msf_buf_kva(msf
), bytes
);
379 for (i
= 0; i
< uio
->uio_iovcnt
; ++i
) {
380 iov
= &uio
->uio_iov
[i
];
381 if (iov
->iov_len
== 0)
383 error
= callback(info
, iov
->iov_base
, iov
->iov_len
);
397 * Add a reference to a buffer (currently unused)
/*
 * msf_buf_ref: reference an msf_buf that is already in use.
 *
 * Panics when msf->ms_refcnt is zero, i.e. when the buffer is free.
 *
 * NOTE(review): the return type and the statement that actually bumps the
 * reference count are not visible in this view of the file.
 */
400 msf_buf_ref(struct msf_buf
*msf
)
402 if (msf
->ms_refcnt
== 0)
403 panic("msf_buf_ref: referencing a free msf_buf");
411 * Lose a reference to an msf_buf. When none left, detach mapped page
412 * and release resources back to the system. Note that the sfbuf's
413 * removal from the freelist is delayed, so it may in fact already be
414 * on the free list. This is the optimal (and most likely) scenario.
/*
 * msf_buf_free: drop one reference on an msf_buf.
 *
 * The reference count is asserted to be positive and then decremented.
 * When it reaches zero the buffer, which is asserted not to be on the
 * free queue already, is recycled:
 *  - for MSF_TYPE_UBUF and MSF_TYPE_KBUF buffers xio_release() is called
 *    on ms_xio (in this file those are the types whose embedded XIO is
 *    filled by xio_init_ubuf()/xio_init_kbuf());
 *  - the type is reset to MSF_TYPE_UNKNOWN, ms_xio is pointed back at the
 *    embedded XIO and MSF_ONFREEQ is set;
 *  - the buffer is appended to msf_buf_freelist and, if msf_buf_alloc_want
 *    is non-zero, one sleeper on &msf_buf_freelist is woken.
 *
 * NOTE(review): the return type, braces and any statements on the source
 * lines missing from this view (for example locking or counter updates)
 * are not visible here.
 */
417 msf_buf_free(struct msf_buf
*msf
)
419 KKASSERT(msf
->ms_refcnt
> 0);
422 if (--msf
->ms_refcnt
== 0) {
423 KKASSERT((msf
->ms_flags
& MSF_ONFREEQ
) == 0);
425 if (msf
->ms_type
== MSF_TYPE_UBUF
|| msf
->ms_type
== MSF_TYPE_KBUF
)
426 xio_release(msf
->ms_xio
);
428 msf
->ms_type
= MSF_TYPE_UNKNOWN
;
429 msf
->ms_flags
|= MSF_ONFREEQ
;
430 msf
->ms_xio
= &msf
->ms_internal_xio
;
431 TAILQ_INSERT_TAIL(&msf_buf_freelist
, msf
, free_list
);
/* Only pay for a wakeup when msf_alloc() has registered a waiter. */
433 if (msf_buf_alloc_want
> 0)
434 wakeup_one(&msf_buf_freelist
);