Add tunable for each_burst.
[dragonfly.git] / sys / kern / kern_msfbuf.c
blob8a73e58c1f815010e6a8e4f2b587c5c5b17825c9
1 /*
2 * Copyright (c) 2004, 2005 The DragonFly Project. All rights reserved.
4 * This code is derived from software contributed to The DragonFly Project
5 * by Hiten Pandya <hmp@backplane.com> and Matthew Dillon
6 * <dillon@backplane.com>.
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in
16 * the documentation and/or other materials provided with the
17 * distribution.
18 * 3. Neither the name of The DragonFly Project nor the names of its
19 * contributors may be used to endorse or promote products derived
20 * from this software without specific, prior written permission.
22 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
25 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
26 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
27 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
28 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
29 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
30 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
31 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
32 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
35 * The MSF_BUF API was augmented from the SFBUF API:
36 * Copyright (c) 1998 David Greenman. All rights reserved.
37 * src/sys/kern/kern_sfbuf.c,v 1.7 2004/05/13 19:46:18 dillon
39 * $DragonFly: src/sys/kern/kern_msfbuf.c,v 1.21 2007/06/29 21:54:08 dillon Exp $
42 * MSFBUFs cache linear multi-page ephemeral mappings and operate similarly
43 * to SFBUFs. MSFBUFs use XIO's internally to hold the page list and can
44 * be considered to be a KVA wrapper around an XIO.
46 * Like the SFBUF subsystem, the locking and validation of the page array
47 * is the responsibility of the caller. Also like the SFBUF subsystem,
48 * MSFBUFs are SMP-friendly, cache the mappings, and will avoid unnecessary
49 * page invalidations when possible.
51 * MSFBUFs are primarily designed to be used in subsystems that manipulate
52 * XIOs. The DEV and BUF subsystems are a good example.
54 * TODO LIST:
55 * - Overload XIOs representative of smaller chunks of memory onto the
56 * same KVA space to efficiently cache smaller mappings (filesystem
57 * blocks / buffer cache related).
60 #include <sys/param.h>
61 #include <sys/systm.h>
62 #include <sys/globaldata.h>
63 #include <sys/kernel.h>
64 #include <sys/malloc.h>
65 #include <sys/queue.h>
66 #include <sys/proc.h>
67 #include <sys/sfbuf.h>
68 #include <sys/sysctl.h>
69 #include <sys/thread.h>
70 #include <sys/xio.h>
71 #include <sys/msfbuf.h>
72 #include <sys/uio.h>
73 #include <sys/lock.h>
75 #include <vm/vm.h>
76 #include <vm/vm_param.h>
77 #include <vm/vm_extern.h>
78 #include <vm/vm_kern.h>
79 #include <vm/vm_page.h>
80 #include <vm/vm_map.h>
81 #include <vm/pmap.h>
83 #include <sys/thread2.h>
84 #include <vm/vm_page2.h>
86 MALLOC_DEFINE(M_MSFBUF, "MSFBUF", "direct-copy buffers");
88 /* lists and queues associated with msf_bufs */
89 LIST_HEAD(msf_buf_list, msf_buf);
91 TAILQ_HEAD(, msf_buf) msf_buf_freelist;
93 /* indicate shortage of available msf_bufs */
94 static u_int msf_buf_alloc_want;
96 /* base of the msf_buf map */
97 static vm_offset_t msf_base;
98 static struct msf_buf *msf_bufs;
99 static int msf_buf_hits;
100 static int msf_buf_misses;
102 static int msf_buf_count = 256; /* magic value */
103 SYSCTL_INT(_kern_ipc, OID_AUTO, msf_bufs, CTLFLAG_RD, &msf_buf_count,
104 0, "number of direct-copy buffers available");
105 SYSCTL_INT(_kern_ipc, OID_AUTO, msf_hits, CTLFLAG_RD, &msf_buf_hits,
106 0, "direct-copy buffers cache hits");
107 SYSCTL_INT(_kern_ipc, OID_AUTO, msf_misses, CTLFLAG_RD, &msf_buf_misses,
108 0, "direct-copy buffers cache misses");
110 static void
111 msf_buf_init(void *__dummy)
113 struct msf_buf *msf;
114 int i;
116 msf_buf_alloc_want = 0;
117 TUNABLE_INT_FETCH("kern.ipc.msfbufs", &msf_buf_count);
119 TAILQ_INIT(&msf_buf_freelist);
121 msf_base = kmem_alloc_nofault(&kernel_map,
122 msf_buf_count * XIO_INTERNAL_SIZE);
124 msf_bufs = kmalloc(msf_buf_count * sizeof(struct msf_buf), M_MSFBUF,
125 M_WAITOK|M_ZERO);
127 /* Initialize the free list with necessary information. */
128 for (i = 0; i < msf_buf_count; i++) {
129 msf = &msf_bufs[i];
130 msf->ms_kva = msf_base + i * XIO_INTERNAL_SIZE;
131 msf->ms_flags = MSF_ONFREEQ;
132 msf->ms_type = MSF_TYPE_UNKNOWN;
133 msf->ms_xio = &msf->ms_internal_xio;
134 xio_init(&msf->ms_internal_xio);
135 TAILQ_INSERT_TAIL(&msf_buf_freelist, &msf_bufs[i], free_list);
138 SYSINIT(msf_buf, SI_BOOT2_MACHDEP, SI_ORDER_ANY, msf_buf_init, NULL);
141 * Get an msf_buf from the freelist; if none are available
142 * than it will block.
144 * If SFB_CATCH was specified in 'flags' than the sleep is
145 * block is interruptable by signals etc; this flag is normally
146 * use for system calls.
149 static struct msf_buf *
150 msf_alloc(vm_page_t firstpage, int flags)
152 struct msf_buf *msf;
153 int pflags;
154 int error;
156 crit_enter();
157 if (firstpage && (msf = firstpage->msf_hint) != NULL &&
158 (msf->ms_flags & MSF_ONFREEQ)
160 KKASSERT(msf->ms_refcnt == 0);
161 msf->ms_flags &= ~MSF_ONFREEQ;
162 msf->ms_refcnt = 1;
163 TAILQ_REMOVE(&msf_buf_freelist, msf, free_list);
164 --msf_buf_count;
165 ++msf_buf_hits;
166 } else {
168 * Get a buffer off the freelist. If the freelist is empty, we
169 * block until something becomes available; this happens quite
170 * quickly anyway because MSFBUFs are supposed to be temporary
171 * mappings.
173 * If the SFB_CATCH flag was provided, then we allow the sleep
174 * to be interruptible.
176 for (;;) {
177 if ((msf = TAILQ_FIRST(&msf_buf_freelist)) != NULL) {
178 KKASSERT(msf->ms_refcnt == 0);
179 --msf_buf_count;
180 TAILQ_REMOVE(&msf_buf_freelist, msf, free_list);
181 msf->ms_flags &= ~MSF_ONFREEQ;
182 msf->ms_refcnt = 1;
183 if (firstpage)
184 firstpage->msf_hint = msf;
185 break;
187 pflags = (flags & SFB_CATCH) ? PCATCH : 0;
188 ++msf_buf_alloc_want;
189 error = tsleep(&msf_buf_freelist, pflags, "msfbuf", 0);
190 --msf_buf_alloc_want;
191 if (error)
192 break;
194 ++msf_buf_misses;
196 crit_exit();
197 return (msf);
200 static
201 void
202 msf_map_msf(struct msf_buf *msf, int flags)
204 #ifdef SMP
205 if (flags & SFB_CPUPRIVATE) {
206 pmap_qenter2(msf->ms_kva, msf->ms_xio->xio_pages,
207 msf->ms_xio->xio_npages, &msf->ms_cpumask);
208 } else {
209 pmap_qenter(msf->ms_kva, msf->ms_xio->xio_pages,
210 msf->ms_xio->xio_npages);
211 msf->ms_cpumask = (cpumask_t)-1;
213 #else
214 pmap_qenter2(msf->ms_kva, msf->ms_xio->xio_pages,
215 msf->ms_xio->xio_npages, &msf->ms_cpumask);
216 #endif
220 msf_map_pagelist(struct msf_buf **msfp, vm_page_t *list, int npages, int flags)
222 struct msf_buf *msf;
223 int i;
225 KKASSERT(npages != 0 && npages <= XIO_INTERNAL_PAGES);
227 if ((msf = msf_alloc(list[0], flags)) != NULL) {
228 KKASSERT(msf->ms_xio == &msf->ms_internal_xio);
229 for (i = 0; i < npages; ++i)
230 msf->ms_internal_xio.xio_pages[i] = list[i];
231 msf->ms_internal_xio.xio_offset = 0;
232 msf->ms_internal_xio.xio_npages = npages;
233 msf->ms_internal_xio.xio_bytes = npages << PAGE_SHIFT;
234 msf->ms_type = MSF_TYPE_PGLIST;
235 msf_map_msf(msf, flags);
236 *msfp = msf;
237 return (0);
238 } else {
239 *msfp = NULL;
240 return (ENOMEM);
245 msf_map_xio(struct msf_buf **msfp, struct xio *xio, int flags)
247 struct msf_buf *msf;
249 KKASSERT(xio != NULL && xio->xio_npages > 0);
250 KKASSERT(xio->xio_npages <= XIO_INTERNAL_PAGES);
252 if ((msf = msf_alloc(xio->xio_pages[0], flags)) != NULL) {
253 msf->ms_type = MSF_TYPE_XIO;
254 msf->ms_xio = xio;
255 msf_map_msf(msf, flags);
256 *msfp = msf;
257 return(0);
258 } else {
259 *msfp = NULL;
260 return(ENOMEM);
265 msf_map_ubuf(struct msf_buf **msfp, void *base, size_t nbytes, int flags)
267 struct msf_buf *msf;
268 vm_paddr_t paddr;
269 int error;
271 if (((int)(intptr_t)base & PAGE_MASK) + nbytes > XIO_INTERNAL_SIZE) {
272 *msfp = NULL;
273 return (ERANGE);
276 if ((paddr = pmap_extract(&curthread->td_lwp->lwp_vmspace->vm_pmap,
277 (vm_offset_t)base)) != 0)
278 msf = msf_alloc(PHYS_TO_VM_PAGE(paddr), flags);
279 else
280 msf = msf_alloc(NULL, flags);
282 if (msf == NULL) {
283 error = ENOENT;
284 } else {
285 error = xio_init_ubuf(&msf->ms_internal_xio, base, nbytes, 0);
286 if (error == 0) {
287 KKASSERT(msf->ms_xio == &msf->ms_internal_xio);
288 msf_map_msf(msf, flags);
289 msf->ms_type = MSF_TYPE_UBUF;
290 } else {
291 msf_buf_free(msf);
292 msf = NULL;
295 *msfp = msf;
296 return (error);
300 msf_map_kbuf(struct msf_buf **msfp, void *base, size_t nbytes, int flags)
302 struct msf_buf *msf;
303 vm_paddr_t paddr;
304 int error;
306 if (((int)(intptr_t)base & PAGE_MASK) + nbytes > XIO_INTERNAL_SIZE) {
307 *msfp = NULL;
308 return (ERANGE);
311 if ((paddr = pmap_kextract((vm_offset_t)base)) != 0)
312 msf = msf_alloc(PHYS_TO_VM_PAGE(paddr), flags);
313 else
314 msf = msf_alloc(NULL, flags);
316 if (msf == NULL) {
317 error = ENOENT;
318 } else {
319 error = xio_init_kbuf(&msf->ms_internal_xio, base, nbytes);
320 if (error == 0) {
321 KKASSERT(msf->ms_xio == &msf->ms_internal_xio);
322 msf_map_msf(msf, flags);
323 msf->ms_type = MSF_TYPE_KBUF;
324 } else {
325 msf_buf_free(msf);
326 msf = NULL;
329 *msfp = msf;
330 return (error);
334 * Iterate through the specified uio calling the function with a kernel buffer
335 * containing the data until the uio has been exhausted. If the uio
336 * represents system space no mapping occurs. If the uio represents user
337 * space the data is mapped into system space in chunks. This function does
338 * not guarentee any particular alignment or minimum chunk size, it will
339 * depend on the limitations of MSF buffers and the breakdown of the UIO's
340 * elements.
343 msf_uio_iterate(struct uio *uio,
344 int (*callback)(void *info, char *buf, int bytes), void *info)
346 struct msf_buf *msf;
347 struct iovec *iov;
348 size_t offset;
349 size_t bytes;
350 size_t pgoff;
351 int error;
352 int i;
354 switch (uio->uio_segflg) {
355 case UIO_USERSPACE:
356 error = 0;
357 for (i = 0; i < uio->uio_iovcnt && error == 0; ++i) {
358 iov = &uio->uio_iov[i];
359 offset = 0;
360 pgoff = (int)(intptr_t)iov->iov_base & PAGE_MASK;
361 while (offset < iov->iov_len) {
362 bytes = iov->iov_len - offset;
363 if (bytes + pgoff > XIO_INTERNAL_SIZE)
364 bytes = XIO_INTERNAL_SIZE - pgoff;
365 error = msf_map_ubuf(&msf, iov->iov_base + offset, bytes, 0);
366 if (error)
367 break;
368 error = callback(info, msf_buf_kva(msf), bytes);
369 msf_buf_free(msf);
370 if (error)
371 break;
372 pgoff = 0;
373 offset += bytes;
376 break;
377 case UIO_SYSSPACE:
378 error = 0;
379 for (i = 0; i < uio->uio_iovcnt; ++i) {
380 iov = &uio->uio_iov[i];
381 if (iov->iov_len == 0)
382 continue;
383 error = callback(info, iov->iov_base, iov->iov_len);
384 if (error)
385 break;
387 break;
388 default:
389 error = EOPNOTSUPP;
390 break;
392 return (error);
#if 0
/*
 * Take an additional reference on an msf_buf that is already in use
 * (currently unused, hence compiled out).  Panics when the buffer has
 * no references.
 */
void
msf_buf_ref(struct msf_buf *msf)
{
	if (msf->ms_refcnt == 0) {
		panic("msf_buf_ref: referencing a free msf_buf");
	}

	crit_enter();
	msf->ms_refcnt += 1;
	crit_exit();
}
#endif
411 * Lose a reference to an msf_buf. When none left, detach mapped page
412 * and release resources back to the system. Note that the sfbuf's
413 * removal from the freelist is delayed, so it may in fact already be
414 * on the free list. This is the optimal (and most likely) scenario.
416 void
417 msf_buf_free(struct msf_buf *msf)
419 KKASSERT(msf->ms_refcnt > 0);
421 crit_enter();
422 if (--msf->ms_refcnt == 0) {
423 KKASSERT((msf->ms_flags & MSF_ONFREEQ) == 0);
425 if (msf->ms_type == MSF_TYPE_UBUF || msf->ms_type == MSF_TYPE_KBUF)
426 xio_release(msf->ms_xio);
428 msf->ms_type = MSF_TYPE_UNKNOWN;
429 msf->ms_flags |= MSF_ONFREEQ;
430 msf->ms_xio = &msf->ms_internal_xio;
431 TAILQ_INSERT_TAIL(&msf_buf_freelist, msf, free_list);
432 ++msf_buf_count;
433 if (msf_buf_alloc_want > 0)
434 wakeup_one(&msf_buf_freelist);
436 crit_exit();