/*
 * Copyright (c) 2009, 2010 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Alex Hornung <ahornung@gmail.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/sysctl.h>
#include <sys/buf.h>
#include <sys/conf.h>
#include <sys/diskslice.h>
#include <sys/disk.h>
#include <sys/malloc.h>
#include <machine/md_var.h>
#include <sys/ctype.h>
#include <sys/syslog.h>
#include <sys/device.h>
#include <sys/msgport.h>
#include <sys/msgport2.h>
#include <sys/buf2.h>
#include <sys/dsched.h>
#include <sys/fcntl.h>
#include <machine/varargs.h>

MALLOC_DEFINE(M_DSCHED, "dsched", "dsched allocs");
static dsched_prepare_t		noop_prepare;
static dsched_teardown_t	noop_teardown;
static dsched_cancel_t		noop_cancel;
static dsched_queue_t		noop_queue;

static void dsched_sysctl_add_disk(struct dsched_disk_ctx *diskctx, char *name);

static int	dsched_inited = 0;
static int	default_set = 0;

struct lock	dsched_lock;
static int	dsched_debug_enable = 0;

struct dsched_stats	dsched_stats;

struct objcache_malloc_args dsched_disk_ctx_malloc_args = {
	DSCHED_DISK_CTX_MAX_SZ, M_DSCHED };
struct objcache_malloc_args dsched_thread_io_malloc_args = {
	DSCHED_THREAD_IO_MAX_SZ, M_DSCHED };
struct objcache_malloc_args dsched_thread_ctx_malloc_args = {
	DSCHED_THREAD_CTX_MAX_SZ, M_DSCHED };

static struct objcache	*dsched_diskctx_cache;
static struct objcache	*dsched_tdctx_cache;
static struct objcache	*dsched_tdio_cache;

TAILQ_HEAD(, dsched_thread_ctx)	dsched_tdctx_list =
		TAILQ_HEAD_INITIALIZER(dsched_tdctx_list);

struct lock	dsched_tdctx_lock;

static struct dsched_policy_head dsched_policy_list =
		TAILQ_HEAD_INITIALIZER(dsched_policy_list);

static struct dsched_policy dsched_noop_policy = {
	.name = "noop",

	.prepare = noop_prepare,
	.teardown = noop_teardown,
	.cancel_all = noop_cancel,
	.bio_queue = noop_queue
};

static struct dsched_policy *default_policy = &dsched_noop_policy;
/*
 * dsched_debug() is a SYSCTL and TUNABLE controlled debug output function
 * using kvprintf
 */
int
dsched_debug(int level, char *fmt, ...)
{
	__va_list ap;

	__va_start(ap, fmt);
	if (level <= dsched_debug_enable)
		kvprintf(fmt, ap);
	__va_end(ap);

	return 0;
}
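
/*
 * Usage note (illustrative, not from the original source): per the
 * comment above, the debug level is meant to be reachable both as a
 * boot tunable and as a sysctl, so it can be raised without a rebuild,
 * e.g.
 *
 *	sysctl dsched.debug=4		# from userland, at runtime
 *	dsched.debug="4"		# in loader.conf(5), at boot
 *
 * Any dsched_debug() call whose level is at or below the configured
 * value is printed via kvprintf().
 */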
/*
 * Called on disk_create().
 * Tries to read which policy to use from loader.conf; if none is
 * specified, the default policy is used.
 */
void
dsched_disk_create_callback(struct disk *dp, const char *head_name, int unit)
{
	char tunable_key[SPECNAMELEN + 48];
	char sched_policy[DSCHED_POLICY_NAME_LENGTH];
	char *ptr;
	struct dsched_policy *policy = NULL;

	/* Also look for serno stuff? */
	/* kprintf("dsched_disk_create_callback() for disk %s%d\n", head_name, unit); */
	lockmgr(&dsched_lock, LK_EXCLUSIVE);

	ksnprintf(tunable_key, sizeof(tunable_key), "dsched.policy.%s%d",
	    head_name, unit);
	if (TUNABLE_STR_FETCH(tunable_key, sched_policy,
	    sizeof(sched_policy)) != 0) {
		policy = dsched_find_policy(sched_policy);
	}

	ksnprintf(tunable_key, sizeof(tunable_key), "dsched.policy.%s",
	    head_name);
	for (ptr = tunable_key; *ptr; ptr++) {
		if (*ptr == '/')
			*ptr = '-';
	}
	if (!policy && (TUNABLE_STR_FETCH(tunable_key, sched_policy,
	    sizeof(sched_policy)) != 0)) {
		policy = dsched_find_policy(sched_policy);
	}

	ksnprintf(tunable_key, sizeof(tunable_key), "dsched.policy.default");
	if (!policy && !default_set && (TUNABLE_STR_FETCH(tunable_key,
	    sched_policy, sizeof(sched_policy)) != 0)) {
		policy = dsched_find_policy(sched_policy);
	}

	if (!policy) {
		if (!default_set) {
			dsched_debug(0, "No policy for %s%d specified, "
			    "or policy not found\n", head_name, unit);
		}
		dsched_set_policy(dp, default_policy);
	} else {
		dsched_set_policy(dp, policy);
	}

	if (strncmp(head_name, "mapper/", strlen("mapper/")) == 0)
		ksnprintf(tunable_key, sizeof(tunable_key), "%s", head_name);
	else
		ksnprintf(tunable_key, sizeof(tunable_key), "%s%d", head_name, unit);
	for (ptr = tunable_key; *ptr; ptr++) {
		if (*ptr == '/')
			*ptr = '-';
	}
	dsched_sysctl_add_disk(
	    (struct dsched_disk_ctx *)dsched_get_disk_priv(dp),
	    tunable_key);

	lockmgr(&dsched_lock, LK_RELEASE);
}
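
/*
 * Example (illustrative; "fq" stands in for any registered policy
 * name): the three lookups above go from most to least specific, so
 * loader.conf(5) can pin a policy per disk, per driver head, or
 * globally:
 *
 *	dsched.policy.da0="fq"		# this disk only
 *	dsched.policy.da="fq"		# all disks of this head
 *	dsched.policy.default="fq"	# fallback for every disk
 *
 * Slashes in device names (e.g. mapper/foo) are rewritten to dashes
 * before the lookup.
 */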
/*
 * Called from disk_setdiskinfo (or rather _setdiskinfo). This will check if
 * there's any policy associated with the serial number of the device.
 */
void
dsched_disk_update_callback(struct disk *dp, struct disk_info *info)
{
	char tunable_key[SPECNAMELEN + 48];
	char sched_policy[DSCHED_POLICY_NAME_LENGTH];
	struct dsched_policy *policy = NULL;

	if (info->d_serialno == NULL)
		return;

	lockmgr(&dsched_lock, LK_EXCLUSIVE);

	ksnprintf(tunable_key, sizeof(tunable_key), "dsched.policy.%s",
	    info->d_serialno);

	if ((TUNABLE_STR_FETCH(tunable_key, sched_policy,
	    sizeof(sched_policy)) != 0)) {
		policy = dsched_find_policy(sched_policy);
	}

	if (policy) {
		dsched_switch(dp, policy);
	}

	dsched_sysctl_add_disk(
	    (struct dsched_disk_ctx *)dsched_get_disk_priv(dp),
	    info->d_serialno);

	lockmgr(&dsched_lock, LK_RELEASE);
}
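
/*
 * Example (illustrative; the serial number below is made up): because
 * the tunable key is built from d_serialno, a policy can follow a
 * device across renumbering, e.g. in loader.conf:
 *
 *	dsched.policy.WD-WCAV51234567="fq"
 */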
/*
 * Called on disk_destroy()
 * shuts down the scheduler core and cancels all remaining bios
 */
void
dsched_disk_destroy_callback(struct disk *dp)
{
	struct dsched_policy *old_policy;
	struct dsched_disk_ctx *diskctx;

	lockmgr(&dsched_lock, LK_EXCLUSIVE);

	diskctx = dsched_get_disk_priv(dp);

	old_policy = dp->d_sched_policy;
	dp->d_sched_policy = &dsched_noop_policy;
	old_policy->cancel_all(dsched_get_disk_priv(dp));
	old_policy->teardown(dsched_get_disk_priv(dp));

	if (diskctx->flags & DSCHED_SYSCTL_CTX_INITED)
		sysctl_ctx_free(&diskctx->sysctl_ctx);

	policy_destroy(dp);
	atomic_subtract_int(&old_policy->ref_count, 1);
	KKASSERT(old_policy->ref_count >= 0);

	lockmgr(&dsched_lock, LK_RELEASE);
}
void
dsched_queue(struct disk *dp, struct bio *bio)
{
	struct dsched_thread_ctx *tdctx;
	struct dsched_thread_io *tdio;
	struct dsched_disk_ctx *diskctx;

	int found = 0, error = 0;

	tdctx = dsched_get_buf_priv(bio->bio_buf);
	if (tdctx == NULL) {
		/* We don't handle this case, let dsched dispatch */
		atomic_add_int(&dsched_stats.no_tdctx, 1);
		dsched_strategy_raw(dp, bio);
		return;
	}

	DSCHED_THREAD_CTX_LOCK(tdctx);

	KKASSERT(!TAILQ_EMPTY(&tdctx->tdio_list));
	TAILQ_FOREACH(tdio, &tdctx->tdio_list, link) {
		if (tdio->dp == dp) {
			dsched_thread_io_ref(tdio);
			found = 1;
			break;
		}
	}

	DSCHED_THREAD_CTX_UNLOCK(tdctx);
	dsched_clr_buf_priv(bio->bio_buf);
	dsched_thread_ctx_unref(tdctx); /* acquired on new_buf */

	KKASSERT(found == 1);
	diskctx = dsched_get_disk_priv(dp);
	dsched_disk_ctx_ref(diskctx);
	error = dp->d_sched_policy->bio_queue(diskctx, tdio, bio);

	if (error) {
		dsched_strategy_raw(dp, bio);
	}
	dsched_disk_ctx_unref(diskctx);
	dsched_thread_io_unref(tdio);
}
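
/*
 * Note on reference handling in dsched_queue() (summary, not in the
 * original source): the tdctx reference taken in dsched_new_buf() is
 * dropped here once the matching tdio has been found, and the
 * tdio/diskctx references only bracket the policy's bio_queue() call.
 * If the policy refuses the bio (error != 0), it falls back to the raw
 * dispatch path.
 */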
/*
 * Called from each policy's module_init or module_attach;
 * registers the policy in the local policy list.
 */
int
dsched_register(struct dsched_policy *d_policy)
{
	struct dsched_policy *policy;
	int error = 0;

	lockmgr(&dsched_lock, LK_EXCLUSIVE);

	policy = dsched_find_policy(d_policy->name);

	if (!policy) {
		TAILQ_INSERT_TAIL(&dsched_policy_list, d_policy, link);
		atomic_add_int(&d_policy->ref_count, 1);
	} else {
		dsched_debug(LOG_ERR, "Policy with name %s already registered!\n",
		    d_policy->name);
		error = EEXIST;
	}

	lockmgr(&dsched_lock, LK_RELEASE);
	return error;
}
/*
 * Called from each policy's module_detach;
 * unregisters the policy.
 */
int
dsched_unregister(struct dsched_policy *d_policy)
{
	struct dsched_policy *policy;

	lockmgr(&dsched_lock, LK_EXCLUSIVE);
	policy = dsched_find_policy(d_policy->name);

	if (policy) {
		if (policy->ref_count > 1) {
			lockmgr(&dsched_lock, LK_RELEASE);
			return EBUSY;
		}
		TAILQ_REMOVE(&dsched_policy_list, policy, link);
		atomic_subtract_int(&policy->ref_count, 1);
		KKASSERT(policy->ref_count == 0);
	}

	lockmgr(&dsched_lock, LK_RELEASE);
	return 0;
}
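
/*
 * Sketch (illustrative; a hypothetical "foo" policy, not part of this
 * file) of how a policy module would use the two functions above from
 * its module event handler:
 *
 *	static struct dsched_policy foo_policy = {
 *		.name = "foo",
 *		.prepare = foo_prepare,
 *		.teardown = foo_teardown,
 *		.cancel_all = foo_cancel,
 *		.bio_queue = foo_queue
 *	};
 *
 *	case MOD_LOAD:
 *		error = dsched_register(&foo_policy);
 *		break;
 *	case MOD_UNLOAD:
 *		error = dsched_unregister(&foo_policy);
 *		break;
 */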
/*
 * Switches the policy by first removing the old one and then
 * enabling the new one.
 */
int
dsched_switch(struct disk *dp, struct dsched_policy *new_policy)
{
	struct dsched_policy *old_policy;

	/* If we are asked to set the same policy, do nothing */
	if (dp->d_sched_policy == new_policy)
		return 0;

	/* lock everything down, diskwise */
	lockmgr(&dsched_lock, LK_EXCLUSIVE);
	old_policy = dp->d_sched_policy;

	atomic_subtract_int(&old_policy->ref_count, 1);
	KKASSERT(old_policy->ref_count >= 0);

	dp->d_sched_policy = &dsched_noop_policy;
	old_policy->teardown(dsched_get_disk_priv(dp));
	policy_destroy(dp);

	/* Bring everything back to life */
	dsched_set_policy(dp, new_policy);
	lockmgr(&dsched_lock, LK_RELEASE);
	return 0;
}
/*
 * Loads a given policy and attaches it to the specified disk.
 * Also initializes the core for the policy.
 */
void
dsched_set_policy(struct disk *dp, struct dsched_policy *new_policy)
{
	int locked = 0;

	/* Check if it is locked already. if not, we acquire the devfs lock */
	if (lockstatus(&dsched_lock, curthread) != LK_EXCLUSIVE) {
		lockmgr(&dsched_lock, LK_EXCLUSIVE);
		locked = 1;
	}

	policy_new(dp, new_policy);
	new_policy->prepare(dsched_get_disk_priv(dp));
	dp->d_sched_policy = new_policy;
	atomic_add_int(&new_policy->ref_count, 1);
	kprintf("disk scheduler: set policy of %s to %s\n", dp->d_cdev->si_name,
	    new_policy->name);

	/* If we acquired the lock, we also get rid of it */
	if (locked)
		lockmgr(&dsched_lock, LK_RELEASE);
}
struct dsched_policy *
dsched_find_policy(char *search)
{
	struct dsched_policy *policy;
	struct dsched_policy *policy_found = NULL;
	int locked = 0;

	/* Check if it is locked already. if not, we acquire the devfs lock */
	if (lockstatus(&dsched_lock, curthread) != LK_EXCLUSIVE) {
		lockmgr(&dsched_lock, LK_EXCLUSIVE);
		locked = 1;
	}

	TAILQ_FOREACH(policy, &dsched_policy_list, link) {
		if (!strcmp(policy->name, search)) {
			policy_found = policy;
			break;
		}
	}

	/* If we acquired the lock, we also get rid of it */
	if (locked)
		lockmgr(&dsched_lock, LK_RELEASE);

	return policy_found;
}
struct disk *
dsched_find_disk(char *search)
{
	struct disk *dp_found = NULL;
	struct disk *dp = NULL;

	while ((dp = disk_enumerate(dp))) {
		if (!strcmp(dp->d_cdev->si_name, search)) {
			dp_found = dp;
			break;
		}
	}

	return dp_found;
}
struct disk *
dsched_disk_enumerate(struct disk *dp, struct dsched_policy *policy)
{
	while ((dp = disk_enumerate(dp))) {
		if (dp->d_sched_policy == policy)
			return dp;
	}

	return NULL;
}
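
/*
 * Example (illustrative): walk every disk currently attached to a
 * given policy; passing NULL starts the enumeration from the head:
 *
 *	struct disk *dp = NULL;
 *
 *	while ((dp = dsched_disk_enumerate(dp, &dsched_noop_policy)))
 *		kprintf("%s uses noop\n", dp->d_cdev->si_name);
 */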
struct dsched_policy *
dsched_policy_enumerate(struct dsched_policy *pol)
{
	if (!pol)
		return (TAILQ_FIRST(&dsched_policy_list));
	else
		return (TAILQ_NEXT(pol, link));
}
void
dsched_cancel_bio(struct bio *bp)
{
	bp->bio_buf->b_error = ENXIO;
	bp->bio_buf->b_flags |= B_ERROR;
	bp->bio_buf->b_resid = bp->bio_buf->b_bcount;

	biodone(bp);
}
void
dsched_strategy_raw(struct disk *dp, struct bio *bp)
{
	/*
	 * Ideally, this stuff shouldn't be needed... but just in case, we leave it in
	 * to avoid panics
	 */
	KASSERT(dp->d_rawdev != NULL, ("dsched_strategy_raw sees NULL d_rawdev!!"));
	if (bp->bio_track != NULL) {
		dsched_debug(LOG_INFO,
		    "dsched_strategy_raw sees non-NULL bio_track!! "
		    "bio: %p\n", bp);
		bp->bio_track = NULL;
	}
	dev_dstrategy(dp->d_rawdev, bp);
}
void
dsched_strategy_sync(struct disk *dp, struct bio *bio)
{
	struct buf *bp, *nbp;
	struct bio *nbio;

	bp = bio->bio_buf;

	nbp = getpbuf(NULL);
	nbio = &nbp->b_bio1;

	nbp->b_cmd = bp->b_cmd;
	nbp->b_bufsize = bp->b_bufsize;
	nbp->b_runningbufspace = bp->b_runningbufspace;
	nbp->b_bcount = bp->b_bcount;
	nbp->b_resid = bp->b_resid;
	nbp->b_data = bp->b_data;
#if 0
	/*
	 * Buffers undergoing device I/O do not need a kvabase/size.
	 */
	nbp->b_kvabase = bp->b_kvabase;
	nbp->b_kvasize = bp->b_kvasize;
#endif
	nbp->b_dirtyend = bp->b_dirtyend;

	nbio->bio_done = biodone_sync;
	nbio->bio_flags |= BIO_SYNC;
	nbio->bio_track = NULL;

	nbio->bio_caller_info1.ptr = dp;
	nbio->bio_offset = bio->bio_offset;

	dev_dstrategy(dp->d_rawdev, nbio);
	biowait(nbio, "dschedsync");
	bp->b_resid = nbp->b_resid;
	bp->b_error = nbp->b_error;
	biodone(bio);
#if 0
	nbp->b_kvabase = NULL;
	nbp->b_kvasize = 0;
#endif
	relpbuf(nbp, NULL);
}
void
dsched_strategy_async(struct disk *dp, struct bio *bio, biodone_t *done, void *priv)
{
	struct bio *nbio;

	nbio = push_bio(bio);
	nbio->bio_done = done;
	nbio->bio_offset = bio->bio_offset;

	dsched_set_bio_dp(nbio, dp);
	dsched_set_bio_priv(nbio, priv);

	getmicrotime(&nbio->bio_caller_info3.tv);
	dev_dstrategy(dp->d_rawdev, nbio);
}
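
/*
 * Summary (not in the original source): a policy dispatches queued
 * bios through one of the three strategies above.  _raw passes the
 * bio to the raw device untouched, _sync clones it into a pbuf and
 * waits for completion via biowait(), and _async pushes a new bio
 * level carrying a completion callback plus private data, timestamped
 * for the policy's bookkeeping.
 */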
void
dsched_disk_ctx_ref(struct dsched_disk_ctx *diskctx)
{
	int refcount;

	refcount = atomic_fetchadd_int(&diskctx->refcount, 1);

	KKASSERT(refcount >= 0);
}

void
dsched_thread_io_ref(struct dsched_thread_io *tdio)
{
	int refcount;

	refcount = atomic_fetchadd_int(&tdio->refcount, 1);

	KKASSERT(refcount >= 0);
}

void
dsched_thread_ctx_ref(struct dsched_thread_ctx *tdctx)
{
	int refcount;

	refcount = atomic_fetchadd_int(&tdctx->refcount, 1);

	KKASSERT(refcount >= 0);
}
void
dsched_disk_ctx_unref(struct dsched_disk_ctx *diskctx)
{
	struct dsched_thread_io *tdio, *tdio2;
	int refcount;

	refcount = atomic_fetchadd_int(&diskctx->refcount, -1);

	KKASSERT(refcount >= 0 || refcount <= -0x400);

	if (refcount == 1) {
		atomic_subtract_int(&diskctx->refcount, 0x400); /* mark as: in destruction */
#if 0
		kprintf("diskctx (%p) destruction started, trace:\n", diskctx);
		print_backtrace(4);
#endif
		lockmgr(&diskctx->lock, LK_EXCLUSIVE);
		TAILQ_FOREACH_MUTABLE(tdio, &diskctx->tdio_list, dlink, tdio2) {
			TAILQ_REMOVE(&diskctx->tdio_list, tdio, dlink);
			tdio->flags &= ~DSCHED_LINKED_DISK_CTX;
			dsched_thread_io_unref(tdio);
		}
		lockmgr(&diskctx->lock, LK_RELEASE);
		if (diskctx->dp->d_sched_policy->destroy_diskctx)
			diskctx->dp->d_sched_policy->destroy_diskctx(diskctx);
		objcache_put(dsched_diskctx_cache, diskctx);
		atomic_subtract_int(&dsched_stats.diskctx_allocations, 1);
	}
}
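
/*
 * Worked example of the refcount scheme shared by the three unref
 * functions (explanatory, not in the original): atomic_fetchadd_int()
 * returns the value *before* the decrement, so refcount == 1 means
 * this call just dropped the last reference.  The destructor then
 * subtracts 0x400 as an "in destruction" marker, e.g.
 *
 *	refcount 1 -> 0 (last unref), then minus 0x400 -> -0x400
 *
 * Any stale unref racing with the teardown only drives the count
 * further below -0x400, which is exactly the range the KKASSERTs
 * accept.
 */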
void
dsched_thread_io_unref(struct dsched_thread_io *tdio)
{
	struct dsched_thread_ctx *tdctx;
	struct dsched_disk_ctx *diskctx;
	int refcount;

	refcount = atomic_fetchadd_int(&tdio->refcount, -1);

	KKASSERT(refcount >= 0 || refcount <= -0x400);

	if (refcount == 1) {
		atomic_subtract_int(&tdio->refcount, 0x400); /* mark as: in destruction */
#if 0
		kprintf("tdio (%p) destruction started, trace:\n", tdio);
		print_backtrace(8);
#endif
		diskctx = tdio->diskctx;
		KKASSERT(diskctx != NULL);
		KKASSERT(tdio->qlength == 0);

		if (tdio->flags & DSCHED_LINKED_DISK_CTX) {
			lockmgr(&diskctx->lock, LK_EXCLUSIVE);

			TAILQ_REMOVE(&diskctx->tdio_list, tdio, dlink);
			tdio->flags &= ~DSCHED_LINKED_DISK_CTX;

			lockmgr(&diskctx->lock, LK_RELEASE);
		}

		if (tdio->flags & DSCHED_LINKED_THREAD_CTX) {
			tdctx = tdio->tdctx;
			KKASSERT(tdctx != NULL);

			lockmgr(&tdctx->lock, LK_EXCLUSIVE);

			TAILQ_REMOVE(&tdctx->tdio_list, tdio, link);
			tdio->flags &= ~DSCHED_LINKED_THREAD_CTX;

			lockmgr(&tdctx->lock, LK_RELEASE);
		}

		if (tdio->diskctx->dp->d_sched_policy->destroy_tdio)
			tdio->diskctx->dp->d_sched_policy->destroy_tdio(tdio);
		objcache_put(dsched_tdio_cache, tdio);
		atomic_subtract_int(&dsched_stats.tdio_allocations, 1);
#if 0
		dsched_disk_ctx_unref(diskctx);
#endif
	}
}
void
dsched_thread_ctx_unref(struct dsched_thread_ctx *tdctx)
{
	struct dsched_thread_io *tdio, *tdio2;
	int refcount;

	refcount = atomic_fetchadd_int(&tdctx->refcount, -1);

	KKASSERT(refcount >= 0 || refcount <= -0x400);

	if (refcount == 1) {
		atomic_subtract_int(&tdctx->refcount, 0x400); /* mark as: in destruction */
#if 0
		kprintf("tdctx (%p) destruction started, trace:\n", tdctx);
		print_backtrace(8);
#endif
		DSCHED_GLOBAL_THREAD_CTX_LOCK();

		TAILQ_FOREACH_MUTABLE(tdio, &tdctx->tdio_list, link, tdio2) {
			TAILQ_REMOVE(&tdctx->tdio_list, tdio, link);
			tdio->flags &= ~DSCHED_LINKED_THREAD_CTX;
			dsched_thread_io_unref(tdio);
		}
		TAILQ_REMOVE(&dsched_tdctx_list, tdctx, link);

		DSCHED_GLOBAL_THREAD_CTX_UNLOCK();

		objcache_put(dsched_tdctx_cache, tdctx);
		atomic_subtract_int(&dsched_stats.tdctx_allocations, 1);
	}
}
struct dsched_thread_io *
dsched_thread_io_alloc(struct disk *dp, struct dsched_thread_ctx *tdctx,
    struct dsched_policy *pol)
{
	struct dsched_thread_io *tdio;
#if 0
	dsched_disk_ctx_ref(dsched_get_disk_priv(dp));
#endif
	tdio = objcache_get(dsched_tdio_cache, M_WAITOK);
	bzero(tdio, DSCHED_THREAD_IO_MAX_SZ);

	/* XXX: maybe we do need another ref for the disk list for tdio */
	dsched_thread_io_ref(tdio);

	DSCHED_THREAD_IO_LOCKINIT(tdio);
	tdio->dp = dp;

	tdio->diskctx = dsched_get_disk_priv(dp);
	TAILQ_INIT(&tdio->queue);

	if (pol->new_tdio)
		pol->new_tdio(tdio);

	TAILQ_INSERT_TAIL(&tdio->diskctx->tdio_list, tdio, dlink);
	tdio->flags |= DSCHED_LINKED_DISK_CTX;

	if (tdctx) {
		tdio->tdctx = tdctx;
		tdio->p = tdctx->p;

		/* Put the tdio in the tdctx list */
		DSCHED_THREAD_CTX_LOCK(tdctx);
		TAILQ_INSERT_TAIL(&tdctx->tdio_list, tdio, link);
		DSCHED_THREAD_CTX_UNLOCK(tdctx);
		tdio->flags |= DSCHED_LINKED_THREAD_CTX;
	}

	atomic_add_int(&dsched_stats.tdio_allocations, 1);
	return tdio;
}
struct dsched_disk_ctx *
dsched_disk_ctx_alloc(struct disk *dp, struct dsched_policy *pol)
{
	struct dsched_disk_ctx *diskctx;

	diskctx = objcache_get(dsched_diskctx_cache, M_WAITOK);
	bzero(diskctx, DSCHED_DISK_CTX_MAX_SZ);
	dsched_disk_ctx_ref(diskctx);
	diskctx->dp = dp;
	DSCHED_DISK_CTX_LOCKINIT(diskctx);
	TAILQ_INIT(&diskctx->tdio_list);

	atomic_add_int(&dsched_stats.diskctx_allocations, 1);
	if (pol->new_diskctx)
		pol->new_diskctx(diskctx);
	return diskctx;
}
struct dsched_thread_ctx *
dsched_thread_ctx_alloc(struct proc *p)
{
	struct dsched_thread_ctx *tdctx;
	struct dsched_thread_io *tdio;
	struct disk *dp = NULL;

	tdctx = objcache_get(dsched_tdctx_cache, M_WAITOK);
	bzero(tdctx, DSCHED_THREAD_CTX_MAX_SZ);
	dsched_thread_ctx_ref(tdctx);
#if 0
	kprintf("dsched_thread_ctx_alloc, new tdctx = %p\n", tdctx);
#endif
	DSCHED_THREAD_CTX_LOCKINIT(tdctx);
	TAILQ_INIT(&tdctx->tdio_list);
	tdctx->p = p;

	/* XXX */
	while ((dp = disk_enumerate(dp))) {
		tdio = dsched_thread_io_alloc(dp, tdctx, dp->d_sched_policy);
	}

	DSCHED_GLOBAL_THREAD_CTX_LOCK();
	TAILQ_INSERT_TAIL(&dsched_tdctx_list, tdctx, link);
	DSCHED_GLOBAL_THREAD_CTX_UNLOCK();

	atomic_add_int(&dsched_stats.tdctx_allocations, 1);
	/* XXX: no callback here */
	return tdctx;
}
void
policy_new(struct disk *dp, struct dsched_policy *pol) {
	struct dsched_thread_ctx *tdctx;
	struct dsched_disk_ctx *diskctx;
	struct dsched_thread_io *tdio;

	diskctx = dsched_disk_ctx_alloc(dp, pol);
	dsched_disk_ctx_ref(diskctx);
	dsched_set_disk_priv(dp, diskctx);

	DSCHED_GLOBAL_THREAD_CTX_LOCK();
	TAILQ_FOREACH(tdctx, &dsched_tdctx_list, link) {
		tdio = dsched_thread_io_alloc(dp, tdctx, pol);
	}
	DSCHED_GLOBAL_THREAD_CTX_UNLOCK();
}
void
policy_destroy(struct disk *dp) {
	struct dsched_disk_ctx *diskctx;

	diskctx = dsched_get_disk_priv(dp);
	KKASSERT(diskctx != NULL);

	dsched_disk_ctx_unref(diskctx); /* from prepare */
	dsched_disk_ctx_unref(diskctx); /* from alloc */

	dsched_set_disk_priv(dp, NULL);
}
void
dsched_new_buf(struct buf *bp)
{
	struct dsched_thread_ctx *tdctx = NULL;

	if (dsched_inited == 0)
		return;

	if (curproc != NULL) {
		tdctx = dsched_get_proc_priv(curproc);
	} else {
		/* This is a kernel thread, so no proc info is available */
		tdctx = dsched_get_thread_priv(curthread);
	}

#if 0
	/*
	 * XXX: hack. we don't want this assert because we aren't catching all
	 * threads. mi_startup() is still getting away without a tdctx.
	 */

	/* by now we should have a tdctx. if not, something bad is going on */
	KKASSERT(tdctx != NULL);
#endif

	if (tdctx) {
		dsched_thread_ctx_ref(tdctx);
	}
	dsched_set_buf_priv(bp, tdctx);
}
void
dsched_exit_buf(struct buf *bp)
{
	struct dsched_thread_ctx *tdctx;

	tdctx = dsched_get_buf_priv(bp);
	if (tdctx != NULL) {
		dsched_clr_buf_priv(bp);
		dsched_thread_ctx_unref(tdctx);
	}
}
void
dsched_new_proc(struct proc *p)
{
	struct dsched_thread_ctx *tdctx;

	if (dsched_inited == 0)
		return;

	KKASSERT(p != NULL);

	tdctx = dsched_thread_ctx_alloc(p);
	tdctx->p = p;
	dsched_thread_ctx_ref(tdctx);

	dsched_set_proc_priv(p, tdctx);
	atomic_add_int(&dsched_stats.nprocs, 1);
}
void
dsched_new_thread(struct thread *td)
{
	struct dsched_thread_ctx *tdctx;

	if (dsched_inited == 0)
		return;

	KKASSERT(td != NULL);

	tdctx = dsched_thread_ctx_alloc(NULL);
	tdctx->td = td;
	dsched_thread_ctx_ref(tdctx);

	dsched_set_thread_priv(td, tdctx);
	atomic_add_int(&dsched_stats.nthreads, 1);
}
void
dsched_exit_proc(struct proc *p)
{
	struct dsched_thread_ctx *tdctx;

	if (dsched_inited == 0)
		return;

	KKASSERT(p != NULL);

	tdctx = dsched_get_proc_priv(p);
	KKASSERT(tdctx != NULL);

	tdctx->dead = 0xDEAD;
	dsched_set_proc_priv(p, NULL);

	dsched_thread_ctx_unref(tdctx); /* one for alloc, */
	dsched_thread_ctx_unref(tdctx); /* one for ref */
	atomic_subtract_int(&dsched_stats.nprocs, 1);
}
void
dsched_exit_thread(struct thread *td)
{
	struct dsched_thread_ctx *tdctx;

	if (dsched_inited == 0)
		return;

	KKASSERT(td != NULL);

	tdctx = dsched_get_thread_priv(td);
	KKASSERT(tdctx != NULL);

	tdctx->dead = 0xDEAD;
	dsched_set_thread_priv(td, 0);

	dsched_thread_ctx_unref(tdctx); /* one for alloc, */
	dsched_thread_ctx_unref(tdctx); /* one for ref */
	atomic_subtract_int(&dsched_stats.nthreads, 1);
}
struct dsched_thread_io *
dsched_new_policy_thread_tdio(struct dsched_disk_ctx *diskctx,
    struct dsched_policy *pol) {
	struct dsched_thread_ctx *tdctx;
	struct dsched_thread_io *tdio;

	tdctx = dsched_get_thread_priv(curthread);
	KKASSERT(tdctx != NULL);

	tdio = dsched_thread_io_alloc(diskctx->dp, tdctx, pol);
	return tdio;
}
/* DEFAULT NOOP POLICY */

static int
noop_prepare(struct dsched_disk_ctx *diskctx)
{
	return 0;
}

static void
noop_teardown(struct dsched_disk_ctx *diskctx)
{
}

static void
noop_cancel(struct dsched_disk_ctx *diskctx)
{
}

static int
noop_queue(struct dsched_disk_ctx *diskctx, struct dsched_thread_io *tdio,
    struct bio *bio)
{
	dsched_strategy_raw(diskctx->dp, bio);
#if 0
	dsched_strategy_async(diskctx->dp, bio, noop_completed, NULL);
#endif
	return 0;
}
/*
 * SYSINIT stuff
 */
static void
dsched_init(void)
{
	dsched_tdio_cache = objcache_create("dsched-tdio-cache", 0, 0,
	    NULL, NULL, NULL,
	    objcache_malloc_alloc,
	    objcache_malloc_free,
	    &dsched_thread_io_malloc_args);

	dsched_tdctx_cache = objcache_create("dsched-tdctx-cache", 0, 0,
	    NULL, NULL, NULL,
	    objcache_malloc_alloc,
	    objcache_malloc_free,
	    &dsched_thread_ctx_malloc_args);

	dsched_diskctx_cache = objcache_create("dsched-diskctx-cache", 0, 0,
	    NULL, NULL, NULL,
	    objcache_malloc_alloc,
	    objcache_malloc_free,
	    &dsched_disk_ctx_malloc_args);

	bzero(&dsched_stats, sizeof(struct dsched_stats));

	lockinit(&dsched_lock, "dsched lock", 0, LK_CANRECURSE);
	DSCHED_GLOBAL_THREAD_CTX_LOCKINIT();

	dsched_register(&dsched_noop_policy);

	dsched_inited = 1;
}
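
/*
 * Note on the caches above (explanatory): each cache uses the stock
 * kmalloc backend, where an objcache_malloc_args pairs a fixed object
 * size with the M_DSCHED malloc type.  The rest of this file then
 * recycles objects through the usual pair of calls, e.g.
 *
 *	tdio = objcache_get(dsched_tdio_cache, M_WAITOK);
 *	...
 *	objcache_put(dsched_tdio_cache, tdio);
 */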
static void
dsched_uninit(void)
{
}

SYSINIT(subr_dsched_register, SI_SUB_CREATE_INIT-1, SI_ORDER_FIRST, dsched_init, NULL);
SYSUNINIT(subr_dsched_register, SI_SUB_CREATE_INIT-1, SI_ORDER_ANY, dsched_uninit, NULL);
/*
 * SYSCTL stuff
 */
static int
sysctl_dsched_stats(SYSCTL_HANDLER_ARGS)
{
	return (sysctl_handle_opaque(oidp, &dsched_stats, sizeof(struct dsched_stats), req));
}
static int
sysctl_dsched_list_policies(SYSCTL_HANDLER_ARGS)
{
	struct dsched_policy *pol = NULL;
	int error, first = 1;

	lockmgr(&dsched_lock, LK_EXCLUSIVE);

	while ((pol = dsched_policy_enumerate(pol))) {
		if (!first) {
			error = SYSCTL_OUT(req, " ", 1);
			if (error)
				break;
		} else {
			first = 0;
		}
		error = SYSCTL_OUT(req, pol->name, strlen(pol->name));
		if (error)
			break;
	}

	lockmgr(&dsched_lock, LK_RELEASE);

	error = SYSCTL_OUT(req, "", 1);

	return error;
}
static int
sysctl_dsched_policy(SYSCTL_HANDLER_ARGS)
{
	char buf[DSCHED_POLICY_NAME_LENGTH];
	struct dsched_disk_ctx *diskctx = arg1;
	struct dsched_policy *pol = NULL;
	int error;

	if (diskctx == NULL) {
		return 0;
	}

	lockmgr(&dsched_lock, LK_EXCLUSIVE);

	pol = diskctx->dp->d_sched_policy;
	memcpy(buf, pol->name, DSCHED_POLICY_NAME_LENGTH);

	error = sysctl_handle_string(oidp, buf, DSCHED_POLICY_NAME_LENGTH, req);
	if (error || req->newptr == NULL) {
		lockmgr(&dsched_lock, LK_RELEASE);
		return (error);
	}

	pol = dsched_find_policy(buf);
	if (pol == NULL) {
		lockmgr(&dsched_lock, LK_RELEASE);
		return 0;
	}

	dsched_switch(diskctx->dp, pol);

	lockmgr(&dsched_lock, LK_RELEASE);

	return error;
}
static int
sysctl_dsched_default_policy(SYSCTL_HANDLER_ARGS)
{
	char buf[DSCHED_POLICY_NAME_LENGTH];
	struct dsched_policy *pol = NULL;
	int error;

	lockmgr(&dsched_lock, LK_EXCLUSIVE);

	pol = default_policy;
	memcpy(buf, pol->name, DSCHED_POLICY_NAME_LENGTH);

	error = sysctl_handle_string(oidp, buf, DSCHED_POLICY_NAME_LENGTH, req);
	if (error || req->newptr == NULL) {
		lockmgr(&dsched_lock, LK_RELEASE);
		return (error);
	}

	pol = dsched_find_policy(buf);
	if (pol == NULL) {
		lockmgr(&dsched_lock, LK_RELEASE);
		return 0;
	}

	default_set = 1;
	default_policy = pol;

	lockmgr(&dsched_lock, LK_RELEASE);

	return error;
}
SYSCTL_NODE(, OID_AUTO, dsched, CTLFLAG_RD, NULL,
    "Disk Scheduler Framework (dsched) magic");
SYSCTL_NODE(_dsched, OID_AUTO, policy, CTLFLAG_RW, NULL,
    "List of disks and their policies");
SYSCTL_INT(_dsched, OID_AUTO, debug, CTLFLAG_RW, &dsched_debug_enable,
    0, "Enable dsched debugging");
SYSCTL_PROC(_dsched, OID_AUTO, stats, CTLTYPE_OPAQUE|CTLFLAG_RD,
    0, sizeof(struct dsched_stats), sysctl_dsched_stats, "dsched_stats",
    "dsched statistics");
SYSCTL_PROC(_dsched, OID_AUTO, policies, CTLTYPE_STRING|CTLFLAG_RD,
    NULL, 0, sysctl_dsched_list_policies, "A", "names of available policies");
SYSCTL_PROC(_dsched_policy, OID_AUTO, default, CTLTYPE_STRING|CTLFLAG_RW,
    NULL, 0, sysctl_dsched_default_policy, "A", "default dsched policy");
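
/*
 * Example (illustrative) of driving the knobs defined above from
 * userland; "fq" again stands in for any registered policy:
 *
 *	sysctl dsched.policies			# list registered policies
 *	sysctl dsched.policy.default=fq		# change the default policy
 *	sysctl dsched.policy.da0=fq		# switch one disk at runtime
 */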
static void
dsched_sysctl_add_disk(struct dsched_disk_ctx *diskctx, char *name)
{
	if (!(diskctx->flags & DSCHED_SYSCTL_CTX_INITED)) {
		diskctx->flags |= DSCHED_SYSCTL_CTX_INITED;
		sysctl_ctx_init(&diskctx->sysctl_ctx);
	}

	SYSCTL_ADD_PROC(&diskctx->sysctl_ctx, SYSCTL_STATIC_CHILDREN(_dsched_policy),
	    OID_AUTO, name, CTLTYPE_STRING|CTLFLAG_RW,
	    diskctx, 0, sysctl_dsched_policy, "A", "policy");
}