/* sys/kern/kern_dsched.c */
/*
 * Copyright (c) 2009, 2010 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Alex Hornung <ahornung@gmail.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/sysctl.h>
#include <sys/buf.h>
#include <sys/conf.h>
#include <sys/diskslice.h>
#include <sys/disk.h>
#include <sys/malloc.h>
#include <machine/md_var.h>
#include <sys/ctype.h>
#include <sys/syslog.h>
#include <sys/device.h>
#include <sys/msgport.h>
#include <sys/msgport2.h>
#include <sys/buf2.h>
#include <sys/dsched.h>
#include <sys/fcntl.h>
#include <machine/varargs.h>
TAILQ_HEAD(tdio_list_head, dsched_thread_io);

MALLOC_DEFINE(M_DSCHED, "dsched", "dsched allocs");

static dsched_prepare_t		noop_prepare;
static dsched_teardown_t	noop_teardown;
static dsched_cancel_t		noop_cancel;
static dsched_queue_t		noop_queue;

static void dsched_thread_io_unref_destroy(struct dsched_thread_io *tdio);
static void dsched_sysctl_add_disk(struct dsched_disk_ctx *diskctx, char *name);
static void dsched_disk_ctx_destroy(struct dsched_disk_ctx *diskctx);
static void dsched_thread_io_destroy(struct dsched_thread_io *tdio);
static void dsched_thread_ctx_destroy(struct dsched_thread_ctx *tdctx);

static int	dsched_inited = 0;
static int	default_set = 0;

struct lock	dsched_lock;
static int	dsched_debug_enable = 0;

struct dsched_stats	dsched_stats;

struct objcache_malloc_args dsched_disk_ctx_malloc_args = {
	DSCHED_DISK_CTX_MAX_SZ, M_DSCHED };
struct objcache_malloc_args dsched_thread_io_malloc_args = {
	DSCHED_THREAD_IO_MAX_SZ, M_DSCHED };
struct objcache_malloc_args dsched_thread_ctx_malloc_args = {
	DSCHED_THREAD_CTX_MAX_SZ, M_DSCHED };

static struct objcache	*dsched_diskctx_cache;
static struct objcache	*dsched_tdctx_cache;
static struct objcache	*dsched_tdio_cache;

TAILQ_HEAD(, dsched_thread_ctx)	dsched_tdctx_list =
		TAILQ_HEAD_INITIALIZER(dsched_tdctx_list);

struct lock	dsched_tdctx_lock;

static struct dsched_policy_head dsched_policy_list =
		TAILQ_HEAD_INITIALIZER(dsched_policy_list);

static struct dsched_policy dsched_noop_policy = {
	.name = "noop",

	.prepare = noop_prepare,
	.teardown = noop_teardown,
	.cancel_all = noop_cancel,
	.bio_queue = noop_queue
};

static struct dsched_policy *default_policy = &dsched_noop_policy;
/*
 * dsched_debug() is a SYSCTL and TUNABLE controlled debug output function
 * using kvprintf
 */
int
dsched_debug(int level, char *fmt, ...)
{
	__va_list ap;

	__va_start(ap, fmt);
	if (level <= dsched_debug_enable)
		kvprintf(fmt, ap);
	__va_end(ap);

	return 0;
}
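/*
 * The debug level is adjustable at runtime through the dsched.debug sysctl
 * (dsched_debug_enable) defined near the bottom of this file.
 */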
/*
 * Called on disk_create().
 * Tries to read which policy to use from loader.conf; if none is specified,
 * the default policy is used.
 */
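/*
 * For example, for disk "da0" the following loader.conf tunables are
 * consulted, in this order (the policy name is only illustrative):
 *
 *	dsched.policy.da0="fq"
 *	dsched.policy.da="fq"
 *	dsched.policy.default="fq"
 */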
void
dsched_disk_create_callback(struct disk *dp, const char *head_name, int unit)
{
	char tunable_key[SPECNAMELEN + 48];
	char sched_policy[DSCHED_POLICY_NAME_LENGTH];
	char *ptr;
	struct dsched_policy *policy = NULL;

	/* Also look for serno stuff? */
	lockmgr(&dsched_lock, LK_EXCLUSIVE);

	ksnprintf(tunable_key, sizeof(tunable_key),
		  "dsched.policy.%s%d", head_name, unit);
	if (TUNABLE_STR_FETCH(tunable_key, sched_policy,
	    sizeof(sched_policy)) != 0) {
		policy = dsched_find_policy(sched_policy);
	}

	ksnprintf(tunable_key, sizeof(tunable_key),
		  "dsched.policy.%s", head_name);
	for (ptr = tunable_key; *ptr; ptr++) {
		if (*ptr == '/')
			*ptr = '-';
	}
	if (!policy && (TUNABLE_STR_FETCH(tunable_key, sched_policy,
	    sizeof(sched_policy)) != 0)) {
		policy = dsched_find_policy(sched_policy);
	}

	ksnprintf(tunable_key, sizeof(tunable_key), "dsched.policy.default");
	if (!policy && !default_set &&
	    (TUNABLE_STR_FETCH(tunable_key, sched_policy,
	    sizeof(sched_policy)) != 0)) {
		policy = dsched_find_policy(sched_policy);
	}

	if (!policy) {
		if (!default_set && bootverbose) {
			dsched_debug(0,
				     "No policy for %s%d specified, "
				     "or policy not found\n",
				     head_name, unit);
		}
		dsched_set_policy(dp, default_policy);
	} else {
		dsched_set_policy(dp, policy);
	}

	if (strncmp(head_name, "mapper/", strlen("mapper/")) == 0)
		ksnprintf(tunable_key, sizeof(tunable_key), "%s", head_name);
	else
		ksnprintf(tunable_key, sizeof(tunable_key), "%s%d", head_name, unit);
	for (ptr = tunable_key; *ptr; ptr++) {
		if (*ptr == '/')
			*ptr = '-';
	}
	dsched_sysctl_add_disk(
	    (struct dsched_disk_ctx *)dsched_get_disk_priv(dp),
	    tunable_key);

	lockmgr(&dsched_lock, LK_RELEASE);
}
/*
 * Called from disk_setdiskinfo (or rather _setdiskinfo). This will check if
 * there's any policy associated with the serial number of the device.
 */
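/*
 * For example (serial number and policy name are purely illustrative):
 *
 *	dsched.policy.WD-WCC4E1234567="fq"
 */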
void
dsched_disk_update_callback(struct disk *dp, struct disk_info *info)
{
	char tunable_key[SPECNAMELEN + 48];
	char sched_policy[DSCHED_POLICY_NAME_LENGTH];
	struct dsched_policy *policy = NULL;

	if (info->d_serialno == NULL)
		return;

	lockmgr(&dsched_lock, LK_EXCLUSIVE);

	ksnprintf(tunable_key, sizeof(tunable_key), "dsched.policy.%s",
	    info->d_serialno);

	if ((TUNABLE_STR_FETCH(tunable_key, sched_policy,
	    sizeof(sched_policy)) != 0)) {
		policy = dsched_find_policy(sched_policy);
	}

	if (policy) {
		dsched_switch(dp, policy);
	}

	dsched_sysctl_add_disk(
	    (struct dsched_disk_ctx *)dsched_get_disk_priv(dp),
	    info->d_serialno);

	lockmgr(&dsched_lock, LK_RELEASE);
}
/*
 * Called on disk_destroy().
 * Shuts down the scheduler core and cancels all remaining bios.
 */
void
dsched_disk_destroy_callback(struct disk *dp)
{
	struct dsched_policy *old_policy;
	struct dsched_disk_ctx *diskctx;

	lockmgr(&dsched_lock, LK_EXCLUSIVE);

	diskctx = dsched_get_disk_priv(dp);

	old_policy = dp->d_sched_policy;
	dp->d_sched_policy = &dsched_noop_policy;
	old_policy->cancel_all(dsched_get_disk_priv(dp));
	old_policy->teardown(dsched_get_disk_priv(dp));

	if (diskctx->flags & DSCHED_SYSCTL_CTX_INITED)
		sysctl_ctx_free(&diskctx->sysctl_ctx);

	policy_destroy(dp);
	atomic_subtract_int(&old_policy->ref_count, 1);
	KKASSERT(old_policy->ref_count >= 0);

	lockmgr(&dsched_lock, LK_RELEASE);
}
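/*
 * Entry point for scheduling a bio on a disk.  Looks up the tdio the issuing
 * thread holds for this disk and hands the bio to the active policy's
 * bio_queue method; if no thread context is attached to the buf, or the
 * policy refuses the bio, it is dispatched directly to the device instead.
 */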
void
dsched_queue(struct disk *dp, struct bio *bio)
{
	struct dsched_thread_ctx *tdctx;
	struct dsched_thread_io *tdio;
	struct dsched_disk_ctx *diskctx;
	int found = 0, error = 0;

	tdctx = dsched_get_buf_priv(bio->bio_buf);
	if (tdctx == NULL) {
		/* We don't handle this case, let dsched dispatch */
		atomic_add_int(&dsched_stats.no_tdctx, 1);
		dsched_strategy_raw(dp, bio);
		return;
	}

	DSCHED_THREAD_CTX_LOCK(tdctx);

	KKASSERT(!TAILQ_EMPTY(&tdctx->tdio_list));
	/*
	 * XXX:
	 * iterate in reverse to make sure we find the most up-to-date
	 * tdio for a given disk. After a switch it may take some time
	 * for everything to clean up.
	 */
	TAILQ_FOREACH_REVERSE(tdio, &tdctx->tdio_list, tdio_list_head, link) {
		if (tdio->dp == dp) {
			dsched_thread_io_ref(tdio);
			found = 1;
			break;
		}
	}

	DSCHED_THREAD_CTX_UNLOCK(tdctx);
	dsched_clr_buf_priv(bio->bio_buf);
	dsched_thread_ctx_unref(tdctx); /* acquired on new_buf */

	KKASSERT(found == 1);
	diskctx = dsched_get_disk_priv(dp);
	dsched_disk_ctx_ref(diskctx);

	if (dp->d_sched_policy != &dsched_noop_policy)
		KKASSERT(tdio->debug_policy == dp->d_sched_policy);

	KKASSERT(tdio->debug_inited == 0xF00F1234);

	error = dp->d_sched_policy->bio_queue(diskctx, tdio, bio);

	if (error) {
		dsched_strategy_raw(dp, bio);
	}
	dsched_disk_ctx_unref(diskctx);
	dsched_thread_io_unref(tdio);
}
/*
 * Called from each module_init or module_attach of each policy;
 * registers the policy in the local policy list.
 */
int
dsched_register(struct dsched_policy *d_policy)
{
	struct dsched_policy *policy;
	int error = 0;

	lockmgr(&dsched_lock, LK_EXCLUSIVE);

	policy = dsched_find_policy(d_policy->name);

	if (!policy) {
		TAILQ_INSERT_TAIL(&dsched_policy_list, d_policy, link);
		atomic_add_int(&d_policy->ref_count, 1);
	} else {
		dsched_debug(LOG_ERR, "Policy with name %s already registered!\n",
		    d_policy->name);
		error = EEXIST;
	}

	lockmgr(&dsched_lock, LK_RELEASE);
	return error;
}
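/*
 * Illustrative sketch (not part of this file): a minimal policy module
 * would fill in a struct dsched_policy and register/unregister it from
 * its module load/unload handlers.  All names below are hypothetical.
 */
#if 0
static dsched_prepare_t		example_prepare;
static dsched_teardown_t	example_teardown;
static dsched_cancel_t		example_cancel;
static dsched_queue_t		example_queue;

static struct dsched_policy example_policy = {
	.name = "example",

	.prepare = example_prepare,
	.teardown = example_teardown,
	.cancel_all = example_cancel,
	.bio_queue = example_queue
};

/* typically from the policy module's load/unload handlers: */
/*	error = dsched_register(&example_policy);	*/
/*	error = dsched_unregister(&example_policy);	*/
#endif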
/*
 * Called from each module_detach of each policy;
 * unregisters the policy.
 */
int
dsched_unregister(struct dsched_policy *d_policy)
{
	struct dsched_policy *policy;

	lockmgr(&dsched_lock, LK_EXCLUSIVE);
	policy = dsched_find_policy(d_policy->name);

	if (policy) {
		if (policy->ref_count > 1) {
			lockmgr(&dsched_lock, LK_RELEASE);
			return EBUSY;
		}
		TAILQ_REMOVE(&dsched_policy_list, policy, link);
		atomic_subtract_int(&policy->ref_count, 1);
		KKASSERT(policy->ref_count == 0);
	}
	lockmgr(&dsched_lock, LK_RELEASE);

	return 0;
}
/*
 * Switches the policy by first removing the old one and then
 * enabling the new one.
 */
int
dsched_switch(struct disk *dp, struct dsched_policy *new_policy)
{
	struct dsched_policy *old_policy;

	/* If we are asked to set the same policy, do nothing */
	if (dp->d_sched_policy == new_policy)
		return 0;

	/* lock everything down, diskwise */
	lockmgr(&dsched_lock, LK_EXCLUSIVE);
	old_policy = dp->d_sched_policy;

	atomic_subtract_int(&old_policy->ref_count, 1);
	KKASSERT(old_policy->ref_count >= 0);

	dp->d_sched_policy = &dsched_noop_policy;
	old_policy->teardown(dsched_get_disk_priv(dp));
	policy_destroy(dp);

	/* Bring everything back to life */
	dsched_set_policy(dp, new_policy);
	lockmgr(&dsched_lock, LK_RELEASE);

	return 0;
}
/*
 * Loads a given policy and attaches it to the specified disk.
 * Also initializes the core for the policy.
 */
void
dsched_set_policy(struct disk *dp, struct dsched_policy *new_policy)
{
	int locked = 0;

	/* Check if it is locked already. If not, we acquire the dsched lock */
	if (lockstatus(&dsched_lock, curthread) != LK_EXCLUSIVE) {
		lockmgr(&dsched_lock, LK_EXCLUSIVE);
		locked = 1;
	}

	DSCHED_GLOBAL_THREAD_CTX_LOCK();

	policy_new(dp, new_policy);
	new_policy->prepare(dsched_get_disk_priv(dp));
	dp->d_sched_policy = new_policy;

	DSCHED_GLOBAL_THREAD_CTX_UNLOCK();

	atomic_add_int(&new_policy->ref_count, 1);
	kprintf("disk scheduler: set policy of %s to %s\n", dp->d_cdev->si_name,
	    new_policy->name);

	/* If we acquired the lock, we also get rid of it */
	if (locked)
		lockmgr(&dsched_lock, LK_RELEASE);
}
struct dsched_policy *
dsched_find_policy(char *search)
{
	struct dsched_policy *policy;
	struct dsched_policy *policy_found = NULL;
	int locked = 0;

	/* Check if it is locked already. If not, we acquire the dsched lock */
	if (lockstatus(&dsched_lock, curthread) != LK_EXCLUSIVE) {
		lockmgr(&dsched_lock, LK_EXCLUSIVE);
		locked = 1;
	}

	TAILQ_FOREACH(policy, &dsched_policy_list, link) {
		if (!strcmp(policy->name, search)) {
			policy_found = policy;
			break;
		}
	}

	/* If we acquired the lock, we also get rid of it */
	if (locked)
		lockmgr(&dsched_lock, LK_RELEASE);

	return policy_found;
}
/*
 * Returns ref'd disk
 */
struct disk *
dsched_find_disk(char *search)
{
	struct disk marker;
	struct disk *dp = NULL;

	while ((dp = disk_enumerate(&marker, dp)) != NULL) {
		if (strcmp(dp->d_cdev->si_name, search) == 0) {
			disk_enumerate_stop(&marker, NULL);
			/* leave ref on dp */
			break;
		}
	}
	return dp;
}
struct disk *
dsched_disk_enumerate(struct disk *marker, struct disk *dp,
		      struct dsched_policy *policy)
{
	while ((dp = disk_enumerate(marker, dp)) != NULL) {
		if (dp->d_sched_policy == policy)
			break;
	}
	/* return the matching disk, or NULL when enumeration is exhausted */
	return dp;
}
struct dsched_policy *
dsched_policy_enumerate(struct dsched_policy *pol)
{
	if (!pol)
		return (TAILQ_FIRST(&dsched_policy_list));
	else
		return (TAILQ_NEXT(pol, link));
}
void
dsched_cancel_bio(struct bio *bp)
{
	bp->bio_buf->b_error = ENXIO;
	bp->bio_buf->b_flags |= B_ERROR;
	bp->bio_buf->b_resid = bp->bio_buf->b_bcount;

	biodone(bp);
}
void
dsched_strategy_raw(struct disk *dp, struct bio *bp)
{
	/*
	 * Ideally, this stuff shouldn't be needed... but just in case, we
	 * leave it in to avoid panics.
	 */
	KASSERT(dp->d_rawdev != NULL, ("dsched_strategy_raw sees NULL d_rawdev!!"));
	if (bp->bio_track != NULL) {
		dsched_debug(LOG_INFO,
		    "dsched_strategy_raw sees non-NULL bio_track!! "
		    "bio: %p\n", bp);
		bp->bio_track = NULL;
	}
	dev_dstrategy(dp->d_rawdev, bp);
}
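/*
 * Synchronous strategy: clone the request into a temporary pbuf, dispatch
 * it to the raw device, wait for it to complete, and then complete the
 * original bio with the result.
 */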
void
dsched_strategy_sync(struct disk *dp, struct bio *bio)
{
	struct buf *bp, *nbp;
	struct bio *nbio;

	bp = bio->bio_buf;

	nbp = getpbuf(NULL);
	nbio = &nbp->b_bio1;

	nbp->b_cmd = bp->b_cmd;
	nbp->b_bufsize = bp->b_bufsize;
	nbp->b_runningbufspace = bp->b_runningbufspace;
	nbp->b_bcount = bp->b_bcount;
	nbp->b_resid = bp->b_resid;
	nbp->b_data = bp->b_data;
#if 0
	/*
	 * Buffers undergoing device I/O do not need a kvabase/size.
	 */
	nbp->b_kvabase = bp->b_kvabase;
	nbp->b_kvasize = bp->b_kvasize;
#endif
	nbp->b_dirtyend = bp->b_dirtyend;

	nbio->bio_done = biodone_sync;
	nbio->bio_flags |= BIO_SYNC;
	nbio->bio_track = NULL;

	nbio->bio_caller_info1.ptr = dp;
	nbio->bio_offset = bio->bio_offset;

	dev_dstrategy(dp->d_rawdev, nbio);
	biowait(nbio, "dschedsync");
	bp->b_resid = nbp->b_resid;
	bp->b_error = nbp->b_error;
	biodone(bio);
#if 0
	nbp->b_kvabase = NULL;
	nbp->b_kvasize = 0;
#endif
	relpbuf(nbp, NULL);
}
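/*
 * Asynchronous strategy: push a new bio that records the owning disk, the
 * policy's private data and the dispatch time, then hand it to the raw
 * device; 'done' is called on completion.
 */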
void
dsched_strategy_async(struct disk *dp, struct bio *bio, biodone_t *done, void *priv)
{
	struct bio *nbio;

	nbio = push_bio(bio);
	nbio->bio_done = done;
	nbio->bio_offset = bio->bio_offset;

	dsched_set_bio_dp(nbio, dp);
	dsched_set_bio_priv(nbio, priv);

	getmicrotime(&nbio->bio_caller_info3.tv);
	dev_dstrategy(dp->d_rawdev, nbio);
}
/*
 * A special bio-done callback function used by policies that
 * implement request polling.
 */
static void
request_polling_biodone(struct bio *bp)
{
	struct dsched_disk_ctx *diskctx = NULL;
	struct disk *dp = NULL;
	struct bio *obio;
	struct dsched_policy *policy;

	dp = dsched_get_bio_dp(bp);
	policy = dp->d_sched_policy;
	diskctx = dsched_get_disk_priv(dp);
	KKASSERT(diskctx && policy);
	dsched_disk_ctx_ref(diskctx);

	/*
	 * XXX:
	 * the bio_done function must not block!
	 */
	if (diskctx->dp->d_sched_policy->bio_done)
		diskctx->dp->d_sched_policy->bio_done(bp);

	obio = pop_bio(bp);
	biodone(obio);

	atomic_subtract_int(&diskctx->current_tag_queue_depth, 1);

	/*
	 * call the polling function,
	 * XXX:
	 * the polling function must not block!
	 */
	if (policy->polling_func)
		policy->polling_func(diskctx);
	else
		dsched_debug(0, "dsched: the policy uses request polling without a polling function!\n");
	dsched_disk_ctx_unref(diskctx);
}
/*
 * A special dsched strategy used by policies that implement request
 * polling (a polling function).
 *
 * The strategy is just like dsched_strategy_async(), but the biodone
 * callback is set to a preset one.
 *
 * If the policy needs its own biodone callback, it should register it
 * in the policy structure (bio_done field).
 *
 * The current_tag_queue_depth is maintained by this function
 * and the request_polling_biodone() function.
 */
void
dsched_strategy_request_polling(struct disk *dp, struct bio *bio, struct dsched_disk_ctx *diskctx)
{
	atomic_add_int(&diskctx->current_tag_queue_depth, 1);
	dsched_strategy_async(dp, bio, request_polling_biodone, dsched_get_bio_priv(bio));
}
/*
 * Ref and deref various structures.  The 1->0 transition of the reference
 * count actually transitions 1->0x80000000 and causes the object to be
 * destroyed.  It is possible for transitory references to occur on the
 * object while it is being destroyed.  We use bit 31 to indicate that
 * destruction is in progress and to prevent nested destructions.
 */
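/*
 * The _ref functions are plain atomic increments; all of the destruction
 * logic described above lives in the corresponding _unref paths below.
 */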
void
dsched_disk_ctx_ref(struct dsched_disk_ctx *diskctx)
{
	int refcount;

	refcount = atomic_fetchadd_int(&diskctx->refcount, 1);
}

void
dsched_thread_io_ref(struct dsched_thread_io *tdio)
{
	int refcount;

	refcount = atomic_fetchadd_int(&tdio->refcount, 1);
}

void
dsched_thread_ctx_ref(struct dsched_thread_ctx *tdctx)
{
	int refcount;

	refcount = atomic_fetchadd_int(&tdctx->refcount, 1);
}
void
dsched_disk_ctx_unref(struct dsched_disk_ctx *diskctx)
{
	int refs;
	int nrefs;

	/*
	 * Handle 1->0 transitions for diskctx and nested destruction
	 * recursions.  If the refs are already in destruction mode (bit 31
	 * set) on the 1->0 transition we don't try to destruct it again.
	 *
	 * 0x80000001->0x80000000 transitions are handled normally and
	 * thus avoid nested destruction.
	 */
	for (;;) {
		refs = diskctx->refcount;
		cpu_ccfence();
		nrefs = refs - 1;

		KKASSERT(((refs ^ nrefs) & 0x80000000) == 0);
		if (nrefs) {
			if (atomic_cmpset_int(&diskctx->refcount, refs, nrefs))
				break;
			continue;
		}
		nrefs = 0x80000000;
		if (atomic_cmpset_int(&diskctx->refcount, refs, nrefs)) {
			dsched_disk_ctx_destroy(diskctx);
			break;
		}
	}
}
static
void
dsched_disk_ctx_destroy(struct dsched_disk_ctx *diskctx)
{
	struct dsched_thread_io *tdio;
	int refs;
	int nrefs;

#if 0
	kprintf("diskctx (%p) destruction started, trace:\n", diskctx);
	print_backtrace(4);
#endif
	lockmgr(&diskctx->lock, LK_EXCLUSIVE);
	while ((tdio = TAILQ_FIRST(&diskctx->tdio_list)) != NULL) {
		KKASSERT(tdio->flags & DSCHED_LINKED_DISK_CTX);
		TAILQ_REMOVE(&diskctx->tdio_list, tdio, dlink);
		atomic_clear_int(&tdio->flags, DSCHED_LINKED_DISK_CTX);
		tdio->diskctx = NULL;
		/* XXX tdio->diskctx->dp->d_sched_policy->destroy_tdio(tdio);*/
		lockmgr(&diskctx->lock, LK_RELEASE);
		dsched_thread_io_unref_destroy(tdio);
		lockmgr(&diskctx->lock, LK_EXCLUSIVE);
	}
	lockmgr(&diskctx->lock, LK_RELEASE);

	/*
	 * Expect diskctx->refcount to be 0x80000000. If it isn't someone
	 * else still has a temporary ref on the diskctx and we have to
	 * transition it back to an undestroyed-state (albeit without any
	 * associations), so the other user destroys it properly when the
	 * ref is released.
	 */
	while ((refs = diskctx->refcount) != 0x80000000) {
		kprintf("dsched_disk_ctx: destroy race diskctx=%p\n", diskctx);
		cpu_ccfence();
		KKASSERT(refs & 0x80000000);
		nrefs = refs & 0x7FFFFFFF;
		if (atomic_cmpset_int(&diskctx->refcount, refs, nrefs))
			return;
	}

	/*
	 * Really for sure now.
	 */
	if (diskctx->dp->d_sched_policy->destroy_diskctx)
		diskctx->dp->d_sched_policy->destroy_diskctx(diskctx);
	objcache_put(dsched_diskctx_cache, diskctx);
	atomic_subtract_int(&dsched_stats.diskctx_allocations, 1);
}
void
dsched_thread_io_unref(struct dsched_thread_io *tdio)
{
	int refs;
	int nrefs;

	/*
	 * Handle 1->0 transitions for tdio and nested destruction
	 * recursions.  If the refs are already in destruction mode (bit 31
	 * set) on the 1->0 transition we don't try to destruct it again.
	 *
	 * 0x80000001->0x80000000 transitions are handled normally and
	 * thus avoid nested destruction.
	 */
	for (;;) {
		refs = tdio->refcount;
		cpu_ccfence();
		nrefs = refs - 1;

		KKASSERT(((refs ^ nrefs) & 0x80000000) == 0);
		if (nrefs) {
			if (atomic_cmpset_int(&tdio->refcount, refs, nrefs))
				break;
			continue;
		}
		nrefs = 0x80000000;
		if (atomic_cmpset_int(&tdio->refcount, refs, nrefs)) {
			dsched_thread_io_destroy(tdio);
			break;
		}
	}
}
/*
 * Unref and destroy the tdio even if additional refs are present.
 */
static
void
dsched_thread_io_unref_destroy(struct dsched_thread_io *tdio)
{
	int refs;
	int nrefs;

	/*
	 * If not already transitioned to destroy-in-progress we transition
	 * to destroy-in-progress, cleanup our ref, and destroy the tdio.
	 */
	for (;;) {
		refs = tdio->refcount;
		cpu_ccfence();
		nrefs = refs - 1;

		KKASSERT(((refs ^ nrefs) & 0x80000000) == 0);
		if (nrefs & 0x80000000) {
			if (atomic_cmpset_int(&tdio->refcount, refs, nrefs))
				break;
			continue;
		}
		nrefs |= 0x80000000;
		if (atomic_cmpset_int(&tdio->refcount, refs, nrefs)) {
			dsched_thread_io_destroy(tdio);
			break;
		}
	}
}
static void
dsched_thread_io_destroy(struct dsched_thread_io *tdio)
{
	struct dsched_thread_ctx *tdctx;
	struct dsched_disk_ctx *diskctx;
	int refs;
	int nrefs;

#if 0
	kprintf("tdio (%p) destruction started, trace:\n", tdio);
	print_backtrace(8);
#endif
	KKASSERT(tdio->qlength == 0);

	while ((diskctx = tdio->diskctx) != NULL) {
		dsched_disk_ctx_ref(diskctx);
		lockmgr(&diskctx->lock, LK_EXCLUSIVE);
		if (diskctx != tdio->diskctx) {
			lockmgr(&diskctx->lock, LK_RELEASE);
			dsched_disk_ctx_unref(diskctx);
			continue;
		}
		KKASSERT(tdio->flags & DSCHED_LINKED_DISK_CTX);
		if (diskctx->dp->d_sched_policy->destroy_tdio)
			diskctx->dp->d_sched_policy->destroy_tdio(tdio);
		TAILQ_REMOVE(&diskctx->tdio_list, tdio, dlink);
		atomic_clear_int(&tdio->flags, DSCHED_LINKED_DISK_CTX);
		tdio->diskctx = NULL;
		dsched_thread_io_unref(tdio);
		lockmgr(&diskctx->lock, LK_RELEASE);
		dsched_disk_ctx_unref(diskctx);
	}
	while ((tdctx = tdio->tdctx) != NULL) {
		dsched_thread_ctx_ref(tdctx);
		lockmgr(&tdctx->lock, LK_EXCLUSIVE);
		if (tdctx != tdio->tdctx) {
			lockmgr(&tdctx->lock, LK_RELEASE);
			dsched_thread_ctx_unref(tdctx);
			continue;
		}
		KKASSERT(tdio->flags & DSCHED_LINKED_THREAD_CTX);
		TAILQ_REMOVE(&tdctx->tdio_list, tdio, link);
		atomic_clear_int(&tdio->flags, DSCHED_LINKED_THREAD_CTX);
		tdio->tdctx = NULL;
		dsched_thread_io_unref(tdio);
		lockmgr(&tdctx->lock, LK_RELEASE);
		dsched_thread_ctx_unref(tdctx);
	}

	/*
	 * Expect tdio->refcount to be 0x80000000.  If it isn't someone else
	 * still has a temporary ref on the tdio and we have to transition
	 * it back to an undestroyed-state (albeit without any associations)
	 * so the other user destroys it properly when the ref is released.
	 */
	while ((refs = tdio->refcount) != 0x80000000) {
		kprintf("dsched_thread_io: destroy race tdio=%p\n", tdio);
		cpu_ccfence();
		KKASSERT(refs & 0x80000000);
		nrefs = refs & 0x7FFFFFFF;
		if (atomic_cmpset_int(&tdio->refcount, refs, nrefs))
			return;
	}

	/*
	 * Really for sure now.
	 */
	objcache_put(dsched_tdio_cache, tdio);
	atomic_subtract_int(&dsched_stats.tdio_allocations, 1);
}
void
dsched_thread_ctx_unref(struct dsched_thread_ctx *tdctx)
{
	int refs;
	int nrefs;

	/*
	 * Handle 1->0 transitions for tdctx and nested destruction
	 * recursions.  If the refs are already in destruction mode (bit 31
	 * set) on the 1->0 transition we don't try to destruct it again.
	 *
	 * 0x80000001->0x80000000 transitions are handled normally and
	 * thus avoid nested destruction.
	 */
	for (;;) {
		refs = tdctx->refcount;
		cpu_ccfence();
		nrefs = refs - 1;

		KKASSERT(((refs ^ nrefs) & 0x80000000) == 0);
		if (nrefs) {
			if (atomic_cmpset_int(&tdctx->refcount, refs, nrefs))
				break;
			continue;
		}
		nrefs = 0x80000000;
		if (atomic_cmpset_int(&tdctx->refcount, refs, nrefs)) {
			dsched_thread_ctx_destroy(tdctx);
			break;
		}
	}
}
static void
dsched_thread_ctx_destroy(struct dsched_thread_ctx *tdctx)
{
	struct dsched_thread_io *tdio;

#if 0
	kprintf("tdctx (%p) destruction started, trace:\n", tdctx);
	print_backtrace(8);
#endif
	DSCHED_GLOBAL_THREAD_CTX_LOCK();

	lockmgr(&tdctx->lock, LK_EXCLUSIVE);

	while ((tdio = TAILQ_FIRST(&tdctx->tdio_list)) != NULL) {
		KKASSERT(tdio->flags & DSCHED_LINKED_THREAD_CTX);
		TAILQ_REMOVE(&tdctx->tdio_list, tdio, link);
		atomic_clear_int(&tdio->flags, DSCHED_LINKED_THREAD_CTX);
		tdio->tdctx = NULL;
		lockmgr(&tdctx->lock, LK_RELEASE);	/* avoid deadlock */
		dsched_thread_io_unref_destroy(tdio);
		lockmgr(&tdctx->lock, LK_EXCLUSIVE);
	}
	KKASSERT(tdctx->refcount == 0x80000000);
	TAILQ_REMOVE(&dsched_tdctx_list, tdctx, link);

	lockmgr(&tdctx->lock, LK_RELEASE);

	DSCHED_GLOBAL_THREAD_CTX_UNLOCK();

	objcache_put(dsched_tdctx_cache, tdctx);
	atomic_subtract_int(&dsched_stats.tdctx_allocations, 1);
}
/*
 * Ensures that a tdio is assigned to tdctx and disk.
 */
void
dsched_thread_io_alloc(struct disk *dp, struct dsched_thread_ctx *tdctx,
    struct dsched_policy *pol)
{
	struct dsched_thread_io *tdio;
#if 0
	dsched_disk_ctx_ref(dsched_get_disk_priv(dp));
#endif
	tdio = objcache_get(dsched_tdio_cache, M_WAITOK);
	bzero(tdio, DSCHED_THREAD_IO_MAX_SZ);

	dsched_thread_io_ref(tdio);	/* prevent ripout */
	dsched_thread_io_ref(tdio);	/* for diskctx ref */

	DSCHED_THREAD_IO_LOCKINIT(tdio);
	tdio->dp = dp;

	tdio->diskctx = dsched_get_disk_priv(dp);
	TAILQ_INIT(&tdio->queue);

	if (pol->new_tdio)
		pol->new_tdio(tdio);

	lockmgr(&tdio->diskctx->lock, LK_EXCLUSIVE);
	TAILQ_INSERT_TAIL(&tdio->diskctx->tdio_list, tdio, dlink);
	atomic_set_int(&tdio->flags, DSCHED_LINKED_DISK_CTX);
	lockmgr(&tdio->diskctx->lock, LK_RELEASE);

	if (tdctx) {
		/*
		 * Put the tdio in the tdctx list.  Inherit the temporary
		 * ref (one ref for each list).
		 */
		DSCHED_THREAD_CTX_LOCK(tdctx);
		tdio->tdctx = tdctx;
		tdio->p = tdctx->p;
		TAILQ_INSERT_TAIL(&tdctx->tdio_list, tdio, link);
		atomic_set_int(&tdio->flags, DSCHED_LINKED_THREAD_CTX);
		DSCHED_THREAD_CTX_UNLOCK(tdctx);
	} else {
		dsched_thread_io_unref(tdio);
	}

	tdio->debug_policy = pol;
	tdio->debug_inited = 0xF00F1234;

	atomic_add_int(&dsched_stats.tdio_allocations, 1);
}
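/*
 * Allocates and initializes a ref'd diskctx for the given disk and policy.
 */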
struct dsched_disk_ctx *
dsched_disk_ctx_alloc(struct disk *dp, struct dsched_policy *pol)
{
	struct dsched_disk_ctx *diskctx;

	diskctx = objcache_get(dsched_diskctx_cache, M_WAITOK);
	bzero(diskctx, DSCHED_DISK_CTX_MAX_SZ);
	dsched_disk_ctx_ref(diskctx);
	diskctx->dp = dp;
	DSCHED_DISK_CTX_LOCKINIT(diskctx);
	TAILQ_INIT(&diskctx->tdio_list);
	/*
	 * XXX: magic number 32: most devices have a tag queue
	 * of depth 32.
	 * Better to retrieve a more precise value from the driver.
	 */
	diskctx->max_tag_queue_depth = 32;
	diskctx->current_tag_queue_depth = 0;

	atomic_add_int(&dsched_stats.diskctx_allocations, 1);
	if (pol->new_diskctx)
		pol->new_diskctx(diskctx);
	return diskctx;
}
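/*
 * Allocates a ref'd tdctx for the given process and pre-allocates a tdio
 * for every known disk, using whatever policy each disk currently runs.
 */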
struct dsched_thread_ctx *
dsched_thread_ctx_alloc(struct proc *p)
{
	struct dsched_thread_ctx *tdctx;
	struct disk marker;
	struct disk *dp;

	tdctx = objcache_get(dsched_tdctx_cache, M_WAITOK);
	bzero(tdctx, DSCHED_THREAD_CTX_MAX_SZ);
	dsched_thread_ctx_ref(tdctx);
#if 0
	kprintf("dsched_thread_ctx_alloc, new tdctx = %p\n", tdctx);
#endif
	DSCHED_THREAD_CTX_LOCKINIT(tdctx);
	TAILQ_INIT(&tdctx->tdio_list);
	tdctx->p = p;

	DSCHED_GLOBAL_THREAD_CTX_LOCK();
	dp = NULL;
	while ((dp = disk_enumerate(&marker, dp)) != NULL)
		dsched_thread_io_alloc(dp, tdctx, dp->d_sched_policy);

	TAILQ_INSERT_TAIL(&dsched_tdctx_list, tdctx, link);
	DSCHED_GLOBAL_THREAD_CTX_UNLOCK();

	atomic_add_int(&dsched_stats.tdctx_allocations, 1);
	/* XXX: no callback here */
	return tdctx;
}
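/*
 * Attach a new policy to a disk: allocate its diskctx and create a tdio on
 * this disk for every existing thread context.
 */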
void
policy_new(struct disk *dp, struct dsched_policy *pol)
{
	struct dsched_thread_ctx *tdctx;
	struct dsched_disk_ctx *diskctx;

	diskctx = dsched_disk_ctx_alloc(dp, pol);
	dsched_disk_ctx_ref(diskctx);
	dsched_set_disk_priv(dp, diskctx);

	/*
	 * XXX this is really really expensive!
	 */
	TAILQ_FOREACH(tdctx, &dsched_tdctx_list, link)
		dsched_thread_io_alloc(dp, tdctx, pol);
}
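/*
 * Detach the current policy's diskctx from the disk and drop its refs.
 */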
void
policy_destroy(struct disk *dp)
{
	struct dsched_disk_ctx *diskctx;

	diskctx = dsched_get_disk_priv(dp);
	KKASSERT(diskctx != NULL);

	dsched_disk_ctx_unref(diskctx);	/* from prepare */
	dsched_disk_ctx_unref(diskctx);	/* from alloc */

	dsched_set_disk_priv(dp, NULL);
}
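/*
 * Buffer allocation hook: tag the buf with the issuing thread's tdctx (if
 * any) so dsched_queue() can later find the right tdio.
 */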
void
dsched_new_buf(struct buf *bp)
{
	struct dsched_thread_ctx *tdctx = NULL;

	if (dsched_inited == 0)
		return;

	if (curproc != NULL) {
		tdctx = dsched_get_proc_priv(curproc);
	} else {
		/* This is a kernel thread, so no proc info is available */
		tdctx = dsched_get_thread_priv(curthread);
	}

#if 0
	/*
	 * XXX: hack. we don't want this assert because we aren't catching all
	 * threads. mi_startup() is still getting away without a tdctx.
	 */

	/* by now we should have a tdctx. if not, something bad is going on */
	KKASSERT(tdctx != NULL);
#endif

	if (tdctx) {
		dsched_thread_ctx_ref(tdctx);
	}
	dsched_set_buf_priv(bp, tdctx);
}
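/*
 * Buffer release hook: drop the tdctx reference taken in dsched_new_buf().
 */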
void
dsched_exit_buf(struct buf *bp)
{
	struct dsched_thread_ctx *tdctx;

	tdctx = dsched_get_buf_priv(bp);
	if (tdctx != NULL) {
		dsched_clr_buf_priv(bp);
		dsched_thread_ctx_unref(tdctx);
	}
}
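/*
 * Process/thread lifecycle hooks: allocate a tdctx when a process or kernel
 * thread is created and tear it down again on exit.
 */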
void
dsched_new_proc(struct proc *p)
{
	struct dsched_thread_ctx *tdctx;

	if (dsched_inited == 0)
		return;

	KKASSERT(p != NULL);

	tdctx = dsched_thread_ctx_alloc(p);
	tdctx->p = p;
	dsched_thread_ctx_ref(tdctx);

	dsched_set_proc_priv(p, tdctx);
	atomic_add_int(&dsched_stats.nprocs, 1);
}
void
dsched_new_thread(struct thread *td)
{
	struct dsched_thread_ctx *tdctx;

	if (dsched_inited == 0)
		return;

	KKASSERT(td != NULL);

	tdctx = dsched_thread_ctx_alloc(NULL);
	tdctx->td = td;
	dsched_thread_ctx_ref(tdctx);

	dsched_set_thread_priv(td, tdctx);
	atomic_add_int(&dsched_stats.nthreads, 1);
}
void
dsched_exit_proc(struct proc *p)
{
	struct dsched_thread_ctx *tdctx;

	if (dsched_inited == 0)
		return;

	KKASSERT(p != NULL);

	tdctx = dsched_get_proc_priv(p);
	KKASSERT(tdctx != NULL);

	tdctx->dead = 0xDEAD;
	dsched_set_proc_priv(p, NULL);

	dsched_thread_ctx_unref(tdctx);	/* one for alloc, */
	dsched_thread_ctx_unref(tdctx);	/* one for ref */
	atomic_subtract_int(&dsched_stats.nprocs, 1);
}
void
dsched_exit_thread(struct thread *td)
{
	struct dsched_thread_ctx *tdctx;

	if (dsched_inited == 0)
		return;

	KKASSERT(td != NULL);

	tdctx = dsched_get_thread_priv(td);
	KKASSERT(tdctx != NULL);

	tdctx->dead = 0xDEAD;
	dsched_set_thread_priv(td, 0);

	dsched_thread_ctx_unref(tdctx);	/* one for alloc, */
	dsched_thread_ctx_unref(tdctx);	/* one for ref */
	atomic_subtract_int(&dsched_stats.nthreads, 1);
}
/*
 * Creates a ref'd tdio on the given disk for the current thread.
 *
 * The tdio may have additional refs for the diskctx and tdctx it resides on.
 */
void
dsched_new_policy_thread_tdio(struct dsched_disk_ctx *diskctx,
    struct dsched_policy *pol)
{
	struct dsched_thread_ctx *tdctx;

	DSCHED_GLOBAL_THREAD_CTX_LOCK();

	tdctx = dsched_get_thread_priv(curthread);
	KKASSERT(tdctx != NULL);
	dsched_thread_io_alloc(diskctx->dp, tdctx, pol);

	DSCHED_GLOBAL_THREAD_CTX_UNLOCK();
}
/* DEFAULT NOOP POLICY */

static int
noop_prepare(struct dsched_disk_ctx *diskctx)
{
	return 0;
}

static void
noop_teardown(struct dsched_disk_ctx *diskctx)
{
}

static void
noop_cancel(struct dsched_disk_ctx *diskctx)
{
}

static int
noop_queue(struct dsched_disk_ctx *diskctx, struct dsched_thread_io *tdio,
    struct bio *bio)
{
	dsched_strategy_raw(diskctx->dp, bio);
#if 0
	dsched_strategy_async(diskctx->dp, bio, noop_completed, NULL);
#endif
	return 0;
}
/*
 * SYSINIT stuff
 */
static void
dsched_init(void)
{
	dsched_tdio_cache = objcache_create("dsched-tdio-cache", 0, 0,
					    NULL, NULL, NULL,
					    objcache_malloc_alloc,
					    objcache_malloc_free,
					    &dsched_thread_io_malloc_args);

	dsched_tdctx_cache = objcache_create("dsched-tdctx-cache", 0, 0,
					     NULL, NULL, NULL,
					     objcache_malloc_alloc,
					     objcache_malloc_free,
					     &dsched_thread_ctx_malloc_args);

	dsched_diskctx_cache = objcache_create("dsched-diskctx-cache", 0, 0,
					       NULL, NULL, NULL,
					       objcache_malloc_alloc,
					       objcache_malloc_free,
					       &dsched_disk_ctx_malloc_args);

	bzero(&dsched_stats, sizeof(struct dsched_stats));

	lockinit(&dsched_lock, "dsched lock", 0, LK_CANRECURSE);
	DSCHED_GLOBAL_THREAD_CTX_LOCKINIT();

	dsched_register(&dsched_noop_policy);

	dsched_inited = 1;
}

static void
dsched_uninit(void)
{
}

SYSINIT(subr_dsched_register, SI_SUB_CREATE_INIT-1, SI_ORDER_FIRST, dsched_init, NULL);
SYSUNINIT(subr_dsched_register, SI_SUB_CREATE_INIT-1, SI_ORDER_ANY, dsched_uninit, NULL);
/*
 * SYSCTL stuff
 */
static int
sysctl_dsched_stats(SYSCTL_HANDLER_ARGS)
{
	return (sysctl_handle_opaque(oidp, &dsched_stats, sizeof(struct dsched_stats), req));
}

static int
sysctl_dsched_list_policies(SYSCTL_HANDLER_ARGS)
{
	struct dsched_policy *pol = NULL;
	int error, first = 1;

	lockmgr(&dsched_lock, LK_EXCLUSIVE);

	while ((pol = dsched_policy_enumerate(pol))) {
		if (!first) {
			error = SYSCTL_OUT(req, " ", 1);
			if (error)
				break;
		} else {
			first = 0;
		}
		error = SYSCTL_OUT(req, pol->name, strlen(pol->name));
		if (error)
			break;
	}

	lockmgr(&dsched_lock, LK_RELEASE);

	error = SYSCTL_OUT(req, "", 1);

	return error;
}
static int
sysctl_dsched_policy(SYSCTL_HANDLER_ARGS)
{
	char buf[DSCHED_POLICY_NAME_LENGTH];
	struct dsched_disk_ctx *diskctx = arg1;
	struct dsched_policy *pol = NULL;
	int error;

	if (diskctx == NULL) {
		return 0;
	}

	lockmgr(&dsched_lock, LK_EXCLUSIVE);

	pol = diskctx->dp->d_sched_policy;
	memcpy(buf, pol->name, DSCHED_POLICY_NAME_LENGTH);

	error = sysctl_handle_string(oidp, buf, DSCHED_POLICY_NAME_LENGTH, req);
	if (error || req->newptr == NULL) {
		lockmgr(&dsched_lock, LK_RELEASE);
		return (error);
	}

	pol = dsched_find_policy(buf);
	if (pol == NULL) {
		lockmgr(&dsched_lock, LK_RELEASE);
		return 0;
	}

	dsched_switch(diskctx->dp, pol);

	lockmgr(&dsched_lock, LK_RELEASE);

	return error;
}
static int
sysctl_dsched_default_policy(SYSCTL_HANDLER_ARGS)
{
	char buf[DSCHED_POLICY_NAME_LENGTH];
	struct dsched_policy *pol = NULL;
	int error;

	lockmgr(&dsched_lock, LK_EXCLUSIVE);

	pol = default_policy;
	memcpy(buf, pol->name, DSCHED_POLICY_NAME_LENGTH);

	error = sysctl_handle_string(oidp, buf, DSCHED_POLICY_NAME_LENGTH, req);
	if (error || req->newptr == NULL) {
		lockmgr(&dsched_lock, LK_RELEASE);
		return (error);
	}

	pol = dsched_find_policy(buf);
	if (pol == NULL) {
		lockmgr(&dsched_lock, LK_RELEASE);
		return 0;
	}

	default_set = 1;
	default_policy = pol;

	lockmgr(&dsched_lock, LK_RELEASE);

	return error;
}
SYSCTL_NODE(, OID_AUTO, dsched, CTLFLAG_RD, NULL,
    "Disk Scheduler Framework (dsched) magic");
SYSCTL_NODE(_dsched, OID_AUTO, policy, CTLFLAG_RW, NULL,
    "List of disks and their policies");
SYSCTL_INT(_dsched, OID_AUTO, debug, CTLFLAG_RW, &dsched_debug_enable,
    0, "Enable dsched debugging");
SYSCTL_PROC(_dsched, OID_AUTO, stats, CTLTYPE_OPAQUE|CTLFLAG_RD,
    0, sizeof(struct dsched_stats), sysctl_dsched_stats, "dsched_stats",
    "dsched statistics");
SYSCTL_PROC(_dsched, OID_AUTO, policies, CTLTYPE_STRING|CTLFLAG_RD,
    NULL, 0, sysctl_dsched_list_policies, "A", "names of available policies");
SYSCTL_PROC(_dsched_policy, OID_AUTO, default, CTLTYPE_STRING|CTLFLAG_RW,
    NULL, 0, sysctl_dsched_default_policy, "A", "default dsched policy");
static void
dsched_sysctl_add_disk(struct dsched_disk_ctx *diskctx, char *name)
{
	if (!(diskctx->flags & DSCHED_SYSCTL_CTX_INITED)) {
		diskctx->flags |= DSCHED_SYSCTL_CTX_INITED;
		sysctl_ctx_init(&diskctx->sysctl_ctx);
	}

	SYSCTL_ADD_PROC(&diskctx->sysctl_ctx, SYSCTL_STATIC_CHILDREN(_dsched_policy),
	    OID_AUTO, name, CTLTYPE_STRING|CTLFLAG_RW,
	    diskctx, 0, sysctl_dsched_policy, "A", "policy");
}