2 * Copyright (c) 2009, 2010 The DragonFly Project. All rights reserved.
4 * This code is derived from software contributed to The DragonFly Project
5 * by Alex Hornung <ahornung@gmail.com>
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * 3. Neither the name of The DragonFly Project nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific, prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/kernel.h>
38 #include <sys/sysctl.h>
41 #include <sys/diskslice.h>
43 #include <sys/malloc.h>
44 #include <machine/md_var.h>
45 #include <sys/ctype.h>
46 #include <sys/syslog.h>
47 #include <sys/device.h>
48 #include <sys/msgport.h>
49 #include <sys/msgport2.h>
51 #include <sys/dsched.h>
52 #include <sys/fcntl.h>
53 #include <machine/varargs.h>
/*
 * File-scope dsched state: malloc tag, forward declarations of the noop
 * policy handlers, framework locks and statistics, objcache argument
 * blocks / caches for the three context types, the global thread-context
 * and policy lists, and the built-in noop policy used as default.
 * NOTE(review): this file is a damaged extraction -- logical lines are
 * split mid-statement and some original lines are missing throughout.
 */
55 MALLOC_DEFINE(M_DSCHED
, "dsched", "dsched allocs");
57 static dsched_prepare_t noop_prepare
;
58 static dsched_teardown_t noop_teardown
;
59 static dsched_cancel_t noop_cancel
;
60 static dsched_queue_t noop_queue
;
62 static void dsched_sysctl_add_disk(struct dsched_disk_ctx
*diskctx
, char *name
);
64 static int dsched_inited
= 0;
65 static int default_set
= 0;
67 struct lock dsched_lock
;
68 static int dsched_debug_enable
= 0;
70 struct dsched_stats dsched_stats
;
/* objcache_malloc_args: object size + malloc tag for each context cache */
72 struct objcache_malloc_args dsched_disk_ctx_malloc_args
= {
73 DSCHED_DISK_CTX_MAX_SZ
, M_DSCHED
};
74 struct objcache_malloc_args dsched_thread_io_malloc_args
= {
75 DSCHED_THREAD_IO_MAX_SZ
, M_DSCHED
};
76 struct objcache_malloc_args dsched_thread_ctx_malloc_args
= {
77 DSCHED_THREAD_CTX_MAX_SZ
, M_DSCHED
};
79 static struct objcache
*dsched_diskctx_cache
;
80 static struct objcache
*dsched_tdctx_cache
;
81 static struct objcache
*dsched_tdio_cache
;
/* Global list of all per-thread contexts, guarded by dsched_tdctx_lock. */
83 TAILQ_HEAD(, dsched_thread_ctx
) dsched_tdctx_list
=
84 TAILQ_HEAD_INITIALIZER(dsched_tdctx_list
);
86 struct lock dsched_tdctx_lock
;
88 static struct dsched_policy_head dsched_policy_list
=
89 TAILQ_HEAD_INITIALIZER(dsched_policy_list
);
/*
 * Built-in no-op policy: passes bios straight through to the raw device.
 * NOTE(review): initializer fields beyond bio_queue (e.g. .name) are
 * missing from this extraction.
 */
91 static struct dsched_policy dsched_noop_policy
= {
94 .prepare
= noop_prepare
,
95 .teardown
= noop_teardown
,
96 .cancel_all
= noop_cancel
,
97 .bio_queue
= noop_queue
100 static struct dsched_policy
*default_policy
= &dsched_noop_policy
;
103 * dsched_debug() is a SYSCTL and TUNABLE controlled debug output function
/*
 * Print debug output only when `level` is at or below the
 * dsched_debug_enable sysctl/tunable threshold.
 * NOTE(review): the varargs body (va_start/kvprintf/va_end) and return
 * are missing from this extraction.
 */
107 dsched_debug(int level
, char *fmt
, ...)
112 if (level
<= dsched_debug_enable
)
120 * Called on disk_create()
121 * tries to read which policy to use from loader.conf, if there's
122 * none specified, the default policy is used.
/*
 * Resolve the scheduling policy for a newly created disk.  Probes, in
 * order: dsched.policy.<head_name><unit>, dsched.policy.<head_name>
 * (wildcard), then dsched.policy.default; falls back to default_policy
 * when no tunable matches a registered policy.  Finally registers the
 * disk's sysctl node.  Runs under dsched_lock.
 * NOTE(review): extraction dropped lines here (e.g. the `char *ptr`
 * declaration, loop bodies that strip trailing digits, and closing
 * braces) -- confirm against the full source.
 */
125 dsched_disk_create_callback(struct disk
*dp
, const char *head_name
, int unit
)
127 char tunable_key
[SPECNAMELEN
+ 48];
128 char sched_policy
[DSCHED_POLICY_NAME_LENGTH
];
130 struct dsched_policy
*policy
= NULL
;
132 /* Also look for serno stuff? */
133 /* kprintf("dsched_disk_create_callback() for disk %s%d\n", head_name, unit); */
134 lockmgr(&dsched_lock
, LK_EXCLUSIVE
);
/* 1st try: exact per-unit tunable, e.g. dsched.policy.da0 */
136 ksnprintf(tunable_key
, sizeof(tunable_key
), "dsched.policy.%s%d",
138 if (TUNABLE_STR_FETCH(tunable_key
, sched_policy
,
139 sizeof(sched_policy
)) != 0) {
140 policy
= dsched_find_policy(sched_policy
);
/* 2nd try: per-driver tunable without the unit number */
143 ksnprintf(tunable_key
, sizeof(tunable_key
), "dsched.policy.%s",
145 for (ptr
= tunable_key
; *ptr
; ptr
++) {
149 if (!policy
&& (TUNABLE_STR_FETCH(tunable_key
, sched_policy
,
150 sizeof(sched_policy
)) != 0)) {
151 policy
= dsched_find_policy(sched_policy
);
/* 3rd try: global default tunable (only if no boot-time default set) */
154 ksnprintf(tunable_key
, sizeof(tunable_key
), "dsched.policy.default");
155 if (!policy
&& !default_set
&& (TUNABLE_STR_FETCH(tunable_key
, sched_policy
,
156 sizeof(sched_policy
)) != 0)) {
157 policy
= dsched_find_policy(sched_policy
);
162 dsched_debug(0, "No policy for %s%d specified, "
163 "or policy not found\n", head_name
, unit
);
165 dsched_set_policy(dp
, default_policy
);
167 dsched_set_policy(dp
, policy
);
/* Build the sysctl node name; dm "mapper/" devices keep no unit suffix */
170 if (strncmp(head_name
, "mapper/", strlen("mapper/")) == 0)
171 ksnprintf(tunable_key
, sizeof(tunable_key
), "%s", head_name
);
173 ksnprintf(tunable_key
, sizeof(tunable_key
), "%s%d", head_name
, unit
);
174 for (ptr
= tunable_key
; *ptr
; ptr
++) {
178 dsched_sysctl_add_disk(
179 (struct dsched_disk_ctx
*)dsched_get_disk_priv(dp
),
182 lockmgr(&dsched_lock
, LK_RELEASE
);
186 * Called from disk_setdiskinfo (or rather _setdiskinfo). This will check if
187 * there's any policy associated with the serial number of the device.
/*
 * Look up a per-serial-number policy tunable (dsched.policy.<serialno>)
 * once the disk's serial number becomes known, and switch the disk to it
 * if found.  No-op when the device has no serial number.
 * NOTE(review): extraction dropped lines (the ksnprintf argument using
 * info->d_serialno, the early return, closing braces).
 */
190 dsched_disk_update_callback(struct disk
*dp
, struct disk_info
*info
)
192 char tunable_key
[SPECNAMELEN
+ 48];
193 char sched_policy
[DSCHED_POLICY_NAME_LENGTH
];
194 struct dsched_policy
*policy
= NULL
;
196 if (info
->d_serialno
== NULL
)
199 lockmgr(&dsched_lock
, LK_EXCLUSIVE
);
201 ksnprintf(tunable_key
, sizeof(tunable_key
), "dsched.policy.%s",
204 if((TUNABLE_STR_FETCH(tunable_key
, sched_policy
,
205 sizeof(sched_policy
)) != 0)) {
206 policy
= dsched_find_policy(sched_policy
);
210 dsched_switch(dp
, policy
);
/* Re-register the disk's sysctl entry (presumably under the serialno) */
213 dsched_sysctl_add_disk(
214 (struct dsched_disk_ctx
*)dsched_get_disk_priv(dp
),
217 lockmgr(&dsched_lock
, LK_RELEASE
);
221 * Called on disk_destroy()
222 * shuts down the scheduler core and cancels all remaining bios
/*
 * Tear down scheduling for a disk being destroyed: swap in the noop
 * policy, cancel all bios still queued under the old policy, run its
 * teardown hook, free the disk's sysctl context if one was created, and
 * drop the old policy's registration reference.  Runs under dsched_lock.
 */
225 dsched_disk_destroy_callback(struct disk
*dp
)
227 struct dsched_policy
*old_policy
;
228 struct dsched_disk_ctx
*diskctx
;
230 lockmgr(&dsched_lock
, LK_EXCLUSIVE
);
232 diskctx
= dsched_get_disk_priv(dp
);
/* Point the disk at the noop policy before dismantling the old one */
234 old_policy
= dp
->d_sched_policy
;
235 dp
->d_sched_policy
= &dsched_noop_policy
;
236 old_policy
->cancel_all(dsched_get_disk_priv(dp
));
237 old_policy
->teardown(dsched_get_disk_priv(dp
));
239 if (diskctx
->flags
& DSCHED_SYSCTL_CTX_INITED
)
240 sysctl_ctx_free(&diskctx
->sysctl_ctx
);
243 atomic_subtract_int(&old_policy
->ref_count
, 1);
244 KKASSERT(old_policy
->ref_count
>= 0);
246 lockmgr(&dsched_lock
, LK_RELEASE
);
/*
 * Main bio dispatch entry: route `bio` on disk `dp` through the disk's
 * active scheduling policy.  If the buffer carries no thread context
 * (kernel-internal I/O), bypass scheduling and dispatch raw.  Otherwise
 * find this thread's per-disk tdio, hand the bio to the policy's
 * bio_queue hook, and fall back to raw dispatch if the policy declines
 * (non-zero return).  Reference counts taken here are dropped before
 * returning.
 * NOTE(review): extraction dropped the `found = 1`/`break` in the
 * lookup loop, the NULL-tdctx branch structure, and the returns.
 */
251 dsched_queue(struct disk
*dp
, struct bio
*bio
)
253 struct dsched_thread_ctx
*tdctx
;
254 struct dsched_thread_io
*tdio
;
255 struct dsched_disk_ctx
*diskctx
;
257 int found
= 0, error
= 0;
259 tdctx
= dsched_get_buf_priv(bio
->bio_buf
);
261 /* We don't handle this case, let dsched dispatch */
262 atomic_add_int(&dsched_stats
.no_tdctx
, 1);
263 dsched_strategy_raw(dp
, bio
);
267 DSCHED_THREAD_CTX_LOCK(tdctx
);
/* Locate this thread's tdio for the target disk */
269 KKASSERT(!TAILQ_EMPTY(&tdctx
->tdio_list
));
270 TAILQ_FOREACH(tdio
, &tdctx
->tdio_list
, link
) {
271 if (tdio
->dp
== dp
) {
272 dsched_thread_io_ref(tdio
);
278 DSCHED_THREAD_CTX_UNLOCK(tdctx
);
/* The bio no longer needs its thread-context reference */
279 dsched_clr_buf_priv(bio
->bio_buf
);
280 dsched_thread_ctx_unref(tdctx
); /* acquired on new_buf */
282 KKASSERT(found
== 1);
283 diskctx
= dsched_get_disk_priv(dp
);
284 dsched_disk_ctx_ref(diskctx
);
285 error
= dp
->d_sched_policy
->bio_queue(diskctx
, tdio
, bio
);
/* Policy refused the bio -- dispatch it directly */
288 dsched_strategy_raw(dp
, bio
);
290 dsched_disk_ctx_unref(diskctx
);
291 dsched_thread_io_unref(tdio
);
296 * Called from each module_init or module_attach of each policy
297 * registers the policy in the local policy list.
/*
 * Register a scheduling policy under dsched_lock.  Fails (logged via
 * dsched_debug) when a policy with the same name already exists; on
 * success the list holds one reference on the policy.
 * NOTE(review): the if/else around the duplicate check and the return
 * value handling are missing from this extraction.
 */
300 dsched_register(struct dsched_policy
*d_policy
)
302 struct dsched_policy
*policy
;
305 lockmgr(&dsched_lock
, LK_EXCLUSIVE
);
307 policy
= dsched_find_policy(d_policy
->name
);
310 TAILQ_INSERT_TAIL(&dsched_policy_list
, d_policy
, link
);
311 atomic_add_int(&d_policy
->ref_count
, 1);
313 dsched_debug(LOG_ERR
, "Policy with name %s already registered!\n",
318 lockmgr(&dsched_lock
, LK_RELEASE
);
323 * Called from each module_detach of each policy
324 * unregisters the policy
/*
 * Remove a policy from the registry under dsched_lock.  Refuses while
 * the policy is still in use (ref_count > 1, i.e. any disk holds a
 * reference beyond the registration one).
 * NOTE(review): the not-found branch and return statements are missing
 * from this extraction.
 */
327 dsched_unregister(struct dsched_policy
*d_policy
)
329 struct dsched_policy
*policy
;
331 lockmgr(&dsched_lock
, LK_EXCLUSIVE
);
332 policy
= dsched_find_policy(d_policy
->name
);
/* Still referenced by a disk: bail out busy */
335 if (policy
->ref_count
> 1) {
336 lockmgr(&dsched_lock
, LK_RELEASE
);
339 TAILQ_REMOVE(&dsched_policy_list
, policy
, link
);
340 atomic_subtract_int(&policy
->ref_count
, 1);
341 KKASSERT(policy
->ref_count
== 0);
343 lockmgr(&dsched_lock
, LK_RELEASE
);
349 * switches the policy by first removing the old one and then
350 * enabling the new one.
/*
 * Switch a disk's active policy: drop the old policy's disk reference,
 * install the noop policy as a bridge, run the old policy's teardown,
 * then activate the new policy via dsched_set_policy().  Whole sequence
 * runs under dsched_lock; no-op when old == new.
 */
353 dsched_switch(struct disk
*dp
, struct dsched_policy
*new_policy
)
355 struct dsched_policy
*old_policy
;
357 /* If we are asked to set the same policy, do nothing */
358 if (dp
->d_sched_policy
== new_policy
)
361 /* lock everything down, diskwise */
362 lockmgr(&dsched_lock
, LK_EXCLUSIVE
);
363 old_policy
= dp
->d_sched_policy
;
365 atomic_subtract_int(&old_policy
->ref_count
, 1);
366 KKASSERT(old_policy
->ref_count
>= 0);
/* Bridge through noop so I/O stays serviceable during teardown */
368 dp
->d_sched_policy
= &dsched_noop_policy
;
369 old_policy
->teardown(dsched_get_disk_priv(dp
));
372 /* Bring everything back to life */
373 dsched_set_policy(dp
, new_policy
);
374 lockmgr(&dsched_lock
, LK_RELEASE
);
380 * Loads a given policy and attaches it to the specified disk.
381 * Also initializes the core for the policy
/*
 * Attach `new_policy` to `dp`: allocate per-disk/per-thread contexts
 * (policy_new), run the policy's prepare hook, install it on the disk
 * and take a reference.  Acquires dsched_lock only if the caller does
 * not already hold it exclusively, and releases it only in that case.
 * NOTE(review): `!(lockstatus(...)) == LK_EXCLUSIVE` negates the status
 * BEFORE comparing -- operator-precedence bug; the intent is
 * `lockstatus(...) != LK_EXCLUSIVE`.  Same pattern appears in
 * dsched_find_policy().  Cannot be fixed here without the missing
 * `locked` bookkeeping lines; flagging for upstream.
 */
384 dsched_set_policy(struct disk
*dp
, struct dsched_policy
*new_policy
)
388 /* Check if it is locked already. if not, we acquire the devfs lock */
389 if (!(lockstatus(&dsched_lock
, curthread
)) == LK_EXCLUSIVE
) {
390 lockmgr(&dsched_lock
, LK_EXCLUSIVE
);
394 policy_new(dp
, new_policy
);
395 new_policy
->prepare(dsched_get_disk_priv(dp
));
396 dp
->d_sched_policy
= new_policy
;
397 atomic_add_int(&new_policy
->ref_count
, 1);
398 kprintf("disk scheduler: set policy of %s to %s\n", dp
->d_cdev
->si_name
,
401 /* If we acquired the lock, we also get rid of it */
403 lockmgr(&dsched_lock
, LK_RELEASE
);
/*
 * Linear search of the registered-policy list by name; returns the
 * matching policy or NULL.  Takes dsched_lock only when the caller does
 * not already hold it exclusively.
 * NOTE(review): same lockstatus precedence bug as dsched_set_policy()
 * (`!(...) == LK_EXCLUSIVE` instead of `... != LK_EXCLUSIVE`); the
 * `locked` flag lines and `break`/`return` are missing from this
 * extraction.
 */
406 struct dsched_policy
*
407 dsched_find_policy(char *search
)
409 struct dsched_policy
*policy
;
410 struct dsched_policy
*policy_found
= NULL
;
413 /* Check if it is locked already. if not, we acquire the devfs lock */
414 if (!(lockstatus(&dsched_lock
, curthread
)) == LK_EXCLUSIVE
) {
415 lockmgr(&dsched_lock
, LK_EXCLUSIVE
);
419 TAILQ_FOREACH(policy
, &dsched_policy_list
, link
) {
420 if (!strcmp(policy
->name
, search
)) {
421 policy_found
= policy
;
426 /* If we acquired the lock, we also get rid of it */
428 lockmgr(&dsched_lock
, LK_RELEASE
);
/*
 * Walk all disks via disk_enumerate() and return the one whose cdev
 * name matches `search` exactly (strcmp), else NULL.
 * NOTE(review): the `dp_found = dp` assignment, loop termination and
 * return are missing from this extraction.
 */
434 dsched_find_disk(char *search
)
436 struct disk
*dp_found
= NULL
;
437 struct disk
*dp
= NULL
;
439 while((dp
= disk_enumerate(dp
))) {
440 if (!strcmp(dp
->d_cdev
->si_name
, search
)) {
/*
 * Resume disk enumeration from `dp` and return the next disk whose
 * active scheduler is `policy`.
 * NOTE(review): the `return dp;` inside the loop and the trailing
 * NULL return are missing from this extraction.
 */
450 dsched_disk_enumerate(struct disk
*dp
, struct dsched_policy
*policy
)
452 while ((dp
= disk_enumerate(dp
))) {
453 if (dp
->d_sched_policy
== policy
)
/*
 * Iterator over the registered-policy list: NULL starts at the head,
 * otherwise returns the successor of `pol` (NULL at end of list).
 * Caller is expected to hold dsched_lock (see sysctl_dsched_list_policies).
 */
460 struct dsched_policy
*
461 dsched_policy_enumerate(struct dsched_policy
*pol
)
464 return (TAILQ_FIRST(&dsched_policy_list
));
466 return (TAILQ_NEXT(pol
, link
));
/*
 * Fail a queued bio: mark its buffer with ENXIO/B_ERROR and report the
 * whole transfer as residual (nothing was done).
 * NOTE(review): the completion call (biodone) is missing from this
 * extraction.
 */
470 dsched_cancel_bio(struct bio
*bp
)
472 bp
->bio_buf
->b_error
= ENXIO
;
473 bp
->bio_buf
->b_flags
|= B_ERROR
;
474 bp
->bio_buf
->b_resid
= bp
->bio_buf
->b_bcount
;
/*
 * Dispatch a bio directly to the disk's raw device, bypassing all
 * scheduling.  Defensively clears a stale bio_track pointer (logged at
 * LOG_INFO) before handing off to dev_dstrategy().
 */
480 dsched_strategy_raw(struct disk
*dp
, struct bio
*bp
)
483 * Ideally, this stuff shouldn't be needed... but just in case, we leave it in
486 KASSERT(dp
->d_rawdev
!= NULL
, ("dsched_strategy_raw sees NULL d_rawdev!!"));
487 if(bp
->bio_track
!= NULL
) {
488 dsched_debug(LOG_INFO
,
489 "dsched_strategy_raw sees non-NULL bio_track!! "
491 bp
->bio_track
= NULL
;
493 dev_dstrategy(dp
->d_rawdev
, bp
);
/*
 * Synchronous dispatch: clone the caller's buf/bio into a temporary
 * (nbp/nbio), send the clone to the raw device with biodone_sync/BIO_SYNC,
 * biowait() for completion, then copy the residual count and error back
 * into the original buffer.
 * NOTE(review): extraction dropped the allocation of nbp/nbio
 * (presumably getpbuf/push_bio), several field copies, and the release
 * of the temporary buffer after the b_kvabase reset.
 */
497 dsched_strategy_sync(struct disk
*dp
, struct bio
*bio
)
499 struct buf
*bp
, *nbp
;
/* Mirror the relevant buf fields into the clone */
507 nbp
->b_cmd
= bp
->b_cmd
;
508 nbp
->b_bufsize
= bp
->b_bufsize
;
509 nbp
->b_runningbufspace
= bp
->b_runningbufspace
;
510 nbp
->b_bcount
= bp
->b_bcount
;
511 nbp
->b_resid
= bp
->b_resid
;
512 nbp
->b_data
= bp
->b_data
;
515 * Buffers undergoing device I/O do not need a kvabase/size.
517 nbp
->b_kvabase
= bp
->b_kvabase
;
518 nbp
->b_kvasize
= bp
->b_kvasize
;
520 nbp
->b_dirtyend
= bp
->b_dirtyend
;
/* Synchronous completion plumbing on the clone bio */
522 nbio
->bio_done
= biodone_sync
;
523 nbio
->bio_flags
|= BIO_SYNC
;
524 nbio
->bio_track
= NULL
;
526 nbio
->bio_caller_info1
.ptr
= dp
;
527 nbio
->bio_offset
= bio
->bio_offset
;
529 dev_dstrategy(dp
->d_rawdev
, nbio
);
530 biowait(nbio
, "dschedsync");
/* Propagate the result back to the caller's buffer */
531 bp
->b_resid
= nbp
->b_resid
;
532 bp
->b_error
= nbp
->b_error
;
/* Detach borrowed kva before the clone is released */
535 nbp
->b_kvabase
= NULL
;
/*
 * Asynchronous dispatch: push a new bio layer onto `bio`, install the
 * caller's completion callback and private data, timestamp the
 * submission (bio_caller_info3.tv), and hand the bio to the raw device.
 * `done` runs at I/O completion; `priv` is retrievable from the bio.
 */
542 dsched_strategy_async(struct disk
*dp
, struct bio
*bio
, biodone_t
*done
, void *priv
)
546 nbio
= push_bio(bio
);
547 nbio
->bio_done
= done
;
548 nbio
->bio_offset
= bio
->bio_offset
;
550 dsched_set_bio_dp(nbio
, dp
);
551 dsched_set_bio_priv(nbio
, priv
);
/* Record submission time for policy latency accounting */
553 getmicrotime(&nbio
->bio_caller_info3
.tv
);
554 dev_dstrategy(dp
->d_rawdev
, nbio
);
/*
 * Take a reference on a per-disk context.  The assert catches use of an
 * already-destroyed context (refcount would have gone negative).
 */
558 dsched_disk_ctx_ref(struct dsched_disk_ctx
*diskctx
)
562 refcount
= atomic_fetchadd_int(&diskctx
->refcount
, 1);
564 KKASSERT(refcount
>= 0);
/*
 * Take a reference on a per-thread-per-disk I/O context (tdio).
 */
568 dsched_thread_io_ref(struct dsched_thread_io
*tdio
)
572 refcount
= atomic_fetchadd_int(&tdio
->refcount
, 1);
574 KKASSERT(refcount
>= 0);
/*
 * Take a reference on a per-thread context (tdctx).
 */
578 dsched_thread_ctx_ref(struct dsched_thread_ctx
*tdctx
)
582 refcount
= atomic_fetchadd_int(&tdctx
->refcount
, 1);
584 KKASSERT(refcount
>= 0);
/*
 * Drop a per-disk context reference; on last release, destroy it:
 * bias the refcount by -0x400 to mark in-destruction (so late ref/unref
 * races trip the asserts), unlink and unref every tdio still on the
 * disk list, run the policy's destroy_diskctx hook, and return the
 * object to its objcache.
 * NOTE(review): the `if (refcount == 1)` gate around the destruction
 * path and the debug conditional around the kprintf are missing from
 * this extraction.
 */
588 dsched_disk_ctx_unref(struct dsched_disk_ctx
*diskctx
)
590 struct dsched_thread_io
*tdio
, *tdio2
;
593 refcount
= atomic_fetchadd_int(&diskctx
->refcount
, -1);
/* Valid states: alive (>= 0) or already deep in destruction (<= -0x400) */
596 KKASSERT(refcount
>= 0 || refcount
<= -0x400);
599 atomic_subtract_int(&diskctx
->refcount
, 0x400); /* mark as: in destruction */
601 kprintf("diskctx (%p) destruction started, trace:\n", diskctx
);
604 lockmgr(&diskctx
->lock
, LK_EXCLUSIVE
);
605 TAILQ_FOREACH_MUTABLE(tdio
, &diskctx
->tdio_list
, dlink
, tdio2
) {
606 TAILQ_REMOVE(&diskctx
->tdio_list
, tdio
, dlink
);
607 tdio
->flags
&= ~DSCHED_LINKED_DISK_CTX
;
608 dsched_thread_io_unref(tdio
);
610 lockmgr(&diskctx
->lock
, LK_RELEASE
);
611 if (diskctx
->dp
->d_sched_policy
->destroy_diskctx
)
612 diskctx
->dp
->d_sched_policy
->destroy_diskctx(diskctx
);
613 objcache_put(dsched_diskctx_cache
, diskctx
);
614 atomic_subtract_int(&dsched_stats
.diskctx_allocations
, 1);
/*
 * Drop a tdio reference; on last release, destroy it: mark
 * in-destruction (-0x400 bias), unlink from the disk-context list and
 * the thread-context list (each under its own lock, guarded by the
 * LINKED flags), run the policy's destroy_tdio hook, free to objcache,
 * and finally drop the disk-context reference the tdio held.  Requires
 * the queue to be empty (qlength == 0).
 * NOTE(review): the `if (refcount == 1)` destruction gate, the tdctx
 * assignment before its KKASSERT, and closing braces are missing from
 * this extraction.
 */
619 dsched_thread_io_unref(struct dsched_thread_io
*tdio
)
621 struct dsched_thread_ctx
*tdctx
;
622 struct dsched_disk_ctx
*diskctx
;
625 refcount
= atomic_fetchadd_int(&tdio
->refcount
, -1);
627 KKASSERT(refcount
>= 0 || refcount
<= -0x400);
630 atomic_subtract_int(&tdio
->refcount
, 0x400); /* mark as: in destruction */
632 kprintf("tdio (%p) destruction started, trace:\n", tdio
);
635 diskctx
= tdio
->diskctx
;
636 KKASSERT(diskctx
!= NULL
);
637 KKASSERT(tdio
->qlength
== 0);
/* Unlink from the disk context's tdio list, if still linked */
639 if (tdio
->flags
& DSCHED_LINKED_DISK_CTX
) {
640 lockmgr(&diskctx
->lock
, LK_EXCLUSIVE
);
642 TAILQ_REMOVE(&diskctx
->tdio_list
, tdio
, dlink
);
643 tdio
->flags
&= ~DSCHED_LINKED_DISK_CTX
;
645 lockmgr(&diskctx
->lock
, LK_RELEASE
);
/* Unlink from the owning thread context's list, if still linked */
648 if (tdio
->flags
& DSCHED_LINKED_THREAD_CTX
) {
650 KKASSERT(tdctx
!= NULL
);
652 lockmgr(&tdctx
->lock
, LK_EXCLUSIVE
);
654 TAILQ_REMOVE(&tdctx
->tdio_list
, tdio
, link
);
655 tdio
->flags
&= ~DSCHED_LINKED_THREAD_CTX
;
657 lockmgr(&tdctx
->lock
, LK_RELEASE
);
659 if (tdio
->diskctx
->dp
->d_sched_policy
->destroy_tdio
)
660 tdio
->diskctx
->dp
->d_sched_policy
->destroy_tdio(tdio
);
661 objcache_put(dsched_tdio_cache
, tdio
);
662 atomic_subtract_int(&dsched_stats
.tdio_allocations
, 1);
/* Release the diskctx reference taken when the tdio was allocated */
664 dsched_disk_ctx_unref(diskctx
);
/*
 * Drop a tdctx reference; on last release, destroy it: mark
 * in-destruction (-0x400 bias), unlink and unref every tdio it still
 * owns, remove it from the global tdctx list (under the global lock),
 * and return it to the objcache.
 * NOTE(review): the `if (refcount == 1)` destruction gate and the
 * per-tdctx lock usage around the tdio walk are missing from this
 * extraction.
 */
670 dsched_thread_ctx_unref(struct dsched_thread_ctx
*tdctx
)
672 struct dsched_thread_io
*tdio
, *tdio2
;
675 refcount
= atomic_fetchadd_int(&tdctx
->refcount
, -1);
677 KKASSERT(refcount
>= 0 || refcount
<= -0x400);
680 atomic_subtract_int(&tdctx
->refcount
, 0x400); /* mark as: in destruction */
682 kprintf("tdctx (%p) destruction started, trace:\n", tdctx
);
685 DSCHED_GLOBAL_THREAD_CTX_LOCK();
687 TAILQ_FOREACH_MUTABLE(tdio
, &tdctx
->tdio_list
, link
, tdio2
) {
688 TAILQ_REMOVE(&tdctx
->tdio_list
, tdio
, link
);
689 tdio
->flags
&= ~DSCHED_LINKED_THREAD_CTX
;
690 dsched_thread_io_unref(tdio
);
692 TAILQ_REMOVE(&dsched_tdctx_list
, tdctx
, link
);
694 DSCHED_GLOBAL_THREAD_CTX_UNLOCK();
696 objcache_put(dsched_tdctx_cache
, tdctx
);
697 atomic_subtract_int(&dsched_stats
.tdctx_allocations
, 1);
/*
 * Allocate and initialize a tdio binding thread context `tdctx` to disk
 * `dp` under policy `pol`: zeroed objcache allocation, initial
 * reference, lock init, insertion onto both the disk context's list
 * (DSCHED_LINKED_DISK_CTX) and, when tdctx is non-NULL, the thread
 * context's list (DSCHED_LINKED_THREAD_CTX).  Also takes a diskctx
 * reference that dsched_thread_io_unref() releases.
 * NOTE(review): the tdio->dp/tdctx assignments, the policy new_tdio
 * callback, diskctx locking around the insert, and the return are
 * missing from this extraction.
 */
702 struct dsched_thread_io
*
703 dsched_thread_io_alloc(struct disk
*dp
, struct dsched_thread_ctx
*tdctx
,
704 struct dsched_policy
*pol
)
706 struct dsched_thread_io
*tdio
;
708 dsched_disk_ctx_ref(dsched_get_disk_priv(dp
));
710 tdio
= objcache_get(dsched_tdio_cache
, M_WAITOK
);
711 bzero(tdio
, DSCHED_THREAD_IO_MAX_SZ
);
713 /* XXX: maybe we do need another ref for the disk list for tdio */
714 dsched_thread_io_ref(tdio
);
716 DSCHED_THREAD_IO_LOCKINIT(tdio
);
719 tdio
->diskctx
= dsched_get_disk_priv(dp
);
720 TAILQ_INIT(&tdio
->queue
);
725 TAILQ_INSERT_TAIL(&tdio
->diskctx
->tdio_list
, tdio
, dlink
);
726 tdio
->flags
|= DSCHED_LINKED_DISK_CTX
;
732 /* Put the tdio in the tdctx list */
733 DSCHED_THREAD_CTX_LOCK(tdctx
);
734 TAILQ_INSERT_TAIL(&tdctx
->tdio_list
, tdio
, link
);
735 DSCHED_THREAD_CTX_UNLOCK(tdctx
);
736 tdio
->flags
|= DSCHED_LINKED_THREAD_CTX
;
739 atomic_add_int(&dsched_stats
.tdio_allocations
, 1);
/*
 * Allocate and initialize a per-disk context for `dp` under policy
 * `pol`: zeroed objcache allocation, initial reference, lock init,
 * empty tdio list, then the policy's optional new_diskctx hook.
 * NOTE(review): the diskctx->dp assignment and return are missing from
 * this extraction.
 */
744 struct dsched_disk_ctx
*
745 dsched_disk_ctx_alloc(struct disk
*dp
, struct dsched_policy
*pol
)
747 struct dsched_disk_ctx
*diskctx
;
749 diskctx
= objcache_get(dsched_diskctx_cache
, M_WAITOK
);
750 bzero(diskctx
, DSCHED_DISK_CTX_MAX_SZ
);
751 dsched_disk_ctx_ref(diskctx
);
753 DSCHED_DISK_CTX_LOCKINIT(diskctx
);
754 TAILQ_INIT(&diskctx
->tdio_list
);
756 atomic_add_int(&dsched_stats
.diskctx_allocations
, 1);
757 if (pol
->new_diskctx
)
758 pol
->new_diskctx(diskctx
);
/*
 * Allocate a thread context for process `p` (may be NULL for pure
 * kernel threads): zeroed objcache allocation, initial reference, lock
 * init, a tdio for every existing disk (each disk's current policy),
 * then insertion onto the global tdctx list.
 * NOTE(review): the debug conditional around the kprintf, the `p`
 * assignment into the tdctx, and the return are missing from this
 * extraction.
 */
763 struct dsched_thread_ctx
*
764 dsched_thread_ctx_alloc(struct proc
*p
)
766 struct dsched_thread_ctx
*tdctx
;
767 struct dsched_thread_io
*tdio
;
768 struct disk
*dp
= NULL
;
770 tdctx
= objcache_get(dsched_tdctx_cache
, M_WAITOK
);
771 bzero(tdctx
, DSCHED_THREAD_CTX_MAX_SZ
);
772 dsched_thread_ctx_ref(tdctx
);
774 kprintf("dsched_thread_ctx_alloc, new tdctx = %p\n", tdctx
);
776 DSCHED_THREAD_CTX_LOCKINIT(tdctx
);
777 TAILQ_INIT(&tdctx
->tdio_list
);
/* Pre-create a tdio for every disk under its currently active policy */
781 while ((dp
= disk_enumerate(dp
))) {
782 tdio
= dsched_thread_io_alloc(dp
, tdctx
, dp
->d_sched_policy
);
785 DSCHED_GLOBAL_THREAD_CTX_LOCK();
786 TAILQ_INSERT_TAIL(&dsched_tdctx_list
, tdctx
, link
);
787 DSCHED_GLOBAL_THREAD_CTX_UNLOCK();
789 atomic_add_int(&dsched_stats
.tdctx_allocations
, 1);
790 /* XXX: no callback here */
/*
 * Instantiate policy `pol` on disk `dp`: allocate its disk context
 * (plus an extra reference released in policy_destroy), publish it as
 * the disk's private data, and create a tdio for every live thread
 * context under the global lock.
 */
795 policy_new(struct disk
*dp
, struct dsched_policy
*pol
) {
796 struct dsched_thread_ctx
*tdctx
;
797 struct dsched_disk_ctx
*diskctx
;
798 struct dsched_thread_io
*tdio
;
800 diskctx
= dsched_disk_ctx_alloc(dp
, pol
);
801 dsched_disk_ctx_ref(diskctx
);
802 dsched_set_disk_priv(dp
, diskctx
);
804 DSCHED_GLOBAL_THREAD_CTX_LOCK();
805 TAILQ_FOREACH(tdctx
, &dsched_tdctx_list
, link
) {
806 tdio
= dsched_thread_io_alloc(dp
, tdctx
, pol
);
808 DSCHED_GLOBAL_THREAD_CTX_UNLOCK();
/*
 * Counterpart of policy_new(): drop both diskctx references (the one
 * taken in policy_new and the allocation's own) and clear the disk's
 * private pointer.  The final unref triggers diskctx destruction.
 */
813 policy_destroy(struct disk
*dp
) {
814 struct dsched_disk_ctx
*diskctx
;
816 diskctx
= dsched_get_disk_priv(dp
);
817 KKASSERT(diskctx
!= NULL
);
819 dsched_disk_ctx_unref(diskctx
); /* from prepare */
820 dsched_disk_ctx_unref(diskctx
); /* from alloc */
822 dsched_set_disk_priv(dp
, NULL
);
/*
 * Hook run when a new buf is created: attach the current thread's
 * tdctx to the buffer (process-level context when curproc exists,
 * otherwise the kernel thread's own), taking a reference that
 * dsched_exit_buf()/dsched_queue() later releases.  No-op before
 * dsched is initialized.
 */
826 dsched_new_buf(struct buf
*bp
)
828 struct dsched_thread_ctx
*tdctx
= NULL
;
830 if (dsched_inited
== 0)
833 if (curproc
!= NULL
) {
834 tdctx
= dsched_get_proc_priv(curproc
);
836 /* This is a kernel thread, so no proc info is available */
837 tdctx
= dsched_get_thread_priv(curthread
);
842 * XXX: hack. we don't want this assert because we aren't catching all
843 * threads. mi_startup() is still getting away without an tdctx.
846 /* by now we should have an tdctx. if not, something bad is going on */
847 KKASSERT(tdctx
!= NULL
);
851 dsched_thread_ctx_ref(tdctx
);
853 dsched_set_buf_priv(bp
, tdctx
);
/*
 * Hook run when a buf is released: detach and unref the tdctx that
 * dsched_new_buf() attached (when one is still present).
 * NOTE(review): the NULL check around the unref appears to be among the
 * lines dropped by this extraction.
 */
857 dsched_exit_buf(struct buf
*bp
)
859 struct dsched_thread_ctx
*tdctx
;
861 tdctx
= dsched_get_buf_priv(bp
);
863 dsched_clr_buf_priv(bp
);
864 dsched_thread_ctx_unref(tdctx
);
/*
 * Process-creation hook: allocate a tdctx for `p`, take an extra
 * reference (dropped in dsched_exit_proc together with the allocation
 * reference), store it as the process's private data, and bump the
 * process counter.  No-op before dsched is initialized.
 */
869 dsched_new_proc(struct proc
*p
)
871 struct dsched_thread_ctx
*tdctx
;
873 if (dsched_inited
== 0)
878 tdctx
= dsched_thread_ctx_alloc(p
);
880 dsched_thread_ctx_ref(tdctx
);
882 dsched_set_proc_priv(p
, tdctx
);
883 atomic_add_int(&dsched_stats
.nprocs
, 1);
/*
 * Kernel-thread-creation hook: same as dsched_new_proc() but the tdctx
 * is allocated without a process (NULL) and stored on the thread.
 */
888 dsched_new_thread(struct thread
*td
)
890 struct dsched_thread_ctx
*tdctx
;
892 if (dsched_inited
== 0)
895 KKASSERT(td
!= NULL
);
897 tdctx
= dsched_thread_ctx_alloc(NULL
);
899 dsched_thread_ctx_ref(tdctx
);
901 dsched_set_thread_priv(td
, tdctx
);
902 atomic_add_int(&dsched_stats
.nthreads
, 1);
/*
 * Process-exit hook: poison the tdctx (dead = 0xDEAD as a use-after-
 * free tripwire), detach it from the process, drop both references
 * taken at creation, and decrement the process counter.
 */
906 dsched_exit_proc(struct proc
*p
)
908 struct dsched_thread_ctx
*tdctx
;
910 if (dsched_inited
== 0)
915 tdctx
= dsched_get_proc_priv(p
);
916 KKASSERT(tdctx
!= NULL
);
918 tdctx
->dead
= 0xDEAD;
919 dsched_set_proc_priv(p
, NULL
);
921 dsched_thread_ctx_unref(tdctx
); /* one for alloc, */
922 dsched_thread_ctx_unref(tdctx
); /* one for ref */
923 atomic_subtract_int(&dsched_stats
.nprocs
, 1);
/*
 * Kernel-thread-exit hook: mirror of dsched_exit_proc() for
 * thread-attached contexts.
 */
928 dsched_exit_thread(struct thread
*td
)
930 struct dsched_thread_ctx
*tdctx
;
932 if (dsched_inited
== 0)
935 KKASSERT(td
!= NULL
);
937 tdctx
= dsched_get_thread_priv(td
);
938 KKASSERT(tdctx
!= NULL
);
940 tdctx
->dead
= 0xDEAD;
941 dsched_set_thread_priv(td
, 0);
943 dsched_thread_ctx_unref(tdctx
); /* one for alloc, */
944 dsched_thread_ctx_unref(tdctx
); /* one for ref */
945 atomic_subtract_int(&dsched_stats
.nthreads
, 1);
/*
 * Convenience for policies: create a tdio binding the CURRENT thread's
 * tdctx to `diskctx`'s disk under policy `pol`.
 * NOTE(review): the return statement is missing from this extraction.
 */
948 struct dsched_thread_io
*
949 dsched_new_policy_thread_tdio(struct dsched_disk_ctx
*diskctx
,
950 struct dsched_policy
*pol
) {
951 struct dsched_thread_ctx
*tdctx
;
952 struct dsched_thread_io
*tdio
;
954 tdctx
= dsched_get_thread_priv(curthread
);
955 KKASSERT(tdctx
!= NULL
);
957 tdio
= dsched_thread_io_alloc(diskctx
->dp
, tdctx
, pol
);
961 /* DEFAULT NOOP POLICY */
/*
 * No-op policy handlers: prepare/teardown/cancel do nothing; queue
 * passes the bio straight through to the raw device.
 * NOTE(review): the bodies/returns of prepare, teardown and cancel, the
 * bio parameter of noop_queue, and the branch selecting between the raw
 * and async dispatch paths are missing from this extraction.
 */
964 noop_prepare(struct dsched_disk_ctx
*diskctx
)
970 noop_teardown(struct dsched_disk_ctx
*diskctx
)
976 noop_cancel(struct dsched_disk_ctx
*diskctx
)
982 noop_queue(struct dsched_disk_ctx
*diskctx
, struct dsched_thread_io
*tdio
,
985 dsched_strategy_raw(diskctx
->dp
, bio
);
987 dsched_strategy_async(diskctx
->dp
, bio
, noop_completed
, NULL
);
/*
 * Framework initialization (SYSINIT-driven): create the three objcaches
 * backed by malloc, zero the statistics, initialize the framework locks
 * (dsched_lock is recursion-capable), and register the built-in noop
 * policy.
 * NOTE(review): the enclosing dsched_init() signature, the
 * dsched_inited flag set, and any surrounding lines are missing from
 * this extraction.
 */
998 dsched_tdio_cache
= objcache_create("dsched-tdio-cache", 0, 0,
1000 objcache_malloc_alloc
,
1001 objcache_malloc_free
,
1002 &dsched_thread_io_malloc_args
);
1004 dsched_tdctx_cache
= objcache_create("dsched-tdctx-cache", 0, 0,
1006 objcache_malloc_alloc
,
1007 objcache_malloc_free
,
1008 &dsched_thread_ctx_malloc_args
);
1010 dsched_diskctx_cache
= objcache_create("dsched-diskctx-cache", 0, 0,
1012 objcache_malloc_alloc
,
1013 objcache_malloc_free
,
1014 &dsched_disk_ctx_malloc_args
);
1016 bzero(&dsched_stats
, sizeof(struct dsched_stats
));
1018 lockinit(&dsched_lock
, "dsched lock", 0, LK_CANRECURSE
);
1019 DSCHED_GLOBAL_THREAD_CTX_LOCKINIT();
1021 dsched_register(&dsched_noop_policy
);
/* Hook init/uninit into the boot/shutdown sequence just before disk creation. */
1031 SYSINIT(subr_dsched_register
, SI_SUB_CREATE_INIT
-1, SI_ORDER_FIRST
, dsched_init
, NULL
);
1032 SYSUNINIT(subr_dsched_register
, SI_SUB_CREATE_INIT
-1, SI_ORDER_ANY
, dsched_uninit
, NULL
);
/*
 * sysctl handler: export the dsched_stats structure as an opaque blob.
 */
1038 sysctl_dsched_stats(SYSCTL_HANDLER_ARGS
)
1040 return (sysctl_handle_opaque(oidp
, &dsched_stats
, sizeof(struct dsched_stats
), req
));
/*
 * sysctl handler: emit the space-separated names of all registered
 * policies, NUL-terminated, under dsched_lock.
 * NOTE(review): the `first` handling, error-break logic, and return
 * are missing from this extraction.
 */
1044 sysctl_dsched_list_policies(SYSCTL_HANDLER_ARGS
)
1046 struct dsched_policy
*pol
= NULL
;
1047 int error
, first
= 1;
1049 lockmgr(&dsched_lock
, LK_EXCLUSIVE
);
1051 while ((pol
= dsched_policy_enumerate(pol
))) {
1053 error
= SYSCTL_OUT(req
, " ", 1);
1059 error
= SYSCTL_OUT(req
, pol
->name
, strlen(pol
->name
));
1065 lockmgr(&dsched_lock
, LK_RELEASE
);
/* Terminating NUL for the string result */
1067 error
= SYSCTL_OUT(req
, "", 1);
/*
 * Per-disk sysctl handler (arg1 = diskctx): read returns the disk's
 * current policy name; write looks up the named policy and switches the
 * disk to it via dsched_switch().  Runs under dsched_lock, which is
 * released on every exit path.
 * NOTE(review): several returns, the not-found branch after
 * dsched_find_policy, and closing braces are missing from this
 * extraction.
 */
1073 sysctl_dsched_policy(SYSCTL_HANDLER_ARGS
)
1075 char buf
[DSCHED_POLICY_NAME_LENGTH
];
1076 struct dsched_disk_ctx
*diskctx
= arg1
;
1077 struct dsched_policy
*pol
= NULL
;
1080 if (diskctx
== NULL
) {
1084 lockmgr(&dsched_lock
, LK_EXCLUSIVE
);
1086 pol
= diskctx
->dp
->d_sched_policy
;
1087 memcpy(buf
, pol
->name
, DSCHED_POLICY_NAME_LENGTH
);
1089 error
= sysctl_handle_string(oidp
, buf
, DSCHED_POLICY_NAME_LENGTH
, req
);
1090 if (error
|| req
->newptr
== NULL
) {
1091 lockmgr(&dsched_lock
, LK_RELEASE
);
1095 pol
= dsched_find_policy(buf
);
1097 lockmgr(&dsched_lock
, LK_RELEASE
);
1101 dsched_switch(diskctx
->dp
, pol
);
1103 lockmgr(&dsched_lock
, LK_RELEASE
);
/*
 * sysctl handler for dsched.policy.default: read returns the current
 * default policy's name; write looks up the named policy and installs
 * it as default_policy.  Runs under dsched_lock.
 * NOTE(review): returns, the not-found branch, and (presumably) the
 * default_set update are missing from this extraction.
 */
1109 sysctl_dsched_default_policy(SYSCTL_HANDLER_ARGS
)
1111 char buf
[DSCHED_POLICY_NAME_LENGTH
];
1112 struct dsched_policy
*pol
= NULL
;
1115 lockmgr(&dsched_lock
, LK_EXCLUSIVE
);
1117 pol
= default_policy
;
1118 memcpy(buf
, pol
->name
, DSCHED_POLICY_NAME_LENGTH
);
1120 error
= sysctl_handle_string(oidp
, buf
, DSCHED_POLICY_NAME_LENGTH
, req
);
1121 if (error
|| req
->newptr
== NULL
) {
1122 lockmgr(&dsched_lock
, LK_RELEASE
);
1126 pol
= dsched_find_policy(buf
);
1128 lockmgr(&dsched_lock
, LK_RELEASE
);
1133 default_policy
= pol
;
1135 lockmgr(&dsched_lock
, LK_RELEASE
);
/*
 * Static sysctl tree: dsched (root node), dsched.policy (per-disk
 * policy entries attach here), dsched.debug, dsched.stats,
 * dsched.policies and dsched.policy.default.
 */
1140 SYSCTL_NODE(, OID_AUTO
, dsched
, CTLFLAG_RD
, NULL
,
1141 "Disk Scheduler Framework (dsched) magic");
1142 SYSCTL_NODE(_dsched
, OID_AUTO
, policy
, CTLFLAG_RW
, NULL
,
1143 "List of disks and their policies");
1144 SYSCTL_INT(_dsched
, OID_AUTO
, debug
, CTLFLAG_RW
, &dsched_debug_enable
,
1145 0, "Enable dsched debugging");
1146 SYSCTL_PROC(_dsched
, OID_AUTO
, stats
, CTLTYPE_OPAQUE
|CTLFLAG_RD
,
1147 0, sizeof(struct dsched_stats
), sysctl_dsched_stats
, "dsched_stats",
1148 "dsched statistics");
1149 SYSCTL_PROC(_dsched
, OID_AUTO
, policies
, CTLTYPE_STRING
|CTLFLAG_RD
,
1150 NULL
, 0, sysctl_dsched_list_policies
, "A", "names of available policies");
1151 SYSCTL_PROC(_dsched_policy
, OID_AUTO
, default, CTLTYPE_STRING
|CTLFLAG_RW
,
1152 NULL
, 0, sysctl_dsched_default_policy
, "A", "default dsched policy");
/*
 * Register a dsched.policy.<name> string sysctl for a disk, backed by
 * sysctl_dsched_policy with the diskctx as arg1.  Initializes the
 * diskctx's sysctl context exactly once (DSCHED_SYSCTL_CTX_INITED).
 */
1155 dsched_sysctl_add_disk(struct dsched_disk_ctx
*diskctx
, char *name
)
1157 if (!(diskctx
->flags
& DSCHED_SYSCTL_CTX_INITED
)) {
1158 diskctx
->flags
|= DSCHED_SYSCTL_CTX_INITED
;
1159 sysctl_ctx_init(&diskctx
->sysctl_ctx
);
1162 SYSCTL_ADD_PROC(&diskctx
->sysctl_ctx
, SYSCTL_STATIC_CHILDREN(_dsched_policy
),
1163 OID_AUTO
, name
, CTLTYPE_STRING
|CTLFLAG_RW
,
1164 diskctx
, 0, sysctl_dsched_policy
, "A", "policy");