2 * Copyright (c) 2003 Matthew Dillon <dillon@backplane.com> All rights reserved.
3 * cdevsw from kern/kern_conf.c Copyright (c) 1995 Terrence R. Lambert
4 * cdevsw from kern/kern_conf.c Copyright (c) 1995 Julian R. Elishcer,
6 * Copyright (c) 1982, 1986, 1991, 1993
7 * The Regents of the University of California. All rights reserved.
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * $DragonFly: src/sys/kern/kern_device.c,v 1.27 2007/07/23 18:59:50 dillon Exp $
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/module.h>
#include <sys/malloc.h>
#include <sys/conf.h>
#include <sys/bio.h>
#include <sys/buf.h>
#include <sys/vnode.h>
#include <sys/queue.h>
#include <sys/tree.h>
#include <sys/device.h>
#include <sys/syslink_rpc.h>
#include <sys/thread2.h>

#include <machine/stdarg.h>
52 * system link descriptors identify the command in the
53 * arguments structure.
55 #define DDESCNAME(name) __CONCAT(__CONCAT(dev_,name),_desc)
57 #define DEVOP_DESC_INIT(name) \
58 struct syslink_desc DDESCNAME(name) = { \
59 __offsetof(struct dev_ops, __CONCAT(d_, name)), \
62 DEVOP_DESC_INIT(default);
63 DEVOP_DESC_INIT(open
);
64 DEVOP_DESC_INIT(close
);
65 DEVOP_DESC_INIT(read
);
66 DEVOP_DESC_INIT(write
);
67 DEVOP_DESC_INIT(ioctl
);
68 DEVOP_DESC_INIT(dump
);
69 DEVOP_DESC_INIT(psize
);
70 DEVOP_DESC_INIT(poll
);
71 DEVOP_DESC_INIT(mmap
);
72 DEVOP_DESC_INIT(strategy
);
73 DEVOP_DESC_INIT(kqfilter
);
74 DEVOP_DESC_INIT(revoke
);
75 DEVOP_DESC_INIT(clone
);
80 struct dev_ops dead_dev_ops
;
82 struct dev_ops default_dev_ops
= {
84 .d_default
= NULL
, /* must be NULL */
92 .d_strategy
= nostrategy
,
95 .d_kqfilter
= nokqfilter
,
100 /************************************************************************
101 * GENERAL DEVICE API FUNCTIONS *
102 ************************************************************************/
105 dev_dopen(cdev_t dev
, int oflags
, int devtype
, struct ucred
*cred
)
107 struct dev_open_args ap
;
109 ap
.a_head
.a_desc
= &dev_open_desc
;
110 ap
.a_head
.a_dev
= dev
;
111 ap
.a_oflags
= oflags
;
112 ap
.a_devtype
= devtype
;
114 return(dev
->si_ops
->d_open(&ap
));
118 dev_dclose(cdev_t dev
, int fflag
, int devtype
)
120 struct dev_close_args ap
;
122 ap
.a_head
.a_desc
= &dev_close_desc
;
123 ap
.a_head
.a_dev
= dev
;
125 ap
.a_devtype
= devtype
;
126 return(dev
->si_ops
->d_close(&ap
));
130 dev_dread(cdev_t dev
, struct uio
*uio
, int ioflag
)
132 struct dev_read_args ap
;
135 ap
.a_head
.a_desc
= &dev_read_desc
;
136 ap
.a_head
.a_dev
= dev
;
138 ap
.a_ioflag
= ioflag
;
139 error
= dev
->si_ops
->d_read(&ap
);
141 dev
->si_lastread
= time_second
;
146 dev_dwrite(cdev_t dev
, struct uio
*uio
, int ioflag
)
148 struct dev_write_args ap
;
151 dev
->si_lastwrite
= time_second
;
152 ap
.a_head
.a_desc
= &dev_write_desc
;
153 ap
.a_head
.a_dev
= dev
;
155 ap
.a_ioflag
= ioflag
;
156 error
= dev
->si_ops
->d_write(&ap
);
161 dev_dioctl(cdev_t dev
, u_long cmd
, caddr_t data
, int fflag
, struct ucred
*cred
)
163 struct dev_ioctl_args ap
;
165 ap
.a_head
.a_desc
= &dev_ioctl_desc
;
166 ap
.a_head
.a_dev
= dev
;
171 return(dev
->si_ops
->d_ioctl(&ap
));
175 dev_dpoll(cdev_t dev
, int events
)
177 struct dev_poll_args ap
;
180 ap
.a_head
.a_desc
= &dev_poll_desc
;
181 ap
.a_head
.a_dev
= dev
;
182 ap
.a_events
= events
;
183 error
= dev
->si_ops
->d_poll(&ap
);
186 return (seltrue(dev
, events
));
190 dev_dmmap(cdev_t dev
, vm_offset_t offset
, int nprot
)
192 struct dev_mmap_args ap
;
195 ap
.a_head
.a_desc
= &dev_mmap_desc
;
196 ap
.a_head
.a_dev
= dev
;
197 ap
.a_offset
= offset
;
199 error
= dev
->si_ops
->d_mmap(&ap
);
206 dev_dclone(cdev_t dev
)
208 struct dev_clone_args ap
;
210 ap
.a_head
.a_desc
= &dev_clone_desc
;
211 ap
.a_head
.a_dev
= dev
;
212 return (dev
->si_ops
->d_clone(&ap
));
216 dev_drevoke(cdev_t dev
)
218 struct dev_revoke_args ap
;
220 ap
.a_head
.a_desc
= &dev_revoke_desc
;
221 ap
.a_head
.a_dev
= dev
;
222 return (dev
->si_ops
->d_revoke(&ap
));
226 * Core device strategy call, used to issue I/O on a device. There are
227 * two versions, a non-chained version and a chained version. The chained
228 * version reuses a BIO set up by vn_strategy(). The only difference is
229 * that, for now, we do not push a new tracking structure when chaining
230 * from vn_strategy. XXX this will ultimately have to change.
233 dev_dstrategy(cdev_t dev
, struct bio
*bio
)
235 struct dev_strategy_args ap
;
236 struct bio_track
*track
;
238 ap
.a_head
.a_desc
= &dev_strategy_desc
;
239 ap
.a_head
.a_dev
= dev
;
242 KKASSERT(bio
->bio_track
== NULL
);
243 KKASSERT(bio
->bio_buf
->b_cmd
!= BUF_CMD_DONE
);
244 if (bio
->bio_buf
->b_cmd
== BUF_CMD_READ
)
245 track
= &dev
->si_track_read
;
247 track
= &dev
->si_track_write
;
248 atomic_add_int(&track
->bk_active
, 1);
249 bio
->bio_track
= track
;
250 (void)dev
->si_ops
->d_strategy(&ap
);
254 dev_dstrategy_chain(cdev_t dev
, struct bio
*bio
)
256 struct dev_strategy_args ap
;
258 KKASSERT(bio
->bio_track
!= NULL
);
259 ap
.a_head
.a_desc
= &dev_strategy_desc
;
260 ap
.a_head
.a_dev
= dev
;
262 (void)dev
->si_ops
->d_strategy(&ap
);
266 * note: the disk layer is expected to set count, blkno, and secsize before
267 * forwarding the message.
270 dev_ddump(cdev_t dev
)
272 struct dev_dump_args ap
;
274 ap
.a_head
.a_desc
= &dev_dump_desc
;
275 ap
.a_head
.a_dev
= dev
;
279 return(dev
->si_ops
->d_dump(&ap
));
283 dev_dpsize(cdev_t dev
)
285 struct dev_psize_args ap
;
288 ap
.a_head
.a_desc
= &dev_psize_desc
;
289 ap
.a_head
.a_dev
= dev
;
290 error
= dev
->si_ops
->d_psize(&ap
);
292 return (ap
.a_result
);
297 dev_dkqfilter(cdev_t dev
, struct knote
*kn
)
299 struct dev_kqfilter_args ap
;
302 ap
.a_head
.a_desc
= &dev_kqfilter_desc
;
303 ap
.a_head
.a_dev
= dev
;
305 error
= dev
->si_ops
->d_kqfilter(&ap
);
311 /************************************************************************
312 * DEVICE HELPER FUNCTIONS *
313 ************************************************************************/
316 dev_drefs(cdev_t dev
)
318 return(dev
->si_sysref
.refcnt
);
322 dev_dname(cdev_t dev
)
324 return(dev
->si_ops
->head
.name
);
328 dev_dflags(cdev_t dev
)
330 return(dev
->si_ops
->head
.flags
);
336 return(dev
->si_ops
->head
.maj
);
340 * Used when forwarding a request through layers. The caller adjusts
341 * ap->a_head.a_dev and then calls this function.
344 dev_doperate(struct dev_generic_args
*ap
)
346 int (*func
)(struct dev_generic_args
*);
348 func
= *(void **)((char *)ap
->a_dev
->si_ops
+ ap
->a_desc
->sd_offset
);
353 * Used by the console intercept code only. Issue an operation through
354 * a foreign ops structure allowing the ops structure associated
355 * with the device to remain intact.
358 dev_doperate_ops(struct dev_ops
*ops
, struct dev_generic_args
*ap
)
360 int (*func
)(struct dev_generic_args
*);
362 func
= *(void **)((char *)ops
+ ap
->a_desc
->sd_offset
);
367 * Convert a template dev_ops into the real thing by filling in
368 * uninitialized fields.
371 compile_dev_ops(struct dev_ops
*ops
)
375 for (offset
= offsetof(struct dev_ops
, dev_ops_first_field
);
376 offset
<= offsetof(struct dev_ops
, dev_ops_last_field
);
377 offset
+= sizeof(void *)
379 void **func_p
= (void **)((char *)ops
+ offset
);
380 void **def_p
= (void **)((char *)&default_dev_ops
+ offset
);
381 if (*func_p
== NULL
) {
383 *func_p
= ops
->d_default
;
390 /************************************************************************
391 * MAJOR/MINOR SPACE FUNCTION *
392 ************************************************************************/
395 * This makes a dev_ops entry visible to userland (e.g /dev/<blah>).
397 * The kernel can overload a data space by making multiple dev_ops_add()
398 * calls, but only the most recent one in the list matching the mask/match
399 * will be visible to userland.
401 * make_dev() does not automatically call dev_ops_add() (nor do we want it
402 * to, since partition-managed disk devices are overloaded on top of the
405 * Disk devices typically register their major, e.g. 'ad0', and then call
406 * into the disk label management code which overloads its own onto e.g. 'ad0'
407 * to support all the various slice and partition combinations.
409 * The mask/match supplied in this call are a full 32 bits and the same
410 * mask and match must be specified in a later dev_ops_remove() call to
411 * match this add. However, the match value for the minor number should never
412 * have any bits set in the major number's bit range (8-15). The mask value
413 * may be conveniently specified as -1 without creating any major number
419 rb_dev_ops_compare(struct dev_ops_maj
*a
, struct dev_ops_maj
*b
)
423 else if (a
->maj
> b
->maj
)
428 RB_GENERATE2(dev_ops_rb_tree
, dev_ops_maj
, rbnode
, rb_dev_ops_compare
, int, maj
);
430 struct dev_ops_rb_tree dev_ops_rbhead
= RB_INITIALIZER(dev_ops_rbhead
);
433 dev_ops_add(struct dev_ops
*ops
, u_int mask
, u_int match
)
435 static int next_maj
= 256; /* first dynamic major number */
436 struct dev_ops_maj
*rbmaj
;
437 struct dev_ops_link
*link
;
439 compile_dev_ops(ops
);
440 if (ops
->head
.maj
< 0) {
441 while (dev_ops_rb_tree_RB_LOOKUP(&dev_ops_rbhead
, next_maj
) != NULL
) {
445 ops
->head
.maj
= next_maj
;
447 rbmaj
= dev_ops_rb_tree_RB_LOOKUP(&dev_ops_rbhead
, ops
->head
.maj
);
449 rbmaj
= kmalloc(sizeof(*rbmaj
), M_DEVBUF
, M_INTWAIT
| M_ZERO
);
450 rbmaj
->maj
= ops
->head
.maj
;
451 dev_ops_rb_tree_RB_INSERT(&dev_ops_rbhead
, rbmaj
);
453 for (link
= rbmaj
->link
; link
; link
= link
->next
) {
455 * If we get an exact match we usurp the target, but we only print
456 * a warning message if a different device switch is installed.
458 if (link
->mask
== mask
&& link
->match
== match
) {
459 if (link
->ops
!= ops
) {
460 kprintf("WARNING: \"%s\" (%p) is usurping \"%s\"'s"
463 link
->ops
->head
.name
, link
->ops
);
470 * XXX add additional warnings for overlaps
474 link
= kmalloc(sizeof(struct dev_ops_link
), M_DEVBUF
, M_INTWAIT
|M_ZERO
);
478 link
->next
= rbmaj
->link
;
485 * Should only be used by udev2dev().
487 * If the minor number is -1, we match the first ops we find for this
488 * major. If the mask is not -1 then multiple minor numbers can match
491 * Note that this function will return NULL if the minor number is not within
492 * the bounds of the installed mask(s).
494 * The specified minor number should NOT include any major bits.
497 dev_ops_get(int x
, int y
)
499 struct dev_ops_maj
*rbmaj
;
500 struct dev_ops_link
*link
;
502 rbmaj
= dev_ops_rb_tree_RB_LOOKUP(&dev_ops_rbhead
, x
);
505 for (link
= rbmaj
->link
; link
; link
= link
->next
) {
506 if (y
== -1 || (link
->mask
& y
) == link
->match
)
513 * Take a cookie cutter to the major/minor device space for the passed
514 * device and generate a new dev_ops visible to userland which the caller
515 * can then modify. The original device is not modified but portions of
516 * its major/minor space will no longer be visible to userland.
519 dev_ops_add_override(cdev_t backing_dev
, struct dev_ops
*template,
520 u_int mask
, u_int match
)
523 struct dev_ops
*backing_ops
= backing_dev
->si_ops
;
525 ops
= kmalloc(sizeof(struct dev_ops
), M_DEVBUF
, M_INTWAIT
);
527 ops
->head
.name
= backing_ops
->head
.name
;
528 ops
->head
.maj
= backing_ops
->head
.maj
;
529 ops
->head
.flags
|= backing_ops
->head
.flags
& ~D_TRACKCLOSE
;
530 compile_dev_ops(ops
);
531 dev_ops_add(ops
, mask
, match
);
537 dev_ops_remove_override(struct dev_ops
*ops
, u_int mask
, u_int match
)
539 dev_ops_remove(ops
, mask
, match
);
540 if (ops
->head
.refs
) {
541 kprintf("dev_ops_remove_override: %s still has %d refs!\n",
542 ops
->head
.name
, ops
->head
.refs
);
544 bzero(ops
, sizeof(*ops
));
545 kfree(ops
, M_DEVBUF
);
550 * Remove all matching dev_ops entries from the dev_ops_array[] major
551 * array so no new user opens can be performed, and destroy all devices
552 * installed in the hash table that are associated with this dev_ops. (see
553 * destroy_all_devs()).
555 * The mask and match should match a previous call to dev_ops_add*().
558 dev_ops_remove(struct dev_ops
*ops
, u_int mask
, u_int match
)
560 struct dev_ops_maj
*rbmaj
;
561 struct dev_ops_link
*link
;
562 struct dev_ops_link
**plink
;
564 if (ops
!= &dead_dev_ops
)
565 destroy_all_devs(ops
, mask
, match
);
567 rbmaj
= dev_ops_rb_tree_RB_LOOKUP(&dev_ops_rbhead
, ops
->head
.maj
);
569 kprintf("double-remove of dev_ops %p for %s(%d)\n",
570 ops
, ops
->head
.name
, ops
->head
.maj
);
573 for (plink
= &rbmaj
->link
; (link
= *plink
) != NULL
;
574 plink
= &link
->next
) {
575 if (link
->mask
== mask
&& link
->match
== match
) {
576 if (link
->ops
== ops
)
578 kprintf("%s: ERROR: cannot remove dev_ops, "
579 "its major number %d was stolen by %s\n",
580 ops
->head
.name
, ops
->head
.maj
,
586 kprintf("%s(%d)[%08x/%08x]: WARNING: ops removed "
588 ops
->head
.name
, ops
->head
.maj
, mask
, match
);
591 --ops
->head
.refs
; /* XXX ops_release() / record refs */
592 kfree(link
, M_DEVBUF
);
596 * Scrap the RB tree node for the major number if no ops are
597 * installed any longer.
599 if (rbmaj
->link
== NULL
) {
600 dev_ops_rb_tree_RB_REMOVE(&dev_ops_rbhead
, rbmaj
);
601 kfree(rbmaj
, M_DEVBUF
);
606 * The same ops might be used with multiple devices, so don't
607 * complain if the ref count is non-zero.
609 if (ops
->head
.refs
!= 0) {
610 kprintf("%s(%d)[%08x/%08x]: Warning: dev_ops_remove() called "
611 "while %d device refs still exist!\n",
612 ops
->head
.name
, ops
->head
.maj
, mask
, match
,
616 kprintf("%s: ops removed\n", ops
->head
.name
);
623 * dev_ops_scan() - Issue a callback for all installed dev_ops structures.
625 * The scan will terminate if a callback returns a negative number.
627 struct dev_ops_scan_info
{
628 int (*callback
)(struct dev_ops
*, void *);
634 dev_ops_scan_callback(struct dev_ops_maj
*rbmaj
, void *arg
)
636 struct dev_ops_scan_info
*info
= arg
;
637 struct dev_ops_link
*link
;
641 for (link
= rbmaj
->link
; link
; link
= link
->next
) {
642 r
= info
->callback(link
->ops
, info
->arg
);
651 dev_ops_scan(int (*callback
)(struct dev_ops
*, void *), void *arg
)
653 struct dev_ops_scan_info info
= { callback
, arg
};
655 return (dev_ops_rb_tree_RB_SCAN(&dev_ops_rbhead
, NULL
,
656 dev_ops_scan_callback
, &info
));
661 * Release a ops entry. When the ref count reaches zero, recurse
665 dev_ops_release(struct dev_ops
*ops
)
668 if (ops
->head
.refs
== 0) {
674 dev_ops_intercept(cdev_t dev
, struct dev_ops
*iops
)
676 struct dev_ops
*oops
= dev
->si_ops
;
678 compile_dev_ops(iops
);
679 iops
->head
.maj
= oops
->head
.maj
;
680 iops
->head
.data
= oops
->head
.data
;
681 iops
->head
.flags
= oops
->head
.flags
;
683 dev
->si_flags
|= SI_INTERCEPTED
;
689 dev_ops_restore(cdev_t dev
, struct dev_ops
*oops
)
691 struct dev_ops
*iops
= dev
->si_ops
;
694 dev
->si_flags
&= ~SI_INTERCEPTED
;
696 iops
->head
.data
= NULL
;
697 iops
->head
.flags
= 0;
700 /************************************************************************
701 * DEFAULT DEV OPS FUNCTIONS *
702 ************************************************************************/
706 * Unsupported devswitch functions (e.g. for writing to read-only device).
707 * XXX may belong elsewhere.
710 norevoke(struct dev_revoke_args
*ap
)
717 noclone(struct dev_clone_args
*ap
)
720 return (0); /* allow the clone */
724 noopen(struct dev_open_args
*ap
)
730 noclose(struct dev_close_args
*ap
)
736 noread(struct dev_read_args
*ap
)
742 nowrite(struct dev_write_args
*ap
)
748 noioctl(struct dev_ioctl_args
*ap
)
754 nokqfilter(struct dev_kqfilter_args
*ap
)
760 nommap(struct dev_mmap_args
*ap
)
766 nopoll(struct dev_poll_args
*ap
)
773 nostrategy(struct dev_strategy_args
*ap
)
775 struct bio
*bio
= ap
->a_bio
;
777 bio
->bio_buf
->b_flags
|= B_ERROR
;
778 bio
->bio_buf
->b_error
= EOPNOTSUPP
;
784 nopsize(struct dev_psize_args
*ap
)
791 nodump(struct dev_dump_args
*ap
)
797 * XXX this is probably bogus. Any device that uses it isn't checking the
801 nullopen(struct dev_open_args
*ap
)
807 nullclose(struct dev_close_args
*ap
)