2 * Copyright (c) 2003 Matthew Dillon <dillon@backplane.com> All rights reserved.
3 * cdevsw from kern/kern_conf.c Copyright (c) 1995 Terrence R. Lambert
4 * cdevsw from kern/kern_conf.c Copyright (c) 1995 Julian R. Elishcer,
6 * Copyright (c) 1982, 1986, 1991, 1993
7 * The Regents of the University of California. All rights reserved.
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * $DragonFly: src/sys/kern/kern_device.c,v 1.27 2007/07/23 18:59:50 dillon Exp $
32 #include <sys/param.h>
33 #include <sys/systm.h>
34 #include <sys/kernel.h>
35 #include <sys/sysctl.h>
36 #include <sys/systm.h>
37 #include <sys/module.h>
38 #include <sys/malloc.h>
42 #include <sys/vnode.h>
43 #include <sys/queue.h>
44 #include <sys/device.h>
46 #include <sys/syslink_rpc.h>
48 #include <machine/stdarg.h>
49 #include <sys/thread2.h>
52 * system link descriptors identify the command in the
53 * arguments structure.
/*
 * DDESCNAME(name) expands to the identifier dev_<name>_desc, i.e. the
 * name of the syslink descriptor object belonging to device operation
 * <name>.
 */
#define DDESCNAME(name) __CONCAT(__CONCAT(dev_,name),_desc)
57 #define DEVOP_DESC_INIT(name) \
58 struct syslink_desc DDESCNAME(name) = { \
59 __offsetof(struct dev_ops, __CONCAT(d_, name)), \
62 DEVOP_DESC_INIT(default);
63 DEVOP_DESC_INIT(open
);
64 DEVOP_DESC_INIT(close
);
65 DEVOP_DESC_INIT(read
);
66 DEVOP_DESC_INIT(write
);
67 DEVOP_DESC_INIT(ioctl
);
68 DEVOP_DESC_INIT(dump
);
69 DEVOP_DESC_INIT(psize
);
70 DEVOP_DESC_INIT(poll
);
71 DEVOP_DESC_INIT(mmap
);
72 DEVOP_DESC_INIT(strategy
);
73 DEVOP_DESC_INIT(kqfilter
);
74 DEVOP_DESC_INIT(clone
);
79 struct dev_ops dead_dev_ops
;
81 struct dev_ops default_dev_ops
= {
83 .d_default
= NULL
, /* must be NULL */
91 .d_strategy
= nostrategy
,
94 .d_kqfilter
= nokqfilter
,
98 /************************************************************************
99 * GENERAL DEVICE API FUNCTIONS *
100 ************************************************************************/
103 dev_dopen(cdev_t dev
, int oflags
, int devtype
, struct ucred
*cred
)
105 struct dev_open_args ap
;
107 ap
.a_head
.a_desc
= &dev_open_desc
;
108 ap
.a_head
.a_dev
= dev
;
109 ap
.a_oflags
= oflags
;
110 ap
.a_devtype
= devtype
;
112 return(dev
->si_ops
->d_open(&ap
));
116 dev_dclose(cdev_t dev
, int fflag
, int devtype
)
118 struct dev_close_args ap
;
120 ap
.a_head
.a_desc
= &dev_close_desc
;
121 ap
.a_head
.a_dev
= dev
;
123 ap
.a_devtype
= devtype
;
124 return(dev
->si_ops
->d_close(&ap
));
128 dev_dread(cdev_t dev
, struct uio
*uio
, int ioflag
)
130 struct dev_read_args ap
;
133 ap
.a_head
.a_desc
= &dev_read_desc
;
134 ap
.a_head
.a_dev
= dev
;
136 ap
.a_ioflag
= ioflag
;
137 error
= dev
->si_ops
->d_read(&ap
);
139 dev
->si_lastread
= time_second
;
144 dev_dwrite(cdev_t dev
, struct uio
*uio
, int ioflag
)
146 struct dev_write_args ap
;
149 dev
->si_lastwrite
= time_second
;
150 ap
.a_head
.a_desc
= &dev_write_desc
;
151 ap
.a_head
.a_dev
= dev
;
153 ap
.a_ioflag
= ioflag
;
154 error
= dev
->si_ops
->d_write(&ap
);
159 dev_dioctl(cdev_t dev
, u_long cmd
, caddr_t data
, int fflag
, struct ucred
*cred
)
161 struct dev_ioctl_args ap
;
163 ap
.a_head
.a_desc
= &dev_ioctl_desc
;
164 ap
.a_head
.a_dev
= dev
;
169 return(dev
->si_ops
->d_ioctl(&ap
));
173 dev_dpoll(cdev_t dev
, int events
)
175 struct dev_poll_args ap
;
178 ap
.a_head
.a_desc
= &dev_poll_desc
;
179 ap
.a_head
.a_dev
= dev
;
180 ap
.a_events
= events
;
181 error
= dev
->si_ops
->d_poll(&ap
);
184 return (seltrue(dev
, events
));
188 dev_dmmap(cdev_t dev
, vm_offset_t offset
, int nprot
)
190 struct dev_mmap_args ap
;
193 ap
.a_head
.a_desc
= &dev_mmap_desc
;
194 ap
.a_head
.a_dev
= dev
;
195 ap
.a_offset
= offset
;
197 error
= dev
->si_ops
->d_mmap(&ap
);
204 dev_dclone(cdev_t dev
)
206 struct dev_clone_args ap
;
208 ap
.a_head
.a_desc
= &dev_clone_desc
;
209 ap
.a_head
.a_dev
= dev
;
210 return (dev
->si_ops
->d_clone(&ap
));
/*
 * Core device strategy call, used to issue I/O on a device.  There are
 * two versions, a non-chained version and a chained version.  The chained
 * version reuses a BIO set up by vn_strategy().  The only difference is
 * that, for now, we do not push a new tracking structure when chaining
 * from vn_strategy.  XXX this will ultimately have to change.
 */
221 dev_dstrategy(cdev_t dev
, struct bio
*bio
)
223 struct dev_strategy_args ap
;
224 struct bio_track
*track
;
226 ap
.a_head
.a_desc
= &dev_strategy_desc
;
227 ap
.a_head
.a_dev
= dev
;
230 KKASSERT(bio
->bio_track
== NULL
);
231 KKASSERT(bio
->bio_buf
->b_cmd
!= BUF_CMD_DONE
);
232 if (bio
->bio_buf
->b_cmd
== BUF_CMD_READ
)
233 track
= &dev
->si_track_read
;
235 track
= &dev
->si_track_write
;
236 atomic_add_int(&track
->bk_active
, 1);
237 bio
->bio_track
= track
;
238 (void)dev
->si_ops
->d_strategy(&ap
);
242 dev_dstrategy_chain(cdev_t dev
, struct bio
*bio
)
244 struct dev_strategy_args ap
;
246 KKASSERT(bio
->bio_track
!= NULL
);
247 ap
.a_head
.a_desc
= &dev_strategy_desc
;
248 ap
.a_head
.a_dev
= dev
;
250 (void)dev
->si_ops
->d_strategy(&ap
);
254 * note: the disk layer is expected to set count, blkno, and secsize before
255 * forwarding the message.
258 dev_ddump(cdev_t dev
)
260 struct dev_dump_args ap
;
262 ap
.a_head
.a_desc
= &dev_dump_desc
;
263 ap
.a_head
.a_dev
= dev
;
267 return(dev
->si_ops
->d_dump(&ap
));
271 dev_dpsize(cdev_t dev
)
273 struct dev_psize_args ap
;
276 ap
.a_head
.a_desc
= &dev_psize_desc
;
277 ap
.a_head
.a_dev
= dev
;
278 error
= dev
->si_ops
->d_psize(&ap
);
280 return (ap
.a_result
);
285 dev_dkqfilter(cdev_t dev
, struct knote
*kn
)
287 struct dev_kqfilter_args ap
;
290 ap
.a_head
.a_desc
= &dev_kqfilter_desc
;
291 ap
.a_head
.a_dev
= dev
;
293 error
= dev
->si_ops
->d_kqfilter(&ap
);
299 /************************************************************************
300 * DEVICE HELPER FUNCTIONS *
301 ************************************************************************/
304 dev_drefs(cdev_t dev
)
306 return(dev
->si_sysref
.refcnt
);
310 dev_dname(cdev_t dev
)
312 return(dev
->si_ops
->head
.name
);
316 dev_dflags(cdev_t dev
)
318 return(dev
->si_ops
->head
.flags
);
324 return(dev
->si_ops
->head
.maj
);
328 * Used when forwarding a request through layers. The caller adjusts
329 * ap->a_head.a_dev and then calls this function.
332 dev_doperate(struct dev_generic_args
*ap
)
334 int (*func
)(struct dev_generic_args
*);
336 func
= *(void **)((char *)ap
->a_dev
->si_ops
+ ap
->a_desc
->sd_offset
);
341 * Used by the console intercept code only. Issue an operation through
342 * a foreign ops structure allowing the ops structure associated
343 * with the device to remain intact.
346 dev_doperate_ops(struct dev_ops
*ops
, struct dev_generic_args
*ap
)
348 int (*func
)(struct dev_generic_args
*);
350 func
= *(void **)((char *)ops
+ ap
->a_desc
->sd_offset
);
355 * Convert a template dev_ops into the real thing by filling in
356 * uninitialized fields.
359 compile_dev_ops(struct dev_ops
*ops
)
363 for (offset
= offsetof(struct dev_ops
, dev_ops_first_field
);
364 offset
<= offsetof(struct dev_ops
, dev_ops_last_field
);
365 offset
+= sizeof(void *)
367 void **func_p
= (void **)((char *)ops
+ offset
);
368 void **def_p
= (void **)((char *)&default_dev_ops
+ offset
);
369 if (*func_p
== NULL
) {
371 *func_p
= ops
->d_default
;
378 /************************************************************************
379 * MAJOR/MINOR SPACE FUNCTION *
380 ************************************************************************/
383 * This makes a dev_ops entry visible to userland (e.g. /dev/<blah>).
385 * The kernel can overload a data space by making multiple dev_ops_add()
386 * calls, but only the most recent one in the list matching the mask/match
387 * will be visible to userland.
389 * make_dev() does not automatically call dev_ops_add() (nor do we want it
390 * to, since partition-managed disk devices are overloaded on top of the
393 * Disk devices typically register their major, e.g. 'ad0', and then call
394 * into the disk label management code which overloads its own onto e.g. 'ad0'
395 * to support all the various slice and partition combinations.
397 * The mask/match supplied in this call are a full 32 bits and the same
398 * mask and match must be specified in a later dev_ops_remove() call to
399 * match this add. However, the match value for the minor number should never
400 * have any bits set in the major number's bit range (8-15). The mask value
401 * may be conveniently specified as -1 without creating any major number
407 rb_dev_ops_compare(struct dev_ops_maj
*a
, struct dev_ops_maj
*b
)
411 else if (a
->maj
> b
->maj
)
416 RB_GENERATE2(dev_ops_rb_tree
, dev_ops_maj
, rbnode
, rb_dev_ops_compare
, int, maj
);
418 struct dev_ops_rb_tree dev_ops_rbhead
= RB_INITIALIZER(dev_ops_rbhead
);
421 dev_ops_add(struct dev_ops
*ops
, u_int mask
, u_int match
)
423 static int next_maj
= 256; /* first dynamic major number */
424 struct dev_ops_maj
*rbmaj
;
425 struct dev_ops_link
*link
;
427 compile_dev_ops(ops
);
428 if (ops
->head
.maj
< 0) {
429 while (dev_ops_rb_tree_RB_LOOKUP(&dev_ops_rbhead
, next_maj
) != NULL
) {
433 ops
->head
.maj
= next_maj
;
435 rbmaj
= dev_ops_rb_tree_RB_LOOKUP(&dev_ops_rbhead
, ops
->head
.maj
);
437 rbmaj
= kmalloc(sizeof(*rbmaj
), M_DEVBUF
, M_INTWAIT
| M_ZERO
);
438 rbmaj
->maj
= ops
->head
.maj
;
439 dev_ops_rb_tree_RB_INSERT(&dev_ops_rbhead
, rbmaj
);
441 for (link
= rbmaj
->link
; link
; link
= link
->next
) {
443 * If we get an exact match we usurp the target, but we only print
444 * a warning message if a different device switch is installed.
446 if (link
->mask
== mask
&& link
->match
== match
) {
447 if (link
->ops
!= ops
) {
448 kprintf("WARNING: \"%s\" (%p) is usurping \"%s\"'s"
451 link
->ops
->head
.name
, link
->ops
);
458 * XXX add additional warnings for overlaps
462 link
= kmalloc(sizeof(struct dev_ops_link
), M_DEVBUF
, M_INTWAIT
|M_ZERO
);
466 link
->next
= rbmaj
->link
;
473 * Should only be used by udev2dev().
475 * If the minor number is -1, we match the first ops we find for this
476 * major. If the mask is not -1 then multiple minor numbers can match
479 * Note that this function will return NULL if the minor number is not within
480 * the bounds of the installed mask(s).
482 * The specified minor number should NOT include any major bits.
485 dev_ops_get(int x
, int y
)
487 struct dev_ops_maj
*rbmaj
;
488 struct dev_ops_link
*link
;
490 rbmaj
= dev_ops_rb_tree_RB_LOOKUP(&dev_ops_rbhead
, x
);
493 for (link
= rbmaj
->link
; link
; link
= link
->next
) {
494 if (y
== -1 || (link
->mask
& y
) == link
->match
)
501 * Take a cookie cutter to the major/minor device space for the passed
502 * device and generate a new dev_ops visible to userland which the caller
503 * can then modify. The original device is not modified but portions of
504 * its major/minor space will no longer be visible to userland.
507 dev_ops_add_override(cdev_t backing_dev
, struct dev_ops
*template,
508 u_int mask
, u_int match
)
511 struct dev_ops
*backing_ops
= backing_dev
->si_ops
;
513 ops
= kmalloc(sizeof(struct dev_ops
), M_DEVBUF
, M_INTWAIT
);
515 ops
->head
.name
= backing_ops
->head
.name
;
516 ops
->head
.maj
= backing_ops
->head
.maj
;
517 ops
->head
.flags
|= backing_ops
->head
.flags
& ~D_TRACKCLOSE
;
518 compile_dev_ops(ops
);
519 dev_ops_add(ops
, mask
, match
);
/*
 * Remove all matching dev_ops entries from the dev_ops_array[] major
 * array so no new user opens can be performed, and destroy all devices
 * installed in the hash table that are associated with this dev_ops.  (see
 * destroy_all_devs()).
 *
 * The mask and match should match a previous call to dev_ops_add*().
 */
533 dev_ops_remove(struct dev_ops
*ops
, u_int mask
, u_int match
)
535 struct dev_ops_maj
*rbmaj
;
536 struct dev_ops_link
*link
;
537 struct dev_ops_link
**plink
;
539 if (ops
!= &dead_dev_ops
)
540 destroy_all_devs(ops
, mask
, match
);
542 rbmaj
= dev_ops_rb_tree_RB_LOOKUP(&dev_ops_rbhead
, ops
->head
.maj
);
544 kprintf("double-remove of dev_ops %p for %s(%d)\n",
545 ops
, ops
->head
.name
, ops
->head
.maj
);
548 for (plink
= &rbmaj
->link
; (link
= *plink
) != NULL
;
549 plink
= &link
->next
) {
550 if (link
->mask
== mask
&& link
->match
== match
) {
551 if (link
->ops
== ops
)
553 kprintf("%s: ERROR: cannot remove dev_ops, "
554 "its major number %d was stolen by %s\n",
555 ops
->head
.name
, ops
->head
.maj
,
561 kprintf("%s(%d)[%08x/%08x]: WARNING: ops removed "
563 ops
->head
.name
, ops
->head
.maj
, mask
, match
);
566 --ops
->head
.refs
; /* XXX ops_release() / record refs */
567 kfree(link
, M_DEVBUF
);
571 * Scrap the RB tree node for the major number if no ops are
572 * installed any longer.
574 if (rbmaj
->link
== NULL
) {
575 dev_ops_rb_tree_RB_REMOVE(&dev_ops_rbhead
, rbmaj
);
576 kfree(rbmaj
, M_DEVBUF
);
579 if (ops
->head
.refs
!= 0) {
580 kprintf("%s(%d)[%08x/%08x]: Warning: dev_ops_remove() called "
581 "while %d device refs still exist!\n",
582 ops
->head
.name
, ops
->head
.maj
, mask
, match
,
586 kprintf("%s: ops removed\n", ops
->head
.name
);
592 * dev_ops_scan() - Issue a callback for all installed dev_ops structures.
594 * The scan will terminate if a callback returns a negative number.
596 struct dev_ops_scan_info
{
597 int (*callback
)(struct dev_ops
*, void *);
603 dev_ops_scan_callback(struct dev_ops_maj
*rbmaj
, void *arg
)
605 struct dev_ops_scan_info
*info
= arg
;
606 struct dev_ops_link
*link
;
610 for (link
= rbmaj
->link
; link
; link
= link
->next
) {
611 r
= info
->callback(link
->ops
, info
->arg
);
620 dev_ops_scan(int (*callback
)(struct dev_ops
*, void *), void *arg
)
622 struct dev_ops_scan_info info
= { callback
, arg
};
624 return (dev_ops_rb_tree_RB_SCAN(&dev_ops_rbhead
, NULL
,
625 dev_ops_scan_callback
, &info
));
630 * Release an ops entry. When the ref count reaches zero, recurse
634 dev_ops_release(struct dev_ops
*ops
)
637 if (ops
->head
.refs
== 0) {
643 dev_ops_intercept(cdev_t dev
, struct dev_ops
*iops
)
645 struct dev_ops
*oops
= dev
->si_ops
;
647 compile_dev_ops(iops
);
648 iops
->head
.maj
= oops
->head
.maj
;
649 iops
->head
.data
= oops
->head
.data
;
650 iops
->head
.flags
= oops
->head
.flags
;
652 dev
->si_flags
|= SI_INTERCEPTED
;
658 dev_ops_restore(cdev_t dev
, struct dev_ops
*oops
)
660 struct dev_ops
*iops
= dev
->si_ops
;
663 dev
->si_flags
&= ~SI_INTERCEPTED
;
665 iops
->head
.data
= NULL
;
666 iops
->head
.flags
= 0;
669 /************************************************************************
670 * DEFAULT DEV OPS FUNCTIONS *
671 ************************************************************************/
675 * Unsupported devswitch functions (e.g. for writing to read-only device).
676 * XXX may belong elsewhere.
680 noclone(struct dev_clone_args
*ap
)
683 return (0); /* allow the clone */
687 noopen(struct dev_open_args
*ap
)
693 noclose(struct dev_close_args
*ap
)
699 noread(struct dev_read_args
*ap
)
705 nowrite(struct dev_write_args
*ap
)
711 noioctl(struct dev_ioctl_args
*ap
)
717 nokqfilter(struct dev_kqfilter_args
*ap
)
723 nommap(struct dev_mmap_args
*ap
)
729 nopoll(struct dev_poll_args
*ap
)
736 nostrategy(struct dev_strategy_args
*ap
)
738 struct bio
*bio
= ap
->a_bio
;
740 bio
->bio_buf
->b_flags
|= B_ERROR
;
741 bio
->bio_buf
->b_error
= EOPNOTSUPP
;
747 nopsize(struct dev_psize_args
*ap
)
754 nodump(struct dev_dump_args
*ap
)
760 * XXX this is probably bogus. Any device that uses it isn't checking the
764 nullopen(struct dev_open_args
*ap
)
770 nullclose(struct dev_close_args
*ap
)