/*
 * Copyright (c) 2003 Matthew Dillon <dillon@backplane.com> All rights reserved.
 * cdevsw from kern/kern_conf.c Copyright (c) 1995 Terrence R. Lambert
 * cdevsw from kern/kern_conf.c Copyright (c) 1995 Julian R. Elishcer,
 *							All rights reserved.
 * Copyright (c) 1982, 1986, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
31 #include <sys/param.h>
32 #include <sys/systm.h>
33 #include <sys/kernel.h>
34 #include <sys/sysctl.h>
35 #include <sys/module.h>
36 #include <sys/malloc.h>
40 #include <sys/vnode.h>
41 #include <sys/queue.h>
42 #include <sys/device.h>
44 #include <sys/syslink_rpc.h>
46 #include <machine/stdarg.h>
47 #include <sys/devfs.h>
48 #include <sys/dsched.h>
50 #include <sys/thread2.h>
51 #include <sys/mplock2.h>
/*
 * Event counters, split by whether the call into the driver was made
 * with or without the MP lock.  Exported read-only via sysctl.
 */
static int mpsafe_writes;
static int mplock_writes;
static int mpsafe_reads;
static int mplock_reads;
static int mpsafe_strategies;
static int mplock_strategies;
60 SYSCTL_INT(_kern
, OID_AUTO
, mpsafe_writes
, CTLFLAG_RD
, &mpsafe_writes
,
62 SYSCTL_INT(_kern
, OID_AUTO
, mplock_writes
, CTLFLAG_RD
, &mplock_writes
,
63 0, "non-mpsafe writes");
64 SYSCTL_INT(_kern
, OID_AUTO
, mpsafe_reads
, CTLFLAG_RD
, &mpsafe_reads
,
66 SYSCTL_INT(_kern
, OID_AUTO
, mplock_reads
, CTLFLAG_RD
, &mplock_reads
,
67 0, "non-mpsafe reads");
68 SYSCTL_INT(_kern
, OID_AUTO
, mpsafe_strategies
, CTLFLAG_RD
, &mpsafe_strategies
,
69 0, "mpsafe strategies");
70 SYSCTL_INT(_kern
, OID_AUTO
, mplock_strategies
, CTLFLAG_RD
, &mplock_strategies
,
71 0, "non-mpsafe strategies");
/*
 * system link descriptors identify the command in the
 * arguments structure.
 *
 * DDESCNAME(name) expands to the descriptor identifier dev_<name>_desc.
 * DEVOP_DESC_INIT(name) defines that descriptor, recording the byte
 * offset of the d_<name> member within struct dev_ops together with
 * the stringified operation name.  The caller supplies the trailing
 * semicolon.
 */
#define DDESCNAME(name) __CONCAT(__CONCAT(dev_,name),_desc)

#define DEVOP_DESC_INIT(name)						\
	    struct syslink_desc DDESCNAME(name) = {			\
		__offsetof(struct dev_ops, __CONCAT(d_, name)),		\
	    #name }
84 DEVOP_DESC_INIT(default);
85 DEVOP_DESC_INIT(open
);
86 DEVOP_DESC_INIT(close
);
87 DEVOP_DESC_INIT(read
);
88 DEVOP_DESC_INIT(write
);
89 DEVOP_DESC_INIT(ioctl
);
90 DEVOP_DESC_INIT(dump
);
91 DEVOP_DESC_INIT(psize
);
92 DEVOP_DESC_INIT(mmap
);
93 DEVOP_DESC_INIT(mmap_single
);
94 DEVOP_DESC_INIT(strategy
);
95 DEVOP_DESC_INIT(kqfilter
);
96 DEVOP_DESC_INIT(revoke
);
97 DEVOP_DESC_INIT(clone
);
102 struct dev_ops dead_dev_ops
;
104 static d_open_t noopen
;
105 static d_close_t noclose
;
106 static d_read_t noread
;
107 static d_write_t nowrite
;
108 static d_ioctl_t noioctl
;
109 static d_mmap_t nommap
;
110 static d_mmap_single_t nommap_single
;
111 static d_strategy_t nostrategy
;
112 static d_dump_t nodump
;
113 static d_psize_t nopsize
;
114 static d_kqfilter_t nokqfilter
;
115 static d_clone_t noclone
;
116 static d_revoke_t norevoke
;
118 struct dev_ops default_dev_ops
= {
120 .d_default
= NULL
, /* must be NULL */
127 .d_mmap_single
= nommap_single
,
128 .d_strategy
= nostrategy
,
131 .d_kqfilter
= nokqfilter
,
132 .d_revoke
= norevoke
,
138 dev_needmplock(cdev_t dev
)
140 return((dev
->si_ops
->head
.flags
& D_MPSAFE
) == 0);
/************************************************************************
 *			GENERAL DEVICE API FUNCTIONS			*
 ************************************************************************
 *
 * The MPSAFEness of these depends on dev->si_ops->head.flags
 */
150 dev_dopen(cdev_t dev
, int oflags
, int devtype
, struct ucred
*cred
, struct file
*fp
)
152 struct dev_open_args ap
;
153 int needmplock
= dev_needmplock(dev
);
156 ap
.a_head
.a_desc
= &dev_open_desc
;
157 ap
.a_head
.a_dev
= dev
;
158 ap
.a_oflags
= oflags
;
159 ap
.a_devtype
= devtype
;
165 error
= dev
->si_ops
->d_open(&ap
);
172 dev_dclose(cdev_t dev
, int fflag
, int devtype
, struct file
*fp
)
174 struct dev_close_args ap
;
175 int needmplock
= dev_needmplock(dev
);
178 ap
.a_head
.a_desc
= &dev_close_desc
;
179 ap
.a_head
.a_dev
= dev
;
181 ap
.a_devtype
= devtype
;
186 error
= dev
->si_ops
->d_close(&ap
);
193 dev_dread(cdev_t dev
, struct uio
*uio
, int ioflag
, struct file
*fp
)
195 struct dev_read_args ap
;
196 int needmplock
= dev_needmplock(dev
);
199 ap
.a_head
.a_desc
= &dev_read_desc
;
200 ap
.a_head
.a_dev
= dev
;
202 ap
.a_ioflag
= ioflag
;
211 error
= dev
->si_ops
->d_read(&ap
);
215 dev
->si_lastread
= time_uptime
;
220 dev_dwrite(cdev_t dev
, struct uio
*uio
, int ioflag
, struct file
*fp
)
222 struct dev_write_args ap
;
223 int needmplock
= dev_needmplock(dev
);
226 dev
->si_lastwrite
= time_uptime
;
227 ap
.a_head
.a_desc
= &dev_write_desc
;
228 ap
.a_head
.a_dev
= dev
;
230 ap
.a_ioflag
= ioflag
;
239 error
= dev
->si_ops
->d_write(&ap
);
246 dev_dioctl(cdev_t dev
, u_long cmd
, caddr_t data
, int fflag
, struct ucred
*cred
,
247 struct sysmsg
*msg
, struct file
*fp
)
249 struct dev_ioctl_args ap
;
250 int needmplock
= dev_needmplock(dev
);
253 ap
.a_head
.a_desc
= &dev_ioctl_desc
;
254 ap
.a_head
.a_dev
= dev
;
264 error
= dev
->si_ops
->d_ioctl(&ap
);
271 dev_dmmap(cdev_t dev
, vm_offset_t offset
, int nprot
, struct file
*fp
)
273 struct dev_mmap_args ap
;
274 int needmplock
= dev_needmplock(dev
);
277 ap
.a_head
.a_desc
= &dev_mmap_desc
;
278 ap
.a_head
.a_dev
= dev
;
279 ap
.a_offset
= offset
;
285 error
= dev
->si_ops
->d_mmap(&ap
);
295 dev_dmmap_single(cdev_t dev
, vm_ooffset_t
*offset
, vm_size_t size
,
296 struct vm_object
**object
, int nprot
, struct file
*fp
)
298 struct dev_mmap_single_args ap
;
299 int needmplock
= dev_needmplock(dev
);
302 ap
.a_head
.a_desc
= &dev_mmap_single_desc
;
303 ap
.a_head
.a_dev
= dev
;
304 ap
.a_offset
= offset
;
306 ap
.a_object
= object
;
312 error
= dev
->si_ops
->d_mmap_single(&ap
);
320 dev_dclone(cdev_t dev
)
322 struct dev_clone_args ap
;
323 int needmplock
= dev_needmplock(dev
);
326 ap
.a_head
.a_desc
= &dev_clone_desc
;
327 ap
.a_head
.a_dev
= dev
;
331 error
= dev
->si_ops
->d_clone(&ap
);
338 dev_drevoke(cdev_t dev
)
340 struct dev_revoke_args ap
;
341 int needmplock
= dev_needmplock(dev
);
344 ap
.a_head
.a_desc
= &dev_revoke_desc
;
345 ap
.a_head
.a_dev
= dev
;
349 error
= dev
->si_ops
->d_revoke(&ap
);
357 * Core device strategy call, used to issue I/O on a device. There are
358 * two versions, a non-chained version and a chained version. The chained
359 * version reuses a BIO set up by vn_strategy(). The only difference is
360 * that, for now, we do not push a new tracking structure when chaining
361 * from vn_strategy. XXX this will ultimately have to change.
364 dev_dstrategy(cdev_t dev
, struct bio
*bio
)
366 struct dev_strategy_args ap
;
367 struct bio_track
*track
;
368 int needmplock
= dev_needmplock(dev
);
370 ap
.a_head
.a_desc
= &dev_strategy_desc
;
371 ap
.a_head
.a_dev
= dev
;
374 KKASSERT(bio
->bio_track
== NULL
);
375 KKASSERT(bio
->bio_buf
->b_cmd
!= BUF_CMD_DONE
);
376 if (bio
->bio_buf
->b_cmd
== BUF_CMD_READ
)
377 track
= &dev
->si_track_read
;
379 track
= &dev
->si_track_write
;
380 bio_track_ref(track
);
381 bio
->bio_track
= track
;
383 if (dsched_is_clear_buf_priv(bio
->bio_buf
))
384 dsched_new_buf(bio
->bio_buf
);
386 KKASSERT((bio
->bio_flags
& BIO_DONE
) == 0);
393 (void)dev
->si_ops
->d_strategy(&ap
);
399 dev_dstrategy_chain(cdev_t dev
, struct bio
*bio
)
401 struct dev_strategy_args ap
;
402 int needmplock
= dev_needmplock(dev
);
404 ap
.a_head
.a_desc
= &dev_strategy_desc
;
405 ap
.a_head
.a_dev
= dev
;
408 KKASSERT(bio
->bio_track
!= NULL
);
409 KKASSERT((bio
->bio_flags
& BIO_DONE
) == 0);
412 (void)dev
->si_ops
->d_strategy(&ap
);
418 * note: the disk layer is expected to set count, blkno, and secsize before
419 * forwarding the message.
422 dev_ddump(cdev_t dev
, void *virtual, vm_offset_t physical
, off_t offset
,
425 struct dev_dump_args ap
;
426 int needmplock
= dev_needmplock(dev
);
429 ap
.a_head
.a_desc
= &dev_dump_desc
;
430 ap
.a_head
.a_dev
= dev
;
434 ap
.a_virtual
= virtual;
435 ap
.a_physical
= physical
;
436 ap
.a_offset
= offset
;
437 ap
.a_length
= length
;
441 error
= dev
->si_ops
->d_dump(&ap
);
448 dev_dpsize(cdev_t dev
)
450 struct dev_psize_args ap
;
451 int needmplock
= dev_needmplock(dev
);
454 ap
.a_head
.a_desc
= &dev_psize_desc
;
455 ap
.a_head
.a_dev
= dev
;
459 error
= dev
->si_ops
->d_psize(&ap
);
464 return (ap
.a_result
);
469 * Pass-thru to the device kqfilter.
471 * NOTE: We explicitly preset a_result to 0 so d_kqfilter() functions
472 * which return 0 do not have to bother setting a_result.
475 dev_dkqfilter(cdev_t dev
, struct knote
*kn
, struct file
*fp
)
477 struct dev_kqfilter_args ap
;
478 int needmplock
= dev_needmplock(dev
);
481 ap
.a_head
.a_desc
= &dev_kqfilter_desc
;
482 ap
.a_head
.a_dev
= dev
;
489 error
= dev
->si_ops
->d_kqfilter(&ap
);
/************************************************************************
 *			DEVICE HELPER FUNCTIONS				*
 ************************************************************************/
506 dev_drefs(cdev_t dev
)
508 return(dev
->si_sysref
.refcnt
);
515 dev_dname(cdev_t dev
)
517 return(dev
->si_ops
->head
.name
);
524 dev_dflags(cdev_t dev
)
526 return(dev
->si_ops
->head
.flags
);
535 return(dev
->si_ops
->head
.maj
);
539 * Used when forwarding a request through layers. The caller adjusts
540 * ap->a_head.a_dev and then calls this function.
543 dev_doperate(struct dev_generic_args
*ap
)
545 int (*func
)(struct dev_generic_args
*);
546 int needmplock
= dev_needmplock(ap
->a_dev
);
549 func
= *(void **)((char *)ap
->a_dev
->si_ops
+ ap
->a_desc
->sd_offset
);
561 * Used by the console intercept code only. Issue an operation through
562 * a foreign ops structure allowing the ops structure associated
563 * with the device to remain intact.
566 dev_doperate_ops(struct dev_ops
*ops
, struct dev_generic_args
*ap
)
568 int (*func
)(struct dev_generic_args
*);
569 int needmplock
= ((ops
->head
.flags
& D_MPSAFE
) == 0);
572 func
= *(void **)((char *)ops
+ ap
->a_desc
->sd_offset
);
584 * Convert a template dev_ops into the real thing by filling in
585 * uninitialized fields.
588 compile_dev_ops(struct dev_ops
*ops
)
592 for (offset
= offsetof(struct dev_ops
, dev_ops_first_field
);
593 offset
<= offsetof(struct dev_ops
, dev_ops_last_field
);
594 offset
+= sizeof(void *)
596 void **func_p
= (void **)((char *)ops
+ offset
);
597 void **def_p
= (void **)((char *)&default_dev_ops
+ offset
);
598 if (*func_p
== NULL
) {
600 *func_p
= ops
->d_default
;
/************************************************************************
 *			MAJOR/MINOR SPACE FUNCTION 			*
 ************************************************************************/
/*
 * This makes a dev_ops entry visible to userland (e.g /dev/<blah>).
 *
 * Disk devices typically register their major, e.g. 'ad0', and then call
 * into the disk label management code which overloads its own onto e.g. 'ad0'
 * to support all the various slice and partition combinations.
 *
 * The mask/match supplied in this call are a full 32 bits and the same
 * mask and match must be specified in a later dev_ops_remove() call to
 * match this add.  However, the match value for the minor number should never
 * have any bits set in the major number's bit range (8-15).  The mask value
 * may be conveniently specified as -1 without creating any major number
 * conflicts.
 */
628 rb_dev_ops_compare(struct dev_ops_maj
*a
, struct dev_ops_maj
*b
)
632 else if (a
->maj
> b
->maj
)
637 RB_GENERATE2(dev_ops_rb_tree
, dev_ops_maj
, rbnode
, rb_dev_ops_compare
, int, maj
);
639 struct dev_ops_rb_tree dev_ops_rbhead
= RB_INITIALIZER(dev_ops_rbhead
);
/*
 * Destroy the devices associated with ops by delegating to
 * devfs_destroy_dev_by_ops() with a minor of -1.
 */
int
dev_ops_remove_all(struct dev_ops *ops)
{
	return devfs_destroy_dev_by_ops(ops, -1);
}
/*
 * Destroy the device with the given minor associated with ops by
 * delegating to devfs_destroy_dev_by_ops().
 */
int
dev_ops_remove_minor(struct dev_ops *ops, int minor)
{
	return devfs_destroy_dev_by_ops(ops, minor);
}
654 dev_ops_intercept(cdev_t dev
, struct dev_ops
*iops
)
656 struct dev_ops
*oops
= dev
->si_ops
;
658 compile_dev_ops(iops
);
659 iops
->head
.maj
= oops
->head
.maj
;
660 iops
->head
.data
= oops
->head
.data
;
661 iops
->head
.flags
= oops
->head
.flags
;
663 dev
->si_flags
|= SI_INTERCEPTED
;
669 dev_ops_restore(cdev_t dev
, struct dev_ops
*oops
)
671 struct dev_ops
*iops
= dev
->si_ops
;
674 dev
->si_flags
&= ~SI_INTERCEPTED
;
676 iops
->head
.data
= NULL
;
677 iops
->head
.flags
= 0;
/************************************************************************
 *			DEFAULT DEV OPS FUNCTIONS			*
 ************************************************************************/


/*
 * Unsupported devswitch functions (e.g. for writing to read-only device).
 * XXX may belong elsewhere.
 */
690 norevoke(struct dev_revoke_args
*ap
)
/*
 * Default clone handler: takes no action.
 */
static int
noclone(struct dev_clone_args *ap)
{
	return (0);	/* allow the clone */
}
704 noopen(struct dev_open_args
*ap
)
710 noclose(struct dev_close_args
*ap
)
716 noread(struct dev_read_args
*ap
)
722 nowrite(struct dev_write_args
*ap
)
728 noioctl(struct dev_ioctl_args
*ap
)
734 nokqfilter(struct dev_kqfilter_args
*ap
)
740 nommap(struct dev_mmap_args
*ap
)
746 nommap_single(struct dev_mmap_single_args
*ap
)
752 nostrategy(struct dev_strategy_args
*ap
)
754 struct bio
*bio
= ap
->a_bio
;
756 bio
->bio_buf
->b_flags
|= B_ERROR
;
757 bio
->bio_buf
->b_error
= EOPNOTSUPP
;
763 nopsize(struct dev_psize_args
*ap
)
770 nodump(struct dev_dump_args
*ap
)
776 * XXX this is probably bogus. Any device that uses it isn't checking the
780 nullopen(struct dev_open_args
*ap
)
786 nullclose(struct dev_close_args
*ap
)