/*
 * Copyright (c) 2003 Matthew Dillon <dillon@backplane.com>  All rights reserved.
 * cdevsw from kern/kern_conf.c Copyright (c) 1995 Terrence R. Lambert
 * cdevsw from kern/kern_conf.c Copyright (c) 1995 Julian R. Elischer,
 *							All rights reserved.
 * Copyright (c) 1982, 1986, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/module.h>
#include <sys/malloc.h>
#include <sys/conf.h>
#include <sys/bio.h>
#include <sys/buf.h>
#include <sys/vnode.h>
#include <sys/queue.h>
#include <sys/device.h>
#include <sys/tree.h>
#include <sys/syslink_rpc.h>
#include <sys/proc.h>
#include <machine/stdarg.h>
#include <sys/devfs.h>
#include <sys/dsched.h>

#include <sys/thread2.h>
#include <sys/mplock2.h>

static int mpsafe_writes;
static int mplock_writes;
static int mpsafe_reads;
static int mplock_reads;
static int mpsafe_strategies;
static int mplock_strategies;

SYSCTL_INT(_kern, OID_AUTO, mpsafe_writes, CTLFLAG_RD, &mpsafe_writes,
           0, "mpsafe writes");
SYSCTL_INT(_kern, OID_AUTO, mplock_writes, CTLFLAG_RD, &mplock_writes,
           0, "non-mpsafe writes");
SYSCTL_INT(_kern, OID_AUTO, mpsafe_reads, CTLFLAG_RD, &mpsafe_reads,
           0, "mpsafe reads");
SYSCTL_INT(_kern, OID_AUTO, mplock_reads, CTLFLAG_RD, &mplock_reads,
           0, "non-mpsafe reads");
SYSCTL_INT(_kern, OID_AUTO, mpsafe_strategies, CTLFLAG_RD, &mpsafe_strategies,
           0, "mpsafe strategies");
SYSCTL_INT(_kern, OID_AUTO, mplock_strategies, CTLFLAG_RD, &mplock_strategies,
           0, "non-mpsafe strategies");
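
/*
 * Usage note (not code from this file): the counters above are exported
 * read-only under the "kern" sysctl tree, so they can be inspected from
 * userland with e.g.:
 *
 *	sysctl kern.mpsafe_writes kern.mplock_writes
 */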

/*
 * System link descriptors identify the command in the
 * arguments structure.
 */
#define DDESCNAME(name)	__CONCAT(__CONCAT(dev_,name),_desc)

#define DEVOP_DESC_INIT(name)						\
	    struct syslink_desc DDESCNAME(name) = {			\
		__offsetof(struct dev_ops, __CONCAT(d_, name)),		\
	    #name }
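
/*
 * For reference, DEVOP_DESC_INIT(open) expands to approximately:
 *
 *	struct syslink_desc dev_open_desc = {
 *		__offsetof(struct dev_ops, d_open),
 *		"open"
 *	};
 *
 * Each descriptor records the byte offset of one function pointer inside
 * struct dev_ops plus a printable name; dev_doperate() later uses the
 * offset to dispatch a generic args structure to the right entry point.
 */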

DEVOP_DESC_INIT(default);
DEVOP_DESC_INIT(open);
DEVOP_DESC_INIT(close);
DEVOP_DESC_INIT(read);
DEVOP_DESC_INIT(write);
DEVOP_DESC_INIT(ioctl);
DEVOP_DESC_INIT(dump);
DEVOP_DESC_INIT(psize);
DEVOP_DESC_INIT(mmap);
DEVOP_DESC_INIT(strategy);
DEVOP_DESC_INIT(kqfilter);
DEVOP_DESC_INIT(revoke);
DEVOP_DESC_INIT(clone);

/*
 * Misc default ops
 */
struct dev_ops dead_dev_ops;

struct dev_ops default_dev_ops = {
	{ "null" },
	.d_default = NULL,	/* must be NULL */
	.d_open = noopen,
	.d_close = noclose,
	.d_read = noread,
	.d_write = nowrite,
	.d_ioctl = noioctl,
	.d_mmap = nommap,
	.d_strategy = nostrategy,
	.d_dump = nodump,
	.d_psize = nopsize,
	.d_kqfilter = nokqfilter,
	.d_revoke = norevoke,
	.d_clone = noclone
};
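
/*
 * Illustrative sketch (the "mydev" driver below is hypothetical, not part
 * of this file): a driver normally supplies only the entry points it
 * implements and lets compile_dev_ops() fill the remaining slots from
 * default_dev_ops:
 *
 *	static struct dev_ops mydev_ops = {
 *		{ "mydev", 0, D_MPSAFE },
 *		.d_open = mydev_open,
 *		.d_close = mydev_close,
 *		.d_read = mydev_read,
 *		.d_write = mydev_write
 *	};
 */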

static __inline int
dev_needmplock(cdev_t dev)
{
	return ((dev->si_ops->head.flags & D_MPSAFE) == 0);
}

/************************************************************************
 *			GENERAL DEVICE API FUNCTIONS			*
 ************************************************************************
 *
 * The MPSAFEness of these depends on dev->si_ops->head.flags.
 */
int
dev_dopen(cdev_t dev, int oflags, int devtype, struct ucred *cred)
{
	struct dev_open_args ap;
	int needmplock = dev_needmplock(dev);
	int error;

	ap.a_head.a_desc = &dev_open_desc;
	ap.a_head.a_dev = dev;
	ap.a_oflags = oflags;
	ap.a_devtype = devtype;
	ap.a_cred = cred;

	if (needmplock)
		get_mplock();
	error = dev->si_ops->d_open(&ap);
	if (needmplock)
		rel_mplock();
	return (error);
}

int
dev_dclose(cdev_t dev, int fflag, int devtype)
{
	struct dev_close_args ap;
	int needmplock = dev_needmplock(dev);
	int error;

	ap.a_head.a_desc = &dev_close_desc;
	ap.a_head.a_dev = dev;
	ap.a_fflag = fflag;
	ap.a_devtype = devtype;

	if (needmplock)
		get_mplock();
	error = dev->si_ops->d_close(&ap);
	if (needmplock)
		rel_mplock();
	return (error);
}

int
dev_dread(cdev_t dev, struct uio *uio, int ioflag)
{
	struct dev_read_args ap;
	int needmplock = dev_needmplock(dev);
	int error;

	ap.a_head.a_desc = &dev_read_desc;
	ap.a_head.a_dev = dev;
	ap.a_uio = uio;
	ap.a_ioflag = ioflag;

	if (needmplock) {
		get_mplock();
		++mplock_reads;
	} else {
		++mpsafe_reads;
	}
	error = dev->si_ops->d_read(&ap);
	if (needmplock)
		rel_mplock();
	if (error == 0)
		dev->si_lastread = time_second;
	return (error);
}

int
dev_dwrite(cdev_t dev, struct uio *uio, int ioflag)
{
	struct dev_write_args ap;
	int needmplock = dev_needmplock(dev);
	int error;

	dev->si_lastwrite = time_second;
	ap.a_head.a_desc = &dev_write_desc;
	ap.a_head.a_dev = dev;
	ap.a_uio = uio;
	ap.a_ioflag = ioflag;

	if (needmplock) {
		get_mplock();
		++mplock_writes;
	} else {
		++mpsafe_writes;
	}
	error = dev->si_ops->d_write(&ap);
	if (needmplock)
		rel_mplock();
	return (error);
}

int
dev_dioctl(cdev_t dev, u_long cmd, caddr_t data, int fflag, struct ucred *cred,
	   struct sysmsg *msg)
{
	struct dev_ioctl_args ap;
	int needmplock = dev_needmplock(dev);
	int error;

	ap.a_head.a_desc = &dev_ioctl_desc;
	ap.a_head.a_dev = dev;
	ap.a_cmd = cmd;
	ap.a_data = data;
	ap.a_fflag = fflag;
	ap.a_cred = cred;
	ap.a_sysmsg = msg;

	if (needmplock)
		get_mplock();
	error = dev->si_ops->d_ioctl(&ap);
	if (needmplock)
		rel_mplock();
	return (error);
}

int
dev_dmmap(cdev_t dev, vm_offset_t offset, int nprot)
{
	struct dev_mmap_args ap;
	int needmplock = dev_needmplock(dev);
	int error;

	ap.a_head.a_desc = &dev_mmap_desc;
	ap.a_head.a_dev = dev;
	ap.a_offset = offset;
	ap.a_nprot = nprot;

	if (needmplock)
		get_mplock();
	error = dev->si_ops->d_mmap(&ap);
	if (needmplock)
		rel_mplock();

	if (error == 0)
		return (ap.a_result);
	return (-1);
}

int
dev_dclone(cdev_t dev)
{
	struct dev_clone_args ap;
	int needmplock = dev_needmplock(dev);
	int error;

	ap.a_head.a_desc = &dev_clone_desc;
	ap.a_head.a_dev = dev;

	if (needmplock)
		get_mplock();
	error = dev->si_ops->d_clone(&ap);
	if (needmplock)
		rel_mplock();
	return (error);
}

int
dev_drevoke(cdev_t dev)
{
	struct dev_revoke_args ap;
	int needmplock = dev_needmplock(dev);
	int error;

	ap.a_head.a_desc = &dev_revoke_desc;
	ap.a_head.a_dev = dev;

	if (needmplock)
		get_mplock();
	error = dev->si_ops->d_revoke(&ap);
	if (needmplock)
		rel_mplock();

	return (error);
}

/*
 * Core device strategy call, used to issue I/O on a device.  There are
 * two versions, a non-chained version and a chained version.  The chained
 * version reuses a BIO set up by vn_strategy().  The only difference is
 * that, for now, we do not push a new tracking structure when chaining
 * from vn_strategy.  XXX this will ultimately have to change.
 */
void
dev_dstrategy(cdev_t dev, struct bio *bio)
{
	struct dev_strategy_args ap;
	struct bio_track *track;
	int needmplock = dev_needmplock(dev);

	ap.a_head.a_desc = &dev_strategy_desc;
	ap.a_head.a_dev = dev;
	ap.a_bio = bio;

	KKASSERT(bio->bio_track == NULL);
	KKASSERT(bio->bio_buf->b_cmd != BUF_CMD_DONE);
	if (bio->bio_buf->b_cmd == BUF_CMD_READ)
		track = &dev->si_track_read;
	else
		track = &dev->si_track_write;
	bio_track_ref(track);
	bio->bio_track = track;

	if (dsched_is_clear_buf_priv(bio->bio_buf))
		dsched_new_buf(bio->bio_buf);

	KKASSERT((bio->bio_flags & BIO_DONE) == 0);
	if (needmplock) {
		get_mplock();
		++mplock_strategies;
	} else {
		++mpsafe_strategies;
	}
	(void)dev->si_ops->d_strategy(&ap);
	if (needmplock)
		rel_mplock();
}
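
/*
 * Illustrative sketch (not code from this file): issuing a raw read
 * through the non-chained strategy entry.  Buffer setup is abbreviated;
 * most callers arrive here via vn_strategy() or the disk subsystem.
 *
 *	struct buf *bp = getpbuf(NULL);
 *
 *	bp->b_cmd = BUF_CMD_READ;
 *	(set up b_data, b_bcount, and bp->b_bio1.bio_offset here)
 *	dev_dstrategy(dev, &bp->b_bio1);
 *	biowait(&bp->b_bio1, "rawrd");
 */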

void
dev_dstrategy_chain(cdev_t dev, struct bio *bio)
{
	struct dev_strategy_args ap;
	int needmplock = dev_needmplock(dev);

	ap.a_head.a_desc = &dev_strategy_desc;
	ap.a_head.a_dev = dev;
	ap.a_bio = bio;

	KKASSERT(bio->bio_track != NULL);
	KKASSERT((bio->bio_flags & BIO_DONE) == 0);

	if (needmplock)
		get_mplock();
	(void)dev->si_ops->d_strategy(&ap);
	if (needmplock)
		rel_mplock();
}

/*
 * Note: the disk layer is expected to set count, blkno, and secsize before
 * forwarding the message.
 */
int
dev_ddump(cdev_t dev, void *virtual, vm_offset_t physical, off_t offset,
	  size_t length)
{
	struct dev_dump_args ap;
	int needmplock = dev_needmplock(dev);
	int error;

	ap.a_head.a_desc = &dev_dump_desc;
	ap.a_head.a_dev = dev;
	ap.a_count = 0;
	ap.a_blkno = 0;
	ap.a_secsize = 0;
	ap.a_virtual = virtual;
	ap.a_physical = physical;
	ap.a_offset = offset;
	ap.a_length = length;

	if (needmplock)
		get_mplock();
	error = dev->si_ops->d_dump(&ap);
	if (needmplock)
		rel_mplock();
	return (error);
}
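
/*
 * Illustrative sketch of the note above (hypothetical forwarding layer,
 * not code from this file, and the unit conversions are an assumption):
 * a disk layer fills in the sector fields before pushing the dump
 * message down to the raw device.
 *
 *	ap->a_head.a_dev = rawdev;
 *	ap->a_secsize = secsize;
 *	ap->a_blkno = ap->a_offset / secsize;
 *	ap->a_count = ap->a_length / secsize;
 *	return (dev_doperate(&ap->a_head));
 */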

int64_t
dev_dpsize(cdev_t dev)
{
	struct dev_psize_args ap;
	int needmplock = dev_needmplock(dev);
	int error;

	ap.a_head.a_desc = &dev_psize_desc;
	ap.a_head.a_dev = dev;

	if (needmplock)
		get_mplock();
	error = dev->si_ops->d_psize(&ap);
	if (needmplock)
		rel_mplock();

	if (error == 0)
		return (ap.a_result);
	return (-1);
}

/*
 * Pass-thru to the device kqfilter.
 *
 * NOTE: We explicitly preset a_result to 0 so d_kqfilter() functions
 *	 which return 0 do not have to bother setting a_result.
 */
int
dev_dkqfilter(cdev_t dev, struct knote *kn)
{
	struct dev_kqfilter_args ap;
	int needmplock = dev_needmplock(dev);
	int error;

	ap.a_head.a_desc = &dev_kqfilter_desc;
	ap.a_head.a_dev = dev;
	ap.a_kn = kn;
	ap.a_result = 0;

	if (needmplock)
		get_mplock();
	error = dev->si_ops->d_kqfilter(&ap);
	if (needmplock)
		rel_mplock();

	if (error == 0)
		return (ap.a_result);
	return (ENODEV);
}
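
/*
 * Illustrative sketch (hypothetical "mydev" driver, not code from this
 * file): because a_result is preset to 0, a d_kqfilter implementation
 * that accepts the knote only has to hook it up and return 0.
 *
 *	static int
 *	mydev_kqfilter(struct dev_kqfilter_args *ap)
 *	{
 *		ap->a_kn->kn_fop = &mydev_filtops;
 *		return (0);		(a_result is already 0: success)
 *	}
 */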

/************************************************************************
 *			DEVICE HELPER FUNCTIONS				*
 ************************************************************************/

int
dev_drefs(cdev_t dev)
{
	return (dev->si_sysref.refcnt);
}

const char *
dev_dname(cdev_t dev)
{
	return (dev->si_ops->head.name);
}

int
dev_dflags(cdev_t dev)
{
	return (dev->si_ops->head.flags);
}

int
dev_dmaj(cdev_t dev)
{
	return (dev->si_ops->head.maj);
}

/*
 * Used when forwarding a request through layers.  The caller adjusts
 * ap->a_head.a_dev and then calls this function.
 */
int
dev_doperate(struct dev_generic_args *ap)
{
	int (*func)(struct dev_generic_args *);
	int needmplock = dev_needmplock(ap->a_dev);
	int error;

	func = *(void **)((char *)ap->a_dev->si_ops + ap->a_desc->sd_offset);

	if (needmplock)
		get_mplock();
	error = func(ap);
	if (needmplock)
		rel_mplock();

	return (error);
}
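
/*
 * Illustrative sketch (hypothetical layered driver, not code from this
 * file): forwarding an ioctl to an underlying device by retargeting
 * a_dev and re-dispatching through the generic entry.
 *
 *	static int
 *	mylayer_ioctl(struct dev_ioctl_args *ap)
 *	{
 *		ap->a_head.a_dev = lowerdev;
 *		return (dev_doperate(&ap->a_head));
 *	}
 */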

/*
 * Used by the console intercept code only.  Issue an operation through
 * a foreign ops structure, allowing the ops structure associated
 * with the device to remain intact.
 */
int
dev_doperate_ops(struct dev_ops *ops, struct dev_generic_args *ap)
{
	int (*func)(struct dev_generic_args *);
	int needmplock = ((ops->head.flags & D_MPSAFE) == 0);
	int error;

	func = *(void **)((char *)ops + ap->a_desc->sd_offset);

	if (needmplock)
		get_mplock();
	error = func(ap);
	if (needmplock)
		rel_mplock();

	return (error);
}

/*
 * Convert a template dev_ops into the real thing by filling in
 * uninitialized fields.
 */
void
compile_dev_ops(struct dev_ops *ops)
{
	int offset;

	for (offset = offsetof(struct dev_ops, dev_ops_first_field);
	     offset <= offsetof(struct dev_ops, dev_ops_last_field);
	     offset += sizeof(void *)) {
		void **func_p = (void **)((char *)ops + offset);
		void **def_p = (void **)((char *)&default_dev_ops + offset);

		if (*func_p == NULL) {
			if (ops->d_default)
				*func_p = ops->d_default;
			else
				*func_p = *def_p;
		}
	}
}
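
/*
 * For example, given the hypothetical mydev_ops template sketched earlier
 * in this file with only a few d_* entries set, compile_dev_ops() walks
 * every function-pointer slot between dev_ops_first_field and
 * dev_ops_last_field:
 *
 *	compile_dev_ops(&mydev_ops);
 *	(now mydev_ops.d_ioctl == noioctl, mydev_ops.d_mmap == nommap, ...)
 *
 * A template may instead set d_default to a single catch-all handler,
 * which is then used for every unset slot.
 */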

/************************************************************************
 *			MAJOR/MINOR SPACE FUNCTIONS			*
 ************************************************************************/

/*
 * This makes a dev_ops entry visible to userland (e.g. /dev/<blah>).
 *
 * Disk devices typically register their major, e.g. 'ad0', and then call
 * into the disk label management code which overloads its own onto e.g. 'ad0'
 * to support all the various slice and partition combinations.
 *
 * The mask/match supplied in this call are a full 32 bits and the same
 * mask and match must be specified in a later dev_ops_remove() call to
 * match this add.  However, the match value for the minor number should never
 * have any bits set in the major number's bit range (8-15).  The mask value
 * may be conveniently specified as -1 without creating any major number
 * interference.
 */
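
/*
 * Illustrative sketch (the values are hypothetical): to cover exactly
 * one minor, with the minor encoding kept out of the major bit range:
 *
 *	mask  = (uint32_t)-1;		(every bit significant)
 *	match = unit;			(no bits in the 8-15 range)
 *
 * The identical (mask, match) pair must later be handed to
 * dev_ops_remove() to undo this add.
 */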

static int
rb_dev_ops_compare(struct dev_ops_maj *a, struct dev_ops_maj *b)
{
	if (a->maj < b->maj)
		return (-1);
	else if (a->maj > b->maj)
		return (1);
	return (0);
}

RB_GENERATE2(dev_ops_rb_tree, dev_ops_maj, rbnode, rb_dev_ops_compare,
	     int, maj);

struct dev_ops_rb_tree dev_ops_rbhead = RB_INITIALIZER(dev_ops_rbhead);

int
dev_ops_remove_all(struct dev_ops *ops)
{
	return devfs_destroy_dev_by_ops(ops, -1);
}

int
dev_ops_remove_minor(struct dev_ops *ops, int minor)
{
	return devfs_destroy_dev_by_ops(ops, minor);
}

struct dev_ops *
dev_ops_intercept(cdev_t dev, struct dev_ops *iops)
{
	struct dev_ops *oops = dev->si_ops;

	compile_dev_ops(iops);
	iops->head.maj = oops->head.maj;
	iops->head.data = oops->head.data;
	iops->head.flags = oops->head.flags;
	dev->si_ops = iops;
	dev->si_flags |= SI_INTERCEPTED;

	return (oops);
}

void
dev_ops_restore(cdev_t dev, struct dev_ops *oops)
{
	struct dev_ops *iops = dev->si_ops;

	dev->si_ops = oops;
	dev->si_flags &= ~SI_INTERCEPTED;
	iops->head.maj = 0;
	iops->head.data = NULL;
	iops->head.flags = 0;
}
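
/*
 * Illustrative sketch (hypothetical console ops, not code from this
 * file): intercept and restore are used as a bracketed pair so the
 * device's own ops structure survives untouched.
 *
 *	struct dev_ops *saved;
 *
 *	saved = dev_ops_intercept(dev, &cons_intercept_ops);
 *	(operations on dev are now routed through cons_intercept_ops)
 *	dev_ops_restore(dev, saved);
 */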

/************************************************************************
 *			DEFAULT DEV OPS FUNCTIONS			*
 ************************************************************************/

/*
 * Unsupported devswitch functions (e.g. for writing to a read-only device).
 * XXX may belong elsewhere.
 */
int
norevoke(struct dev_revoke_args *ap)
{
	/* take no action */
	return (0);
}

int
noclone(struct dev_clone_args *ap)
{
	/* take no action */
	return (0);	/* allow the clone */
}

int
noopen(struct dev_open_args *ap)
{
	return (ENODEV);
}

int
noclose(struct dev_close_args *ap)
{
	return (ENODEV);
}

int
noread(struct dev_read_args *ap)
{
	return (ENODEV);
}

int
nowrite(struct dev_write_args *ap)
{
	return (ENODEV);
}

int
noioctl(struct dev_ioctl_args *ap)
{
	return (ENODEV);
}

int
nokqfilter(struct dev_kqfilter_args *ap)
{
	return (ENODEV);
}

int
nommap(struct dev_mmap_args *ap)
{
	return (ENODEV);
}

int
nostrategy(struct dev_strategy_args *ap)
{
	struct bio *bio = ap->a_bio;

	bio->bio_buf->b_flags |= B_ERROR;
	bio->bio_buf->b_error = EOPNOTSUPP;
	biodone(bio);
	return (0);
}

int
nopsize(struct dev_psize_args *ap)
{
	ap->a_result = 0;
	return (0);
}

int
nodump(struct dev_dump_args *ap)
{
	return (ENODEV);
}

/*
 * XXX this is probably bogus.  Any device that uses it isn't checking the
 * minor number.
 */
int
nullopen(struct dev_open_args *ap)
{
	return (0);
}

int
nullclose(struct dev_close_args *ap)
{
	return (0);
}