2 * Copyright (c) 2003 Matthew Dillon <dillon@backplane.com> All rights reserved.
3 * cdevsw from kern/kern_conf.c Copyright (c) 1995 Terrence R. Lambert
4 * cdevsw from kern/kern_conf.c Copyright (c) 1995 Julian R. Elischer,
6 * Copyright (c) 1982, 1986, 1991, 1993
7 * The Regents of the University of California. All rights reserved.
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 #include <sys/param.h>
32 #include <sys/systm.h>
33 #include <sys/kernel.h>
34 #include <sys/sysctl.h>
35 #include <sys/module.h>
36 #include <sys/malloc.h>
40 #include <sys/vnode.h>
41 #include <sys/queue.h>
42 #include <sys/device.h>
44 #include <sys/syslink_rpc.h>
46 #include <sys/dsched.h>
47 #include <sys/devfs.h>
49 #include <machine/stdarg.h>
51 #include <sys/thread2.h>
52 #include <sys/mplock2.h>
/*
 * File-local event counters: one mpsafe_* / mplock_* pair each for
 * writes, reads and strategy calls.  Each counter is published read-only
 * (CTLFLAG_RD) under the _kern sysctl node by the SYSCTL_INT()
 * declarations in this file.  The code that increments them is not
 * visible in this excerpt.
 */
54 static int mpsafe_writes
;
55 static int mplock_writes
;
56 static int mpsafe_reads
;
57 static int mplock_reads
;
58 static int mpsafe_strategies
;
59 static int mplock_strategies
;
61 SYSCTL_INT(_kern
, OID_AUTO
, mpsafe_writes
, CTLFLAG_RD
, &mpsafe_writes
,
63 SYSCTL_INT(_kern
, OID_AUTO
, mplock_writes
, CTLFLAG_RD
, &mplock_writes
,
64 0, "non-mpsafe writes");
65 SYSCTL_INT(_kern
, OID_AUTO
, mpsafe_reads
, CTLFLAG_RD
, &mpsafe_reads
,
67 SYSCTL_INT(_kern
, OID_AUTO
, mplock_reads
, CTLFLAG_RD
, &mplock_reads
,
68 0, "non-mpsafe reads");
69 SYSCTL_INT(_kern
, OID_AUTO
, mpsafe_strategies
, CTLFLAG_RD
, &mpsafe_strategies
,
70 0, "mpsafe strategies");
71 SYSCTL_INT(_kern
, OID_AUTO
, mplock_strategies
, CTLFLAG_RD
, &mplock_strategies
,
72 0, "non-mpsafe strategies");
75 * system link descriptors identify the command in the
76 * arguments structure.
78 #define DDESCNAME(name) __CONCAT(__CONCAT(dev_,name),_desc)
80 #define DEVOP_DESC_INIT(name) \
81 struct syslink_desc DDESCNAME(name) = { \
82 __offsetof(struct dev_ops, __CONCAT(d_, name)), \
/*
 * Instantiate one struct syslink_desc per device operation.  For each
 * name, DEVOP_DESC_INIT() defines an object called dev_<name>_desc (see
 * DDESCNAME) whose initializer starts with the offset of the d_<name>
 * member inside struct dev_ops.  The dispatch wrappers in this file store
 * the address of the matching descriptor in a_head.a_desc.
 */
85 DEVOP_DESC_INIT(default);
86 DEVOP_DESC_INIT(open
);
87 DEVOP_DESC_INIT(close
);
88 DEVOP_DESC_INIT(read
);
89 DEVOP_DESC_INIT(write
);
90 DEVOP_DESC_INIT(ioctl
);
91 DEVOP_DESC_INIT(dump
);
92 DEVOP_DESC_INIT(psize
);
93 DEVOP_DESC_INIT(mmap
);
94 DEVOP_DESC_INIT(mmap_single
);
95 DEVOP_DESC_INIT(strategy
);
96 DEVOP_DESC_INIT(kqfilter
);
97 DEVOP_DESC_INIT(revoke
);
98 DEVOP_DESC_INIT(clone
);
/*
 * dead_dev_ops is defined here without an initializer; nothing in this
 * excerpt shows how it is filled in or used.
 */
103 struct dev_ops dead_dev_ops
;
/*
 * Forward declarations of the file-local fallback handlers, one per
 * d_*_t handler type, so they can be named in the default_dev_ops
 * initializer before their definitions appear.
 */
105 static d_open_t noopen
;
106 static d_close_t noclose
;
107 static d_read_t noread
;
108 static d_write_t nowrite
;
109 static d_ioctl_t noioctl
;
110 static d_mmap_t nommap
;
111 static d_mmap_single_t nommap_single
;
112 static d_strategy_t nostrategy
;
113 static d_dump_t nodump
;
114 static d_psize_t nopsize
;
115 static d_kqfilter_t nokqfilter
;
116 static d_clone_t noclone
;
117 static d_revoke_t norevoke
;
/*
 * Table of default handlers.  compile_dev_ops() takes a pointer into this
 * structure at the same offset as each slot it examines in a driver's
 * dev_ops.
 *
 * NOTE(review): only part of the initializer is visible in this excerpt;
 * the remaining members and the closing brace are missing.
 */
119 struct dev_ops default_dev_ops
= {
121 .d_default
= NULL
, /* must be NULL */
128 .d_mmap_single
= nommap_single
,
129 .d_strategy
= nostrategy
,
132 .d_kqfilter
= nokqfilter
,
133 .d_revoke
= norevoke
,
/*
 * Test the D_MPSAFE bit in the flags of the device's ops header.
 *
 * Returns nonzero when D_MPSAFE is clear and zero when it is set.  Every
 * dispatch wrapper in this file stores the result in a local named
 * needmplock; what is done with that value is not visible in this
 * excerpt.
 */
139 dev_needmplock(cdev_t dev
)
141 return((dev
->si_ops
->head
.flags
& D_MPSAFE
) == 0);
144 /************************************************************************
145 * GENERAL DEVICE API FUNCTIONS *
146 ************************************************************************
148 * The MPSAFEness of these depends on dev->si_ops->head.flags
/*
 * Dispatch an open request to a device.
 *
 * Builds a dev_open_args message tagged with dev_open_desc, fills in the
 * target device, the open flags and the device type, and passes it to the
 * d_open handler found in dev->si_ops.  The handler's return value is
 * captured in error.
 *
 * NOTE(review): the excerpt does not show how cred and fp are placed in
 * the message, what needmplock is used for, or the final return.
 */
151 dev_dopen(cdev_t dev
, int oflags
, int devtype
, struct ucred
*cred
, struct file
*fp
)
153 struct dev_open_args ap
;
154 int needmplock
= dev_needmplock(dev
);
157 ap
.a_head
.a_desc
= &dev_open_desc
;
158 ap
.a_head
.a_dev
= dev
;
159 ap
.a_oflags
= oflags
;
160 ap
.a_devtype
= devtype
;
166 error
= dev
->si_ops
->d_open(&ap
);
/*
 * Dispatch a close request to a device.
 *
 * Builds a dev_close_args message tagged with dev_close_desc, fills in
 * the target device and the device type, and passes it to the d_close
 * handler found in dev->si_ops.  The handler's return value is captured
 * in error.
 *
 * NOTE(review): the excerpt does not show how fflag and fp are placed in
 * the message, what needmplock is used for, or the final return.
 */
173 dev_dclose(cdev_t dev
, int fflag
, int devtype
, struct file
*fp
)
175 struct dev_close_args ap
;
176 int needmplock
= dev_needmplock(dev
);
179 ap
.a_head
.a_desc
= &dev_close_desc
;
180 ap
.a_head
.a_dev
= dev
;
182 ap
.a_devtype
= devtype
;
187 error
= dev
->si_ops
->d_close(&ap
);
/*
 * Dispatch a read request to a device.
 *
 * Builds a dev_read_args message tagged with dev_read_desc, fills in the
 * target device and the I/O flags, and passes it to the d_read handler
 * found in dev->si_ops.  After the handler has been called,
 * dev->si_lastread is set to time_uptime.
 *
 * NOTE(review): the excerpt does not show how uio and fp are placed in
 * the message, whether the si_lastread update is conditional, what
 * needmplock is used for, or the final return.
 */
194 dev_dread(cdev_t dev
, struct uio
*uio
, int ioflag
, struct file
*fp
)
196 struct dev_read_args ap
;
197 int needmplock
= dev_needmplock(dev
);
200 ap
.a_head
.a_desc
= &dev_read_desc
;
201 ap
.a_head
.a_dev
= dev
;
203 ap
.a_ioflag
= ioflag
;
212 error
= dev
->si_ops
->d_read(&ap
);
216 dev
->si_lastread
= time_uptime
;
/*
 * Dispatch a write request to a device.
 *
 * Sets dev->si_lastwrite to time_uptime before the message is assembled,
 * i.e. before the handler runs.  It then builds a dev_write_args message
 * tagged with dev_write_desc, fills in the target device and the I/O
 * flags, and passes it to the d_write handler found in dev->si_ops.
 *
 * NOTE(review): the excerpt does not show how uio and fp are placed in
 * the message, what needmplock is used for, or the final return.
 */
221 dev_dwrite(cdev_t dev
, struct uio
*uio
, int ioflag
, struct file
*fp
)
223 struct dev_write_args ap
;
224 int needmplock
= dev_needmplock(dev
);
227 dev
->si_lastwrite
= time_uptime
;
228 ap
.a_head
.a_desc
= &dev_write_desc
;
229 ap
.a_head
.a_dev
= dev
;
231 ap
.a_ioflag
= ioflag
;
240 error
= dev
->si_ops
->d_write(&ap
);
/*
 * Dispatch an ioctl request to a device.
 *
 * Builds a dev_ioctl_args message tagged with dev_ioctl_desc, fills in
 * the target device, and passes it to the d_ioctl handler found in
 * dev->si_ops.  The handler's return value is captured in error.
 *
 * NOTE(review): the excerpt does not show how cmd, data, fflag, cred, msg
 * and fp are placed in the message, what needmplock is used for, or the
 * final return.
 */
247 dev_dioctl(cdev_t dev
, u_long cmd
, caddr_t data
, int fflag
, struct ucred
*cred
,
248 struct sysmsg
*msg
, struct file
*fp
)
250 struct dev_ioctl_args ap
;
251 int needmplock
= dev_needmplock(dev
);
254 ap
.a_head
.a_desc
= &dev_ioctl_desc
;
255 ap
.a_head
.a_dev
= dev
;
265 error
= dev
->si_ops
->d_ioctl(&ap
);
/*
 * Dispatch an mmap request to a device.
 *
 * Builds a dev_mmap_args message tagged with dev_mmap_desc, fills in the
 * target device and the offset, and passes it to the d_mmap handler found
 * in dev->si_ops.  The handler's return value is captured in error.
 *
 * NOTE(review): the excerpt does not show how nprot and fp are placed in
 * the message, what needmplock is used for, or what is finally returned.
 */
272 dev_dmmap(cdev_t dev
, vm_offset_t offset
, int nprot
, struct file
*fp
)
274 struct dev_mmap_args ap
;
275 int needmplock
= dev_needmplock(dev
);
278 ap
.a_head
.a_desc
= &dev_mmap_desc
;
279 ap
.a_head
.a_dev
= dev
;
280 ap
.a_offset
= offset
;
286 error
= dev
->si_ops
->d_mmap(&ap
);
/*
 * Dispatch an mmap_single request to a device.
 *
 * Builds a dev_mmap_single_args message tagged with dev_mmap_single_desc
 * and passes it to the d_mmap_single handler found in dev->si_ops.  The
 * offset and object arguments are stored in the message as the pointers
 * the caller supplied, not as copies of what they point to.
 *
 * NOTE(review): the excerpt does not show how size, nprot and fp are
 * placed in the message, what needmplock is used for, or the final
 * return.
 */
296 dev_dmmap_single(cdev_t dev
, vm_ooffset_t
*offset
, vm_size_t size
,
297 struct vm_object
**object
, int nprot
, struct file
*fp
)
299 struct dev_mmap_single_args ap
;
300 int needmplock
= dev_needmplock(dev
);
303 ap
.a_head
.a_desc
= &dev_mmap_single_desc
;
304 ap
.a_head
.a_dev
= dev
;
305 ap
.a_offset
= offset
;
307 ap
.a_object
= object
;
313 error
= dev
->si_ops
->d_mmap_single(&ap
);
/*
 * Dispatch a clone request to a device.
 *
 * Builds a dev_clone_args message tagged with dev_clone_desc, fills in
 * the target device, and passes it to the d_clone handler found in
 * dev->si_ops.  The handler's return value is captured in error.
 *
 * NOTE(review): the excerpt does not show what needmplock is used for or
 * the final return.
 */
321 dev_dclone(cdev_t dev
)
323 struct dev_clone_args ap
;
324 int needmplock
= dev_needmplock(dev
);
327 ap
.a_head
.a_desc
= &dev_clone_desc
;
328 ap
.a_head
.a_dev
= dev
;
332 error
= dev
->si_ops
->d_clone(&ap
);
/*
 * Dispatch a revoke request to a device.
 *
 * Builds a dev_revoke_args message tagged with dev_revoke_desc, fills in
 * the target device, and passes it to the d_revoke handler found in
 * dev->si_ops.  The handler's return value is captured in error.
 *
 * NOTE(review): the excerpt does not show what needmplock is used for or
 * the final return.
 */
339 dev_drevoke(cdev_t dev
)
341 struct dev_revoke_args ap
;
342 int needmplock
= dev_needmplock(dev
);
345 ap
.a_head
.a_desc
= &dev_revoke_desc
;
346 ap
.a_head
.a_dev
= dev
;
350 error
= dev
->si_ops
->d_revoke(&ap
);
358 * Core device strategy call, used to issue I/O on a device. There are
359 * two versions, a non-chained version and a chained version. The chained
360 * version reuses a BIO set up by vn_strategy(). The only difference is
361 * that, for now, we do not push a new tracking structure when chaining
362 * from vn_strategy. XXX this will ultimately have to change.
/*
 * Non-chained strategy dispatch: issue a BIO to a device, attaching a
 * tracking structure first.
 *
 * Preconditions enforced with KKASSERT:
 *   - bio->bio_track is NULL on entry (no tracker attached yet);
 *   - the buffer command is not BUF_CMD_DONE;
 *   - BIO_DONE is clear in bio->bio_flags just before dispatch.
 *
 * Visible sequence: pick a tracker (dev->si_track_read when the buffer
 * command is BUF_CMD_READ; dev->si_track_write is the other assignment),
 * take a reference on it with bio_track_ref(), record it in
 * bio->bio_track, call dsched_buf_enter() on the buffer, then call the
 * d_strategy handler from dev->si_ops.  The handler's return value is
 * deliberately discarded via the (void) cast.
 *
 * NOTE(review): the "else" joining the two track assignments, the
 * storing of bio into the message, and any use of needmplock are not
 * visible in this excerpt.
 */
365 dev_dstrategy(cdev_t dev
, struct bio
*bio
)
367 struct dev_strategy_args ap
;
368 struct bio_track
*track
;
369 int needmplock
= dev_needmplock(dev
);
371 ap
.a_head
.a_desc
= &dev_strategy_desc
;
372 ap
.a_head
.a_dev
= dev
;
375 KKASSERT(bio
->bio_track
== NULL
);
376 KKASSERT(bio
->bio_buf
->b_cmd
!= BUF_CMD_DONE
);
377 if (bio
->bio_buf
->b_cmd
== BUF_CMD_READ
)
378 track
= &dev
->si_track_read
;
380 track
= &dev
->si_track_write
;
381 bio_track_ref(track
);
382 bio
->bio_track
= track
;
383 dsched_buf_enter(bio
->bio_buf
); /* might stack */
385 KKASSERT((bio
->bio_flags
& BIO_DONE
) == 0);
392 (void)dev
->si_ops
->d_strategy(&ap
);
/*
 * Chained strategy dispatch: issue a BIO that already carries a tracking
 * structure.
 *
 * Unlike dev_dstrategy(), the visible code asserts that bio->bio_track is
 * already non-NULL and attaches no new tracker.  It also asserts that
 * BIO_DONE is clear, then calls the d_strategy handler from dev->si_ops,
 * discarding its return value via the (void) cast.
 *
 * NOTE(review): the storing of bio into the message and any use of
 * needmplock are not visible in this excerpt.
 */
398 dev_dstrategy_chain(cdev_t dev
, struct bio
*bio
)
400 struct dev_strategy_args ap
;
401 int needmplock
= dev_needmplock(dev
);
403 ap
.a_head
.a_desc
= &dev_strategy_desc
;
404 ap
.a_head
.a_dev
= dev
;
407 KKASSERT(bio
->bio_track
!= NULL
);
408 KKASSERT((bio
->bio_flags
& BIO_DONE
) == 0);
411 (void)dev
->si_ops
->d_strategy(&ap
);
417 * note: the disk layer is expected to set count, blkno, and secsize before
418 * forwarding the message.
/*
 * Dispatch a dump request to a device.
 *
 * Builds a dev_dump_args message tagged with dev_dump_desc, fills in the
 * target device together with the virtual address, physical address,
 * offset and length, and passes it to the d_dump handler found in
 * dev->si_ops.  The handler's return value is captured in error.
 *
 * NOTE(review): the parameter list is cut off in this excerpt; "length"
 * is presumably one of the parameters that is not visible.  Any other
 * message fields, the use of needmplock and the final return are also
 * not shown.
 */
421 dev_ddump(cdev_t dev
, void *virtual, vm_offset_t physical
, off_t offset
,
424 struct dev_dump_args ap
;
425 int needmplock
= dev_needmplock(dev
);
428 ap
.a_head
.a_desc
= &dev_dump_desc
;
429 ap
.a_head
.a_dev
= dev
;
433 ap
.a_virtual
= virtual;
434 ap
.a_physical
= physical
;
435 ap
.a_offset
= offset
;
436 ap
.a_length
= length
;
440 error
= dev
->si_ops
->d_dump(&ap
);
/*
 * Dispatch a psize request to a device.
 *
 * Builds a dev_psize_args message tagged with dev_psize_desc, fills in
 * the target device, and passes it to the d_psize handler found in
 * dev->si_ops.  One visible return path hands back ap.a_result, the value
 * the handler leaves in the message.
 *
 * NOTE(review): how the handler's error return affects the result, and
 * any use of needmplock, are not visible in this excerpt.
 */
447 dev_dpsize(cdev_t dev
)
449 struct dev_psize_args ap
;
450 int needmplock
= dev_needmplock(dev
);
453 ap
.a_head
.a_desc
= &dev_psize_desc
;
454 ap
.a_head
.a_dev
= dev
;
458 error
= dev
->si_ops
->d_psize(&ap
);
463 return (ap
.a_result
);
468 * Pass-thru to the device kqfilter.
470 * NOTE: We explicitly preset a_result to 0 so d_kqfilter() functions
471 * which return 0 do not have to bother setting a_result.
/*
 * Dispatch a kqfilter request to a device.
 *
 * Builds a dev_kqfilter_args message tagged with dev_kqfilter_desc, fills
 * in the target device, and passes it to the d_kqfilter handler found in
 * dev->si_ops.  The handler's return value is captured in error.
 *
 * NOTE(review): the excerpt does not show how kn and fp are placed in the
 * message, any presetting of a_result, the use of needmplock, or what is
 * finally returned.
 */
474 dev_dkqfilter(cdev_t dev
, struct knote
*kn
, struct file
*fp
)
476 struct dev_kqfilter_args ap
;
477 int needmplock
= dev_needmplock(dev
);
480 ap
.a_head
.a_desc
= &dev_kqfilter_desc
;
481 ap
.a_head
.a_dev
= dev
;
488 error
= dev
->si_ops
->d_kqfilter(&ap
);
497 /************************************************************************
498 * DEVICE HELPER FUNCTIONS *
499 ************************************************************************/
/*
 * Return the refcnt member of the device's si_sysref structure.
 */
505 dev_drefs(cdev_t dev
)
507 return(dev
->si_sysref
.refcnt
);
/*
 * Return the name recorded in the header of the device's ops structure
 * (dev->si_ops->head.name).
 */
514 dev_dname(cdev_t dev
)
516 return(dev
->si_ops
->head
.name
);
/*
 * Return the flags recorded in the header of the device's ops structure
 * (dev->si_ops->head.flags), the same word dev_needmplock() tests for
 * D_MPSAFE.
 */
523 dev_dflags(cdev_t dev
)
525 return(dev
->si_ops
->head
.flags
);
534 return(dev
->si_ops
->head
.maj
);
538 * Used when forwarding a request through layers. The caller adjusts
539 * ap->a_head.a_dev and then calls this function.
/*
 * Generic dispatch for an argument message that has already been built.
 *
 * The handler is located by descriptor rather than by name: the
 * sd_offset stored in ap->a_desc is added to the base address of
 * ap->a_dev->si_ops, and the function pointer stored at that position is
 * loaded into func.
 *
 * NOTE(review): the call through func, the use of needmplock and the
 * return are not visible in this excerpt.
 */
542 dev_doperate(struct dev_generic_args
*ap
)
544 int (*func
)(struct dev_generic_args
*);
545 int needmplock
= dev_needmplock(ap
->a_dev
);
548 func
= *(void **)((char *)ap
->a_dev
->si_ops
+ ap
->a_desc
->sd_offset
);
560 * Used by the console intercept code only. Issue an operation through
561 * a foreign ops structure allowing the ops structure associated
562 * with the device to remain intact.
/*
 * Variant of dev_doperate() that looks the handler up in a
 * caller-supplied ops structure instead of the one attached to the
 * device.
 *
 * needmplock is derived from the D_MPSAFE bit of ops->head.flags, i.e.
 * from the supplied ops rather than from ap->a_dev.  The handler pointer
 * is loaded from ops at the byte offset given by ap->a_desc->sd_offset.
 *
 * NOTE(review): the call through func, the use of needmplock and the
 * return are not visible in this excerpt.
 */
565 dev_doperate_ops(struct dev_ops
*ops
, struct dev_generic_args
*ap
)
567 int (*func
)(struct dev_generic_args
*);
568 int needmplock
= ((ops
->head
.flags
& D_MPSAFE
) == 0);
571 func
= *(void **)((char *)ops
+ ap
->a_desc
->sd_offset
);
583 * Convert a template dev_ops into the real thing by filling in
584 * uninitialized fields.
/*
 * Fill in the unset handler slots of a dev_ops template.
 *
 * Walks struct dev_ops one pointer-sized slot at a time, from the offset
 * of dev_ops_first_field up to and including the offset of
 * dev_ops_last_field.  For each slot, func_p addresses the slot in ops
 * and def_p addresses the slot at the same offset in default_dev_ops.  A
 * slot that is still NULL gets replaced; the one replacement visible
 * here is ops->d_default.
 *
 * NOTE(review): the condition choosing between ops->d_default and the
 * default_dev_ops entry (*def_p), the declaration of offset, and the
 * closing braces are not visible in this excerpt.
 */
587 compile_dev_ops(struct dev_ops
*ops
)
591 for (offset
= offsetof(struct dev_ops
, dev_ops_first_field
);
592 offset
<= offsetof(struct dev_ops
, dev_ops_last_field
);
593 offset
+= sizeof(void *)
595 void **func_p
= (void **)((char *)ops
+ offset
);
596 void **def_p
= (void **)((char *)&default_dev_ops
+ offset
);
597 if (*func_p
== NULL
) {
599 *func_p
= ops
->d_default
;
606 /************************************************************************
607 * MAJOR/MINOR SPACE FUNCTION *
608 ************************************************************************/
611 * This makes a dev_ops entry visible to userland (e.g. /dev/<blah>).
613 * Disk devices typically register their major, e.g. 'ad0', and then call
614 * into the disk label management code which overloads its own onto e.g. 'ad0'
615 * to support all the various slice and partition combinations.
617 * The mask/match supplied in this call are a full 32 bits and the same
618 * mask and match must be specified in a later dev_ops_remove() call to
619 * match this add. However, the match value for the minor number should never
620 * have any bits set in the major number's bit range (8-15). The mask value
621 * may be conveniently specified as -1 without creating any major number
/*
 * Comparison callback handed to RB_GENERATE2() for dev_ops_rb_tree.  It
 * orders two struct dev_ops_maj nodes by their maj members.
 *
 * NOTE(review): only the "greater than" test is visible in this excerpt;
 * the other comparison and the values returned for each outcome are
 * missing.
 */
627 rb_dev_ops_compare(struct dev_ops_maj
*a
, struct dev_ops_maj
*b
)
631 else if (a
->maj
> b
->maj
)
/*
 * Emit the red-black tree routines for dev_ops_rb_tree.  The arguments
 * name the node type (dev_ops_maj), its linkage member (rbnode), the
 * comparison callback (rb_dev_ops_compare), and an int-typed maj member.
 * NOTE(review): the last two arguments are presumably the lookup key's
 * type and field; confirm against the RB_GENERATE2 definition in
 * <sys/tree.h>.
 */
636 RB_GENERATE2(dev_ops_rb_tree
, dev_ops_maj
, rbnode
, rb_dev_ops_compare
, int, maj
);
/* Global tree head, statically initialized with RB_INITIALIZER(). */
638 struct dev_ops_rb_tree dev_ops_rbhead
= RB_INITIALIZER(dev_ops_rbhead
);
/*
 * Delegate to devfs_destroy_dev_by_ops() with a minor argument of -1 and
 * return its result.
 * NOTE(review): -1 presumably means "every minor", in contrast to
 * dev_ops_remove_minor(), which passes a specific minor; confirm against
 * devfs.
 */
641 dev_ops_remove_all(struct dev_ops
*ops
)
643 return devfs_destroy_dev_by_ops(ops
, -1);
/*
 * Delegate to devfs_destroy_dev_by_ops() for the given ops and minor
 * number, and return its result.
 */
647 dev_ops_remove_minor(struct dev_ops
*ops
, int minor
)
649 return devfs_destroy_dev_by_ops(ops
, minor
);
/*
 * Prepare an intercepting ops structure (iops) for a device.
 *
 * Visible steps: remember the device's current ops in oops, run
 * compile_dev_ops() on iops so its empty slots are filled, copy the maj,
 * data and flags header fields from oops into iops, and set
 * SI_INTERCEPTED in dev->si_flags.
 *
 * NOTE(review): the statement that installs iops on the device and the
 * return value are not visible in this excerpt.  Presumably oops is
 * returned so it can later be handed to dev_ops_restore(); confirm.
 */
653 dev_ops_intercept(cdev_t dev
, struct dev_ops
*iops
)
655 struct dev_ops
*oops
= dev
->si_ops
;
657 compile_dev_ops(iops
);
658 iops
->head
.maj
= oops
->head
.maj
;
659 iops
->head
.data
= oops
->head
.data
;
660 iops
->head
.flags
= oops
->head
.flags
;
662 dev
->si_flags
|= SI_INTERCEPTED
;
/*
 * Undo dev_ops_intercept().
 *
 * Visible steps: take the device's current ops as iops, clear
 * SI_INTERCEPTED in dev->si_flags, and reset the data and flags header
 * fields of iops to NULL and 0.
 *
 * NOTE(review): the statement that puts oops back on the device, and any
 * reset of iops->head.maj, are not visible in this excerpt.
 */
668 dev_ops_restore(cdev_t dev
, struct dev_ops
*oops
)
670 struct dev_ops
*iops
= dev
->si_ops
;
673 dev
->si_flags
&= ~SI_INTERCEPTED
;
675 iops
->head
.data
= NULL
;
676 iops
->head
.flags
= 0;
679 /************************************************************************
680 * DEFAULT DEV OPS FUNCTIONS *
681 ************************************************************************/
685 * Unsupported devswitch functions (e.g. for writing to read-only device).
686 * XXX may belong elsewhere.
689 norevoke(struct dev_revoke_args
*ap
)
/*
 * File-local d_clone_t handler.  The visible body just returns 0, which
 * the inline comment describes as allowing the clone.
 */
696 noclone(struct dev_clone_args
*ap
)
699 return (0); /* allow the clone */
703 noopen(struct dev_open_args
*ap
)
709 noclose(struct dev_close_args
*ap
)
715 noread(struct dev_read_args
*ap
)
721 nowrite(struct dev_write_args
*ap
)
727 noioctl(struct dev_ioctl_args
*ap
)
733 nokqfilter(struct dev_kqfilter_args
*ap
)
739 nommap(struct dev_mmap_args
*ap
)
745 nommap_single(struct dev_mmap_single_args
*ap
)
/*
 * File-local d_strategy_t handler, installed as .d_strategy in
 * default_dev_ops.  It fails the request: B_ERROR is set in the flags of
 * the buffer attached to ap->a_bio, and the buffer's b_error is set to
 * EOPNOTSUPP.
 *
 * NOTE(review): how the BIO is completed afterwards and what the function
 * returns are not visible in this excerpt.
 */
751 nostrategy(struct dev_strategy_args
*ap
)
753 struct bio
*bio
= ap
->a_bio
;
755 bio
->bio_buf
->b_flags
|= B_ERROR
;
756 bio
->bio_buf
->b_error
= EOPNOTSUPP
;
762 nopsize(struct dev_psize_args
*ap
)
769 nodump(struct dev_dump_args
*ap
)
775 * XXX this is probably bogus. Any device that uses it isn't checking the
779 nullopen(struct dev_open_args
*ap
)
785 nullclose(struct dev_close_args
*ap
)