kernel: Add a few forgotten crit_exit()s and fix a wrong crit_enter().
[dragonfly.git] / sys / kern / kern_device.c
/*
 * Copyright (c) 2003 Matthew Dillon <dillon@backplane.com> All rights reserved.
 * cdevsw from kern/kern_conf.c Copyright (c) 1995 Terrence R. Lambert
 * cdevsw from kern/kern_conf.c Copyright (c) 1995 Julian R. Elischer,
 * All rights reserved.
 * Copyright (c) 1982, 1986, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/module.h>
#include <sys/malloc.h>
#include <sys/conf.h>
#include <sys/bio.h>
#include <sys/buf.h>
#include <sys/vnode.h>
#include <sys/queue.h>
#include <sys/device.h>
#include <sys/tree.h>
#include <sys/syslink_rpc.h>
#include <sys/proc.h>
#include <machine/stdarg.h>
#include <sys/devfs.h>
#include <sys/dsched.h>

#include <sys/thread2.h>
#include <sys/mplock2.h>
static int mpsafe_writes;
static int mplock_writes;
static int mpsafe_reads;
static int mplock_reads;
static int mpsafe_strategies;
static int mplock_strategies;

SYSCTL_INT(_kern, OID_AUTO, mpsafe_writes, CTLFLAG_RD, &mpsafe_writes,
	   0, "mpsafe writes");
SYSCTL_INT(_kern, OID_AUTO, mplock_writes, CTLFLAG_RD, &mplock_writes,
	   0, "non-mpsafe writes");
SYSCTL_INT(_kern, OID_AUTO, mpsafe_reads, CTLFLAG_RD, &mpsafe_reads,
	   0, "mpsafe reads");
SYSCTL_INT(_kern, OID_AUTO, mplock_reads, CTLFLAG_RD, &mplock_reads,
	   0, "non-mpsafe reads");
SYSCTL_INT(_kern, OID_AUTO, mpsafe_strategies, CTLFLAG_RD, &mpsafe_strategies,
	   0, "mpsafe strategies");
SYSCTL_INT(_kern, OID_AUTO, mplock_strategies, CTLFLAG_RD, &mplock_strategies,
	   0, "non-mpsafe strategies");
/*
 * system link descriptors identify the command in the
 * arguments structure.
 */
#define DDESCNAME(name) __CONCAT(__CONCAT(dev_,name),_desc)

#define DEVOP_DESC_INIT(name)						\
	    struct syslink_desc DDESCNAME(name) = {			\
		__offsetof(struct dev_ops, __CONCAT(d_, name)),		\
	    #name }

DEVOP_DESC_INIT(default);
DEVOP_DESC_INIT(open);
DEVOP_DESC_INIT(close);
DEVOP_DESC_INIT(read);
DEVOP_DESC_INIT(write);
DEVOP_DESC_INIT(ioctl);
DEVOP_DESC_INIT(dump);
DEVOP_DESC_INIT(psize);
DEVOP_DESC_INIT(mmap);
DEVOP_DESC_INIT(strategy);
DEVOP_DESC_INIT(kqfilter);
DEVOP_DESC_INIT(revoke);
DEVOP_DESC_INIT(clone);
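
/*
 * For illustration (kept out of compilation): DEVOP_DESC_INIT(open) above
 * expands to a syslink_desc whose offset locates d_open within struct
 * dev_ops and whose name is the literal string "open":
 */
#if 0
struct syslink_desc dev_open_desc = {
	__offsetof(struct dev_ops, d_open),
	"open"
};
#endif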
/*
 * Misc default ops
 */
struct dev_ops dead_dev_ops;

struct dev_ops default_dev_ops = {
	{ "null" },
	.d_default = NULL,	/* must be NULL */
	.d_open = noopen,
	.d_close = noclose,
	.d_read = noread,
	.d_write = nowrite,
	.d_ioctl = noioctl,
	.d_mmap = nommap,
	.d_strategy = nostrategy,
	.d_dump = nodump,
	.d_psize = nopsize,
	.d_kqfilter = nokqfilter,
	.d_revoke = norevoke,
	.d_clone = noclone
};
static __inline
int
dev_needmplock(cdev_t dev)
{
	return((dev->si_ops->head.flags & D_MPSAFE) == 0);
}
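
/*
 * Sketch (hypothetical driver, not part of this file): a driver that is
 * MP-safe sets D_MPSAFE in its dev_ops head flags, so dev_needmplock()
 * returns 0 and the wrappers below skip the MP lock.  The head
 * initializer layout shown here is an assumption.
 */
#if 0
static struct dev_ops example_ops = {
	{ "example", 0, D_MPSAFE },	/* name, maj, flags (assumed order) */
	.d_open = example_open,
	.d_read = example_read,
};
#endif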
/************************************************************************
 *			GENERAL DEVICE API FUNCTIONS			*
 ************************************************************************
 *
 * The MPSAFEness of these depends on dev->si_ops->head.flags
 */
int
dev_dopen(cdev_t dev, int oflags, int devtype, struct ucred *cred)
{
	struct dev_open_args ap;
	int needmplock = dev_needmplock(dev);
	int error;

	ap.a_head.a_desc = &dev_open_desc;
	ap.a_head.a_dev = dev;
	ap.a_oflags = oflags;
	ap.a_devtype = devtype;
	ap.a_cred = cred;

	if (needmplock)
		get_mplock();
	error = dev->si_ops->d_open(&ap);
	if (needmplock)
		rel_mplock();
	return (error);
}
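
/*
 * Usage sketch (hypothetical caller): device-vnode code opens the backing
 * device through this wrapper rather than calling d_open directly, so the
 * MP lock handling is applied uniformly.  'dev' and 'cred' are assumed to
 * be in scope.
 */
#if 0
error = dev_dopen(dev, FREAD, S_IFCHR, cred);
if (error)
	return (error);
#endif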
int
dev_dclose(cdev_t dev, int fflag, int devtype)
{
	struct dev_close_args ap;
	int needmplock = dev_needmplock(dev);
	int error;

	ap.a_head.a_desc = &dev_close_desc;
	ap.a_head.a_dev = dev;
	ap.a_fflag = fflag;
	ap.a_devtype = devtype;

	if (needmplock)
		get_mplock();
	error = dev->si_ops->d_close(&ap);
	if (needmplock)
		rel_mplock();
	return (error);
}
int
dev_dread(cdev_t dev, struct uio *uio, int ioflag)
{
	struct dev_read_args ap;
	int needmplock = dev_needmplock(dev);
	int error;

	ap.a_head.a_desc = &dev_read_desc;
	ap.a_head.a_dev = dev;
	ap.a_uio = uio;
	ap.a_ioflag = ioflag;

	if (needmplock) {
		get_mplock();
		++mplock_reads;
	} else {
		++mpsafe_reads;
	}
	error = dev->si_ops->d_read(&ap);
	if (needmplock)
		rel_mplock();
	if (error == 0)
		dev->si_lastread = time_second;
	return (error);
}
int
dev_dwrite(cdev_t dev, struct uio *uio, int ioflag)
{
	struct dev_write_args ap;
	int needmplock = dev_needmplock(dev);
	int error;

	dev->si_lastwrite = time_second;
	ap.a_head.a_desc = &dev_write_desc;
	ap.a_head.a_dev = dev;
	ap.a_uio = uio;
	ap.a_ioflag = ioflag;

	if (needmplock) {
		get_mplock();
		++mplock_writes;
	} else {
		++mpsafe_writes;
	}
	error = dev->si_ops->d_write(&ap);
	if (needmplock)
		rel_mplock();
	return (error);
}
int
dev_dioctl(cdev_t dev, u_long cmd, caddr_t data, int fflag, struct ucred *cred,
	   struct sysmsg *msg)
{
	struct dev_ioctl_args ap;
	int needmplock = dev_needmplock(dev);
	int error;

	ap.a_head.a_desc = &dev_ioctl_desc;
	ap.a_head.a_dev = dev;
	ap.a_cmd = cmd;
	ap.a_data = data;
	ap.a_fflag = fflag;
	ap.a_cred = cred;
	ap.a_sysmsg = msg;

	if (needmplock)
		get_mplock();
	error = dev->si_ops->d_ioctl(&ap);
	if (needmplock)
		rel_mplock();
	return (error);
}
int
dev_dmmap(cdev_t dev, vm_offset_t offset, int nprot)
{
	struct dev_mmap_args ap;
	int needmplock = dev_needmplock(dev);
	int error;

	ap.a_head.a_desc = &dev_mmap_desc;
	ap.a_head.a_dev = dev;
	ap.a_offset = offset;
	ap.a_nprot = nprot;

	if (needmplock)
		get_mplock();
	error = dev->si_ops->d_mmap(&ap);
	if (needmplock)
		rel_mplock();

	if (error == 0)
		return(ap.a_result);
	return(-1);
}
int
dev_dclone(cdev_t dev)
{
	struct dev_clone_args ap;
	int needmplock = dev_needmplock(dev);
	int error;

	ap.a_head.a_desc = &dev_clone_desc;
	ap.a_head.a_dev = dev;

	if (needmplock)
		get_mplock();
	error = dev->si_ops->d_clone(&ap);
	if (needmplock)
		rel_mplock();
	return (error);
}
int
dev_drevoke(cdev_t dev)
{
	struct dev_revoke_args ap;
	int needmplock = dev_needmplock(dev);
	int error;

	ap.a_head.a_desc = &dev_revoke_desc;
	ap.a_head.a_dev = dev;

	if (needmplock)
		get_mplock();
	error = dev->si_ops->d_revoke(&ap);
	if (needmplock)
		rel_mplock();

	return (error);
}
/*
 * Core device strategy call, used to issue I/O on a device.  There are
 * two versions, a non-chained version and a chained version.  The chained
 * version reuses a BIO set up by vn_strategy().  The only difference is
 * that, for now, we do not push a new tracking structure when chaining
 * from vn_strategy.  XXX this will ultimately have to change.
 */
void
dev_dstrategy(cdev_t dev, struct bio *bio)
{
	struct dev_strategy_args ap;
	struct bio_track *track;
	int needmplock = dev_needmplock(dev);

	ap.a_head.a_desc = &dev_strategy_desc;
	ap.a_head.a_dev = dev;
	ap.a_bio = bio;

	KKASSERT(bio->bio_track == NULL);
	KKASSERT(bio->bio_buf->b_cmd != BUF_CMD_DONE);
	if (bio->bio_buf->b_cmd == BUF_CMD_READ)
		track = &dev->si_track_read;
	else
		track = &dev->si_track_write;
	bio_track_ref(track);
	bio->bio_track = track;

	if (dsched_is_clear_buf_priv(bio->bio_buf))
		dsched_new_buf(bio->bio_buf);

	KKASSERT((bio->bio_flags & BIO_DONE) == 0);
	if (needmplock) {
		get_mplock();
		++mplock_strategies;
	} else {
		++mpsafe_strategies;
	}
	(void)dev->si_ops->d_strategy(&ap);
	if (needmplock)
		rel_mplock();
}
void
dev_dstrategy_chain(cdev_t dev, struct bio *bio)
{
	struct dev_strategy_args ap;
	int needmplock = dev_needmplock(dev);

	ap.a_head.a_desc = &dev_strategy_desc;
	ap.a_head.a_dev = dev;
	ap.a_bio = bio;

	KKASSERT(bio->bio_track != NULL);
	KKASSERT((bio->bio_flags & BIO_DONE) == 0);
	if (needmplock)
		get_mplock();
	(void)dev->si_ops->d_strategy(&ap);
	if (needmplock)
		rel_mplock();
}
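
/*
 * Usage sketch (hypothetical layered driver): code forwarding a BIO whose
 * tracking structure vn_strategy() already set up uses the chained
 * version; fresh I/O issued against a device goes through dev_dstrategy()
 * so a track is pushed.  'lower_dev' is assumed.
 */
#if 0
static int
layer_strategy(struct dev_strategy_args *ap)
{
	/* forward without pushing a new tracking structure */
	dev_dstrategy_chain(lower_dev, ap->a_bio);
	return (0);
}
#endif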
/*
 * note: the disk layer is expected to set count, blkno, and secsize before
 * forwarding the message.
 */
int
dev_ddump(cdev_t dev, void *virtual, vm_offset_t physical, off_t offset,
	  size_t length)
{
	struct dev_dump_args ap;
	int needmplock = dev_needmplock(dev);
	int error;

	ap.a_head.a_desc = &dev_dump_desc;
	ap.a_head.a_dev = dev;
	ap.a_count = 0;
	ap.a_blkno = 0;
	ap.a_secsize = 0;
	ap.a_virtual = virtual;
	ap.a_physical = physical;
	ap.a_offset = offset;
	ap.a_length = length;

	if (needmplock)
		get_mplock();
	error = dev->si_ops->d_dump(&ap);
	if (needmplock)
		rel_mplock();
	return (error);
}
int64_t
dev_dpsize(cdev_t dev)
{
	struct dev_psize_args ap;
	int needmplock = dev_needmplock(dev);
	int error;

	ap.a_head.a_desc = &dev_psize_desc;
	ap.a_head.a_dev = dev;

	if (needmplock)
		get_mplock();
	error = dev->si_ops->d_psize(&ap);
	if (needmplock)
		rel_mplock();

	if (error == 0)
		return (ap.a_result);
	return(-1);
}
/*
 * Pass-thru to the device kqfilter.
 *
 * NOTE: We explicitly preset a_result to 0 so d_kqfilter() functions
 *	 which return 0 do not have to bother setting a_result.
 */
int
dev_dkqfilter(cdev_t dev, struct knote *kn)
{
	struct dev_kqfilter_args ap;
	int needmplock = dev_needmplock(dev);
	int error;

	ap.a_head.a_desc = &dev_kqfilter_desc;
	ap.a_head.a_dev = dev;
	ap.a_kn = kn;
	ap.a_result = 0;

	if (needmplock)
		get_mplock();
	error = dev->si_ops->d_kqfilter(&ap);
	if (needmplock)
		rel_mplock();

	if (error == 0)
		return(ap.a_result);
	return(ENODEV);
}
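
/*
 * Sketch (hypothetical driver): because a_result is preset to 0 above, a
 * d_kqfilter method that accepts the knote can attach its filter ops and
 * simply return 0.  'example_filtops' is assumed.
 */
#if 0
static int
example_kqfilter(struct dev_kqfilter_args *ap)
{
	ap->a_kn->kn_fop = &example_filtops;
	return (0);		/* a_result stays 0: success */
}
#endif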
/************************************************************************
 *			DEVICE HELPER FUNCTIONS				*
 ************************************************************************/

/*
 * MPSAFE
 */
int
dev_drefs(cdev_t dev)
{
	return(dev->si_sysref.refcnt);
}

/*
 * MPSAFE
 */
const char *
dev_dname(cdev_t dev)
{
	return(dev->si_ops->head.name);
}

/*
 * MPSAFE
 */
int
dev_dflags(cdev_t dev)
{
	return(dev->si_ops->head.flags);
}

/*
 * MPSAFE
 */
int
dev_dmaj(cdev_t dev)
{
	return(dev->si_ops->head.maj);
}
/*
 * Used when forwarding a request through layers.  The caller adjusts
 * ap->a_head.a_dev and then calls this function.
 */
int
dev_doperate(struct dev_generic_args *ap)
{
	int (*func)(struct dev_generic_args *);
	int needmplock = dev_needmplock(ap->a_dev);
	int error;

	func = *(void **)((char *)ap->a_dev->si_ops + ap->a_desc->sd_offset);

	if (needmplock)
		get_mplock();
	error = func(ap);
	if (needmplock)
		rel_mplock();

	return (error);
}
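
/*
 * Usage sketch (hypothetical layer): a layered device forwards a generic
 * operation by retargeting the argument head at the underlying device and
 * re-dispatching.  'lower_dev' is assumed.
 */
#if 0
static int
layer_default(struct dev_generic_args *ap)
{
	ap->a_dev = lower_dev;
	return (dev_doperate(ap));
}
#endif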
/*
 * Used by the console intercept code only.  Issue an operation through
 * a foreign ops structure allowing the ops structure associated
 * with the device to remain intact.
 */
int
dev_doperate_ops(struct dev_ops *ops, struct dev_generic_args *ap)
{
	int (*func)(struct dev_generic_args *);
	int needmplock = ((ops->head.flags & D_MPSAFE) == 0);
	int error;

	func = *(void **)((char *)ops + ap->a_desc->sd_offset);

	if (needmplock)
		get_mplock();
	error = func(ap);
	if (needmplock)
		rel_mplock();

	return (error);
}
/*
 * Convert a template dev_ops into the real thing by filling in
 * uninitialized fields.
 */
void
compile_dev_ops(struct dev_ops *ops)
{
	int offset;

	for (offset = offsetof(struct dev_ops, dev_ops_first_field);
	     offset <= offsetof(struct dev_ops, dev_ops_last_field);
	     offset += sizeof(void *)
	) {
		void **func_p = (void **)((char *)ops + offset);
		void **def_p = (void **)((char *)&default_dev_ops + offset);
		if (*func_p == NULL) {
			if (ops->d_default)
				*func_p = ops->d_default;
			else
				*func_p = *def_p;
		}
	}
}
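
/*
 * Sketch (hypothetical template): a dev_ops with only a few methods set is
 * completed by compile_dev_ops(); every pointer left NULL is filled from
 * d_default if set, otherwise from the matching default_dev_ops entry.
 */
#if 0
static struct dev_ops sparse_ops = {
	{ "sparse" },
	.d_open = sparse_open,		/* only open/close supplied */
	.d_close = sparse_close,
};

/* after this call, sparse_ops.d_read == noread, etc. */
compile_dev_ops(&sparse_ops);
#endif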
/************************************************************************
 *			MAJOR/MINOR SPACE FUNCTION			*
 ************************************************************************/

/*
 * This makes a dev_ops entry visible to userland (e.g. /dev/<blah>).
 *
 * Disk devices typically register their major, e.g. 'ad0', and then call
 * into the disk label management code which overloads its own onto e.g. 'ad0'
 * to support all the various slice and partition combinations.
 *
 * The mask/match supplied in this call are a full 32 bits and the same
 * mask and match must be specified in a later dev_ops_remove() call to
 * match this add.  However, the match value for the minor number should never
 * have any bits set in the major number's bit range (8-15).  The mask value
 * may be conveniently specified as -1 without creating any major number
 * interference.
 */

static
int
rb_dev_ops_compare(struct dev_ops_maj *a, struct dev_ops_maj *b)
{
	if (a->maj < b->maj)
		return(-1);
	else if (a->maj > b->maj)
		return(1);
	return(0);
}

RB_GENERATE2(dev_ops_rb_tree, dev_ops_maj, rbnode, rb_dev_ops_compare, int, maj);

struct dev_ops_rb_tree dev_ops_rbhead = RB_INITIALIZER(dev_ops_rbhead);
int
dev_ops_remove_all(struct dev_ops *ops)
{
	return devfs_destroy_dev_by_ops(ops, -1);
}

int
dev_ops_remove_minor(struct dev_ops *ops, int minor)
{
	return devfs_destroy_dev_by_ops(ops, minor);
}
struct dev_ops *
dev_ops_intercept(cdev_t dev, struct dev_ops *iops)
{
	struct dev_ops *oops = dev->si_ops;

	compile_dev_ops(iops);
	iops->head.maj = oops->head.maj;
	iops->head.data = oops->head.data;
	iops->head.flags = oops->head.flags;
	dev->si_ops = iops;
	dev->si_flags |= SI_INTERCEPTED;

	return (oops);
}

void
dev_ops_restore(cdev_t dev, struct dev_ops *oops)
{
	struct dev_ops *iops = dev->si_ops;

	dev->si_ops = oops;
	dev->si_flags &= ~SI_INTERCEPTED;
	iops->head.maj = 0;
	iops->head.data = NULL;
	iops->head.flags = 0;
}
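
/*
 * Usage sketch: an intercept saves the ops pointer returned by
 * dev_ops_intercept() and hands it back to dev_ops_restore() on teardown.
 * 'intercept_ops' is assumed.
 */
#if 0
struct dev_ops *saved_ops;

saved_ops = dev_ops_intercept(dev, &intercept_ops);
/* ... operations now route through intercept_ops ... */
dev_ops_restore(dev, saved_ops);
#endif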
/************************************************************************
 *			DEFAULT DEV OPS FUNCTIONS			*
 ************************************************************************/

/*
 * Unsupported devswitch functions (e.g. for writing to read-only device).
 * XXX may belong elsewhere.
 */
int
norevoke(struct dev_revoke_args *ap)
{
	/* take no action */
	return(0);
}

int
noclone(struct dev_clone_args *ap)
{
	/* take no action */
	return (0);	/* allow the clone */
}

int
noopen(struct dev_open_args *ap)
{
	return (ENODEV);
}

int
noclose(struct dev_close_args *ap)
{
	return (ENODEV);
}

int
noread(struct dev_read_args *ap)
{
	return (ENODEV);
}

int
nowrite(struct dev_write_args *ap)
{
	return (ENODEV);
}

int
noioctl(struct dev_ioctl_args *ap)
{
	return (ENODEV);
}

int
nokqfilter(struct dev_kqfilter_args *ap)
{
	return (ENODEV);
}

int
nommap(struct dev_mmap_args *ap)
{
	return (ENODEV);
}

int
nostrategy(struct dev_strategy_args *ap)
{
	struct bio *bio = ap->a_bio;

	bio->bio_buf->b_flags |= B_ERROR;
	bio->bio_buf->b_error = EOPNOTSUPP;
	biodone(bio);
	return(0);
}

int
nopsize(struct dev_psize_args *ap)
{
	ap->a_result = 0;
	return(0);
}

int
nodump(struct dev_dump_args *ap)
{
	return (ENODEV);
}

/*
 * XXX this is probably bogus.  Any device that uses it isn't checking the
 * minor number.
 */
int
nullopen(struct dev_open_args *ap)
{
	return (0);
}

int
nullclose(struct dev_close_args *ap)
{
	return (0);
}