/* dragonfly.git: sys/kern/kern_device.c */
/*
 * Copyright (c) 2003 Matthew Dillon <dillon@backplane.com> All rights reserved.
 * cdevsw from kern/kern_conf.c Copyright (c) 1995 Terrence R. Lambert
 * cdevsw from kern/kern_conf.c Copyright (c) 1995 Julian R. Elischer,
 * All rights reserved.
 * Copyright (c) 1982, 1986, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/module.h>
#include <sys/malloc.h>
#include <sys/conf.h>
#include <sys/bio.h>
#include <sys/buf.h>
#include <sys/vnode.h>
#include <sys/queue.h>
#include <sys/device.h>
#include <sys/tree.h>
#include <sys/syslink_rpc.h>
#include <sys/proc.h>
#include <machine/stdarg.h>
#include <sys/devfs.h>
#include <sys/dsched.h>

#include <sys/thread2.h>
#include <sys/mplock2.h>
static int mpsafe_writes;
static int mplock_writes;
static int mpsafe_reads;
static int mplock_reads;
static int mpsafe_strategies;
static int mplock_strategies;
SYSCTL_INT(_kern, OID_AUTO, mpsafe_writes, CTLFLAG_RD, &mpsafe_writes,
	   0, "mpsafe writes");
SYSCTL_INT(_kern, OID_AUTO, mplock_writes, CTLFLAG_RD, &mplock_writes,
	   0, "non-mpsafe writes");
SYSCTL_INT(_kern, OID_AUTO, mpsafe_reads, CTLFLAG_RD, &mpsafe_reads,
	   0, "mpsafe reads");
SYSCTL_INT(_kern, OID_AUTO, mplock_reads, CTLFLAG_RD, &mplock_reads,
	   0, "non-mpsafe reads");
SYSCTL_INT(_kern, OID_AUTO, mpsafe_strategies, CTLFLAG_RD, &mpsafe_strategies,
	   0, "mpsafe strategies");
SYSCTL_INT(_kern, OID_AUTO, mplock_strategies, CTLFLAG_RD, &mplock_strategies,
	   0, "non-mpsafe strategies");
/*
 * System link descriptors identify the command in the
 * arguments structure.
 */
#define DDESCNAME(name) __CONCAT(__CONCAT(dev_,name),_desc)

#define DEVOP_DESC_INIT(name)						\
	    struct syslink_desc DDESCNAME(name) = {			\
		__offsetof(struct dev_ops, __CONCAT(d_, name)),		\
	    #name }
DEVOP_DESC_INIT(default);
DEVOP_DESC_INIT(open);
DEVOP_DESC_INIT(close);
DEVOP_DESC_INIT(read);
DEVOP_DESC_INIT(write);
DEVOP_DESC_INIT(ioctl);
DEVOP_DESC_INIT(dump);
DEVOP_DESC_INIT(psize);
DEVOP_DESC_INIT(mmap);
DEVOP_DESC_INIT(mmap_single);
DEVOP_DESC_INIT(strategy);
DEVOP_DESC_INIT(kqfilter);
DEVOP_DESC_INIT(revoke);
DEVOP_DESC_INIT(clone);
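/*
 * For illustration, DEVOP_DESC_INIT(open) above expands to roughly:
 *
 *	struct syslink_desc dev_open_desc = {
 *		__offsetof(struct dev_ops, d_open),
 *		"open"
 *	};
 *
 * i.e. each descriptor records the byte offset of its function pointer
 * within struct dev_ops together with the operation name.
 */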
/*
 * Misc default ops
 */
struct dev_ops dead_dev_ops;

static d_open_t		noopen;
static d_close_t	noclose;
static d_read_t		noread;
static d_write_t	nowrite;
static d_ioctl_t	noioctl;
static d_mmap_t		nommap;
static d_mmap_single_t	nommap_single;
static d_strategy_t	nostrategy;
static d_dump_t		nodump;
static d_psize_t	nopsize;
static d_kqfilter_t	nokqfilter;
static d_clone_t	noclone;
static d_revoke_t	norevoke;
struct dev_ops default_dev_ops = {
	{ "null" },
	.d_default = NULL,	/* must be NULL */
	.d_open = noopen,
	.d_close = noclose,
	.d_read = noread,
	.d_write = nowrite,
	.d_ioctl = noioctl,
	.d_mmap = nommap,
	.d_mmap_single = nommap_single,
	.d_strategy = nostrategy,
	.d_dump = nodump,
	.d_psize = nopsize,
	.d_kqfilter = nokqfilter,
	.d_revoke = norevoke,
	.d_clone = noclone
};
static __inline
int
dev_needmplock(cdev_t dev)
{
	return((dev->si_ops->head.flags & D_MPSAFE) == 0);
}

/************************************************************************
 *			GENERAL DEVICE API FUNCTIONS			*
 ************************************************************************
 *
 * The MPSAFEness of these depends on dev->si_ops->head.flags
 */
int
dev_dopen(cdev_t dev, int oflags, int devtype, struct ucred *cred, struct file *fp)
{
	struct dev_open_args ap;
	int needmplock = dev_needmplock(dev);
	int error;

	ap.a_head.a_desc = &dev_open_desc;
	ap.a_head.a_dev = dev;
	ap.a_oflags = oflags;
	ap.a_devtype = devtype;
	ap.a_cred = cred;
	ap.a_fp = fp;

	if (needmplock)
		get_mplock();
	error = dev->si_ops->d_open(&ap);
	if (needmplock)
		rel_mplock();
	return (error);
}

int
dev_dclose(cdev_t dev, int fflag, int devtype, struct file *fp)
{
	struct dev_close_args ap;
	int needmplock = dev_needmplock(dev);
	int error;

	ap.a_head.a_desc = &dev_close_desc;
	ap.a_head.a_dev = dev;
	ap.a_fflag = fflag;
	ap.a_devtype = devtype;
	ap.a_fp = fp;

	if (needmplock)
		get_mplock();
	error = dev->si_ops->d_close(&ap);
	if (needmplock)
		rel_mplock();
	return (error);
}
int
dev_dread(cdev_t dev, struct uio *uio, int ioflag, struct file *fp)
{
	struct dev_read_args ap;
	int needmplock = dev_needmplock(dev);
	int error;

	ap.a_head.a_desc = &dev_read_desc;
	ap.a_head.a_dev = dev;
	ap.a_uio = uio;
	ap.a_ioflag = ioflag;
	ap.a_fp = fp;

	if (needmplock) {
		get_mplock();
		++mplock_reads;
	} else {
		++mpsafe_reads;
	}

	error = dev->si_ops->d_read(&ap);
	if (needmplock)
		rel_mplock();
	if (error == 0)
		dev->si_lastread = time_uptime;
	return (error);
}

int
dev_dwrite(cdev_t dev, struct uio *uio, int ioflag, struct file *fp)
{
	struct dev_write_args ap;
	int needmplock = dev_needmplock(dev);
	int error;

	dev->si_lastwrite = time_uptime;
	ap.a_head.a_desc = &dev_write_desc;
	ap.a_head.a_dev = dev;
	ap.a_uio = uio;
	ap.a_ioflag = ioflag;
	ap.a_fp = fp;

	if (needmplock) {
		get_mplock();
		++mplock_writes;
	} else {
		++mpsafe_writes;
	}

	error = dev->si_ops->d_write(&ap);
	if (needmplock)
		rel_mplock();
	return (error);
}
int
dev_dioctl(cdev_t dev, u_long cmd, caddr_t data, int fflag, struct ucred *cred,
	   struct sysmsg *msg, struct file *fp)
{
	struct dev_ioctl_args ap;
	int needmplock = dev_needmplock(dev);
	int error;

	ap.a_head.a_desc = &dev_ioctl_desc;
	ap.a_head.a_dev = dev;
	ap.a_cmd = cmd;
	ap.a_data = data;
	ap.a_fflag = fflag;
	ap.a_cred = cred;
	ap.a_sysmsg = msg;
	ap.a_fp = fp;

	if (needmplock)
		get_mplock();
	error = dev->si_ops->d_ioctl(&ap);
	if (needmplock)
		rel_mplock();
	return (error);
}

int
dev_dmmap(cdev_t dev, vm_offset_t offset, int nprot, struct file *fp)
{
	struct dev_mmap_args ap;
	int needmplock = dev_needmplock(dev);
	int error;

	ap.a_head.a_desc = &dev_mmap_desc;
	ap.a_head.a_dev = dev;
	ap.a_offset = offset;
	ap.a_nprot = nprot;
	ap.a_fp = fp;

	if (needmplock)
		get_mplock();
	error = dev->si_ops->d_mmap(&ap);
	if (needmplock)
		rel_mplock();

	if (error == 0)
		return(ap.a_result);
	return(-1);
}
int
dev_dmmap_single(cdev_t dev, vm_ooffset_t *offset, vm_size_t size,
		 struct vm_object **object, int nprot, struct file *fp)
{
	struct dev_mmap_single_args ap;
	int needmplock = dev_needmplock(dev);
	int error;

	ap.a_head.a_desc = &dev_mmap_single_desc;
	ap.a_head.a_dev = dev;
	ap.a_offset = offset;
	ap.a_size = size;
	ap.a_object = object;
	ap.a_nprot = nprot;
	ap.a_fp = fp;

	if (needmplock)
		get_mplock();
	error = dev->si_ops->d_mmap_single(&ap);
	if (needmplock)
		rel_mplock();

	return(error);
}

int
dev_dclone(cdev_t dev)
{
	struct dev_clone_args ap;
	int needmplock = dev_needmplock(dev);
	int error;

	ap.a_head.a_desc = &dev_clone_desc;
	ap.a_head.a_dev = dev;

	if (needmplock)
		get_mplock();
	error = dev->si_ops->d_clone(&ap);
	if (needmplock)
		rel_mplock();
	return (error);
}

int
dev_drevoke(cdev_t dev)
{
	struct dev_revoke_args ap;
	int needmplock = dev_needmplock(dev);
	int error;

	ap.a_head.a_desc = &dev_revoke_desc;
	ap.a_head.a_dev = dev;

	if (needmplock)
		get_mplock();
	error = dev->si_ops->d_revoke(&ap);
	if (needmplock)
		rel_mplock();

	return (error);
}
/*
 * Core device strategy call, used to issue I/O on a device.  There are
 * two versions, a non-chained version and a chained version.  The chained
 * version reuses a BIO set up by vn_strategy().  The only difference is
 * that, for now, we do not push a new tracking structure when chaining
 * from vn_strategy.  XXX this will ultimately have to change.
 */
void
dev_dstrategy(cdev_t dev, struct bio *bio)
{
	struct dev_strategy_args ap;
	struct bio_track *track;
	int needmplock = dev_needmplock(dev);

	ap.a_head.a_desc = &dev_strategy_desc;
	ap.a_head.a_dev = dev;
	ap.a_bio = bio;

	KKASSERT(bio->bio_track == NULL);
	KKASSERT(bio->bio_buf->b_cmd != BUF_CMD_DONE);
	if (bio->bio_buf->b_cmd == BUF_CMD_READ)
		track = &dev->si_track_read;
	else
		track = &dev->si_track_write;
	bio_track_ref(track);
	bio->bio_track = track;

	if (dsched_is_clear_buf_priv(bio->bio_buf))
		dsched_new_buf(bio->bio_buf);

	KKASSERT((bio->bio_flags & BIO_DONE) == 0);
	if (needmplock) {
		get_mplock();
		++mplock_strategies;
	} else {
		++mpsafe_strategies;
	}

	(void)dev->si_ops->d_strategy(&ap);
	if (needmplock)
		rel_mplock();
}
void
dev_dstrategy_chain(cdev_t dev, struct bio *bio)
{
	struct dev_strategy_args ap;
	int needmplock = dev_needmplock(dev);

	ap.a_head.a_desc = &dev_strategy_desc;
	ap.a_head.a_dev = dev;
	ap.a_bio = bio;

	KKASSERT(bio->bio_track != NULL);
	KKASSERT((bio->bio_flags & BIO_DONE) == 0);
	if (needmplock)
		get_mplock();
	(void)dev->si_ops->d_strategy(&ap);
	if (needmplock)
		rel_mplock();
}
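/*
 * Illustrative sketch (hypothetical layered driver, not part of this
 * file): a filter driver whose d_strategy forwards an already-tracked
 * BIO to a lower device would use the chained variant above so that no
 * second tracking structure is pushed:
 *
 *	static int
 *	filt_strategy(struct dev_strategy_args *ap)
 *	{
 *		dev_dstrategy_chain(lower_dev, ap->a_bio);
 *		return (0);
 *	}
 *
 * where lower_dev is the hypothetical underlying cdev_t.
 */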
/*
 * Note: the disk layer is expected to set count, blkno, and secsize before
 * forwarding the message.
 */
int
dev_ddump(cdev_t dev, void *virtual, vm_offset_t physical, off_t offset,
	  size_t length)
{
	struct dev_dump_args ap;
	int needmplock = dev_needmplock(dev);
	int error;

	ap.a_head.a_desc = &dev_dump_desc;
	ap.a_head.a_dev = dev;
	ap.a_count = 0;
	ap.a_blkno = 0;
	ap.a_secsize = 0;
	ap.a_virtual = virtual;
	ap.a_physical = physical;
	ap.a_offset = offset;
	ap.a_length = length;

	if (needmplock)
		get_mplock();
	error = dev->si_ops->d_dump(&ap);
	if (needmplock)
		rel_mplock();
	return (error);
}
int64_t
dev_dpsize(cdev_t dev)
{
	struct dev_psize_args ap;
	int needmplock = dev_needmplock(dev);
	int error;

	ap.a_head.a_desc = &dev_psize_desc;
	ap.a_head.a_dev = dev;

	if (needmplock)
		get_mplock();
	error = dev->si_ops->d_psize(&ap);
	if (needmplock)
		rel_mplock();

	if (error == 0)
		return (ap.a_result);
	return(-1);
}
/*
 * Pass-thru to the device kqfilter.
 *
 * NOTE: We explicitly preset a_result to 0 so d_kqfilter() functions
 *	 which return 0 do not have to bother setting a_result.
 */
int
dev_dkqfilter(cdev_t dev, struct knote *kn, struct file *fp)
{
	struct dev_kqfilter_args ap;
	int needmplock = dev_needmplock(dev);
	int error;

	ap.a_head.a_desc = &dev_kqfilter_desc;
	ap.a_head.a_dev = dev;
	ap.a_kn = kn;
	ap.a_result = 0;
	ap.a_fp = fp;

	if (needmplock)
		get_mplock();
	error = dev->si_ops->d_kqfilter(&ap);
	if (needmplock)
		rel_mplock();

	if (error == 0)
		return(ap.a_result);
	return(ENODEV);
}
/************************************************************************
 *			DEVICE HELPER FUNCTIONS				*
 ************************************************************************/
/*
 * MPSAFE
 */
int
dev_drefs(cdev_t dev)
{
	return(dev->si_sysref.refcnt);
}

/*
 * MPSAFE
 */
const char *
dev_dname(cdev_t dev)
{
	return(dev->si_ops->head.name);
}

/*
 * MPSAFE
 */
int
dev_dflags(cdev_t dev)
{
	return(dev->si_ops->head.flags);
}

/*
 * MPSAFE
 */
int
dev_dmaj(cdev_t dev)
{
	return(dev->si_ops->head.maj);
}

/*
 * Used when forwarding a request through layers.  The caller adjusts
 * ap->a_head.a_dev and then calls this function.
 */
int
dev_doperate(struct dev_generic_args *ap)
{
	int (*func)(struct dev_generic_args *);
	int needmplock = dev_needmplock(ap->a_dev);
	int error;

	func = *(void **)((char *)ap->a_dev->si_ops + ap->a_desc->sd_offset);

	if (needmplock)
		get_mplock();
	error = func(ap);
	if (needmplock)
		rel_mplock();

	return (error);
}
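/*
 * Illustrative sketch (hypothetical layering driver, not part of this
 * file): to forward an operation to an underlying device, the caller
 * retargets the generic argument head and re-dispatches:
 *
 *	static int
 *	layer_ioctl(struct dev_ioctl_args *ap)
 *	{
 *		ap->a_head.a_dev = lower_dev;
 *		return (dev_doperate(&ap->a_head));
 *	}
 *
 * where lower_dev is the hypothetical underlying cdev_t.
 */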
/*
 * Used by the console intercept code only.  Issue an operation through
 * a foreign ops structure allowing the ops structure associated
 * with the device to remain intact.
 */
int
dev_doperate_ops(struct dev_ops *ops, struct dev_generic_args *ap)
{
	int (*func)(struct dev_generic_args *);
	int needmplock = ((ops->head.flags & D_MPSAFE) == 0);
	int error;

	func = *(void **)((char *)ops + ap->a_desc->sd_offset);

	if (needmplock)
		get_mplock();
	error = func(ap);
	if (needmplock)
		rel_mplock();

	return (error);
}

/*
 * Convert a template dev_ops into the real thing by filling in
 * uninitialized fields.
 */
void
compile_dev_ops(struct dev_ops *ops)
{
	int offset;

	for (offset = offsetof(struct dev_ops, dev_ops_first_field);
	     offset <= offsetof(struct dev_ops, dev_ops_last_field);
	     offset += sizeof(void *)) {
		void **func_p = (void **)((char *)ops + offset);
		void **def_p = (void **)((char *)&default_dev_ops + offset);
		if (*func_p == NULL) {
			if (ops->d_default)
				*func_p = ops->d_default;
			else
				*func_p = *def_p;
		}
	}
}
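/*
 * For illustration (hypothetical driver, not part of this file): a
 * driver typically supplies only the entry points it implements and
 * lets compile_dev_ops() fill in the rest from default_dev_ops:
 *
 *	static struct dev_ops mydev_ops = {
 *		{ "mydev" },
 *		.d_open = mydev_open,
 *		.d_close = mydev_close,
 *		.d_read = mydev_read,
 *	};
 *
 * After compile_dev_ops(&mydev_ops), unset fields such as d_write point
 * at the corresponding no-op defaults (e.g. nowrite below).
 */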
/************************************************************************
 *			MAJOR/MINOR SPACE FUNCTION			*
 ************************************************************************/

/*
 * This makes a dev_ops entry visible to userland (e.g. /dev/<blah>).
 *
 * Disk devices typically register their major, e.g. 'ad0', and then call
 * into the disk label management code which overloads its own onto e.g. 'ad0'
 * to support all the various slice and partition combinations.
 *
 * The mask/match supplied in this call are a full 32 bits and the same
 * mask and match must be specified in a later dev_ops_remove() call to
 * match this add.  However, the match value for the minor number should never
 * have any bits set in the major number's bit range (8-15).  The mask value
 * may be conveniently specified as -1 without creating any major number
 * interference.
 */
static
int
rb_dev_ops_compare(struct dev_ops_maj *a, struct dev_ops_maj *b)
{
	if (a->maj < b->maj)
		return(-1);
	else if (a->maj > b->maj)
		return(1);
	return(0);
}

RB_GENERATE2(dev_ops_rb_tree, dev_ops_maj, rbnode, rb_dev_ops_compare, int, maj);

struct dev_ops_rb_tree dev_ops_rbhead = RB_INITIALIZER(dev_ops_rbhead);

int
dev_ops_remove_all(struct dev_ops *ops)
{
	return devfs_destroy_dev_by_ops(ops, -1);
}

int
dev_ops_remove_minor(struct dev_ops *ops, int minor)
{
	return devfs_destroy_dev_by_ops(ops, minor);
}
struct dev_ops *
dev_ops_intercept(cdev_t dev, struct dev_ops *iops)
{
	struct dev_ops *oops = dev->si_ops;

	compile_dev_ops(iops);
	iops->head.maj = oops->head.maj;
	iops->head.data = oops->head.data;
	iops->head.flags = oops->head.flags;
	dev->si_ops = iops;
	dev->si_flags |= SI_INTERCEPTED;

	return (oops);
}

void
dev_ops_restore(cdev_t dev, struct dev_ops *oops)
{
	struct dev_ops *iops = dev->si_ops;

	dev->si_ops = oops;
	dev->si_flags &= ~SI_INTERCEPTED;
	iops->head.maj = 0;
	iops->head.data = NULL;
	iops->head.flags = 0;
}
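/*
 * Usage sketch (hypothetical console intercept, not part of this file):
 * the two calls above are expected to pair up, with the caller holding
 * the returned original ops for the later restore:
 *
 *	struct dev_ops *saved_ops;
 *
 *	saved_ops = dev_ops_intercept(cons_dev, &intercept_ops);
 *	... operations on cons_dev now route through intercept_ops ...
 *	dev_ops_restore(cons_dev, saved_ops);
 *
 * cons_dev and intercept_ops are hypothetical names.
 */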
/************************************************************************
 *			DEFAULT DEV OPS FUNCTIONS			*
 ************************************************************************/

/*
 * Unsupported devswitch functions (e.g. for writing to read-only device).
 * XXX may belong elsewhere.
 */
static int
norevoke(struct dev_revoke_args *ap)
{
	/* take no action */
	return(0);
}

static int
noclone(struct dev_clone_args *ap)
{
	/* take no action */
	return (0);	/* allow the clone */
}

static int
noopen(struct dev_open_args *ap)
{
	return (ENODEV);
}

static int
noclose(struct dev_close_args *ap)
{
	return (ENODEV);
}

static int
noread(struct dev_read_args *ap)
{
	return (ENODEV);
}

static int
nowrite(struct dev_write_args *ap)
{
	return (ENODEV);
}

static int
noioctl(struct dev_ioctl_args *ap)
{
	return (ENODEV);
}

static int
nokqfilter(struct dev_kqfilter_args *ap)
{
	return (ENODEV);
}

static int
nommap(struct dev_mmap_args *ap)
{
	return (ENODEV);
}

static int
nommap_single(struct dev_mmap_single_args *ap)
{
	return (ENODEV);
}
static int
nostrategy(struct dev_strategy_args *ap)
{
	struct bio *bio = ap->a_bio;

	bio->bio_buf->b_flags |= B_ERROR;
	bio->bio_buf->b_error = EOPNOTSUPP;
	biodone(bio);
	return(0);
}

static int
nopsize(struct dev_psize_args *ap)
{
	ap->a_result = 0;
	return(0);
}

static int
nodump(struct dev_dump_args *ap)
{
	return (ENODEV);
}

/*
 * XXX this is probably bogus.  Any device that uses it isn't checking the
 * minor number.
 */
int
nullopen(struct dev_open_args *ap)
{
	return (0);
}

int
nullclose(struct dev_close_args *ap)
{
	return (0);
}