/*
 * Copyright (c) 2003 Matthew Dillon <dillon@backplane.com> All rights reserved.
 * cdevsw from kern/kern_conf.c Copyright (c) 1995 Terrence R. Lambert
 * cdevsw from kern/kern_conf.c Copyright (c) 1995 Julian R. Elischer,
 * All rights reserved.
 * Copyright (c) 1982, 1986, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $DragonFly: src/sys/kern/kern_device.c,v 1.27 2007/07/23 18:59:50 dillon Exp $
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/module.h>
#include <sys/malloc.h>
#include <sys/conf.h>
#include <sys/bio.h>
#include <sys/buf.h>
#include <sys/vnode.h>
#include <sys/queue.h>
#include <sys/device.h>
#include <sys/tree.h>
#include <sys/syslink_rpc.h>
#include <sys/proc.h>
#include <machine/stdarg.h>
#include <sys/thread2.h>

/*
 * system link descriptors identify the command in the
 * arguments structure.
 */
#define DDESCNAME(name) __CONCAT(__CONCAT(dev_,name),_desc)

#define DEVOP_DESC_INIT(name)						\
	    struct syslink_desc DDESCNAME(name) = {			\
		__offsetof(struct dev_ops, __CONCAT(d_, name)),		\
	    #name }
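
/*
 * For reference, DEVOP_DESC_INIT(open) below expands (roughly) to:
 *
 *	struct syslink_desc dev_open_desc = {
 *		__offsetof(struct dev_ops, d_open),
 *		"open"
 *	};
 *
 * i.e. each descriptor records where its function pointer lives inside
 * struct dev_ops, plus a human-readable name.
 */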

DEVOP_DESC_INIT(default);
DEVOP_DESC_INIT(open);
DEVOP_DESC_INIT(close);
DEVOP_DESC_INIT(read);
DEVOP_DESC_INIT(write);
DEVOP_DESC_INIT(ioctl);
DEVOP_DESC_INIT(dump);
DEVOP_DESC_INIT(psize);
DEVOP_DESC_INIT(poll);
DEVOP_DESC_INIT(mmap);
DEVOP_DESC_INIT(strategy);
DEVOP_DESC_INIT(kqfilter);
DEVOP_DESC_INIT(revoke);
DEVOP_DESC_INIT(clone);

/*
 * Misc default ops
 */
struct dev_ops dead_dev_ops;

struct dev_ops default_dev_ops = {
	{ "null" },
	.d_default = NULL,	/* must be NULL */
	.d_open = noopen,
	.d_close = noclose,
	.d_read = noread,
	.d_write = nowrite,
	.d_ioctl = noioctl,
	.d_poll = nopoll,
	.d_mmap = nommap,
	.d_strategy = nostrategy,
	.d_dump = nodump,
	.d_psize = nopsize,
	.d_kqfilter = nokqfilter,
	.d_revoke = norevoke,
	.d_clone = noclone
};

/************************************************************************
 *			GENERAL DEVICE API FUNCTIONS			*
 ************************************************************************/

int
dev_dopen(cdev_t dev, int oflags, int devtype, struct ucred *cred)
{
	struct dev_open_args ap;

	ap.a_head.a_desc = &dev_open_desc;
	ap.a_head.a_dev = dev;
	ap.a_oflags = oflags;
	ap.a_devtype = devtype;
	ap.a_cred = cred;
	return(dev->si_ops->d_open(&ap));
}

int
dev_dclose(cdev_t dev, int fflag, int devtype)
{
	struct dev_close_args ap;

	ap.a_head.a_desc = &dev_close_desc;
	ap.a_head.a_dev = dev;
	ap.a_fflag = fflag;
	ap.a_devtype = devtype;
	return(dev->si_ops->d_close(&ap));
}

int
dev_dread(cdev_t dev, struct uio *uio, int ioflag)
{
	struct dev_read_args ap;
	int error;

	ap.a_head.a_desc = &dev_read_desc;
	ap.a_head.a_dev = dev;
	ap.a_uio = uio;
	ap.a_ioflag = ioflag;
	error = dev->si_ops->d_read(&ap);
	if (error == 0)
		dev->si_lastread = time_second;
	return (error);
}

int
dev_dwrite(cdev_t dev, struct uio *uio, int ioflag)
{
	struct dev_write_args ap;
	int error;

	dev->si_lastwrite = time_second;
	ap.a_head.a_desc = &dev_write_desc;
	ap.a_head.a_dev = dev;
	ap.a_uio = uio;
	ap.a_ioflag = ioflag;
	error = dev->si_ops->d_write(&ap);
	return (error);
}

int
dev_dioctl(cdev_t dev, u_long cmd, caddr_t data, int fflag, struct ucred *cred)
{
	struct dev_ioctl_args ap;

	ap.a_head.a_desc = &dev_ioctl_desc;
	ap.a_head.a_dev = dev;
	ap.a_cmd = cmd;
	ap.a_data = data;
	ap.a_fflag = fflag;
	ap.a_cred = cred;
	return(dev->si_ops->d_ioctl(&ap));
}

int
dev_dpoll(cdev_t dev, int events)
{
	struct dev_poll_args ap;
	int error;

	ap.a_head.a_desc = &dev_poll_desc;
	ap.a_head.a_dev = dev;
	ap.a_events = events;
	error = dev->si_ops->d_poll(&ap);
	if (error == 0)
		return(ap.a_events);
	return (seltrue(dev, events));
}

int
dev_dmmap(cdev_t dev, vm_offset_t offset, int nprot)
{
	struct dev_mmap_args ap;
	int error;

	ap.a_head.a_desc = &dev_mmap_desc;
	ap.a_head.a_dev = dev;
	ap.a_offset = offset;
	ap.a_nprot = nprot;
	error = dev->si_ops->d_mmap(&ap);
	if (error == 0)
		return(ap.a_result);
	return(-1);
}

int
dev_dclone(cdev_t dev)
{
	struct dev_clone_args ap;

	ap.a_head.a_desc = &dev_clone_desc;
	ap.a_head.a_dev = dev;
	return (dev->si_ops->d_clone(&ap));
}

int
dev_drevoke(cdev_t dev)
{
	struct dev_revoke_args ap;

	ap.a_head.a_desc = &dev_revoke_desc;
	ap.a_head.a_dev = dev;
	return (dev->si_ops->d_revoke(&ap));
}

/*
 * Core device strategy call, used to issue I/O on a device.  There are
 * two versions, a non-chained version and a chained version.  The chained
 * version reuses a BIO set up by vn_strategy().  The only difference is
 * that, for now, we do not push a new tracking structure when chaining
 * from vn_strategy.  XXX this will ultimately have to change.
 */
void
dev_dstrategy(cdev_t dev, struct bio *bio)
{
	struct dev_strategy_args ap;
	struct bio_track *track;

	ap.a_head.a_desc = &dev_strategy_desc;
	ap.a_head.a_dev = dev;
	ap.a_bio = bio;

	KKASSERT(bio->bio_track == NULL);
	KKASSERT(bio->bio_buf->b_cmd != BUF_CMD_DONE);
	if (bio->bio_buf->b_cmd == BUF_CMD_READ)
		track = &dev->si_track_read;
	else
		track = &dev->si_track_write;
	atomic_add_int(&track->bk_active, 1);
	bio->bio_track = track;
	(void)dev->si_ops->d_strategy(&ap);
}
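
/*
 * Illustrative sketch (not part of this file) of issuing a raw read via
 * dev_dstrategy().  This is an assumption-laden outline, not verbatim
 * kernel code; the buffer setup details vary:
 *
 *	struct buf *bp = getpbuf(NULL);		(helper assumed available)
 *
 *	bp->b_cmd = BUF_CMD_READ;
 *	bp->b_bio1.bio_offset = 0;		(byte offset on the device)
 *	bp->b_bcount = DEV_BSIZE;
 *	dev_dstrategy(dev, &bp->b_bio1);
 *	... sleep until biodone() fires on the bio, then check B_ERROR ...
 *
 * dev_dstrategy() attaches the device's read or write bio_track before
 * dispatch, which is why the non-chained path asserts bio_track == NULL.
 */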

void
dev_dstrategy_chain(cdev_t dev, struct bio *bio)
{
	struct dev_strategy_args ap;

	KKASSERT(bio->bio_track != NULL);
	ap.a_head.a_desc = &dev_strategy_desc;
	ap.a_head.a_dev = dev;
	ap.a_bio = bio;
	(void)dev->si_ops->d_strategy(&ap);
}

/*
 * note: the disk layer is expected to set count, blkno, and secsize before
 * forwarding the message.
 */
int
dev_ddump(cdev_t dev)
{
	struct dev_dump_args ap;

	ap.a_head.a_desc = &dev_dump_desc;
	ap.a_head.a_dev = dev;
	ap.a_count = 0;
	ap.a_blkno = 0;
	ap.a_secsize = 0;
	return(dev->si_ops->d_dump(&ap));
}

int64_t
dev_dpsize(cdev_t dev)
{
	struct dev_psize_args ap;
	int error;

	ap.a_head.a_desc = &dev_psize_desc;
	ap.a_head.a_dev = dev;
	error = dev->si_ops->d_psize(&ap);
	if (error == 0)
		return (ap.a_result);
	return(-1);
}

int
dev_dkqfilter(cdev_t dev, struct knote *kn)
{
	struct dev_kqfilter_args ap;
	int error;

	ap.a_head.a_desc = &dev_kqfilter_desc;
	ap.a_head.a_dev = dev;
	ap.a_kn = kn;
	error = dev->si_ops->d_kqfilter(&ap);
	if (error == 0)
		return(ap.a_result);
	return(ENODEV);
}

/************************************************************************
 *			DEVICE HELPER FUNCTIONS				*
 ************************************************************************/

int
dev_drefs(cdev_t dev)
{
	return(dev->si_sysref.refcnt);
}

const char *
dev_dname(cdev_t dev)
{
	return(dev->si_ops->head.name);
}

int
dev_dflags(cdev_t dev)
{
	return(dev->si_ops->head.flags);
}

int
dev_dmaj(cdev_t dev)
{
	return(dev->si_ops->head.maj);
}

/*
 * Used when forwarding a request through layers.  The caller adjusts
 * ap->a_head.a_dev and then calls this function.
 */
int
dev_doperate(struct dev_generic_args *ap)
{
	int (*func)(struct dev_generic_args *);

	func = *(void **)((char *)ap->a_dev->si_ops + ap->a_desc->sd_offset);
	return (func(ap));
}
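
/*
 * Illustrative sketch (not part of this file): a layering driver that
 * forwards an ioctl to its backing device (get_backing_dev() is a
 * hypothetical helper):
 *
 *	static int
 *	layer_ioctl(struct dev_ioctl_args *ap)
 *	{
 *		ap->a_head.a_dev = get_backing_dev(ap->a_head.a_dev);
 *		return (dev_doperate(&ap->a_head));
 *	}
 *
 * The descriptor's sd_offset selects the same operation (here d_ioctl)
 * out of the backing device's dev_ops.
 */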

/*
 * Used by the console intercept code only.  Issue an operation through
 * a foreign ops structure allowing the ops structure associated
 * with the device to remain intact.
 */
int
dev_doperate_ops(struct dev_ops *ops, struct dev_generic_args *ap)
{
	int (*func)(struct dev_generic_args *);

	func = *(void **)((char *)ops + ap->a_desc->sd_offset);
	return (func(ap));
}

/*
 * Convert a template dev_ops into the real thing by filling in
 * uninitialized fields.
 */
void
compile_dev_ops(struct dev_ops *ops)
{
	int offset;

	for (offset = offsetof(struct dev_ops, dev_ops_first_field);
	     offset <= offsetof(struct dev_ops, dev_ops_last_field);
	     offset += sizeof(void *)
	) {
		void **func_p = (void **)((char *)ops + offset);
		void **def_p = (void **)((char *)&default_dev_ops + offset);
		if (*func_p == NULL) {
			if (ops->d_default)
				*func_p = ops->d_default;
			else
				*func_p = *def_p;
		}
	}
}
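
/*
 * Illustrative sketch (not part of this file): a driver normally supplies
 * only the ops it implements and lets compile_dev_ops() (called from
 * dev_ops_add()) fill in the rest.  FOO_CDEV_MAJOR and the foo_* functions
 * are assumptions for illustration:
 *
 *	static struct dev_ops foo_ops = {
 *		{ "foo", FOO_CDEV_MAJOR, 0 },
 *		.d_open = foo_open,
 *		.d_close = foo_close,
 *		.d_read = foo_read,
 *		.d_write = foo_write
 *	};
 *
 * After compilation the unset entries (d_mmap, d_strategy, ...) point at
 * the no-op/error defaults above, or at d_default if the driver set one.
 */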

/************************************************************************
 *			MAJOR/MINOR SPACE FUNCTION			*
 ************************************************************************/

/*
 * This makes a dev_ops entry visible to userland (e.g. /dev/<blah>).
 *
 * The kernel can overload a data space by making multiple dev_ops_add()
 * calls, but only the most recent one in the list matching the mask/match
 * will be visible to userland.
 *
 * make_dev() does not automatically call dev_ops_add() (nor do we want it
 * to, since partition-managed disk devices are overloaded on top of the
 * raw device).
 *
 * Disk devices typically register their major, e.g. 'ad0', and then call
 * into the disk label management code which overloads its own onto e.g.
 * 'ad0' to support all the various slice and partition combinations.
 *
 * The mask/match supplied in this call are a full 32 bits and the same
 * mask and match must be specified in a later dev_ops_remove() call to
 * match this add.  However, the match value for the minor number should
 * never have any bits set in the major number's bit range (8-15).  The
 * mask value may be conveniently specified as -1 without creating any
 * major number interference.
 */
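
/*
 * Illustrative sketch (not part of this file): a driver exposing its whole
 * minor space registers with a wildcard mask/match (driver names here are
 * assumptions for illustration):
 *
 *	dev_ops_add(&foo_ops, 0, 0);		((minor & 0) == 0: all match)
 *
 * while a single dedicated control minor can be carved out exactly:
 *
 *	dev_ops_add(&fooctl_ops, -1, FOOCTL_MINOR);
 *
 * Lookups test (minor & mask) == match, so the -1 mask demands an exact
 * minor and, per the note above, is harmless as long as FOOCTL_MINOR has
 * no bits in the major range.
 */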

static
int
rb_dev_ops_compare(struct dev_ops_maj *a, struct dev_ops_maj *b)
{
	if (a->maj < b->maj)
		return(-1);
	else if (a->maj > b->maj)
		return(1);
	return(0);
}

RB_GENERATE2(dev_ops_rb_tree, dev_ops_maj, rbnode, rb_dev_ops_compare, int, maj);

struct dev_ops_rb_tree dev_ops_rbhead = RB_INITIALIZER(dev_ops_rbhead);

int
dev_ops_add(struct dev_ops *ops, u_int mask, u_int match)
{
	static int next_maj = 256;		/* first dynamic major number */
	struct dev_ops_maj *rbmaj;
	struct dev_ops_link *link;

	compile_dev_ops(ops);
	if (ops->head.maj < 0) {
		while (dev_ops_rb_tree_RB_LOOKUP(&dev_ops_rbhead, next_maj) != NULL) {
			if (++next_maj <= 0)
				next_maj = 256;
		}
		ops->head.maj = next_maj;
	}
	rbmaj = dev_ops_rb_tree_RB_LOOKUP(&dev_ops_rbhead, ops->head.maj);
	if (rbmaj == NULL) {
		rbmaj = kmalloc(sizeof(*rbmaj), M_DEVBUF, M_INTWAIT | M_ZERO);
		rbmaj->maj = ops->head.maj;
		dev_ops_rb_tree_RB_INSERT(&dev_ops_rbhead, rbmaj);
	}
	for (link = rbmaj->link; link; link = link->next) {
		/*
		 * If we get an exact match we usurp the target, but we only
		 * print a warning message if a different device switch is
		 * installed.
		 */
		if (link->mask == mask && link->match == match) {
			if (link->ops != ops) {
				kprintf("WARNING: \"%s\" (%p) is usurping \"%s\"'s"
					" (%p)\n",
					ops->head.name, ops,
					link->ops->head.name, link->ops);
				link->ops = ops;
				++ops->head.refs;
			}
			return(0);
		}
		/*
		 * XXX add additional warnings for overlaps
		 */
	}

	link = kmalloc(sizeof(struct dev_ops_link), M_DEVBUF, M_INTWAIT|M_ZERO);
	link->mask = mask;
	link->match = match;
	link->ops = ops;
	link->next = rbmaj->link;
	rbmaj->link = link;
	++ops->head.refs;
	return(0);
}

/*
 * Should only be used by udev2dev().
 *
 * If the minor number is -1, we match the first ops we find for this
 * major.  If the mask is not -1 then multiple minor numbers can match
 * the same ops.
 *
 * Note that this function will return NULL if the minor number is not
 * within the bounds of the installed mask(s).
 *
 * The specified minor number should NOT include any major bits.
 */
struct dev_ops *
dev_ops_get(int x, int y)
{
	struct dev_ops_maj *rbmaj;
	struct dev_ops_link *link;

	rbmaj = dev_ops_rb_tree_RB_LOOKUP(&dev_ops_rbhead, x);
	if (rbmaj == NULL)
		return(NULL);
	for (link = rbmaj->link; link; link = link->next) {
		if (y == -1 || (link->mask & y) == link->match)
			return(link->ops);
	}
	return(NULL);
}
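
/*
 * Illustrative sketch (not part of this file), continuing the registration
 * example above (FOO_CDEV_MAJOR and FOOCTL_MINOR remain assumptions):
 *
 *	ops = dev_ops_get(FOO_CDEV_MAJOR, FOOCTL_MINOR);  (the fooctl ops)
 *	ops = dev_ops_get(FOO_CDEV_MAJOR, -1);	(first ops on that major)
 *	ops = dev_ops_get(UNUSED_MAJOR, 0);	(NULL: nothing installed)
 */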

/*
 * Take a cookie cutter to the major/minor device space for the passed
 * device and generate a new dev_ops visible to userland which the caller
 * can then modify.  The original device is not modified but portions of
 * its major/minor space will no longer be visible to userland.
 */
struct dev_ops *
dev_ops_add_override(cdev_t backing_dev, struct dev_ops *template,
		     u_int mask, u_int match)
{
	struct dev_ops *ops;
	struct dev_ops *backing_ops = backing_dev->si_ops;

	ops = kmalloc(sizeof(struct dev_ops), M_DEVBUF, M_INTWAIT);
	*ops = *template;
	ops->head.name = backing_ops->head.name;
	ops->head.maj = backing_ops->head.maj;
	ops->head.flags |= backing_ops->head.flags & ~D_TRACKCLOSE;
	compile_dev_ops(ops);
	dev_ops_add(ops, mask, match);

	return(ops);
}

void
dev_ops_remove_override(struct dev_ops *ops, u_int mask, u_int match)
{
	dev_ops_remove(ops, mask, match);
	if (ops->head.refs) {
		kprintf("dev_ops_remove_override: %s still has %d refs!\n",
			ops->head.name, ops->head.refs);
	} else {
		bzero(ops, sizeof(*ops));
		kfree(ops, M_DEVBUF);
	}
}
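
/*
 * Illustrative sketch (not part of this file): the slice/partition layer
 * uses this pair to overlay part of a raw disk's minor space (the template
 * and mask/match names are assumptions for illustration):
 *
 *	struct dev_ops *ops;
 *
 *	ops = dev_ops_add_override(rawdev, &slice_template,
 *				   unit_mask, unit_match);
 *	...
 *	dev_ops_remove_override(ops, unit_mask, unit_match);
 *
 * The override inherits the backing device's name and major, so userland
 * keeps seeing one major while the overlay owns the matched minors.
 */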

/*
 * Remove all matching dev_ops entries from the per-major lookup tree
 * so no new user opens can be performed, and destroy all devices
 * installed in the hash table that are associated with this dev_ops.  (see
 * destroy_all_devs()).
 *
 * The mask and match should match a previous call to dev_ops_add*().
 */
int
dev_ops_remove(struct dev_ops *ops, u_int mask, u_int match)
{
	struct dev_ops_maj *rbmaj;
	struct dev_ops_link *link;
	struct dev_ops_link **plink;

	if (ops != &dead_dev_ops)
		destroy_all_devs(ops, mask, match);

	rbmaj = dev_ops_rb_tree_RB_LOOKUP(&dev_ops_rbhead, ops->head.maj);
	if (rbmaj == NULL) {
		kprintf("double-remove of dev_ops %p for %s(%d)\n",
			ops, ops->head.name, ops->head.maj);
		return(0);
	}
	for (plink = &rbmaj->link; (link = *plink) != NULL;
	     plink = &link->next) {
		if (link->mask == mask && link->match == match) {
			if (link->ops == ops)
				break;
			kprintf("%s: ERROR: cannot remove dev_ops, "
				"its major number %d was stolen by %s\n",
				ops->head.name, ops->head.maj,
				link->ops->head.name
			);
		}
	}
	if (link == NULL) {
		kprintf("%s(%d)[%08x/%08x]: WARNING: ops removed "
			"multiple times!\n",
			ops->head.name, ops->head.maj, mask, match);
	} else {
		*plink = link->next;
		--ops->head.refs; /* XXX ops_release() / record refs */
		kfree(link, M_DEVBUF);
	}

	/*
	 * Scrap the RB tree node for the major number if no ops are
	 * installed any longer.
	 */
	if (rbmaj->link == NULL) {
		dev_ops_rb_tree_RB_REMOVE(&dev_ops_rbhead, rbmaj);
		kfree(rbmaj, M_DEVBUF);
	}

#if 0
	/*
	 * The same ops might be used with multiple devices, so don't
	 * complain if the ref count is non-zero.
	 */
	if (ops->head.refs != 0) {
		kprintf("%s(%d)[%08x/%08x]: Warning: dev_ops_remove() called "
			"while %d device refs still exist!\n",
			ops->head.name, ops->head.maj, mask, match,
			ops->head.refs);
	} else {
		if (bootverbose)
			kprintf("%s: ops removed\n", ops->head.name);
	}
#endif
	return 0;
}

/*
 * dev_ops_scan() - Issue a callback for all installed dev_ops structures.
 *
 * The scan will terminate if a callback returns a negative number.
 */
struct dev_ops_scan_info {
	int	(*callback)(struct dev_ops *, void *);
	void	*arg;
};

static
int
dev_ops_scan_callback(struct dev_ops_maj *rbmaj, void *arg)
{
	struct dev_ops_scan_info *info = arg;
	struct dev_ops_link *link;
	int count = 0;
	int r;

	for (link = rbmaj->link; link; link = link->next) {
		r = info->callback(link->ops, info->arg);
		if (r < 0)
			return(r);
		count += r;
	}
	return(count);
}

int
dev_ops_scan(int (*callback)(struct dev_ops *, void *), void *arg)
{
	struct dev_ops_scan_info info = { callback, arg };

	return (dev_ops_rb_tree_RB_SCAN(&dev_ops_rbhead, NULL,
					dev_ops_scan_callback, &info));
}
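
/*
 * Illustrative sketch (not part of this file): counting every installed
 * dev_ops link (the callback name is an assumption for illustration):
 *
 *	static int
 *	count_one(struct dev_ops *ops, void *arg)
 *	{
 *		return(1);
 *	}
 *
 *	total = dev_ops_scan(count_one, NULL);
 *
 * Non-negative callback returns accumulate into the scan's result; a
 * negative return aborts the scan and is passed back to the caller.
 */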

/*
 * Release an ops entry.  When the ref count reaches zero, recurse
 * through the stack.
 */
void
dev_ops_release(struct dev_ops *ops)
{
	--ops->head.refs;
	if (ops->head.refs == 0) {
		/* XXX */
	}
}

struct dev_ops *
dev_ops_intercept(cdev_t dev, struct dev_ops *iops)
{
	struct dev_ops *oops = dev->si_ops;

	compile_dev_ops(iops);
	iops->head.maj = oops->head.maj;
	iops->head.data = oops->head.data;
	iops->head.flags = oops->head.flags;
	dev->si_ops = iops;
	dev->si_flags |= SI_INTERCEPTED;

	return (oops);
}

void
dev_ops_restore(cdev_t dev, struct dev_ops *oops)
{
	struct dev_ops *iops = dev->si_ops;

	dev->si_ops = oops;
	dev->si_flags &= ~SI_INTERCEPTED;
	iops->head.maj = 0;
	iops->head.data = NULL;
	iops->head.flags = 0;
}
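
/*
 * Illustrative sketch (not part of this file): console redirection could
 * intercept a device roughly like this (names are assumptions for
 * illustration):
 *
 *	static struct dev_ops redirect_ops = {
 *		{ "redirect" },
 *		.d_write = redirect_write
 *	};
 *	static struct dev_ops *saved_ops;
 *
 *	saved_ops = dev_ops_intercept(console_dev, &redirect_ops);
 *	...
 *	dev_ops_restore(console_dev, saved_ops);
 *
 * While intercepted, operations the intercept ops does not override fall
 * through to the compiled-in defaults, and dev_doperate_ops() can still
 * issue calls through the saved ops when the original behavior is needed.
 */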

/************************************************************************
 *			DEFAULT DEV OPS FUNCTIONS			*
 ************************************************************************/

/*
 * Unsupported devswitch functions (e.g. for writing to read-only device).
 * XXX may belong elsewhere.
 */
int
norevoke(struct dev_revoke_args *ap)
{
	/* take no action */
	return(0);
}

int
noclone(struct dev_clone_args *ap)
{
	/* take no action */
	return (0);	/* allow the clone */
}

int
noopen(struct dev_open_args *ap)
{
	return (ENODEV);
}

int
noclose(struct dev_close_args *ap)
{
	return (ENODEV);
}

int
noread(struct dev_read_args *ap)
{
	return (ENODEV);
}

int
nowrite(struct dev_write_args *ap)
{
	return (ENODEV);
}

int
noioctl(struct dev_ioctl_args *ap)
{
	return (ENODEV);
}

int
nokqfilter(struct dev_kqfilter_args *ap)
{
	return (ENODEV);
}

int
nommap(struct dev_mmap_args *ap)
{
	return (ENODEV);
}

int
nopoll(struct dev_poll_args *ap)
{
	ap->a_events = 0;
	return(0);
}

int
nostrategy(struct dev_strategy_args *ap)
{
	struct bio *bio = ap->a_bio;

	bio->bio_buf->b_flags |= B_ERROR;
	bio->bio_buf->b_error = EOPNOTSUPP;
	biodone(bio);
	return(0);
}

int
nopsize(struct dev_psize_args *ap)
{
	ap->a_result = 0;
	return(0);
}

int
nodump(struct dev_dump_args *ap)
{
	return (ENODEV);
}

/*
 * XXX this is probably bogus.  Any device that uses it isn't checking the
 * minor number.
 */
int
nullopen(struct dev_open_args *ap)
{
	return (0);
}

int
nullclose(struct dev_close_args *ap)
{
	return (0);
}