6324 Add an `ndp' tool for manipulating the neighbors table
[illumos-gate.git] / usr / src / uts / common / io / devinfo.c
blob84a48e086e6b335ee6dac57096cbee25cea91e9e
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
 */

/*
 * driver for accessing kernel devinfo tree.
 */
29 #include <sys/types.h>
30 #include <sys/pathname.h>
31 #include <sys/debug.h>
32 #include <sys/autoconf.h>
33 #include <sys/vmsystm.h>
34 #include <sys/conf.h>
35 #include <sys/file.h>
36 #include <sys/kmem.h>
37 #include <sys/modctl.h>
38 #include <sys/stat.h>
39 #include <sys/ddi.h>
40 #include <sys/sunddi.h>
41 #include <sys/sunldi_impl.h>
42 #include <sys/sunndi.h>
43 #include <sys/esunddi.h>
44 #include <sys/sunmdi.h>
45 #include <sys/ddi_impldefs.h>
46 #include <sys/ndi_impldefs.h>
47 #include <sys/mdi_impldefs.h>
48 #include <sys/devinfo_impl.h>
49 #include <sys/thread.h>
50 #include <sys/modhash.h>
51 #include <sys/bitmap.h>
52 #include <util/qsort.h>
53 #include <sys/disp.h>
54 #include <sys/kobj.h>
55 #include <sys/crc32.h>
56 #include <sys/ddi_hp.h>
57 #include <sys/ddi_hp_impl.h>
58 #include <sys/sysmacros.h>
59 #include <sys/list.h>
#ifdef DEBUG
/*
 * Debug message macros.  Each takes one fully parenthesized cmn_err()
 * argument list, e.g. dcmn_err((CE_CONT, "fmt", arg)), and emits the
 * message only when di_debug is at least 1, 2 or 3 respectively.
 *
 * The expansion is wrapped in do { } while (0) so that a use such as
 *	if (cond) dcmn_err((...)); else ...
 * keeps the else attached to the caller's if in DEBUG builds, exactly as
 * it is in non-DEBUG builds where the macros expand to nothing.
 */
static int di_debug;
#define	dcmn_err(args)	do { if (di_debug >= 1) cmn_err args; } while (0)
#define	dcmn_err2(args)	do { if (di_debug >= 2) cmn_err args; } while (0)
#define	dcmn_err3(args)	do { if (di_debug >= 3) cmn_err args; } while (0)
#else
#define	dcmn_err(args)	/* nothing */
#define	dcmn_err2(args)	/* nothing */
#define	dcmn_err3(args)	/* nothing */
#endif
/*
 * The devinfo minor number space is split evenly between the full and the
 * unprivileged (read-only) flavours of the driver: even minors belong to
 * the full version, odd minors to the read-only version.
 */
static int di_max_opens = 32;

static int di_prop_dyn = 1;		/* enable dynamic property support */

#define	DI_FULL_PARENT		0
#define	DI_READONLY_PARENT	1
#define	DI_NODE_SPECIES		2
#define	DI_UNPRIVILEGED_NODE(x)	(((x) % 2) != 0)

/* snapshot ioctl states */
#define	IOC_IDLE	0	/* no snapshot activity */
#define	IOC_SNAP	1	/* snapshot in progress */
#define	IOC_DONE	2	/* snapshot done, but not copied out */
#define	IOC_COPY	3	/* copyout in progress */
/*
 * Keep max alignment so we can move snapshot to different platforms.
 *
 * NOTE: Most callers should rely on the di_checkmem return value
 * being aligned, and reestablish *off_p with aligned value, instead
 * of trying to align size of their allocations: this approach will
 * minimize memory use.
 *
 * Rounds addr up to the next multiple of 8.  The argument is parenthesized
 * so that compound expressions (e.g. a conditional expression) are rounded
 * as a whole.
 */
#define	DI_ALIGN(addr)	(((addr) + 7l) & ~7l)
103 * To avoid wasting memory, make a linked list of memory chunks.
104 * Size of each chunk is buf_size.
106 struct di_mem {
107 struct di_mem *next; /* link to next chunk */
108 char *buf; /* contiguous kernel memory */
109 size_t buf_size; /* size of buf in bytes */
110 devmap_cookie_t cook; /* cookie from ddi_umem_alloc */
114 * This is a stack for walking the tree without using recursion.
115 * When the devinfo tree height is above some small size, one
116 * gets watchdog resets on sun4m.
118 struct di_stack {
119 void *offset[MAX_TREE_DEPTH];
120 struct dev_info *dip[MAX_TREE_DEPTH];
121 int circ[MAX_TREE_DEPTH];
122 int depth; /* depth of current node to be copied */
/*
 * Accessors for struct di_stack.  The top entry lives at index depth - 1,
 * its parent at depth - 2.
 */
#define	TOP_OFFSET(stack)	\
	((di_off_t *)(stack)->offset[(stack)->depth - 1])
#define	TOP_NODE(stack)		\
	((stack)->dip[(stack)->depth - 1])
#define	PARENT_OFFSET(stack)	\
	((di_off_t *)(stack)->offset[(stack)->depth - 2])
#define	EMPTY_STACK(stack)	((stack)->depth == 0)

/* Exit the top node (ndi_devi_exit) and drop it from the stack. */
#define	POP_STACK(stack)	{ \
	ndi_devi_exit((dev_info_t *)TOP_NODE(stack), \
		(stack)->circ[(stack)->depth - 1]); \
	((stack)->depth--); \
}

/* Enter node (ndi_devi_enter) and record it with its offset pointer. */
#define	PUSH_STACK(stack, node, off_p)	{ \
	ASSERT(node != NULL); \
	ndi_devi_enter((dev_info_t *)node, &(stack)->circ[(stack)->depth]); \
	(stack)->dip[(stack)->depth] = (node); \
	(stack)->offset[(stack)->depth] = (void *)(off_p); \
	((stack)->depth)++; \
}

/* The di_all header sits at offset 0 of the snapshot memory. */
#define	DI_ALL_PTR(s)	DI_ALL(di_mem_addr((s), 0))
148 * With devfs, the device tree has no global locks. The device tree is
149 * dynamic and dips may come and go if they are not locked locally. Under
150 * these conditions, pointers are no longer reliable as unique IDs.
151 * Specifically, these pointers cannot be used as keys for hash tables
152 * as the same devinfo structure may be freed in one part of the tree only
153 * to be allocated as the structure for a different device in another
154 * part of the tree. This can happen if DR and the snapshot are
155 * happening concurrently.
156 * The following data structures act as keys for devinfo nodes and
157 * pathinfo nodes.
160 enum di_ktype {
161 DI_DKEY = 1,
162 DI_PKEY = 2
165 struct di_dkey {
166 dev_info_t *dk_dip;
167 major_t dk_major;
168 int dk_inst;
169 pnode_t dk_nodeid;
172 struct di_pkey {
173 mdi_pathinfo_t *pk_pip;
174 char *pk_path_addr;
175 dev_info_t *pk_client;
176 dev_info_t *pk_phci;
179 struct di_key {
180 enum di_ktype k_type;
181 union {
182 struct di_dkey dkey;
183 struct di_pkey pkey;
184 } k_u;
188 struct i_lnode;
190 typedef struct i_link {
192 * If a di_link struct representing this i_link struct makes it
193 * into the snapshot, then self will point to the offset of
194 * the di_link struct in the snapshot
196 di_off_t self;
198 int spec_type; /* block or char access type */
199 struct i_lnode *src_lnode; /* src i_lnode */
200 struct i_lnode *tgt_lnode; /* tgt i_lnode */
201 struct i_link *src_link_next; /* next src i_link /w same i_lnode */
202 struct i_link *tgt_link_next; /* next tgt i_link /w same i_lnode */
203 } i_link_t;
205 typedef struct i_lnode {
207 * If a di_lnode struct representing this i_lnode struct makes it
208 * into the snapshot, then self will point to the offset of
209 * the di_lnode struct in the snapshot
211 di_off_t self;
214 * used for hashing and comparing i_lnodes
216 int modid;
219 * public information describing a link endpoint
221 struct di_node *di_node; /* di_node in snapshot */
222 dev_t devt; /* devt */
225 * i_link ptr to links coming into this i_lnode node
226 * (this i_lnode is the target of these i_links)
228 i_link_t *link_in;
231 * i_link ptr to links going out of this i_lnode node
232 * (this i_lnode is the source of these i_links)
234 i_link_t *link_out;
235 } i_lnode_t;
237 typedef struct i_hp {
238 di_off_t hp_off; /* Offset of di_hp_t in snapshot */
239 dev_info_t *hp_child; /* Child devinfo node of the di_hp_t */
240 list_node_t hp_link; /* List linkage */
241 } i_hp_t;
244 * Soft state associated with each instance of driver open.
246 static struct di_state {
247 di_off_t mem_size; /* total # bytes in memlist */
248 struct di_mem *memlist; /* head of memlist */
249 uint_t command; /* command from ioctl */
250 int di_iocstate; /* snapshot ioctl state */
251 mod_hash_t *reg_dip_hash;
252 mod_hash_t *reg_pip_hash;
253 int lnode_count;
254 int link_count;
256 mod_hash_t *lnode_hash;
257 mod_hash_t *link_hash;
259 list_t hp_list;
260 } **di_states;
262 static kmutex_t di_lock; /* serialize instance assignment */
264 typedef enum {
265 DI_QUIET = 0, /* DI_QUIET must always be 0 */
266 DI_ERR,
267 DI_INFO,
268 DI_TRACE,
269 DI_TRACE1,
270 DI_TRACE2
271 } di_cache_debug_t;
273 static uint_t di_chunk = 32; /* I/O chunk size in pages */
275 #define DI_CACHE_LOCK(c) (mutex_enter(&(c).cache_lock))
276 #define DI_CACHE_UNLOCK(c) (mutex_exit(&(c).cache_lock))
277 #define DI_CACHE_LOCKED(c) (mutex_owned(&(c).cache_lock))
280 * Check that whole device tree is being configured as a pre-condition for
281 * cleaning up /etc/devices files.
283 #define DEVICES_FILES_CLEANABLE(st) \
284 (((st)->command & DINFOSUBTREE) && ((st)->command & DINFOFORCE) && \
285 strcmp(DI_ALL_PTR(st)->root_path, "/") == 0)
287 #define CACHE_DEBUG(args) \
288 { if (di_cache_debug != DI_QUIET) di_cache_print args; }
290 typedef struct phci_walk_arg {
291 di_off_t off;
292 struct di_state *st;
293 } phci_walk_arg_t;
295 static int di_open(dev_t *, int, int, cred_t *);
296 static int di_ioctl(dev_t, int, intptr_t, int, cred_t *, int *);
297 static int di_close(dev_t, int, int, cred_t *);
298 static int di_info(dev_info_t *, ddi_info_cmd_t, void *, void **);
299 static int di_attach(dev_info_t *, ddi_attach_cmd_t);
300 static int di_detach(dev_info_t *, ddi_detach_cmd_t);
302 static di_off_t di_copyformat(di_off_t, struct di_state *, intptr_t, int);
303 static di_off_t di_snapshot_and_clean(struct di_state *);
304 static di_off_t di_copydevnm(di_off_t *, struct di_state *);
305 static di_off_t di_copytree(struct dev_info *, di_off_t *, struct di_state *);
306 static di_off_t di_copynode(struct dev_info *, struct di_stack *,
307 struct di_state *);
308 static di_off_t di_getmdata(struct ddi_minor_data *, di_off_t *, di_off_t,
309 struct di_state *);
310 static di_off_t di_getppdata(struct dev_info *, di_off_t *, struct di_state *);
311 static di_off_t di_getdpdata(struct dev_info *, di_off_t *, struct di_state *);
312 static di_off_t di_gethpdata(ddi_hp_cn_handle_t *, di_off_t *,
313 struct di_state *);
314 static di_off_t di_getprop(int, struct ddi_prop **, di_off_t *,
315 struct di_state *, struct dev_info *);
316 static void di_allocmem(struct di_state *, size_t);
317 static void di_freemem(struct di_state *);
318 static void di_copymem(struct di_state *st, caddr_t buf, size_t bufsiz);
319 static di_off_t di_checkmem(struct di_state *, di_off_t, size_t);
320 static void *di_mem_addr(struct di_state *, di_off_t);
321 static int di_setstate(struct di_state *, int);
322 static void di_register_dip(struct di_state *, dev_info_t *, di_off_t);
323 static void di_register_pip(struct di_state *, mdi_pathinfo_t *, di_off_t);
324 static di_off_t di_getpath_data(dev_info_t *, di_off_t *, di_off_t,
325 struct di_state *, int);
326 static di_off_t di_getlink_data(di_off_t, struct di_state *);
327 static int di_dip_find(struct di_state *st, dev_info_t *node, di_off_t *off_p);
329 static int cache_args_valid(struct di_state *st, int *error);
330 static int snapshot_is_cacheable(struct di_state *st);
331 static int di_cache_lookup(struct di_state *st);
332 static int di_cache_update(struct di_state *st);
333 static void di_cache_print(di_cache_debug_t msglevel, char *fmt, ...);
334 static int build_vhci_list(dev_info_t *vh_devinfo, void *arg);
335 static int build_phci_list(dev_info_t *ph_devinfo, void *arg);
336 static void di_hotplug_children(struct di_state *st);
338 extern int modrootloaded;
339 extern void mdi_walk_vhcis(int (*)(dev_info_t *, void *), void *);
340 extern void mdi_vhci_walk_phcis(dev_info_t *,
341 int (*)(dev_info_t *, void *), void *);
344 static struct cb_ops di_cb_ops = {
345 di_open, /* open */
346 di_close, /* close */
347 nodev, /* strategy */
348 nodev, /* print */
349 nodev, /* dump */
350 nodev, /* read */
351 nodev, /* write */
352 di_ioctl, /* ioctl */
353 nodev, /* devmap */
354 nodev, /* mmap */
355 nodev, /* segmap */
356 nochpoll, /* poll */
357 ddi_prop_op, /* prop_op */
358 NULL, /* streamtab */
359 D_NEW | D_MP /* Driver compatibility flag */
362 static struct dev_ops di_ops = {
363 DEVO_REV, /* devo_rev, */
364 0, /* refcnt */
365 di_info, /* info */
366 nulldev, /* identify */
367 nulldev, /* probe */
368 di_attach, /* attach */
369 di_detach, /* detach */
370 nodev, /* reset */
371 &di_cb_ops, /* driver operations */
372 NULL /* bus operations */
376 * Module linkage information for the kernel.
378 static struct modldrv modldrv = {
379 &mod_driverops,
380 "DEVINFO Driver",
381 &di_ops
384 static struct modlinkage modlinkage = {
385 MODREV_1,
386 &modldrv,
387 NULL
391 _init(void)
393 int error;
395 mutex_init(&di_lock, NULL, MUTEX_DRIVER, NULL);
397 error = mod_install(&modlinkage);
398 if (error != 0) {
399 mutex_destroy(&di_lock);
400 return (error);
403 return (0);
407 _info(struct modinfo *modinfop)
409 return (mod_info(&modlinkage, modinfop));
413 _fini(void)
415 int error;
417 error = mod_remove(&modlinkage);
418 if (error != 0) {
419 return (error);
422 mutex_destroy(&di_lock);
423 return (0);
426 static dev_info_t *di_dip;
428 /*ARGSUSED*/
429 static int
430 di_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
432 int error = DDI_FAILURE;
434 switch (infocmd) {
435 case DDI_INFO_DEVT2DEVINFO:
436 *result = (void *)di_dip;
437 error = DDI_SUCCESS;
438 break;
439 case DDI_INFO_DEVT2INSTANCE:
441 * All dev_t's map to the same, single instance.
443 *result = (void *)0;
444 error = DDI_SUCCESS;
445 break;
446 default:
447 break;
450 return (error);
453 static int
454 di_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
456 int error = DDI_FAILURE;
458 switch (cmd) {
459 case DDI_ATTACH:
460 di_states = kmem_zalloc(
461 di_max_opens * sizeof (struct di_state *), KM_SLEEP);
463 if (ddi_create_minor_node(dip, "devinfo", S_IFCHR,
464 DI_FULL_PARENT, DDI_PSEUDO, NULL) == DDI_FAILURE ||
465 ddi_create_minor_node(dip, "devinfo,ro", S_IFCHR,
466 DI_READONLY_PARENT, DDI_PSEUDO, NULL) == DDI_FAILURE) {
467 kmem_free(di_states,
468 di_max_opens * sizeof (struct di_state *));
469 ddi_remove_minor_node(dip, NULL);
470 error = DDI_FAILURE;
471 } else {
472 di_dip = dip;
473 ddi_report_dev(dip);
475 error = DDI_SUCCESS;
477 break;
478 default:
479 error = DDI_FAILURE;
480 break;
483 return (error);
486 static int
487 di_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
489 int error = DDI_FAILURE;
491 switch (cmd) {
492 case DDI_DETACH:
493 ddi_remove_minor_node(dip, NULL);
494 di_dip = NULL;
495 kmem_free(di_states, di_max_opens * sizeof (struct di_state *));
497 error = DDI_SUCCESS;
498 break;
499 default:
500 error = DDI_FAILURE;
501 break;
504 return (error);
508 * Allow multiple opens by tweaking the dev_t such that it looks like each
509 * open is getting a different minor device. Each minor gets a separate
510 * entry in the di_states[] table. Based on the original minor number, we
511 * discriminate opens of the full and read-only nodes. If all of the instances
512 * of the selected minor node are currently open, we return EAGAIN.
514 /*ARGSUSED*/
515 static int
516 di_open(dev_t *devp, int flag, int otyp, cred_t *credp)
518 int m;
519 minor_t minor_parent = getminor(*devp);
521 if (minor_parent != DI_FULL_PARENT &&
522 minor_parent != DI_READONLY_PARENT)
523 return (ENXIO);
525 mutex_enter(&di_lock);
527 for (m = minor_parent; m < di_max_opens; m += DI_NODE_SPECIES) {
528 if (di_states[m] != NULL)
529 continue;
531 di_states[m] = kmem_zalloc(sizeof (struct di_state), KM_SLEEP);
532 break; /* It's ours. */
535 if (m >= di_max_opens) {
537 * maximum open instance for device reached
539 mutex_exit(&di_lock);
540 dcmn_err((CE_WARN, "devinfo: maximum devinfo open reached"));
541 return (EAGAIN);
543 mutex_exit(&di_lock);
545 ASSERT(m < di_max_opens);
546 *devp = makedevice(getmajor(*devp), (minor_t)(m + DI_NODE_SPECIES));
548 dcmn_err((CE_CONT, "di_open: thread = %p, assigned minor = %d\n",
549 (void *)curthread, m + DI_NODE_SPECIES));
551 return (0);
554 /*ARGSUSED*/
555 static int
556 di_close(dev_t dev, int flag, int otype, cred_t *cred_p)
558 struct di_state *st;
559 int m = (int)getminor(dev) - DI_NODE_SPECIES;
561 if (m < 0) {
562 cmn_err(CE_WARN, "closing non-existent devinfo minor %d",
563 m + DI_NODE_SPECIES);
564 return (ENXIO);
567 st = di_states[m];
568 ASSERT(m < di_max_opens && st != NULL);
570 di_freemem(st);
571 kmem_free(st, sizeof (struct di_state));
574 * empty slot in state table
576 mutex_enter(&di_lock);
577 di_states[m] = NULL;
578 dcmn_err((CE_CONT, "di_close: thread = %p, assigned minor = %d\n",
579 (void *)curthread, m + DI_NODE_SPECIES));
580 mutex_exit(&di_lock);
582 return (0);
586 /*ARGSUSED*/
587 static int
588 di_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, int *rvalp)
590 int rv, error;
591 di_off_t off;
592 struct di_all *all;
593 struct di_state *st;
594 int m = (int)getminor(dev) - DI_NODE_SPECIES;
595 major_t i;
596 char *drv_name;
597 size_t map_size, size;
598 struct di_mem *dcp;
599 int ndi_flags;
601 if (m < 0 || m >= di_max_opens) {
602 return (ENXIO);
605 st = di_states[m];
606 ASSERT(st != NULL);
608 dcmn_err2((CE_CONT, "di_ioctl: mode = %x, cmd = %x\n", mode, cmd));
610 switch (cmd) {
611 case DINFOIDENT:
613 * This is called from di_init to verify that the driver
614 * opened is indeed devinfo. The purpose is to guard against
615 * sending ioctl to an unknown driver in case of an
616 * unresolved major number conflict during bfu.
618 *rvalp = DI_MAGIC;
619 return (0);
621 case DINFOLODRV:
623 * Hold an installed driver and return the result
625 if (DI_UNPRIVILEGED_NODE(m)) {
627 * Only the fully enabled instances may issue
628 * DINFOLDDRV.
630 return (EACCES);
633 drv_name = kmem_alloc(MAXNAMELEN, KM_SLEEP);
634 if (ddi_copyin((void *)arg, drv_name, MAXNAMELEN, mode) != 0) {
635 kmem_free(drv_name, MAXNAMELEN);
636 return (EFAULT);
640 * Some 3rd party driver's _init() walks the device tree,
641 * so we load the driver module before configuring driver.
643 i = ddi_name_to_major(drv_name);
644 if (ddi_hold_driver(i) == NULL) {
645 kmem_free(drv_name, MAXNAMELEN);
646 return (ENXIO);
649 ndi_flags = NDI_DEVI_PERSIST | NDI_CONFIG | NDI_NO_EVENT;
652 * i_ddi_load_drvconf() below will trigger a reprobe
653 * via reset_nexus_flags(). NDI_DRV_CONF_REPROBE isn't
654 * needed here.
656 modunload_disable();
657 (void) i_ddi_load_drvconf(i);
658 (void) ndi_devi_config_driver(ddi_root_node(), ndi_flags, i);
659 kmem_free(drv_name, MAXNAMELEN);
660 ddi_rele_driver(i);
661 rv = i_ddi_devs_attached(i);
662 modunload_enable();
664 i_ddi_di_cache_invalidate();
666 return ((rv == DDI_SUCCESS)? 0 : ENXIO);
668 case DINFOUSRLD:
670 * The case for copying snapshot to userland
672 if (di_setstate(st, IOC_COPY) == -1)
673 return (EBUSY);
675 map_size = DI_ALL_PTR(st)->map_size;
676 if (map_size == 0) {
677 (void) di_setstate(st, IOC_DONE);
678 return (EFAULT);
682 * copyout the snapshot
684 map_size = (map_size + PAGEOFFSET) & PAGEMASK;
687 * Return the map size, so caller may do a sanity
688 * check against the return value of snapshot ioctl()
690 *rvalp = (int)map_size;
693 * Copy one chunk at a time
695 off = 0;
696 dcp = st->memlist;
697 while (map_size) {
698 size = dcp->buf_size;
699 if (map_size <= size) {
700 size = map_size;
703 if (ddi_copyout(di_mem_addr(st, off),
704 (void *)(arg + off), size, mode) != 0) {
705 (void) di_setstate(st, IOC_DONE);
706 return (EFAULT);
709 map_size -= size;
710 off += size;
711 dcp = dcp->next;
714 di_freemem(st);
715 (void) di_setstate(st, IOC_IDLE);
716 return (0);
718 default:
719 if ((cmd & ~DIIOC_MASK) != DIIOC) {
721 * Invalid ioctl command
723 return (ENOTTY);
726 * take a snapshot
728 st->command = cmd & DIIOC_MASK;
729 /*FALLTHROUGH*/
733 * Obtain enough memory to hold header + rootpath. We prevent kernel
734 * memory exhaustion by freeing any previously allocated snapshot and
735 * refusing the operation; otherwise we would be allowing ioctl(),
736 * ioctl(), ioctl(), ..., panic.
738 if (di_setstate(st, IOC_SNAP) == -1)
739 return (EBUSY);
742 * Initial memlist always holds di_all and the root_path - and
743 * is at least a page and size.
745 size = sizeof (struct di_all) +
746 sizeof (((struct dinfo_io *)(NULL))->root_path);
747 if (size < PAGESIZE)
748 size = PAGESIZE;
749 off = di_checkmem(st, 0, size);
750 all = DI_ALL_PTR(st);
751 off += sizeof (struct di_all); /* real length of di_all */
753 all->devcnt = devcnt;
754 all->command = st->command;
755 all->version = DI_SNAPSHOT_VERSION;
756 all->top_vhci_devinfo = 0; /* filled by build_vhci_list. */
759 * Note the endianness in case we need to transport snapshot
760 * over the network.
762 #if defined(_LITTLE_ENDIAN)
763 all->endianness = DI_LITTLE_ENDIAN;
764 #else
765 all->endianness = DI_BIG_ENDIAN;
766 #endif
768 /* Copyin ioctl args, store in the snapshot. */
769 if (copyinstr((void *)arg, all->req_path,
770 sizeof (((struct dinfo_io *)(NULL))->root_path), &size) != 0) {
771 di_freemem(st);
772 (void) di_setstate(st, IOC_IDLE);
773 return (EFAULT);
775 (void) strcpy(all->root_path, all->req_path);
776 off += size; /* real length of root_path */
778 if ((st->command & DINFOCLEANUP) && !DEVICES_FILES_CLEANABLE(st)) {
779 di_freemem(st);
780 (void) di_setstate(st, IOC_IDLE);
781 return (EINVAL);
784 error = 0;
785 if ((st->command & DINFOCACHE) && !cache_args_valid(st, &error)) {
786 di_freemem(st);
787 (void) di_setstate(st, IOC_IDLE);
788 return (error);
792 * Only the fully enabled version may force load drivers or read
793 * the parent private data from a driver.
795 if ((st->command & (DINFOPRIVDATA | DINFOFORCE)) != 0 &&
796 DI_UNPRIVILEGED_NODE(m)) {
797 di_freemem(st);
798 (void) di_setstate(st, IOC_IDLE);
799 return (EACCES);
802 /* Do we need private data? */
803 if (st->command & DINFOPRIVDATA) {
804 arg += sizeof (((struct dinfo_io *)(NULL))->root_path);
806 #ifdef _MULTI_DATAMODEL
807 switch (ddi_model_convert_from(mode & FMODELS)) {
808 case DDI_MODEL_ILP32: {
810 * Cannot copy private data from 64-bit kernel
811 * to 32-bit app
813 di_freemem(st);
814 (void) di_setstate(st, IOC_IDLE);
815 return (EINVAL);
817 case DDI_MODEL_NONE:
818 if ((off = di_copyformat(off, st, arg, mode)) == 0) {
819 di_freemem(st);
820 (void) di_setstate(st, IOC_IDLE);
821 return (EFAULT);
823 break;
825 #else /* !_MULTI_DATAMODEL */
826 if ((off = di_copyformat(off, st, arg, mode)) == 0) {
827 di_freemem(st);
828 (void) di_setstate(st, IOC_IDLE);
829 return (EFAULT);
831 #endif /* _MULTI_DATAMODEL */
834 all->top_devinfo = DI_ALIGN(off);
837 * For cache lookups we reallocate memory from scratch,
838 * so the value of "all" is no longer valid.
840 all = NULL;
842 if (st->command & DINFOCACHE) {
843 *rvalp = di_cache_lookup(st);
844 } else if (snapshot_is_cacheable(st)) {
845 DI_CACHE_LOCK(di_cache);
846 *rvalp = di_cache_update(st);
847 DI_CACHE_UNLOCK(di_cache);
848 } else
849 *rvalp = di_snapshot_and_clean(st);
851 if (*rvalp) {
852 DI_ALL_PTR(st)->map_size = *rvalp;
853 (void) di_setstate(st, IOC_DONE);
854 } else {
855 di_freemem(st);
856 (void) di_setstate(st, IOC_IDLE);
859 return (0);
863 * Get a chunk of memory >= size, for the snapshot
865 static void
866 di_allocmem(struct di_state *st, size_t size)
868 struct di_mem *mem = kmem_zalloc(sizeof (struct di_mem), KM_SLEEP);
871 * Round up size to nearest power of 2. If it is less
872 * than st->mem_size, set it to st->mem_size (i.e.,
873 * the mem_size is doubled every time) to reduce the
874 * number of memory allocations.
876 size_t tmp = 1;
877 while (tmp < size) {
878 tmp <<= 1;
880 size = (tmp > st->mem_size) ? tmp : st->mem_size;
882 mem->buf = ddi_umem_alloc(size, DDI_UMEM_SLEEP, &mem->cook);
883 mem->buf_size = size;
885 dcmn_err2((CE_CONT, "di_allocmem: mem_size=%x\n", st->mem_size));
887 if (st->mem_size == 0) { /* first chunk */
888 st->memlist = mem;
889 } else {
891 * locate end of linked list and add a chunk at the end
893 struct di_mem *dcp = st->memlist;
894 while (dcp->next != NULL) {
895 dcp = dcp->next;
898 dcp->next = mem;
901 st->mem_size += size;
905 * Copy upto bufsiz bytes of the memlist to buf
907 static void
908 di_copymem(struct di_state *st, caddr_t buf, size_t bufsiz)
910 struct di_mem *dcp;
911 size_t copysz;
913 if (st->mem_size == 0) {
914 ASSERT(st->memlist == NULL);
915 return;
918 copysz = 0;
919 for (dcp = st->memlist; dcp; dcp = dcp->next) {
921 ASSERT(bufsiz > 0);
923 if (bufsiz <= dcp->buf_size)
924 copysz = bufsiz;
925 else
926 copysz = dcp->buf_size;
928 bcopy(dcp->buf, buf, copysz);
930 buf += copysz;
931 bufsiz -= copysz;
933 if (bufsiz == 0)
934 break;
939 * Free all memory for the snapshot
941 static void
942 di_freemem(struct di_state *st)
944 struct di_mem *dcp, *tmp;
946 dcmn_err2((CE_CONT, "di_freemem\n"));
948 if (st->mem_size) {
949 dcp = st->memlist;
950 while (dcp) { /* traverse the linked list */
951 tmp = dcp;
952 dcp = dcp->next;
953 ddi_umem_free(tmp->cook);
954 kmem_free(tmp, sizeof (struct di_mem));
956 st->mem_size = 0;
957 st->memlist = NULL;
960 ASSERT(st->mem_size == 0);
961 ASSERT(st->memlist == NULL);
965 * Copies cached data to the di_state structure.
966 * Returns:
967 * - size of data copied, on SUCCESS
968 * - 0 on failure
970 static int
971 di_cache2mem(struct di_cache *cache, struct di_state *st)
973 caddr_t pa;
975 ASSERT(st->mem_size == 0);
976 ASSERT(st->memlist == NULL);
977 ASSERT(!servicing_interrupt());
978 ASSERT(DI_CACHE_LOCKED(*cache));
980 if (cache->cache_size == 0) {
981 ASSERT(cache->cache_data == NULL);
982 CACHE_DEBUG((DI_ERR, "Empty cache. Skipping copy"));
983 return (0);
986 ASSERT(cache->cache_data);
988 di_allocmem(st, cache->cache_size);
990 pa = di_mem_addr(st, 0);
992 ASSERT(pa);
995 * Verify that di_allocmem() allocates contiguous memory,
996 * so that it is safe to do straight bcopy()
998 ASSERT(st->memlist != NULL);
999 ASSERT(st->memlist->next == NULL);
1000 bcopy(cache->cache_data, pa, cache->cache_size);
1002 return (cache->cache_size);
1006 * Copies a snapshot from di_state to the cache
1007 * Returns:
1008 * - 0 on failure
1009 * - size of copied data on success
1011 static size_t
1012 di_mem2cache(struct di_state *st, struct di_cache *cache)
1014 size_t map_size;
1016 ASSERT(cache->cache_size == 0);
1017 ASSERT(cache->cache_data == NULL);
1018 ASSERT(!servicing_interrupt());
1019 ASSERT(DI_CACHE_LOCKED(*cache));
1021 if (st->mem_size == 0) {
1022 ASSERT(st->memlist == NULL);
1023 CACHE_DEBUG((DI_ERR, "Empty memlist. Skipping copy"));
1024 return (0);
1027 ASSERT(st->memlist);
1030 * The size of the memory list may be much larger than the
1031 * size of valid data (map_size). Cache only the valid data
1033 map_size = DI_ALL_PTR(st)->map_size;
1034 if (map_size == 0 || map_size < sizeof (struct di_all) ||
1035 map_size > st->mem_size) {
1036 CACHE_DEBUG((DI_ERR, "cannot cache: bad size: 0x%x", map_size));
1037 return (0);
1040 cache->cache_data = kmem_alloc(map_size, KM_SLEEP);
1041 cache->cache_size = map_size;
1042 di_copymem(st, cache->cache_data, cache->cache_size);
1044 return (map_size);
1048 * Make sure there is at least "size" bytes memory left before
1049 * going on. Otherwise, start on a new chunk.
1051 static di_off_t
1052 di_checkmem(struct di_state *st, di_off_t off, size_t size)
1054 dcmn_err3((CE_CONT, "di_checkmem: off=%x size=%x\n",
1055 off, (int)size));
1058 * di_checkmem() shouldn't be called with a size of zero.
1059 * But in case it is, we want to make sure we return a valid
1060 * offset within the memlist and not an offset that points us
1061 * at the end of the memlist.
1063 if (size == 0) {
1064 dcmn_err((CE_WARN, "di_checkmem: invalid zero size used"));
1065 size = 1;
1068 off = DI_ALIGN(off);
1069 if ((st->mem_size - off) < size) {
1070 off = st->mem_size;
1071 di_allocmem(st, size);
1074 /* verify that return value is aligned */
1075 ASSERT(off == DI_ALIGN(off));
1076 return (off);
1080 * Copy the private data format from ioctl arg.
1081 * On success, the ending offset is returned. On error 0 is returned.
1083 static di_off_t
1084 di_copyformat(di_off_t off, struct di_state *st, intptr_t arg, int mode)
1086 di_off_t size;
1087 struct di_priv_data *priv;
1088 struct di_all *all = DI_ALL_PTR(st);
1090 dcmn_err2((CE_CONT, "di_copyformat: off=%x, arg=%p mode=%x\n",
1091 off, (void *)arg, mode));
1094 * Copyin data and check version.
1095 * We only handle private data version 0.
1097 priv = kmem_alloc(sizeof (struct di_priv_data), KM_SLEEP);
1098 if ((ddi_copyin((void *)arg, priv, sizeof (struct di_priv_data),
1099 mode) != 0) || (priv->version != DI_PRIVDATA_VERSION_0)) {
1100 kmem_free(priv, sizeof (struct di_priv_data));
1101 return (0);
1105 * Save di_priv_data copied from userland in snapshot.
1107 all->pd_version = priv->version;
1108 all->n_ppdata = priv->n_parent;
1109 all->n_dpdata = priv->n_driver;
1112 * copyin private data format, modify offset accordingly
1114 if (all->n_ppdata) { /* parent private data format */
1116 * check memory
1118 size = all->n_ppdata * sizeof (struct di_priv_format);
1119 all->ppdata_format = off = di_checkmem(st, off, size);
1120 if (ddi_copyin(priv->parent, di_mem_addr(st, off), size,
1121 mode) != 0) {
1122 kmem_free(priv, sizeof (struct di_priv_data));
1123 return (0);
1126 off += size;
1129 if (all->n_dpdata) { /* driver private data format */
1131 * check memory
1133 size = all->n_dpdata * sizeof (struct di_priv_format);
1134 all->dpdata_format = off = di_checkmem(st, off, size);
1135 if (ddi_copyin(priv->driver, di_mem_addr(st, off), size,
1136 mode) != 0) {
1137 kmem_free(priv, sizeof (struct di_priv_data));
1138 return (0);
1141 off += size;
1144 kmem_free(priv, sizeof (struct di_priv_data));
1145 return (off);
1149 * Return the real address based on the offset (off) within snapshot
1151 static void *
1152 di_mem_addr(struct di_state *st, di_off_t off)
1154 struct di_mem *dcp = st->memlist;
1156 dcmn_err3((CE_CONT, "di_mem_addr: dcp=%p off=%x\n",
1157 (void *)dcp, off));
1159 ASSERT(off < st->mem_size);
1161 while (off >= dcp->buf_size) {
1162 off -= dcp->buf_size;
1163 dcp = dcp->next;
1166 dcmn_err3((CE_CONT, "di_mem_addr: new off=%x, return = %p\n",
1167 off, (void *)(dcp->buf + off)));
1169 return (dcp->buf + off);
1173 * Ideally we would use the whole key to derive the hash
1174 * value. However, the probability that two keys will
1175 * have the same dip (or pip) is very low, so
1176 * hashing by dip (or pip) pointer should suffice.
1178 static uint_t
1179 di_hash_byptr(void *arg, mod_hash_key_t key)
1181 struct di_key *dik = key;
1182 size_t rshift;
1183 void *ptr;
1185 ASSERT(arg == NULL);
1187 switch (dik->k_type) {
1188 case DI_DKEY:
1189 ptr = dik->k_u.dkey.dk_dip;
1190 rshift = highbit(sizeof (struct dev_info));
1191 break;
1192 case DI_PKEY:
1193 ptr = dik->k_u.pkey.pk_pip;
1194 rshift = highbit(sizeof (struct mdi_pathinfo));
1195 break;
1196 default:
1197 panic("devinfo: unknown key type");
1198 /*NOTREACHED*/
1200 return (mod_hash_byptr((void *)rshift, ptr));
1203 static void
1204 di_key_dtor(mod_hash_key_t key)
1206 char *path_addr;
1207 struct di_key *dik = key;
1209 switch (dik->k_type) {
1210 case DI_DKEY:
1211 break;
1212 case DI_PKEY:
1213 path_addr = dik->k_u.pkey.pk_path_addr;
1214 if (path_addr)
1215 kmem_free(path_addr, strlen(path_addr) + 1);
1216 break;
1217 default:
1218 panic("devinfo: unknown key type");
1219 /*NOTREACHED*/
1222 kmem_free(dik, sizeof (struct di_key));
1225 static int
1226 di_dkey_cmp(struct di_dkey *dk1, struct di_dkey *dk2)
1228 if (dk1->dk_dip != dk2->dk_dip)
1229 return (dk1->dk_dip > dk2->dk_dip ? 1 : -1);
1231 if (dk1->dk_major != DDI_MAJOR_T_NONE &&
1232 dk2->dk_major != DDI_MAJOR_T_NONE) {
1233 if (dk1->dk_major != dk2->dk_major)
1234 return (dk1->dk_major > dk2->dk_major ? 1 : -1);
1236 if (dk1->dk_inst != dk2->dk_inst)
1237 return (dk1->dk_inst > dk2->dk_inst ? 1 : -1);
1240 if (dk1->dk_nodeid != dk2->dk_nodeid)
1241 return (dk1->dk_nodeid > dk2->dk_nodeid ? 1 : -1);
1243 return (0);
1246 static int
1247 di_pkey_cmp(struct di_pkey *pk1, struct di_pkey *pk2)
1249 char *p1, *p2;
1250 int rv;
1252 if (pk1->pk_pip != pk2->pk_pip)
1253 return (pk1->pk_pip > pk2->pk_pip ? 1 : -1);
1255 p1 = pk1->pk_path_addr;
1256 p2 = pk2->pk_path_addr;
1258 p1 = p1 ? p1 : "";
1259 p2 = p2 ? p2 : "";
1261 rv = strcmp(p1, p2);
1262 if (rv)
1263 return (rv > 0 ? 1 : -1);
1265 if (pk1->pk_client != pk2->pk_client)
1266 return (pk1->pk_client > pk2->pk_client ? 1 : -1);
1268 if (pk1->pk_phci != pk2->pk_phci)
1269 return (pk1->pk_phci > pk2->pk_phci ? 1 : -1);
1271 return (0);
1274 static int
1275 di_key_cmp(mod_hash_key_t key1, mod_hash_key_t key2)
1277 struct di_key *dik1, *dik2;
1279 dik1 = key1;
1280 dik2 = key2;
1282 if (dik1->k_type != dik2->k_type) {
1283 panic("devinfo: mismatched keys");
1284 /*NOTREACHED*/
1287 switch (dik1->k_type) {
1288 case DI_DKEY:
1289 return (di_dkey_cmp(&(dik1->k_u.dkey), &(dik2->k_u.dkey)));
1290 case DI_PKEY:
1291 return (di_pkey_cmp(&(dik1->k_u.pkey), &(dik2->k_u.pkey)));
1292 default:
1293 panic("devinfo: unknown key type");
1294 /*NOTREACHED*/
1298 static void
1299 di_copy_aliases(struct di_state *st, alias_pair_t *apair, di_off_t *offp)
1301 di_off_t off;
1302 struct di_all *all = DI_ALL_PTR(st);
1303 struct di_alias *di_alias;
1304 di_off_t curroff;
1305 dev_info_t *currdip;
1306 size_t size;
1308 currdip = NULL;
1309 if (resolve_pathname(apair->pair_alias, &currdip, NULL, NULL) != 0) {
1310 return;
1313 if (di_dip_find(st, currdip, &curroff) != 0) {
1314 ndi_rele_devi(currdip);
1315 return;
1317 ndi_rele_devi(currdip);
1319 off = *offp;
1320 size = sizeof (struct di_alias);
1321 size += strlen(apair->pair_alias) + 1;
1322 off = di_checkmem(st, off, size);
1323 di_alias = DI_ALIAS(di_mem_addr(st, off));
1325 di_alias->self = off;
1326 di_alias->next = all->aliases;
1327 all->aliases = off;
1328 (void) strcpy(di_alias->alias, apair->pair_alias);
1329 di_alias->curroff = curroff;
1331 off += size;
1333 *offp = off;
1337 * This is the main function that takes a snapshot
1339 static di_off_t
1340 di_snapshot(struct di_state *st)
1342 di_off_t off;
1343 struct di_all *all;
1344 dev_info_t *rootnode;
1345 char buf[80];
1346 int plen;
1347 char *path;
1348 vnode_t *vp;
1349 int i;
1351 all = DI_ALL_PTR(st);
1352 dcmn_err((CE_CONT, "Taking a snapshot of devinfo tree...\n"));
1355 * Translate requested root path if an alias and snap-root != "/"
1357 if (ddi_aliases_present == B_TRUE && strcmp(all->root_path, "/") != 0) {
1358 /* If there is no redirected alias, use root_path as is */
1359 rootnode = ddi_alias_redirect(all->root_path);
1360 if (rootnode) {
1361 (void) ddi_pathname(rootnode, all->root_path);
1362 goto got_root;
1367 * Verify path before entrusting it to e_ddi_hold_devi_by_path because
1368 * some platforms have OBP bugs where executing the NDI_PROMNAME code
1369 * path against an invalid path results in panic. The lookupnameat
1370 * is done relative to rootdir without a leading '/' on "devices/"
1371 * to force the lookup to occur in the global zone.
1373 plen = strlen("devices/") + strlen(all->root_path) + 1;
1374 path = kmem_alloc(plen, KM_SLEEP);
1375 (void) snprintf(path, plen, "devices/%s", all->root_path);
1376 if (lookupnameat(path, UIO_SYSSPACE, FOLLOW, NULLVPP, &vp, rootdir)) {
1377 dcmn_err((CE_CONT, "Devinfo node %s not found\n",
1378 all->root_path));
1379 kmem_free(path, plen);
1380 return (0);
1382 kmem_free(path, plen);
1383 VN_RELE(vp);
1386 * Hold the devinfo node referred by the path.
1388 rootnode = e_ddi_hold_devi_by_path(all->root_path, 0);
1389 if (rootnode == NULL) {
1390 dcmn_err((CE_CONT, "Devinfo node %s not found\n",
1391 all->root_path));
1392 return (0);
1395 got_root:
1396 (void) snprintf(buf, sizeof (buf),
1397 "devinfo registered dips (statep=%p)", (void *)st);
1399 st->reg_dip_hash = mod_hash_create_extended(buf, 64,
1400 di_key_dtor, mod_hash_null_valdtor, di_hash_byptr,
1401 NULL, di_key_cmp, KM_SLEEP);
1404 (void) snprintf(buf, sizeof (buf),
1405 "devinfo registered pips (statep=%p)", (void *)st);
1407 st->reg_pip_hash = mod_hash_create_extended(buf, 64,
1408 di_key_dtor, mod_hash_null_valdtor, di_hash_byptr,
1409 NULL, di_key_cmp, KM_SLEEP);
1411 if (DINFOHP & st->command) {
1412 list_create(&st->hp_list, sizeof (i_hp_t),
1413 offsetof(i_hp_t, hp_link));
1417 * copy the device tree
1419 off = di_copytree(DEVI(rootnode), &all->top_devinfo, st);
1421 if (DINFOPATH & st->command) {
1422 mdi_walk_vhcis(build_vhci_list, st);
1425 if (DINFOHP & st->command) {
1426 di_hotplug_children(st);
1429 ddi_release_devi(rootnode);
1432 * copy the devnames array
1434 all->devnames = off;
1435 off = di_copydevnm(&all->devnames, st);
1438 /* initialize the hash tables */
1439 st->lnode_count = 0;
1440 st->link_count = 0;
1442 if (DINFOLYR & st->command) {
1443 off = di_getlink_data(off, st);
1446 all->aliases = 0;
1447 if (ddi_aliases_present == B_FALSE)
1448 goto done;
1450 for (i = 0; i < ddi_aliases.dali_num_pairs; i++) {
1451 di_copy_aliases(st, &(ddi_aliases.dali_alias_pairs[i]), &off);
1454 done:
1456 * Free up hash tables
1458 mod_hash_destroy_hash(st->reg_dip_hash);
1459 mod_hash_destroy_hash(st->reg_pip_hash);
1462 * Record the timestamp now that we are done with snapshot.
1464 * We compute the checksum later and then only if we cache
1465 * the snapshot, since checksumming adds some overhead.
1466 * The checksum is checked later if we read the cache file.
1467 * from disk.
1469 * Set checksum field to 0 as CRC is calculated with that
1470 * field set to 0.
1472 all->snapshot_time = ddi_get_time();
1473 all->cache_checksum = 0;
1475 ASSERT(all->snapshot_time != 0);
1477 return (off);
1481 * Take a snapshot and clean /etc/devices files if DINFOCLEANUP is set
1483 static di_off_t
1484 di_snapshot_and_clean(struct di_state *st)
1486 di_off_t off;
1488 modunload_disable();
1489 off = di_snapshot(st);
1490 if (off != 0 && (st->command & DINFOCLEANUP)) {
1491 ASSERT(DEVICES_FILES_CLEANABLE(st));
1493 * Cleanup /etc/devices files:
1494 * In order to accurately account for the system configuration
1495 * in /etc/devices files, the appropriate drivers must be
1496 * fully configured before the cleanup starts.
1497 * So enable modunload only after the cleanup.
1499 i_ddi_clean_devices_files();
1501 * Remove backing store nodes for unused devices,
1502 * which retain past permissions customizations
1503 * and may be undesired for newly configured devices.
1505 dev_devices_cleanup();
1507 modunload_enable();
1509 return (off);
1513 * construct vhci linkage in the snapshot.
1515 static int
1516 build_vhci_list(dev_info_t *vh_devinfo, void *arg)
1518 struct di_all *all;
1519 struct di_node *me;
1520 struct di_state *st;
1521 di_off_t off;
1522 phci_walk_arg_t pwa;
1524 dcmn_err3((CE_CONT, "build_vhci list\n"));
1526 dcmn_err3((CE_CONT, "vhci node %s%d\n",
1527 ddi_driver_name(vh_devinfo), ddi_get_instance(vh_devinfo)));
1529 st = (struct di_state *)arg;
1530 if (di_dip_find(st, vh_devinfo, &off) != 0) {
1531 dcmn_err((CE_WARN, "di_dip_find error for the given node\n"));
1532 return (DDI_WALK_TERMINATE);
1535 dcmn_err3((CE_CONT, "st->mem_size: %d vh_devinfo off: 0x%x\n",
1536 st->mem_size, off));
1538 all = DI_ALL_PTR(st);
1539 if (all->top_vhci_devinfo == 0) {
1540 all->top_vhci_devinfo = off;
1541 } else {
1542 me = DI_NODE(di_mem_addr(st, all->top_vhci_devinfo));
1544 while (me->next_vhci != 0) {
1545 me = DI_NODE(di_mem_addr(st, me->next_vhci));
1548 me->next_vhci = off;
1551 pwa.off = off;
1552 pwa.st = st;
1553 mdi_vhci_walk_phcis(vh_devinfo, build_phci_list, &pwa);
1555 return (DDI_WALK_CONTINUE);
1559 * construct phci linkage for the given vhci in the snapshot.
1561 static int
1562 build_phci_list(dev_info_t *ph_devinfo, void *arg)
1564 struct di_node *vh_di_node;
1565 struct di_node *me;
1566 phci_walk_arg_t *pwa;
1567 di_off_t off;
1569 pwa = (phci_walk_arg_t *)arg;
1571 dcmn_err3((CE_CONT, "build_phci list for vhci at offset: 0x%x\n",
1572 pwa->off));
1574 vh_di_node = DI_NODE(di_mem_addr(pwa->st, pwa->off));
1575 if (di_dip_find(pwa->st, ph_devinfo, &off) != 0) {
1576 dcmn_err((CE_WARN, "di_dip_find error for the given node\n"));
1577 return (DDI_WALK_TERMINATE);
1580 dcmn_err3((CE_CONT, "phci node %s%d, at offset 0x%x\n",
1581 ddi_driver_name(ph_devinfo), ddi_get_instance(ph_devinfo), off));
1583 if (vh_di_node->top_phci == 0) {
1584 vh_di_node->top_phci = off;
1585 return (DDI_WALK_CONTINUE);
1588 me = DI_NODE(di_mem_addr(pwa->st, vh_di_node->top_phci));
1590 while (me->next_phci != 0) {
1591 me = DI_NODE(di_mem_addr(pwa->st, me->next_phci));
1593 me->next_phci = off;
1595 return (DDI_WALK_CONTINUE);
1599 * Assumes all devinfo nodes in device tree have been snapshotted
1601 static void
1602 snap_driver_list(struct di_state *st, struct devnames *dnp, di_off_t *off_p)
1604 struct dev_info *node;
1605 struct di_node *me;
1606 di_off_t off;
1608 ASSERT(mutex_owned(&dnp->dn_lock));
1610 node = DEVI(dnp->dn_head);
1611 for (; node; node = node->devi_next) {
1612 if (di_dip_find(st, (dev_info_t *)node, &off) != 0)
1613 continue;
1615 ASSERT(off > 0);
1616 me = DI_NODE(di_mem_addr(st, off));
1617 ASSERT(me->next == 0 || me->next == -1);
1619 * Only nodes which were BOUND when they were
1620 * snapshotted will be added to per-driver list.
1622 if (me->next != -1)
1623 continue;
1625 *off_p = off;
1626 off_p = &me->next;
1629 *off_p = 0;
1633 * Copy the devnames array, so we have a list of drivers in the snapshot.
1634 * Also makes it possible to locate the per-driver devinfo nodes.
1636 static di_off_t
1637 di_copydevnm(di_off_t *off_p, struct di_state *st)
1639 int i;
1640 di_off_t off;
1641 size_t size;
1642 struct di_devnm *dnp;
1644 dcmn_err2((CE_CONT, "di_copydevnm: *off_p = %p\n", (void *)off_p));
1647 * make sure there is some allocated memory
1649 size = devcnt * sizeof (struct di_devnm);
1650 *off_p = off = di_checkmem(st, *off_p, size);
1651 dnp = DI_DEVNM(di_mem_addr(st, off));
1652 off += size;
1654 dcmn_err((CE_CONT, "Start copying devnamesp[%d] at offset 0x%x\n",
1655 devcnt, off));
1657 for (i = 0; i < devcnt; i++) {
1658 if (devnamesp[i].dn_name == NULL) {
1659 continue;
1663 * dn_name is not freed during driver unload or removal.
1665 * There is a race condition when make_devname() changes
1666 * dn_name during our strcpy. This should be rare since
1667 * only add_drv does this. At any rate, we never had a
1668 * problem with ddi_name_to_major(), which should have
1669 * the same problem.
1671 dcmn_err2((CE_CONT, "di_copydevnm: %s%d, off=%x\n",
1672 devnamesp[i].dn_name, devnamesp[i].dn_instance, off));
1674 size = strlen(devnamesp[i].dn_name) + 1;
1675 dnp[i].name = off = di_checkmem(st, off, size);
1676 (void) strcpy((char *)di_mem_addr(st, off),
1677 devnamesp[i].dn_name);
1678 off += size;
1680 mutex_enter(&devnamesp[i].dn_lock);
1683 * Snapshot per-driver node list
1685 snap_driver_list(st, &devnamesp[i], &dnp[i].head);
1688 * This is not used by libdevinfo, leave it for now
1690 dnp[i].flags = devnamesp[i].dn_flags;
1691 dnp[i].instance = devnamesp[i].dn_instance;
1694 * get global properties
1696 if ((DINFOPROP & st->command) &&
1697 devnamesp[i].dn_global_prop_ptr) {
1698 dnp[i].global_prop = off;
1699 off = di_getprop(DI_PROP_GLB_LIST,
1700 &devnamesp[i].dn_global_prop_ptr->prop_list,
1701 &dnp[i].global_prop, st, NULL);
1705 * Bit encode driver ops: & bus_ops, cb_ops, & cb_ops->cb_str
1707 if (CB_DRV_INSTALLED(devopsp[i])) {
1708 if (devopsp[i]->devo_cb_ops) {
1709 dnp[i].ops |= DI_CB_OPS;
1710 if (devopsp[i]->devo_cb_ops->cb_str)
1711 dnp[i].ops |= DI_STREAM_OPS;
1713 if (NEXUS_DRV(devopsp[i])) {
1714 dnp[i].ops |= DI_BUS_OPS;
1718 mutex_exit(&devnamesp[i].dn_lock);
1721 dcmn_err((CE_CONT, "End copying devnamesp at offset 0x%x\n", off));
1723 return (off);
1727 * Copy the kernel devinfo tree. The tree and the devnames array forms
1728 * the entire snapshot (see also di_copydevnm).
1730 static di_off_t
1731 di_copytree(struct dev_info *root, di_off_t *off_p, struct di_state *st)
1733 di_off_t off;
1734 struct dev_info *node;
1735 struct di_stack *dsp = kmem_zalloc(sizeof (struct di_stack), KM_SLEEP);
1737 dcmn_err((CE_CONT, "di_copytree: root = %p, *off_p = %x\n",
1738 (void *)root, *off_p));
1740 /* force attach drivers */
1741 if (i_ddi_devi_attached((dev_info_t *)root) &&
1742 (st->command & DINFOSUBTREE) && (st->command & DINFOFORCE)) {
1743 (void) ndi_devi_config((dev_info_t *)root,
1744 NDI_CONFIG | NDI_DEVI_PERSIST | NDI_NO_EVENT |
1745 NDI_DRV_CONF_REPROBE);
1749 * Push top_devinfo onto a stack
1751 * The stack is necessary to avoid recursion, which can overrun
1752 * the kernel stack.
1754 PUSH_STACK(dsp, root, off_p);
1757 * As long as there is a node on the stack, copy the node.
1758 * di_copynode() is responsible for pushing and popping
1759 * child and sibling nodes on the stack.
1761 while (!EMPTY_STACK(dsp)) {
1762 node = TOP_NODE(dsp);
1763 off = di_copynode(node, dsp, st);
1767 * Free the stack structure
1769 kmem_free(dsp, sizeof (struct di_stack));
1771 return (off);
1775 * This is the core function, which copies all data associated with a single
1776 * node into the snapshot. The amount of information is determined by the
1777 * ioctl command.
1779 static di_off_t
1780 di_copynode(struct dev_info *node, struct di_stack *dsp, struct di_state *st)
1782 di_off_t off;
1783 struct di_node *me;
1784 size_t size;
1785 struct dev_info *n;
1787 dcmn_err2((CE_CONT, "di_copynode: depth = %x\n", dsp->depth));
1788 ASSERT((node != NULL) && (node == TOP_NODE(dsp)));
1791 * check memory usage, and fix offsets accordingly.
1793 size = sizeof (struct di_node);
1794 *(TOP_OFFSET(dsp)) = off = di_checkmem(st, *(TOP_OFFSET(dsp)), size);
1795 me = DI_NODE(di_mem_addr(st, off));
1796 me->self = off;
1797 off += size;
1799 dcmn_err((CE_CONT, "copy node %s, instance #%d, at offset 0x%x\n",
1800 node->devi_node_name, node->devi_instance, off));
1803 * Node parameters:
1804 * self -- offset of current node within snapshot
1805 * nodeid -- pointer to PROM node (tri-valued)
1806 * state -- hot plugging device state
1807 * node_state -- devinfo node state
1809 me->instance = node->devi_instance;
1810 me->nodeid = node->devi_nodeid;
1811 me->node_class = node->devi_node_class;
1812 me->attributes = node->devi_node_attributes;
1813 me->state = node->devi_state;
1814 me->flags = node->devi_flags;
1815 me->node_state = node->devi_node_state;
1816 me->next_vhci = 0; /* Filled up by build_vhci_list. */
1817 me->top_phci = 0; /* Filled up by build_phci_list. */
1818 me->next_phci = 0; /* Filled up by build_phci_list. */
1819 me->multipath_component = MULTIPATH_COMPONENT_NONE; /* set default. */
1820 me->user_private_data = NULL;
1823 * Get parent's offset in snapshot from the stack
1824 * and store it in the current node
1826 if (dsp->depth > 1) {
1827 me->parent = *(PARENT_OFFSET(dsp));
1831 * Save the offset of this di_node in a hash table.
1832 * This is used later to resolve references to this
1833 * dip from other parts of the tree (per-driver list,
1834 * multipathing linkages, layered usage linkages).
1835 * The key used for the hash table is derived from
1836 * information in the dip.
1838 di_register_dip(st, (dev_info_t *)node, me->self);
1840 #ifdef DEVID_COMPATIBILITY
1841 /* check for devid as property marker */
1842 if (node->devi_devid_str) {
1843 ddi_devid_t devid;
1846 * The devid is now represented as a property. For
1847 * compatibility with di_devid() interface in libdevinfo we
1848 * must return it as a binary structure in the snapshot. When
1849 * (if) di_devid() is removed from libdevinfo then the code
1850 * related to DEVID_COMPATIBILITY can be removed.
1852 if (ddi_devid_str_decode(node->devi_devid_str, &devid, NULL) ==
1853 DDI_SUCCESS) {
1854 size = ddi_devid_sizeof(devid);
1855 off = di_checkmem(st, off, size);
1856 me->devid = off;
1857 bcopy(devid, di_mem_addr(st, off), size);
1858 off += size;
1859 ddi_devid_free(devid);
1862 #endif /* DEVID_COMPATIBILITY */
1864 if (node->devi_node_name) {
1865 size = strlen(node->devi_node_name) + 1;
1866 me->node_name = off = di_checkmem(st, off, size);
1867 (void) strcpy(di_mem_addr(st, off), node->devi_node_name);
1868 off += size;
1871 if (node->devi_compat_names && (node->devi_compat_length > 1)) {
1872 size = node->devi_compat_length;
1873 me->compat_names = off = di_checkmem(st, off, size);
1874 me->compat_length = (int)size;
1875 bcopy(node->devi_compat_names, di_mem_addr(st, off), size);
1876 off += size;
1879 if (node->devi_addr) {
1880 size = strlen(node->devi_addr) + 1;
1881 me->address = off = di_checkmem(st, off, size);
1882 (void) strcpy(di_mem_addr(st, off), node->devi_addr);
1883 off += size;
1886 if (node->devi_binding_name) {
1887 size = strlen(node->devi_binding_name) + 1;
1888 me->bind_name = off = di_checkmem(st, off, size);
1889 (void) strcpy(di_mem_addr(st, off), node->devi_binding_name);
1890 off += size;
1893 me->drv_major = node->devi_major;
1896 * If the dip is BOUND, set the next pointer of the
1897 * per-instance list to -1, indicating that it is yet to be resolved.
1898 * This will be resolved later in snap_driver_list().
1900 if (me->drv_major != -1) {
1901 me->next = -1;
1902 } else {
1903 me->next = 0;
1907 * An optimization to skip mutex_enter when not needed.
1909 if (!((DINFOMINOR | DINFOPROP | DINFOPATH | DINFOHP) & st->command)) {
1910 goto priv_data;
1914 * LOCKING: We already have an active ndi_devi_enter to gather the
1915 * minor data, and we will take devi_lock to gather properties as
1916 * needed off di_getprop.
1918 if (!(DINFOMINOR & st->command)) {
1919 goto path;
1922 ASSERT(DEVI_BUSY_OWNED(node));
1923 if (node->devi_minor) { /* minor data */
1924 me->minor_data = off;
1925 off = di_getmdata(node->devi_minor, &me->minor_data,
1926 me->self, st);
1929 path:
1930 if (!(DINFOPATH & st->command)) {
1931 goto property;
1934 if (MDI_VHCI(node)) {
1935 me->multipath_component = MULTIPATH_COMPONENT_VHCI;
1938 if (MDI_CLIENT(node)) {
1939 me->multipath_component = MULTIPATH_COMPONENT_CLIENT;
1940 me->multipath_client = off;
1941 off = di_getpath_data((dev_info_t *)node, &me->multipath_client,
1942 me->self, st, 1);
1943 dcmn_err((CE_WARN, "me->multipath_client = %x for node %p "
1944 "component type = %d. off=%d",
1945 me->multipath_client,
1946 (void *)node, node->devi_mdi_component, off));
1949 if (MDI_PHCI(node)) {
1950 me->multipath_component = MULTIPATH_COMPONENT_PHCI;
1951 me->multipath_phci = off;
1952 off = di_getpath_data((dev_info_t *)node, &me->multipath_phci,
1953 me->self, st, 0);
1954 dcmn_err((CE_WARN, "me->multipath_phci = %x for node %p "
1955 "component type = %d. off=%d",
1956 me->multipath_phci,
1957 (void *)node, node->devi_mdi_component, off));
1960 property:
1961 if (!(DINFOPROP & st->command)) {
1962 goto hotplug_data;
1965 if (node->devi_drv_prop_ptr) { /* driver property list */
1966 me->drv_prop = off;
1967 off = di_getprop(DI_PROP_DRV_LIST, &node->devi_drv_prop_ptr,
1968 &me->drv_prop, st, node);
1971 if (node->devi_sys_prop_ptr) { /* system property list */
1972 me->sys_prop = off;
1973 off = di_getprop(DI_PROP_SYS_LIST, &node->devi_sys_prop_ptr,
1974 &me->sys_prop, st, node);
1977 if (node->devi_hw_prop_ptr) { /* hardware property list */
1978 me->hw_prop = off;
1979 off = di_getprop(DI_PROP_HW_LIST, &node->devi_hw_prop_ptr,
1980 &me->hw_prop, st, node);
1983 if (node->devi_global_prop_list == NULL) {
1984 me->glob_prop = (di_off_t)-1; /* not global property */
1985 } else {
1987 * Make copy of global property list if this devinfo refers
1988 * global properties different from what's on the devnames
1989 * array. It can happen if there has been a forced
1990 * driver.conf update. See mod_drv(1M).
1992 ASSERT(me->drv_major != -1);
1993 if (node->devi_global_prop_list !=
1994 devnamesp[me->drv_major].dn_global_prop_ptr) {
1995 me->glob_prop = off;
1996 off = di_getprop(DI_PROP_GLB_LIST,
1997 &node->devi_global_prop_list->prop_list,
1998 &me->glob_prop, st, node);
2002 hotplug_data:
2003 if (!(DINFOHP & st->command)) {
2004 goto priv_data;
2007 if (node->devi_hp_hdlp) { /* hotplug data */
2008 me->hp_data = off;
2009 off = di_gethpdata(node->devi_hp_hdlp, &me->hp_data, st);
2012 priv_data:
2013 if (!(DINFOPRIVDATA & st->command)) {
2014 goto pm_info;
2017 if (ddi_get_parent_data((dev_info_t *)node) != NULL) {
2018 me->parent_data = off;
2019 off = di_getppdata(node, &me->parent_data, st);
2022 if (ddi_get_driver_private((dev_info_t *)node) != NULL) {
2023 me->driver_data = off;
2024 off = di_getdpdata(node, &me->driver_data, st);
2027 pm_info: /* NOT implemented */
2029 subtree:
2030 /* keep the stack aligned */
2031 off = DI_ALIGN(off);
2033 if (!(DINFOSUBTREE & st->command)) {
2034 POP_STACK(dsp);
2035 return (off);
2038 child:
2040 * If there is a visible child--push child onto stack.
2041 * Hold the parent (me) busy while doing so.
2043 if ((n = node->devi_child) != NULL) {
2044 /* skip hidden nodes */
2045 while (n && ndi_dev_is_hidden_node((dev_info_t *)n))
2046 n = n->devi_sibling;
2047 if (n) {
2048 me->child = off;
2049 PUSH_STACK(dsp, n, &me->child);
2050 return (me->child);
2054 sibling:
2056 * Done with any child nodes, unroll the stack till a visible
2057 * sibling of a parent node is found or root node is reached.
2059 POP_STACK(dsp);
2060 while (!EMPTY_STACK(dsp)) {
2061 if ((n = node->devi_sibling) != NULL) {
2062 /* skip hidden nodes */
2063 while (n && ndi_dev_is_hidden_node((dev_info_t *)n))
2064 n = n->devi_sibling;
2065 if (n) {
2066 me->sibling = DI_ALIGN(off);
2067 PUSH_STACK(dsp, n, &me->sibling);
2068 return (me->sibling);
2071 node = TOP_NODE(dsp);
2072 me = DI_NODE(di_mem_addr(st, *(TOP_OFFSET(dsp))));
2073 POP_STACK(dsp);
2077 * DONE with all nodes
2079 return (off);
2082 static i_lnode_t *
2083 i_lnode_alloc(int modid)
2085 i_lnode_t *i_lnode;
2087 i_lnode = kmem_zalloc(sizeof (i_lnode_t), KM_SLEEP);
2089 ASSERT(modid != -1);
2090 i_lnode->modid = modid;
2092 return (i_lnode);
2095 static void
2096 i_lnode_free(i_lnode_t *i_lnode)
2098 kmem_free(i_lnode, sizeof (i_lnode_t));
2101 static void
2102 i_lnode_check_free(i_lnode_t *i_lnode)
2104 /* This lnode and its dip must have been snapshotted */
2105 ASSERT(i_lnode->self > 0);
2106 ASSERT(i_lnode->di_node->self > 0);
2108 /* at least 1 link (in or out) must exist for this lnode */
2109 ASSERT(i_lnode->link_in || i_lnode->link_out);
2111 i_lnode_free(i_lnode);
2114 static i_link_t *
2115 i_link_alloc(int spec_type)
2117 i_link_t *i_link;
2119 i_link = kmem_zalloc(sizeof (i_link_t), KM_SLEEP);
2120 i_link->spec_type = spec_type;
2122 return (i_link);
2125 static void
2126 i_link_check_free(i_link_t *i_link)
2128 /* This link must have been snapshotted */
2129 ASSERT(i_link->self > 0);
2131 /* Both endpoint lnodes must exist for this link */
2132 ASSERT(i_link->src_lnode);
2133 ASSERT(i_link->tgt_lnode);
2135 kmem_free(i_link, sizeof (i_link_t));
2138 /*ARGSUSED*/
2139 static uint_t
2140 i_lnode_hashfunc(void *arg, mod_hash_key_t key)
2142 i_lnode_t *i_lnode = (i_lnode_t *)key;
2143 struct di_node *ptr;
2144 dev_t dev;
2146 dev = i_lnode->devt;
2147 if (dev != DDI_DEV_T_NONE)
2148 return (i_lnode->modid + getminor(dev) + getmajor(dev));
2150 ptr = i_lnode->di_node;
2151 ASSERT(ptr->self > 0);
2152 if (ptr) {
2153 uintptr_t k = (uintptr_t)ptr;
2154 k >>= (int)highbit(sizeof (struct di_node));
2155 return ((uint_t)k);
2158 return (i_lnode->modid);
2161 static int
2162 i_lnode_cmp(void *arg1, void *arg2)
2164 i_lnode_t *i_lnode1 = (i_lnode_t *)arg1;
2165 i_lnode_t *i_lnode2 = (i_lnode_t *)arg2;
2167 if (i_lnode1->modid != i_lnode2->modid) {
2168 return ((i_lnode1->modid < i_lnode2->modid) ? -1 : 1);
2171 if (i_lnode1->di_node != i_lnode2->di_node)
2172 return ((i_lnode1->di_node < i_lnode2->di_node) ? -1 : 1);
2174 if (i_lnode1->devt != i_lnode2->devt)
2175 return ((i_lnode1->devt < i_lnode2->devt) ? -1 : 1);
2177 return (0);
2181 * An lnode represents a {dip, dev_t} tuple. A link represents a
2182 * {src_lnode, tgt_lnode, spec_type} tuple.
2183 * The following callback assumes that LDI framework ref-counts the
2184 * src_dip and tgt_dip while invoking this callback.
2186 static int
2187 di_ldi_callback(const ldi_usage_t *ldi_usage, void *arg)
2189 struct di_state *st = (struct di_state *)arg;
2190 i_lnode_t *src_lnode, *tgt_lnode, *i_lnode;
2191 i_link_t **i_link_next, *i_link;
2192 di_off_t soff, toff;
2193 mod_hash_val_t nodep = NULL;
2194 int res;
2197 * if the source or target of this device usage information doesn't
2198 * correspond to a device node then we don't report it via
2199 * libdevinfo so return.
2201 if ((ldi_usage->src_dip == NULL) || (ldi_usage->tgt_dip == NULL))
2202 return (LDI_USAGE_CONTINUE);
2204 ASSERT(e_ddi_devi_holdcnt(ldi_usage->src_dip));
2205 ASSERT(e_ddi_devi_holdcnt(ldi_usage->tgt_dip));
2208 * Skip the ldi_usage if either src or tgt dip is not in the
2209 * snapshot. This saves us from pruning bad lnodes/links later.
2211 if (di_dip_find(st, ldi_usage->src_dip, &soff) != 0)
2212 return (LDI_USAGE_CONTINUE);
2213 if (di_dip_find(st, ldi_usage->tgt_dip, &toff) != 0)
2214 return (LDI_USAGE_CONTINUE);
2216 ASSERT(soff > 0);
2217 ASSERT(toff > 0);
2220 * allocate an i_lnode and add it to the lnode hash
2221 * if it is not already present. For this particular
2222 * link the lnode is a source, but it may
2223 * participate as tgt or src in any number of layered
2224 * operations - so it may already be in the hash.
2226 i_lnode = i_lnode_alloc(ldi_usage->src_modid);
2227 i_lnode->di_node = DI_NODE(di_mem_addr(st, soff));
2228 i_lnode->devt = ldi_usage->src_devt;
2230 res = mod_hash_find(st->lnode_hash, i_lnode, &nodep);
2231 if (res == MH_ERR_NOTFOUND) {
2233 * new i_lnode
2234 * add it to the hash and increment the lnode count
2236 res = mod_hash_insert(st->lnode_hash, i_lnode, i_lnode);
2237 ASSERT(res == 0);
2238 st->lnode_count++;
2239 src_lnode = i_lnode;
2240 } else {
2241 /* this i_lnode already exists in the lnode_hash */
2242 i_lnode_free(i_lnode);
2243 src_lnode = (i_lnode_t *)nodep;
2247 * allocate a tgt i_lnode and add it to the lnode hash
2249 i_lnode = i_lnode_alloc(ldi_usage->tgt_modid);
2250 i_lnode->di_node = DI_NODE(di_mem_addr(st, toff));
2251 i_lnode->devt = ldi_usage->tgt_devt;
2253 res = mod_hash_find(st->lnode_hash, i_lnode, &nodep);
2254 if (res == MH_ERR_NOTFOUND) {
2256 * new i_lnode
2257 * add it to the hash and increment the lnode count
2259 res = mod_hash_insert(st->lnode_hash, i_lnode, i_lnode);
2260 ASSERT(res == 0);
2261 st->lnode_count++;
2262 tgt_lnode = i_lnode;
2263 } else {
2264 /* this i_lnode already exists in the lnode_hash */
2265 i_lnode_free(i_lnode);
2266 tgt_lnode = (i_lnode_t *)nodep;
2270 * allocate a i_link
2272 i_link = i_link_alloc(ldi_usage->tgt_spec_type);
2273 i_link->src_lnode = src_lnode;
2274 i_link->tgt_lnode = tgt_lnode;
2277 * add this link onto the src i_lnodes outbound i_link list
2279 i_link_next = &(src_lnode->link_out);
2280 while (*i_link_next != NULL) {
2281 if ((i_lnode_cmp(tgt_lnode, (*i_link_next)->tgt_lnode) == 0) &&
2282 (i_link->spec_type == (*i_link_next)->spec_type)) {
2283 /* this link already exists */
2284 kmem_free(i_link, sizeof (i_link_t));
2285 return (LDI_USAGE_CONTINUE);
2287 i_link_next = &((*i_link_next)->src_link_next);
2289 *i_link_next = i_link;
2292 * add this link onto the tgt i_lnodes inbound i_link list
2294 i_link_next = &(tgt_lnode->link_in);
2295 while (*i_link_next != NULL) {
2296 ASSERT(i_lnode_cmp(src_lnode, (*i_link_next)->src_lnode) != 0);
2297 i_link_next = &((*i_link_next)->tgt_link_next);
2299 *i_link_next = i_link;
2302 * add this i_link to the link hash
2304 res = mod_hash_insert(st->link_hash, i_link, i_link);
2305 ASSERT(res == 0);
2306 st->link_count++;
2308 return (LDI_USAGE_CONTINUE);
/*
 * Walker state shared by i_lnode_walker() and i_link_walker() while
 * di_getlink_data() copies the layering information into the snapshot.
 */
struct i_layer_data {
	struct di_state	*st;		/* snapshot state */
	int		lnode_count;	/* lnodes snapshotted so far */
	int		link_count;	/* links snapshotted so far */
	di_off_t	lnode_off;	/* offset of the di_lnode array */
	di_off_t	link_off;	/* offset of the di_link array */
};
2319 /*ARGSUSED*/
2320 static uint_t
2321 i_link_walker(mod_hash_key_t key, mod_hash_val_t *val, void *arg)
2323 i_link_t *i_link = (i_link_t *)key;
2324 struct i_layer_data *data = arg;
2325 struct di_link *me;
2326 struct di_lnode *melnode;
2327 struct di_node *medinode;
2329 ASSERT(i_link->self == 0);
2331 i_link->self = data->link_off +
2332 (data->link_count * sizeof (struct di_link));
2333 data->link_count++;
2335 ASSERT(data->link_off > 0 && data->link_count > 0);
2336 ASSERT(data->lnode_count == data->st->lnode_count); /* lnodes done */
2337 ASSERT(data->link_count <= data->st->link_count);
2339 /* fill in fields for the di_link snapshot */
2340 me = DI_LINK(di_mem_addr(data->st, i_link->self));
2341 me->self = i_link->self;
2342 me->spec_type = i_link->spec_type;
2345 * The src_lnode and tgt_lnode i_lnode_t for this i_link_t
2346 * are created during the LDI table walk. Since we are
2347 * walking the link hash, the lnode hash has already been
2348 * walked and the lnodes have been snapshotted. Save lnode
2349 * offsets.
2351 me->src_lnode = i_link->src_lnode->self;
2352 me->tgt_lnode = i_link->tgt_lnode->self;
2355 * Save this link's offset in the src_lnode snapshot's link_out
2356 * field
2358 melnode = DI_LNODE(di_mem_addr(data->st, me->src_lnode));
2359 me->src_link_next = melnode->link_out;
2360 melnode->link_out = me->self;
2363 * Put this link on the tgt_lnode's link_in field
2365 melnode = DI_LNODE(di_mem_addr(data->st, me->tgt_lnode));
2366 me->tgt_link_next = melnode->link_in;
2367 melnode->link_in = me->self;
2370 * An i_lnode_t is only created if the corresponding dip exists
2371 * in the snapshot. A pointer to the di_node is saved in the
2372 * i_lnode_t when it is allocated. For this link, get the di_node
2373 * for the source lnode. Then put the link on the di_node's list
2374 * of src links
2376 medinode = i_link->src_lnode->di_node;
2377 me->src_node_next = medinode->src_links;
2378 medinode->src_links = me->self;
2381 * Put this link on the tgt_links list of the target
2382 * dip.
2384 medinode = i_link->tgt_lnode->di_node;
2385 me->tgt_node_next = medinode->tgt_links;
2386 medinode->tgt_links = me->self;
2388 return (MH_WALK_CONTINUE);
2391 /*ARGSUSED*/
2392 static uint_t
2393 i_lnode_walker(mod_hash_key_t key, mod_hash_val_t *val, void *arg)
2395 i_lnode_t *i_lnode = (i_lnode_t *)key;
2396 struct i_layer_data *data = arg;
2397 struct di_lnode *me;
2398 struct di_node *medinode;
2400 ASSERT(i_lnode->self == 0);
2402 i_lnode->self = data->lnode_off +
2403 (data->lnode_count * sizeof (struct di_lnode));
2404 data->lnode_count++;
2406 ASSERT(data->lnode_off > 0 && data->lnode_count > 0);
2407 ASSERT(data->link_count == 0); /* links not done yet */
2408 ASSERT(data->lnode_count <= data->st->lnode_count);
2410 /* fill in fields for the di_lnode snapshot */
2411 me = DI_LNODE(di_mem_addr(data->st, i_lnode->self));
2412 me->self = i_lnode->self;
2414 if (i_lnode->devt == DDI_DEV_T_NONE) {
2415 me->dev_major = DDI_MAJOR_T_NONE;
2416 me->dev_minor = DDI_MAJOR_T_NONE;
2417 } else {
2418 me->dev_major = getmajor(i_lnode->devt);
2419 me->dev_minor = getminor(i_lnode->devt);
2423 * The dip corresponding to this lnode must exist in
2424 * the snapshot or we wouldn't have created the i_lnode_t
2425 * during LDI walk. Save the offset of the dip.
2427 ASSERT(i_lnode->di_node && i_lnode->di_node->self > 0);
2428 me->node = i_lnode->di_node->self;
2431 * There must be at least one link in or out of this lnode
2432 * or we wouldn't have created it. These fields will be set
2433 * during the link hash walk.
2435 ASSERT((i_lnode->link_in != NULL) || (i_lnode->link_out != NULL));
2438 * set the offset of the devinfo node associated with this
2439 * lnode. Also update the node_next next pointer. this pointer
2440 * is set if there are multiple lnodes associated with the same
2441 * devinfo node. (could occure when multiple minor nodes
2442 * are open for one device, etc.)
2444 medinode = i_lnode->di_node;
2445 me->node_next = medinode->lnodes;
2446 medinode->lnodes = me->self;
2448 return (MH_WALK_CONTINUE);
2451 static di_off_t
2452 di_getlink_data(di_off_t off, struct di_state *st)
2454 struct i_layer_data data = {0};
2455 size_t size;
2457 dcmn_err2((CE_CONT, "di_copylyr: off = %x\n", off));
2459 st->lnode_hash = mod_hash_create_extended("di_lnode_hash", 32,
2460 mod_hash_null_keydtor, (void (*)(mod_hash_val_t))i_lnode_check_free,
2461 i_lnode_hashfunc, NULL, i_lnode_cmp, KM_SLEEP);
2463 st->link_hash = mod_hash_create_ptrhash("di_link_hash", 32,
2464 (void (*)(mod_hash_val_t))i_link_check_free, sizeof (i_link_t));
2466 /* get driver layering information */
2467 (void) ldi_usage_walker(st, di_ldi_callback);
2469 /* check if there is any link data to include in the snapshot */
2470 if (st->lnode_count == 0) {
2471 ASSERT(st->link_count == 0);
2472 goto out;
2475 ASSERT(st->link_count != 0);
2477 /* get a pointer to snapshot memory for all the di_lnodes */
2478 size = sizeof (struct di_lnode) * st->lnode_count;
2479 data.lnode_off = off = di_checkmem(st, off, size);
2480 off += size;
2482 /* get a pointer to snapshot memory for all the di_links */
2483 size = sizeof (struct di_link) * st->link_count;
2484 data.link_off = off = di_checkmem(st, off, size);
2485 off += size;
2487 data.lnode_count = data.link_count = 0;
2488 data.st = st;
2491 * We have lnodes and links that will go into the
2492 * snapshot, so let's walk the respective hashes
2493 * and snapshot them. The various linkages are
2494 * also set up during the walk.
2496 mod_hash_walk(st->lnode_hash, i_lnode_walker, (void *)&data);
2497 ASSERT(data.lnode_count == st->lnode_count);
2499 mod_hash_walk(st->link_hash, i_link_walker, (void *)&data);
2500 ASSERT(data.link_count == st->link_count);
2502 out:
2503 /* free up the i_lnodes and i_links used to create the snapshot */
2504 mod_hash_destroy_hash(st->lnode_hash);
2505 mod_hash_destroy_hash(st->link_hash);
2506 st->lnode_count = 0;
2507 st->link_count = 0;
2509 return (off);
2514 * Copy all minor data nodes attached to a devinfo node into the snapshot.
2515 * It is called from di_copynode with active ndi_devi_enter to protect
2516 * the list of minor nodes.
2518 static di_off_t
2519 di_getmdata(struct ddi_minor_data *mnode, di_off_t *off_p, di_off_t node,
2520 struct di_state *st)
2522 di_off_t off;
2523 struct di_minor *me;
2524 size_t size;
2526 dcmn_err2((CE_CONT, "di_getmdata:\n"));
2529 * check memory first
2531 off = di_checkmem(st, *off_p, sizeof (struct di_minor));
2532 *off_p = off;
2534 do {
2535 me = DI_MINOR(di_mem_addr(st, off));
2536 me->self = off;
2537 me->type = mnode->type;
2538 me->node = node;
2539 me->user_private_data = NULL;
2541 off += sizeof (struct di_minor);
2544 * Split dev_t to major/minor, so it works for
2545 * both ILP32 and LP64 model
2547 me->dev_major = getmajor(mnode->ddm_dev);
2548 me->dev_minor = getminor(mnode->ddm_dev);
2549 me->spec_type = mnode->ddm_spec_type;
2551 if (mnode->ddm_name) {
2552 size = strlen(mnode->ddm_name) + 1;
2553 me->name = off = di_checkmem(st, off, size);
2554 (void) strcpy(di_mem_addr(st, off), mnode->ddm_name);
2555 off += size;
2558 if (mnode->ddm_node_type) {
2559 size = strlen(mnode->ddm_node_type) + 1;
2560 me->node_type = off = di_checkmem(st, off, size);
2561 (void) strcpy(di_mem_addr(st, off),
2562 mnode->ddm_node_type);
2563 off += size;
2566 off = di_checkmem(st, off, sizeof (struct di_minor));
2567 me->next = off;
2568 mnode = mnode->next;
2569 } while (mnode);
2571 me->next = 0;
2573 return (off);
2577 * di_register_dip(), di_find_dip(): The dip must be protected
2578 * from deallocation when using these routines - this can either
2579 * be a reference count, a busy hold or a per-driver lock.
2582 static void
2583 di_register_dip(struct di_state *st, dev_info_t *dip, di_off_t off)
2585 struct dev_info *node = DEVI(dip);
2586 struct di_key *key = kmem_zalloc(sizeof (*key), KM_SLEEP);
2587 struct di_dkey *dk;
2589 ASSERT(dip);
2590 ASSERT(off > 0);
2592 key->k_type = DI_DKEY;
2593 dk = &(key->k_u.dkey);
2595 dk->dk_dip = dip;
2596 dk->dk_major = node->devi_major;
2597 dk->dk_inst = node->devi_instance;
2598 dk->dk_nodeid = node->devi_nodeid;
2600 if (mod_hash_insert(st->reg_dip_hash, (mod_hash_key_t)key,
2601 (mod_hash_val_t)(uintptr_t)off) != 0) {
2602 panic(
2603 "duplicate devinfo (%p) registered during device "
2604 "tree walk", (void *)dip);
2609 static int
2610 di_dip_find(struct di_state *st, dev_info_t *dip, di_off_t *off_p)
2613 * uintptr_t must be used because it matches the size of void *;
2614 * mod_hash expects clients to place results into pointer-size
2615 * containers; since di_off_t is always a 32-bit offset, alignment
2616 * would otherwise be broken on 64-bit kernels.
2618 uintptr_t offset;
2619 struct di_key key = {0};
2620 struct di_dkey *dk;
2622 ASSERT(st->reg_dip_hash);
2623 ASSERT(dip);
2624 ASSERT(off_p);
2627 key.k_type = DI_DKEY;
2628 dk = &(key.k_u.dkey);
2630 dk->dk_dip = dip;
2631 dk->dk_major = DEVI(dip)->devi_major;
2632 dk->dk_inst = DEVI(dip)->devi_instance;
2633 dk->dk_nodeid = DEVI(dip)->devi_nodeid;
2635 if (mod_hash_find(st->reg_dip_hash, (mod_hash_key_t)&key,
2636 (mod_hash_val_t *)&offset) == 0) {
2637 *off_p = (di_off_t)offset;
2638 return (0);
2639 } else {
2640 return (-1);
2645 * di_register_pip(), di_find_pip(): The pip must be protected from deallocation
2646 * when using these routines. The caller must do this by protecting the
2647 * client(or phci)<->pip linkage while traversing the list and then holding the
2648 * pip when it is found in the list.
2651 static void
2652 di_register_pip(struct di_state *st, mdi_pathinfo_t *pip, di_off_t off)
2654 struct di_key *key = kmem_zalloc(sizeof (*key), KM_SLEEP);
2655 char *path_addr;
2656 struct di_pkey *pk;
2658 ASSERT(pip);
2659 ASSERT(off > 0);
2661 key->k_type = DI_PKEY;
2662 pk = &(key->k_u.pkey);
2664 pk->pk_pip = pip;
2665 path_addr = mdi_pi_get_addr(pip);
2666 if (path_addr)
2667 pk->pk_path_addr = i_ddi_strdup(path_addr, KM_SLEEP);
2668 pk->pk_client = mdi_pi_get_client(pip);
2669 pk->pk_phci = mdi_pi_get_phci(pip);
2671 if (mod_hash_insert(st->reg_pip_hash, (mod_hash_key_t)key,
2672 (mod_hash_val_t)(uintptr_t)off) != 0) {
2673 panic(
2674 "duplicate pathinfo (%p) registered during device "
2675 "tree walk", (void *)pip);
2680 * As with di_register_pip, the caller must hold or lock the pip
2682 static int
2683 di_pip_find(struct di_state *st, mdi_pathinfo_t *pip, di_off_t *off_p)
2686 * uintptr_t must be used because it matches the size of void *;
2687 * mod_hash expects clients to place results into pointer-size
2688 * containers; since di_off_t is always a 32-bit offset, alignment
2689 * would otherwise be broken on 64-bit kernels.
2691 uintptr_t offset;
2692 struct di_key key = {0};
2693 struct di_pkey *pk;
2695 ASSERT(st->reg_pip_hash);
2696 ASSERT(off_p);
2698 if (pip == NULL) {
2699 *off_p = 0;
2700 return (0);
2703 key.k_type = DI_PKEY;
2704 pk = &(key.k_u.pkey);
2706 pk->pk_pip = pip;
2707 pk->pk_path_addr = mdi_pi_get_addr(pip);
2708 pk->pk_client = mdi_pi_get_client(pip);
2709 pk->pk_phci = mdi_pi_get_phci(pip);
2711 if (mod_hash_find(st->reg_pip_hash, (mod_hash_key_t)&key,
2712 (mod_hash_val_t *)&offset) == 0) {
2713 *off_p = (di_off_t)offset;
2714 return (0);
2715 } else {
2716 return (-1);
2720 static di_path_state_t
2721 path_state_convert(mdi_pathinfo_state_t st)
2723 switch (st) {
2724 case MDI_PATHINFO_STATE_ONLINE:
2725 return (DI_PATH_STATE_ONLINE);
2726 case MDI_PATHINFO_STATE_STANDBY:
2727 return (DI_PATH_STATE_STANDBY);
2728 case MDI_PATHINFO_STATE_OFFLINE:
2729 return (DI_PATH_STATE_OFFLINE);
2730 case MDI_PATHINFO_STATE_FAULT:
2731 return (DI_PATH_STATE_FAULT);
2732 default:
2733 return (DI_PATH_STATE_UNKNOWN);
2737 static uint_t
2738 path_flags_convert(uint_t pi_path_flags)
2740 uint_t di_path_flags = 0;
2742 /* MDI_PATHINFO_FLAGS_HIDDEN nodes not in snapshot */
2744 if (pi_path_flags & MDI_PATHINFO_FLAGS_DEVICE_REMOVED)
2745 di_path_flags |= DI_PATH_FLAGS_DEVICE_REMOVED;
2747 return (di_path_flags);
2751 static di_off_t
2752 di_path_getprop(mdi_pathinfo_t *pip, di_off_t *off_p,
2753 struct di_state *st)
2755 nvpair_t *prop = NULL;
2756 struct di_path_prop *me;
2757 int off;
2758 size_t size;
2759 char *str;
2760 uchar_t *buf;
2761 uint_t nelems;
2763 off = *off_p;
2764 if (mdi_pi_get_next_prop(pip, NULL) == NULL) {
2765 *off_p = 0;
2766 return (off);
2769 off = di_checkmem(st, off, sizeof (struct di_path_prop));
2770 *off_p = off;
2772 while (prop = mdi_pi_get_next_prop(pip, prop)) {
2773 me = DI_PATHPROP(di_mem_addr(st, off));
2774 me->self = off;
2775 off += sizeof (struct di_path_prop);
2778 * property name
2780 size = strlen(nvpair_name(prop)) + 1;
2781 me->prop_name = off = di_checkmem(st, off, size);
2782 (void) strcpy(di_mem_addr(st, off), nvpair_name(prop));
2783 off += size;
2785 switch (nvpair_type(prop)) {
2786 case DATA_TYPE_BYTE:
2787 case DATA_TYPE_INT16:
2788 case DATA_TYPE_UINT16:
2789 case DATA_TYPE_INT32:
2790 case DATA_TYPE_UINT32:
2791 me->prop_type = DDI_PROP_TYPE_INT;
2792 size = sizeof (int32_t);
2793 off = di_checkmem(st, off, size);
2794 (void) nvpair_value_int32(prop,
2795 (int32_t *)di_mem_addr(st, off));
2796 break;
2798 case DATA_TYPE_INT64:
2799 case DATA_TYPE_UINT64:
2800 me->prop_type = DDI_PROP_TYPE_INT64;
2801 size = sizeof (int64_t);
2802 off = di_checkmem(st, off, size);
2803 (void) nvpair_value_int64(prop,
2804 (int64_t *)di_mem_addr(st, off));
2805 break;
2807 case DATA_TYPE_STRING:
2808 me->prop_type = DDI_PROP_TYPE_STRING;
2809 (void) nvpair_value_string(prop, &str);
2810 size = strlen(str) + 1;
2811 off = di_checkmem(st, off, size);
2812 (void) strcpy(di_mem_addr(st, off), str);
2813 break;
2815 case DATA_TYPE_BYTE_ARRAY:
2816 case DATA_TYPE_INT16_ARRAY:
2817 case DATA_TYPE_UINT16_ARRAY:
2818 case DATA_TYPE_INT32_ARRAY:
2819 case DATA_TYPE_UINT32_ARRAY:
2820 case DATA_TYPE_INT64_ARRAY:
2821 case DATA_TYPE_UINT64_ARRAY:
2822 me->prop_type = DDI_PROP_TYPE_BYTE;
2823 (void) nvpair_value_byte_array(prop, &buf, &nelems);
2824 size = nelems;
2825 if (nelems != 0) {
2826 off = di_checkmem(st, off, size);
2827 bcopy(buf, di_mem_addr(st, off), size);
2829 break;
2831 default: /* Unknown or unhandled type; skip it */
2832 size = 0;
2833 break;
2836 if (size > 0) {
2837 me->prop_data = off;
2840 me->prop_len = (int)size;
2841 off += size;
2843 off = di_checkmem(st, off, sizeof (struct di_path_prop));
2844 me->prop_next = off;
2847 me->prop_next = 0;
2848 return (off);
2852 static void
2853 di_path_one_endpoint(struct di_path *me, di_off_t noff, di_off_t **off_pp,
2854 int get_client)
2856 if (get_client) {
2857 ASSERT(me->path_client == 0);
2858 me->path_client = noff;
2859 ASSERT(me->path_c_link == 0);
2860 *off_pp = &me->path_c_link;
2861 me->path_snap_state &=
2862 ~(DI_PATH_SNAP_NOCLIENT | DI_PATH_SNAP_NOCLINK);
2863 } else {
2864 ASSERT(me->path_phci == 0);
2865 me->path_phci = noff;
2866 ASSERT(me->path_p_link == 0);
2867 *off_pp = &me->path_p_link;
2868 me->path_snap_state &=
2869 ~(DI_PATH_SNAP_NOPHCI | DI_PATH_SNAP_NOPLINK);
2874 * off_p: pointer to the linkage field. This links pips along the client|phci
2875 * linkage list.
2876 * noff : Offset for the endpoint dip snapshot.
2878 static di_off_t
2879 di_getpath_data(dev_info_t *dip, di_off_t *off_p, di_off_t noff,
2880 struct di_state *st, int get_client)
2882 di_off_t off;
2883 mdi_pathinfo_t *pip;
2884 struct di_path *me;
2885 mdi_pathinfo_t *(*next_pip)(dev_info_t *, mdi_pathinfo_t *);
2886 size_t size;
2888 dcmn_err2((CE_WARN, "di_getpath_data: client = %d", get_client));
2891 * The naming of the following mdi_xyz() is unfortunately
2892 * non-intuitive. mdi_get_next_phci_path() follows the
2893 * client_link i.e. the list of pip's belonging to the
2894 * given client dip.
2896 if (get_client)
2897 next_pip = &mdi_get_next_phci_path;
2898 else
2899 next_pip = &mdi_get_next_client_path;
2901 off = *off_p;
2903 pip = NULL;
2904 while (pip = (*next_pip)(dip, pip)) {
2905 di_off_t stored_offset;
2907 dcmn_err((CE_WARN, "marshalling pip = %p", (void *)pip));
2909 mdi_pi_lock(pip);
2911 /* We don't represent hidden paths in the snapshot */
2912 if (mdi_pi_ishidden(pip)) {
2913 dcmn_err((CE_WARN, "hidden, skip"));
2914 mdi_pi_unlock(pip);
2915 continue;
2918 if (di_pip_find(st, pip, &stored_offset) != -1) {
2920 * We've already seen this pathinfo node so we need to
2921 * take care not to snap it again; However, one endpoint
2922 * and linkage will be set here. The other endpoint
2923 * and linkage has already been set when the pip was
2924 * first snapshotted i.e. when the other endpoint dip
2925 * was snapshotted.
2927 me = DI_PATH(di_mem_addr(st, stored_offset));
2928 *off_p = stored_offset;
2930 di_path_one_endpoint(me, noff, &off_p, get_client);
2933 * The other endpoint and linkage were set when this
2934 * pip was snapshotted. So we are done with both
2935 * endpoints and linkages.
2937 ASSERT(!(me->path_snap_state &
2938 (DI_PATH_SNAP_NOCLIENT|DI_PATH_SNAP_NOPHCI)));
2939 ASSERT(!(me->path_snap_state &
2940 (DI_PATH_SNAP_NOCLINK|DI_PATH_SNAP_NOPLINK)));
2942 mdi_pi_unlock(pip);
2943 continue;
2947 * Now that we need to snapshot this pip, check memory
2949 size = sizeof (struct di_path);
2950 *off_p = off = di_checkmem(st, off, size);
2951 me = DI_PATH(di_mem_addr(st, off));
2952 me->self = off;
2953 off += size;
2955 me->path_snap_state =
2956 DI_PATH_SNAP_NOCLINK | DI_PATH_SNAP_NOPLINK;
2957 me->path_snap_state |=
2958 DI_PATH_SNAP_NOCLIENT | DI_PATH_SNAP_NOPHCI;
2961 * Zero out fields as di_checkmem() doesn't guarantee
2962 * zero-filled memory
2964 me->path_client = me->path_phci = 0;
2965 me->path_c_link = me->path_p_link = 0;
2967 di_path_one_endpoint(me, noff, &off_p, get_client);
2970 * Note the existence of this pathinfo
2972 di_register_pip(st, pip, me->self);
2974 me->path_state = path_state_convert(mdi_pi_get_state(pip));
2975 me->path_flags = path_flags_convert(mdi_pi_get_flags(pip));
2977 me->path_instance = mdi_pi_get_path_instance(pip);
2980 * Get intermediate addressing info.
2982 size = strlen(mdi_pi_get_addr(pip)) + 1;
2983 me->path_addr = off = di_checkmem(st, off, size);
2984 (void) strcpy(di_mem_addr(st, off), mdi_pi_get_addr(pip));
2985 off += size;
2988 * Get path properties if props are to be included in the
2989 * snapshot
2991 if (DINFOPROP & st->command) {
2992 me->path_prop = off;
2993 off = di_path_getprop(pip, &me->path_prop, st);
2994 } else {
2995 me->path_prop = 0;
2998 mdi_pi_unlock(pip);
3001 *off_p = 0;
3002 return (off);
3006 * Return driver prop_op entry point for the specified devinfo node.
3008 * To return a non-NULL value:
3009 * - driver must be attached and held:
3010 * If driver is not attached we ignore the driver property list.
3011 * No one should rely on such properties.
3012 * - driver "cb_prop_op != ddi_prop_op":
3013 * If "cb_prop_op == ddi_prop_op", framework does not need to call driver.
3014 * XXX or parent's bus_prop_op != ddi_bus_prop_op
3016 static int
3017 (*di_getprop_prop_op(struct dev_info *dip))
3018 (dev_t, dev_info_t *, ddi_prop_op_t, int, char *, caddr_t, int *)
3020 struct dev_ops *ops;
3022 /* If driver is not attached we ignore the driver property list. */
3023 if ((dip == NULL) || !i_ddi_devi_attached((dev_info_t *)dip))
3024 return (NULL);
3027 * Some nexus drivers incorrectly set cb_prop_op to nodev, nulldev,
3028 * or even NULL.
3030 ops = dip->devi_ops;
3031 if (ops && ops->devo_cb_ops &&
3032 (ops->devo_cb_ops->cb_prop_op != ddi_prop_op) &&
3033 (ops->devo_cb_ops->cb_prop_op != nodev) &&
3034 (ops->devo_cb_ops->cb_prop_op != nulldev) &&
3035 (ops->devo_cb_ops->cb_prop_op != NULL))
3036 return (ops->devo_cb_ops->cb_prop_op);
3037 return (NULL);
3040 static di_off_t
3041 di_getprop_add(int list, int dyn, struct di_state *st, struct dev_info *dip,
3042 int (*prop_op)(),
3043 char *name, dev_t devt, int aflags, int alen, caddr_t aval,
3044 di_off_t off, di_off_t **off_pp)
3046 int need_free = 0;
3047 dev_t pdevt;
3048 int pflags;
3049 int rv;
3050 caddr_t val;
3051 int len;
3052 size_t size;
3053 struct di_prop *pp;
3055 /* If we have prop_op function, ask driver for latest value */
3056 if (prop_op) {
3057 ASSERT(dip);
3059 /* Must search DDI_DEV_T_NONE with DDI_DEV_T_ANY */
3060 pdevt = (devt == DDI_DEV_T_NONE) ? DDI_DEV_T_ANY : devt;
3063 * We have type information in flags, but are invoking an
3064 * old non-typed prop_op(9E) interface. Since not all types are
3065 * part of DDI_PROP_TYPE_ANY (example is DDI_PROP_TYPE_INT64),
3066 * we set DDI_PROP_CONSUMER_TYPED - causing the framework to
3067 * expand type bits beyond DDI_PROP_TYPE_ANY. This allows us
3068 * to use the legacy prop_op(9E) interface to obtain updates
3069 * non-DDI_PROP_TYPE_ANY dynamic properties.
3071 pflags = aflags & ~DDI_PROP_TYPE_MASK;
3072 pflags |= DDI_PROP_DONTPASS | DDI_PROP_NOTPROM |
3073 DDI_PROP_CONSUMER_TYPED;
3076 * Hold and exit across prop_op(9E) to avoid lock order
3077 * issues between
3078 * [ndi_devi_enter() ..prop_op(9E).. driver-lock]
3079 * .vs.
3080 * [..ioctl(9E).. driver-lock ..ddi_remove_minor_node(9F)..
3081 * ndi_devi_enter()]
3082 * ordering.
3084 ndi_hold_devi((dev_info_t *)dip);
3085 ndi_devi_exit((dev_info_t *)dip, dip->devi_circular);
3086 rv = (*prop_op)(pdevt, (dev_info_t *)dip,
3087 PROP_LEN_AND_VAL_ALLOC, pflags, name, &val, &len);
3088 ndi_devi_enter((dev_info_t *)dip, &dip->devi_circular);
3089 ndi_rele_devi((dev_info_t *)dip);
3091 if (rv == DDI_PROP_SUCCESS) {
3092 need_free = 1; /* dynamic prop obtained */
3093 } else if (dyn) {
3095 * A dynamic property must succeed prop_op(9E) to show
3096 * up in the snapshot - that is the only source of its
3097 * value.
3099 return (off); /* dynamic prop not supported */
3100 } else {
3102 * In case calling the driver caused an update off
3103 * prop_op(9E) of a non-dynamic property (code leading
3104 * to ddi_prop_change), we defer picking up val and
3105 * len informatiojn until after prop_op(9E) to ensure
3106 * that we snapshot the latest value.
3108 val = aval;
3109 len = alen;
3112 } else {
3113 val = aval;
3114 len = alen;
3117 dcmn_err((CE_CONT, "di_getprop_add: list %d %s len %d val %p\n",
3118 list, name ? name : "NULL", len, (void *)val));
3120 size = sizeof (struct di_prop);
3121 **off_pp = off = di_checkmem(st, off, size);
3122 pp = DI_PROP(di_mem_addr(st, off));
3123 pp->self = off;
3124 off += size;
3126 pp->dev_major = getmajor(devt);
3127 pp->dev_minor = getminor(devt);
3128 pp->prop_flags = aflags;
3129 pp->prop_list = list;
3131 /* property name */
3132 if (name) {
3133 size = strlen(name) + 1;
3134 pp->prop_name = off = di_checkmem(st, off, size);
3135 (void) strcpy(di_mem_addr(st, off), name);
3136 off += size;
3137 } else {
3138 pp->prop_name = -1;
3141 pp->prop_len = len;
3142 if (val == NULL) {
3143 pp->prop_data = -1;
3144 } else if (len != 0) {
3145 size = len;
3146 pp->prop_data = off = di_checkmem(st, off, size);
3147 bcopy(val, di_mem_addr(st, off), size);
3148 off += size;
3151 pp->next = 0; /* assume tail for now */
3152 *off_pp = &pp->next; /* return pointer to our next */
3154 if (need_free) /* free PROP_LEN_AND_VAL_ALLOC alloc */
3155 kmem_free(val, len);
3156 return (off);
3161 * Copy a list of properties attached to a devinfo node. Called from
3162 * di_copynode with active ndi_devi_enter. The major number is passed in case
3163 * we need to call driver's prop_op entry. The value of list indicates
3164 * which list we are copying. Possible values are:
3165 * DI_PROP_DRV_LIST, DI_PROP_SYS_LIST, DI_PROP_GLB_LIST, DI_PROP_HW_LIST
3167 static di_off_t
3168 di_getprop(int list, struct ddi_prop **pprop, di_off_t *off_p,
3169 struct di_state *st, struct dev_info *dip)
3171 struct ddi_prop *prop;
3172 int (*prop_op)();
3173 int off;
3174 struct ddi_minor_data *mn;
3175 i_ddi_prop_dyn_t *dp;
3176 struct plist {
3177 struct plist *pl_next;
3178 char *pl_name;
3179 int pl_flags;
3180 dev_t pl_dev;
3181 int pl_len;
3182 caddr_t pl_val;
3183 } *pl, *pl0, **plp;
3185 ASSERT(st != NULL);
3187 off = *off_p;
3188 *off_p = 0;
3189 dcmn_err((CE_CONT, "di_getprop: copy property list %d at addr %p\n",
3190 list, (void *)*pprop));
3192 /* get pointer to driver's prop_op(9E) implementation if DRV_LIST */
3193 prop_op = (list == DI_PROP_DRV_LIST) ? di_getprop_prop_op(dip) : NULL;
3196 * Form private list of properties, holding devi_lock for properties
3197 * that hang off the dip.
3199 if (dip)
3200 mutex_enter(&(dip->devi_lock));
3201 for (pl0 = NULL, plp = &pl0, prop = *pprop;
3202 prop; plp = &pl->pl_next, prop = prop->prop_next) {
3203 pl = kmem_alloc(sizeof (*pl), KM_SLEEP);
3204 *plp = pl;
3205 pl->pl_next = NULL;
3206 if (prop->prop_name)
3207 pl->pl_name = i_ddi_strdup(prop->prop_name, KM_SLEEP);
3208 else
3209 pl->pl_name = NULL;
3210 pl->pl_flags = prop->prop_flags;
3211 pl->pl_dev = prop->prop_dev;
3212 if (prop->prop_len) {
3213 pl->pl_len = prop->prop_len;
3214 pl->pl_val = kmem_alloc(pl->pl_len, KM_SLEEP);
3215 bcopy(prop->prop_val, pl->pl_val, pl->pl_len);
3216 } else {
3217 pl->pl_len = 0;
3218 pl->pl_val = NULL;
3221 if (dip)
3222 mutex_exit(&(dip->devi_lock));
3225 * Now that we have dropped devi_lock, perform a second-pass to
3226 * add properties to the snapshot. We do this as a second pass
3227 * because we may need to call prop_op(9E) and we can't hold
3228 * devi_lock across that call.
3230 for (pl = pl0; pl; pl = pl0) {
3231 pl0 = pl->pl_next;
3232 off = di_getprop_add(list, 0, st, dip, prop_op, pl->pl_name,
3233 pl->pl_dev, pl->pl_flags, pl->pl_len, pl->pl_val,
3234 off, &off_p);
3235 if (pl->pl_val)
3236 kmem_free(pl->pl_val, pl->pl_len);
3237 if (pl->pl_name)
3238 kmem_free(pl->pl_name, strlen(pl->pl_name) + 1);
3239 kmem_free(pl, sizeof (*pl));
3243 * If there is no prop_op or dynamic property support has been
3244 * disabled, we are done.
3246 if ((prop_op == NULL) || (di_prop_dyn == 0)) {
3247 *off_p = 0;
3248 return (off);
3251 /* Add dynamic driver properties to snapshot */
3252 for (dp = i_ddi_prop_dyn_driver_get((dev_info_t *)dip);
3253 dp && dp->dp_name; dp++) {
3254 if (dp->dp_spec_type) {
3255 /* if spec_type, property of matching minor */
3256 ASSERT(DEVI_BUSY_OWNED(dip));
3257 for (mn = dip->devi_minor; mn; mn = mn->next) {
3258 if (mn->ddm_spec_type != dp->dp_spec_type)
3259 continue;
3260 off = di_getprop_add(list, 1, st, dip, prop_op,
3261 dp->dp_name, mn->ddm_dev, dp->dp_type,
3262 0, NULL, off, &off_p);
3264 } else {
3265 /* property of devinfo node */
3266 off = di_getprop_add(list, 1, st, dip, prop_op,
3267 dp->dp_name, DDI_DEV_T_NONE, dp->dp_type,
3268 0, NULL, off, &off_p);
3272 /* Add dynamic parent properties to snapshot */
3273 for (dp = i_ddi_prop_dyn_parent_get((dev_info_t *)dip);
3274 dp && dp->dp_name; dp++) {
3275 if (dp->dp_spec_type) {
3276 /* if spec_type, property of matching minor */
3277 ASSERT(DEVI_BUSY_OWNED(dip));
3278 for (mn = dip->devi_minor; mn; mn = mn->next) {
3279 if (mn->ddm_spec_type != dp->dp_spec_type)
3280 continue;
3281 off = di_getprop_add(list, 1, st, dip, prop_op,
3282 dp->dp_name, mn->ddm_dev, dp->dp_type,
3283 0, NULL, off, &off_p);
3285 } else {
3286 /* property of devinfo node */
3287 off = di_getprop_add(list, 1, st, dip, prop_op,
3288 dp->dp_name, DDI_DEV_T_NONE, dp->dp_type,
3289 0, NULL, off, &off_p);
3293 *off_p = 0;
3294 return (off);
3298 * find private data format attached to a dip
3299 * parent = 1 to match driver name of parent dip (for parent private data)
3300 * 0 to match driver name of current dip (for driver private data)
3302 #define DI_MATCH_DRIVER 0
3303 #define DI_MATCH_PARENT 1
3305 struct di_priv_format *
3306 di_match_drv_name(struct dev_info *node, struct di_state *st, int match)
3308 int i, count, len;
3309 char *drv_name;
3310 major_t major;
3311 struct di_all *all;
3312 struct di_priv_format *form;
3314 dcmn_err2((CE_CONT, "di_match_drv_name: node = %s, match = %x\n",
3315 node->devi_node_name, match));
3317 if (match == DI_MATCH_PARENT) {
3318 node = DEVI(node->devi_parent);
3321 if (node == NULL) {
3322 return (NULL);
3325 major = node->devi_major;
3326 if (major == (major_t)(-1)) {
3327 return (NULL);
3331 * Match the driver name.
3333 drv_name = ddi_major_to_name(major);
3334 if ((drv_name == NULL) || *drv_name == '\0') {
3335 return (NULL);
3338 /* Now get the di_priv_format array */
3339 all = DI_ALL_PTR(st);
3340 if (match == DI_MATCH_PARENT) {
3341 count = all->n_ppdata;
3342 form = DI_PRIV_FORMAT(di_mem_addr(st, all->ppdata_format));
3343 } else {
3344 count = all->n_dpdata;
3345 form = DI_PRIV_FORMAT(di_mem_addr(st, all->dpdata_format));
3348 len = strlen(drv_name);
3349 for (i = 0; i < count; i++) {
3350 char *tmp;
3352 tmp = form[i].drv_name;
3353 while (tmp && (*tmp != '\0')) {
3354 if (strncmp(drv_name, tmp, len) == 0) {
3355 return (&form[i]);
3358 * Move to next driver name, skipping a white space
3360 if (tmp = strchr(tmp, ' ')) {
3361 tmp++;
3366 return (NULL);
3370 * The following functions copy data as specified by the format passed in.
3371 * To prevent invalid format from panicing the system, we call on_fault().
3372 * A return value of 0 indicates an error. Otherwise, the total offset
3373 * is returned.
3375 #define DI_MAX_PRIVDATA (PAGESIZE >> 1) /* max private data size */
3377 static di_off_t
3378 di_getprvdata(struct di_priv_format *pdp, struct dev_info *node,
3379 void *data, di_off_t *off_p, struct di_state *st)
3381 caddr_t pa;
3382 void *ptr;
3383 int i, size, repeat;
3384 di_off_t off, off0, *tmp;
3385 char *path;
3386 label_t ljb;
3388 dcmn_err2((CE_CONT, "di_getprvdata:\n"));
3391 * check memory availability. Private data size is
3392 * limited to DI_MAX_PRIVDATA.
3394 off = di_checkmem(st, *off_p, DI_MAX_PRIVDATA);
3395 *off_p = off;
3397 if ((pdp->bytes == 0) || pdp->bytes > DI_MAX_PRIVDATA) {
3398 goto failure;
3401 if (!on_fault(&ljb)) {
3402 /* copy the struct */
3403 bcopy(data, di_mem_addr(st, off), pdp->bytes);
3404 off0 = DI_ALIGN(pdp->bytes); /* XXX remove DI_ALIGN */
3406 /* dereferencing pointers */
3407 for (i = 0; i < MAX_PTR_IN_PRV; i++) {
3409 if (pdp->ptr[i].size == 0) {
3410 goto success; /* no more ptrs */
3414 * first, get the pointer content
3416 if ((pdp->ptr[i].offset < 0) ||
3417 (pdp->ptr[i].offset > pdp->bytes - sizeof (char *)))
3418 goto failure; /* wrong offset */
3420 pa = di_mem_addr(st, off + pdp->ptr[i].offset);
3422 /* save a tmp ptr to store off_t later */
3423 tmp = (di_off_t *)(intptr_t)pa;
3425 /* get pointer value, if NULL continue */
3426 ptr = *((void **) (intptr_t)pa);
3427 if (ptr == NULL) {
3428 continue;
3432 * next, find the repeat count (array dimension)
3434 repeat = pdp->ptr[i].len_offset;
3437 * Positive value indicates a fixed sized array.
3438 * 0 or negative value indicates variable sized array.
3440 * For variable sized array, the variable must be
3441 * an int member of the structure, with an offset
3442 * equal to the absolution value of struct member.
3444 if (repeat > pdp->bytes - sizeof (int)) {
3445 goto failure; /* wrong offset */
3448 if (repeat >= 0) {
3449 repeat = *((int *)
3450 (intptr_t)((caddr_t)data + repeat));
3451 } else {
3452 repeat = -repeat;
3456 * next, get the size of the object to be copied
3458 size = pdp->ptr[i].size * repeat;
3461 * Arbitrarily limit the total size of object to be
3462 * copied (1 byte to 1/4 page).
3464 if ((size <= 0) || (size > (DI_MAX_PRIVDATA - off0))) {
3465 goto failure; /* wrong size or too big */
3469 * Now copy the data
3471 *tmp = off0;
3472 bcopy(ptr, di_mem_addr(st, off + off0), size);
3473 off0 += DI_ALIGN(size); /* XXX remove DI_ALIGN */
3475 } else {
3476 goto failure;
3479 success:
3481 * success if reached here
3483 no_fault();
3484 return (off + off0);
3485 /*NOTREACHED*/
3487 failure:
3489 * fault occurred
3491 no_fault();
3492 path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
3493 cmn_err(CE_WARN, "devinfo: fault on private data for '%s' at %p",
3494 ddi_pathname((dev_info_t *)node, path), data);
3495 kmem_free(path, MAXPATHLEN);
3496 *off_p = -1; /* set private data to indicate error */
3498 return (off);
3502 * get parent private data; on error, returns original offset
3504 static di_off_t
3505 di_getppdata(struct dev_info *node, di_off_t *off_p, struct di_state *st)
3507 int off;
3508 struct di_priv_format *ppdp;
3510 dcmn_err2((CE_CONT, "di_getppdata:\n"));
3512 /* find the parent data format */
3513 if ((ppdp = di_match_drv_name(node, st, DI_MATCH_PARENT)) == NULL) {
3514 off = *off_p;
3515 *off_p = 0; /* set parent data to none */
3516 return (off);
3519 return (di_getprvdata(ppdp, node,
3520 ddi_get_parent_data((dev_info_t *)node), off_p, st));
3524 * get parent private data; returns original offset
3526 static di_off_t
3527 di_getdpdata(struct dev_info *node, di_off_t *off_p, struct di_state *st)
3529 int off;
3530 struct di_priv_format *dpdp;
3532 dcmn_err2((CE_CONT, "di_getdpdata:"));
3534 /* find the parent data format */
3535 if ((dpdp = di_match_drv_name(node, st, DI_MATCH_DRIVER)) == NULL) {
3536 off = *off_p;
3537 *off_p = 0; /* set driver data to none */
3538 return (off);
3541 return (di_getprvdata(dpdp, node,
3542 ddi_get_driver_private((dev_info_t *)node), off_p, st));
3546 * Copy hotplug data associated with a devinfo node into the snapshot.
3548 static di_off_t
3549 di_gethpdata(ddi_hp_cn_handle_t *hp_hdl, di_off_t *off_p,
3550 struct di_state *st)
3552 struct i_hp *hp;
3553 struct di_hp *me;
3554 size_t size;
3555 di_off_t off;
3557 dcmn_err2((CE_CONT, "di_gethpdata:\n"));
3560 * check memory first
3562 off = di_checkmem(st, *off_p, sizeof (struct di_hp));
3563 *off_p = off;
3565 do {
3566 me = DI_HP(di_mem_addr(st, off));
3567 me->self = off;
3568 me->hp_name = 0;
3569 me->hp_connection = (int)hp_hdl->cn_info.cn_num;
3570 me->hp_depends_on = (int)hp_hdl->cn_info.cn_num_dpd_on;
3571 (void) ddihp_cn_getstate(hp_hdl);
3572 me->hp_state = (int)hp_hdl->cn_info.cn_state;
3573 me->hp_type = (int)hp_hdl->cn_info.cn_type;
3574 me->hp_type_str = 0;
3575 me->hp_last_change = (uint32_t)hp_hdl->cn_info.cn_last_change;
3576 me->hp_child = 0;
3579 * Child links are resolved later by di_hotplug_children().
3580 * Store a reference to this di_hp_t in the list used later
3581 * by di_hotplug_children().
3583 hp = kmem_zalloc(sizeof (i_hp_t), KM_SLEEP);
3584 hp->hp_off = off;
3585 hp->hp_child = hp_hdl->cn_info.cn_child;
3586 list_insert_tail(&st->hp_list, hp);
3588 off += sizeof (struct di_hp);
3590 /* Add name of this di_hp_t to the snapshot */
3591 if (hp_hdl->cn_info.cn_name) {
3592 size = strlen(hp_hdl->cn_info.cn_name) + 1;
3593 me->hp_name = off = di_checkmem(st, off, size);
3594 (void) strcpy(di_mem_addr(st, off),
3595 hp_hdl->cn_info.cn_name);
3596 off += size;
3599 /* Add type description of this di_hp_t to the snapshot */
3600 if (hp_hdl->cn_info.cn_type_str) {
3601 size = strlen(hp_hdl->cn_info.cn_type_str) + 1;
3602 me->hp_type_str = off = di_checkmem(st, off, size);
3603 (void) strcpy(di_mem_addr(st, off),
3604 hp_hdl->cn_info.cn_type_str);
3605 off += size;
3609 * Set link to next in the chain of di_hp_t nodes,
3610 * or terminate the chain when processing the last node.
3612 if (hp_hdl->next != NULL) {
3613 off = di_checkmem(st, off, sizeof (struct di_hp));
3614 me->next = off;
3615 } else {
3616 me->next = 0;
3619 /* Update pointer to next in the chain */
3620 hp_hdl = hp_hdl->next;
3622 } while (hp_hdl);
3624 return (off);
3628 * The driver is stateful across DINFOCPYALL and DINFOUSRLD.
3629 * This function encapsulates the state machine:
3631 * -> IOC_IDLE -> IOC_SNAP -> IOC_DONE -> IOC_COPY ->
3632 * | SNAPSHOT USRLD |
3633 * --------------------------------------------------
3635 * Returns 0 on success and -1 on failure
3637 static int
3638 di_setstate(struct di_state *st, int new_state)
3640 int ret = 0;
3642 mutex_enter(&di_lock);
3643 switch (new_state) {
3644 case IOC_IDLE:
3645 case IOC_DONE:
3646 break;
3647 case IOC_SNAP:
3648 if (st->di_iocstate != IOC_IDLE)
3649 ret = -1;
3650 break;
3651 case IOC_COPY:
3652 if (st->di_iocstate != IOC_DONE)
3653 ret = -1;
3654 break;
3655 default:
3656 ret = -1;
3659 if (ret == 0)
3660 st->di_iocstate = new_state;
3661 else
3662 cmn_err(CE_NOTE, "incorrect state transition from %d to %d",
3663 st->di_iocstate, new_state);
3664 mutex_exit(&di_lock);
3665 return (ret);
3669 * We cannot assume the presence of the entire
3670 * snapshot in this routine. All we are guaranteed
3671 * is the di_all struct + 1 byte (for root_path)
3673 static int
3674 header_plus_one_ok(struct di_all *all)
3677 * Refuse to read old versions
3679 if (all->version != DI_SNAPSHOT_VERSION) {
3680 CACHE_DEBUG((DI_ERR, "bad version: 0x%x", all->version));
3681 return (0);
3684 if (all->cache_magic != DI_CACHE_MAGIC) {
3685 CACHE_DEBUG((DI_ERR, "bad magic #: 0x%x", all->cache_magic));
3686 return (0);
3689 if (all->snapshot_time == 0) {
3690 CACHE_DEBUG((DI_ERR, "bad timestamp: %ld", all->snapshot_time));
3691 return (0);
3694 if (all->top_devinfo == 0) {
3695 CACHE_DEBUG((DI_ERR, "NULL top devinfo"));
3696 return (0);
3699 if (all->map_size < sizeof (*all) + 1) {
3700 CACHE_DEBUG((DI_ERR, "bad map size: %u", all->map_size));
3701 return (0);
3704 if (all->root_path[0] != '/' || all->root_path[1] != '\0') {
3705 CACHE_DEBUG((DI_ERR, "bad rootpath: %c%c",
3706 all->root_path[0], all->root_path[1]));
3707 return (0);
3711 * We can't check checksum here as we just have the header
3714 return (1);
3717 static int
3718 chunk_write(struct vnode *vp, offset_t off, caddr_t buf, size_t len)
3720 rlim64_t rlimit;
3721 ssize_t resid;
3722 int error = 0;
3725 rlimit = RLIM64_INFINITY;
3727 while (len) {
3728 resid = 0;
3729 error = vn_rdwr(UIO_WRITE, vp, buf, len, off,
3730 UIO_SYSSPACE, FSYNC, rlimit, kcred, &resid);
3732 if (error || resid < 0) {
3733 error = error ? error : EIO;
3734 CACHE_DEBUG((DI_ERR, "write error: %d", error));
3735 break;
3739 * Check if we are making progress
3741 if (resid >= len) {
3742 error = ENOSPC;
3743 break;
3745 buf += len - resid;
3746 off += len - resid;
3747 len = resid;
3750 return (error);
3753 static void
3754 di_cache_write(struct di_cache *cache)
3756 struct di_all *all;
3757 struct vnode *vp;
3758 int oflags;
3759 size_t map_size;
3760 size_t chunk;
3761 offset_t off;
3762 int error;
3763 char *buf;
3765 ASSERT(DI_CACHE_LOCKED(*cache));
3766 ASSERT(!servicing_interrupt());
3768 if (cache->cache_size == 0) {
3769 ASSERT(cache->cache_data == NULL);
3770 CACHE_DEBUG((DI_ERR, "Empty cache. Skipping write"));
3771 return;
3774 ASSERT(cache->cache_size > 0);
3775 ASSERT(cache->cache_data);
3777 if (!modrootloaded || rootvp == NULL || vn_is_readonly(rootvp)) {
3778 CACHE_DEBUG((DI_ERR, "Can't write to rootFS. Skipping write"));
3779 return;
3782 all = (struct di_all *)cache->cache_data;
3784 if (!header_plus_one_ok(all)) {
3785 CACHE_DEBUG((DI_ERR, "Invalid header. Skipping write"));
3786 return;
3789 ASSERT(strcmp(all->root_path, "/") == 0);
3792 * The cache_size is the total allocated memory for the cache.
3793 * The map_size is the actual size of valid data in the cache.
3794 * map_size may be smaller than cache_size but cannot exceed
3795 * cache_size.
3797 if (all->map_size > cache->cache_size) {
3798 CACHE_DEBUG((DI_ERR, "map_size (0x%x) > cache_size (0x%x)."
3799 " Skipping write", all->map_size, cache->cache_size));
3800 return;
3804 * First unlink the temp file
3806 error = vn_remove(DI_CACHE_TEMP, UIO_SYSSPACE, RMFILE);
3807 if (error && error != ENOENT) {
3808 CACHE_DEBUG((DI_ERR, "%s: unlink failed: %d",
3809 DI_CACHE_TEMP, error));
3812 if (error == EROFS) {
3813 CACHE_DEBUG((DI_ERR, "RDONLY FS. Skipping write"));
3814 return;
3817 vp = NULL;
3818 oflags = (FCREAT|FWRITE);
3819 if (error = vn_open(DI_CACHE_TEMP, UIO_SYSSPACE, oflags,
3820 DI_CACHE_PERMS, &vp, CRCREAT, 0)) {
3821 CACHE_DEBUG((DI_ERR, "%s: create failed: %d",
3822 DI_CACHE_TEMP, error));
3823 return;
3826 ASSERT(vp);
3829 * Paranoid: Check if the file is on a read-only FS
3831 if (vn_is_readonly(vp)) {
3832 CACHE_DEBUG((DI_ERR, "cannot write: readonly FS"));
3833 goto fail;
3837 * Note that we only write map_size bytes to disk - this saves
3838 * space as the actual cache size may be larger than size of
3839 * valid data in the cache.
3840 * Another advantage is that it makes verification of size
3841 * easier when the file is read later.
3843 map_size = all->map_size;
3844 off = 0;
3845 buf = cache->cache_data;
3847 while (map_size) {
3848 ASSERT(map_size > 0);
3850 * Write in chunks so that VM system
3851 * is not overwhelmed
3853 if (map_size > di_chunk * PAGESIZE)
3854 chunk = di_chunk * PAGESIZE;
3855 else
3856 chunk = map_size;
3858 error = chunk_write(vp, off, buf, chunk);
3859 if (error) {
3860 CACHE_DEBUG((DI_ERR, "write failed: off=0x%x: %d",
3861 off, error));
3862 goto fail;
3865 off += chunk;
3866 buf += chunk;
3867 map_size -= chunk;
3869 /* If low on memory, give pageout a chance to run */
3870 if (freemem < desfree)
3871 delay(1);
3875 * Now sync the file and close it
3877 if (error = VOP_FSYNC(vp, FSYNC, kcred, NULL)) {
3878 CACHE_DEBUG((DI_ERR, "FSYNC failed: %d", error));
3881 if (error = VOP_CLOSE(vp, oflags, 1, (offset_t)0, kcred, NULL)) {
3882 CACHE_DEBUG((DI_ERR, "close() failed: %d", error));
3883 VN_RELE(vp);
3884 return;
3887 VN_RELE(vp);
3890 * Now do the rename
3892 if (error = vn_rename(DI_CACHE_TEMP, DI_CACHE_FILE, UIO_SYSSPACE)) {
3893 CACHE_DEBUG((DI_ERR, "rename failed: %d", error));
3894 return;
3897 CACHE_DEBUG((DI_INFO, "Cache write successful."));
3899 return;
3901 fail:
3902 (void) VOP_CLOSE(vp, oflags, 1, (offset_t)0, kcred, NULL);
3903 VN_RELE(vp);
3908 * Since we could be called early in boot,
3909 * use kobj_read_file()
3911 static void
3912 di_cache_read(struct di_cache *cache)
3914 struct _buf *file;
3915 struct di_all *all;
3916 int n;
3917 size_t map_size, sz, chunk;
3918 offset_t off;
3919 caddr_t buf;
3920 uint32_t saved_crc, crc;
3922 ASSERT(modrootloaded);
3923 ASSERT(DI_CACHE_LOCKED(*cache));
3924 ASSERT(cache->cache_data == NULL);
3925 ASSERT(cache->cache_size == 0);
3926 ASSERT(!servicing_interrupt());
3928 file = kobj_open_file(DI_CACHE_FILE);
3929 if (file == (struct _buf *)-1) {
3930 CACHE_DEBUG((DI_ERR, "%s: open failed: %d",
3931 DI_CACHE_FILE, ENOENT));
3932 return;
3936 * Read in the header+root_path first. The root_path must be "/"
3938 all = kmem_zalloc(sizeof (*all) + 1, KM_SLEEP);
3939 n = kobj_read_file(file, (caddr_t)all, sizeof (*all) + 1, 0);
3941 if ((n != sizeof (*all) + 1) || !header_plus_one_ok(all)) {
3942 kmem_free(all, sizeof (*all) + 1);
3943 kobj_close_file(file);
3944 CACHE_DEBUG((DI_ERR, "cache header: read error or invalid"));
3945 return;
3948 map_size = all->map_size;
3950 kmem_free(all, sizeof (*all) + 1);
3952 ASSERT(map_size >= sizeof (*all) + 1);
3954 buf = di_cache.cache_data = kmem_alloc(map_size, KM_SLEEP);
3955 sz = map_size;
3956 off = 0;
3957 while (sz) {
3958 /* Don't overload VM with large reads */
3959 chunk = (sz > di_chunk * PAGESIZE) ? di_chunk * PAGESIZE : sz;
3960 n = kobj_read_file(file, buf, chunk, off);
3961 if (n != chunk) {
3962 CACHE_DEBUG((DI_ERR, "%s: read error at offset: %lld",
3963 DI_CACHE_FILE, off));
3964 goto fail;
3966 off += chunk;
3967 buf += chunk;
3968 sz -= chunk;
3971 ASSERT(off == map_size);
3974 * Read past expected EOF to verify size.
3976 if (kobj_read_file(file, (caddr_t)&sz, 1, off) > 0) {
3977 CACHE_DEBUG((DI_ERR, "%s: file size changed", DI_CACHE_FILE));
3978 goto fail;
3981 all = (struct di_all *)di_cache.cache_data;
3982 if (!header_plus_one_ok(all)) {
3983 CACHE_DEBUG((DI_ERR, "%s: file header changed", DI_CACHE_FILE));
3984 goto fail;
3988 * Compute CRC with checksum field in the cache data set to 0
3990 saved_crc = all->cache_checksum;
3991 all->cache_checksum = 0;
3992 CRC32(crc, di_cache.cache_data, map_size, -1U, crc32_table);
3993 all->cache_checksum = saved_crc;
3995 if (crc != all->cache_checksum) {
3996 CACHE_DEBUG((DI_ERR,
3997 "%s: checksum error: expected=0x%x actual=0x%x",
3998 DI_CACHE_FILE, all->cache_checksum, crc));
3999 goto fail;
4002 if (all->map_size != map_size) {
4003 CACHE_DEBUG((DI_ERR, "%s: map size changed", DI_CACHE_FILE));
4004 goto fail;
4007 kobj_close_file(file);
4009 di_cache.cache_size = map_size;
4011 return;
4013 fail:
4014 kmem_free(di_cache.cache_data, map_size);
4015 kobj_close_file(file);
4016 di_cache.cache_data = NULL;
4017 di_cache.cache_size = 0;
4022 * Checks if arguments are valid for using the cache.
4024 static int
4025 cache_args_valid(struct di_state *st, int *error)
4027 ASSERT(error);
4028 ASSERT(st->mem_size > 0);
4029 ASSERT(st->memlist != NULL);
4031 if (!modrootloaded || !i_ddi_io_initialized()) {
4032 CACHE_DEBUG((DI_ERR,
4033 "cache lookup failure: I/O subsystem not inited"));
4034 *error = ENOTACTIVE;
4035 return (0);
4039 * No other flags allowed with DINFOCACHE
4041 if (st->command != (DINFOCACHE & DIIOC_MASK)) {
4042 CACHE_DEBUG((DI_ERR,
4043 "cache lookup failure: bad flags: 0x%x",
4044 st->command));
4045 *error = EINVAL;
4046 return (0);
4049 if (strcmp(DI_ALL_PTR(st)->root_path, "/") != 0) {
4050 CACHE_DEBUG((DI_ERR,
4051 "cache lookup failure: bad root: %s",
4052 DI_ALL_PTR(st)->root_path));
4053 *error = EINVAL;
4054 return (0);
4057 CACHE_DEBUG((DI_INFO, "cache lookup args ok: 0x%x", st->command));
4059 *error = 0;
4061 return (1);
4064 static int
4065 snapshot_is_cacheable(struct di_state *st)
4067 ASSERT(st->mem_size > 0);
4068 ASSERT(st->memlist != NULL);
4070 if ((st->command & DI_CACHE_SNAPSHOT_FLAGS) !=
4071 (DI_CACHE_SNAPSHOT_FLAGS & DIIOC_MASK)) {
4072 CACHE_DEBUG((DI_INFO,
4073 "not cacheable: incompatible flags: 0x%x",
4074 st->command));
4075 return (0);
4078 if (strcmp(DI_ALL_PTR(st)->root_path, "/") != 0) {
4079 CACHE_DEBUG((DI_INFO,
4080 "not cacheable: incompatible root path: %s",
4081 DI_ALL_PTR(st)->root_path));
4082 return (0);
4085 CACHE_DEBUG((DI_INFO, "cacheable snapshot request: 0x%x", st->command));
4087 return (1);
4090 static int
4091 di_cache_lookup(struct di_state *st)
4093 size_t rval;
4094 int cache_valid;
4096 ASSERT(cache_args_valid(st, &cache_valid));
4097 ASSERT(modrootloaded);
4099 DI_CACHE_LOCK(di_cache);
4102 * The following assignment determines the validity
4103 * of the cache as far as this snapshot is concerned.
4105 cache_valid = di_cache.cache_valid;
4107 if (cache_valid && di_cache.cache_data == NULL) {
4108 di_cache_read(&di_cache);
4109 /* check for read or file error */
4110 if (di_cache.cache_data == NULL)
4111 cache_valid = 0;
4114 if (cache_valid) {
4116 * Ok, the cache was valid as of this particular
4117 * snapshot. Copy the cached snapshot. This is safe
4118 * to do as the cache cannot be freed (we hold the
4119 * cache lock). Free the memory allocated in di_state
4120 * up until this point - we will simply copy everything
4121 * in the cache.
4124 ASSERT(di_cache.cache_data != NULL);
4125 ASSERT(di_cache.cache_size > 0);
4127 di_freemem(st);
4129 rval = 0;
4130 if (di_cache2mem(&di_cache, st) > 0) {
4132 * map_size is size of valid data in the
4133 * cached snapshot and may be less than
4134 * size of the cache.
4136 ASSERT(DI_ALL_PTR(st));
4137 rval = DI_ALL_PTR(st)->map_size;
4139 ASSERT(rval >= sizeof (struct di_all));
4140 ASSERT(rval <= di_cache.cache_size);
4142 } else {
4144 * The cache isn't valid, we need to take a snapshot.
4145 * Set the command flags appropriately
4147 ASSERT(st->command == (DINFOCACHE & DIIOC_MASK));
4148 st->command = (DI_CACHE_SNAPSHOT_FLAGS & DIIOC_MASK);
4149 rval = di_cache_update(st);
4150 st->command = (DINFOCACHE & DIIOC_MASK);
4153 DI_CACHE_UNLOCK(di_cache);
4156 * For cached snapshots, the devinfo driver always returns
4157 * a snapshot rooted at "/".
4159 ASSERT(rval == 0 || strcmp(DI_ALL_PTR(st)->root_path, "/") == 0);
4161 return ((int)rval);
4165 * This is a forced update of the cache - the previous state of the cache
4166 * may be:
4167 * - unpopulated
4168 * - populated and invalid
4169 * - populated and valid
4171 static int
4172 di_cache_update(struct di_state *st)
4174 int rval;
4175 uint32_t crc;
4176 struct di_all *all;
4178 ASSERT(DI_CACHE_LOCKED(di_cache));
4179 ASSERT(snapshot_is_cacheable(st));
4182 * Free the in-core cache and the on-disk file (if they exist)
4184 i_ddi_di_cache_free(&di_cache);
4187 * Set valid flag before taking the snapshot,
4188 * so that any invalidations that arrive
4189 * during or after the snapshot are not
4190 * removed by us.
4192 atomic_or_32(&di_cache.cache_valid, 1);
4194 rval = di_snapshot_and_clean(st);
4196 if (rval == 0) {
4197 CACHE_DEBUG((DI_ERR, "can't update cache: bad snapshot"));
4198 return (0);
4201 DI_ALL_PTR(st)->map_size = rval;
4202 if (di_mem2cache(st, &di_cache) == 0) {
4203 CACHE_DEBUG((DI_ERR, "can't update cache: copy failed"));
4204 return (0);
4207 ASSERT(di_cache.cache_data);
4208 ASSERT(di_cache.cache_size > 0);
4211 * Now that we have cached the snapshot, compute its checksum.
4212 * The checksum is only computed over the valid data in the
4213 * cache, not the entire cache.
4214 * Also, set all the fields (except checksum) before computing
4215 * checksum.
4217 all = (struct di_all *)di_cache.cache_data;
4218 all->cache_magic = DI_CACHE_MAGIC;
4219 all->map_size = rval;
4221 ASSERT(all->cache_checksum == 0);
4222 CRC32(crc, di_cache.cache_data, all->map_size, -1U, crc32_table);
4223 all->cache_checksum = crc;
4225 di_cache_write(&di_cache);
4227 return (rval);
4230 static void
4231 di_cache_print(di_cache_debug_t msglevel, char *fmt, ...)
4233 va_list ap;
4235 if (di_cache_debug <= DI_QUIET)
4236 return;
4238 if (di_cache_debug < msglevel)
4239 return;
4241 switch (msglevel) {
4242 case DI_ERR:
4243 msglevel = CE_WARN;
4244 break;
4245 case DI_INFO:
4246 case DI_TRACE:
4247 default:
4248 msglevel = CE_NOTE;
4249 break;
4252 va_start(ap, fmt);
4253 vcmn_err(msglevel, fmt, ap);
4254 va_end(ap);
4257 static void
4258 di_hotplug_children(struct di_state *st)
4260 di_off_t off;
4261 struct di_hp *hp;
4262 struct i_hp *hp_list_node;
4264 while (hp_list_node = (struct i_hp *)list_remove_head(&st->hp_list)) {
4266 if ((hp_list_node->hp_child != NULL) &&
4267 (di_dip_find(st, hp_list_node->hp_child, &off) == 0)) {
4268 hp = DI_HP(di_mem_addr(st, hp_list_node->hp_off));
4269 hp->hp_child = off;
4272 kmem_free(hp_list_node, sizeof (i_hp_t));
4275 list_destroy(&st->hp_list);