2 * This file and its contents are supplied under the terms of the
3 * Common Development and Distribution License ("CDDL"), version 1.0.
4 * You may only use this file in accordance with the terms of version
7 * A full copy of the text of the CDDL should have accompanied this
8 * source. A copy of the CDDL is also available via the Internet at
9 * http://www.illumos.org/license/CDDL.
13 * Copyright 2021 Oxide Computer Company
19 * Currently this implements several different views into PCIe devices
20 * and is designed to (hopefully) replace pcitool and be a vector for new system
21 * functionality such as dealing with multicast filtering, ACS, etc.
23 * While most subcommands have their own implementations, there are a couple of
24 * things that are worth bearing in mind:
26 * 1) Where possible, prefer the use of libofmt. In particular, having good,
27 * parsable output is important. New subcommands should strive to meet that.
29 * 2) Because we're often processing binary data (and it's good hygiene),
30 * subcommands should make sure to drop privileges as early as they can by
31 * calling pcieadm_init_privs(). More on privileges below.
33 * Privilege Management
34 * --------------------
36 * In an attempt to minimize privilege exposure, but to allow subcommands
37 * flexibility when required (e.g. show-cfgspace needs full privs to read from
38 * the kernel), we have two privilege sets that we maintain. One which is the
39 * minimal privs, which basically is a set that has stripped everything. This
40 * is 'pia_priv_min'. The second is one that allows a subcommand to add in
41 * privileges that it requires which will be left in the permitted set. These
42 * are in 'pia_priv_eff'. It's important to know that this set is always
43 * intersected with what the user actually has, so this is not meant to be a way
44 * for a caller to get more privileges than they already have.
46 * A subcommand is expected to call pcieadm_init_privs() once they have
47 * processed enough arguments that they can set an upper bound on privileges.
48 * It's worth noting that a subcommand will be executed in an already minimal
49 * environment; however, we will have already set up a libdevinfo handle for
50 * them, which should make the need to do much more not so bad.
58 #include <libdevinfo.h>
61 #include <sys/pci_tools.h>
63 #include <sys/types.h>
65 #include <sys/debug.h>
72 const char *pcieadm_progname
;
75 pcieadm_init_privs(pcieadm_t
*pcip
)
77 static const char *msg
= "attempted to re-initialize privileges";
78 if (pcip
->pia_priv_init
== NULL
) {
79 upanic(msg
, strlen(msg
));
82 priv_intersect(pcip
->pia_priv_init
, pcip
->pia_priv_eff
);
84 if (setppriv(PRIV_SET
, PRIV_PERMITTED
, pcieadm
.pia_priv_eff
) != 0) {
85 err(EXIT_FAILURE
, "failed to reduce privileges");
88 if (setppriv(PRIV_SET
, PRIV_LIMIT
, pcieadm
.pia_priv_eff
) != 0) {
89 err(EXIT_FAILURE
, "failed to reduce privileges");
92 priv_freeset(pcip
->pia_priv_init
);
93 pcip
->pia_priv_init
= NULL
;
99 pcieadm
.pia_indent
+= 2;
103 pcieadm_deindent(void)
105 VERIFY3U(pcieadm
.pia_indent
, >, 0);
106 pcieadm
.pia_indent
-= 2;
110 pcieadm_print(const char *fmt
, ...)
114 if (pcieadm
.pia_indent
> 0) {
115 (void) printf("%*s", pcieadm
.pia_indent
, "");
119 (void) vprintf(fmt
, ap
);
124 pcieadm_ofmt_errx(const char *fmt
, ...)
129 verrx(EXIT_FAILURE
, fmt
, ap
);
133 * We determine if a node is PCI in a two step process. The first is to see if
134 * the node's name starts with pci, and has an additional character that
135 * indicates it's not the synthetic root of the tree. However, the node name
136 * changes for some classes of devices such as GPUs. As such, for those we try
137 * to look at the compatible property and see if we have a pciexclass or
138 * pciclass entry. We look specifically for the class to make sure that we don't
139 * fall for the synthetic nodes that have a compatible property of
140 * 'pciex_root_complex'.
142 * The compatible property is a single string that is actually a compressed
143 * string. That is, there are multiple NUL-terminated strings concatenated together in a single buffer.
147 pcieadm_di_node_is_pci(di_node_t node
)
153 name
= di_node_name(node
);
154 if (strncmp("pci", name
, 3) == 0) {
155 return (name
[3] != '\0');
158 nents
= di_prop_lookup_strings(DDI_DEV_T_ANY
, node
, "compatible",
164 for (int i
= 0; i
< nents
; i
++) {
165 if (strncmp("pciclass,", compat
, strlen("pciclass,")) == 0 ||
166 strncmp("pciexclass,", compat
, strlen("pciexclass,")) ==
171 compat
+= strlen(compat
) + 1;
178 pcieadm_di_walk_cb(di_node_t node
, void *arg
)
180 pcieadm_di_walk_t
*walk
= arg
;
182 if (!pcieadm_di_node_is_pci(node
)) {
183 return (DI_WALK_CONTINUE
);
186 return (walk
->pdw_func(node
, walk
->pdw_arg
));
190 pcieadm_di_walk(pcieadm_t
*pcip
, pcieadm_di_walk_t
*arg
)
192 (void) di_walk_node(pcip
->pia_root
, DI_WALK_CLDFIRST
, arg
,
197 * Attempt to find the nexus that corresponds to this device. To do this, we
198 * walk up through each parent node and walk its minors until we find a "reg" minor.
201 pcieadm_find_nexus(pcieadm_t
*pia
)
205 for (cur
= di_parent_node(pia
->pia_devi
); cur
!= DI_NODE_NIL
;
206 cur
= di_parent_node(cur
)) {
207 di_minor_t minor
= DI_MINOR_NIL
;
209 while ((minor
= di_minor_next(cur
, minor
)) != DI_MINOR_NIL
) {
210 if (di_minor_spectype(minor
) == S_IFCHR
&&
211 strcmp(di_minor_name(minor
), "reg") == 0) {
212 pia
->pia_nexus
= cur
;
220 pcieadm_find_dip_cb(di_node_t node
, void *arg
)
222 char *path
= NULL
, *driver
;
223 char dinst
[128], bdf
[128], altbdf
[128];
224 int inst
, nprop
, *regs
;
225 pcieadm_t
*pia
= arg
;
227 path
= di_devfs_path(node
);
229 err(EXIT_FAILURE
, "failed to construct devfs path for node: "
230 "%s", di_node_name(node
));
233 driver
= di_driver_name(node
);
234 inst
= di_instance(node
);
235 if (driver
!= NULL
&& inst
!= -1) {
236 (void) snprintf(dinst
, sizeof (dinst
), "%s%d", driver
, inst
);
239 nprop
= di_prop_lookup_ints(DDI_DEV_T_ANY
, node
, "reg", ®s
);
241 errx(EXIT_FAILURE
, "failed to lookup regs array for %s",
244 (void) snprintf(bdf
, sizeof (bdf
), "%x/%x/%x", PCI_REG_BUS_G(regs
[0]),
245 PCI_REG_DEV_G(regs
[0]), PCI_REG_FUNC_G(regs
[0]));
246 (void) snprintf(altbdf
, sizeof (altbdf
), "%02x/%02x/%02x",
247 PCI_REG_BUS_G(regs
[0]), PCI_REG_DEV_G(regs
[0]),
248 PCI_REG_FUNC_G(regs
[0]));
250 if (strcmp(pia
->pia_devstr
, path
) == 0 ||
251 strcmp(pia
->pia_devstr
, bdf
) == 0 ||
252 strcmp(pia
->pia_devstr
, altbdf
) == 0 ||
253 (driver
!= NULL
&& inst
!= -1 &&
254 strcmp(pia
->pia_devstr
, dinst
) == 0)) {
255 if (pia
->pia_devi
!= DI_NODE_NIL
) {
256 errx(EXIT_FAILURE
, "device name matched two device "
257 "nodes: %s and %s", di_node_name(pia
->pia_devi
),
261 pia
->pia_devi
= node
;
265 di_devfs_path_free(path
);
268 return (DI_WALK_CONTINUE
);
272 pcieadm_find_dip(pcieadm_t
*pcip
, const char *device
)
274 pcieadm_di_walk_t walk
;
277 * If someone specifies /devices, just skip over it.
279 pcip
->pia_devstr
= device
;
280 if (strncmp("/devices", device
, strlen("/devices")) == 0) {
281 pcip
->pia_devstr
+= strlen("/devices");
284 pcip
->pia_devi
= DI_NODE_NIL
;
286 walk
.pdw_func
= pcieadm_find_dip_cb
;
287 pcieadm_di_walk(pcip
, &walk
);
289 if (pcip
->pia_devi
== DI_NODE_NIL
) {
290 errx(EXIT_FAILURE
, "failed to find device node %s", device
);
293 pcip
->pia_nexus
= DI_NODE_NIL
;
294 pcieadm_find_nexus(pcip
);
295 if (pcip
->pia_nexus
== DI_NODE_NIL
) {
296 errx(EXIT_FAILURE
, "failed to find nexus for %s", device
);
300 typedef struct pcieadm_cfgspace_file
{
302 } pcieadm_cfgspace_file_t
;
305 pcieadm_read_cfgspace_file(uint32_t off
, uint8_t len
, void *buf
, void *arg
)
308 pcieadm_cfgspace_file_t
*pcfi
= arg
;
311 ssize_t ret
= pread(pcfi
->pcfi_fd
, buf
+ bufoff
, len
, off
);
313 err(EXIT_FAILURE
, "failed to read %u bytes at %"
315 } else if (ret
== 0) {
316 warnx("hit unexpected EOF reading cfgspace from file "
317 "at offest %" PRIu32
", still wanted to read %u "
332 pcieadm_init_cfgspace_file(pcieadm_t
*pcip
, const char *path
,
333 pcieadm_cfgspace_f
*funcp
, void **arg
)
337 pcieadm_cfgspace_file_t
*pcfi
;
339 if (setppriv(PRIV_SET
, PRIV_EFFECTIVE
, pcip
->pia_priv_eff
) != 0) {
340 err(EXIT_FAILURE
, "failed to raise privileges");
343 if ((fd
= open(path
, O_RDONLY
)) < 0) {
344 err(EXIT_FAILURE
, "failed to open input file %s", path
);
347 if (fstat(fd
, &st
) != 0) {
348 err(EXIT_FAILURE
, "failed to get stat information for %s",
352 if (setppriv(PRIV_SET
, PRIV_EFFECTIVE
, pcip
->pia_priv_min
) != 0) {
353 err(EXIT_FAILURE
, "failed to reduce privileges");
356 if (S_ISDIR(st
.st_mode
)) {
357 errx(EXIT_FAILURE
, "input file %s is a directory, unable "
358 "to read data", path
);
361 if (S_ISLNK(st
.st_mode
)) {
362 errx(EXIT_FAILURE
, "input file %s is a symbolic link, unable "
363 "to read data", path
);
366 if (S_ISDOOR(st
.st_mode
)) {
367 errx(EXIT_FAILURE
, "input file %s is a door, unable "
368 "to read data", path
);
371 if (S_ISPORT(st
.st_mode
)) {
372 errx(EXIT_FAILURE
, "input file %s is an event port, unable "
373 "to read data", path
);
377 * Assume if we were given a FIFO, character/block device, socket, or
378 * something else that it's probably fine.
380 pcfi
= calloc(1, sizeof (*pcfi
));
382 err(EXIT_FAILURE
, "failed to allocate memory for reading "
383 "cfgspace data from a file");
388 *funcp
= pcieadm_read_cfgspace_file
;
392 pcieadm_fini_cfgspace_file(void *arg
)
394 pcieadm_cfgspace_file_t
*pcfi
= arg
;
395 VERIFY0(close(pcfi
->pcfi_fd
));
399 typedef struct pcieadm_cfgspace_kernel
{
405 } pcieadm_cfgspace_kernel_t
;
408 pcieadm_read_cfgspace_kernel(uint32_t off
, uint8_t len
, void *buf
, void *arg
)
410 pcieadm_cfgspace_kernel_t
*pck
= arg
;
411 pcieadm_t
*pcip
= pck
->pck_pci
;
412 pcitool_reg_t pci_reg
;
414 bzero(&pci_reg
, sizeof (pci_reg
));
415 pci_reg
.user_version
= PCITOOL_VERSION
;
416 pci_reg
.bus_no
= pck
->pck_bus
;
417 pci_reg
.dev_no
= pck
->pck_dev
;
418 pci_reg
.func_no
= pck
->pck_func
;
420 pci_reg
.offset
= off
;
421 pci_reg
.acc_attr
= PCITOOL_ACC_ATTR_ENDN_LTL
;
425 pci_reg
.acc_attr
+= PCITOOL_ACC_ATTR_SIZE_1
;
428 pci_reg
.acc_attr
+= PCITOOL_ACC_ATTR_SIZE_2
;
431 pci_reg
.acc_attr
+= PCITOOL_ACC_ATTR_SIZE_4
;
434 pci_reg
.acc_attr
+= PCITOOL_ACC_ATTR_SIZE_8
;
437 errx(EXIT_FAILURE
, "asked to read invalid size from kernel: %u",
441 if (setppriv(PRIV_SET
, PRIV_EFFECTIVE
, pcip
->pia_priv_eff
) != 0) {
442 err(EXIT_FAILURE
, "failed to raise privileges");
445 if (ioctl(pck
->pck_fd
, PCITOOL_DEVICE_GET_REG
, &pci_reg
) != 0) {
446 err(EXIT_FAILURE
, "failed to read device offset 0x%x", off
);
449 if (setppriv(PRIV_SET
, PRIV_EFFECTIVE
, pcip
->pia_priv_min
) != 0) {
450 err(EXIT_FAILURE
, "failed to reduce privileges");
455 *(uint8_t *)buf
= (uint8_t)pci_reg
.data
;
458 *(uint16_t *)buf
= (uint16_t)pci_reg
.data
;
461 *(uint32_t *)buf
= (uint32_t)pci_reg
.data
;
464 *(uint64_t *)buf
= (uint64_t)pci_reg
.data
;
472 pcieadm_init_cfgspace_kernel(pcieadm_t
*pcip
, pcieadm_cfgspace_f
*funcp
,
476 char nexus_reg
[PATH_MAX
];
477 int fd
, nregs
, *regs
;
478 pcieadm_cfgspace_kernel_t
*pck
;
480 if ((nexus_base
= di_devfs_path(pcip
->pia_nexus
)) == NULL
) {
481 err(EXIT_FAILURE
, "failed to get path to nexus node");
484 if (snprintf(nexus_reg
, sizeof (nexus_reg
), "/devices%s:reg",
485 nexus_base
) >= sizeof (nexus_reg
)) {
486 errx(EXIT_FAILURE
, "failed to construct nexus path, path "
491 if (setppriv(PRIV_SET
, PRIV_EFFECTIVE
, pcip
->pia_priv_eff
) != 0) {
492 err(EXIT_FAILURE
, "failed to raise privileges");
495 if ((fd
= open(nexus_reg
, O_RDONLY
)) < 0) {
496 err(EXIT_FAILURE
, "failed to open %s", nexus_reg
);
499 if (setppriv(PRIV_SET
, PRIV_EFFECTIVE
, pcip
->pia_priv_min
) != 0) {
500 err(EXIT_FAILURE
, "failed to reduce privileges");
503 nregs
= di_prop_lookup_ints(DDI_DEV_T_ANY
, pcip
->pia_devi
, "reg",
506 errx(EXIT_FAILURE
, "failed to lookup regs array for %s",
510 pck
= calloc(1, sizeof (pcieadm_cfgspace_kernel_t
));
512 err(EXIT_FAILURE
, "failed to allocate memory for reading "
513 "kernel cfgspace data");
518 pck
->pck_bus
= PCI_REG_BUS_G(regs
[0]);
519 pck
->pck_dev
= PCI_REG_DEV_G(regs
[0]);
520 pck
->pck_func
= PCI_REG_FUNC_G(regs
[0]);
522 *funcp
= pcieadm_read_cfgspace_kernel
;
527 pcieadm_fini_cfgspace_kernel(void *arg
)
529 pcieadm_cfgspace_kernel_t
*pck
= arg
;
531 VERIFY0(close(pck
->pck_fd
));
535 static const pcieadm_cmdtab_t pcieadm_cmds
[] = {
536 { "save-cfgspace", pcieadm_save_cfgspace
, pcieadm_save_cfgspace_usage
},
537 { "show-cfgspace", pcieadm_show_cfgspace
, pcieadm_show_cfgspace_usage
},
538 { "show-devs", pcieadm_show_devs
, pcieadm_show_devs_usage
},
543 pcieadm_usage(const char *format
, ...)
547 if (format
!= NULL
) {
550 va_start(ap
, format
);
555 (void) fprintf(stderr
, "usage: %s <subcommand> <args> ...\n\n",
558 for (cmd
= 0; pcieadm_cmds
[cmd
].pct_name
!= NULL
; cmd
++) {
559 if (pcieadm_cmds
[cmd
].pct_use
!= NULL
) {
560 pcieadm_cmds
[cmd
].pct_use(stderr
);
566 main(int argc
, char *argv
[])
570 pcieadm_progname
= basename(argv
[0]);
573 pcieadm_usage("missing required sub-command");
577 for (cmd
= 0; pcieadm_cmds
[cmd
].pct_name
!= NULL
; cmd
++) {
578 if (strcmp(pcieadm_cmds
[cmd
].pct_name
, argv
[1]) == 0) {
583 if (pcieadm_cmds
[cmd
].pct_name
== NULL
) {
584 pcieadm_usage("unknown sub-command: %s", argv
[1]);
590 pcieadm
.pia_cmdtab
= &pcieadm_cmds
[cmd
];
593 * Set up common things that all of pcieadm needs before dispatching to
594 * a specific sub-command.
596 pcieadm
.pia_pcidb
= pcidb_open(PCIDB_VERSION
);
597 if (pcieadm
.pia_pcidb
== NULL
) {
598 err(EXIT_FAILURE
, "failed to open PCI ID database");
601 pcieadm
.pia_root
= di_init("/", DINFOCPYALL
);
602 if (pcieadm
.pia_root
== DI_NODE_NIL
) {
603 err(EXIT_FAILURE
, "failed to initialize devinfo tree");
607 * Set up privileges now that we have already opened our core libraries.
608 * We first set up the minimum actual privilege set that we use while
609 * running. We next set up a second privilege set that has additional
610 * privileges that are intersected with the user's actual privileges and
611 * are appended to by the underlying command backends.
613 if ((pcieadm
.pia_priv_init
= priv_allocset()) == NULL
) {
614 err(EXIT_FAILURE
, "failed to allocate privilege set");
617 if (getppriv(PRIV_EFFECTIVE
, pcieadm
.pia_priv_init
) != 0) {
618 err(EXIT_FAILURE
, "failed to get current privileges");
621 if ((pcieadm
.pia_priv_min
= priv_allocset()) == NULL
) {
622 err(EXIT_FAILURE
, "failed to allocate privilege set");
625 if ((pcieadm
.pia_priv_eff
= priv_allocset()) == NULL
) {
626 err(EXIT_FAILURE
, "failed to allocate privilege set");
630 * Note, PRIV_FILE_READ is not removed from the basic set so that we
631 * can still open libraries that are required due to lazy loading.
633 priv_basicset(pcieadm
.pia_priv_min
);
634 VERIFY0(priv_delset(pcieadm
.pia_priv_min
, PRIV_FILE_LINK_ANY
));
635 VERIFY0(priv_delset(pcieadm
.pia_priv_min
, PRIV_PROC_INFO
));
636 VERIFY0(priv_delset(pcieadm
.pia_priv_min
, PRIV_PROC_SESSION
));
637 VERIFY0(priv_delset(pcieadm
.pia_priv_min
, PRIV_PROC_FORK
));
638 VERIFY0(priv_delset(pcieadm
.pia_priv_min
, PRIV_NET_ACCESS
));
639 VERIFY0(priv_delset(pcieadm
.pia_priv_min
, PRIV_FILE_WRITE
));
640 VERIFY0(priv_delset(pcieadm
.pia_priv_min
, PRIV_PROC_EXEC
));
641 VERIFY0(priv_delset(pcieadm
.pia_priv_min
, PRIV_PROC_EXEC
));
643 priv_copyset(pcieadm
.pia_priv_min
, pcieadm
.pia_priv_eff
);
644 priv_intersect(pcieadm
.pia_priv_init
, pcieadm
.pia_priv_eff
);
646 if (setppriv(PRIV_SET
, PRIV_EFFECTIVE
, pcieadm
.pia_priv_min
) != 0) {
647 err(EXIT_FAILURE
, "failed to reduce privileges");
650 return (pcieadm
.pia_cmdtab
->pct_func(&pcieadm
, argc
, argv
));