Merge commit '1f1540205fa6366266184180654434272c425ac2'
[unleashed.git] / bin / dis / dis_main.c
blob263e3c837c78a57dee5b26852bff7a9194562272
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
19 * CDDL HEADER END
23 * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
26 * Copyright 2011 Jason King. All rights reserved.
27 * Copyright 2012 Joshua M. Clulow <josh@sysmgr.org>
28 * Copyright 2015 Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
29 * Copyright 2018, Joyent, Inc.
32 #include <ctype.h>
33 #include <getopt.h>
34 #include <stdio.h>
35 #include <stdlib.h>
36 #include <string.h>
37 #include <sys/sysmacros.h>
38 #include <sys/elf_SPARC.h>
40 #include <libdisasm.h>
42 #include "dis_target.h"
43 #include "dis_util.h"
44 #include "dis_list.h"
46 int g_demangle; /* Demangle C++ names */
47 int g_quiet; /* Quiet mode */
48 int g_numeric; /* Numeric mode */
49 int g_flags; /* libdisasm language flags */
50 int g_doall; /* true if no functions or sections were given */
52 dis_namelist_t *g_funclist; /* list of functions to disassemble, if any */
53 dis_namelist_t *g_seclist; /* list of sections to disassemble, if any */
/*
 * How a section named on the command line is to be rendered. main() tags
 * sections given with -d as DIS_DATA_RELATIVE, with -D as DIS_DATA_ABSOLUTE
 * and with -t as DIS_TEXT.
 */
#define	DIS_DATA_RELATIVE	1
#define	DIS_DATA_ABSOLUTE	2
#define	DIS_TEXT		3
63 * libdisasm callback data. Keeps track of current data (function or section)
64 * and offset within that data.
66 typedef struct dis_buffer {
67 dis_tgt_t *db_tgt; /* current dis target */
68 void *db_data; /* function or section data */
69 uint64_t db_addr; /* address of function start */
70 size_t db_size; /* size of data */
71 uint64_t db_nextaddr; /* next address to be read */
72 } dis_buffer_t;
/* Minimum width of the symbol portion of an output line */
#define	MINSYMWIDTH	22
77 * Given a symbol+offset as returned by dis_tgt_lookup(), print an appropriately
78 * formatted symbol, based on the offset and current setttings.
80 void
81 getsymname(uint64_t addr, const char *symbol, off_t offset, char *buf,
82 size_t buflen)
84 if (symbol == NULL || g_numeric) {
85 if (g_flags & DIS_OCTAL)
86 (void) snprintf(buf, buflen, "0%llo", addr);
87 else
88 (void) snprintf(buf, buflen, "0x%llx", addr);
89 } else {
90 if (g_demangle)
91 symbol = dis_demangle(symbol);
93 if (offset == 0)
94 (void) snprintf(buf, buflen, "%s", symbol);
95 else if (g_flags & DIS_OCTAL)
96 (void) snprintf(buf, buflen, "%s+0%o", symbol, offset);
97 else
98 (void) snprintf(buf, buflen, "%s+0x%x", symbol, offset);
103 * Determine if we are on an architecture with fixed-size instructions,
104 * and if so, what size they are.
106 static int
107 insn_size(dis_handle_t *dhp)
109 int min = dis_min_instrlen(dhp);
110 int max = dis_max_instrlen(dhp);
112 if (min == max)
113 return (min);
115 return (0);
119 * The main disassembly routine. Given a fixed-sized buffer and starting
120 * address, disassemble the data using the supplied target and libdisasm handle.
122 void
123 dis_data(dis_tgt_t *tgt, dis_handle_t *dhp, uint64_t addr, void *data,
124 size_t datalen)
126 dis_buffer_t db = { 0 };
127 char buf[BUFSIZE];
128 char symbuf[BUFSIZE];
129 const char *symbol;
130 const char *last_symbol;
131 off_t symoffset;
132 int i;
133 int bytesperline;
134 size_t symsize;
135 int isfunc;
136 size_t symwidth = 0;
137 int ret;
138 int insz = insn_size(dhp);
140 db.db_tgt = tgt;
141 db.db_data = data;
142 db.db_addr = addr;
143 db.db_size = datalen;
145 dis_set_data(dhp, &db);
147 if ((bytesperline = dis_max_instrlen(dhp)) > 6)
148 bytesperline = 6;
150 symbol = NULL;
152 while (addr < db.db_addr + db.db_size) {
154 ret = dis_disassemble(dhp, addr, buf, BUFSIZE);
155 if (ret != 0 && insz > 0) {
157 * Since we know instructions are fixed size, we
158 * always know the address of the next instruction
160 (void) snprintf(buf, sizeof (buf),
161 "*** invalid opcode ***");
162 db.db_nextaddr = addr + insz;
164 } else if (ret != 0) {
165 off_t next;
167 (void) snprintf(buf, sizeof (buf),
168 "*** invalid opcode ***");
171 * On architectures with variable sized instructions
172 * we have no way to figure out where the next
173 * instruction starts if we encounter an invalid
174 * instruction. Instead we print the rest of the
175 * instruction stream as hex until we reach the
176 * next valid symbol in the section.
178 if ((next = dis_tgt_next_symbol(tgt, addr)) == 0) {
179 db.db_nextaddr = db.db_addr + db.db_size;
180 } else {
181 if (next > db.db_size)
182 db.db_nextaddr = db.db_addr +
183 db.db_size;
184 else
185 db.db_nextaddr = addr + next;
190 * Print out the line as:
192 * address: bytes text
194 * If there are more than 6 bytes in any given instruction,
195 * spread the bytes across two lines. We try to get symbolic
196 * information for the address, but if that fails we print out
197 * the numeric address instead.
199 * We try to keep the address portion of the text aligned at
200 * MINSYMWIDTH characters. If we are disassembling a function
201 * with a long name, this can be annoying. So we pick a width
202 * based on the maximum width that the current symbol can be.
203 * This at least produces text aligned within each function.
205 last_symbol = symbol;
206 symbol = dis_tgt_lookup(tgt, addr, &symoffset, 1, &symsize,
207 &isfunc);
208 if (symbol == NULL) {
209 symbol = dis_find_section(tgt, addr, &symoffset);
210 symsize = symoffset;
213 if (symbol != last_symbol)
214 getsymname(addr, symbol, symsize, symbuf,
215 sizeof (symbuf));
217 symwidth = MAX(symwidth, strlen(symbuf));
218 getsymname(addr, symbol, symoffset, symbuf, sizeof (symbuf));
221 * If we've crossed a new function boundary, print out the
222 * function name on a blank line.
224 if (!g_quiet && symoffset == 0 && symbol != NULL && isfunc)
225 (void) printf("%s()\n", symbol);
227 (void) printf(" %s:%*s ", symbuf,
228 symwidth - strlen(symbuf), "");
230 /* print bytes */
231 for (i = 0; i < MIN(bytesperline, (db.db_nextaddr - addr));
232 i++) {
233 int byte = *((uchar_t *)data + (addr - db.db_addr) + i);
234 if (g_flags & DIS_OCTAL)
235 (void) printf("%03o ", byte);
236 else
237 (void) printf("%02x ", byte);
240 /* trailing spaces for missing bytes */
241 for (; i < bytesperline; i++) {
242 if (g_flags & DIS_OCTAL)
243 (void) printf(" ");
244 else
245 (void) printf(" ");
248 /* contents of disassembly */
249 (void) printf(" %s", buf);
251 /* excess bytes that spill over onto subsequent lines */
252 for (; i < db.db_nextaddr - addr; i++) {
253 int byte = *((uchar_t *)data + (addr - db.db_addr) + i);
254 if (i % bytesperline == 0)
255 (void) printf("\n %*s ", symwidth, "");
256 if (g_flags & DIS_OCTAL)
257 (void) printf("%03o ", byte);
258 else
259 (void) printf("%02x ", byte);
262 (void) printf("\n");
264 addr = db.db_nextaddr;
269 * libdisasm wrapper around symbol lookup. Invoke the target-specific lookup
270 * function, and convert the result using getsymname().
273 do_lookup(void *data, uint64_t addr, char *buf, size_t buflen, uint64_t *start,
274 size_t *symlen)
276 dis_buffer_t *db = data;
277 const char *symbol;
278 off_t offset;
279 size_t size;
282 * If NULL symbol is returned, getsymname takes care of
283 * printing appropriate address in buf instead of symbol.
285 symbol = dis_tgt_lookup(db->db_tgt, addr, &offset, 0, &size, NULL);
287 if (buf != NULL)
288 getsymname(addr, symbol, offset, buf, buflen);
290 if (start != NULL)
291 *start = addr - offset;
292 if (symlen != NULL)
293 *symlen = size;
295 if (symbol == NULL)
296 return (-1);
298 return (0);
302 * libdisasm wrapper around target reading. libdisasm will always read data
303 * in order, so update our current offset within the buffer appropriately.
304 * We only support reading from within the current object; libdisasm should
305 * never ask us to do otherwise.
308 do_read(void *data, uint64_t addr, void *buf, size_t len)
310 dis_buffer_t *db = data;
311 size_t offset;
313 if (addr < db->db_addr || addr >= db->db_addr + db->db_size)
314 return (-1);
316 offset = addr - db->db_addr;
317 len = MIN(len, db->db_size - offset);
319 (void) memcpy(buf, (char *)db->db_data + offset, len);
321 db->db_nextaddr = addr + len;
323 return (len);
/*
 * Routine to dump raw data in a human-readable format. Used by the -d and -D
 * options. We model our output after the xxd(1) program, which gives nicely
 * formatted output, along with an ASCII translation of the result.
 *
 *   addr	address to display for the first byte of 'data'
 *   data	bytes to dump
 *   datalen	number of bytes in 'data'
 *
 * Output is written to stdout in rows of 16 bytes. Rows start on a 16-byte
 * boundary, so the first and last rows may contain blank positions for
 * addresses outside [addr, addr + datalen).
 */
void
dump_data(uint64_t addr, void *data, size_t datalen)
{
	/*
	 * Keep the running address 64 bits wide regardless of the pointer
	 * size of the build, so large addresses are neither truncated here
	 * nor when printed.
	 */
	uint64_t curaddr = addr & ~(uint64_t)0xf;
	uint64_t end = addr + datalen;
	const uint8_t *bytes = data;
	int i;
	int width;

	/*
	 * Determine if the range given to us fits in 32 bits, in which case
	 * the address is shown with 8 hex digits instead of 16.
	 */
	if ((end & 0xffffffff00000000ULL) == 0ULL)
		width = 8;
	else
		width = 16;

	while (curaddr < end) {
		/*
		 * Display leading address
		 */
		(void) printf("%0*llx: ", width, (unsigned long long)curaddr);

		/*
		 * Print out data in two-byte chunks. If the current address
		 * is before the starting address or after the end of the
		 * section, print two spaces so the column stays as wide as
		 * a printed byte.
		 */
		for (i = 0; i < 16; i++) {
			uint64_t cur = curaddr + i;

			if (cur < addr || cur >= end)
				(void) printf("  ");
			else
				(void) printf("%02x", bytes[cur - addr]);

			if (i & 1)
				(void) printf(" ");
		}

		(void) printf(" ");

		/*
		 * Print out the ASCII representation
		 */
		for (i = 0; i < 16; i++) {
			uint64_t cur = curaddr + i;

			if (cur < addr || cur >= end) {
				(void) printf(" ");
			} else {
				uint8_t byte = bytes[cur - addr];

				if (isprint(byte))
					(void) printf("%c", byte);
				else
					(void) printf(".");
			}
		}

		(void) printf("\n");

		curaddr += 16;
	}
}
395 * Disassemble a section implicitly specified as part of a file. This function
396 * is called for all sections when no other flags are specified. We ignore any
397 * data sections, and print out only those sections containing text.
399 void
400 dis_text_section(dis_tgt_t *tgt, dis_scn_t *scn, void *data)
402 dis_handle_t *dhp = data;
404 /* ignore data sections */
405 if (!dis_section_istext(scn))
406 return;
408 if (!g_quiet)
409 (void) printf("\nsection %s\n", dis_section_name(scn));
411 dis_data(tgt, dhp, dis_section_addr(scn), dis_section_data(scn),
412 dis_section_size(scn));
416 * Structure passed to dis_named_{section,function} which keeps track of both
417 * the target and the libdisasm handle.
419 typedef struct callback_arg {
420 dis_tgt_t *ca_tgt;
421 dis_handle_t *ca_handle;
422 } callback_arg_t;
425 * Disassemble a section explicitly named with -s, -d, or -D. The 'type'
426 * argument contains the type of argument given. Pass the data onto the
427 * appropriate helper routine.
429 void
430 dis_named_section(dis_scn_t *scn, int type, void *data)
432 callback_arg_t *ca = data;
434 if (!g_quiet)
435 (void) printf("\nsection %s\n", dis_section_name(scn));
437 switch (type) {
438 case DIS_DATA_RELATIVE:
439 dump_data(0, dis_section_data(scn), dis_section_size(scn));
440 break;
441 case DIS_DATA_ABSOLUTE:
442 dump_data(dis_section_addr(scn), dis_section_data(scn),
443 dis_section_size(scn));
444 break;
445 case DIS_TEXT:
446 dis_data(ca->ca_tgt, ca->ca_handle, dis_section_addr(scn),
447 dis_section_data(scn), dis_section_size(scn));
448 break;
453 * Disassemble a function explicitly specified with '-F'. The 'type' argument
454 * is unused.
456 /* ARGSUSED */
457 void
458 dis_named_function(dis_func_t *func, int type, void *data)
460 callback_arg_t *ca = data;
462 dis_data(ca->ca_tgt, ca->ca_handle, dis_function_addr(func),
463 dis_function_data(func), dis_function_size(func));
467 * Disassemble a complete file. First, we determine the type of the file based
468 * on the ELF machine type, and instantiate a version of the disassembler
469 * appropriate for the file. We then resolve any named sections or functions
470 * against the file, and iterate over the results (or all sections if no flags
471 * were specified).
473 void
474 dis_file(const char *filename)
476 dis_tgt_t *tgt, *current;
477 dis_scnlist_t *sections;
478 dis_funclist_t *functions;
479 dis_handle_t *dhp;
480 GElf_Ehdr ehdr;
483 * First, initialize the target
485 if ((tgt = dis_tgt_create(filename)) == NULL)
486 return;
488 if (!g_quiet)
489 (void) printf("disassembly for %s\n\n", filename);
492 * A given file may contain multiple targets (if it is an archive, for
493 * example). We iterate over all possible targets if this is the case.
495 for (current = tgt; current != NULL; current = dis_tgt_next(current)) {
496 dis_tgt_ehdr(current, &ehdr);
499 * Eventually, this should probably live within libdisasm, and
500 * we should be able to disassemble targets from different
501 * architectures. For now, we only support objects as the
502 * native machine type.
504 switch (ehdr.e_machine) {
505 case EM_SPARC:
506 if (ehdr.e_ident[EI_CLASS] != ELFCLASS32 ||
507 ehdr.e_ident[EI_DATA] != ELFDATA2MSB) {
508 warn("invalid E_IDENT field for SPARC object");
509 return;
511 g_flags |= DIS_SPARC_V8;
512 break;
514 case EM_SPARC32PLUS:
516 uint64_t flags = ehdr.e_flags & EF_SPARC_32PLUS_MASK;
518 if (ehdr.e_ident[EI_CLASS] != ELFCLASS32 ||
519 ehdr.e_ident[EI_DATA] != ELFDATA2MSB) {
520 warn("invalid E_IDENT field for SPARC object");
521 return;
524 if (flags != 0 &&
525 (flags & (EF_SPARC_32PLUS | EF_SPARC_SUN_US1 |
526 EF_SPARC_SUN_US3)) != EF_SPARC_32PLUS)
527 g_flags |= DIS_SPARC_V9 | DIS_SPARC_V9_SGI;
528 else
529 g_flags |= DIS_SPARC_V9;
530 break;
533 case EM_SPARCV9:
534 if (ehdr.e_ident[EI_CLASS] != ELFCLASS64 ||
535 ehdr.e_ident[EI_DATA] != ELFDATA2MSB) {
536 warn("invalid E_IDENT field for SPARC object");
537 return;
540 g_flags |= DIS_SPARC_V9 | DIS_SPARC_V9_SGI;
541 break;
543 case EM_386:
544 g_flags |= DIS_X86 | DIS_SIZE_32;
545 break;
547 case EM_AMD64:
548 g_flags |= DIS_X86 | DIS_SIZE_64;
549 break;
551 case EM_S370:
552 g_flags |= DIS_S3X0 | DIS_SIZE_24;
554 if (ehdr.e_ident[EI_CLASS] != ELFCLASS32 ||
555 ehdr.e_ident[EI_DATA] != ELFDATA2MSB) {
556 warn("invalid E_IDENT field for S370 object");
557 return;
559 break;
561 case EM_S390:
563 * Both 390 and z/Architecture use EM_S390, the only
564 * differences is the class: ELFCLASS32 for plain
565 * old s390 and ELFCLASS64 for z/Architecture (aka.
566 * s390x).
568 if (ehdr.e_ident[EI_CLASS] == ELFCLASS32) {
569 g_flags |= DIS_S3X0 | DIS_SIZE_32;
570 } else if (ehdr.e_ident[EI_CLASS] == ELFCLASS64) {
571 g_flags |= DIS_S3X0 | DIS_SIZE_64;
572 } else {
573 warn("invalid E_IDENT field for S390 object");
574 return;
577 if (ehdr.e_ident[EI_DATA] != ELFDATA2MSB) {
578 warn("invalid E_IDENT field for S390 object");
579 return;
581 break;
583 case EM_RISCV:
585 * RISC-V is defined to be litle endian. The current ISA
586 * makes it clear that the 64-bit instructions can
587 * co-exist with the 32-bit ones and therefore we don't
588 * need a separate elf class at this time.
590 if (ehdr.e_ident[EI_DATA] != ELFDATA2LSB) {
591 warn("invalid EI_DATA field for RISC-V object");
592 return;
595 if (ehdr.e_ident[EI_CLASS] == ELFCLASS32) {
596 g_flags |= DIS_RISCV | DIS_SIZE_32;
597 } else if (ehdr.e_ident[EI_CLASS] == ELFCLASS64) {
598 g_flags |= DIS_RISCV | DIS_SIZE_64;
599 } else {
600 warn("invalid EI_CLASS field for RISC-V "
601 "object");
602 return;
604 break;
606 default:
607 die("%s: unsupported ELF machine 0x%x", filename,
608 ehdr.e_machine);
612 * If ET_REL (.o), printing immediate symbols is likely to
613 * result in garbage, as symbol lookups on unrelocated
614 * immediates find false and useless matches.
617 if (ehdr.e_type == ET_REL)
618 g_flags |= DIS_NOIMMSYM;
620 if (!g_quiet && dis_tgt_member(current) != NULL)
621 (void) printf("\narchive member %s\n",
622 dis_tgt_member(current));
625 * Instantiate a libdisasm handle based on the file type.
627 if ((dhp = dis_handle_create(g_flags, current, do_lookup,
628 do_read)) == NULL)
629 die("%s: failed to initialize disassembler: %s",
630 filename, dis_strerror(dis_errno()));
632 if (g_doall) {
634 * With no arguments, iterate over all sections and
635 * disassemble only those that contain text.
637 dis_tgt_section_iter(current, dis_text_section, dhp);
638 } else {
639 callback_arg_t ca;
641 ca.ca_tgt = current;
642 ca.ca_handle = dhp;
645 * If sections or functions were explicitly specified,
646 * resolve those names against the object, and iterate
647 * over just the resulting data.
649 sections = dis_namelist_resolve_sections(g_seclist,
650 current);
651 functions = dis_namelist_resolve_functions(g_funclist,
652 current);
654 dis_scnlist_iter(sections, dis_named_section, &ca);
655 dis_funclist_iter(functions, dis_named_function, &ca);
657 dis_scnlist_destroy(sections);
658 dis_funclist_destroy(functions);
661 dis_handle_destroy(dhp);
664 dis_tgt_destroy(tgt);
/*
 * Print the command synopsis to stderr and terminate with exit status 2.
 */
void
usage(void)
{
	(void) fputs("usage: dis [-Cnoq] [-d sec] [-D sec] [-F function] "
	    "[-t sec] file ...\n", stderr);
	exit(2);
}
676 main(int argc, char **argv)
678 int optchar;
679 int i;
680 g_funclist = dis_namelist_create();
681 g_seclist = dis_namelist_create();
683 while ((optchar = getopt(argc, argv, "Cd:D:F:ot:qn")) != -1) {
684 switch (optchar) {
685 case 'C':
686 g_demangle = 1;
687 break;
688 case 'd':
689 dis_namelist_add(g_seclist, optarg, DIS_DATA_RELATIVE);
690 break;
691 case 'D':
692 dis_namelist_add(g_seclist, optarg, DIS_DATA_ABSOLUTE);
693 break;
694 case 'F':
695 dis_namelist_add(g_funclist, optarg, 0);
696 break;
697 case 'n':
698 g_numeric = 1;
699 break;
700 case 'o':
701 g_flags |= DIS_OCTAL;
702 break;
703 case 'q':
704 g_quiet = 1;
705 break;
706 case 't':
707 dis_namelist_add(g_seclist, optarg, DIS_TEXT);
708 break;
709 default:
710 usage();
711 break;
715 argc -= optind;
716 argv += optind;
718 if (argc == 0) {
719 warn("no objects specified");
720 usage();
723 if (dis_namelist_empty(g_funclist) && dis_namelist_empty(g_seclist))
724 g_doall = 1;
726 for (i = 0; i < argc; i++)
727 dis_file(argv[i]);
729 dis_namelist_destroy(g_funclist);
730 dis_namelist_destroy(g_seclist);
732 return (g_error);