4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
26 * Copyright 2011 Jason King. All rights reserved.
27 * Copyright 2012 Joshua M. Clulow <josh@sysmgr.org>
28 * Copyright 2015 Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
29 * Copyright 2018, Joyent, Inc.
#include <ctype.h>
#include <sys/sysmacros.h>
#include <sys/elf_SPARC.h>

#include <libdisasm.h>

#include "dis_target.h"
46 int g_demangle
; /* Demangle C++ names */
47 int g_quiet
; /* Quiet mode */
48 int g_numeric
; /* Numeric mode */
49 int g_flags
; /* libdisasm language flags */
50 int g_doall
; /* true if no functions or sections were given */
52 dis_namelist_t
*g_funclist
; /* list of functions to disassemble, if any */
53 dis_namelist_t
*g_seclist
; /* list of sections to disassemble, if any */
/*
 * Section options for -d, -D, and -t
 */
#define	DIS_DATA_RELATIVE	1
#define	DIS_DATA_ABSOLUTE	2
63 * libdisasm callback data. Keeps track of current data (function or section)
64 * and offset within that data.
66 typedef struct dis_buffer
{
67 dis_tgt_t
*db_tgt
; /* current dis target */
68 void *db_data
; /* function or section data */
69 uint64_t db_addr
; /* address of function start */
70 size_t db_size
; /* size of data */
71 uint64_t db_nextaddr
; /* next address to be read */
/* Minimum width of the symbol portion of an output line */
#define	MINSYMWIDTH	22
77 * Given a symbol+offset as returned by dis_tgt_lookup(), print an appropriately
78 * formatted symbol, based on the offset and current setttings.
81 getsymname(uint64_t addr
, const char *symbol
, uint64_t offset
, char *buf
,
84 if (symbol
== NULL
|| g_numeric
) {
85 if (g_flags
& DIS_OCTAL
)
86 (void) snprintf(buf
, buflen
, "0%llo", addr
);
88 (void) snprintf(buf
, buflen
, "0x%llx", addr
);
91 symbol
= dis_demangle(symbol
);
94 (void) snprintf(buf
, buflen
, "%s", symbol
);
95 else if (g_flags
& DIS_OCTAL
)
96 (void) snprintf(buf
, buflen
, "%s+0%llo", symbol
, offset
);
98 (void) snprintf(buf
, buflen
, "%s+0x%llx", symbol
, offset
);
103 * Determine if we are on an architecture with fixed-size instructions,
104 * and if so, what size they are.
107 insn_size(dis_handle_t
*dhp
)
109 int min
= dis_min_instrlen(dhp
);
110 int max
= dis_max_instrlen(dhp
);
119 * The main disassembly routine. Given a fixed-sized buffer and starting
120 * address, disassemble the data using the supplied target and libdisasm handle.
/*
 * Walk the buffer one instruction at a time.  Each pass disassembles the
 * instruction at addr into buf, works out a symbolic label for addr, prints
 * the label, the raw bytes (octal when DIS_OCTAL is set in g_flags, hex
 * otherwise) and the disassembled text, then continues from db.db_nextaddr.
 *
 * When dis_disassemble() returns non-zero the text is replaced with
 * "*** invalid opcode ***" and the resume address is chosen here: addr + insz
 * when insn_size() reported a positive size, otherwise from the result of
 * dis_tgt_next_symbol().
 */
123 dis_data(dis_tgt_t
*tgt
, dis_handle_t
*dhp
, uint64_t addr
, void *data
,
126 dis_buffer_t db
= { 0 };
128 char symbuf
[BUFSIZE
];
130 const char *last_symbol
;
138 int insz
= insn_size(dhp
);
/* db is the per-call state handed to libdisasm through dis_set_data() */
143 db
.db_size
= datalen
;
145 dis_set_data(dhp
, &db
);
147 if ((bytesperline
= dis_max_instrlen(dhp
)) > 6)
152 while (addr
< db
.db_addr
+ db
.db_size
) {
154 ret
= dis_disassemble(dhp
, addr
, buf
, BUFSIZE
);
155 if (ret
!= 0 && insz
> 0) {
157 * Since we know instructions are fixed size, we
158 * always know the address of the next instruction
160 (void) snprintf(buf
, sizeof (buf
),
161 "*** invalid opcode ***");
162 db
.db_nextaddr
= addr
+ insz
;
164 } else if (ret
!= 0) {
167 (void) snprintf(buf
, sizeof (buf
),
168 "*** invalid opcode ***");
171 * On architectures with variable sized instructions
172 * we have no way to figure out where the next
173 * instruction starts if we encounter an invalid
174 * instruction. Instead we print the rest of the
175 * instruction stream as hex until we reach the
176 * next valid symbol in the section.
178 if ((next
= dis_tgt_next_symbol(tgt
, addr
)) == 0) {
179 db
.db_nextaddr
= db
.db_addr
+ db
.db_size
;
/*
 * NOTE(review): next is added to addr below, so it looks like an offset
 * from addr, yet it is compared here against db.db_size (the size of the
 * whole buffer) rather than against the bytes remaining after addr.
 * Confirm that db_nextaddr cannot end up past db.db_addr + db.db_size,
 * since the byte-printing loops below index data up to db_nextaddr.
 */
181 if (next
> db
.db_size
)
182 db
.db_nextaddr
= db
.db_addr
+
185 db
.db_nextaddr
= addr
+ next
;
190 * Print out the line as:
192 * address: bytes text
194 * If there are more than 6 bytes in any given instruction,
195 * spread the bytes across two lines. We try to get symbolic
196 * information for the address, but if that fails we print out
197 * the numeric address instead.
199 * We try to keep the address portion of the text aligned at
200 * MINSYMWIDTH characters. If we are disassembling a function
201 * with a long name, this can be annoying. So we pick a width
202 * based on the maximum width that the current symbol can be.
203 * This at least produces text aligned within each function.
205 last_symbol
= symbol
;
206 symbol
= dis_tgt_lookup(tgt
, addr
, &symoffset
, 1, &symsize
,
/* No symbol found: fall back to the section lookup */
208 if (symbol
== NULL
) {
209 symbol
= dis_find_section(tgt
, addr
, &symoffset
);
/*
 * When the symbol changes it is formatted with symsize as the offset, and
 * the length of that string feeds symwidth, which only ever grows (MAX).
 *
 * NOTE(review): symbuf has no initializer; if symbol equals last_symbol on
 * the first pass, strlen() reads symbuf before anything visible here has
 * written it -- confirm.
 */
213 if (symbol
!= last_symbol
)
214 getsymname(addr
, symbol
, symsize
, symbuf
,
217 symwidth
= MAX(symwidth
, strlen(symbuf
));
218 getsymname(addr
, symbol
, symoffset
, symbuf
, sizeof (symbuf
));
221 * If we've crossed a new function boundary, print out the
222 * function name on a blank line.
224 if (!g_quiet
&& symoffset
== 0 && symbol
!= NULL
&& isfunc
)
225 (void) printf("%s()\n", symbol
);
/* Label, padded out to symwidth columns */
227 (void) printf(" %s:%*s ", symbuf
,
228 (int)(symwidth
- strlen(symbuf
)), "");
/* First line of bytes: at most bytesperline of this instruction's bytes */
231 for (i
= 0; i
< MIN(bytesperline
, (db
.db_nextaddr
- addr
));
233 int byte
= *((uchar_t
*)data
+ (addr
- db
.db_addr
) + i
);
234 if (g_flags
& DIS_OCTAL
)
235 (void) printf("%03o ", byte
);
237 (void) printf("%02x ", byte
);
240 /* trailing spaces for missing bytes */
241 for (; i
< bytesperline
; i
++) {
242 if (g_flags
& DIS_OCTAL
)
248 /* contents of disassembly */
249 (void) printf(" %s", buf
);
251 /* excess bytes that spill over onto subsequent lines */
252 for (; i
< db
.db_nextaddr
- addr
; i
++) {
253 int byte
= *((uchar_t
*)data
+ (addr
- db
.db_addr
) + i
);
254 if (i
% bytesperline
== 0)
255 (void) printf("\n %*s ", symwidth
, "");
256 if (g_flags
& DIS_OCTAL
)
257 (void) printf("%03o ", byte
);
259 (void) printf("%02x ", byte
);
/* Resume at the address chosen above or recorded during disassembly */
264 addr
= db
.db_nextaddr
;
269 * libdisasm wrapper around symbol lookup. Invoke the target-specific lookup
270 * function, and convert the result using getsymname().
/*
 * Symbol lookup callback.  data is the dis_buffer_t describing the current
 * disassembly; the target-specific lookup is done on db->db_tgt, the result
 * is formatted into buf by getsymname(), and addr - offset is stored through
 * start.
 *
 * NOTE(review): the trailing parameter, any NULL checks on buf/start and the
 * return statements are not visible in this excerpt -- confirm against the
 * libdisasm lookup callback contract.
 */
273 do_lookup(void *data
, uint64_t addr
, char *buf
, size_t buflen
, uint64_t *start
,
276 dis_buffer_t
*db
= data
;
282 * If NULL symbol is returned, getsymname takes care of
283 * printing appropriate address in buf instead of symbol.
285 symbol
= dis_tgt_lookup(db
->db_tgt
, addr
, &offset
, 0, &size
, NULL
);
288 getsymname(addr
, symbol
, offset
, buf
, buflen
);
/* Start address of the symbol that contains addr */
291 *start
= addr
- offset
;
302 * libdisasm wrapper around target reading. libdisasm will always read data
303 * in order, so update our current offset within the buffer appropriately.
304 * We only support reading from within the current object; libdisasm should
305 * never ask us to do otherwise.
308 do_read(void *data
, uint64_t addr
, void *buf
, size_t len
)
310 dis_buffer_t
*db
= data
;
313 if (addr
< db
->db_addr
|| addr
>= db
->db_addr
+ db
->db_size
)
316 offset
= addr
- db
->db_addr
;
317 len
= MIN(len
, db
->db_size
- offset
);
319 (void) memcpy(buf
, (char *)db
->db_data
+ offset
, len
);
321 db
->db_nextaddr
= addr
+ len
;
/*
 * Routine to dump raw data in a human-readable format.  Used by the -d and -D
 * options.  We model our output after the xxd(1) program, which gives nicely
 * formatted output, along with an ASCII translation of the result.
 *
 *	addr	address to display for the first byte of data
 *	data	bytes to dump
 *	datalen	number of bytes in data
 *
 * Output is in rows of 16 bytes aligned down to a 16-byte boundary.  The
 * address is held in a uint64_t throughout: it is an address in the object
 * being examined, not a pointer in this process, so it must not be narrowed
 * to the host's pointer width.
 */
void
dump_data(uint64_t addr, void *data, size_t datalen)
{
	uint64_t curaddr = addr & ~(uint64_t)0xf;
	uint64_t end = addr + datalen;
	const uint8_t *bytes = data;
	int width;
	int i;

	/*
	 * Determine if the address given to us fits in 32-bit range, in which
	 * case use a 4-byte width.
	 */
	if ((end & 0xffffffff00000000ULL) == 0ULL)
		width = 8;
	else
		width = 16;

	while (curaddr < end) {
		/*
		 * Display leading address
		 */
		(void) printf("%0*" PRIx64 ": ", width, curaddr);

		/*
		 * Print out data in two-byte chunks.  If the current address
		 * is before the starting address or after the end of the
		 * section, print spaces.
		 */
		for (i = 0; i < 16; i++) {
			if (curaddr + i < addr || curaddr + i >= end)
				(void) printf("  ");
			else
				(void) printf("%02x",
				    bytes[curaddr + i - addr]);

			if (i & 1)
				(void) printf(" ");
		}

		(void) printf(" ");

		/*
		 * Print out the ASCII representation, substituting '.' for
		 * anything that is not printable so that control bytes in
		 * the data cannot disturb the terminal or the column layout.
		 */
		for (i = 0; i < 16; i++) {
			if (curaddr + i < addr || curaddr + i >= end) {
				(void) printf(" ");
			} else {
				uint8_t byte = bytes[curaddr + i - addr];

				(void) printf("%c", isprint(byte) ? byte : '.');
			}
		}

		(void) printf("\n");

		curaddr += 16;
	}
}
395 * Disassemble a section implicitly specified as part of a file. This function
396 * is called for all sections when no other flags are specified. We ignore any
397 * data sections, and print out only those sections containing text.
400 dis_text_section(dis_tgt_t
*tgt
, dis_scn_t
*scn
, void *data
)
402 dis_handle_t
*dhp
= data
;
404 /* ignore data sections */
405 if (!dis_section_istext(scn
))
409 (void) printf("\nsection %s\n", dis_section_name(scn
));
411 dis_data(tgt
, dhp
, dis_section_addr(scn
), dis_section_data(scn
),
412 dis_section_size(scn
));
416 * Structure passed to dis_named_{section,function} which keeps track of both
417 * the target and the libdisasm handle.
419 typedef struct callback_arg
{
421 dis_handle_t
*ca_handle
;
425 * Disassemble a section explicitly named with -s, -d, or -D. The 'type'
426 * argument contains the type of argument given. Pass the data onto the
427 * appropriate helper routine.
430 dis_named_section(dis_scn_t
*scn
, int type
, void *data
)
432 callback_arg_t
*ca
= data
;
435 (void) printf("\nsection %s\n", dis_section_name(scn
));
438 case DIS_DATA_RELATIVE
:
439 dump_data(0, dis_section_data(scn
), dis_section_size(scn
));
441 case DIS_DATA_ABSOLUTE
:
442 dump_data(dis_section_addr(scn
), dis_section_data(scn
),
443 dis_section_size(scn
));
446 dis_data(ca
->ca_tgt
, ca
->ca_handle
, dis_section_addr(scn
),
447 dis_section_data(scn
), dis_section_size(scn
));
453 * Disassemble a function explicitly specified with '-F'. The 'type' argument
458 dis_named_function(dis_func_t
*func
, int type
, void *data
)
460 callback_arg_t
*ca
= data
;
462 dis_data(ca
->ca_tgt
, ca
->ca_handle
, dis_function_addr(func
),
463 dis_function_data(func
), dis_function_size(func
));
467 * Disassemble a complete file. First, we determine the type of the file based
468 * on the ELF machine type, and instantiate a version of the disassembler
469 * appropriate for the file. We then resolve any named sections or functions
470 * against the file, and iterate over the results (or all sections if no flags
/*
 * Steps visible below: create the target(s) for filename, print the
 * "disassembly for" banner, then for each target read its ELF header, derive
 * libdisasm flags from e_machine/e_ident, create a libdisasm handle, and
 * either iterate over every section with dis_text_section or over the
 * sections and functions resolved from g_seclist and g_funclist.  The handle
 * is destroyed per target and the target list once at the end.
 */
474 dis_file(const char *filename
)
476 dis_tgt_t
*tgt
, *current
;
477 dis_scnlist_t
*sections
;
478 dis_funclist_t
*functions
;
483 * First, initialize the target
485 if ((tgt
= dis_tgt_create(filename
)) == NULL
)
489 (void) printf("disassembly for %s\n\n", filename
);
492 * A given file may contain multiple targets (if it is an archive, for
493 * example). We iterate over all possible targets if this is the case.
495 for (current
= tgt
; current
!= NULL
; current
= dis_tgt_next(current
)) {
496 dis_tgt_ehdr(current
, &ehdr
);
499 * Eventually, this should probably live within libdisasm, and
500 * we should be able to disassemble targets from different
501 * architectures. For now, we only support objects as the
502 * native machine type.
/*
 * NOTE(review): every visible assignment in this switch OR-s bits into the
 * global g_flags and nothing visible clears them, so bits chosen for one
 * target or file may still be set for the next -- confirm whether g_flags
 * is reset elsewhere.
 */
504 switch (ehdr
.e_machine
) {
506 if (ehdr
.e_ident
[EI_CLASS
] != ELFCLASS32
||
507 ehdr
.e_ident
[EI_DATA
] != ELFDATA2MSB
) {
508 warn("invalid E_IDENT field for SPARC object");
511 g_flags
|= DIS_SPARC_V8
;
516 uint64_t flags
= ehdr
.e_flags
& EF_SPARC_32PLUS_MASK
;
518 if (ehdr
.e_ident
[EI_CLASS
] != ELFCLASS32
||
519 ehdr
.e_ident
[EI_DATA
] != ELFDATA2MSB
) {
520 warn("invalid E_IDENT field for SPARC object");
525 (flags
& (EF_SPARC_32PLUS
| EF_SPARC_SUN_US1
|
526 EF_SPARC_SUN_US3
)) != EF_SPARC_32PLUS
)
527 g_flags
|= DIS_SPARC_V9
| DIS_SPARC_V9_SGI
;
529 g_flags
|= DIS_SPARC_V9
;
534 if (ehdr
.e_ident
[EI_CLASS
] != ELFCLASS64
||
535 ehdr
.e_ident
[EI_DATA
] != ELFDATA2MSB
) {
536 warn("invalid E_IDENT field for SPARC object");
540 g_flags
|= DIS_SPARC_V9
| DIS_SPARC_V9_SGI
;
544 g_flags
|= DIS_X86
| DIS_SIZE_32
;
548 g_flags
|= DIS_X86
| DIS_SIZE_64
;
552 g_flags
|= DIS_S3X0
| DIS_SIZE_24
;
554 if (ehdr
.e_ident
[EI_CLASS
] != ELFCLASS32
||
555 ehdr
.e_ident
[EI_DATA
] != ELFDATA2MSB
) {
556 warn("invalid E_IDENT field for S370 object");
563 * Both 390 and z/Architecture use EM_S390, the only
564 * differences is the class: ELFCLASS32 for plain
565 * old s390 and ELFCLASS64 for z/Architecture (aka.
568 if (ehdr
.e_ident
[EI_CLASS
] == ELFCLASS32
) {
569 g_flags
|= DIS_S3X0
| DIS_SIZE_32
;
570 } else if (ehdr
.e_ident
[EI_CLASS
] == ELFCLASS64
) {
571 g_flags
|= DIS_S3X0
| DIS_SIZE_64
;
573 warn("invalid E_IDENT field for S390 object");
577 if (ehdr
.e_ident
[EI_DATA
] != ELFDATA2MSB
) {
578 warn("invalid E_IDENT field for S390 object");
585 * RISC-V is defined to be litle endian. The current ISA
586 * makes it clear that the 64-bit instructions can
587 * co-exist with the 32-bit ones and therefore we don't
588 * need a separate elf class at this time.
590 if (ehdr
.e_ident
[EI_DATA
] != ELFDATA2LSB
) {
591 warn("invalid EI_DATA field for RISC-V object");
595 if (ehdr
.e_ident
[EI_CLASS
] == ELFCLASS32
) {
596 g_flags
|= DIS_RISCV
| DIS_SIZE_32
;
597 } else if (ehdr
.e_ident
[EI_CLASS
] == ELFCLASS64
) {
598 g_flags
|= DIS_RISCV
| DIS_SIZE_64
;
600 warn("invalid EI_CLASS field for RISC-V "
607 die("%s: unsupported ELF machine 0x%x", filename
,
612 * If ET_REL (.o), printing immediate symbols is likely to
613 * result in garbage, as symbol lookups on unrelocated
614 * immediates find false and useless matches.
617 if (ehdr
.e_type
== ET_REL
)
618 g_flags
|= DIS_NOIMMSYM
;
/* Name the archive member being processed, unless in quiet mode */
620 if (!g_quiet
&& dis_tgt_member(current
) != NULL
)
621 (void) printf("\narchive member %s\n",
622 dis_tgt_member(current
));
625 * Instantiate a libdisasm handle based on the file type.
627 if ((dhp
= dis_handle_create(g_flags
, current
, do_lookup
,
629 die("%s: failed to initialize disassembler: %s",
630 filename
, dis_strerror(dis_errno()));
634 * With no arguments, iterate over all sections and
635 * disassemble only those that contain text.
637 dis_tgt_section_iter(current
, dis_text_section
, dhp
);
645 * If sections or functions were explicitly specified,
646 * resolve those names against the object, and iterate
647 * over just the resulting data.
649 sections
= dis_namelist_resolve_sections(g_seclist
,
651 functions
= dis_namelist_resolve_functions(g_funclist
,
/* Both iterators hand &ca to their callback as its data argument */
654 dis_scnlist_iter(sections
, dis_named_section
, &ca
);
655 dis_funclist_iter(functions
, dis_named_function
, &ca
);
657 dis_scnlist_destroy(sections
);
658 dis_funclist_destroy(functions
);
661 dis_handle_destroy(dhp
);
/* Destroyed through tgt, the head of the list that was iterated above */
664 dis_tgt_destroy(tgt
);
/* Write the command-line synopsis to stderr */
670 (void) fprintf(stderr
, "usage: dis [-Cnoq] [-d sec] [-D sec] "
671 "[-F function] [-t sec] file ...\n");
/*
 * Entry point.  Creates the function and section name lists, fills them from
 * the command-line options, and destroys them before finishing.
 *
 * NOTE(review): the option case labels, the handling of the non-option
 * arguments and the body of the final loop are not visible in this excerpt.
 */
676 main(int argc
, char **argv
)
680 g_funclist
= dis_namelist_create();
681 g_seclist
= dis_namelist_create();
/* Options taking an argument: -d, -D, -F and -t */
683 while ((optchar
= getopt(argc
, argv
, "Cd:D:F:ot:qn")) != -1) {
689 dis_namelist_add(g_seclist
, optarg
, DIS_DATA_RELATIVE
);
692 dis_namelist_add(g_seclist
, optarg
, DIS_DATA_ABSOLUTE
);
695 dis_namelist_add(g_funclist
, optarg
, 0);
701 g_flags
|= DIS_OCTAL
;
707 dis_namelist_add(g_seclist
, optarg
, DIS_TEXT
);
719 warn("no objects specified");
/* Tests whether neither a function nor a section was requested */
723 if (dis_namelist_empty(g_funclist
) && dis_namelist_empty(g_seclist
))
726 for (i
= 0; i
< argc
; i
++)
729 dis_namelist_destroy(g_funclist
);
730 dis_namelist_destroy(g_seclist
);