4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
26 * Copyright 2011 Jason King. All rights reserved.
27 * Copyright 2012 Joshua M. Clulow <josh@sysmgr.org>
28 * Copyright 2015 Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
36 #include <sys/sysmacros.h>
37 #include <sys/elf_SPARC.h>
39 #include <libdisasm.h>
41 #include "dis_target.h"
45 int g_demangle
; /* Demangle C++ names */
46 int g_quiet
; /* Quiet mode */
47 int g_numeric
; /* Numeric mode */
48 int g_flags
; /* libdisasm language flags */
49 int g_doall
; /* true if no functions or sections were given */
51 dis_namelist_t
*g_funclist
; /* list of functions to disassemble, if any */
52 dis_namelist_t
*g_seclist
; /* list of sections to disassemble, if any */
55 * Section options for -d, -D, and -t
57 #define DIS_DATA_RELATIVE 1
58 #define DIS_DATA_ABSOLUTE 2
62 * libdisasm callback data. Keeps track of current data (function or section)
63 * and offset within that data.
65 typedef struct dis_buffer
{
66 dis_tgt_t
*db_tgt
; /* current dis target */
67 void *db_data
; /* function or section data */
68 uint64_t db_addr
; /* address of function start */
69 size_t db_size
; /* size of data */
70 uint64_t db_nextaddr
; /* next address to be read */
73 #define MINSYMWIDTH 22 /* Minimum width of symbol portion of line */
76 * Given a symbol+offset as returned by dis_tgt_lookup(), print an appropriately
77 * formatted symbol, based on the offset and current settings.
80 getsymname(uint64_t addr
, const char *symbol
, off_t offset
, char *buf
,
83 if (symbol
== NULL
|| g_numeric
) {
84 if (g_flags
& DIS_OCTAL
)
85 (void) snprintf(buf
, buflen
, "0%llo", addr
);
87 (void) snprintf(buf
, buflen
, "0x%llx", addr
);
90 symbol
= dis_demangle(symbol
);
93 (void) snprintf(buf
, buflen
, "%s", symbol
);
94 else if (g_flags
& DIS_OCTAL
)
95 (void) snprintf(buf
, buflen
, "%s+0%o", symbol
, offset
);
97 (void) snprintf(buf
, buflen
, "%s+0x%x", symbol
, offset
);
102 * Determine if we are on an architecture with fixed-size instructions,
103 * and if so, what size they are.
106 insn_size(dis_handle_t
*dhp
)
108 int min
= dis_min_instrlen(dhp
);
109 int max
= dis_max_instrlen(dhp
);
118 * The main disassembly routine. Given a fixed-sized buffer and starting
119 * address, disassemble the data using the supplied target and libdisasm handle.
122 dis_data(dis_tgt_t
*tgt
, dis_handle_t
*dhp
, uint64_t addr
, void *data
,
125 dis_buffer_t db
= { 0 };
127 char symbuf
[BUFSIZE
];
129 const char *last_symbol
;
137 int insz
= insn_size(dhp
);
142 db
.db_size
= datalen
;
144 dis_set_data(dhp
, &db
);
146 if ((bytesperline
= dis_max_instrlen(dhp
)) > 6)
151 while (addr
< db
.db_addr
+ db
.db_size
) {
153 ret
= dis_disassemble(dhp
, addr
, buf
, BUFSIZE
);
154 if (ret
!= 0 && insz
> 0) {
156 * Since we know instructions are fixed size, we
157 * always know the address of the next instruction
159 (void) snprintf(buf
, sizeof (buf
),
160 "*** invalid opcode ***");
161 db
.db_nextaddr
= addr
+ insz
;
163 } else if (ret
!= 0) {
166 (void) snprintf(buf
, sizeof (buf
),
167 "*** invalid opcode ***");
170 * On architectures with variable sized instructions
171 * we have no way to figure out where the next
172 * instruction starts if we encounter an invalid
173 * instruction. Instead we print the rest of the
174 * instruction stream as hex until we reach the
175 * next valid symbol in the section.
177 if ((next
= dis_tgt_next_symbol(tgt
, addr
)) == 0) {
178 db
.db_nextaddr
= db
.db_addr
+ db
.db_size
;
180 if (next
> db
.db_size
)
181 db
.db_nextaddr
= db
.db_addr
+
184 db
.db_nextaddr
= addr
+ next
;
189 * Print out the line as:
191 * address: bytes text
193 * If there are more than 6 bytes in any given instruction,
194 * spread the bytes across two lines. We try to get symbolic
195 * information for the address, but if that fails we print out
196 * the numeric address instead.
198 * We try to keep the address portion of the text aligned at
199 * MINSYMWIDTH characters. If we are disassembling a function
200 * with a long name, this can be annoying. So we pick a width
201 * based on the maximum width that the current symbol can be.
202 * This at least produces text aligned within each function.
204 last_symbol
= symbol
;
205 symbol
= dis_tgt_lookup(tgt
, addr
, &symoffset
, 1, &symsize
,
207 if (symbol
== NULL
) {
208 symbol
= dis_find_section(tgt
, addr
, &symoffset
);
212 if (symbol
!= last_symbol
)
213 getsymname(addr
, symbol
, symsize
, symbuf
,
216 symwidth
= MAX(symwidth
, strlen(symbuf
));
217 getsymname(addr
, symbol
, symoffset
, symbuf
, sizeof (symbuf
));
220 * If we've crossed a new function boundary, print out the
221 * function name on a blank line.
223 if (!g_quiet
&& symoffset
== 0 && symbol
!= NULL
&& isfunc
)
224 (void) printf("%s()\n", symbol
);
226 (void) printf(" %s:%*s ", symbuf
,
227 symwidth
- strlen(symbuf
), "");
230 for (i
= 0; i
< MIN(bytesperline
, (db
.db_nextaddr
- addr
));
232 int byte
= *((uchar_t
*)data
+ (addr
- db
.db_addr
) + i
);
233 if (g_flags
& DIS_OCTAL
)
234 (void) printf("%03o ", byte
);
236 (void) printf("%02x ", byte
);
239 /* trailing spaces for missing bytes */
240 for (; i
< bytesperline
; i
++) {
241 if (g_flags
& DIS_OCTAL
)
247 /* contents of disassembly */
248 (void) printf(" %s", buf
);
250 /* excess bytes that spill over onto subsequent lines */
251 for (; i
< db
.db_nextaddr
- addr
; i
++) {
252 int byte
= *((uchar_t
*)data
+ (addr
- db
.db_addr
) + i
);
253 if (i
% bytesperline
== 0)
254 (void) printf("\n %*s ", symwidth
, "");
255 if (g_flags
& DIS_OCTAL
)
256 (void) printf("%03o ", byte
);
258 (void) printf("%02x ", byte
);
263 addr
= db
.db_nextaddr
;
268 * libdisasm wrapper around symbol lookup. Invoke the target-specific lookup
269 * function, and convert the result using getsymname().
272 do_lookup(void *data
, uint64_t addr
, char *buf
, size_t buflen
, uint64_t *start
,
275 dis_buffer_t
*db
= data
;
281 * If NULL symbol is returned, getsymname takes care of
282 * printing appropriate address in buf instead of symbol.
284 symbol
= dis_tgt_lookup(db
->db_tgt
, addr
, &offset
, 0, &size
, NULL
);
287 getsymname(addr
, symbol
, offset
, buf
, buflen
);
290 *start
= addr
- offset
;
301 * libdisasm wrapper around target reading. libdisasm will always read data
302 * in order, so update our current offset within the buffer appropriately.
303 * We only support reading from within the current object; libdisasm should
304 * never ask us to do otherwise.
307 do_read(void *data
, uint64_t addr
, void *buf
, size_t len
)
309 dis_buffer_t
*db
= data
;
312 if (addr
< db
->db_addr
|| addr
>= db
->db_addr
+ db
->db_size
)
315 offset
= addr
- db
->db_addr
;
316 len
= MIN(len
, db
->db_size
- offset
);
318 (void) memcpy(buf
, (char *)db
->db_data
+ offset
, len
);
320 db
->db_nextaddr
= addr
+ len
;
326 * Routine to dump raw data in a human-readable format. Used by the -d and -D
327 * options. We model our output after the xxd(1) program, which gives nicely
328 * formatted output, along with an ASCII translation of the result.
331 dump_data(uint64_t addr
, void *data
, size_t datalen
)
333 uintptr_t curaddr
= addr
& (~0xf);
334 uint8_t *bytes
= data
;
339 * Determine if the address given to us fits in 32-bit range, in which
340 * case use a 4-byte width.
342 if (((addr
+ datalen
) & 0xffffffff00000000ULL
) == 0ULL)
347 while (curaddr
< addr
+ datalen
) {
349 * Display leading address
351 (void) printf("%0*x: ", width
, curaddr
);
354 * Print out data in two-byte chunks. If the current address
355 * is before the starting address or after the end of the
356 * section, print spaces.
358 for (i
= 0; i
< 16; i
++) {
359 if (curaddr
+ i
< addr
||curaddr
+ i
>= addr
+ datalen
)
362 (void) printf("%02x",
363 bytes
[curaddr
+ i
- addr
]);
372 * Print out the ASCII representation
374 for (i
= 0; i
< 16; i
++) {
375 if (curaddr
+ i
< addr
||
376 curaddr
+ i
>= addr
+ datalen
) {
379 uint8_t byte
= bytes
[curaddr
+ i
- addr
];
381 (void) printf("%c", byte
);
394 * Disassemble a section implicitly specified as part of a file. This function
395 * is called for all sections when no other flags are specified. We ignore any
396 * data sections, and print out only those sections containing text.
399 dis_text_section(dis_tgt_t
*tgt
, dis_scn_t
*scn
, void *data
)
401 dis_handle_t
*dhp
= data
;
403 /* ignore data sections */
404 if (!dis_section_istext(scn
))
408 (void) printf("\nsection %s\n", dis_section_name(scn
));
410 dis_data(tgt
, dhp
, dis_section_addr(scn
), dis_section_data(scn
),
411 dis_section_size(scn
));
415 * Structure passed to dis_named_{section,function} which keeps track of both
416 * the target and the libdisasm handle.
418 typedef struct callback_arg
{
420 dis_handle_t
*ca_handle
;
424 * Disassemble a section explicitly named with -t, -d, or -D. The 'type'
425 * argument contains the type of argument given. Pass the data onto the
426 * appropriate helper routine.
429 dis_named_section(dis_scn_t
*scn
, int type
, void *data
)
431 callback_arg_t
*ca
= data
;
434 (void) printf("\nsection %s\n", dis_section_name(scn
));
437 case DIS_DATA_RELATIVE
:
438 dump_data(0, dis_section_data(scn
), dis_section_size(scn
));
440 case DIS_DATA_ABSOLUTE
:
441 dump_data(dis_section_addr(scn
), dis_section_data(scn
),
442 dis_section_size(scn
));
445 dis_data(ca
->ca_tgt
, ca
->ca_handle
, dis_section_addr(scn
),
446 dis_section_data(scn
), dis_section_size(scn
));
452 * Disassemble a function explicitly specified with '-F'. The 'type' argument
457 dis_named_function(dis_func_t
*func
, int type
, void *data
)
459 callback_arg_t
*ca
= data
;
461 dis_data(ca
->ca_tgt
, ca
->ca_handle
, dis_function_addr(func
),
462 dis_function_data(func
), dis_function_size(func
));
466 * Disassemble a complete file. First, we determine the type of the file based
467 * on the ELF machine type, and instantiate a version of the disassembler
468 * appropriate for the file. We then resolve any named sections or functions
469 * against the file, and iterate over the results (or all sections if no flags
473 dis_file(const char *filename
)
475 dis_tgt_t
*tgt
, *current
;
476 dis_scnlist_t
*sections
;
477 dis_funclist_t
*functions
;
482 * First, initialize the target
484 if ((tgt
= dis_tgt_create(filename
)) == NULL
)
488 (void) printf("disassembly for %s\n\n", filename
);
491 * A given file may contain multiple targets (if it is an archive, for
492 * example). We iterate over all possible targets if this is the case.
494 for (current
= tgt
; current
!= NULL
; current
= dis_tgt_next(current
)) {
495 dis_tgt_ehdr(current
, &ehdr
);
498 * Eventually, this should probably live within libdisasm, and
499 * we should be able to disassemble targets from different
500 * architectures. For now, we only support objects as the
501 * native machine type.
503 switch (ehdr
.e_machine
) {
505 if (ehdr
.e_ident
[EI_CLASS
] != ELFCLASS32
||
506 ehdr
.e_ident
[EI_DATA
] != ELFDATA2MSB
) {
507 warn("invalid E_IDENT field for SPARC object");
510 g_flags
|= DIS_SPARC_V8
;
515 uint64_t flags
= ehdr
.e_flags
& EF_SPARC_32PLUS_MASK
;
517 if (ehdr
.e_ident
[EI_CLASS
] != ELFCLASS32
||
518 ehdr
.e_ident
[EI_DATA
] != ELFDATA2MSB
) {
519 warn("invalid E_IDENT field for SPARC object");
524 (flags
& (EF_SPARC_32PLUS
| EF_SPARC_SUN_US1
|
525 EF_SPARC_SUN_US3
)) != EF_SPARC_32PLUS
)
526 g_flags
|= DIS_SPARC_V9
| DIS_SPARC_V9_SGI
;
528 g_flags
|= DIS_SPARC_V9
;
533 if (ehdr
.e_ident
[EI_CLASS
] != ELFCLASS64
||
534 ehdr
.e_ident
[EI_DATA
] != ELFDATA2MSB
) {
535 warn("invalid E_IDENT field for SPARC object");
539 g_flags
|= DIS_SPARC_V9
| DIS_SPARC_V9_SGI
;
543 g_flags
|= DIS_X86_SIZE32
;
547 g_flags
|= DIS_X86_SIZE64
;
553 if (ehdr
.e_ident
[EI_CLASS
] != ELFCLASS32
||
554 ehdr
.e_ident
[EI_DATA
] != ELFDATA2MSB
) {
555 warn("invalid E_IDENT field for S370 object");
562 * Both 390 and z/Architecture use EM_S390, the only
563 * difference is the class: ELFCLASS32 for plain
564 * old s390 and ELFCLASS64 for z/Architecture (aka.
567 if (ehdr
.e_ident
[EI_CLASS
] == ELFCLASS32
) {
568 g_flags
|= DIS_S390_31
;
569 } else if (ehdr
.e_ident
[EI_CLASS
] == ELFCLASS64
) {
570 g_flags
|= DIS_S390_64
;
572 warn("invalid E_IDENT field for S390 object");
576 if (ehdr
.e_ident
[EI_DATA
] != ELFDATA2MSB
) {
577 warn("invalid E_IDENT field for S390 object");
583 die("%s: unsupported ELF machine 0x%x", filename
,
588 * If ET_REL (.o), printing immediate symbols is likely to
589 * result in garbage, as symbol lookups on unrelocated
590 * immediates find false and useless matches.
593 if (ehdr
.e_type
== ET_REL
)
594 g_flags
|= DIS_NOIMMSYM
;
596 if (!g_quiet
&& dis_tgt_member(current
) != NULL
)
597 (void) printf("\narchive member %s\n",
598 dis_tgt_member(current
));
601 * Instantiate a libdisasm handle based on the file type.
603 if ((dhp
= dis_handle_create(g_flags
, current
, do_lookup
,
605 die("%s: failed to initialize disassembler: %s",
606 filename
, dis_strerror(dis_errno()));
610 * With no arguments, iterate over all sections and
611 * disassemble only those that contain text.
613 dis_tgt_section_iter(current
, dis_text_section
, dhp
);
621 * If sections or functions were explicitly specified,
622 * resolve those names against the object, and iterate
623 * over just the resulting data.
625 sections
= dis_namelist_resolve_sections(g_seclist
,
627 functions
= dis_namelist_resolve_functions(g_funclist
,
630 dis_scnlist_iter(sections
, dis_named_section
, &ca
);
631 dis_funclist_iter(functions
, dis_named_function
, &ca
);
633 dis_scnlist_destroy(sections
);
634 dis_funclist_destroy(functions
);
637 dis_handle_destroy(dhp
);
640 dis_tgt_destroy(tgt
);
646 (void) fprintf(stderr
, "usage: dis [-CVoqn] [-d sec] \n");
647 (void) fprintf(stderr
, "\t[-D sec] [-F function] [-t sec] file ..\n");
651 typedef struct lib_node
{
653 struct lib_node
*next
;
657 main(int argc
, char **argv
)
661 lib_node_t
*libs
= NULL
;
663 g_funclist
= dis_namelist_create();
664 g_seclist
= dis_namelist_create();
666 while ((optchar
= getopt(argc
, argv
, "Cd:D:F:l:Lot:Vqn")) != -1) {
672 dis_namelist_add(g_seclist
, optarg
, DIS_DATA_RELATIVE
);
675 dis_namelist_add(g_seclist
, optarg
, DIS_DATA_ABSOLUTE
);
678 dis_namelist_add(g_funclist
, optarg
, 0);
682 * The '-l foo' option historically would attempt to
683 * disassemble '$LIBDIR/libfoo.a'. The $LIBDIR
684 * environment variable has never been supported or
685 * documented for our linker. However, until this
686 * option is formally EOLed, we have to support it.
692 if ((dir
= getenv("LIBDIR")) == NULL
||
695 node
= safe_malloc(sizeof (lib_node_t
));
696 len
= strlen(optarg
) + strlen(dir
) + sizeof ("/lib.a");
697 node
->path
= safe_malloc(len
);
699 (void) snprintf(node
->path
, len
, "%s/lib%s.a", dir
,
707 * The '-L' option historically would attempt to read
708 * the .debug section of the target to determine source
709 * line information in order to annotate the output.
710 * No compiler has emitted these sections in many years,
711 * and the option has never done what it purported to
712 * do. We silently consume the option for
720 g_flags
|= DIS_OCTAL
;
726 dis_namelist_add(g_seclist
, optarg
, DIS_TEXT
);
729 (void) printf("Solaris disassembler version 1.0\n");
740 if (argc
== 0 && libs
== NULL
) {
741 warn("no objects specified");
745 if (dis_namelist_empty(g_funclist
) && dis_namelist_empty(g_seclist
))
749 * See comment for 'l' option, above.
751 while (libs
!= NULL
) {
752 lib_node_t
*node
= libs
->next
;
754 dis_file(libs
->path
);
760 for (i
= 0; i
< argc
; i
++)
763 dis_namelist_destroy(g_funclist
);
764 dis_namelist_destroy(g_seclist
);