libuutil: move under bmake
[unleashed.git] / usr / src / lib / libctf / common / ctf_dwarf.c
blob9cdfc4194a42d6193882f5ae3a889814b44b3142
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
19 * CDDL HEADER END
22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
26 * Copyright 2012 Jason King. All rights reserved.
27 * Use is subject to license terms.
31 * Copyright 2015 Joyent, Inc.
35 * CTF DWARF conversion theory.
37 * DWARF data contains a series of compilation units. Each compilation unit
38 * generally refers to an object file or what once was, in the case of linked
39 * binaries and shared objects. Each compilation unit has a series of what DWARF
40 * calls a DIE (Debugging Information Entry). The set of entries that we care
41 * about have type information stored in a series of attributes. Each DIE also
42 * has a tag that identifies the kind of attributes that it has.
44 * A given DIE may itself have children. For example, a DIE that represents a
45 * structure has children which represent members. Whenever we encounter a DIE
46 * that has children or other values or types associated with it, we recursively
47 * process those children first so that way we can then refer to the generated
48 * CTF type id while processing its parent. This reduces the amount of unknowns
49 * and fixups that we need. It also ensures that we don't accidentally add types
50 * that an overzealous compiler might add to the DWARF data but aren't used by
51 * anything in the system.
53 * Once we do a conversion, we store a mapping in an AVL tree that goes from the
54 * DWARF's die offset (which is relative to the given compilation unit), to a
55 * ctf_id_t.
57 * Unfortunately, some compilers actually will emit duplicate entries for a
58 * given type that look similar, but aren't quite. To that end, we go through
59 * and do a variant on a merge once we're done processing a single compilation
60 * unit which deduplicates all of the types that are in the unit.
62 * Finally, if we encounter an object that has multiple compilation units, then
63 * we'll convert all of the compilation units separately and then do a merge, so
64 * that way we can result in one single ctf_file_t that represents everything
65 * for the object.
67 * Conversion Steps
68 * ----------------
70 * Because a given object we've been given to convert may have multiple
71 * compilation units, we break the work into two halves. The first half
72 * processes each compilation unit (potentially in parallel) and then the second
73 * half optionally merges all of the dies in the first half. First, we'll cover
74 * what's involved in converting a single ctf_die_t's dwarf to CTF. This covers
75 * the work done in ctf_dwarf_convert_one().
77 * An individual ctf_die_t, which represents a compilation unit, is converted to
78 * CTF in a series of multiple passes.
80 * Pass 1: During the first pass we walk all of the dies and if we find a
81 * function, variable, struct, union, enum or typedef, we recursively transform
82 * all of its types. We don't recurse or process everything, because we don't
83 * want to add some of the types that compilers may add which are effectively
84 * unused.
86 * During pass 1, if we encounter any structures or unions we mark them for
87 * fixing up later. This is necessary because we may not be able to determine
88 * the full size of a structure at the beginning of time. This will happen if
89 * the DWARF attribute DW_AT_byte_size is not present for a member. Because of
90 * this possibility we defer adding members to structures or even converting
91 * them during pass 1 and save that for pass 2. Adding all of the base
92 * structures without any of their members helps deal with any circular
93 * dependencies that we might encounter.
95 * Pass 2: This pass is used to do the first half of fixing up structures and
96 * unions. Rather than walk the entire type space again, we actually walk the
97 * list of structures and unions that we marked for later fixing up. Here, we
98 * iterate over every structure and add members to the underlying ctf_file_t,
99 * but not to the structs themselves. One might wonder why we don't, and the
100 * main reason is that libctf requires a ctf_update() be done before adding the
101 * members to structures or unions.
103 * Pass 3: This pass is used to do the second half of fixing up structures and
104 * unions. During this part we always go through and add members to structures
105 * and unions that we added to the container in the previous pass. In addition,
106 * we set the structure and union's actual size, which may have additional
107 * padding added by the compiler, it isn't simply the last offset. DWARF always
108 * guarantees an attribute exists for this. Importantly no ctf_id_t's change
109 * during pass 2.
111 * Pass 4: The next phase is to add CTF entries for all of the symbols and
112 * variables that are present in this die. During pass 1 we added entries to a
113 * map for each variable and function. During this pass, we iterate over the
114 * symbol table and when we encounter a symbol that we have in our lists of
115 * translated information which matches, we then add it to the ctf_file_t.
117 * Pass 5: Here we go and look for any weak symbols and functions and see if
118 * they match anything that we recognize. If so, then we add type information
119 * for them at this point based on the matching type.
121 * Pass 6: This pass is actually a variant on a merge. The traditional merge
122 * process expects there to be no duplicate types. As such, at the end of
123 * conversion, we do a dedup on all of the types in the system. The
124 * deduplication process is described in lib/libctf/common/ctf_merge.c.
126 * Once pass 6 is done, we've finished processing the individual compilation
127 * unit.
129 * The following steps reflect the general process of doing a conversion.
131 * 1) Walk the dwarf section and determine the number of compilation units
132 * 2) Create a ctf_die_t for each compilation unit
133 * 3) Add all ctf_die_t's to a workq
134 * 4) Have the workq process each die with ctf_dwarf_convert_one. This itself
135 * is comprised of several steps, which were already enumerated.
136 * 5) If we have multiple dies, we do a ctf merge of all the dies. The mechanics
137 * of the merge are discussed in lib/libctf/common/ctf_merge.c.
138 * 6) Free everything up and return a ctf_file_t to the user. If we only had a
139 * single compilation unit, then we give that to the user. Otherwise, we
140 * return the merged ctf_file_t.
142 * Threading
143 * ---------
145 * The process has been designed to be amenable to threading. Each compilation
146 * unit has its own type stream, therefore the logical place to divide and
147 * conquer is at the compilation unit. Each ctf_die_t has been built to be able
148 * to be processed independently of the others. It has its own libdwarf handle,
149 * as a given libdwarf handle may only be used by a single thread at a time.
150 * This allows the various ctf_die_t's to be processed in parallel by different
151 * threads.
153 * All of the ctf_die_t's are loaded into a workq which allows for a number of
154 * threads to be specified and used as a thread pool to process all of the
155 * queued work. We set the number of threads to use in the workq equal to the
156 * number of threads that the user has specified.
158 * After all of the compilation units have been drained, we use the same number
159 * of threads when performing a merge of multiple compilation units, if they
160 * exist.
162 * While all of these different parts do support and allow for multiple threads,
163 * it's important that when only a single thread is specified, that it be the
164 * calling thread. This allows the conversion routines to be used in a context
165 * that doesn't allow additional threads, such as rtld.
167 * Common DWARF Mechanics and Notes
168 * --------------------------------
170 * At this time, we really only support DWARFv2, though support for DWARFv4 is
171 * mostly there. There is no intent to support DWARFv3.
173 * Generally types for something are stored in the DW_AT_type attribute. For
174 * example, a function's return type will be stored in the local DW_AT_type
175 * attribute while the arguments will be in child DIEs. There are also various
176 * times when we don't have any DW_AT_type. In that case, the lack of a type
177 * implies, at least for C, that its C type is void. Because DWARF doesn't emit
178 * one, we have a synthetic void type that we create and manipulate instead and
179 * pass it off to consumers on an as-needed basis. If nothing has a void type,
180 * it will not be emitted.
182 * Architecture Specific Parts
183 * ---------------------------
185 * The CTF tooling encodes various information about the various architectures
186 * in the system. Importantly, the tool assumes that every architecture has a
187 * data model where long and pointer are the same size. This is currently the
188 * case, as the two data models illumos supports are ILP32 and LP64.
190 * In addition, we encode the mapping of various floating point sizes to various
191 * types for each architecture. If a new architecture is being added, it should
192 * be added to the list. The general design of the ctf conversion tools is to be
193 * architecture independent. eg. any of the tools here should be able to convert
194 * any architecture's DWARF into ctf; however, this has not been rigorously
195 * tested and more importantly, the ctf routines don't currently write out the
196 * data in an endian-aware form, they only use that of the currently running
197 * library.
200 #include <libctf_impl.h>
201 #include <sys/avl.h>
202 #include <sys/debug.h>
203 #include <gelf.h>
204 #include <libdwarf.h>
205 #include <dwarf.h>
206 #include <libgen.h>
207 #include <workq.h>
208 #include <errno.h>
/*
 * DWARF version number 2.  The overview at the top of this file notes that
 * DWARFv2 is the only version that is really supported at this time.
 */
#define	DWARF_VERSION_TWO	2

/*
 * NOTE(review): presumably the name used for the trailing "..." of a variadic
 * function's argument list; confirm at the use site.
 */
#define	DWARF_VARARGS_NAME	"..."
214 * Dwarf may refer recursively to other types that we've already processed. To
215 * see if we've already converted them, we look them up in an AVL tree that's
216 * sorted by the DWARF id.
218 typedef struct ctf_dwmap {
219 avl_node_t cdm_avl;
220 Dwarf_Off cdm_off;
221 Dwarf_Die cdm_die;
222 ctf_id_t cdm_id;
223 boolean_t cdm_fix;
224 } ctf_dwmap_t;
226 typedef struct ctf_dwvar {
227 ctf_list_t cdv_list;
228 char *cdv_name;
229 ctf_id_t cdv_type;
230 boolean_t cdv_global;
231 } ctf_dwvar_t;
233 typedef struct ctf_dwfunc {
234 ctf_list_t cdf_list;
235 char *cdf_name;
236 ctf_funcinfo_t cdf_fip;
237 ctf_id_t *cdf_argv;
238 boolean_t cdf_global;
239 } ctf_dwfunc_t;
241 typedef struct ctf_dwbitf {
242 ctf_list_t cdb_list;
243 ctf_id_t cdb_base;
244 uint_t cdb_nbits;
245 ctf_id_t cdb_id;
246 } ctf_dwbitf_t;
249 * The ctf_die_t represents a single top-level DWARF die unit. While generally,
250 * the typical object file hs only a single die, if we're asked to convert
251 * something that's been linked from multiple sources, multiple dies will exist.
253 typedef struct ctf_die {
254 Elf *cd_elf; /* shared libelf handle */
255 char *cd_name; /* basename of the DIE */
256 ctf_merge_t *cd_cmh; /* merge handle */
257 ctf_list_t cd_vars; /* List of variables */
258 ctf_list_t cd_funcs; /* List of functions */
259 ctf_list_t cd_bitfields; /* Bit field members */
260 Dwarf_Debug cd_dwarf; /* shared libdwarf handle */
261 Dwarf_Die cd_cu; /* libdwarf compilation unit */
262 Dwarf_Off cd_cuoff; /* cu's offset */
263 Dwarf_Off cd_maxoff; /* maximum offset */
264 ctf_file_t *cd_ctfp; /* output CTF file */
265 avl_tree_t cd_map; /* map die offsets to CTF types */
266 char *cd_errbuf; /* error message buffer */
267 size_t cd_errlen; /* error message buffer length */
268 size_t cd_ptrsz; /* object's pointer size */
269 boolean_t cd_bigend; /* is it big endian */
270 boolean_t cd_doweaks; /* should we convert weak symbols? */
271 uint_t cd_mach; /* machine type */
272 ctf_id_t cd_voidtid; /* void pointer */
273 ctf_id_t cd_longtid; /* id for a 'long' */
274 } ctf_die_t;
276 static int ctf_dwarf_offset(ctf_die_t *, Dwarf_Die, Dwarf_Off *);
277 static int ctf_dwarf_convert_die(ctf_die_t *, Dwarf_Die);
278 static int ctf_dwarf_convert_type(ctf_die_t *, Dwarf_Die, ctf_id_t *, int);
280 static int ctf_dwarf_function_count(ctf_die_t *, Dwarf_Die, ctf_funcinfo_t *,
281 boolean_t);
282 static int ctf_dwarf_convert_fargs(ctf_die_t *, Dwarf_Die, ctf_funcinfo_t *,
283 ctf_id_t *);
285 typedef int (ctf_dwarf_symtab_f)(ctf_die_t *, const GElf_Sym *, ulong_t,
286 const char *, const char *, void *);
289 * This is a generic way to set a CTF Conversion backend error depending on what
290 * we were doing. Unless it was one of a specific set of errors that don't
291 * indicate a programming / translation bug, eg. ENOMEM, then we transform it
292 * into a CTF backend error and fill in the error buffer.
294 static int
295 ctf_dwarf_error(ctf_die_t *cdp, ctf_file_t *cfp, int err, const char *fmt, ...)
297 va_list ap;
298 int ret;
299 size_t off = 0;
300 ssize_t rem = cdp->cd_errlen;
301 if (cfp != NULL)
302 err = ctf_errno(cfp);
304 if (err == ENOMEM)
305 return (err);
307 ret = snprintf(cdp->cd_errbuf, rem, "die %s: ", cdp->cd_name);
308 if (ret < 0)
309 goto err;
310 off += ret;
311 rem = MAX(rem - ret, 0);
313 va_start(ap, fmt);
314 ret = vsnprintf(cdp->cd_errbuf + off, rem, fmt, ap);
315 va_end(ap);
316 if (ret < 0)
317 goto err;
319 off += ret;
320 rem = MAX(rem - ret, 0);
321 if (fmt[strlen(fmt) - 1] != '\n') {
322 (void) snprintf(cdp->cd_errbuf + off, rem,
323 ": %s\n", ctf_errmsg(err));
325 va_end(ap);
326 return (ECTF_CONVBKERR);
328 err:
329 cdp->cd_errbuf[0] = '\0';
330 return (ECTF_CONVBKERR);
334 * DWARF often ops to put no explicit type to describe a void type. eg. if we
335 * have a reference type whose DW_AT_type member doesn't exist, then we should
336 * instead assume it points to void. Because this isn't represented, we
337 * instead cause it to come into existence.
339 static ctf_id_t
340 ctf_dwarf_void(ctf_die_t *cdp)
342 if (cdp->cd_voidtid == CTF_ERR) {
343 ctf_encoding_t enc = { CTF_INT_SIGNED, 0, 0 };
344 cdp->cd_voidtid = ctf_add_integer(cdp->cd_ctfp, CTF_ADD_ROOT,
345 "void", &enc);
346 if (cdp->cd_voidtid == CTF_ERR) {
347 (void) snprintf(cdp->cd_errbuf, cdp->cd_errlen,
348 "failed to create void type: %s\n",
349 ctf_errmsg(ctf_errno(cdp->cd_ctfp)));
353 return (cdp->cd_voidtid);
357 * There are many different forms that an array index may take. However, we just
358 * always force it to be of a type long no matter what. Therefore we use this to
359 * have a single instance of long across everything.
361 static ctf_id_t
362 ctf_dwarf_long(ctf_die_t *cdp)
364 if (cdp->cd_longtid == CTF_ERR) {
365 ctf_encoding_t enc;
367 enc.cte_format = CTF_INT_SIGNED;
368 enc.cte_offset = 0;
369 /* All illumos systems are LP */
370 enc.cte_bits = cdp->cd_ptrsz * 8;
371 cdp->cd_longtid = ctf_add_integer(cdp->cd_ctfp, CTF_ADD_NONROOT,
372 "long", &enc);
373 if (cdp->cd_longtid == CTF_ERR) {
374 (void) snprintf(cdp->cd_errbuf, cdp->cd_errlen,
375 "failed to create long type: %s\n",
376 ctf_errmsg(ctf_errno(cdp->cd_ctfp)));
381 return (cdp->cd_longtid);
384 static int
385 ctf_dwmap_comp(const void *a, const void *b)
387 const ctf_dwmap_t *ca = a;
388 const ctf_dwmap_t *cb = b;
390 if (ca->cdm_off > cb->cdm_off)
391 return (1);
392 if (ca->cdm_off < cb->cdm_off)
393 return (-1);
394 return (0);
397 static int
398 ctf_dwmap_add(ctf_die_t *cdp, ctf_id_t id, Dwarf_Die die, boolean_t fix)
400 int ret;
401 avl_index_t index;
402 ctf_dwmap_t *dwmap;
403 Dwarf_Off off;
405 VERIFY(id > 0 && id < CTF_MAX_TYPE);
407 if ((ret = ctf_dwarf_offset(cdp, die, &off)) != 0)
408 return (ret);
410 if ((dwmap = ctf_alloc(sizeof (ctf_dwmap_t))) == NULL)
411 return (ENOMEM);
413 dwmap->cdm_die = die;
414 dwmap->cdm_off = off;
415 dwmap->cdm_id = id;
416 dwmap->cdm_fix = fix;
418 ctf_dprintf("dwmap: %p %x->%d\n", dwmap, (uint32_t)off, id);
419 VERIFY(avl_find(&cdp->cd_map, dwmap, &index) == NULL);
420 avl_insert(&cdp->cd_map, dwmap, index);
421 return (0);
424 static int
425 ctf_dwarf_attribute(ctf_die_t *cdp, Dwarf_Die die, Dwarf_Half name,
426 Dwarf_Attribute *attrp)
428 int ret;
429 Dwarf_Error derr;
431 if ((ret = dwarf_attr(die, name, attrp, &derr)) == DW_DLV_OK)
432 return (0);
433 if (ret == DW_DLV_NO_ENTRY) {
434 *attrp = NULL;
435 return (ENOENT);
437 (void) snprintf(cdp->cd_errbuf, cdp->cd_errlen,
438 "failed to get attribute for type: %s\n",
439 dwarf_errmsg(derr));
440 return (ECTF_CONVBKERR);
443 static int
444 ctf_dwarf_ref(ctf_die_t *cdp, Dwarf_Die die, Dwarf_Half name, Dwarf_Off *refp)
446 int ret;
447 Dwarf_Attribute attr;
448 Dwarf_Error derr;
450 if ((ret = ctf_dwarf_attribute(cdp, die, name, &attr)) != 0)
451 return (ret);
453 if (dwarf_formref(attr, refp, &derr) == DW_DLV_OK) {
454 dwarf_dealloc(cdp->cd_dwarf, attr, DW_DLA_ATTR);
455 return (0);
458 (void) snprintf(cdp->cd_errbuf, cdp->cd_errlen,
459 "failed to get unsigned attribute for type: %s\n",
460 dwarf_errmsg(derr));
461 return (ECTF_CONVBKERR);
464 static int
465 ctf_dwarf_refdie(ctf_die_t *cdp, Dwarf_Die die, Dwarf_Half name,
466 Dwarf_Die *diep)
468 int ret;
469 Dwarf_Off off;
470 Dwarf_Error derr;
472 if ((ret = ctf_dwarf_ref(cdp, die, name, &off)) != 0)
473 return (ret);
475 off += cdp->cd_cuoff;
476 if ((ret = dwarf_offdie(cdp->cd_dwarf, off, diep, &derr)) !=
477 DW_DLV_OK) {
478 (void) snprintf(cdp->cd_errbuf, cdp->cd_errlen,
479 "failed to get die from offset %llu: %s\n",
480 off, dwarf_errmsg(derr));
481 return (ECTF_CONVBKERR);
484 return (0);
487 static int
488 ctf_dwarf_signed(ctf_die_t *cdp, Dwarf_Die die, Dwarf_Half name,
489 Dwarf_Signed *valp)
491 int ret;
492 Dwarf_Attribute attr;
493 Dwarf_Error derr;
495 if ((ret = ctf_dwarf_attribute(cdp, die, name, &attr)) != 0)
496 return (ret);
498 if (dwarf_formsdata(attr, valp, &derr) == DW_DLV_OK) {
499 dwarf_dealloc(cdp->cd_dwarf, attr, DW_DLA_ATTR);
500 return (0);
503 (void) snprintf(cdp->cd_errbuf, cdp->cd_errlen,
504 "failed to get unsigned attribute for type: %s\n",
505 dwarf_errmsg(derr));
506 return (ECTF_CONVBKERR);
509 static int
510 ctf_dwarf_unsigned(ctf_die_t *cdp, Dwarf_Die die, Dwarf_Half name,
511 Dwarf_Unsigned *valp)
513 int ret;
514 Dwarf_Attribute attr;
515 Dwarf_Error derr;
517 if ((ret = ctf_dwarf_attribute(cdp, die, name, &attr)) != 0)
518 return (ret);
520 if (dwarf_formudata(attr, valp, &derr) == DW_DLV_OK) {
521 dwarf_dealloc(cdp->cd_dwarf, attr, DW_DLA_ATTR);
522 return (0);
525 (void) snprintf(cdp->cd_errbuf, cdp->cd_errlen,
526 "failed to get unsigned attribute for type: %s\n",
527 dwarf_errmsg(derr));
528 return (ECTF_CONVBKERR);
531 static int
532 ctf_dwarf_boolean(ctf_die_t *cdp, Dwarf_Die die, Dwarf_Half name,
533 Dwarf_Bool *val)
535 int ret;
536 Dwarf_Attribute attr;
537 Dwarf_Error derr;
539 if ((ret = ctf_dwarf_attribute(cdp, die, name, &attr)) != 0)
540 return (ret);
542 if (dwarf_formflag(attr, val, &derr) == DW_DLV_OK) {
543 dwarf_dealloc(cdp->cd_dwarf, attr, DW_DLA_ATTR);
544 return (0);
547 (void) snprintf(cdp->cd_errbuf, cdp->cd_errlen,
548 "failed to get boolean attribute for type: %s\n",
549 dwarf_errmsg(derr));
551 return (ECTF_CONVBKERR);
554 static int
555 ctf_dwarf_string(ctf_die_t *cdp, Dwarf_Die die, Dwarf_Half name, char **strp)
557 int ret;
558 char *s;
559 Dwarf_Attribute attr;
560 Dwarf_Error derr;
562 *strp = NULL;
563 if ((ret = ctf_dwarf_attribute(cdp, die, name, &attr)) != 0)
564 return (ret);
566 if (dwarf_formstring(attr, &s, &derr) == DW_DLV_OK) {
567 if ((*strp = ctf_strdup(s)) == NULL)
568 ret = ENOMEM;
569 else
570 ret = 0;
571 dwarf_dealloc(cdp->cd_dwarf, attr, DW_DLA_ATTR);
572 return (ret);
575 (void) snprintf(cdp->cd_errbuf, cdp->cd_errlen,
576 "failed to get string attribute for type: %s\n",
577 dwarf_errmsg(derr));
578 return (ECTF_CONVBKERR);
581 static int
582 ctf_dwarf_member_location(ctf_die_t *cdp, Dwarf_Die die, Dwarf_Unsigned *valp)
584 int ret;
585 Dwarf_Error derr;
586 Dwarf_Attribute attr;
587 Dwarf_Locdesc *loc;
588 Dwarf_Signed locnum;
590 if ((ret = ctf_dwarf_attribute(cdp, die, DW_AT_data_member_location,
591 &attr)) != 0)
592 return (ret);
594 if (dwarf_loclist(attr, &loc, &locnum, &derr) != DW_DLV_OK) {
595 (void) snprintf(cdp->cd_errbuf, cdp->cd_errlen,
596 "failed to obtain location list for member offset: %s",
597 dwarf_errmsg(derr));
598 dwarf_dealloc(cdp->cd_dwarf, attr, DW_DLA_ATTR);
599 return (ECTF_CONVBKERR);
601 dwarf_dealloc(cdp->cd_dwarf, attr, DW_DLA_ATTR);
603 if (locnum != 1 || loc->ld_s->lr_atom != DW_OP_plus_uconst) {
604 (void) snprintf(cdp->cd_errbuf, cdp->cd_errlen,
605 "failed to parse location structure for member");
606 dwarf_dealloc(cdp->cd_dwarf, loc->ld_s, DW_DLA_LOC_BLOCK);
607 dwarf_dealloc(cdp->cd_dwarf, loc, DW_DLA_LOCDESC);
608 return (ECTF_CONVBKERR);
611 *valp = loc->ld_s->lr_number;
613 dwarf_dealloc(cdp->cd_dwarf, loc->ld_s, DW_DLA_LOC_BLOCK);
614 dwarf_dealloc(cdp->cd_dwarf, loc, DW_DLA_LOCDESC);
615 return (0);
619 static int
620 ctf_dwarf_offset(ctf_die_t *cdp, Dwarf_Die die, Dwarf_Off *offsetp)
622 Dwarf_Error derr;
624 if (dwarf_dieoffset(die, offsetp, &derr) == DW_DLV_OK)
625 return (0);
627 (void) snprintf(cdp->cd_errbuf, cdp->cd_errlen,
628 "failed to get die offset: %s\n",
629 dwarf_errmsg(derr));
630 return (ECTF_CONVBKERR);
633 static int
634 ctf_dwarf_tag(ctf_die_t *cdp, Dwarf_Die die, Dwarf_Half *tagp)
636 Dwarf_Error derr;
638 if (dwarf_tag(die, tagp, &derr) == DW_DLV_OK)
639 return (0);
641 (void) snprintf(cdp->cd_errbuf, cdp->cd_errlen,
642 "failed to get tag type: %s\n",
643 dwarf_errmsg(derr));
644 return (ECTF_CONVBKERR);
647 static int
648 ctf_dwarf_sib(ctf_die_t *cdp, Dwarf_Die base, Dwarf_Die *sibp)
650 Dwarf_Error derr;
651 int ret;
653 *sibp = NULL;
654 ret = dwarf_siblingof(cdp->cd_dwarf, base, sibp, &derr);
655 if (ret == DW_DLV_OK || ret == DW_DLV_NO_ENTRY)
656 return (0);
658 (void) snprintf(cdp->cd_errbuf, cdp->cd_errlen,
659 "failed to sibling from die: %s\n",
660 dwarf_errmsg(derr));
661 return (ECTF_CONVBKERR);
664 static int
665 ctf_dwarf_child(ctf_die_t *cdp, Dwarf_Die base, Dwarf_Die *childp)
667 Dwarf_Error derr;
668 int ret;
670 *childp = NULL;
671 ret = dwarf_child(base, childp, &derr);
672 if (ret == DW_DLV_OK || ret == DW_DLV_NO_ENTRY)
673 return (0);
675 (void) snprintf(cdp->cd_errbuf, cdp->cd_errlen,
676 "failed to child from die: %s\n",
677 dwarf_errmsg(derr));
678 return (ECTF_CONVBKERR);
682 * Compilers disagree on what to do to determine if something has global
683 * visiblity. Traditionally gcc has used DW_AT_external to indicate this while
684 * Studio has used DW_AT_visibility. We check DW_AT_visibility first and then
685 * fall back to DW_AT_external. Lack of DW_AT_external implies that it is not.
687 static int
688 ctf_dwarf_isglobal(ctf_die_t *cdp, Dwarf_Die die, boolean_t *igp)
690 int ret;
691 Dwarf_Signed vis;
692 Dwarf_Bool ext;
694 if ((ret = ctf_dwarf_signed(cdp, die, DW_AT_visibility, &vis)) == 0) {
695 *igp = vis == DW_VIS_exported;
696 return (0);
697 } else if (ret != ENOENT) {
698 return (ret);
701 if ((ret = ctf_dwarf_boolean(cdp, die, DW_AT_external, &ext)) != 0) {
702 if (ret == ENOENT) {
703 *igp = B_FALSE;
704 return (0);
706 return (ret);
708 *igp = ext != 0 ? B_TRUE : B_FALSE;
709 return (0);
712 static int
713 ctf_dwarf_die_elfenc(Elf *elf, ctf_die_t *cdp, char *errbuf, size_t errlen)
715 GElf_Ehdr ehdr;
717 if (gelf_getehdr(elf, &ehdr) == NULL) {
718 (void) snprintf(errbuf, errlen,
719 "failed to get ELF header: %s\n",
720 elf_errmsg(elf_errno()));
721 return (ECTF_CONVBKERR);
724 cdp->cd_mach = ehdr.e_machine;
726 if (ehdr.e_ident[EI_CLASS] == ELFCLASS32) {
727 cdp->cd_ptrsz = 4;
728 VERIFY(ctf_setmodel(cdp->cd_ctfp, CTF_MODEL_ILP32) == 0);
729 } else if (ehdr.e_ident[EI_CLASS] == ELFCLASS64) {
730 cdp->cd_ptrsz = 8;
731 VERIFY(ctf_setmodel(cdp->cd_ctfp, CTF_MODEL_LP64) == 0);
732 } else {
733 (void) snprintf(errbuf, errlen,
734 "unknown ELF class %d", ehdr.e_ident[EI_CLASS]);
735 return (ECTF_CONVBKERR);
738 if (ehdr.e_ident[EI_DATA] == ELFDATA2LSB) {
739 cdp->cd_bigend = B_FALSE;
740 } else if (ehdr.e_ident[EI_DATA] == ELFDATA2MSB) {
741 cdp->cd_bigend = B_TRUE;
742 } else {
743 (void) snprintf(errbuf, errlen,
744 "unknown ELF data encoding: %d", ehdr.e_ident[EI_DATA]);
745 return (ECTF_CONVBKERR);
748 return (0);
751 typedef struct ctf_dwarf_fpent {
752 size_t cdfe_size;
753 uint_t cdfe_enc[3];
754 } ctf_dwarf_fpent_t;
756 typedef struct ctf_dwarf_fpmap {
757 uint_t cdf_mach;
758 ctf_dwarf_fpent_t cdf_ents[4];
759 } ctf_dwarf_fpmap_t;
761 static const ctf_dwarf_fpmap_t ctf_dwarf_fpmaps[] = {
762 { EM_SPARC, {
763 { 4, { CTF_FP_SINGLE, CTF_FP_CPLX, CTF_FP_IMAGRY } },
764 { 8, { CTF_FP_DOUBLE, CTF_FP_DCPLX, CTF_FP_DIMAGRY } },
765 { 16, { CTF_FP_LDOUBLE, CTF_FP_LDCPLX, CTF_FP_LDIMAGRY } },
766 { 0, { 0 } }
767 } },
768 { EM_SPARC32PLUS, {
769 { 4, { CTF_FP_SINGLE, CTF_FP_CPLX, CTF_FP_IMAGRY } },
770 { 8, { CTF_FP_DOUBLE, CTF_FP_DCPLX, CTF_FP_DIMAGRY } },
771 { 16, { CTF_FP_LDOUBLE, CTF_FP_LDCPLX, CTF_FP_LDIMAGRY } },
772 { 0, { 0 } }
773 } },
774 { EM_SPARCV9, {
775 { 4, { CTF_FP_SINGLE, CTF_FP_CPLX, CTF_FP_IMAGRY } },
776 { 8, { CTF_FP_DOUBLE, CTF_FP_DCPLX, CTF_FP_DIMAGRY } },
777 { 16, { CTF_FP_LDOUBLE, CTF_FP_LDCPLX, CTF_FP_LDIMAGRY } },
778 { 0, { 0 } }
779 } },
780 { EM_386, {
781 { 4, { CTF_FP_SINGLE, CTF_FP_CPLX, CTF_FP_IMAGRY } },
782 { 8, { CTF_FP_DOUBLE, CTF_FP_DCPLX, CTF_FP_DIMAGRY } },
783 { 12, { CTF_FP_LDOUBLE, CTF_FP_LDCPLX, CTF_FP_LDIMAGRY } },
784 { 0, { 0 } }
785 } },
786 { EM_X86_64, {
787 { 4, { CTF_FP_SINGLE, CTF_FP_CPLX, CTF_FP_IMAGRY } },
788 { 8, { CTF_FP_DOUBLE, CTF_FP_DCPLX, CTF_FP_DIMAGRY } },
789 { 16, { CTF_FP_LDOUBLE, CTF_FP_LDCPLX, CTF_FP_LDIMAGRY } },
790 { 0, { 0 } }
791 } },
792 { EM_NONE }
795 static int
796 ctf_dwarf_float_base(ctf_die_t *cdp, Dwarf_Signed type, ctf_encoding_t *enc)
798 const ctf_dwarf_fpmap_t *map = &ctf_dwarf_fpmaps[0];
799 const ctf_dwarf_fpent_t *ent;
800 uint_t col = 0, mult = 1;
802 for (map = &ctf_dwarf_fpmaps[0]; map->cdf_mach != EM_NONE; map++) {
803 if (map->cdf_mach == cdp->cd_mach)
804 break;
807 if (map->cdf_mach == EM_NONE) {
808 (void) snprintf(cdp->cd_errbuf, cdp->cd_errlen,
809 "Unsupported machine type: %d\n", cdp->cd_mach);
810 return (ENOTSUP);
813 if (type == DW_ATE_complex_float) {
814 mult = 2;
815 col = 1;
816 } else if (type == DW_ATE_imaginary_float ||
817 type == DW_ATE_SUN_imaginary_float) {
818 col = 2;
821 ent = &map->cdf_ents[0];
822 for (ent = &map->cdf_ents[0]; ent->cdfe_size != 0; ent++) {
823 if (ent->cdfe_size * mult * 8 == enc->cte_bits) {
824 enc->cte_format = ent->cdfe_enc[col];
825 return (0);
829 (void) snprintf(cdp->cd_errbuf, cdp->cd_errlen,
830 "failed to find valid fp mapping for encoding %d, size %d bits\n",
831 type, enc->cte_bits);
832 return (EINVAL);
835 static int
836 ctf_dwarf_dwarf_base(ctf_die_t *cdp, Dwarf_Die die, int *kindp,
837 ctf_encoding_t *enc)
839 int ret;
840 Dwarf_Signed type;
842 if ((ret = ctf_dwarf_signed(cdp, die, DW_AT_encoding, &type)) != 0)
843 return (ret);
845 switch (type) {
846 case DW_ATE_unsigned:
847 case DW_ATE_address:
848 *kindp = CTF_K_INTEGER;
849 enc->cte_format = 0;
850 break;
851 case DW_ATE_unsigned_char:
852 *kindp = CTF_K_INTEGER;
853 enc->cte_format = CTF_INT_CHAR;
854 break;
855 case DW_ATE_signed:
856 *kindp = CTF_K_INTEGER;
857 enc->cte_format = CTF_INT_SIGNED;
858 break;
859 case DW_ATE_signed_char:
860 *kindp = CTF_K_INTEGER;
861 enc->cte_format = CTF_INT_SIGNED | CTF_INT_CHAR;
862 break;
863 case DW_ATE_boolean:
864 *kindp = CTF_K_INTEGER;
865 enc->cte_format = CTF_INT_SIGNED | CTF_INT_BOOL;
866 break;
867 case DW_ATE_float:
868 case DW_ATE_complex_float:
869 case DW_ATE_imaginary_float:
870 case DW_ATE_SUN_imaginary_float:
871 case DW_ATE_SUN_interval_float:
872 *kindp = CTF_K_FLOAT;
873 if ((ret = ctf_dwarf_float_base(cdp, type, enc)) != 0)
874 return (ret);
875 break;
876 default:
877 (void) snprintf(cdp->cd_errbuf, cdp->cd_errlen,
878 "encountered unkown DWARF encoding: %d", type);
879 return (ECTF_CONVBKERR);
882 return (0);
886 * Different compilers (at least GCC and Studio) use different names for types.
887 * This parses the types and attempts to unify them. If this fails, we just fall
888 * back to using the DWARF itself.
890 static int
891 ctf_dwarf_parse_base(const char *name, int *kindp, ctf_encoding_t *enc,
892 char **newnamep)
894 char buf[256];
895 char *base, *c;
896 int nlong = 0, nshort = 0, nchar = 0, nint = 0;
897 int sign = 1;
899 if (strlen(name) + 1 > sizeof (buf))
900 return (EINVAL);
902 (void) strlcpy(buf, name, sizeof (buf));
903 for (c = strtok(buf, " "); c != NULL; c = strtok(NULL, " ")) {
904 if (strcmp(c, "signed") == 0) {
905 sign = 1;
906 } else if (strcmp(c, "unsigned") == 0) {
907 sign = 0;
908 } else if (strcmp(c, "long") == 0) {
909 nlong++;
910 } else if (strcmp(c, "char") == 0) {
911 nchar++;
912 } else if (strcmp(c, "short") == 0) {
913 nshort++;
914 } else if (strcmp(c, "int") == 0) {
915 nint++;
916 } else {
918 * If we don't recognize any of the tokens, we'll tell
919 * the caller to fall back to the dwarf-provided
920 * encoding information.
922 return (EINVAL);
926 if (nchar > 1 || nshort > 1 || nint > 1 || nlong > 2)
927 return (EINVAL);
929 if (nchar > 0) {
930 if (nlong > 0 || nshort > 0 || nint > 0)
931 return (EINVAL);
932 base = "char";
933 } else if (nshort > 0) {
934 if (nlong > 0)
935 return (EINVAL);
936 base = "short";
937 } else if (nlong > 0) {
938 base = "long";
939 } else {
940 base = "int";
943 if (nchar > 0)
944 enc->cte_format = CTF_INT_CHAR;
945 else
946 enc->cte_format = 0;
948 if (sign > 0)
949 enc->cte_format |= CTF_INT_SIGNED;
951 (void) snprintf(buf, sizeof (buf), "%s%s%s",
952 (sign ? "" : "unsigned "),
953 (nlong > 1 ? "long " : ""),
954 base);
956 *newnamep = ctf_strdup(buf);
957 if (*newnamep == NULL)
958 return (ENOMEM);
959 *kindp = CTF_K_INTEGER;
960 return (0);
963 static int
964 ctf_dwarf_create_base(ctf_die_t *cdp, Dwarf_Die die, ctf_id_t *idp, int isroot,
965 Dwarf_Off off)
967 int ret;
968 char *name, *nname;
969 Dwarf_Unsigned sz;
970 int kind;
971 ctf_encoding_t enc;
972 ctf_id_t id;
974 if ((ret = ctf_dwarf_string(cdp, die, DW_AT_name, &name)) != 0)
975 return (ret);
976 if ((ret = ctf_dwarf_unsigned(cdp, die, DW_AT_byte_size, &sz)) != 0) {
977 goto out;
979 ctf_dprintf("Creating base type %s from off %llu, size: %d\n", name,
980 off, sz);
982 bzero(&enc, sizeof (ctf_encoding_t));
983 enc.cte_bits = sz * 8;
984 if ((ret = ctf_dwarf_parse_base(name, &kind, &enc, &nname)) == 0) {
985 ctf_free(name, strlen(name) + 1);
986 name = nname;
987 } else {
988 if (ret != EINVAL)
989 return (ret);
990 ctf_dprintf("falling back to dwarf for base type %s\n", name);
991 if ((ret = ctf_dwarf_dwarf_base(cdp, die, &kind, &enc)) != 0)
992 return (ret);
995 id = ctf_add_encoded(cdp->cd_ctfp, isroot, name, &enc, kind);
996 if (id == CTF_ERR) {
997 ret = ctf_errno(cdp->cd_ctfp);
998 } else {
999 *idp = id;
1000 ret = ctf_dwmap_add(cdp, id, die, B_FALSE);
1002 out:
1003 ctf_free(name, strlen(name) + 1);
1004 return (ret);
1008 * Getting a member's offset is a surprisingly intricate dance. It works as
1009 * follows:
1011 * 1) If we're in DWARFv4, then we either have a DW_AT_data_bit_offset or we
1012 * have a DW_AT_data_member_location. We won't have both. Thus we check first
1013 * for DW_AT_data_bit_offset, and if it exists, we're set.
1015 * Next, if we have a bitfield and we don't ahve a DW_AT_data_bit_offset, then
1016 * we have to grab the data location and use the following dance:
1018 * 2) Gather the set of DW_AT_byte_size, DW_AT_bit_offset, and DW_AT_bit_size.
1019 * Of course, the DW_AT_byte_size may be omitted, even though it isn't always.
1020 * When it's been omitted, we then have to say that the size is that of the
1021 * underlying type, which forces that to be after a ctf_update(). Here, we have
1022 * to do different things based on whether or not we're using big endian or
1023 * little endian to obtain the proper offset.
1025 static int
1026 ctf_dwarf_member_offset(ctf_die_t *cdp, Dwarf_Die die, ctf_id_t mid,
1027 ulong_t *offp)
1029 int ret;
1030 Dwarf_Unsigned loc, bitsz, bytesz;
1031 Dwarf_Signed bitoff;
1032 size_t off;
1033 ssize_t tsz;
1035 if ((ret = ctf_dwarf_unsigned(cdp, die, DW_AT_data_bit_offset,
1036 &loc)) == 0) {
1037 *offp = loc;
1038 return (0);
1039 } else if (ret != ENOENT) {
1040 return (ret);
1043 if ((ret = ctf_dwarf_member_location(cdp, die, &loc)) != 0)
1044 return (ret);
1045 off = loc * 8;
1047 if ((ret = ctf_dwarf_signed(cdp, die, DW_AT_bit_offset,
1048 &bitoff)) != 0) {
1049 if (ret != ENOENT)
1050 return (ret);
1051 *offp = off;
1052 return (0);
1055 /* At this point we have to have DW_AT_bit_size */
1056 if ((ret = ctf_dwarf_unsigned(cdp, die, DW_AT_bit_size, &bitsz)) != 0)
1057 return (ret);
1059 if ((ret = ctf_dwarf_unsigned(cdp, die, DW_AT_byte_size,
1060 &bytesz)) != 0) {
1061 if (ret != ENOENT)
1062 return (ret);
1063 if ((tsz = ctf_type_size(cdp->cd_ctfp, mid)) == CTF_ERR) {
1064 int e = ctf_errno(cdp->cd_ctfp);
1065 (void) snprintf(cdp->cd_errbuf, cdp->cd_errlen,
1066 "failed to get type size: %s", ctf_errmsg(e));
1067 return (ECTF_CONVBKERR);
1069 } else {
1070 tsz = bytesz;
1072 tsz *= 8;
1073 if (cdp->cd_bigend == B_TRUE) {
1074 *offp = off + bitoff;
1075 } else {
1076 *offp = off + tsz - bitoff - bitsz;
1079 return (0);
1083 * We need to determine if the member in question is a bitfield. If it is, then
1084 * we need to go through and create a new type that's based on the actual base
1085 * type, but has a different size. We also rename the type as a result to help
1086 * deal with future collisions.
1088 * Here we need to look and see if we have a DW_AT_bit_size value. If we have a
1089 * bit size member and it does not equal the byte size member, then we need to
1090 * create a bitfield type based on this.
1092 * Note: When we support DWARFv4, there may be a chance that we ned to also
1093 * search for the DW_AT_byte_size if we don't have a DW_AT_bit_size member.
1095 static int
1096 ctf_dwarf_member_bitfield(ctf_die_t *cdp, Dwarf_Die die, ctf_id_t *idp)
1098 int ret;
1099 Dwarf_Unsigned bitsz;
1100 ctf_encoding_t e;
1101 ctf_dwbitf_t *cdb;
1102 ctf_dtdef_t *dtd;
1103 ctf_id_t base = *idp;
1104 int kind;
1106 if ((ret = ctf_dwarf_unsigned(cdp, die, DW_AT_bit_size, &bitsz)) != 0) {
1107 if (ret == ENOENT)
1108 return (0);
1109 return (ret);
1112 ctf_dprintf("Trying to deal with bitfields on %d:%d\n", base, bitsz);
1114 * Given that we now have a bitsize, time to go do something about it.
1115 * We're going to create a new type based on the current one, but first
1116 * we need to find the base type. This means we need to traverse any
1117 * typedef's, consts, and volatiles until we get to what should be
1118 * something of type integer or enumeration.
1120 VERIFY(bitsz < UINT32_MAX);
1121 dtd = ctf_dtd_lookup(cdp->cd_ctfp, base);
1122 VERIFY(dtd != NULL);
1123 kind = CTF_INFO_KIND(dtd->dtd_data.ctt_info);
1124 while (kind == CTF_K_TYPEDEF || kind == CTF_K_CONST ||
1125 kind == CTF_K_VOLATILE) {
1126 dtd = ctf_dtd_lookup(cdp->cd_ctfp, dtd->dtd_data.ctt_type);
1127 VERIFY(dtd != NULL);
1128 kind = CTF_INFO_KIND(dtd->dtd_data.ctt_info);
1130 ctf_dprintf("got kind %d\n", kind);
1131 VERIFY(kind == CTF_K_INTEGER || kind == CTF_K_ENUM);
1134 * As surprising as it may be, it is strictly possible to create a
1135 * bitfield that is based on an enum. Of course, the C standard leaves
1136 * enums sizing as an ABI concern more or less. To that effect, today on
1137 * all illumos platforms the size of an enum is generally that of an
1138 * int as our supported data models and ABIs all agree on that. So what
1139 * we'll do is fake up a CTF enconding here to use. In this case, we'll
1140 * treat it as an unsigned value of whatever size the underlying enum
1141 * currently has (which is in the ctt_size member of its dynamic type
1142 * data).
1144 if (kind == CTF_K_INTEGER) {
1145 e = dtd->dtd_u.dtu_enc;
1146 } else {
1147 bzero(&e, sizeof (ctf_encoding_t));
1148 e.cte_bits = dtd->dtd_data.ctt_size * NBBY;
1151 for (cdb = ctf_list_next(&cdp->cd_bitfields); cdb != NULL;
1152 cdb = ctf_list_next(cdb)) {
1153 if (cdb->cdb_base == base && cdb->cdb_nbits == bitsz)
1154 break;
1158 * Create a new type if none exists. We name all types in a way that is
1159 * guaranteed not to conflict with the corresponding C type. We do this
1160 * by using the ':' operator.
1162 if (cdb == NULL) {
1163 size_t namesz;
1164 char *name;
1166 e.cte_bits = bitsz;
1167 namesz = snprintf(NULL, 0, "%s:%d", dtd->dtd_name,
1168 (uint32_t)bitsz);
1169 name = ctf_alloc(namesz + 1);
1170 if (name == NULL)
1171 return (ENOMEM);
1172 cdb = ctf_alloc(sizeof (ctf_dwbitf_t));
1173 if (cdb == NULL) {
1174 ctf_free(name, namesz + 1);
1175 return (ENOMEM);
1177 (void) snprintf(name, namesz + 1, "%s:%d", dtd->dtd_name,
1178 (uint32_t)bitsz);
1180 cdb->cdb_base = base;
1181 cdb->cdb_nbits = bitsz;
1182 cdb->cdb_id = ctf_add_integer(cdp->cd_ctfp, CTF_ADD_NONROOT,
1183 name, &e);
1184 if (cdb->cdb_id == CTF_ERR) {
1185 (void) snprintf(cdp->cd_errbuf, cdp->cd_errlen,
1186 "failed to get add bitfield type %s: %s", name,
1187 ctf_errmsg(ctf_errno(cdp->cd_ctfp)));
1188 ctf_free(name, namesz + 1);
1189 ctf_free(cdb, sizeof (ctf_dwbitf_t));
1190 return (ECTF_CONVBKERR);
1192 ctf_free(name, namesz + 1);
1193 ctf_list_append(&cdp->cd_bitfields, cdb);
1196 *idp = cdb->cdb_id;
1198 return (0);
1201 static int
1202 ctf_dwarf_fixup_sou(ctf_die_t *cdp, Dwarf_Die die, ctf_id_t base, boolean_t add)
1204 int ret, kind;
1205 Dwarf_Die child, memb;
1206 Dwarf_Unsigned size;
1207 ulong_t nsz;
1209 kind = ctf_type_kind(cdp->cd_ctfp, base);
1210 VERIFY(kind != CTF_ERR);
1211 VERIFY(kind == CTF_K_STRUCT || kind == CTF_K_UNION);
1214 * Members are in children. However, gcc also allows empty ones.
1216 if ((ret = ctf_dwarf_child(cdp, die, &child)) != 0)
1217 return (ret);
1218 if (child == NULL)
1219 return (0);
1221 memb = child;
1222 while (memb != NULL) {
1223 Dwarf_Die sib, tdie;
1224 Dwarf_Half tag;
1225 ctf_id_t mid;
1226 char *mname;
1227 ulong_t memboff = 0;
1229 if ((ret = ctf_dwarf_tag(cdp, memb, &tag)) != 0)
1230 return (ret);
1232 if (tag != DW_TAG_member)
1233 continue;
1235 if ((ret = ctf_dwarf_refdie(cdp, memb, DW_AT_type, &tdie)) != 0)
1236 return (ret);
1238 if ((ret = ctf_dwarf_convert_type(cdp, tdie, &mid,
1239 CTF_ADD_NONROOT)) != 0)
1240 return (ret);
1241 ctf_dprintf("Got back type id: %d\n", mid);
1244 * If we're not adding a member, just go ahead and return.
1246 if (add == B_FALSE) {
1247 if ((ret = ctf_dwarf_member_bitfield(cdp, memb,
1248 &mid)) != 0)
1249 return (ret);
1250 goto next;
1253 if ((ret = ctf_dwarf_string(cdp, memb, DW_AT_name,
1254 &mname)) != 0 && ret != ENOENT)
1255 return (ret);
1256 if (ret == ENOENT)
1257 mname = NULL;
1259 if (kind == CTF_K_UNION) {
1260 memboff = 0;
1261 } else if ((ret = ctf_dwarf_member_offset(cdp, memb, mid,
1262 &memboff)) != 0) {
1263 if (mname != NULL)
1264 ctf_free(mname, strlen(mname) + 1);
1265 return (ret);
1268 if ((ret = ctf_dwarf_member_bitfield(cdp, memb, &mid)) != 0)
1269 return (ret);
1271 ret = ctf_add_member(cdp->cd_ctfp, base, mname, mid, memboff);
1272 if (ret == CTF_ERR) {
1273 (void) snprintf(cdp->cd_errbuf, cdp->cd_errlen,
1274 "failed to add member %s: %s",
1275 mname, ctf_errmsg(ctf_errno(cdp->cd_ctfp)));
1276 if (mname != NULL)
1277 ctf_free(mname, strlen(mname) + 1);
1278 return (ECTF_CONVBKERR);
1281 if (mname != NULL)
1282 ctf_free(mname, strlen(mname) + 1);
1284 next:
1285 if ((ret = ctf_dwarf_sib(cdp, memb, &sib)) != 0)
1286 return (ret);
1287 memb = sib;
1291 * If we're not adding members, then we don't know the final size of the
1292 * structure, so end here.
1294 if (add == B_FALSE)
1295 return (0);
1297 /* Finally set the size of the structure to the actual byte size */
1298 if ((ret = ctf_dwarf_unsigned(cdp, die, DW_AT_byte_size, &size)) != 0)
1299 return (ret);
1300 nsz = size;
1301 if ((ctf_set_size(cdp->cd_ctfp, base, nsz)) == CTF_ERR) {
1302 int e = ctf_errno(cdp->cd_ctfp);
1303 (void) snprintf(cdp->cd_errbuf, cdp->cd_errlen,
1304 "failed to set type size for %d to 0x%x: %s", base,
1305 (uint32_t)size, ctf_errmsg(e));
1306 return (ECTF_CONVBKERR);
1309 return (0);
1312 static int
1313 ctf_dwarf_create_sou(ctf_die_t *cdp, Dwarf_Die die, ctf_id_t *idp,
1314 int kind, int isroot)
1316 int ret;
1317 char *name;
1318 ctf_id_t base;
1319 Dwarf_Die child;
1320 Dwarf_Bool decl;
1323 * Deal with the terribly annoying case of anonymous structs and unions.
1324 * If they don't have a name, set the name to the empty string.
1326 if ((ret = ctf_dwarf_string(cdp, die, DW_AT_name, &name)) != 0 &&
1327 ret != ENOENT)
1328 return (ret);
1329 if (ret == ENOENT)
1330 name = NULL;
1333 * We need to check if we just have a declaration here. If we do, then
1334 * instead of creating an actual structure or union, we're just going to
1335 * go ahead and create a forward. During a dedup or merge, the forward
1336 * will be replaced with the real thing.
1338 if ((ret = ctf_dwarf_boolean(cdp, die, DW_AT_declaration,
1339 &decl)) != 0) {
1340 if (ret != ENOENT)
1341 return (ret);
1342 decl = 0;
1345 if (decl != 0) {
1346 base = ctf_add_forward(cdp->cd_ctfp, isroot, name, kind);
1347 } else if (kind == CTF_K_STRUCT) {
1348 base = ctf_add_struct(cdp->cd_ctfp, isroot, name);
1349 } else {
1350 base = ctf_add_union(cdp->cd_ctfp, isroot, name);
1352 ctf_dprintf("added sou %s (%d) (%d)\n", name, kind, base);
1353 if (name != NULL)
1354 ctf_free(name, strlen(name) + 1);
1355 if (base == CTF_ERR)
1356 return (ctf_errno(cdp->cd_ctfp));
1357 *idp = base;
1360 * If it's just a declaration, we're not going to mark it for fix up or
1361 * do anything else.
1363 if (decl == B_TRUE)
1364 return (ctf_dwmap_add(cdp, base, die, B_FALSE));
1365 if ((ret = ctf_dwmap_add(cdp, base, die, B_TRUE)) != 0)
1366 return (ret);
1369 * Members are in children. However, gcc also allows empty ones.
1371 if ((ret = ctf_dwarf_child(cdp, die, &child)) != 0)
1372 return (ret);
1373 if (child == NULL)
1374 return (0);
1376 return (0);
1379 static int
1380 ctf_dwarf_create_array_range(ctf_die_t *cdp, Dwarf_Die range, ctf_id_t *idp,
1381 ctf_id_t base, int isroot)
1383 int ret;
1384 Dwarf_Die sib;
1385 Dwarf_Unsigned val;
1386 Dwarf_Signed sval;
1387 ctf_arinfo_t ar;
1389 ctf_dprintf("creating array range\n");
1391 if ((ret = ctf_dwarf_sib(cdp, range, &sib)) != 0)
1392 return (ret);
1393 if (sib != NULL) {
1394 ctf_id_t id;
1395 if ((ret = ctf_dwarf_create_array_range(cdp, sib, &id,
1396 base, CTF_ADD_NONROOT)) != 0)
1397 return (ret);
1398 ar.ctr_contents = id;
1399 } else {
1400 ar.ctr_contents = base;
1403 if ((ar.ctr_index = ctf_dwarf_long(cdp)) == CTF_ERR)
1404 return (ctf_errno(cdp->cd_ctfp));
1407 * Array bounds can be signed or unsigned, but there are several kinds
1408 * of signless forms (data1, data2, etc) that take their sign from the
1409 * routine that is trying to interpret them. That is, data1 can be
1410 * either signed or unsigned, depending on whether you use the signed or
1411 * unsigned accessor function. GCC will use the signless forms to store
1412 * unsigned values which have their high bit set, so we need to try to
1413 * read them first as unsigned to get positive values. We could also
1414 * try signed first, falling back to unsigned if we got a negative
1415 * value.
1417 if ((ret = ctf_dwarf_unsigned(cdp, range, DW_AT_upper_bound,
1418 &val)) == 0) {
1419 ar.ctr_nelems = val + 1;
1420 } else if (ret != ENOENT) {
1421 return (ret);
1422 } else if ((ret = ctf_dwarf_signed(cdp, range, DW_AT_upper_bound,
1423 &sval)) == 0) {
1424 ar.ctr_nelems = sval + 1;
1425 } else if (ret != ENOENT) {
1426 return (ret);
1427 } else {
1428 ar.ctr_nelems = 0;
1431 if ((*idp = ctf_add_array(cdp->cd_ctfp, isroot, &ar)) == CTF_ERR)
1432 return (ctf_errno(cdp->cd_ctfp));
1434 return (0);
1438 * Try and create an array type. First, the kind of the array is specified in
1439 * the DW_AT_type entry. Next, the number of entries is stored in a more
1440 * complicated form, we should have a child that has the DW_TAG_subrange type.
1442 static int
1443 ctf_dwarf_create_array(ctf_die_t *cdp, Dwarf_Die die, ctf_id_t *idp, int isroot)
1445 int ret;
1446 Dwarf_Die tdie, rdie;
1447 ctf_id_t tid;
1448 Dwarf_Half rtag;
1450 if ((ret = ctf_dwarf_refdie(cdp, die, DW_AT_type, &tdie)) != 0)
1451 return (ret);
1452 if ((ret = ctf_dwarf_convert_type(cdp, tdie, &tid,
1453 CTF_ADD_NONROOT)) != 0)
1454 return (ret);
1456 if ((ret = ctf_dwarf_child(cdp, die, &rdie)) != 0)
1457 return (ret);
1458 if ((ret = ctf_dwarf_tag(cdp, rdie, &rtag)) != 0)
1459 return (ret);
1460 if (rtag != DW_TAG_subrange_type) {
1461 (void) snprintf(cdp->cd_errbuf, cdp->cd_errlen,
1462 "encountered array without DW_TAG_subrange_type child\n");
1463 return (ECTF_CONVBKERR);
1467 * The compiler may opt to describe a multi-dimensional array as one
1468 * giant array or it may opt to instead encode it as a series of
1469 * subranges. If it's the latter, then for each subrange we introduce a
1470 * type. We can always use the base type.
1472 if ((ret = ctf_dwarf_create_array_range(cdp, rdie, idp, tid,
1473 isroot)) != 0)
1474 return (ret);
1475 ctf_dprintf("Got back id %d\n", *idp);
1476 return (ctf_dwmap_add(cdp, *idp, die, B_FALSE));
1479 static int
1480 ctf_dwarf_create_reference(ctf_die_t *cdp, Dwarf_Die die, ctf_id_t *idp,
1481 int kind, int isroot)
1483 int ret;
1484 ctf_id_t id;
1485 Dwarf_Die tdie;
1486 char *name;
1487 size_t namelen;
1489 if ((ret = ctf_dwarf_string(cdp, die, DW_AT_name, &name)) != 0 &&
1490 ret != ENOENT)
1491 return (ret);
1492 if (ret == ENOENT) {
1493 name = NULL;
1494 namelen = 0;
1495 } else {
1496 namelen = strlen(name);
1499 ctf_dprintf("reference kind %d %s\n", kind, name != NULL ? name : "<>");
1501 if ((ret = ctf_dwarf_refdie(cdp, die, DW_AT_type, &tdie)) != 0) {
1502 if (ret != ENOENT) {
1503 ctf_free(name, namelen);
1504 return (ret);
1506 if ((id = ctf_dwarf_void(cdp)) == CTF_ERR) {
1507 ctf_free(name, namelen);
1508 return (ctf_errno(cdp->cd_ctfp));
1510 } else {
1511 if ((ret = ctf_dwarf_convert_type(cdp, tdie, &id,
1512 CTF_ADD_NONROOT)) != 0) {
1513 ctf_free(name, namelen);
1514 return (ret);
1518 if ((*idp = ctf_add_reftype(cdp->cd_ctfp, isroot, name, id, kind)) ==
1519 CTF_ERR) {
1520 ctf_free(name, namelen);
1521 return (ctf_errno(cdp->cd_ctfp));
1524 ctf_free(name, namelen);
1525 return (ctf_dwmap_add(cdp, *idp, die, B_FALSE));
1528 static int
1529 ctf_dwarf_create_enum(ctf_die_t *cdp, Dwarf_Die die, ctf_id_t *idp, int isroot)
1531 int ret;
1532 ctf_id_t id;
1533 Dwarf_Die child;
1534 char *name;
1536 if ((ret = ctf_dwarf_string(cdp, die, DW_AT_name, &name)) != 0 &&
1537 ret != ENOENT)
1538 return (ret);
1539 if (ret == ENOENT)
1540 name = NULL;
1541 id = ctf_add_enum(cdp->cd_ctfp, isroot, name);
1542 ctf_dprintf("added enum %s (%d)\n", name, id);
1543 if (name != NULL)
1544 ctf_free(name, strlen(name) + 1);
1545 if (id == CTF_ERR)
1546 return (ctf_errno(cdp->cd_ctfp));
1547 *idp = id;
1548 if ((ret = ctf_dwmap_add(cdp, id, die, B_FALSE)) != 0)
1549 return (ret);
1552 if ((ret = ctf_dwarf_child(cdp, die, &child)) != 0) {
1553 if (ret == ENOENT)
1554 ret = 0;
1555 return (ret);
1558 while (child != NULL) {
1559 Dwarf_Half tag;
1560 Dwarf_Signed sval;
1561 Dwarf_Unsigned uval;
1562 Dwarf_Die arg = child;
1563 int eval;
1565 if ((ret = ctf_dwarf_sib(cdp, arg, &child)) != 0)
1566 return (ret);
1568 if ((ret = ctf_dwarf_tag(cdp, arg, &tag)) != 0)
1569 return (ret);
1571 if (tag != DW_TAG_enumerator) {
1572 if ((ret = ctf_dwarf_convert_type(cdp, arg, NULL,
1573 CTF_ADD_NONROOT)) != 0)
1574 return (ret);
1575 continue;
1578 if ((ret = ctf_dwarf_signed(cdp, arg, DW_AT_const_value,
1579 &sval)) == 0) {
1580 eval = sval;
1581 } else if (ret != ENOENT) {
1582 return (ret);
1583 } else if ((ret = ctf_dwarf_unsigned(cdp, arg,
1584 DW_AT_const_value, &uval)) == 0) {
1585 eval = (int)uval;
1586 } else {
1587 (void) snprintf(cdp->cd_errbuf, cdp->cd_errlen,
1588 "encountered enumration without constant value\n");
1589 return (ECTF_CONVBKERR);
1593 * DWARF v4 section 5.7 tells us we'll always have names.
1595 if ((ret = ctf_dwarf_string(cdp, arg, DW_AT_name,
1596 &name)) != 0)
1597 return (ret);
1599 ret = ctf_add_enumerator(cdp->cd_ctfp, id, name, eval);
1600 if (ret == CTF_ERR) {
1601 (void) snprintf(cdp->cd_errbuf, cdp->cd_errlen,
1602 "failed to add enumarator %s (%d) to %d\n",
1603 name, eval, id);
1604 ctf_free(name, strlen(name) + 1);
1605 return (ctf_errno(cdp->cd_ctfp));
1607 ctf_free(name, strlen(name) + 1);
1610 return (0);
1614 * For a function pointer, walk over and process all of its children, unless we
1615 * encounter one that's just a declaration. In which case, we error on it.
1617 static int
1618 ctf_dwarf_create_fptr(ctf_die_t *cdp, Dwarf_Die die, ctf_id_t *idp, int isroot)
1620 int ret;
1621 Dwarf_Bool b;
1622 ctf_funcinfo_t fi;
1623 Dwarf_Die retdie;
1624 ctf_id_t *argv = NULL;
1626 bzero(&fi, sizeof (ctf_funcinfo_t));
1628 if ((ret = ctf_dwarf_boolean(cdp, die, DW_AT_declaration, &b)) != 0) {
1629 if (ret != ENOENT)
1630 return (ret);
1631 } else {
1632 if (b != 0)
1633 return (EPROTOTYPE);
1637 * Return type is in DW_AT_type, if none, it returns void.
1639 if ((ret = ctf_dwarf_refdie(cdp, die, DW_AT_type, &retdie)) != 0) {
1640 if (ret != ENOENT)
1641 return (ret);
1642 if ((fi.ctc_return = ctf_dwarf_void(cdp)) == CTF_ERR)
1643 return (ctf_errno(cdp->cd_ctfp));
1644 } else {
1645 if ((ret = ctf_dwarf_convert_type(cdp, retdie, &fi.ctc_return,
1646 CTF_ADD_NONROOT)) != 0)
1647 return (ret);
1650 if ((ret = ctf_dwarf_function_count(cdp, die, &fi, B_TRUE)) != 0) {
1651 return (ret);
1654 if (fi.ctc_argc != 0) {
1655 argv = ctf_alloc(sizeof (ctf_id_t) * fi.ctc_argc);
1656 if (argv == NULL)
1657 return (ENOMEM);
1659 if ((ret = ctf_dwarf_convert_fargs(cdp, die, &fi, argv)) != 0) {
1660 ctf_free(argv, sizeof (ctf_id_t) * fi.ctc_argc);
1661 return (ret);
1665 if ((*idp = ctf_add_funcptr(cdp->cd_ctfp, isroot, &fi, argv)) ==
1666 CTF_ERR) {
1667 ctf_free(argv, sizeof (ctf_id_t) * fi.ctc_argc);
1668 return (ctf_errno(cdp->cd_ctfp));
1671 ctf_free(argv, sizeof (ctf_id_t) * fi.ctc_argc);
1672 return (ctf_dwmap_add(cdp, *idp, die, B_FALSE));
1675 static int
1676 ctf_dwarf_convert_type(ctf_die_t *cdp, Dwarf_Die die, ctf_id_t *idp,
1677 int isroot)
1679 int ret;
1680 Dwarf_Off offset;
1681 Dwarf_Half tag;
1682 ctf_dwmap_t lookup, *map;
1683 ctf_id_t id;
1685 if (idp == NULL)
1686 idp = &id;
1688 if ((ret = ctf_dwarf_offset(cdp, die, &offset)) != 0)
1689 return (ret);
1691 if (offset > cdp->cd_maxoff) {
1692 (void) snprintf(cdp->cd_errbuf, cdp->cd_errlen,
1693 "die offset %llu beyond maximum for header %llu\n",
1694 offset, cdp->cd_maxoff);
1695 return (ECTF_CONVBKERR);
1699 * If we've already added an entry for this offset, then we're done.
1701 lookup.cdm_off = offset;
1702 if ((map = avl_find(&cdp->cd_map, &lookup, NULL)) != NULL) {
1703 *idp = map->cdm_id;
1704 return (0);
1707 if ((ret = ctf_dwarf_tag(cdp, die, &tag)) != 0)
1708 return (ret);
1710 ret = ENOTSUP;
1711 switch (tag) {
1712 case DW_TAG_base_type:
1713 ctf_dprintf("base\n");
1714 ret = ctf_dwarf_create_base(cdp, die, idp, isroot, offset);
1715 break;
1716 case DW_TAG_array_type:
1717 ctf_dprintf("array\n");
1718 ret = ctf_dwarf_create_array(cdp, die, idp, isroot);
1719 break;
1720 case DW_TAG_enumeration_type:
1721 ctf_dprintf("enum\n");
1722 ret = ctf_dwarf_create_enum(cdp, die, idp, isroot);
1723 break;
1724 case DW_TAG_pointer_type:
1725 ctf_dprintf("pointer\n");
1726 ret = ctf_dwarf_create_reference(cdp, die, idp, CTF_K_POINTER,
1727 isroot);
1728 break;
1729 case DW_TAG_structure_type:
1730 ctf_dprintf("struct\n");
1731 ret = ctf_dwarf_create_sou(cdp, die, idp, CTF_K_STRUCT,
1732 isroot);
1733 break;
1734 case DW_TAG_subroutine_type:
1735 ctf_dprintf("fptr\n");
1736 ret = ctf_dwarf_create_fptr(cdp, die, idp, isroot);
1737 break;
1738 case DW_TAG_typedef:
1739 ctf_dprintf("typedef\n");
1740 ret = ctf_dwarf_create_reference(cdp, die, idp, CTF_K_TYPEDEF,
1741 isroot);
1742 break;
1743 case DW_TAG_union_type:
1744 ctf_dprintf("union\n");
1745 ret = ctf_dwarf_create_sou(cdp, die, idp, CTF_K_UNION,
1746 isroot);
1747 break;
1748 case DW_TAG_const_type:
1749 ctf_dprintf("const\n");
1750 ret = ctf_dwarf_create_reference(cdp, die, idp, CTF_K_CONST,
1751 isroot);
1752 break;
1753 case DW_TAG_volatile_type:
1754 ctf_dprintf("volatile\n");
1755 ret = ctf_dwarf_create_reference(cdp, die, idp, CTF_K_VOLATILE,
1756 isroot);
1757 break;
1758 case DW_TAG_restrict_type:
1759 ctf_dprintf("restrict\n");
1760 ret = ctf_dwarf_create_reference(cdp, die, idp, CTF_K_RESTRICT,
1761 isroot);
1762 break;
1763 default:
1764 ctf_dprintf("ignoring tag type %x\n", tag);
1765 ret = 0;
1766 break;
1768 ctf_dprintf("ctf_dwarf_convert_type tag specific handler returned %d\n",
1769 ret);
1771 return (ret);
1774 static int
1775 ctf_dwarf_walk_lexical(ctf_die_t *cdp, Dwarf_Die die)
1777 int ret;
1778 Dwarf_Die child;
1780 if ((ret = ctf_dwarf_child(cdp, die, &child)) != 0)
1781 return (ret);
1783 if (child == NULL)
1784 return (0);
1786 return (ctf_dwarf_convert_die(cdp, die));
1789 static int
1790 ctf_dwarf_function_count(ctf_die_t *cdp, Dwarf_Die die, ctf_funcinfo_t *fip,
1791 boolean_t fptr)
1793 int ret;
1794 Dwarf_Die child, sib, arg;
1796 if ((ret = ctf_dwarf_child(cdp, die, &child)) != 0)
1797 return (ret);
1799 arg = child;
1800 while (arg != NULL) {
1801 Dwarf_Half tag;
1803 if ((ret = ctf_dwarf_tag(cdp, arg, &tag)) != 0)
1804 return (ret);
1807 * We have to check for a varargs type decleration. This will
1808 * happen in one of two ways. If we have a function pointer
1809 * type, then it'll be done with a tag of type
1810 * DW_TAG_unspecified_parameters. However, it only means we have
1811 * a variable number of arguments, if we have more than one
1812 * argument found so far. Otherwise, when we have a function
1813 * type, it instead uses a formal parameter whose name is '...'
1814 * to indicate a variable arguments member.
1816 * Also, if we have a function pointer, then we have to expect
1817 * that we might not get a name at all.
1819 if (tag == DW_TAG_formal_parameter && fptr == B_FALSE) {
1820 char *name;
1821 if ((ret = ctf_dwarf_string(cdp, die, DW_AT_name,
1822 &name)) != 0)
1823 return (ret);
1824 if (strcmp(name, DWARF_VARARGS_NAME) == 0)
1825 fip->ctc_flags |= CTF_FUNC_VARARG;
1826 else
1827 fip->ctc_argc++;
1828 ctf_free(name, strlen(name) + 1);
1829 } else if (tag == DW_TAG_formal_parameter) {
1830 fip->ctc_argc++;
1831 } else if (tag == DW_TAG_unspecified_parameters &&
1832 fip->ctc_argc > 0) {
1833 fip->ctc_flags |= CTF_FUNC_VARARG;
1835 if ((ret = ctf_dwarf_sib(cdp, arg, &sib)) != 0)
1836 return (ret);
1837 arg = sib;
1840 return (0);
1843 static int
1844 ctf_dwarf_convert_fargs(ctf_die_t *cdp, Dwarf_Die die, ctf_funcinfo_t *fip,
1845 ctf_id_t *argv)
1847 int ret;
1848 int i = 0;
1849 Dwarf_Die child, sib, arg;
1851 if ((ret = ctf_dwarf_child(cdp, die, &child)) != 0)
1852 return (ret);
1854 arg = child;
1855 while (arg != NULL) {
1856 Dwarf_Half tag;
1858 if ((ret = ctf_dwarf_tag(cdp, arg, &tag)) != 0)
1859 return (ret);
1860 if (tag == DW_TAG_formal_parameter) {
1861 Dwarf_Die tdie;
1863 if ((ret = ctf_dwarf_refdie(cdp, arg, DW_AT_type,
1864 &tdie)) != 0)
1865 return (ret);
1867 if ((ret = ctf_dwarf_convert_type(cdp, tdie, &argv[i],
1868 CTF_ADD_ROOT)) != 0)
1869 return (ret);
1870 i++;
1873 * Once we hit argc entries, we're done. This ensures we
1874 * don't accidentally hit a varargs which should be the
1875 * least entry.
1877 if (i == fip->ctc_argc)
1878 break;
1881 if ((ret = ctf_dwarf_sib(cdp, arg, &sib)) != 0)
1882 return (ret);
1883 arg = sib;
1886 return (0);
1889 static int
1890 ctf_dwarf_convert_function(ctf_die_t *cdp, Dwarf_Die die)
1892 int ret;
1893 char *name;
1894 ctf_dwfunc_t *cdf;
1895 Dwarf_Die tdie;
1898 * Functions that don't have a name are generally functions that have
1899 * been inlined and thus most information about them has been lost. If
1900 * we can't get a name, then instead of returning ENOENT, we silently
1901 * swallow the error.
1903 if ((ret = ctf_dwarf_string(cdp, die, DW_AT_name, &name)) != 0) {
1904 if (ret == ENOENT)
1905 return (0);
1906 return (ret);
1909 ctf_dprintf("beginning work on function %s\n", name);
1910 if ((cdf = ctf_alloc(sizeof (ctf_dwfunc_t))) == NULL) {
1911 ctf_free(name, strlen(name) + 1);
1912 return (ENOMEM);
1914 bzero(cdf, sizeof (ctf_dwfunc_t));
1915 cdf->cdf_name = name;
1917 if ((ret = ctf_dwarf_refdie(cdp, die, DW_AT_type, &tdie)) == 0) {
1918 if ((ret = ctf_dwarf_convert_type(cdp, tdie,
1919 &(cdf->cdf_fip.ctc_return), CTF_ADD_ROOT)) != 0) {
1920 ctf_free(name, strlen(name) + 1);
1921 ctf_free(cdf, sizeof (ctf_dwfunc_t));
1922 return (ret);
1924 } else if (ret != ENOENT) {
1925 ctf_free(name, strlen(name) + 1);
1926 ctf_free(cdf, sizeof (ctf_dwfunc_t));
1927 return (ret);
1928 } else {
1929 if ((cdf->cdf_fip.ctc_return = ctf_dwarf_void(cdp)) ==
1930 CTF_ERR) {
1931 ctf_free(name, strlen(name) + 1);
1932 ctf_free(cdf, sizeof (ctf_dwfunc_t));
1933 return (ctf_errno(cdp->cd_ctfp));
1938 * A function has a number of children, some of which may not be ones we
1939 * care about. Children that we care about have a type of
1940 * DW_TAG_formal_parameter. We're going to do two passes, the first to
1941 * count the arguments, the second to process them. Afterwards, we
1942 * should be good to go ahead and add this function.
1944 * Note, we already got the return type by going in and grabbing it out
1945 * of the DW_AT_type.
1947 if ((ret = ctf_dwarf_function_count(cdp, die, &cdf->cdf_fip,
1948 B_FALSE)) != 0) {
1949 ctf_free(name, strlen(name) + 1);
1950 ctf_free(cdf, sizeof (ctf_dwfunc_t));
1951 return (ret);
1954 ctf_dprintf("beginning to convert function arguments %s\n", name);
1955 if (cdf->cdf_fip.ctc_argc != 0) {
1956 uint_t argc = cdf->cdf_fip.ctc_argc;
1957 cdf->cdf_argv = ctf_alloc(sizeof (ctf_id_t) * argc);
1958 if (cdf->cdf_argv == NULL) {
1959 ctf_free(name, strlen(name) + 1);
1960 ctf_free(cdf, sizeof (ctf_dwfunc_t));
1961 return (ENOMEM);
1963 if ((ret = ctf_dwarf_convert_fargs(cdp, die,
1964 &cdf->cdf_fip, cdf->cdf_argv)) != 0) {
1965 ctf_free(cdf->cdf_argv, sizeof (ctf_id_t) * argc);
1966 ctf_free(name, strlen(name) + 1);
1967 ctf_free(cdf, sizeof (ctf_dwfunc_t));
1968 return (ret);
1970 } else {
1971 cdf->cdf_argv = NULL;
1974 if ((ret = ctf_dwarf_isglobal(cdp, die, &cdf->cdf_global)) != 0) {
1975 ctf_free(cdf->cdf_argv, sizeof (ctf_id_t) *
1976 cdf->cdf_fip.ctc_argc);
1977 ctf_free(name, strlen(name) + 1);
1978 ctf_free(cdf, sizeof (ctf_dwfunc_t));
1979 return (ret);
1982 ctf_list_append(&cdp->cd_funcs, cdf);
1983 return (ret);
1987 * Convert variables, but only if they're not prototypes and have names.
1989 static int
1990 ctf_dwarf_convert_variable(ctf_die_t *cdp, Dwarf_Die die)
1992 int ret;
1993 char *name;
1994 Dwarf_Bool b;
1995 Dwarf_Die tdie;
1996 ctf_id_t id;
1997 ctf_dwvar_t *cdv;
1999 if ((ret = ctf_dwarf_boolean(cdp, die, DW_AT_declaration, &b)) != 0) {
2000 if (ret != ENOENT)
2001 return (ret);
2002 } else if (b != 0) {
2003 return (0);
2006 if ((ret = ctf_dwarf_string(cdp, die, DW_AT_name, &name)) != 0 &&
2007 ret != ENOENT)
2008 return (ret);
2009 if (ret == ENOENT)
2010 return (0);
2012 if ((ret = ctf_dwarf_refdie(cdp, die, DW_AT_type, &tdie)) != 0) {
2013 ctf_free(name, strlen(name) + 1);
2014 return (ret);
2017 if ((ret = ctf_dwarf_convert_type(cdp, tdie, &id,
2018 CTF_ADD_ROOT)) != 0)
2019 return (ret);
2021 if ((cdv = ctf_alloc(sizeof (ctf_dwvar_t))) == NULL) {
2022 ctf_free(name, strlen(name) + 1);
2023 return (ENOMEM);
2026 cdv->cdv_name = name;
2027 cdv->cdv_type = id;
2029 if ((ret = ctf_dwarf_isglobal(cdp, die, &cdv->cdv_global)) != 0) {
2030 ctf_free(cdv, sizeof (ctf_dwvar_t));
2031 ctf_free(name, strlen(name) + 1);
2032 return (ret);
2035 ctf_list_append(&cdp->cd_vars, cdv);
2036 return (0);
2040 * Walk through our set of top-level types and process them.
2042 static int
2043 ctf_dwarf_walk_toplevel(ctf_die_t *cdp, Dwarf_Die die)
2045 int ret;
2046 Dwarf_Off offset;
2047 Dwarf_Half tag;
2049 if ((ret = ctf_dwarf_offset(cdp, die, &offset)) != 0)
2050 return (ret);
2052 if (offset > cdp->cd_maxoff) {
2053 (void) snprintf(cdp->cd_errbuf, cdp->cd_errlen,
2054 "die offset %llu beyond maximum for header %llu\n",
2055 offset, cdp->cd_maxoff);
2056 return (ECTF_CONVBKERR);
2059 if ((ret = ctf_dwarf_tag(cdp, die, &tag)) != 0)
2060 return (ret);
2062 ret = 0;
2063 switch (tag) {
2064 case DW_TAG_subprogram:
2065 ctf_dprintf("top level func\n");
2066 ret = ctf_dwarf_convert_function(cdp, die);
2067 break;
2068 case DW_TAG_variable:
2069 ctf_dprintf("top level var\n");
2070 ret = ctf_dwarf_convert_variable(cdp, die);
2071 break;
2072 case DW_TAG_lexical_block:
2073 ctf_dprintf("top level block\n");
2074 ret = ctf_dwarf_walk_lexical(cdp, die);
2075 break;
2076 case DW_TAG_enumeration_type:
2077 case DW_TAG_structure_type:
2078 case DW_TAG_typedef:
2079 case DW_TAG_union_type:
2080 ctf_dprintf("top level type\n");
2081 ret = ctf_dwarf_convert_type(cdp, die, NULL, B_TRUE);
2082 break;
2083 default:
2084 break;
2087 return (ret);
2092 * We're given a node. At this node we need to convert it and then proceed to
2093 * convert any siblings that are associaed with this die.
2095 static int
2096 ctf_dwarf_convert_die(ctf_die_t *cdp, Dwarf_Die die)
2098 while (die != NULL) {
2099 int ret;
2100 Dwarf_Die sib;
2102 if ((ret = ctf_dwarf_walk_toplevel(cdp, die)) != 0)
2103 return (ret);
2105 if ((ret = ctf_dwarf_sib(cdp, die, &sib)) != 0)
2106 return (ret);
2107 die = sib;
2109 return (0);
2112 static int
2113 ctf_dwarf_fixup_die(ctf_die_t *cdp, boolean_t addpass)
2115 ctf_dwmap_t *map;
2117 for (map = avl_first(&cdp->cd_map); map != NULL;
2118 map = AVL_NEXT(&cdp->cd_map, map)) {
2119 int ret;
2120 if (map->cdm_fix == B_FALSE)
2121 continue;
2122 if ((ret = ctf_dwarf_fixup_sou(cdp, map->cdm_die, map->cdm_id,
2123 addpass)) != 0)
2124 return (ret);
2127 return (0);
2130 static ctf_dwfunc_t *
2131 ctf_dwarf_match_func(ctf_die_t *cdp, const char *file, const char *name,
2132 int bind)
2134 ctf_dwfunc_t *cdf;
2136 if (bind == STB_WEAK)
2137 return (NULL);
2139 /* Nothing we can do if we can't find a name to compare it to. */
2140 if (bind == STB_LOCAL && (file == NULL || cdp->cd_name == NULL))
2141 return (NULL);
2143 for (cdf = ctf_list_next(&cdp->cd_funcs); cdf != NULL;
2144 cdf = ctf_list_next(cdf)) {
2145 if (bind == STB_GLOBAL && cdf->cdf_global == B_FALSE)
2146 continue;
2147 if (bind == STB_LOCAL && cdf->cdf_global == B_TRUE)
2148 continue;
2149 if (strcmp(name, cdf->cdf_name) != 0)
2150 continue;
2151 if (bind == STB_LOCAL && strcmp(file, cdp->cd_name) != 0)
2152 continue;
2153 return (cdf);
2156 return (NULL);
2158 static ctf_dwvar_t *
2159 ctf_dwarf_match_var(ctf_die_t *cdp, const char *file, const char *name,
2160 int bind)
2162 ctf_dwvar_t *cdv;
2164 /* Nothing we can do if we can't find a name to compare it to. */
2165 if (bind == STB_LOCAL && (file == NULL || cdp->cd_name == NULL))
2166 return (NULL);
2167 ctf_dprintf("Still considering %s\n", name);
2169 for (cdv = ctf_list_next(&cdp->cd_vars); cdv != NULL;
2170 cdv = ctf_list_next(cdv)) {
2171 if (bind == STB_GLOBAL && cdv->cdv_global == B_FALSE)
2172 continue;
2173 if (bind == STB_LOCAL && cdv->cdv_global == B_TRUE)
2174 continue;
2175 if (strcmp(name, cdv->cdv_name) != 0)
2176 continue;
2177 if (bind == STB_LOCAL && strcmp(file, cdp->cd_name) != 0)
2178 continue;
2179 return (cdv);
2182 return (NULL);
2185 static int
2186 ctf_dwarf_symtab_iter(ctf_die_t *cdp, ctf_dwarf_symtab_f *func, void *arg)
2188 int ret;
2189 ulong_t i;
2190 ctf_file_t *fp = cdp->cd_ctfp;
2191 const char *file = NULL;
2192 uintptr_t symbase = (uintptr_t)fp->ctf_symtab.cts_data;
2193 uintptr_t strbase = (uintptr_t)fp->ctf_strtab.cts_data;
2195 for (i = 0; i < fp->ctf_nsyms; i++) {
2196 const char *name;
2197 int type;
2198 GElf_Sym gsym;
2199 const GElf_Sym *gsymp;
2201 if (fp->ctf_symtab.cts_entsize == sizeof (Elf32_Sym)) {
2202 const Elf32_Sym *symp = (Elf32_Sym *)symbase + i;
2203 type = ELF32_ST_TYPE(symp->st_info);
2204 if (type == STT_FILE) {
2205 file = (char *)(strbase + symp->st_name);
2206 continue;
2208 if (type != STT_OBJECT && type != STT_FUNC)
2209 continue;
2210 if (ctf_sym_valid(strbase, type, symp->st_shndx,
2211 symp->st_value, symp->st_name) == B_FALSE)
2212 continue;
2213 name = (char *)(strbase + symp->st_name);
2214 gsym.st_name = symp->st_name;
2215 gsym.st_value = symp->st_value;
2216 gsym.st_size = symp->st_size;
2217 gsym.st_info = symp->st_info;
2218 gsym.st_other = symp->st_other;
2219 gsym.st_shndx = symp->st_shndx;
2220 gsymp = &gsym;
2221 } else {
2222 const Elf64_Sym *symp = (Elf64_Sym *)symbase + i;
2223 type = ELF64_ST_TYPE(symp->st_info);
2224 if (type == STT_FILE) {
2225 file = (char *)(strbase + symp->st_name);
2226 continue;
2228 if (type != STT_OBJECT && type != STT_FUNC)
2229 continue;
2230 if (ctf_sym_valid(strbase, type, symp->st_shndx,
2231 symp->st_value, symp->st_name) == B_FALSE)
2232 continue;
2233 name = (char *)(strbase + symp->st_name);
2234 gsymp = symp;
2237 ret = func(cdp, gsymp, i, file, name, arg);
2238 if (ret != 0)
2239 return (ret);
2242 return (0);
2245 /*ARGSUSED*/
2246 static int
2247 ctf_dwarf_conv_funcvars_cb(ctf_die_t *cdp, const GElf_Sym *symp, ulong_t idx,
2248 const char *file, const char *name, void *arg)
2250 int ret, bind, type;
2252 bind = GELF_ST_BIND(symp->st_info);
2253 type = GELF_ST_TYPE(symp->st_info);
2256 * Come back to weak symbols in another pass
2258 if (bind == STB_WEAK)
2259 return (0);
2261 if (type == STT_OBJECT) {
2262 ctf_dwvar_t *cdv = ctf_dwarf_match_var(cdp, file, name,
2263 bind);
2264 ctf_dprintf("match for %s (%d): %p\n", name, idx, cdv);
2265 if (cdv == NULL)
2266 return (0);
2267 ret = ctf_add_object(cdp->cd_ctfp, idx, cdv->cdv_type);
2268 ctf_dprintf("added object %s\n", name);
2269 } else {
2270 ctf_dwfunc_t *cdf = ctf_dwarf_match_func(cdp, file, name,
2271 bind);
2272 if (cdf == NULL)
2273 return (0);
2274 ret = ctf_add_function(cdp->cd_ctfp, idx, &cdf->cdf_fip,
2275 cdf->cdf_argv);
2278 if (ret == CTF_ERR) {
2279 return (ctf_errno(cdp->cd_ctfp));
2282 return (0);
2285 static int
2286 ctf_dwarf_conv_funcvars(ctf_die_t *cdp)
2288 return (ctf_dwarf_symtab_iter(cdp, ctf_dwarf_conv_funcvars_cb, NULL));
2292 * Note, this comment comes from the original version of the CTF tools.
2294 * If we have a weak symbol, attempt to find the strong symbol it will
2295 * resolve to. Note: the code where this actually happens is in
2296 * sym_process() in cmd/sgs/libld/common/syms.c
2298 * Finding the matching symbol is unfortunately not trivial. For a
2299 * symbol to be a candidate, it must:
2301 * - have the same type (function, object)
2302 * - have the same value (address)
2303 * - have the same size
2304 * - not be another weak symbol
2305 * - belong to the same section (checked via section index)
2307 * If such a candidate is global, then we assume we've found it. The
2308 * linker generates the symbol table such that the curfile might be
2309 * incorrect; this is OK for global symbols, since find_iidesc() doesn't
2310 * need to check for the source file for the symbol.
2312 * We might have found a strong local symbol, where the curfile is
2313 * accurate and matches that of the weak symbol. We assume this is a
2314 * reasonable match.
2316 * If we've got a local symbol with a non-matching curfile, there are
2317 * two possibilities. Either this is a completely different symbol, or
2318 * it's a once-global symbol that was scoped to local via a mapfile. In
2319 * the latter case, curfile is likely inaccurate since the linker does
2320 * not preserve the needed curfile in the order of the symbol table (see
2321 * the comments about locally scoped symbols in libld's update_osym()).
2322 * As we can't tell this case from the former one, we use this symbol
2323 * iff no other matching symbol is found.
2325 * What we really need here is a SUNW section containing weak<->strong
2326 * mappings that we can consume.
2328 typedef struct ctf_dwarf_weak_arg {
2329 const GElf_Sym *cweak_symp;
2330 const char *cweak_file;
2331 boolean_t cweak_candidate;
2332 ulong_t cweak_idx;
2333 } ctf_dwarf_weak_arg_t;
2335 /*ARGSUSED*/
2336 static int
2337 ctf_dwarf_conv_check_weak(ctf_die_t *cdp, const GElf_Sym *symp,
2338 ulong_t idx, const char *file, const char *name, void *arg)
2340 ctf_dwarf_weak_arg_t *cweak = arg;
2341 const GElf_Sym *wsymp = cweak->cweak_symp;
2343 ctf_dprintf("comparing weak to %s\n", name);
2345 if (GELF_ST_BIND(symp->st_info) == STB_WEAK) {
2346 return (0);
2349 if (GELF_ST_TYPE(wsymp->st_info) != GELF_ST_TYPE(symp->st_info)) {
2350 return (0);
2353 if (wsymp->st_value != symp->st_value) {
2354 return (0);
2357 if (wsymp->st_size != symp->st_size) {
2358 return (0);
2361 if (wsymp->st_shndx != symp->st_shndx) {
2362 return (0);
2366 * Check if it's a weak candidate.
2368 if (GELF_ST_BIND(symp->st_info) == STB_LOCAL &&
2369 (file == NULL || cweak->cweak_file == NULL ||
2370 strcmp(file, cweak->cweak_file) != 0)) {
2371 cweak->cweak_candidate = B_TRUE;
2372 cweak->cweak_idx = idx;
2373 return (0);
2377 * Found a match, break.
2379 cweak->cweak_idx = idx;
2380 return (1);
2383 static int
2384 ctf_dwarf_duplicate_sym(ctf_die_t *cdp, ulong_t idx, ulong_t matchidx)
2386 ctf_id_t id = ctf_lookup_by_symbol(cdp->cd_ctfp, matchidx);
2389 * If we matched something that for some reason didn't have type data,
2390 * we don't consider that a fatal error and silently swallow it.
2392 if (id == CTF_ERR) {
2393 if (ctf_errno(cdp->cd_ctfp) == ECTF_NOTYPEDAT)
2394 return (0);
2395 else
2396 return (ctf_errno(cdp->cd_ctfp));
2399 if (ctf_add_object(cdp->cd_ctfp, idx, id) == CTF_ERR)
2400 return (ctf_errno(cdp->cd_ctfp));
2402 return (0);
2405 static int
2406 ctf_dwarf_duplicate_func(ctf_die_t *cdp, ulong_t idx, ulong_t matchidx)
2408 int ret;
2409 ctf_funcinfo_t fip;
2410 ctf_id_t *args = NULL;
2412 if (ctf_func_info(cdp->cd_ctfp, matchidx, &fip) == CTF_ERR) {
2413 if (ctf_errno(cdp->cd_ctfp) == ECTF_NOFUNCDAT)
2414 return (0);
2415 else
2416 return (ctf_errno(cdp->cd_ctfp));
2419 if (fip.ctc_argc != 0) {
2420 args = ctf_alloc(sizeof (ctf_id_t) * fip.ctc_argc);
2421 if (args == NULL)
2422 return (ENOMEM);
2424 if (ctf_func_args(cdp->cd_ctfp, matchidx, fip.ctc_argc, args) ==
2425 CTF_ERR) {
2426 ctf_free(args, sizeof (ctf_id_t) * fip.ctc_argc);
2427 return (ctf_errno(cdp->cd_ctfp));
2431 ret = ctf_add_function(cdp->cd_ctfp, idx, &fip, args);
2432 if (args != NULL)
2433 ctf_free(args, sizeof (ctf_id_t) * fip.ctc_argc);
2434 if (ret == CTF_ERR)
2435 return (ctf_errno(cdp->cd_ctfp));
2437 return (0);
2440 /*ARGSUSED*/
2441 static int
2442 ctf_dwarf_conv_weaks_cb(ctf_die_t *cdp, const GElf_Sym *symp,
2443 ulong_t idx, const char *file, const char *name, void *arg)
2445 int ret, type;
2446 ctf_dwarf_weak_arg_t cweak;
2449 * We only care about weak symbols.
2451 if (GELF_ST_BIND(symp->st_info) != STB_WEAK)
2452 return (0);
2454 type = GELF_ST_TYPE(symp->st_info);
2455 ASSERT(type == STT_OBJECT || type == STT_FUNC);
2458 * For each weak symbol we encounter, we need to do a second iteration
2459 * to try and find a match. We should probably think about other
2460 * techniques to try and save us time in the future.
2462 cweak.cweak_symp = symp;
2463 cweak.cweak_file = file;
2464 cweak.cweak_candidate = B_FALSE;
2465 cweak.cweak_idx = 0;
2467 ctf_dprintf("Trying to find weak equiv for %s\n", name);
2469 ret = ctf_dwarf_symtab_iter(cdp, ctf_dwarf_conv_check_weak, &cweak);
2470 VERIFY(ret == 0 || ret == 1);
2473 * Nothing was ever found, we're not going to add anything for this
2474 * entry.
2476 if (ret == 0 && cweak.cweak_candidate == B_FALSE) {
2477 ctf_dprintf("found no weak match for %s\n", name);
2478 return (0);
2482 * Now, finally go and add the type based on the match.
2484 if (type == STT_OBJECT) {
2485 ret = ctf_dwarf_duplicate_sym(cdp, idx, cweak.cweak_idx);
2486 } else {
2487 ret = ctf_dwarf_duplicate_func(cdp, idx, cweak.cweak_idx);
2490 return (ret);
2493 static int
2494 ctf_dwarf_conv_weaks(ctf_die_t *cdp)
2496 return (ctf_dwarf_symtab_iter(cdp, ctf_dwarf_conv_weaks_cb, NULL));
2499 /* ARGSUSED */
2500 static int
2501 ctf_dwarf_convert_one(void *arg, void *unused)
2503 int ret;
2504 ctf_file_t *dedup;
2505 ctf_die_t *cdp = arg;
2507 ctf_dprintf("converting die: %s\n", cdp->cd_name);
2508 ctf_dprintf("max offset: %x\n", cdp->cd_maxoff);
2509 VERIFY(cdp != NULL);
2511 ret = ctf_dwarf_convert_die(cdp, cdp->cd_cu);
2512 ctf_dprintf("ctf_dwarf_convert_die (%s) returned %d\n", cdp->cd_name,
2513 ret);
2514 if (ret != 0) {
2515 return (ret);
2517 if (ctf_update(cdp->cd_ctfp) != 0) {
2518 return (ctf_dwarf_error(cdp, cdp->cd_ctfp, 0,
2519 "failed to update output ctf container"));
2522 ret = ctf_dwarf_fixup_die(cdp, B_FALSE);
2523 ctf_dprintf("ctf_dwarf_fixup_die (%s) returned %d\n", cdp->cd_name,
2524 ret);
2525 if (ret != 0) {
2526 return (ret);
2528 if (ctf_update(cdp->cd_ctfp) != 0) {
2529 return (ctf_dwarf_error(cdp, cdp->cd_ctfp, 0,
2530 "failed to update output ctf container"));
2533 ret = ctf_dwarf_fixup_die(cdp, B_TRUE);
2534 ctf_dprintf("ctf_dwarf_fixup_die (%s) returned %d\n", cdp->cd_name,
2535 ret);
2536 if (ret != 0) {
2537 return (ret);
2539 if (ctf_update(cdp->cd_ctfp) != 0) {
2540 return (ctf_dwarf_error(cdp, cdp->cd_ctfp, 0,
2541 "failed to update output ctf container"));
2545 if ((ret = ctf_dwarf_conv_funcvars(cdp)) != 0) {
2546 return (ctf_dwarf_error(cdp, NULL, ret,
2547 "failed to convert strong functions and variables"));
2550 if (ctf_update(cdp->cd_ctfp) != 0) {
2551 return (ctf_dwarf_error(cdp, cdp->cd_ctfp, 0,
2552 "failed to update output ctf container"));
2555 if (cdp->cd_doweaks == B_TRUE) {
2556 if ((ret = ctf_dwarf_conv_weaks(cdp)) != 0) {
2557 return (ctf_dwarf_error(cdp, NULL, ret,
2558 "failed to convert weak functions and variables"));
2561 if (ctf_update(cdp->cd_ctfp) != 0) {
2562 return (ctf_dwarf_error(cdp, cdp->cd_ctfp, 0,
2563 "failed to update output ctf container"));
2567 ctf_phase_dump(cdp->cd_ctfp, "pre-dedup");
2568 ctf_dprintf("adding inputs for dedup\n");
2569 if ((ret = ctf_merge_add(cdp->cd_cmh, cdp->cd_ctfp)) != 0) {
2570 return (ctf_dwarf_error(cdp, NULL, ret,
2571 "failed to add inputs for merge"));
2574 ctf_dprintf("starting merge\n");
2575 if ((ret = ctf_merge_dedup(cdp->cd_cmh, &dedup)) != 0) {
2576 return (ctf_dwarf_error(cdp, NULL, ret,
2577 "failed to deduplicate die"));
2579 ctf_close(cdp->cd_ctfp);
2580 cdp->cd_ctfp = dedup;
2582 return (0);
2586 * Note, we expect that if we're returning a ctf_file_t from one of the dies,
2587 * say in the single node case, it's been saved and the entry here has been set
2588 * to NULL, which ctf_close happily ignores.
2590 static void
2591 ctf_dwarf_free_die(ctf_die_t *cdp)
2593 ctf_dwfunc_t *cdf, *ndf;
2594 ctf_dwvar_t *cdv, *ndv;
2595 ctf_dwbitf_t *cdb, *ndb;
2596 ctf_dwmap_t *map;
2597 void *cookie;
2598 Dwarf_Error derr;
2600 ctf_dprintf("Beginning to free die: %p\n", cdp);
2601 cdp->cd_elf = NULL;
2602 ctf_dprintf("Trying to free name: %p\n", cdp->cd_name);
2603 if (cdp->cd_name != NULL)
2604 ctf_free(cdp->cd_name, strlen(cdp->cd_name) + 1);
2605 ctf_dprintf("Trying to free merge handle: %p\n", cdp->cd_cmh);
2606 if (cdp->cd_cmh != NULL) {
2607 ctf_merge_fini(cdp->cd_cmh);
2608 cdp->cd_cmh = NULL;
2611 ctf_dprintf("Trying to free functions\n");
2612 for (cdf = ctf_list_next(&cdp->cd_funcs); cdf != NULL; cdf = ndf) {
2613 ndf = ctf_list_next(cdf);
2614 ctf_free(cdf->cdf_name, strlen(cdf->cdf_name) + 1);
2615 if (cdf->cdf_fip.ctc_argc != 0) {
2616 ctf_free(cdf->cdf_argv,
2617 sizeof (ctf_id_t) * cdf->cdf_fip.ctc_argc);
2619 ctf_free(cdf, sizeof (ctf_dwfunc_t));
2622 ctf_dprintf("Trying to free variables\n");
2623 for (cdv = ctf_list_next(&cdp->cd_vars); cdv != NULL; cdv = ndv) {
2624 ndv = ctf_list_next(cdv);
2625 ctf_free(cdv->cdv_name, strlen(cdv->cdv_name) + 1);
2626 ctf_free(cdv, sizeof (ctf_dwvar_t));
2629 ctf_dprintf("Trying to free bitfields\n");
2630 for (cdb = ctf_list_next(&cdp->cd_bitfields); cdb != NULL; cdb = ndb) {
2631 ndb = ctf_list_next(cdb);
2632 ctf_free(cdb, sizeof (ctf_dwbitf_t));
2635 /* How do we clean up die usage? */
2636 ctf_dprintf("Trying to clean up dwarf_t: %p\n", cdp->cd_dwarf);
2637 (void) dwarf_finish(cdp->cd_dwarf, &derr);
2638 cdp->cd_dwarf = NULL;
2639 ctf_close(cdp->cd_ctfp);
2641 cookie = NULL;
2642 while ((map = avl_destroy_nodes(&cdp->cd_map, &cookie)) != NULL) {
2643 ctf_free(map, sizeof (ctf_dwmap_t));
2645 avl_destroy(&cdp->cd_map);
2646 cdp->cd_errbuf = NULL;
2649 static void
2650 ctf_dwarf_free_dies(ctf_die_t *cdies, int ndies)
2652 int i;
2654 ctf_dprintf("Beginning to free dies\n");
2655 for (i = 0; i < ndies; i++) {
2656 ctf_dwarf_free_die(&cdies[i]);
2659 ctf_free(cdies, sizeof (ctf_die_t) * ndies);
2662 static int
2663 ctf_dwarf_count_dies(Dwarf_Debug dw, Dwarf_Error *derr, int *ndies,
2664 char *errbuf, size_t errlen)
2666 int ret;
2667 Dwarf_Half vers;
2668 Dwarf_Unsigned nexthdr;
2670 while ((ret = dwarf_next_cu_header(dw, NULL, &vers, NULL, NULL,
2671 &nexthdr, derr)) != DW_DLV_NO_ENTRY) {
2672 if (ret != DW_DLV_OK) {
2673 (void) snprintf(errbuf, errlen,
2674 "file does not contain valid DWARF data: %s\n",
2675 dwarf_errmsg(*derr));
2676 return (ECTF_CONVBKERR);
2679 if (vers != DWARF_VERSION_TWO) {
2680 (void) snprintf(errbuf, errlen,
2681 "unsupported DWARF version: %d\n", vers);
2682 return (ECTF_CONVBKERR);
2684 *ndies = *ndies + 1;
2687 if (*ndies == 0) {
2688 (void) snprintf(errbuf, errlen,
2689 "file does not contain valid DWARF data: %s\n",
2690 dwarf_errmsg(*derr));
2691 return (ECTF_CONVBKERR);
2694 return (0);
2698 * Iterate over all of the dies and create a ctf_die_t for each of them. This is
2699 * used to determine if we have zero, one, or multiple dies to convert. If we
2700 * have zero, that's an error. If there's only one die, that's the simple case.
2701 * No merge needed and only a single Dwarf_Debug as well.
2703 static int
2704 ctf_dwarf_init_die(int fd, Elf *elf, ctf_die_t *cdp, int ndie, char *errbuf,
2705 size_t errlen)
2707 int ret;
2708 Dwarf_Unsigned hdrlen, abboff, nexthdr;
2709 Dwarf_Half addrsz;
2710 Dwarf_Unsigned offset = 0;
2711 Dwarf_Error derr;
2713 while ((ret = dwarf_next_cu_header(cdp->cd_dwarf, &hdrlen, NULL,
2714 &abboff, &addrsz, &nexthdr, &derr)) != DW_DLV_NO_ENTRY) {
2715 char *name;
2716 Dwarf_Die cu, child;
2718 /* Based on the counting above, we should be good to go */
2719 VERIFY(ret == DW_DLV_OK);
2720 if (ndie > 0) {
2721 ndie--;
2722 offset = nexthdr;
2723 continue;
2727 * Compilers are apparently inconsistent. Some emit no DWARF for
2728 * empty files and others emit empty compilation unit.
2730 cdp->cd_voidtid = CTF_ERR;
2731 cdp->cd_longtid = CTF_ERR;
2732 cdp->cd_elf = elf;
2733 cdp->cd_maxoff = nexthdr - 1;
2734 cdp->cd_ctfp = ctf_fdcreate(fd, &ret);
2735 if (cdp->cd_ctfp == NULL) {
2736 ctf_free(cdp, sizeof (ctf_die_t));
2737 return (ret);
2739 avl_create(&cdp->cd_map, ctf_dwmap_comp, sizeof (ctf_dwmap_t),
2740 offsetof(ctf_dwmap_t, cdm_avl));
2741 cdp->cd_errbuf = errbuf;
2742 cdp->cd_errlen = errlen;
2743 bzero(&cdp->cd_vars, sizeof (ctf_list_t));
2744 bzero(&cdp->cd_funcs, sizeof (ctf_list_t));
2745 bzero(&cdp->cd_bitfields, sizeof (ctf_list_t));
2747 if ((ret = ctf_dwarf_die_elfenc(elf, cdp, errbuf,
2748 errlen)) != 0) {
2749 avl_destroy(&cdp->cd_map);
2750 ctf_free(cdp, sizeof (ctf_die_t));
2751 return (ret);
2754 if ((ret = ctf_dwarf_sib(cdp, NULL, &cu)) != 0) {
2755 avl_destroy(&cdp->cd_map);
2756 ctf_free(cdp, sizeof (ctf_die_t));
2757 return (ret);
2759 if (cu == NULL) {
2760 (void) snprintf(errbuf, errlen,
2761 "file does not contain DWARF data\n");
2762 avl_destroy(&cdp->cd_map);
2763 ctf_free(cdp, sizeof (ctf_die_t));
2764 return (ECTF_CONVBKERR);
2767 if ((ret = ctf_dwarf_child(cdp, cu, &child)) != 0) {
2768 avl_destroy(&cdp->cd_map);
2769 ctf_free(cdp, sizeof (ctf_die_t));
2770 return (ret);
2772 if (child == NULL) {
2773 (void) snprintf(errbuf, errlen,
2774 "file does not contain DWARF data\n");
2775 avl_destroy(&cdp->cd_map);
2776 ctf_free(cdp, sizeof (ctf_die_t));
2777 return (ECTF_CONVBKERR);
2780 cdp->cd_cuoff = offset;
2781 cdp->cd_cu = child;
2783 if ((cdp->cd_cmh = ctf_merge_init(fd, &ret)) == NULL) {
2784 avl_destroy(&cdp->cd_map);
2785 ctf_free(cdp, sizeof (ctf_die_t));
2786 return (ret);
2789 if (ctf_dwarf_string(cdp, cu, DW_AT_name, &name) == 0) {
2790 size_t len = strlen(name) + 1;
2791 char *b = basename(name);
2792 cdp->cd_name = strdup(b);
2793 ctf_free(name, len);
2795 break;
2798 return (0);
2802 ctf_conv_status_t
2803 ctf_dwarf_convert(int fd, Elf *elf, uint_t nthrs, int *errp, ctf_file_t **fpp,
2804 char *errmsg, size_t errlen)
2806 int err, ret, ndies, i;
2807 Dwarf_Debug dw;
2808 Dwarf_Error derr;
2809 ctf_die_t *cdies = NULL, *cdp;
2810 workq_t *wqp = NULL;
2812 if (errp == NULL)
2813 errp = &err;
2814 *errp = 0;
2815 *fpp = NULL;
2817 ret = dwarf_elf_init(elf, DW_DLC_READ, NULL, NULL, &dw, &derr);
2818 if (ret != DW_DLV_OK) {
2820 * The old CTF tools used to check if we expected DWARF data
2821 * here. In this case, if we actually have some amount of DWARF,
2822 * but no section, for now, just go ahead and create an empty
2823 * CTF file.
2825 if (ret == DW_DLV_NO_ENTRY ||
2826 dwarf_errno(derr) == DW_DLE_DEBUG_INFO_NULL) {
2827 *fpp = ctf_create(errp);
2828 return (*fpp != NULL ? CTF_CONV_SUCCESS :
2829 CTF_CONV_ERROR);
2831 (void) snprintf(errmsg, errlen,
2832 "failed to initialize DWARF: %s\n",
2833 dwarf_errmsg(derr));
2834 *errp = ECTF_CONVBKERR;
2835 return (CTF_CONV_ERROR);
2838 ndies = 0;
2839 ret = ctf_dwarf_count_dies(dw, &derr, &ndies, errmsg, errlen);
2840 if (ret != 0) {
2841 *errp = ret;
2842 goto out;
2845 (void) dwarf_finish(dw, &derr);
2846 cdies = ctf_alloc(sizeof (ctf_die_t) * ndies);
2847 if (cdies == NULL) {
2848 *errp = ENOMEM;
2849 return (CTF_CONV_ERROR);
2852 for (i = 0; i < ndies; i++) {
2853 cdp = &cdies[i];
2854 ret = dwarf_elf_init(elf, DW_DLC_READ, NULL, NULL,
2855 &cdp->cd_dwarf, &derr);
2856 if (ret != 0) {
2857 ctf_free(cdies, sizeof (ctf_die_t) * ndies);
2858 (void) snprintf(errmsg, errlen,
2859 "failed to initialize DWARF: %s\n",
2860 dwarf_errmsg(derr));
2861 *errp = ECTF_CONVBKERR;
2862 return (CTF_CONV_ERROR);
2865 ret = ctf_dwarf_init_die(fd, elf, &cdies[i], i, errmsg, errlen);
2866 if (ret != 0) {
2867 *errp = ret;
2868 goto out;
2870 cdp->cd_doweaks = ndies > 1 ? B_FALSE : B_TRUE;
2873 ctf_dprintf("found %d DWARF die(s)\n", ndies);
2876 * If we only have one die, there's no reason to use multiple threads,
2877 * even if the user requested them. After all, they just gave us an
2878 * upper bound.
2880 if (ndies == 1)
2881 nthrs = 1;
2883 if (workq_init(&wqp, nthrs) == -1) {
2884 *errp = errno;
2885 goto out;
2888 for (i = 0; i < ndies; i++) {
2889 cdp = &cdies[i];
2890 ctf_dprintf("adding die %s: %p, %x %x\n", cdp->cd_name,
2891 cdp->cd_cu, cdp->cd_cuoff, cdp->cd_maxoff);
2892 if (workq_add(wqp, cdp) == -1) {
2893 *errp = errno;
2894 goto out;
2898 ret = workq_work(wqp, ctf_dwarf_convert_one, NULL, errp);
2899 if (ret == WORKQ_ERROR) {
2900 *errp = errno;
2901 goto out;
2902 } else if (ret == WORKQ_UERROR) {
2903 ctf_dprintf("internal convert failed: %s\n",
2904 ctf_errmsg(*errp));
2905 goto out;
2908 ctf_dprintf("Determining next phase: have %d dies\n", ndies);
2909 if (ndies != 1) {
2910 ctf_merge_t *cmp;
2912 cmp = ctf_merge_init(fd, &ret);
2913 if (cmp == NULL) {
2914 *errp = ret;
2915 goto out;
2918 ctf_dprintf("setting threads\n");
2919 if ((ret = ctf_merge_set_nthreads(cmp, nthrs)) != 0) {
2920 ctf_merge_fini(cmp);
2921 *errp = ret;
2922 goto out;
2925 ctf_dprintf("adding dies\n");
2926 for (i = 0; i < ndies; i++) {
2927 cdp = &cdies[i];
2928 if ((ret = ctf_merge_add(cmp, cdp->cd_ctfp)) != 0) {
2929 ctf_merge_fini(cmp);
2930 *errp = ret;
2931 goto out;
2935 ctf_dprintf("performing merge\n");
2936 ret = ctf_merge_merge(cmp, fpp);
2937 if (ret != 0) {
2938 ctf_dprintf("failed merge!\n");
2939 *fpp = NULL;
2940 ctf_merge_fini(cmp);
2941 *errp = ret;
2942 goto out;
2944 ctf_merge_fini(cmp);
2945 *errp = 0;
2946 ctf_dprintf("successfully converted!\n");
2947 } else {
2948 *errp = 0;
2949 *fpp = cdies->cd_ctfp;
2950 cdies->cd_ctfp = NULL;
2951 ctf_dprintf("successfully converted!\n");
2954 out:
2955 workq_fini(wqp);
2956 ctf_dwarf_free_dies(cdies, ndies);
2957 return (*fpp != NULL ? CTF_CONV_SUCCESS : CTF_CONV_ERROR);