Bug 439685 compiler warning in callgrind/main.c
[valgrind.git] / coregrind / m_debuginfo / readmacho.c
blob33cc037b576b2d073ca0e8a1f6467ce01383832e
2 /*--------------------------------------------------------------------*/
3 /*--- Reading of syms & debug info from Mach-O files. ---*/
4 /*--- readmacho.c ---*/
5 /*--------------------------------------------------------------------*/
7 /*
8 This file is part of Valgrind, a dynamic binary instrumentation
9 framework.
11 Copyright (C) 2005-2017 Apple Inc.
12 Greg Parker gparker@apple.com
14 This program is free software; you can redistribute it and/or
15 modify it under the terms of the GNU General Public License as
16 published by the Free Software Foundation; either version 2 of the
17 License, or (at your option) any later version.
19 This program is distributed in the hope that it will be useful, but
20 WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 General Public License for more details.
24 You should have received a copy of the GNU General Public License
25 along with this program; if not, see <http://www.gnu.org/licenses/>.
27 The GNU General Public License is contained in the file COPYING.
30 #if defined(VGO_darwin)
32 #include "pub_core_basics.h"
33 #include "pub_core_vki.h"
34 #include "pub_core_libcbase.h"
35 #include "pub_core_libcprint.h"
36 #include "pub_core_libcassert.h"
37 #include "pub_core_libcfile.h"
38 #include "pub_core_libcproc.h"
39 #include "pub_core_aspacemgr.h" /* for mmaping debuginfo files */
40 #include "pub_core_machine.h" /* VG_ELF_CLASS */
41 #include "pub_core_options.h"
42 #include "pub_core_oset.h"
43 #include "pub_core_tooliface.h" /* VG_(needs) */
44 #include "pub_core_xarray.h"
45 #include "pub_core_clientstate.h"
46 #include "pub_core_debuginfo.h"
48 #include "priv_misc.h"
49 #include "priv_image.h"
50 #include "priv_d3basics.h"
51 #include "priv_tytypes.h"
52 #include "priv_storage.h"
53 #include "priv_readmacho.h"
54 #include "priv_readdwarf.h"
55 #include "priv_readdwarf3.h"
57 /* --- !!! --- EXTERNAL HEADERS start --- !!! --- */
58 #include <mach-o/loader.h>
59 #include <mach-o/nlist.h>
60 #include <mach-o/fat.h>
61 /* --- !!! --- EXTERNAL HEADERS end --- !!! --- */
/* Pick the 32-bit or 64-bit flavour of the Mach-O magic number, load
   command id and structure tags, keyed on VG_WORDSIZE.  The rest of
   this file only ever uses the word-size-neutral names defined here. */
#if VG_WORDSIZE == 4
#  define MAGIC            MH_MAGIC
#  define MACH_HEADER      mach_header
#  define LC_SEGMENT_CMD   LC_SEGMENT
#  define SEGMENT_COMMAND  segment_command
#  define SECTION          section
#  define NLIST            nlist
#else
#  define MAGIC            MH_MAGIC_64
#  define MACH_HEADER      mach_header_64
#  define LC_SEGMENT_CMD   LC_SEGMENT_64
#  define SEGMENT_COMMAND  segment_command_64
#  define SECTION          section_64
#  define NLIST            nlist_64
#endif
80 /*------------------------------------------------------------*/
81 /*--- ---*/
82 /*--- Mach-O file mapping/unmapping helpers ---*/
83 /*--- ---*/
84 /*------------------------------------------------------------*/
/* A DiSlice is used to handle the thin/fat distinction for Mach-O images.
   It describes two things at once:
   (1) the entire mapped-in ("primary") image -- fat headers, kitchen sink,
       whatnot: the entire file.  This is the DiImage* that is the backing
       for the DiSlice.
   (2) the Mach-O object of interest, which is presumably somewhere inside
       the primary image.  map_image_aboard() below, which generates this
       info, will carefully check that the slice's offset and size (.ioff
       and .szB) denote a range that falls entirely inside the primary
       image.
*/
96 Bool ML_(is_macho_object_file)( const void* buf, SizeT szB )
98 /* (JRS: the Mach-O headers might not be in this mapped data,
99 because we only mapped a page for this initial check,
100 or at least not very much, and what's at the start of the file
101 is in general a so-called fat header. The Mach-O object we're
102 interested in could be arbitrarily far along the image, and so
103 we can't assume its header will fall within this page.) */
105 /* But we can say that either it's a fat object, in which case it
106 begins with a fat header, or it's unadorned Mach-O, in which
107 case it starts with a normal header. At least do what checks we
108 can to establish whether or not we're looking at something
109 sane. */
111 const struct fat_header* fh_be = buf;
112 const struct MACH_HEADER* mh = buf;
114 vg_assert(buf);
115 if (szB < sizeof(struct fat_header))
116 return False;
117 if (VG_(ntohl)(fh_be->magic) == FAT_MAGIC)
118 return True;
120 if (szB < sizeof(struct MACH_HEADER))
121 return False;
122 if (mh->magic == MAGIC)
123 return True;
125 return False;
129 /* Unmap an image mapped in by map_image_aboard. */
130 static void unmap_image ( /*MOD*/DiSlice* sli )
132 vg_assert(sli);
133 if (ML_(sli_is_valid)(*sli)) {
134 ML_(img_done)(sli->img);
135 *sli = DiSlice_INVALID;
140 /* Open the given file, find the thin part if necessary, do some
141 checks, and return a DiSlice containing details of both the thin
142 part and (implicitly, via the contained DiImage*) the fat part.
143 returns DiSlice_INVALID if it fails. If it succeeds, the returned
144 slice is guaranteed to refer to a valid(ish) Mach-O image. */
145 static DiSlice map_image_aboard ( DebugInfo* di, /* only for err msgs */
146 const HChar* filename )
148 DiSlice sli = DiSlice_INVALID;
150 /* First off, try to map the thing in. */
151 DiImage* mimg = ML_(img_from_local_file)(filename);
152 if (mimg == NULL) {
153 VG_(message)(Vg_UserMsg, "warning: connection to image %s failed\n",
154 filename );
155 VG_(message)(Vg_UserMsg, " no symbols or debug info loaded\n" );
156 return DiSlice_INVALID;
159 /* Now we have a viable DiImage* for it. Look for the embedded
160 Mach-O object. If not findable, close the image and fail. */
161 DiOffT fh_be_ioff = 0;
162 struct fat_header fh_be;
163 struct fat_header fh;
165 // Assume initially that we have a thin image, and narrow
166 // the bounds if it turns out to be fat. This stores |mimg| as
167 // |sli.img|, so NULL out |mimg| after this point, for the sake of
168 // clarity.
169 sli = ML_(sli_from_img)(mimg);
170 mimg = NULL;
172 // Check for fat header.
173 if (ML_(img_size)(sli.img) < sizeof(struct fat_header)) {
174 ML_(symerr)(di, True, "Invalid Mach-O file (0 too small).");
175 goto close_and_fail;
178 // Fat header is always BIG-ENDIAN
179 ML_(img_get)(&fh_be, sli.img, fh_be_ioff, sizeof(fh_be));
180 VG_(memset)(&fh, 0, sizeof(fh));
181 fh.magic = VG_(ntohl)(fh_be.magic);
182 fh.nfat_arch = VG_(ntohl)(fh_be.nfat_arch);
183 if (fh.magic == FAT_MAGIC) {
184 // Look for a good architecture.
185 if (ML_(img_size)(sli.img) < sizeof(struct fat_header)
186 + fh.nfat_arch * sizeof(struct fat_arch)) {
187 ML_(symerr)(di, True, "Invalid Mach-O file (1 too small).");
188 goto close_and_fail;
190 DiOffT arch_be_ioff;
191 Int f;
192 for (f = 0, arch_be_ioff = sizeof(struct fat_header);
193 f < fh.nfat_arch;
194 f++, arch_be_ioff += sizeof(struct fat_arch)) {
195 # if defined(VGA_ppc)
196 Int cputype = CPU_TYPE_POWERPC;
197 # elif defined(VGA_ppc64be)
198 Int cputype = CPU_TYPE_POWERPC64BE;
199 # elif defined(VGA_ppc64le)
200 Int cputype = CPU_TYPE_POWERPC64LE;
201 # elif defined(VGA_x86)
202 Int cputype = CPU_TYPE_X86;
203 # elif defined(VGA_amd64)
204 Int cputype = CPU_TYPE_X86_64;
205 # else
206 # error "unknown architecture"
207 # endif
208 struct fat_arch arch_be;
209 struct fat_arch arch;
210 ML_(img_get)(&arch_be, sli.img, arch_be_ioff, sizeof(arch_be));
211 VG_(memset)(&arch, 0, sizeof(arch));
212 arch.cputype = VG_(ntohl)(arch_be.cputype);
213 arch.cpusubtype = VG_(ntohl)(arch_be.cpusubtype);
214 arch.offset = VG_(ntohl)(arch_be.offset);
215 arch.size = VG_(ntohl)(arch_be.size);
216 if (arch.cputype == cputype) {
217 if (ML_(img_size)(sli.img) < arch.offset + arch.size) {
218 ML_(symerr)(di, True, "Invalid Mach-O file (2 too small).");
219 goto close_and_fail;
221 /* Found a suitable arch. Narrow down the slice accordingly. */
222 sli.ioff = arch.offset;
223 sli.szB = arch.size;
224 break;
227 if (f == fh.nfat_arch) {
228 ML_(symerr)(di, True,
229 "No acceptable architecture found in fat file.");
230 goto close_and_fail;
234 /* Sanity check what we found. */
236 /* assured by logic above */
237 vg_assert(ML_(img_size)(sli.img) >= sizeof(struct fat_header));
239 if (sli.szB < sizeof(struct MACH_HEADER)) {
240 ML_(symerr)(di, True, "Invalid Mach-O file (3 too small).");
241 goto close_and_fail;
244 if (sli.szB > ML_(img_size)(sli.img)) {
245 ML_(symerr)(di, True, "Invalid Mach-O file (thin bigger than fat).");
246 goto close_and_fail;
249 if (sli.ioff >= 0 && sli.ioff + sli.szB <= ML_(img_size)(sli.img)) {
250 /* thin entirely within fat, as expected */
251 } else {
252 ML_(symerr)(di, True, "Invalid Mach-O file (thin not inside fat).");
253 goto close_and_fail;
256 /* Peer at the Mach header for the thin object, starting at the
257 beginning of the slice, to check it's at least marginally
258 sane. */
259 struct MACH_HEADER mh;
260 ML_(cur_read_get)(&mh, ML_(cur_from_sli)(sli), sizeof(mh));
261 if (mh.magic != MAGIC) {
262 ML_(symerr)(di, True, "Invalid Mach-O file (bad magic).");
263 goto close_and_fail;
266 if (sli.szB < sizeof(struct MACH_HEADER) + mh.sizeofcmds) {
267 ML_(symerr)(di, True, "Invalid Mach-O file (4 too small).");
268 goto close_and_fail;
271 /* "main image is plausible" */
272 vg_assert(sli.img);
273 vg_assert(ML_(img_size)(sli.img) > 0);
274 /* "thin image exists and is a sub-part (or all) of main image" */
275 vg_assert(sli.ioff >= 0);
276 vg_assert(sli.szB > 0);
277 vg_assert(sli.ioff + sli.szB <= ML_(img_size)(sli.img));
278 return sli; /* success */
279 /*NOTREACHED*/
281 close_and_fail:
282 unmap_image(&sli);
283 return DiSlice_INVALID; /* bah! */
287 /*------------------------------------------------------------*/
288 /*--- ---*/
289 /*--- Mach-O symbol table reading ---*/
290 /*--- ---*/
291 /*------------------------------------------------------------*/
293 /* Read a symbol table (nlist). Add the resulting candidate symbols
294 to 'syms'; the caller will post-process them and hand them off to
295 ML_(addSym) itself. */
296 static
297 void read_symtab( /*OUT*/XArray* /* DiSym */ syms,
298 struct _DebugInfo* di,
299 DiCursor symtab_cur, UInt symtab_count,
300 DiCursor strtab_cur, UInt strtab_sz )
302 Int i;
303 DiSym disym;
305 // "start_according_to_valgrind"
306 static const HChar* s_a_t_v = NULL; /* do not make non-static */
308 for (i = 0; i < symtab_count; i++) {
309 struct NLIST nl;
310 ML_(cur_read_get)(&nl,
311 ML_(cur_plus)(symtab_cur, i * sizeof(struct NLIST)),
312 sizeof(nl));
314 Addr sym_addr = 0;
315 if ((nl.n_type & N_TYPE) == N_SECT) {
316 sym_addr = di->text_bias + nl.n_value;
317 /*} else if ((nl.n_type & N_TYPE) == N_ABS) {
318 GrP fixme don't ignore absolute symbols?
319 sym_addr = nl.n_value; */
320 } else {
321 continue;
324 if (di->trace_symtab) {
325 HChar* str = ML_(cur_read_strdup)(
326 ML_(cur_plus)(strtab_cur, nl.n_un.n_strx),
327 "di.read_symtab.1");
328 VG_(printf)("nlist raw: avma %010lx %s\n", sym_addr, str );
329 ML_(dinfo_free)(str);
332 /* If no part of the symbol falls within the mapped range,
333 ignore it. */
334 if (sym_addr <= di->text_avma
335 || sym_addr >= di->text_avma+di->text_size) {
336 continue;
339 /* skip names which point outside the string table;
340 following these risks segfaulting Valgrind */
341 if (nl.n_un.n_strx < 0 || nl.n_un.n_strx >= strtab_sz) {
342 continue;
345 HChar* name
346 = ML_(cur_read_strdup)( ML_(cur_plus)(strtab_cur, nl.n_un.n_strx),
347 "di.read_symtab.2");
349 /* skip nameless symbols; these appear to be common, but
350 useless */
351 if (*name == 0) {
352 ML_(dinfo_free)(name);
353 continue;
356 VG_(bzero_inline)(&disym, sizeof(disym));
357 disym.avmas.main = sym_addr;
358 SET_TOCPTR_AVMA(disym, 0);
359 SET_LOCAL_EP_AVMA(disym, 0);
360 disym.pri_name = ML_(addStr)(di, name, -1);
361 disym.sec_names = NULL;
362 disym.size = // let canonicalize fix it
363 di->text_avma+di->text_size - sym_addr;
364 disym.isText = True;
365 disym.isIFunc = False;
366 disym.isGlobal = False;
367 // Lots of user function names get prepended with an underscore. Eg. the
368 // function 'f' becomes the symbol '_f'. And the "below main"
369 // function is called "start". So we skip the leading underscore, and
370 // if we see 'start' and --show-below-main=no, we rename it as
371 // "start_according_to_valgrind", which makes it easy to spot later
372 // and display as "(below main)".
373 if (disym.pri_name[0] == '_') {
374 disym.pri_name++;
376 else if (!VG_(clo_show_below_main) && VG_STREQ(disym.pri_name, "start")) {
377 if (s_a_t_v == NULL)
378 s_a_t_v = ML_(addStr)(di, "start_according_to_valgrind", -1);
379 vg_assert(s_a_t_v);
380 disym.pri_name = s_a_t_v;
383 vg_assert(disym.pri_name);
384 VG_(addToXA)( syms, &disym );
385 ML_(dinfo_free)(name);
390 /* Compare DiSyms by their start address, and for equal addresses, use
391 the primary name as a secondary sort key. */
392 static Int cmp_DiSym_by_start_then_name ( const void* v1, const void* v2 )
394 const DiSym* s1 = (const DiSym*)v1;
395 const DiSym* s2 = (const DiSym*)v2;
396 if (s1->avmas.main < s2->avmas.main) return -1;
397 if (s1->avmas.main > s2->avmas.main) return 1;
398 return VG_(strcmp)(s1->pri_name, s2->pri_name);
401 /* 'cand' is a bunch of candidate symbols obtained by reading
402 nlist-style symbol table entries. Their ends may overlap, so sort
403 them and truncate them accordingly. The code in this routine is
404 copied almost verbatim from read_symbol_table() in readxcoff.c. */
405 static void tidy_up_cand_syms ( /*MOD*/XArray* /* of DiSym */ syms,
406 Bool trace_symtab )
408 Word nsyms, i, j, k, m;
410 nsyms = VG_(sizeXA)(syms);
412 VG_(setCmpFnXA)(syms, cmp_DiSym_by_start_then_name);
413 VG_(sortXA)(syms);
415 /* We only know for sure the start addresses (actual VMAs) of
416 symbols, and an overestimation of their end addresses. So sort
417 by start address, then clip each symbol so that its end address
418 does not overlap with the next one along.
420 There is a small refinement: if a group of symbols have the same
421 address, treat them as a group: find the next symbol along that
422 has a higher start address, and clip all of the group
423 accordingly. This clips the group as a whole so as not to
424 overlap following symbols. This leaves prefersym() in
425 storage.c, which is not nlist-specific, to later decide which of
426 the symbols in the group to keep.
428 Another refinement is that we need to get rid of symbols which,
429 after clipping, have identical starts, ends, and names. So the
430 sorting uses the name as a secondary key.
433 for (i = 0; i < nsyms; i++) {
434 for (k = i+1;
435 k < nsyms
436 && ((DiSym*)VG_(indexXA)(syms,i))->avmas.main
437 == ((DiSym*)VG_(indexXA)(syms,k))->avmas.main;
438 k++)
440 /* So now [i .. k-1] is a group all with the same start address.
441 Clip their ending addresses so they don't overlap [k]. In
442 the normal case (no overlaps), k == i+1. */
443 if (k < nsyms) {
444 DiSym* next = (DiSym*)VG_(indexXA)(syms,k);
445 for (m = i; m < k; m++) {
446 DiSym* here = (DiSym*)VG_(indexXA)(syms,m);
447 vg_assert(here->avmas.main < next->avmas.main);
448 if (here->avmas.main + here->size > next->avmas.main)
449 here->size = next->avmas.main - here->avmas.main;
452 i = k-1;
453 vg_assert(i <= nsyms);
456 j = 0;
457 if (nsyms > 0) {
458 j = 1;
459 for (i = 1; i < nsyms; i++) {
460 DiSym *s_j1, *s_j, *s_i;
461 vg_assert(j <= i);
462 s_j1 = (DiSym*)VG_(indexXA)(syms, j-1);
463 s_j = (DiSym*)VG_(indexXA)(syms, j);
464 s_i = (DiSym*)VG_(indexXA)(syms, i);
465 if (s_i->avmas.main != s_j1->avmas.main
466 || s_i->size != s_j1->size
467 || 0 != VG_(strcmp)(s_i->pri_name, s_j1->pri_name)) {
468 *s_j = *s_i;
469 j++;
470 } else {
471 if (trace_symtab)
472 VG_(printf)("nlist cleanup: dump duplicate avma %010lx %s\n",
473 s_i->avmas.main, s_i->pri_name );
477 vg_assert(j >= 0 && j <= nsyms);
478 VG_(dropTailXA)(syms, nsyms - j);
482 /*------------------------------------------------------------*/
483 /*--- ---*/
484 /*--- Mach-O top-level processing ---*/
485 /*--- ---*/
486 /*------------------------------------------------------------*/
/* Path fragment appended to an executable's name to reach the DWARF
   file inside its .dSYM bundle.  May be overridden at build time. */
#ifndef APPLE_DSYM_EXT_AND_SUBDIRECTORY
#  define APPLE_DSYM_EXT_AND_SUBDIRECTORY ".dSYM/Contents/Resources/DWARF/"
#endif
493 static Bool file_exists_p(const HChar *path)
495 struct vg_stat sbuf;
496 SysRes res = VG_(stat)(path, &sbuf);
497 return sr_isError(res) ? False : True;
501 /* Search for an existing dSYM file as a possible separate debug file.
502 Adapted from gdb. */
503 static HChar *
504 find_separate_debug_file (const HChar *executable_name)
506 const HChar *basename_str;
507 HChar *dot_ptr;
508 HChar *slash_ptr;
509 HChar *dsymfile;
511 /* Make sure the object file name itself doesn't contain ".dSYM" in it or we
512 will end up with an infinite loop where after we add a dSYM symbol file,
513 it will then enter this function asking if there is a debug file for the
514 dSYM file itself. */
515 if (VG_(strcasestr) (executable_name, ".dSYM") == NULL)
517 /* Check for the existence of a .dSYM file for a given executable. */
518 basename_str = VG_(basename) (executable_name);
519 dsymfile = ML_(dinfo_zalloc)("di.readmacho.dsymfile",
520 VG_(strlen) (executable_name)
521 + VG_(strlen) (APPLE_DSYM_EXT_AND_SUBDIRECTORY)
522 + VG_(strlen) (basename_str)
526 /* First try for the dSYM in the same directory as the original file. */
527 VG_(strcpy) (dsymfile, executable_name);
528 VG_(strcat) (dsymfile, APPLE_DSYM_EXT_AND_SUBDIRECTORY);
529 VG_(strcat) (dsymfile, basename_str);
531 if (file_exists_p (dsymfile))
532 return dsymfile;
534 /* Now search for any parent directory that has a '.' in it so we can find
535 Mac OS X applications, bundles, plugins, and any other kinds of files.
536 Mac OS X application bundles wil have their program in
537 "/some/path/MyApp.app/Contents/MacOS/MyApp" (or replace ".app" with
538 ".bundle" or ".plugin" for other types of bundles). So we look for any
539 prior '.' character and try appending the apple dSYM extension and
540 subdirectory and see if we find an existing dSYM file (in the above
541 MyApp example the dSYM would be at either:
542 "/some/path/MyApp.app.dSYM/Contents/Resources/DWARF/MyApp" or
543 "/some/path/MyApp.dSYM/Contents/Resources/DWARF/MyApp". */
544 VG_(strcpy) (dsymfile, VG_(dirname) (executable_name));
545 while ((dot_ptr = VG_(strrchr) (dsymfile, '.')))
547 /* Find the directory delimiter that follows the '.' character since
548 we now look for a .dSYM that follows any bundle extension. */
549 slash_ptr = VG_(strchr) (dot_ptr, '/');
550 if (slash_ptr)
552 /* NULL terminate the string at the '/' character and append
553 the path down to the dSYM file. */
554 *slash_ptr = '\0';
555 VG_(strcat) (slash_ptr, APPLE_DSYM_EXT_AND_SUBDIRECTORY);
556 VG_(strcat) (slash_ptr, basename_str);
557 if (file_exists_p (dsymfile))
558 return dsymfile;
561 /* NULL terminate the string at the '.' character and append
562 the path down to the dSYM file. */
563 *dot_ptr = '\0';
564 VG_(strcat) (dot_ptr, APPLE_DSYM_EXT_AND_SUBDIRECTORY);
565 VG_(strcat) (dot_ptr, basename_str);
566 if (file_exists_p (dsymfile))
567 return dsymfile;
569 /* NULL terminate the string at the '.' locatated by the strrchr()
570 function again. */
571 *dot_ptr = '\0';
573 /* We found a previous extension '.' character and did not find a
574 dSYM file so now find previous directory delimiter so we don't
575 try multiple times on a file name that may have a version number
576 in it such as "/some/path/MyApp.6.0.4.app". */
577 slash_ptr = VG_(strrchr) (dsymfile, '/');
578 if (!slash_ptr)
579 break;
580 /* NULL terminate the string at the previous directory character
581 and search again. */
582 *slash_ptr = '\0';
586 return NULL;
590 /* Given a DiSlice covering the entire Mach-O thin image, find the
591 DiSlice for the specified (segname, sectname) pairing, if
592 possible. Also return the section's .addr field in *svma if
593 svma is non-NULL. */
594 static DiSlice getsectdata ( DiSlice img,
595 const HChar *segname, const HChar *sectname,
596 /*OUT*/Addr* svma )
598 DiCursor cur = ML_(cur_from_sli)(img);
600 struct MACH_HEADER mh;
601 ML_(cur_step_get)(&mh, &cur, sizeof(mh));
603 Int c;
604 for (c = 0; c < mh.ncmds; c++) {
605 struct load_command cmd;
606 ML_(cur_read_get)(&cmd, cur, sizeof(cmd));
607 if (cmd.cmd == LC_SEGMENT_CMD) {
608 struct SEGMENT_COMMAND seg;
609 ML_(cur_read_get)(&seg, cur, sizeof(seg));
610 if (0 == VG_(strncmp)(&seg.segname[0],
611 segname, sizeof(seg.segname))) {
612 DiCursor sects_cur = ML_(cur_plus)(cur, sizeof(seg));
613 Int s;
614 for (s = 0; s < seg.nsects; s++) {
615 struct SECTION sect;
616 ML_(cur_step_get)(&sect, &sects_cur, sizeof(sect));
617 if (0 == VG_(strncmp)(sect.sectname, sectname,
618 sizeof(sect.sectname))) {
619 DiSlice res = img;
620 res.ioff = sect.offset;
621 res.szB = sect.size;
622 if (svma) *svma = (Addr)sect.addr;
623 return res;
629 cur = ML_(cur_plus)(cur, cmd.cmdsize);
632 return DiSlice_INVALID;
636 /* Brute force just simply search for uuid[0..15] in |sli| */
637 static Bool check_uuid_matches ( DiSlice sli, UChar* uuid )
639 if (sli.szB < 16)
640 return False;
642 /* Work through the slice in 1 KB chunks. */
643 UChar first = uuid[0];
644 DiOffT min_off = sli.ioff;
645 DiOffT max1_off = sli.ioff + sli.szB;
646 DiOffT curr_off = min_off;
647 vg_assert(min_off < max1_off);
648 while (1) {
649 vg_assert(curr_off >= min_off && curr_off <= max1_off);
650 if (curr_off == max1_off) break;
651 DiOffT avail = max1_off - curr_off;
652 vg_assert(avail > 0 && avail <= max1_off);
653 if (avail > 1024) avail = 1024;
654 UChar buf[1024];
655 SizeT nGot = ML_(img_get_some)(buf, sli.img, curr_off, avail);
656 vg_assert(nGot >= 1 && nGot <= avail);
657 UInt i;
658 /* Scan through the 1K chunk we got, looking for the start char. */
659 for (i = 0; i < (UInt)nGot; i++) {
660 if (LIKELY(buf[i] != first))
661 continue;
662 /* first char matches. See if we can get 16 bytes at this
663 offset, and compare. */
664 if (curr_off + i < max1_off && max1_off - (curr_off + i) >= 16) {
665 UChar buff16[16];
666 ML_(img_get)(&buff16[0], sli.img, curr_off + i, 16);
667 if (0 == VG_(memcmp)(&buff16[0], &uuid[0], 16))
668 return True;
671 curr_off += nGot;
673 return False;
677 /* Heuristic kludge: return True if this looks like an installed
678 standard library; hence we shouldn't consider automagically running
679 dsymutil on it. */
680 static Bool is_systemish_library_name ( const HChar* name )
682 vg_assert(name);
683 if (0 == VG_(strncasecmp)(name, "/usr/", 5)
684 || 0 == VG_(strncasecmp)(name, "/bin/", 5)
685 || 0 == VG_(strncasecmp)(name, "/sbin/", 6)
686 || 0 == VG_(strncasecmp)(name, "/opt/", 5)
687 || 0 == VG_(strncasecmp)(name, "/sw/", 4)
688 || 0 == VG_(strncasecmp)(name, "/System/", 8)
689 || 0 == VG_(strncasecmp)(name, "/Library/", 9)
690 || 0 == VG_(strncasecmp)(name, "/Applications/", 14)) {
691 return True;
692 } else {
693 return False;
698 Bool ML_(read_macho_debug_info)( struct _DebugInfo* di )
700 DiSlice msli = DiSlice_INVALID; // the main image
701 DiSlice dsli = DiSlice_INVALID; // the debuginfo image
702 DiCursor sym_cur = DiCursor_INVALID;
703 DiCursor dysym_cur = DiCursor_INVALID;
704 HChar* dsymfilename = NULL;
705 Bool have_uuid = False;
706 UChar uuid[16];
707 Word i;
708 const DebugInfoMapping* rx_map = NULL;
709 const DebugInfoMapping* rw_map = NULL;
711 /* mmap the object file to look for di->soname and di->text_bias
712 and uuid and nlist */
714 /* This should be ensured by our caller (that we're in the accept
715 state). */
716 vg_assert(di->fsm.have_rx_map);
717 vg_assert(di->fsm.rw_map_count);
719 for (i = 0; i < VG_(sizeXA)(di->fsm.maps); i++) {
720 const DebugInfoMapping* map = VG_(indexXA)(di->fsm.maps, i);
721 if (map->rx && !rx_map)
722 rx_map = map;
723 if (map->rw && !rw_map)
724 rw_map = map;
725 if (rx_map && rw_map)
726 break;
728 vg_assert(rx_map);
729 vg_assert(rw_map);
731 if (VG_(clo_verbosity) > 1)
732 VG_(message)(Vg_DebugMsg,
733 "%s (rx at %#lx, rw at %#lx)\n", di->fsm.filename,
734 rx_map->avma, rw_map->avma );
736 VG_(memset)(&uuid, 0, sizeof(uuid));
738 msli = map_image_aboard( di, di->fsm.filename );
739 if (!ML_(sli_is_valid)(msli)) {
740 ML_(symerr)(di, False, "Connect to main image failed.");
741 goto fail;
744 vg_assert(msli.img != NULL && msli.szB > 0);
746 /* Poke around in the Mach-O header, to find some important
747 stuff. */
748 // Find LC_SYMTAB and LC_DYSYMTAB, if present.
749 // Read di->soname from LC_ID_DYLIB if present,
750 // or from LC_ID_DYLINKER if present,
751 // or use "NONE".
752 // Get di->text_bias (aka slide) based on the corresponding LC_SEGMENT
753 // Get uuid for later dsym search
755 di->text_bias = 0;
758 DiCursor cmd_cur = ML_(cur_from_sli)(msli);
760 struct MACH_HEADER mh;
761 ML_(cur_step_get)(&mh, &cmd_cur, sizeof(mh));
763 /* Now cmd_cur points just after the Mach header, right at the
764 start of the load commands, which is where we need it to start
765 the following loop. */
767 Int c;
768 for (c = 0; c < mh.ncmds; c++) {
769 struct load_command cmd;
770 ML_(cur_read_get)(&cmd, cmd_cur, sizeof(cmd));
772 if (cmd.cmd == LC_SYMTAB) {
773 sym_cur = cmd_cur;
775 else if (cmd.cmd == LC_DYSYMTAB) {
776 dysym_cur = cmd_cur;
778 else if (cmd.cmd == LC_ID_DYLIB && mh.filetype == MH_DYLIB) {
779 // GrP fixme bundle?
780 struct dylib_command dcmd;
781 ML_(cur_read_get)(&dcmd, cmd_cur, sizeof(dcmd));
782 DiCursor dylibname_cur
783 = ML_(cur_plus)(cmd_cur, dcmd.dylib.name.offset);
784 HChar* dylibname
785 = ML_(cur_read_strdup)(dylibname_cur, "di.rmdi.1");
786 HChar* soname = VG_(strrchr)(dylibname, '/');
787 if (!soname) soname = dylibname;
788 else soname++;
789 di->soname = ML_(dinfo_strdup)("di.readmacho.dylibname",
790 soname);
791 ML_(dinfo_free)(dylibname);
793 else if (cmd.cmd==LC_ID_DYLINKER && mh.filetype==MH_DYLINKER) {
794 struct dylinker_command dcmd;
795 ML_(cur_read_get)(&dcmd, cmd_cur, sizeof(dcmd));
796 DiCursor dylinkername_cur
797 = ML_(cur_plus)(cmd_cur, dcmd.name.offset);
798 HChar* dylinkername
799 = ML_(cur_read_strdup)(dylinkername_cur, "di.rmdi.2");
800 HChar* soname = VG_(strrchr)(dylinkername, '/');
801 if (!soname) soname = dylinkername;
802 else soname++;
803 di->soname = ML_(dinfo_strdup)("di.readmacho.dylinkername",
804 soname);
805 ML_(dinfo_free)(dylinkername);
808 // A comment from Julian about why varinfo[35] fail:
810 // My impression is, from comparing the output of otool -l for these
811 // executables with the logic in ML_(read_macho_debug_info),
812 // specifically the part that begins "else if (cmd->cmd ==
813 // LC_SEGMENT_CMD) {", that it's a complete hack which just happens
814 // to work ok for text symbols. In particular, it appears to assume
815 // that in a "struct load_command" of type LC_SEGMENT_CMD, the first
816 // "struct SEGMENT_COMMAND" inside it is going to contain the info we
817 // need. However, otool -l shows, and also the Apple docs state,
818 // that a struct load_command may contain an arbitrary number of
819 // struct SEGMENT_COMMANDs, so I'm not sure why it's OK to merely
820 // snarf the first. But I'm not sure about this.
822 // The "Try for __DATA" block below simply adds acquisition of data
823 // svma/bias values using the same assumption. It also needs
824 // (probably) to deal with bss sections, but I don't understand how
825 // this all ties together really, so it requires further study.
827 // If you can get your head around the relationship between MachO
828 // segments, sections and load commands, this might be relatively
829 // easy to fix properly.
831 // Basically we need to come up with plausible numbers for di->
832 // {text,data,bss}_{avma,svma}, from which the _bias numbers are
833 // then trivially derived. Then I think the debuginfo reader should
834 // work pretty well.
835 else if (cmd.cmd == LC_SEGMENT_CMD) {
836 struct SEGMENT_COMMAND seg;
837 ML_(cur_read_get)(&seg, cmd_cur, sizeof(seg));
838 /* Try for __TEXT */
839 if (!di->text_present
840 && 0 == VG_(strcmp)(&seg.segname[0], "__TEXT")
841 /* DDD: is the next line a kludge? -- JRS */
842 && seg.fileoff == 0 && seg.filesize != 0) {
843 di->text_present = True;
844 di->text_svma = (Addr)seg.vmaddr;
845 di->text_avma = rx_map->avma;
846 di->text_size = seg.vmsize;
847 di->text_bias = di->text_avma - di->text_svma;
848 /* Make the _debug_ values be the same as the
849 svma/bias for the primary object, since there is
850 no secondary (debuginfo) object, but nevertheless
851 downstream biasing of Dwarf3 relies on the
852 _debug_ values. */
853 di->text_debug_svma = di->text_svma;
854 di->text_debug_bias = di->text_bias;
856 /* Try for __DATA */
857 if (!di->data_present
858 && 0 == VG_(strcmp)(&seg.segname[0], "__DATA")
859 /* && DDD:seg->fileoff == 0 */ && seg.filesize != 0) {
860 di->data_present = True;
861 di->data_svma = (Addr)seg.vmaddr;
862 di->data_avma = rw_map->avma;
863 di->data_size = seg.vmsize;
864 di->data_bias = di->data_avma - di->data_svma;
865 di->data_debug_svma = di->data_svma;
866 di->data_debug_bias = di->data_bias;
869 else if (cmd.cmd == LC_UUID) {
870 ML_(cur_read_get)(&uuid, cmd_cur, sizeof(uuid));
871 have_uuid = True;
873 // Move the cursor along
874 cmd_cur = ML_(cur_plus)(cmd_cur, cmd.cmdsize);
878 if (!di->soname) {
879 di->soname = ML_(dinfo_strdup)("di.readmacho.noname", "NONE");
882 if (di->trace_symtab) {
883 VG_(printf)("\n");
884 VG_(printf)("SONAME = %s\n", di->soname);
885 VG_(printf)("\n");
888 /* Now we have the base object to hand. Read symbols from it. */
890 // We already asserted that ..
891 vg_assert(msli.img != NULL && msli.szB > 0);
893 if (ML_(cur_is_valid)(sym_cur) && ML_(cur_is_valid)(dysym_cur)) {
895 struct symtab_command symcmd;
896 struct dysymtab_command dysymcmd;
898 ML_(cur_read_get)(&symcmd, sym_cur, sizeof(symcmd));
899 ML_(cur_read_get)(&dysymcmd, dysym_cur, sizeof(dysymcmd));
901 /* Read nlist symbol table */
902 DiCursor syms = DiCursor_INVALID;
903 DiCursor strs = DiCursor_INVALID;
904 XArray* /* DiSym */ candSyms = NULL;
905 Word nCandSyms;
907 if (msli.szB < symcmd.stroff + symcmd.strsize
908 || msli.szB < symcmd.symoff + symcmd.nsyms
909 * sizeof(struct NLIST)) {
910 ML_(symerr)(di, False, "Invalid Mach-O file (5 too small).");
911 goto fail;
913 if (dysymcmd.ilocalsym + dysymcmd.nlocalsym > symcmd.nsyms
914 || dysymcmd.iextdefsym + dysymcmd.nextdefsym > symcmd.nsyms) {
915 ML_(symerr)(di, False, "Invalid Mach-O file (bad symbol table).");
916 goto fail;
919 syms = ML_(cur_plus)(ML_(cur_from_sli)(msli), symcmd.symoff);
920 strs = ML_(cur_plus)(ML_(cur_from_sli)(msli), symcmd.stroff);
922 if (VG_(clo_verbosity) > 1)
923 VG_(message)(Vg_DebugMsg,
924 " reading syms from primary file (%d %d)\n",
925 dysymcmd.nextdefsym, dysymcmd.nlocalsym );
927 /* Read candidate symbols into 'candSyms', so we can truncate
928 overlapping ends and generally tidy up, before presenting
929 them to ML_(addSym). */
930 candSyms = VG_(newXA)(
931 ML_(dinfo_zalloc), "di.readmacho.candsyms.1",
932 ML_(dinfo_free), sizeof(DiSym)
935 // extern symbols
936 read_symtab(candSyms,
938 ML_(cur_plus)(syms,
939 dysymcmd.iextdefsym * sizeof(struct NLIST)),
940 dysymcmd.nextdefsym, strs, symcmd.strsize);
941 // static and private_extern symbols
942 read_symtab(candSyms,
944 ML_(cur_plus)(syms,
945 dysymcmd.ilocalsym * sizeof(struct NLIST)),
946 dysymcmd.nlocalsym, strs, symcmd.strsize);
948 /* tidy up the cand syms -- trim overlapping ends. May resize
949 candSyms. */
950 tidy_up_cand_syms( candSyms, di->trace_symtab );
952 /* and finally present them to ML_(addSym) */
953 nCandSyms = VG_(sizeXA)( candSyms );
954 for (i = 0; i < nCandSyms; i++) {
955 DiSym* cand = (DiSym*) VG_(indexXA)( candSyms, i );
956 vg_assert(cand->pri_name != NULL);
957 vg_assert(cand->sec_names == NULL);
958 if (di->trace_symtab)
959 VG_(printf)("nlist final: acquire avma %010lx-%010lx %s\n",
960 cand->avmas.main, cand->avmas.main + cand->size - 1,
961 cand->pri_name );
962 ML_(addSym)( di, cand );
964 VG_(deleteXA)( candSyms );
967 /* If there's no UUID in the primary, don't even bother to try and
968 read any DWARF, since we won't be able to verify it matches.
969 Our policy is not to load debug info unless we can verify that
970 it matches the primary. Just declare success at this point.
971 And don't complain to the user, since that would cause us to
972 complain on objects compiled without -g. (Some versions of
973 XCode are observed to omit a UUID entry for objects linked(?)
974 without -g. Others don't appear to omit it.) */
975 if (!have_uuid)
976 goto success;
978 /* mmap the dSYM file to look for DWARF debug info. If successful,
979 use the .img and .szB fields of dsli. */
981 dsymfilename = find_separate_debug_file( di->fsm.filename );
983 /* Try to load it. */
984 if (dsymfilename) {
985 Bool valid;
987 if (VG_(clo_verbosity) > 1)
988 VG_(message)(Vg_DebugMsg, " dSYM= %s\n", dsymfilename);
990 dsli = map_image_aboard( di, dsymfilename );
991 if (!ML_(sli_is_valid)(dsli)) {
992 ML_(symerr)(di, False, "Connect to debuginfo image failed "
993 "(first attempt).");
994 goto fail;
997 /* check it has the right uuid. */
998 vg_assert(have_uuid);
999 valid = dsli.img && dsli.szB > 0 && check_uuid_matches( dsli, uuid );
1000 if (valid)
1001 goto read_the_dwarf;
1003 if (VG_(clo_verbosity) > 1)
1004 VG_(message)(Vg_DebugMsg, " dSYM does not have "
1005 "correct UUID (out of date?)\n");
1008 /* There was no dsym file, or it doesn't match. We'll have to try
1009 regenerating it, unless --dsymutil=no, in which case just complain
1010 instead. */
1012 /* If this looks like a lib that we shouldn't run dsymutil on, just
1013 give up. (possible reasons: is system lib, or in /usr etc, or
1014 the dsym dir would not be writable by the user, or we're running
1015 as root) */
1016 vg_assert(di->fsm.filename);
1017 if (is_systemish_library_name(di->fsm.filename))
1018 goto success;
1020 if (!VG_(clo_dsymutil)) {
1021 if (VG_(clo_verbosity) == 1) {
1022 VG_(message)(Vg_DebugMsg, "%s:\n", di->fsm.filename);
1024 if (VG_(clo_verbosity) > 0)
1025 VG_(message)(Vg_DebugMsg, "%sdSYM directory %s; consider using "
1026 "--dsymutil=yes\n",
1027 VG_(clo_verbosity) > 1 ? " " : "",
1028 dsymfilename ? "has wrong UUID" : "is missing");
1029 goto success;
1032 /* Run dsymutil */
1034 { Int r;
1035 const HChar* dsymutil = "/usr/bin/dsymutil ";
1036 HChar* cmd = ML_(dinfo_zalloc)( "di.readmacho.tmp1",
1037 VG_(strlen)(dsymutil)
1038 + VG_(strlen)(di->fsm.filename)
1039 + 32 /* misc */ );
1040 VG_(strcpy)(cmd, dsymutil);
1041 if (0) VG_(strcat)(cmd, "--verbose ");
1042 VG_(strcat)(cmd, "\"");
1043 VG_(strcat)(cmd, di->fsm.filename);
1044 VG_(strcat)(cmd, "\"");
1045 VG_(message)(Vg_DebugMsg, "run: %s\n", cmd);
1046 r = VG_(system)( cmd );
1047 if (r)
1048 VG_(message)(Vg_DebugMsg, "run: %s FAILED\n", dsymutil);
1049 ML_(dinfo_free)(cmd);
1050 dsymfilename = find_separate_debug_file(di->fsm.filename);
1053 /* Try again to load it. */
1054 if (dsymfilename) {
1055 Bool valid;
1057 if (VG_(clo_verbosity) > 1)
1058 VG_(message)(Vg_DebugMsg, " dsyms= %s\n", dsymfilename);
1060 dsli = map_image_aboard( di, dsymfilename );
1061 if (!ML_(sli_is_valid)(dsli)) {
1062 ML_(symerr)(di, False, "Connect to debuginfo image failed "
1063 "(second attempt).");
1064 goto fail;
1067 /* check it has the right uuid. */
1068 vg_assert(have_uuid);
1069 vg_assert(have_uuid);
1070 valid = dsli.img && dsli.szB > 0 && check_uuid_matches( dsli, uuid );
1071 if (!valid) {
1072 if (VG_(clo_verbosity) > 0) {
1073 VG_(message)(Vg_DebugMsg,
1074 "WARNING: did not find expected UUID %02X%02X%02X%02X"
1075 "-%02X%02X-%02X%02X-%02X%02X-%02X%02X%02X%02X%02X%02X"
1076 " in dSYM dir\n",
1077 (UInt)uuid[0], (UInt)uuid[1], (UInt)uuid[2], (UInt)uuid[3],
1078 (UInt)uuid[4], (UInt)uuid[5], (UInt)uuid[6], (UInt)uuid[7],
1079 (UInt)uuid[8], (UInt)uuid[9], (UInt)uuid[10],
1080 (UInt)uuid[11], (UInt)uuid[12], (UInt)uuid[13],
1081 (UInt)uuid[14], (UInt)uuid[15] );
1082 VG_(message)(Vg_DebugMsg,
1083 "WARNING: for %s\n", di->fsm.filename);
1085 unmap_image( &dsli );
1086 /* unmap_image zeroes out dsli, so it's safe for "fail:" to
1087 re-try unmap_image. */
1088 goto fail;
1092 /* Right. Finally we have our best try at the dwarf image, so go
1093 on to reading stuff out of it. */
1095 read_the_dwarf:
1096 if (ML_(sli_is_valid)(dsli) && dsli.szB > 0) {
1097 // "_mscn" is "mach-o section"
1098 DiSlice debug_info_mscn
1099 = getsectdata(dsli, "__DWARF", "__debug_info", NULL);
1100 DiSlice debug_abbv_mscn
1101 = getsectdata(dsli, "__DWARF", "__debug_abbrev", NULL);
1102 DiSlice debug_line_mscn
1103 = getsectdata(dsli, "__DWARF", "__debug_line", NULL);
1104 DiSlice debug_str_mscn
1105 = getsectdata(dsli, "__DWARF", "__debug_str", NULL);
1106 DiSlice debug_line_str_mscn
1107 = getsectdata(dsli, "__DWARF", "__debug_line_str", NULL);
1108 DiSlice debug_ranges_mscn
1109 = getsectdata(dsli, "__DWARF", "__debug_ranges", NULL);
1110 DiSlice debug_rnglists_mscn
1111 = getsectdata(dsli, "__DWARF", "__debug_rnglists", NULL);
1112 DiSlice debug_loclists_mscn
1113 = getsectdata(dsli, "__DWARF", "__debug_loclists", NULL);
1114 DiSlice debug_loc_mscn
1115 = getsectdata(dsli, "__DWARF", "__debug_loc", NULL);
1116 DiSlice debug_addr_mscn
1117 = getsectdata(dsli, "__DWARF", "__debug_addr", NULL);
1118 DiSlice debug_str_offsets_mscn
1119 = getsectdata(dsli, "__DWARF", "__debug_str_offsets", NULL);
1121 /* It appears (jrs, 2014-oct-19) that section "__eh_frame" in
1122 segment "__TEXT" appears in both the main and dsym files, but
1123 only the main one gives the right results. Since it's in the
1124 __TEXT segment, we calculate the __eh_frame avma using its
1125 svma and the text bias, and that sounds reasonable. */
1126 Addr eh_frame_svma = 0;
1127 DiSlice eh_frame_mscn
1128 = getsectdata(msli, "__TEXT", "__eh_frame", &eh_frame_svma);
1130 if (ML_(sli_is_valid)(eh_frame_mscn)) {
1131 vg_assert(di->text_bias == di->text_debug_bias);
1132 ML_(read_callframe_info_dwarf3)(di, eh_frame_mscn,
1133 eh_frame_svma + di->text_bias,
1134 True/*is_ehframe*/);
1137 if (ML_(sli_is_valid)(debug_info_mscn)) {
1138 if (VG_(clo_verbosity) > 1) {
1139 if (0)
1140 VG_(message)(Vg_DebugMsg,
1141 "Reading dwarf3 for %s (%#lx) from %s"
1142 " (%lld %lld %lld %lld %lld %lld)\n",
1143 di->fsm.filename, di->text_avma, dsymfilename,
1144 debug_info_mscn.szB, debug_abbv_mscn.szB,
1145 debug_line_mscn.szB, debug_str_mscn.szB,
1146 debug_ranges_mscn.szB, debug_loc_mscn.szB
1148 VG_(message)(Vg_DebugMsg,
1149 " reading dwarf3 from dsyms file\n");
1151 /* The old reader: line numbers and unwind info only */
1152 ML_(read_debuginfo_dwarf3) ( di,
1153 debug_info_mscn,
1154 DiSlice_INVALID, /* .debug_types */
1155 debug_abbv_mscn,
1156 debug_line_mscn,
1157 debug_str_mscn,
1158 DiSlice_INVALID, /* ALT .debug_str */
1159 debug_line_str_mscn );
1161 /* The new reader: read the DIEs in .debug_info to acquire
1162 information on variable types and locations or inline info.
1163 But only if the tool asks for it, or the user requests it on
1164 the command line. */
1165 if (VG_(clo_read_var_info) /* the user or tool asked for it */
1166 || VG_(clo_read_inline_info)) {
1167 ML_(new_dwarf3_reader)(
1168 di, debug_info_mscn,
1169 DiSlice_INVALID, /* .debug_types */
1170 debug_abbv_mscn,
1171 debug_line_mscn,
1172 debug_str_mscn,
1173 debug_ranges_mscn,
1174 debug_rnglists_mscn,
1175 debug_loclists_mscn,
1176 debug_loc_mscn,
1177 DiSlice_INVALID, /* ALT .debug_info */
1178 DiSlice_INVALID, /* ALT .debug_abbv */
1179 DiSlice_INVALID, /* ALT .debug_line */
1180 DiSlice_INVALID, /* ALT .debug_str */
1181 debug_line_str_mscn, /* .debug_line_str */
1182 debug_addr_mscn,
1183 debug_str_offsets_mscn
1189 if (dsymfilename) ML_(dinfo_free)(dsymfilename);
1191 success:
1192 unmap_image(&msli);
1193 unmap_image(&dsli);
1194 return True;
1196 /* NOTREACHED */
1198 fail:
1199 ML_(symerr)(di, True, "Error reading Mach-O object.");
1200 unmap_image(&msli);
1201 unmap_image(&dsli);
1202 return False;
1205 #endif // defined(VGO_darwin)
1207 /*--------------------------------------------------------------------*/
1208 /*--- end ---*/
1209 /*--------------------------------------------------------------------*/