FreeBSD: fix reading debuginfo of the tool itself
[valgrind.git] / coregrind / m_debuginfo / priv_storage.h
blob441b379d2ea0926f338371b250f5f6e9c9a0a300
2 /*--------------------------------------------------------------------*/
3 /*--- Format-neutral storage of and querying of info acquired from ---*/
4 /*--- ELF/XCOFF stabs/dwarf1/dwarf2 debug info. ---*/
5 /*--- priv_storage.h ---*/
6 /*--------------------------------------------------------------------*/
8 /*
9 This file is part of Valgrind, a dynamic binary instrumentation
10 framework.
12 Copyright (C) 2000-2017 Julian Seward
13 jseward@acm.org
15 This program is free software; you can redistribute it and/or
16 modify it under the terms of the GNU General Public License as
17 published by the Free Software Foundation; either version 2 of the
18 License, or (at your option) any later version.
20 This program is distributed in the hope that it will be useful, but
21 WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
23 General Public License for more details.
25 You should have received a copy of the GNU General Public License
26 along with this program; if not, see <http://www.gnu.org/licenses/>.
28 The GNU General Public License is contained in the file COPYING.
31 Stabs reader greatly improved by Nick Nethercote, Apr 02.
32 This module was also extensively hacked on by Jeremy Fitzhardinge
33 and Tom Hughes.
35 /* See comment at top of debuginfo.c for explanation of
36 the _svma / _avma / _image / _bias naming scheme.
38 /* Note this is not freestanding; needs pub_core_xarray.h and
39 priv_tytypes.h to be included before it. */
41 #ifndef __PRIV_STORAGE_H
42 #define __PRIV_STORAGE_H
44 #include "pub_core_basics.h" // Addr
45 #include "pub_core_xarray.h" // XArray
46 #include "pub_core_deduppoolalloc.h" // DedupPoolAlloc
47 #include "priv_d3basics.h" // GExpr et al.
48 #include "priv_image.h" // DiCursor
50 /* --------------------- SYMBOLS --------------------- */
52 /* A structure to hold an ELF/MachO symbol (very crudely). Usually
53 the symbol only has one name, which is stored in ::pri_name, and
54 ::sec_names is NULL. If there are other names, these are stored in
55 ::sec_names, which is a NULL terminated vector holding the names.
56 The vector is allocated in VG_AR_DINFO, the names themselves live
57 in DebugInfo::strpool.
59 From the point of view of ELF, the primary vs secondary distinction
60 is artificial: they are all just names associated with the address,
61 none of which has higher precedence than any other. However, from
62 the point of view of mapping an address to a name to display to the
63 user, we need to choose one "preferred" name, and so that might as
64 well be installed as the pri_name, whilst all others can live in
65 sec_names[]. This has the convenient side effect that, in the
66 common case where there is only one name for the address,
67 sec_names[] does not need to be allocated.
69 typedef
70 struct {
71 SymAVMAs avmas; /* Symbol Actual VMAs: lowest address of entity,
72 + platform specific fields, to access with
73 the macros defined in pub_core_debuginfo.h */
74 const HChar* pri_name; /* primary name, never NULL */
75 const HChar** sec_names; /* NULL, or a NULL term'd array of other names */
76 // XXX: DiSym could be shrunk (on 32-bit platforms to exactly 16
77 // bytes, on 64-bit platforms the first 3 pointers already add
78 // up to 24 bytes, so size plus bits will extend to 32 bytes
79 // anyway) by using 29 bits for the size and 1 bit each for
80 // isText, isIFunc and isGlobal. If you do this, make sure that
81 // all assignments to the latter two use 0 or 1 (or True or
82 // False), and that a positive number larger than 1 is never
83 // used to represent True.
84 UInt size; /* size in bytes */
85 Bool isText;
86 Bool isIFunc; /* symbol is an indirect function? */
87 Bool isGlobal; /* Is this symbol globally visible? */
89 DiSym;
91 /* --------------------- SRCLOCS --------------------- */
93 /* Line count at which overflow happens, due to line numbers being
94 stored as shorts in `struct nlist' in a.out.h. */
95 #define LINENO_OVERFLOW (1 << (sizeof(short) * 8))
97 #define LINENO_BITS 20
98 #define LOC_SIZE_BITS (32 - LINENO_BITS)
99 #define MAX_LINENO ((1 << LINENO_BITS) - 1)
101 /* Unlikely to have any lines with instruction ranges > 4096 bytes */
102 #define MAX_LOC_SIZE ((1 << LOC_SIZE_BITS) - 1)
104 /* Number used to detect line number overflows; if one line is
105 60000-odd smaller than the previous, it was probably an overflow.
107 #define OVERFLOW_DIFFERENCE (LINENO_OVERFLOW - 5000)
109 /* Filename and Dirname pair. FnDn are stored in di->fndnpool
110 and are allocated using VG_(allocFixedEltDedupPA).
111 The filename/dirname strings are themselves stored in di->strpool. */
112 typedef
113 struct {
114 const HChar* filename; /* source filename; the string itself lives in di->strpool */
115 const HChar* dirname; /* source directory name; the string itself lives in di->strpool */
116 } FnDn;
118 /* A structure to hold addr-to-source info for a single line. There
119 can be a lot of these, hence the dense packing. */
120 typedef
121 struct {
122 /* Word 1 */
123 Addr addr; /* lowest address for this line */
124 /* Word 2 */
125 UShort size:LOC_SIZE_BITS; /* # bytes; we catch overflows of this */
126 UInt lineno:LINENO_BITS; /* source line number, or zero */
128 DiLoc;
130 #define LEVEL_BITS (32 - LINENO_BITS)
131 #define MAX_LEVEL ((1 << LEVEL_BITS) - 1)
133 /* A structure to hold addr-to-inlined fn info. There
134 can be a lot of these, hence the dense packing.
135 Only caller source filename and lineno are stored.
136 Handling dirname should be done using fndn_ix technique
137 similar to ML_(addLineInfo). */
138 typedef
139 struct {
140 /* Word 1 */
141 Addr addr_lo; /* lowest address for inlined fn */
142 /* Word 2 */
143 Addr addr_hi; /* highest address following the inlined fn */
144 /* Word 3 */
145 const HChar* inlinedfn; /* inlined function name */
146 /* Word 4 and 5 */
147 UInt fndn_ix; /* index in di->fndnpool of caller source
148 dirname/filename */
149 UInt lineno:LINENO_BITS; /* caller line number */
150 UShort level:LEVEL_BITS; /* level of inlining */
152 DiInlLoc;
154 /* --------------------- CF INFO --------------------- */
156 /* DiCfSI: a structure to summarise DWARF2/3 CFA info for the code
157 address range [base .. base+len-1].
159 On x86 and amd64 ("IA"), if you know ({e,r}sp, {e,r}bp, {e,r}ip) at
160 some point and {e,r}ip is in the range [base .. base+len-1], it
161 tells you how to calculate ({e,r}sp, {e,r}bp) for the caller of the
162 current frame and also ra, the return address of the current frame.
164 First off, calculate CFA, the Canonical Frame Address, thusly:
166 cfa = case cfa_how of
167 CFIC_IA_SPREL -> {e,r}sp + cfa_off
168 CFIC_IA_BPREL -> {e,r}bp + cfa_off
169 CFIC_EXPR -> expr whose index is in cfa_off
171 Once that is done, the previous frame's {e,r}sp/{e,r}bp values and
172 this frame's {e,r}ra value can be calculated like this:
174 old_{e,r}sp/{e,r}bp/ra
175 = case {e,r}sp/{e,r}bp/ra_how of
176 CFIR_UNKNOWN -> we don't know, sorry
177 CFIR_SAME -> same as it was before (sp/fp only)
178 CFIR_CFAREL -> cfa + sp/bp/ra_off
179 CFIR_MEMCFAREL -> *( cfa + sp/bp/ra_off )
180 CFIR_EXPR -> expr whose index is in sp/bp/ra_off
182 On ARM it's pretty much the same, except we have more registers to
183 keep track of:
185 cfa = case cfa_how of
186 CFIC_ARM_R13REL -> r13 + cfa_off
187 CFIC_ARM_R12REL -> r12 + cfa_off
188 CFIC_ARM_R11REL -> r11 + cfa_off
189 CFIC_ARM_R7REL -> r7 + cfa_off
190 CFIC_EXPR -> expr whose index is in cfa_off
192 old_r14/r13/r12/r11/r7/ra
193 = case r14/r13/r12/r11/r7/ra_how of
194 CFIR_UNKNOWN -> we don't know, sorry
195 CFIR_SAME -> same as it was before (r14/r13/r12/r11/r7 only)
196 CFIR_CFAREL -> cfa + r14/r13/r12/r11/r7/ra_off
197 CFIR_MEMCFAREL -> *( cfa + r14/r13/r12/r11/r7/ra_off )
198 CFIR_EXPR -> expr whose index is in r14/r13/r12/r11/r7/ra_off
200 On ARM64:
202 cfa = case cfa_how of
203 CFIC_ARM64_SPREL -> sp + cfa_off
204 CFIC_ARM64_X29REL -> x29 + cfa_off
205 CFIC_EXPR -> expr whose index is in cfa_off
207 old_sp/x30/x29/ra
208 = case sp/x30/x29/ra_how of
209 CFIR_UNKNOWN -> we don't know, sorry
210 CFIR_SAME -> same as it was before
211 CFIR_CFAREL -> cfa + sp/x30/x29/ra_off
212 CFIR_MEMCFAREL -> *( cfa + sp/x30/x29/ra_off )
213 CFIR_EXPR -> expr whose index is in sp/x30/x29/ra_off
215 On s390x we have a similar logic as x86 or amd64. We need the stack pointer
216 (r15), the frame pointer r11 (like BP) and together with the instruction
217 address in the PSW we can calculate the previous values:
218 cfa = case cfa_how of
219 CFIC_IA_SPREL -> r15 + cfa_off
220 CFIC_IA_BPREL -> r11 + cfa_off
221 CFIC_EXPR -> expr whose index is in cfa_off
223 old_sp/fp/ra
224 = case sp/fp/ra_how of
225 CFIR_UNKNOWN -> we don't know, sorry
226 CFIR_SAME -> same as it was before (sp/fp only)
227 CFIR_CFAREL -> cfa + sp/fp/ra_off
228 CFIR_MEMCFAREL -> *( cfa + sp/fp/ra_off )
229 CFIR_EXPR -> expr whose index is in sp/fp/ra_off
230 CFIR_S390X_F0 -> old value of %f0
231 CFIR_S390X_F1 -> old value of %f1
232 CFIR_S390X_F2 -> old value of %f2
233 CFIR_S390X_F3 -> old value of %f3
234 CFIR_S390X_F4 -> old value of %f4
235 CFIR_S390X_F5 -> old value of %f5
236 CFIR_S390X_F6 -> old value of %f6
237 CFIR_S390X_F7 -> old value of %f7
240 #define CFIC_IA_SPREL ((UChar)1)
241 #define CFIC_IA_BPREL ((UChar)2)
242 #define CFIC_ARM_R13REL ((UChar)3)
243 #define CFIC_ARM_R12REL ((UChar)4)
244 #define CFIC_ARM_R11REL ((UChar)5)
245 #define CFIC_ARM_R7REL ((UChar)6)
246 #define CFIC_ARM64_SPREL ((UChar)7)
247 #define CFIC_ARM64_X29REL ((UChar)8)
248 #define CFIC_EXPR ((UChar)9) /* all targets */
250 #define CFIR_UNKNOWN ((UChar)64)
251 #define CFIR_SAME ((UChar)65)
252 #define CFIR_CFAREL ((UChar)66)
253 #define CFIR_MEMCFAREL ((UChar)67)
254 #define CFIR_EXPR ((UChar)68)
255 #define CFIR_S390X_F0 ((UChar)69)
256 #define CFIR_S390X_F1 ((UChar)70)
257 #define CFIR_S390X_F2 ((UChar)71)
258 #define CFIR_S390X_F3 ((UChar)72)
259 #define CFIR_S390X_F4 ((UChar)73)
260 #define CFIR_S390X_F5 ((UChar)74)
261 #define CFIR_S390X_F6 ((UChar)75)
262 #define CFIR_S390X_F7 ((UChar)76)
264 /* Definition of the DiCfSI_m DiCfSI machine dependent part.
265 These are highly duplicated, and are stored in a pool. */
266 #if defined(VGA_x86) || defined(VGA_amd64)
267 typedef
268 struct {
269 UChar cfa_how; /* a CFIC_IA value */
270 UChar ra_how; /* a CFIR_ value */
271 UChar sp_how; /* a CFIR_ value */
272 UChar bp_how; /* a CFIR_ value */
273 Int cfa_off;
274 Int ra_off;
275 Int sp_off;
276 Int bp_off;
278 DiCfSI_m;
279 #elif defined(VGA_arm)
280 typedef
281 struct {
282 UChar cfa_how; /* a CFIC_ value */
283 UChar ra_how; /* a CFIR_ value */
284 UChar r14_how; /* a CFIR_ value */
285 UChar r13_how; /* a CFIR_ value */
286 UChar r12_how; /* a CFIR_ value */
287 UChar r11_how; /* a CFIR_ value */
288 UChar r7_how; /* a CFIR_ value */
289 Int cfa_off;
290 Int ra_off;
291 Int r14_off;
292 Int r13_off;
293 Int r12_off;
294 Int r11_off;
295 Int r7_off;
296 // If you add additional fields, don't forget to update the
297 // initialisation of this in readexidx.c accordingly.
299 DiCfSI_m;
300 #elif defined(VGA_arm64)
301 typedef
302 struct {
303 UChar cfa_how; /* a CFIC_ value */
304 UChar ra_how; /* a CFIR_ value */
305 UChar sp_how; /* a CFIR_ value */ /*dw31=SP*/
306 UChar x30_how; /* a CFIR_ value */ /*dw30=LR*/
307 UChar x29_how; /* a CFIR_ value */ /*dw29=FP*/
308 Int cfa_off;
309 Int ra_off;
310 Int sp_off;
311 Int x30_off;
312 Int x29_off;
314 DiCfSI_m;
315 #elif defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le)
316 /* Just have a struct with the common fields in, so that code that
317 processes the common fields doesn't have to be ifdef'd against
318 VGP_/VGA_ symbols. These are not used in any way on ppc32/64-linux
319 at the moment. */
320 typedef
321 struct {
322 UChar cfa_how; /* a CFIC_ value */
323 UChar ra_how; /* a CFIR_ value */
324 Int cfa_off;
325 Int ra_off;
327 DiCfSI_m;
328 #elif defined(VGA_s390x)
329 typedef
330 struct {
331 UChar cfa_how; /* a CFIC_ value */
332 UChar sp_how; /* a CFIR_ value */
333 UChar ra_how; /* a CFIR_ value */
334 UChar fp_how; /* a CFIR_ value */
335 UChar f0_how; /* a CFIR_ value */
336 UChar f1_how; /* a CFIR_ value */
337 UChar f2_how; /* a CFIR_ value */
338 UChar f3_how; /* a CFIR_ value */
339 UChar f4_how; /* a CFIR_ value */
340 UChar f5_how; /* a CFIR_ value */
341 UChar f6_how; /* a CFIR_ value */
342 UChar f7_how; /* a CFIR_ value */
343 Int cfa_off;
344 Int sp_off;
345 Int ra_off;
346 Int fp_off;
347 Int f0_off;
348 Int f1_off;
349 Int f2_off;
350 Int f3_off;
351 Int f4_off;
352 Int f5_off;
353 Int f6_off;
354 Int f7_off;
356 DiCfSI_m;
357 #elif defined(VGA_mips32) || defined(VGA_mips64) || defined(VGA_nanomips)
358 typedef
359 struct {
360 UChar cfa_how; /* a CFIC_ value */
361 UChar ra_how; /* a CFIR_ value */
362 UChar sp_how; /* a CFIR_ value */
363 UChar fp_how; /* a CFIR_ value */
364 Int cfa_off;
365 Int ra_off;
366 Int sp_off;
367 Int fp_off;
369 DiCfSI_m;
370 #else
371 # error "Unknown arch"
372 #endif
374 typedef
375 struct {
376 Addr base;
377 UInt len;
378 UInt cfsi_m_ix;
380 DiCfSI;
382 typedef
383 enum {
384 Cunop_Abs=0x231,
385 Cunop_Neg,
386 Cunop_Not
388 CfiUnop;
390 typedef
391 enum {
392 Cbinop_Add=0x321,
393 Cbinop_Sub,
394 Cbinop_And,
395 Cbinop_Mul,
396 Cbinop_Shl,
397 Cbinop_Shr,
398 Cbinop_Eq,
399 Cbinop_Ge,
400 Cbinop_Gt,
401 Cbinop_Le,
402 Cbinop_Lt,
403 Cbinop_Ne
405 CfiBinop;
407 typedef
408 enum {
409 Creg_INVALID=0x213,
410 Creg_IA_SP,
411 Creg_IA_BP,
412 Creg_IA_IP,
413 Creg_ARM_R13,
414 Creg_ARM_R12,
415 Creg_ARM_R15,
416 Creg_ARM_R14,
417 Creg_ARM_R7,
418 Creg_ARM64_SP,
419 Creg_ARM64_X30,
420 Creg_ARM64_X29,
421 Creg_S390_IA,
422 Creg_S390_SP,
423 Creg_S390_FP,
424 Creg_S390_LR,
425 Creg_MIPS_RA
427 CfiReg;
429 typedef
430 enum {
431 Cex_Undef=0x123,
432 Cex_Deref,
433 Cex_Const,
434 Cex_Unop,
435 Cex_Binop,
436 Cex_CfiReg,
437 Cex_DwReg
439 CfiExprTag;
441 typedef
442 struct {
443 CfiExprTag tag;
444 union {
445 struct {
446 } Undef;
447 struct {
448 Int ixAddr;
449 } Deref;
450 struct {
451 UWord con;
452 } Const;
453 struct {
454 CfiUnop op;
455 Int ix;
456 } Unop;
457 struct {
458 CfiBinop op;
459 Int ixL;
460 Int ixR;
461 } Binop;
462 struct {
463 CfiReg reg;
464 } CfiReg;
465 struct {
466 Int reg;
467 } DwReg;
469 Cex;
471 CfiExpr;
473 extern Int ML_(CfiExpr_Undef) ( XArray* dst );
474 extern Int ML_(CfiExpr_Deref) ( XArray* dst, Int ixAddr );
475 extern Int ML_(CfiExpr_Const) ( XArray* dst, UWord con );
476 extern Int ML_(CfiExpr_Unop) ( XArray* dst, CfiUnop op, Int ix );
477 extern Int ML_(CfiExpr_Binop) ( XArray* dst, CfiBinop op, Int ixL, Int ixR );
478 extern Int ML_(CfiExpr_CfiReg)( XArray* dst, CfiReg reg );
479 extern Int ML_(CfiExpr_DwReg) ( XArray* dst, Int reg );
481 extern void ML_(ppCfiExpr)( const XArray* src, Int ix );
483 /* ---------------- FPO INFO (Windows PE) -------------- */
485 /* for apps using Wine: MSVC++ PDB FramePointerOmitted: somewhat like
486 a primitive CFI */
487 typedef
488 struct _FPO_DATA { /* 16 bytes */
489 UInt ulOffStart; /* offset of 1st byte of function code */
490 UInt cbProcSize; /* # bytes in function */
491 UInt cdwLocals; /* # bytes/4 in locals */
492 UShort cdwParams; /* # bytes/4 in params */
493 UChar cbProlog; /* # bytes in prolog */
494 UChar cbRegs :3; /* # regs saved */
495 UChar fHasSEH:1; /* Structured Exception Handling */
496 UChar fUseBP :1; /* EBP has been used */
497 UChar reserved:1;
498 UChar cbFrame:2; /* frame type */
500 FPO_DATA;
502 #define PDB_FRAME_FPO 0
503 #define PDB_FRAME_TRAP 1
504 #define PDB_FRAME_TSS 2
506 /* --------------------- VARIABLES --------------------- */
508 typedef
509 struct {
510 Addr aMin;
511 Addr aMax;
512 XArray* /* of DiVariable */ vars;
514 DiAddrRange;
516 typedef
517 struct {
518 const HChar* name; /* in DebugInfo.strpool */
519 UWord typeR; /* a cuOff */
520 const GExpr* gexpr; /* on DebugInfo.gexprs list */
521 const GExpr* fbGX; /* SHARED. */
522 UInt fndn_ix; /* where declared; may be zero. index
523 in DebugInfo.fndnpool */
524 Int lineNo; /* where declared; may be zero. */
526 DiVariable;
528 Word
529 ML_(cmp_for_DiAddrRange_range) ( const void* keyV, const void* elemV );
531 /* --------------------- DEBUGINFO --------------------- */
533 /* This is the top-level data type. It's a structure which contains
534 information pertaining to one mapped ELF object. This type is
535 exported only abstractly - in pub_tool_debuginfo.h. */
537 /* First though, here's an auxiliary data structure. It is only ever
538 used as part of a struct _DebugInfo. We use it to record
539 observations about mappings and permission changes to the
540 associated file, so as to decide when to read debug info. It's
541 essentially an ultra-trivial finite state machine which, when it
542 reaches an accept state, signals that we should now read debug info
543 from the object into the associated struct _DebugInfo. The accept
544 state is arrived at when have_rx_map is true and rw_map_count
545 is 1 or 2. The initial state is one in which we have no observations,
546 so have_rx_map is false and rw_map_count is 0.
548 This all started as a rather ad-hoc solution, but was further
549 expanded to handle weird object layouts, e.g. more than one rw
550 or rx mapping for one binary.
552 The normal sequence of events is one of
554 start --> r-x mapping --> rw- mapping --> accept
555 start --> rw- mapping --> r-x mapping --> accept
557 that is, take the first r-x and rw- mapping we see, and we're done.
559 On MacOSX >= 10.7, 32-bit, there appears to be a new variant:
561 start --> r-- mapping --> rw- mapping
562 --> upgrade r-- mapping to r-x mapping --> accept
564 where the upgrade is done by a call to mach_vm_protect (OSX 10.7)
565 or kernelrpc_mach_vm_protect_trap (OSX 10.9 and possibly 10.8).
566 Hence we need to also track this possibility.
568 From perusal of dyld sources, it appears that this scheme could
569 also be used for 64 bit libraries, although that doesn't seem to happen
570 in practice. dyld uses this scheme when the text section requires
571 relocation, which only appears to be the case for 32 bit objects.
574 typedef struct
576 Addr avma; /* these fields record the file offset, length */
577 SizeT size; /* and map address of each mapping */
578 OffT foff;
579 Bool rx, rw, ro; /* memory access flags for this mapping */
580 #if defined(VGO_freebsd)
581 Bool ignore_foff;
582 #endif
583 } DebugInfoMapping;
585 struct _DebugInfoFSM
587 HChar* filename; /* in mallocville (VG_AR_DINFO) */
588 HChar* dbgname; /* in mallocville (VG_AR_DINFO) */
589 XArray* maps; /* XArray of DebugInfoMapping structs */
590 Bool have_rx_map; /* did we see a r?x mapping yet for the file? */
591 Int rw_map_count; /* count of w? mappings seen (may be > 1 ) */
592 Bool have_ro_map; /* did we see a r-- mapping yet for the file? */
596 /* To do with the string table in struct _DebugInfo (::strpool) */
597 #define SEGINFO_STRPOOLSIZE (64*1024)
600 /* We may encounter more than one .eh_frame section in an object --
601 unusual but apparently allowed by ELF. See
602 http://sourceware.org/bugzilla/show_bug.cgi?id=12675
604 #define N_EHFRAME_SECTS 2
607 /* So, the main structure for holding debug info for one object. */
609 struct _DebugInfo {
611 /* Admin stuff */
613 struct _DebugInfo* next; /* list of DebugInfos */
614 Bool mark; /* marked for deletion? */
616 /* An abstract handle, which can be used by entities outside of
617 m_debuginfo to (in an abstract datatype sense) refer to this
618 struct _DebugInfo. A .handle of zero is invalid; valid handles
619 are 1 and above. The same handle is never issued twice (in any
620 given run of Valgrind), so a handle becomes invalid when the
621 associated struct _DebugInfo is discarded, and remains invalid
622 forever thereafter. The .handle field is set as soon as this
623 structure is allocated. */
624 ULong handle;
626 /* The range of epochs for which this DebugInfo is valid. These also
627 divide the DebugInfo's lifetime into three parts:
629 (1) Allocated: but with only .fsm holding useful info -- in
630 particular, not yet holding any debug info.
631 .first_epoch == DebugInfoEpoch_INVALID
632 .last_epoch == DebugInfoEpoch_INVALID
634 (2) Active: containing debug info, and current.
635 .first_epoch != DebugInfoEpoch_INVALID
636 .last_epoch == DebugInfoEpoch_INVALID
638 (3) Archived: containing debug info, but no longer current.
639 .first_epoch != DebugInfoEpoch_INVALID
640 .last_epoch != DebugInfoEpoch_INVALID
642 State (2) corresponds to an object which is currently mapped. When
643 the object is unmapped, what happens depends on the setting of
644 --keep-debuginfo:
646 * when =no, the DebugInfo is removed from debugInfo_list and
647 deleted.
649 * when =yes, the DebugInfo is retained in debugInfo_list, but its
650 .last_epoch field is filled in, and current_epoch is advanced. This
651 effectively moves the DebugInfo into state (3).
653 DiEpoch first_epoch;
654 DiEpoch last_epoch;
656 /* Used for debugging only - indicate what stuff to dump whilst
657 reading stuff into the seginfo. Are computed as early in the
658 lifetime of the DebugInfo as possible -- at the point when it is
659 created. Use these when deciding what to spew out; do not use
660 the global VG_(clo_blah) flags. */
662 Bool trace_symtab; /* symbols, our style */
663 Bool trace_cfi; /* dwarf frame unwind, our style */
664 Bool ddump_syms; /* mimic /usr/bin/readelf --syms */
665 Bool ddump_line; /* mimic /usr/bin/readelf --debug-dump=line */
666 Bool ddump_frames; /* mimic /usr/bin/readelf --debug-dump=frames */
668 /* The "decide when it is time to read debuginfo" state machine.
669 This structure must get filled in before we can start reading
670 anything from the ELF/MachO file. This structure is filled in
671 by VG_(di_notify_mmap) and its immediate helpers. */
672 struct _DebugInfoFSM fsm;
674 /* Once the ::fsm has reached an accept state -- typically, when
675 both a rw? and r?x mapping for .filename have been observed --
676 we can go on to read the symbol tables and debug info.
677 .have_dinfo changes from False to True when the debug info has
678 been completely read in and postprocessed (canonicalised) and is
679 now suitable for querying. */
680 /* If have_dinfo is False, then all fields below this point are
681 invalid and should not be consulted. */
682 Bool have_dinfo; /* initially False */
684 /* If true then the reading of .debug_* sections has been deferred
685 until this information is required (such as when printing
686 a stacktrace). Additionally, if true then the reading of any
687 separate debuginfo files associated with this object has also
688 been deferred. */
689 Bool deferred;
691 /* All the rest of the fields in this structure are filled in once
692 we have committed to reading the symbols and debug info (that
693 is, at the point where .have_dinfo is set to True). */
695 /* The file's soname. */
696 HChar* soname;
698 /* Description of some important mapped segments. The presence or
699 absence of the mapping is denoted by the _present field, since
700 in some obscure circumstances (to do with data/sdata/bss) it is
701 possible for the mapping to be present but have zero size.
702 Certainly text_ is mandatory on all platforms; not sure about
703 the rest though.
705 --------------------------------------------------------
707 Comment_on_IMPORTANT_CFSI_REPRESENTATIONAL_INVARIANTS: we require that
709 either (size of all rx maps == 0 && cfsi == NULL) (the degenerate case)
711 or the normal case, which is the AND of the following:
712 (0) size of at least one rx mapping > 0
713 (1) no two non-archived DebugInfos with some rx mapping of size > 0
714 have overlapping rx mappings
715 (2) Each address in [cfsi_minavma,cfsi_maxavma] is in an rx mapping
716 or else no cfsi can cover this address.
717 The typical case is a single rx mapping covering the full range.
718 In some cases, the union of several rx mappings covers the range,
719 with possibly some holes between the rx mappings, and no cfsi fall
720 within such a hole.
721 (3) all DiCfSI in the cfsi array have ranges that fall within
722 [avma,+size) of that rx mapping.
723 (4) all DiCfSI in the cfsi array are non-overlapping
725 The cumulative effect of these restrictions is to ensure that
726 all the DiCfSI records in the entire system are non overlapping.
727 Hence any address falls into either exactly one DiCfSI record,
728 or none. Hence it is safe to cache the results of searches for
729 DiCfSI records. This is the whole point of these restrictions.
730 The caching of DiCfSI searches is done in VG_(use_CF_info). The
731 cache is flushed after any change to debugInfo_list. DiCfSI
732 searches are cached because they are central to stack unwinding
733 on amd64-linux.
735 Where are these invariants imposed and checked?
737 They are checked after a successful read of debuginfo into
738 a DebugInfo*, in check_CFSI_related_invariants.
740 (1) is not really imposed anywhere. We simply assume that the
741 kernel will not map the text segments from two different objects
742 into the same space. Sounds reasonable.
744 (2) follows from (4) and (3). It is ensured by canonicaliseCFI.
745 (3) is ensured by ML_(addDiCfSI).
746 (4) is ensured by canonicaliseCFI.
748 --------------------------------------------------------
750 Comment_on_DEBUG_SVMA_and_DEBUG_BIAS_fields:
752 The _debug_{svma,bias} fields were added as part of a fix to
753 #185816. The problem encompassed in that bug report was that it
754 wasn't correct to apply the bias values deduced for a
755 primary object to its associated debuginfo object, because the
756 debuginfo object (or the primary) could have been prelinked to a
757 different SVMA. Hence debuginfo and primary objects need to
758 have their own biases.
760 ------ JRS: (referring to r9329): ------
761 Let me see if I understand the workings correctly. Initially
762 the _debug_ values are set to the same values as the "normal"
763 ones, as there's a bunch of bits of code like this (in
764 readelf.c)
766 di->text_svma = svma;
768 di->text_bias = rx_bias;
769 di->text_debug_svma = svma;
770 di->text_debug_bias = rx_bias;
772 If a debuginfo object subsequently shows up then the
773 _debug_svma/bias are set for the debuginfo object. Result is
774 that if there's no debuginfo object then the values are the same
775 as the primary-object values, and if there is a debuginfo object
776 then they will (or at least may) be different.
778 Then when we need to actually bias something, we'll have to
779 decide whether to use the primary bias or the debuginfo bias.
780 And the strategy is to use the primary bias for ELF symbols but
781 the debuginfo bias for anything pulled out of Dwarf.
783 ------ THH: ------
784 Correct - the debug_svma and bias values apply to any address
785 read from the debug data regardless of where that debug data is
786 stored and the other values are used for addresses from other
787 places (primarily the symbol table).
789 ------ JRS: ------
790 Ok; so this was my only area of concern. Are there any
791 corner-case scenarios where this wouldn't be right? It sounds
792 like we're assuming the ELF symbols come from the primary object
793 and, if there is a debug object, then all the Dwarf comes from
794 there. But what if (eg) both symbols and Dwarf come from the
795 debug object? Is that even possible or allowable?
797 ------ THH: ------
798 You may have a point...
800 The current logic is to try and take any one set of data from
801 either the base object or the debug object. There are four sets
802 of data we consider:
804 - Symbol Table
805 - Stabs
806 - DWARF1
807 - DWARF2
809 If we see the primary section for a given set in the base object
810 then we ignore all sections relating to that set in the debug
811 object.
813 Now in principle if we saw a secondary section (like debug_line
814 say) in the base object, but not the main section (debug_info in
815 this case) then we would take debug_info from the debug object
816 but would use the debug_line from the base object unless we saw
817 a replacement copy in the debug object. That's probably unlikely
818 however.
820 A bigger issue might be, as you say, the symbol table as we will
821 pick that up from the debug object if it isn't in the base. The
822 dynamic symbol table will always have to be in the base object
823 though so we will have to be careful when processing symbols to
824 know which table we are reading in that case.
826 What we probably need to do is tell read_elf_symtab which object
827 the symbols it is being asked to read came from.
829 (A followup patch to deal with this was committed in r9469).
831 /* .text */
832 Bool text_present;
833 Addr text_avma;
834 Addr text_svma;
835 SizeT text_size;
836 PtrdiffT text_bias;
837 Addr text_debug_svma;
838 PtrdiffT text_debug_bias;
839 /* .data */
840 Bool data_present;
841 Addr data_svma;
842 Addr data_avma;
843 SizeT data_size;
844 PtrdiffT data_bias;
845 Addr data_debug_svma;
846 PtrdiffT data_debug_bias;
847 /* .sdata */
848 Bool sdata_present;
849 Addr sdata_svma;
850 Addr sdata_avma;
851 SizeT sdata_size;
852 PtrdiffT sdata_bias;
853 Addr sdata_debug_svma;
854 PtrdiffT sdata_debug_bias;
855 /* .rodata */
856 Bool rodata_present;
857 Addr rodata_svma;
858 Addr rodata_avma;
859 SizeT rodata_size;
860 PtrdiffT rodata_bias;
861 Addr rodata_debug_svma;
862 PtrdiffT rodata_debug_bias;
863 /* .bss */
864 Bool bss_present;
865 Addr bss_svma;
866 Addr bss_avma;
867 SizeT bss_size;
868 PtrdiffT bss_bias;
869 Addr bss_debug_svma;
870 PtrdiffT bss_debug_bias;
871 /* .sbss */
872 Bool sbss_present;
873 Addr sbss_svma;
874 Addr sbss_avma;
875 SizeT sbss_size;
876 PtrdiffT sbss_bias;
877 Addr sbss_debug_svma;
878 PtrdiffT sbss_debug_bias;
879 /* .ARM.exidx -- sometimes present on arm32, containing unwind info. */
880 Bool exidx_present;
881 Addr exidx_avma;
882 Addr exidx_svma;
883 SizeT exidx_size;
884 PtrdiffT exidx_bias;
885 /* .ARM.extab -- sometimes present on arm32, containing unwind info. */
886 Bool extab_present;
887 Addr extab_avma;
888 Addr extab_svma;
889 SizeT extab_size;
890 PtrdiffT extab_bias;
891 /* .plt */
892 Bool plt_present;
893 Addr plt_avma;
894 SizeT plt_size;
895 /* .got */
896 Bool got_present;
897 Addr got_avma;
898 SizeT got_size;
899 /* .got.plt */
900 Bool gotplt_present;
901 Addr gotplt_avma;
902 SizeT gotplt_size;
903 /* .opd -- needed on ppc64be-linux for finding symbols */
904 Bool opd_present;
905 Addr opd_avma;
906 SizeT opd_size;
907 /* .ehframe -- needed on amd64-linux for stack unwinding. We might
908 see more than one, hence the arrays. */
909 UInt n_ehframe; /* 0 .. N_EHFRAME_SECTS */
910 Addr ehframe_avma[N_EHFRAME_SECTS];
911 SizeT ehframe_size[N_EHFRAME_SECTS];
913 /* Sorted tables of stuff we snarfed from the file. This is the
914 eventual product of reading the debug info. All this stuff
915 lives in VG_AR_DINFO. */
917 /* An expandable array of symbols. */
918 DiSym* symtab;
919 UWord symtab_used;
920 UWord symtab_size;
921 /* Two expandable arrays, storing locations and their filename/dirname. */
922 DiLoc* loctab;
923 UInt sizeof_fndn_ix; /* Similar use as sizeof_cfsi_m_ix below. */
924 void* loctab_fndn_ix; /* loctab[i] filename/dirname is identified by
925 loctab_fndn_ix[i] (an index in di->fndnpool)
926 0 means filename/dirname unknown.
927 The void* is an UChar* or UShort* or UInt*
928 depending on sizeof_fndn_ix. */
929 UWord loctab_used;
930 UWord loctab_size;
931 /* An expandable array of inlined fn info.
932 maxinl_codesz is the biggest inlined piece of code
933 in inltab (i.e. the max of 'addr_hi - addr_lo'). */
934 DiInlLoc* inltab;
935 UWord inltab_used;
936 UWord inltab_size;
937 SizeT maxinl_codesz;
939 /* A set of expandable arrays to store CFI summary info records.
940 The machine specific information (i.e. the DiCfSI_m struct)
941 are stored in cfsi_m_pool, as these are highly duplicated.
942 The DiCfSI_m are allocated in cfsi_m_pool and identified using
943 a (we hope) small integer : often one byte is enough, sometimes
944 2 bytes are needed.
946 cfsi_base contains the bases of the code address ranges.
947 cfsi_size is the size of the cfsi_base array.
948 The elements cfsi_base[0] till cfsi_base[cfsi_used-1] are used.
949 Following elements are not used (yet).
951 For each base in cfsi_base, an index into cfsi_m_pool is stored
952 in cfsi_m_ix array. The size of cfsi_m_ix is equal to
953 cfsi_size*sizeof_cfsi_m_ix. The used portion of cfsi_m_ix is
954 cfsi_m_ix[0] till cfsi_m_ix[(cfsi_used-1)*sizeof_cfsi_m_ix].
956 cfsi_base[i] gives the base address of a code range covered by
957 some CF Info. The corresponding CF Info is identified by an index
958 in cfsi_m_pool. The DiCfSI_m index in cfsi_m_pool corresponding to
959 cfsi_base[i] is given
960 by ((UChar*) cfsi_m_ix)[i] if sizeof_cfsi_m_ix == 1
961 by ((UShort*)cfsi_m_ix)[i] if sizeof_cfsi_m_ix == 2
962 by ((UInt*) cfsi_m_ix)[i] if sizeof_cfsi_m_ix == 4.
964 The end of the code range starting at cfsi_base[i] is given by
965 cfsi_base[i+1]-1 (or cfsi_maxavma for cfsi_base[cfsi_used-1]).
966 Some code ranges between cfsi_minavma and cfsi_maxavma might not
967 be covered by cfi information. Such not covered ranges are stored by
968 a base in cfsi_base and a corresponding 0 index in cfsi_m_ix.
970 A variable size representation has been chosen for the elements of
971 cfsi_m_ix as, in many cases, one byte is good enough. For big
972 objects, 2 bytes are needed. No object has yet been found where
973 4 bytes are needed (but the code is ready to handle this case).
974 Not covered ranges ('cfi holes') are stored explicitly in
975 cfsi_base/cfsi_m_ix as this is more memory efficient than storing
976 a length for each covered range : on x86 or amd64, we typically have
977 a hole every 8 covered ranges. On arm64, we have very few holes
978 (1 every 50 or 100 ranges).
980 The cfsi information is read and prepared in the cfsi_rd array.
981 Once all the information has been read, the cfsi_base and cfsi_m_ix
982 arrays will be filled in from cfsi_rd. cfsi_rd will then be freed.
983 This is all done by ML_(finish_CFSI_arrays).
985 Also includes summary address bounds, showing the min and max address
986 covered by any of the records, as an aid to fast searching. And, if the
987 records require any expression nodes, they are stored in
988 cfsi_exprs. */
989 Addr* cfsi_base;
990 UInt sizeof_cfsi_m_ix; /* size in byte of indexes stored in cfsi_m_ix. */
991 void* cfsi_m_ix; /* Each index occupies sizeof_cfsi_m_ix bytes.
992 The void* is an UChar* or UShort* or UInt*
993 depending on sizeof_cfsi_m_ix. */
995 DiCfSI* cfsi_rd; /* Only used during reading, NULL once info is read. */
997 UWord cfsi_used;
998 UWord cfsi_size;
1000 DedupPoolAlloc *cfsi_m_pool;
1001 Addr cfsi_minavma;
1002 Addr cfsi_maxavma;
1003 XArray* cfsi_exprs; /* XArray of CfiExpr */
1005 /* Optimized code under Wine x86: MSVC++ PDB FramePointerOmitted
1006 data. Non-expandable array, hence .size == .used. */
1007 FPO_DATA* fpo;
1008 UWord fpo_size;
1009 Addr fpo_minavma;
1010 Addr fpo_maxavma;
1011 Addr fpo_base_avma;
1013 /* Pool of strings -- the string table. Pointers
1014 into this are stable (the memory is not reallocated). */
1015 DedupPoolAlloc *strpool;
1017 /* Pool of FnDn -- filename and dirname.
1018 Elements in the pool are allocated using VG_(allocFixedEltDedupPA). */
1019 DedupPoolAlloc *fndnpool;
1021 /* Variable scope information, as harvested from Dwarf3 files.
1023 In short it's an
1025 array of (array of PC address ranges and variables)
1027 The outer array indexes over scopes, with Entry 0 containing
1028 information on variables which exist for any value of the program
1029 counter (PC) -- that is, the outermost scope. Entries 1, 2, 3,
1030 etc contain information on increasingly deeply nested variables.
1032 Each inner array is an array of (an address range, and a set
1033 of variables that are in scope over that address range).
1035 The address ranges may not overlap.
1037 Since Entry 0 in the outer array holds information on variables
1038 that exist for any value of the PC (that is, global vars), it
1039 follows that Entry 0's inner array can only have one address
1040 range pair, one that covers the entire address space.
1042 XArray* /* of OSet of DiAddrRange */varinfo;
1044 /* These are arrays of the relevant typed objects, held here
1045 partially for the purposes of visiting each object exactly once
1046 when we need to delete them. */
1048 /* An array of TyEnts. These are needed to make sense of any types
1049 in the .varinfo. Also, when deleting this DebugInfo, we must
1050 first traverse this array and throw away malloc'd stuff hanging
1051 off it -- by calling ML_(TyEnt__make_EMPTY) on each entry. */
1052 XArray* /* of TyEnt */ admin_tyents;
1054 /* An array of guarded DWARF3 expressions. */
1055 XArray* admin_gexprs;
1057 /* Cached last rx mapping matched and returned by ML_(find_rx_mapping).
1058 This helps performance a lot during ML_(addLineInfo) etc., which can
1059 easily be invoked hundreds of thousands of times. */
1060 DebugInfoMapping* last_rx_map;
1063 /* --------------------- functions --------------------- */
1065 /* ------ Adding ------ */
1067 /* Add a symbol to si's symbol table. The contents of 'sym' are
1068 copied. It is assumed (and checked) that 'sym' only contains one
1069 name, so there is no auxiliary ::sec_names vector to duplicate.
1070 IOW, the copy is a shallow copy, and there are assertions in place
1071 to ensure that's OK. */
1072 extern void ML_(addSym) ( struct _DebugInfo* di, DiSym* sym );
1074 /* Add a filename/dirname pair to a DebugInfo and returns the index
1075 in the fndnpool fixed pool. */
1076 extern UInt ML_(addFnDn) (struct _DebugInfo* di,
1077 const HChar* filename,
1078 const HChar* dirname); /* NULL is allowable */
1080 /* Returns the filename of the fndn pair identified by fndn_ix.
1081 Returns "???" if fndn_ix is 0. */
1082 extern const HChar* ML_(fndn_ix2filename) (const DebugInfo* di,
1083 UInt fndn_ix);
1085 /* Returns the dirname of the fndn pair identified by fndn_ix.
1086 Returns "" if fndn_ix is 0 or fndn->dirname is NULL. */
1087 extern const HChar* ML_(fndn_ix2dirname) (const DebugInfo* di,
1088 UInt fndn_ix);
1090 /* Returns the fndn_ix for the LineInfo locno in di->loctab.
1091 0 if filename/dirname are unknown. */
1092 extern UInt ML_(fndn_ix) (const DebugInfo* di, Word locno);
1094 /* Add a line-number record to a DebugInfo.
1095 fndn_ix is an index in di->fndnpool, allocated using ML_(addFnDn).
1096 Give a 0 index for an unknown filename/dirname pair. */
1097 extern
1098 void ML_(addLineInfo) ( struct _DebugInfo* di,
1099 UInt fndn_ix,
1100 Addr this, Addr next, Int lineno, Int entry);
1102 /* Add a call inlined record to a DebugInfo.
1103 A call to the below means that inlinedfn code has been
1104 inlined, resulting in code from [addr_lo, addr_hi[.
1105 Note that addr_hi is excluded, i.e. is not part of the inlined code.
1106 fndn_ix and lineno identifies the location of the call that caused
1107 this inlining.
1108 fndn_ix is an index in di->fndnpool, allocated using ML_(addFnDn).
1109 Give a 0 index for an unknown filename/dirname pair.
1110 In case of nested inlining, a small level indicates the call
1111 is closer to main than a call with a higher level. */
1112 extern
1113 void ML_(addInlInfo) ( struct _DebugInfo* di,
1114 Addr addr_lo, Addr addr_hi,
1115 const HChar* inlinedfn,
1116 UInt fndn_ix,
1117 Int lineno, UShort level);
1119 /* Add a CFI summary record. The supplied DiCfSI_m is copied. */
1120 extern void ML_(addDiCfSI) ( struct _DebugInfo* di,
1121 Addr base, UInt len, DiCfSI_m* cfsi_m );
1123 /* Given a position in the di->cfsi_base/cfsi_m_ix arrays, return
1124 the corresponding cfsi_m*. Return NULL if the position corresponds
1125 to a cfsi hole. */
1126 DiCfSI_m* ML_(get_cfsi_m) (const DebugInfo* di, UInt pos);
1128 /* Add a string to the string table of a DebugInfo. If len==-1,
1129 ML_(addStr) will itself measure the length of the string. */
1130 extern const HChar* ML_(addStr) ( DebugInfo* di, const HChar* str, Int len );
1132 /* Add a string to the string table of a DebugInfo, by copying the
1133 string from the given DiCursor. Measures the length of the string
1134 itself. */
1135 extern const HChar* ML_(addStrFromCursor)( DebugInfo* di, DiCursor c );
1137 extern void ML_(addVar)( struct _DebugInfo* di,
1138 Int level,
1139 Addr aMin,
1140 Addr aMax,
1141 const HChar* name,
1142 UWord typeR, /* a cuOff */
1143 const GExpr* gexpr,
1144 const GExpr* fbGX, /* SHARED. */
1145 UInt fndn_ix, /* where decl'd - may be zero */
1146 Int lineNo, /* where decl'd - may be zero */
1147 Bool show );
1148 /* Note: fndn_ix identifies a filename/dirname pair similarly to
1149 ML_(addInlInfo) and ML_(addLineInfo). */
1151 /* Canonicalise the tables held by 'di', in preparation for use. Call
1152 this after finishing adding entries to these tables. */
1153 extern void ML_(canonicaliseTables) ( struct _DebugInfo* di );
1155 /* Canonicalise the call-frame-info table held by 'di', in preparation
1156 for use. This is called by ML_(canonicaliseTables) but can also be
1157 called on its own to sort just this table. */
1158 extern void ML_(canonicaliseCFI) ( struct _DebugInfo* di );
1160 /* ML_(finish_CFSI_arrays) fills in the cfsi_base and cfsi_m_ix arrays
1161 from cfsi_rd array. cfsi_rd is then freed. */
1162 extern void ML_(finish_CFSI_arrays) ( struct _DebugInfo* di );
1164 /* ------ Searching ------ */
1166 /* Find a symbol-table index containing the specified pointer, or -1
1167 if not found. Binary search. */
1168 extern Word ML_(search_one_symtab) ( DebugInfo* di, Addr ptr,
1169 Bool findText );
1171 /* Find a location-table index containing the specified pointer, or -1
1172 if not found. Binary search. */
1173 extern Word ML_(search_one_loctab) ( DebugInfo* di, Addr ptr );
1175 /* Find a CFI-table index containing the specified pointer, or -1 if
1176 not found. Binary search. */
1177 extern Word ML_(search_one_cfitab) ( DebugInfo* di, Addr ptr );
1179 /* Find a FPO-table index containing the specified pointer, or -1
1180 if not found. Binary search. */
1181 extern Word ML_(search_one_fpotab) ( const DebugInfo* di, Addr ptr );
1183 /* Helper function for the most often needed searching for an rx
1184 mapping containing the specified address range. The range must
1185 fall entirely within the mapping to be considered to be within it.
1186 Asserts if lo > hi; caller must ensure this doesn't happen. */
1187 extern DebugInfoMapping* ML_(find_rx_mapping) ( DebugInfo* di,
1188 Addr lo, Addr hi );
1190 /* ------ Misc ------ */
1192 /* Show a non-fatal debug info reading error. Use VG_(core_panic) for
1193 fatal errors. 'serious' errors are always shown; non-'serious' ones
1194 are shown only at verbosity level 2 and above. */
1195 extern
1196 void ML_(symerr) ( const DebugInfo* di, Bool serious, const HChar* msg );
1198 /* Print a symbol. */
1199 extern void ML_(ppSym) ( Int idx, const DiSym* sym );
1201 /* Print a call-frame-info summary. */
1202 extern void ML_(ppDiCfSI) ( const XArray* /* of CfiExpr */ exprs,
1203 Addr base, UInt len,
1204 const DiCfSI_m* si_m );
/* True when symbol-table tracing is switched on.  Expands to a read of
   di->trace_symtab, so an identifier named 'di' through which
   ->trace_symtab can be reached must be in scope wherever this macro
   (or TRACE_SYMTAB) is used. */
#define TRACE_SYMTAB_ENABLED (di->trace_symtab)

/* Print a trace message via VG_(printf), but only when
   TRACE_SYMTAB_ENABLED is true.  'format' and the optional trailing
   arguments are passed through to VG_(printf) unchanged.

   The expansion is wrapped in do { ... } while (0) so that an
   invocation followed by ';' forms exactly one statement.  With a bare
   'if (...) { ... }' expansion, code such as
      if (c) TRACE_SYMTAB(...); else ...
   does not parse, because the ';' terminates the outer 'if' before the
   'else' is seen. */
#define TRACE_SYMTAB(format, args...)                               \
   do {                                                             \
      if (TRACE_SYMTAB_ENABLED) { VG_(printf)(format, ## args); }   \
   } while (0)
1212 #endif /* ndef __PRIV_STORAGE_H */
1214 /*--------------------------------------------------------------------*/
1215 /*--- end ---*/
1216 /*--------------------------------------------------------------------*/