coregrind/m_debuginfo/readdwarf3.c

   1 /* -*- mode: C; c-basic-offset: 3; -*- */
   2
   3 /*--------------------------------------------------------------------*/
   4 /*--- Read DWARF3/4 ".debug_info" sections (DIE trees).            ---*/
   5 /*---                                                 readdwarf3.c ---*/
   6 /*--------------------------------------------------------------------*/
   7
   8 /*
   9    This file is part of Valgrind, a dynamic binary instrumentation
  10    framework.
  11
  12    Copyright (C) 2008-2017 OpenWorks LLP
  13       info@open-works.co.uk
  14
  15    This program is free software; you can redistribute it and/or
  16    modify it under the terms of the GNU General Public License as
  17    published by the Free Software Foundation; either version 2 of the
  18    License, or (at your option) any later version.
  19
  20    This program is distributed in the hope that it will be useful, but
  21    WITHOUT ANY WARRANTY; without even the implied warranty of
  22    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  23    General Public License for more details.
  24
  25    You should have received a copy of the GNU General Public License
  26    along with this program; if not, see <http://www.gnu.org/licenses/>.
  27
  28    The GNU General Public License is contained in the file COPYING.
  29
  30    Neither the names of the U.S. Department of Energy nor the
  31    University of California nor the names of its contributors may be
  32    used to endorse or promote products derived from this software
  33    without prior written permission.
  34 */
  35
  36 #if defined(VGO_linux) || defined(VGO_darwin) || defined(VGO_solaris) || defined(VGO_freebsd)
  37
  38 /* REFERENCE (without which this code will not make much sense):
  39
  40    DWARF Debugging Information Format, Version 3,
  41    dated 20 December 2005 (the "D3 spec").
  42
  43    Available at http://www.dwarfstd.org/Dwarf3.pdf.  There's also a
  44    .doc (MS Word) version, but for some reason the section numbers
  45    between the Word and PDF versions differ by 1 in the first digit.
  46    All section references in this code are to the PDF version.
  47
  48    CURRENT HACKS:
  49
  50    DW_TAG_{const,volatile}_type no DW_AT_type is allowed; it is
  51       assumed to mean "const void" or "volatile void" respectively.
  52       GDB appears to interpret them like this, anyway.
  53
  54    In many cases it is important to know the svma of a CU (the "base
  55    address of the CU", as the D3 spec calls it).  There are some
  56    situations in which the spec implies this value is unknown, but the
   57    Dwarf3 produced by gcc-4.1 seems to assume it is not unknown but
  58    merely zero when not explicitly stated.  So we too have to make
  59    that assumption.
  60
  61    POTENTIAL BUG?  Spotted 6 Sept 08.  Why doesn't
  62    unitary_range_list() bias the resulting range list in the same way
  63    that its more general cousin, get_range_list(), does?  I don't
  64    know.
  65
  66    TODO, 2008 Feb 17:
  67
  68    get rid of cu_svma_known and document the assumed-zero svma hack.
  69
  70    ML_(sizeOfType): differentiate between zero sized types and types
  71    for which the size is unknown.  Is this important?  I don't know.
  72
  73    DW_TAG_array_types: deal with explicit sizes (currently we compute
  74    the size from the bounds and the element size, although that's
   75    fragile if the bounds are incompletely specified, or completely
  76    absent)
  77
  78    Document reason for difference (by 1) of stack preening depth in
  79    parse_var_DIE vs parse_type_DIE.
  80
  81    Don't hand to ML_(addVars), vars whose locations are entirely in
  82    registers (DW_OP_reg*).  This is merely a space-saving
  83    optimisation, as ML_(evaluate_Dwarf3_Expr) should handle these
  84    expressions correctly, by failing to evaluate them and hence
  85    effectively ignoring the variable with which they are associated.
  86
  87    Deal with DW_TAG_array_types which have element size != stride
  88
  89    In some cases, the info for a variable is split between two
  90    different DIEs (generally a declarer and a definer).  We punt on
  91    these.  Could do better here.
  92
  93    The 'data_bias' argument passed to the expression evaluator
  94    (ML_(evaluate_Dwarf3_Expr)) should really be changed to a
  95    MaybeUWord, to make it clear when we do vs don't know what it is
  96    for the evaluation of an expression.  At the moment zero is passed
  97    for this parameter in the don't know case.  That's a bit fragile
  98    and obscure; using a MaybeUWord would be clearer.
  99
 100    POTENTIAL PERFORMANCE IMPROVEMENTS:
 101
 102    Currently, duplicate removal and all other queries for the type
  103    entities array are done using cuOffset-based pointing, which
 104    involves a binary search (VG_(lookupXA)) for each access.  This is
 105    wildly inefficient, although simple.  It would be better to
 106    translate all the cuOffset-based references (iow, all the "R" and
 107    "Rs" fields in the TyEnts in 'tyents') to direct index numbers in
 108    'tyents' right at the start of dedup_types(), and use direct
 109    indexing (VG_(indexXA)) wherever possible after that.
 110
 111    cmp__XArrays_of_AddrRange is also a performance bottleneck.  Move
 112    VG_(indexXA) into pub_tool_xarray.h so it can be inlined at all use
 113    points, and possibly also make an _UNCHECKED version which skips
 114    the range checks in performance-critical situations such as this.
 115
 116    Handle interaction between read_DIE and parse_{var,type}_DIE
 117    better.  Currently read_DIE reads the entire DIE just to find where
 118    the end is (and for debug printing), so that it can later reliably
 119    move the cursor to the end regardless of what parse_{var,type}_DIE
 120    do.  This means many DIEs (most, even?) are read twice.  It would
 121    be smarter to make parse_{var,type}_DIE return a Bool indicating
 122    whether or not they advanced the DIE cursor, and only if they
 123    didn't should read_DIE itself read through the DIE.
 124
 125    ML_(addVar) and add_var_to_arange: quite a lot of DiAddrRanges have
 126    zero variables in their .vars XArray.  Rather than have an XArray
 127    with zero elements (which uses 2 malloc'd blocks), allow the .vars
 128    pointer to be NULL in this case.
 129
 130    More generally, reduce the amount of memory allocated and freed
 131    while reading Dwarf3 type/variable information.  Even modest (20MB)
 132    objects cause this module to allocate and free hundreds of
 133    thousands of small blocks, and ML_(arena_malloc) and its various
 134    groupies always show up at the top of performance profiles. */
 135
 136 #include "pub_core_basics.h"
 137 #include "pub_core_debuginfo.h"
 138 #include "pub_core_libcbase.h"
 139 #include "pub_core_libcassert.h"
 140 #include "pub_core_libcprint.h"
 141 #include "pub_core_libcsetjmp.h"   // setjmp facilities
 142 #include "pub_core_hashtable.h"
 143 #include "pub_core_options.h"
 144 #include "pub_core_tooliface.h"    /* VG_(needs) */
 145 #include "pub_core_xarray.h"
 146 #include "pub_core_wordfm.h"
 147 #include "priv_misc.h"             /* dinfo_zalloc/free */
 148 #include "priv_image.h"
 149 #include "priv_tytypes.h"
 150 #include "priv_d3basics.h"
 151 #include "priv_storage.h"
 152 #include "priv_readdwarf3.h"       /* self */
 153
 154
 155 /*------------------------------------------------------------*/
 156 /*---                                                      ---*/
 157 /*--- Basic machinery for parsing DIEs.                    ---*/
 158 /*---                                                      ---*/
 159 /*------------------------------------------------------------*/
 160
 161 #define TRACE_D3(format, args...) \
 162    if (UNLIKELY(td3)) { VG_(printf)(format, ## args); }
 163 #define TD3 (UNLIKELY(td3))
 164
 165 #define D3_INVALID_CUOFF  ((UWord)(-1UL))
 166 #define D3_FAKEVOID_CUOFF ((UWord)(-2UL))
 167
 168 typedef
 169    struct {
 170       DiSlice sli;      // to which this cursor applies
 171       DiOffT  sli_next; // offset in underlying DiImage; must be >= sli.ioff
 172       void (*barf)( const HChar* ) __attribute__((noreturn));
 173       const HChar* barfstr;
 174    }
 175    Cursor;
 176
 177 static inline Bool is_sane_Cursor ( const Cursor* c ) {
 178    if (!c)                return False;
 179    if (!c->barf)          return False;
 180    if (!c->barfstr)       return False;
 181    if (!ML_(sli_is_valid)(c->sli))    return False;
 182    if (c->sli.ioff == DiOffT_INVALID) return False;
 183    if (c->sli_next < c->sli.ioff)     return False;
 184    return True;
 185 }
 186
 187 // Initialise a cursor from a DiSlice (ELF section, really) so as to
 188 // start reading at offset |sli_initial_offset| from the start of the
 189 // slice.
 190 static void init_Cursor ( /*OUT*/Cursor* c,
 191                           DiSlice sli,
 192                           ULong   sli_initial_offset,
 193                           __attribute__((noreturn)) void (*barf)(const HChar*),
 194                           const HChar* barfstr )
 195 {
 196    vg_assert(c);
 197    VG_(bzero_inline)(c, sizeof(*c));
 198    c->sli              = sli;
 199    c->sli_next         = c->sli.ioff + sli_initial_offset;
 200    c->barf             = barf;
 201    c->barfstr          = barfstr;
 202    vg_assert(is_sane_Cursor(c));
 203 }
 204
 205 static Bool is_at_end_Cursor ( const Cursor* c ) {
 206    vg_assert(is_sane_Cursor(c));
 207    return c->sli_next >= c->sli.ioff + c->sli.szB;
 208 }
 209
 210 static inline ULong get_position_of_Cursor ( const Cursor* c ) {
 211    vg_assert(is_sane_Cursor(c));
 212    return c->sli_next - c->sli.ioff;
 213 }
 214 static inline void set_position_of_Cursor ( Cursor* c, ULong pos ) {
 215    c->sli_next = c->sli.ioff + pos;
 216    vg_assert(is_sane_Cursor(c));
 217 }
 218 static inline void advance_position_of_Cursor ( Cursor* c, ULong delta ) {
 219    c->sli_next += delta;
 220    vg_assert(is_sane_Cursor(c));
 221 }
 222
 223 static /*signed*/Long get_remaining_length_Cursor ( const Cursor* c ) {
 224    vg_assert(is_sane_Cursor(c));
 225    return c->sli.ioff + c->sli.szB - c->sli_next;
 226 }
 227
 228 //static void* get_address_of_Cursor ( Cursor* c ) {
 229 //   vg_assert(is_sane_Cursor(c));
 230 //   return &c->region_start_img[ c->region_next ];
 231 //}
 232
 233 static DiCursor get_DiCursor_from_Cursor ( const Cursor* c ) {
 234    return mk_DiCursor(c->sli.img, c->sli_next);
 235 }
 236
 237 /* FIXME: document assumptions on endianness for
 238    get_UShort/UInt/ULong. */
 239 static inline UChar get_UChar ( Cursor* c ) {
 240    UChar r;
 241    vg_assert(is_sane_Cursor(c));
 242    if (c->sli_next + sizeof(UChar) > c->sli.ioff + c->sli.szB) {
 243       c->barf(c->barfstr);
 244       /*NOTREACHED*/
 245       vg_assert(0);
 246    }
 247    r = ML_(img_get_UChar)(c->sli.img, c->sli_next);
 248    c->sli_next += sizeof(UChar);
 249    return r;
 250 }
 251 static UShort get_UShort ( Cursor* c ) {
 252    UShort r;
 253    vg_assert(is_sane_Cursor(c));
 254    if (c->sli_next + sizeof(UShort) > c->sli.ioff + c->sli.szB) {
 255       c->barf(c->barfstr);
 256       /*NOTREACHED*/
 257       vg_assert(0);
 258    }
 259    r = ML_(img_get_UShort)(c->sli.img, c->sli_next);
 260    c->sli_next += sizeof(UShort);
 261    return r;
 262 }
 263 static UInt get_UInt ( Cursor* c ) {
 264    UInt r;
 265    vg_assert(is_sane_Cursor(c));
 266    if (c->sli_next + sizeof(UInt) > c->sli.ioff + c->sli.szB) {
 267       c->barf(c->barfstr);
 268       /*NOTREACHED*/
 269       vg_assert(0);
 270    }
 271    r = ML_(img_get_UInt)(c->sli.img, c->sli_next);
 272    c->sli_next += sizeof(UInt);
 273    return r;
 274 }
 275 static ULong get_ULong ( Cursor* c ) {
 276    ULong r;
 277    vg_assert(is_sane_Cursor(c));
 278    if (c->sli_next + sizeof(ULong) > c->sli.ioff + c->sli.szB) {
 279       c->barf(c->barfstr);
 280       /*NOTREACHED*/
 281       vg_assert(0);
 282    }
 283    r = ML_(img_get_ULong)(c->sli.img, c->sli_next);
 284    c->sli_next += sizeof(ULong);
 285    return r;
 286 }
 287 static ULong get_ULEB128 ( Cursor* c ) {
 288    ULong result;
 289    Int   shift;
 290    UChar byte;
 291    /* unroll first iteration */
 292    byte = get_UChar( c );
 293    result = (ULong)(byte & 0x7f);
 294    if (LIKELY(!(byte & 0x80))) return result;
 295    shift = 7;
 296    /* end unroll first iteration */
 297    do {
 298       byte = get_UChar( c );
 299       result |= ((ULong)(byte & 0x7f)) << shift;
 300       shift += 7;
 301    } while (byte & 0x80);
 302    return result;
 303 }
 304 static Long get_SLEB128 ( Cursor* c ) {
 305    ULong  result = 0;
 306    Int    shift = 0;
 307    UChar  byte;
 308    do {
 309       byte = get_UChar(c);
 310       result |= ((ULong)(byte & 0x7f)) << shift;
 311       shift += 7;
 312    } while (byte & 0x80);
 313    if (shift < 64 && (byte & 0x40))
 314       result |= -(1ULL << shift);
 315    return result;
 316 }
 317 static UInt get_UInt3 ( Cursor* c ) {
 318    UChar c1, c2, c3;
 319    vg_assert(is_sane_Cursor(c));
 320    if (c->sli_next + 3 > c->sli.ioff + c->sli.szB) {
 321       c->barf(c->barfstr);
 322       /*NOTREACHED*/
 323       vg_assert(0);
 324    }
 325    c1 = ML_(img_get_UChar)(c->sli.img, c->sli_next);
 326    c2 = ML_(img_get_UChar)(c->sli.img, c->sli_next+1);
 327    c3 = ML_(img_get_UChar)(c->sli.img, c->sli_next+2);
 328    c->sli_next += 3;
 329 #if defined(VG_BIGENDIAN)
 330    return c1 << 16 | c2 << 8 | c3;
 331 #else
 332    return c1 | c2 << 8 | c3 << 16;
 333 #endif
 334 }
 335
 336
 337 /* Assume 'c' points to the start of a string.  Return a DiCursor of
 338    whatever it points at, and advance it past the terminating zero.
 339    This makes it safe for the caller to then copy the string with
 340    ML_(addStr), since (w.r.t. image overruns) the process of advancing
 341    past the terminating zero will already have "vetted" the string. */
 342 static DiCursor get_AsciiZ ( Cursor* c ) {
 343    UChar uc;
 344    DiCursor res = get_DiCursor_from_Cursor(c);
 345    do { uc = get_UChar(c); } while (uc != 0);
 346    return res;
 347 }
 348
 349 static ULong peek_ULEB128 ( Cursor* c ) {
 350    DiOffT here = c->sli_next;
 351    ULong  r    = get_ULEB128( c );
 352    c->sli_next = here;
 353    return r;
 354 }
 355 static UChar peek_UChar ( Cursor* c ) {
 356    DiOffT here = c->sli_next;
 357    UChar  r    = get_UChar( c );
 358    c->sli_next = here;
 359    return r;
 360 }
 361
 362 static ULong get_Dwarfish_UWord ( Cursor* c, Bool is_dw64 ) {
 363    return is_dw64 ? get_ULong(c) : (ULong) get_UInt(c);
 364 }
 365
 366 static UWord get_UWord ( Cursor* c ) {
 367    vg_assert(sizeof(UWord) == sizeof(void*));
 368    if (sizeof(UWord) == 4) return get_UInt(c);
 369    if (sizeof(UWord) == 8) return get_ULong(c);
 370    vg_assert(0);
 371 }
 372
 373 /* Read a DWARF3 'Initial Length' field */
 374 static ULong get_Initial_Length ( /*OUT*/Bool* is64,
 375                                   Cursor* c,
 376                                   const HChar* barfMsg )
 377 {
 378    ULong w64;
 379    UInt  w32;
 380    *is64 = False;
 381    w32 = get_UInt( c );
 382    if (w32 >= 0xFFFFFFF0 && w32 < 0xFFFFFFFF) {
 383       c->barf( barfMsg );
 384    }
 385    else if (w32 == 0xFFFFFFFF) {
 386       *is64 = True;
 387       w64   = get_ULong( c );
 388    } else {
 389       *is64 = False;
 390       w64 = (ULong)w32;
 391    }
 392    return w64;
 393 }
 394
 395
 396 /*------------------------------------------------------------*/
 397 /*---                                                      ---*/
 398 /*--- "CUConst" structure                                  ---*/
 399 /*---                                                      ---*/
 400 /*------------------------------------------------------------*/
 401
 402 typedef
 403    struct _name_form {
 404       ULong at_name;  // Dwarf Attribute name
 405       ULong at_form;  // Dwarf Attribute form
 406       Long  at_val;   // Dwarf Attribute value (for implicit_const)
 407       UInt  skip_szB; // Nr of bytes skippable from here ...
 408       UInt  next_nf;  // ... to reach this attr/form index in the g_abbv.nf
 409    } name_form;
 410 /* skip_szB and next_nf are used to optimise the skipping of uninteresting DIEs.
 411    Each name_form maintains how many (fixed) nr of bytes can be skipped from
 412    the beginning of this form till the next attr/form to look at.
 413    The next form to look can be:
 414        an 'interesting' attr/form to read while skipping a DIE
 415           (currently, this is only DW_AT_sibling)
 416    or
 417        a variable length form which must be read to be skipped.
 418    For a variable length form, the skip_szB will be equal to VARSZ_FORM.
 419
 420    Note: this technique could also be used to speed up the parsing
 421    of DIEs : for each parser kind, we could have the nr of bytes
 422    to skip to directly reach the interesting form(s) for the parser. */
 423
 424 typedef
 425    struct _g_abbv {
 426       struct _g_abbv *next; // read/write by hash table.
 427       UWord  abbv_code;     // key, read by hash table
 428       ULong  atag;
 429       ULong  has_children;
 430       name_form nf[0];
 431       /* Variable-length array of name/form pairs, terminated
 432          by a 0/0 pair.
 433          The skip_szB/next_nf allows to skip efficiently a DIE
 434          described by this g_abbv; */
 435     } g_abbv;
 436
 437 /* Holds information about the .debug_abbrev section for this CU.  The current
 438   Cursor into the abbrev section, the known abbrev codes are but into an hash
 439   table.  The (starting) offset into the abbrev_offset can be used to check
 440   whether the abbv can be shared between CUs.  The done boolean is set when all
 441   known codes have been read.  Initialize a new abbv_state with init_ht_abbvs.
 442   To read any new abbrev codes not yet in the hash table call find_ht_abbvs
 443   (get_abbv will first query the ht_abbvs, then if not done, call
 444   find_ht_abbvs).  */
 445 typedef
 446    struct _abbv_state {
 447       Cursor c; /* Current cursor into .debug_abbrev.  */
 448       VgHashTable *ht_abbvs; /* Hash table mapping codes to abbrevs.  */
 449       ULong debug_abbrev_offset; /* Starting offset into .debug_abbrev.  */
 450       Bool done; /* Whether there (might) still be new abbrev codes not yet
 451                     in the cache.  */
 452    } abbv_state;
 453
 454 /* Holds information that is constant through the parsing of a
 455    Compilation Unit.  This is basically plumbed through to
 456    everywhere. */
 457 typedef
 458    struct {
 459       /* Call here if anything goes wrong */
 460       void (*barf)( const HChar* ) __attribute__((noreturn));
 461       /* Is this 64-bit DWARF ? */
 462       Bool   is_dw64;
 463       /* Which DWARF version ?  (2, 3, 4 or 5) */
 464       UShort version;
 465       /* Length of this Compilation Unit, as stated in the
 466          .unit_length :: InitialLength field of the CU Header.
 467          However, this size (as specified by the D3 spec) does not
 468          include the size of the .unit_length field itself, which is
 469          either 4 or 12 bytes (32-bit or 64-bit Dwarf3).  That value
 470          can be obtained through the expression ".is_dw64 ? 12 : 4". */
 471       ULong  unit_length;
 472       /* Offset of start of this unit in .debug_info */
 473       UWord  cu_start_offset;
 474       /* SVMA for this CU.  In the D3 spec, is known as the "base
 475          address of the compilation unit (last para sec 3.1.1).
 476          Needed for (amongst things) interpretation of location-list
 477          values. */
 478       Addr   cu_svma;
 479       Bool   cu_svma_known;
 480
 481       /* The debug_abbreviations table to be used for this Unit */
 482       //UChar* debug_abbv;
 483       /* Upper bound on size thereof (an overestimate, in general) */
 484       //UWord  debug_abbv_maxszB;
 485       /* A bounded area of the image, to be used as the
 486          debug_abbreviations table tobe used for this Unit. */
 487       DiSlice debug_abbv;
 488
 489       /* Image information for various sections. */
 490       DiSlice escn_debug_str;
 491       DiSlice escn_debug_ranges;
 492       DiSlice escn_debug_rnglists;
 493       DiSlice escn_debug_loclists;
 494       DiSlice escn_debug_loc;
 495       DiSlice escn_debug_line;
 496       DiSlice escn_debug_info;
 497       DiSlice escn_debug_types;
 498       DiSlice escn_debug_info_alt;
 499       DiSlice escn_debug_str_alt;
 500       DiSlice escn_debug_line_str;
 501       DiSlice escn_debug_addr;
 502       DiSlice escn_debug_str_offsets;
 503       /* How much to add to .debug_types resp. alternate .debug_info offsets
 504          in cook_die*.  */
 505       UWord  types_cuOff_bias;
 506       UWord  alt_cuOff_bias;
 507       /* DW_AT_addr_base */
 508       Addr   cu_addr_base;
 509       Bool   cu_has_addr_base;
 510       /* DW_AT_str_offsets_base */
 511       Addr   cu_str_offsets_base;
 512       Bool   cu_has_str_offsets_base;
 513       /* DW_AT_rnglists_base */
 514       Addr   cu_rnglists_base;
 515       Bool   cu_has_rnglists_base;
 516       /* DW_AT_loclists_base */
 517       Addr   cu_loclists_base;
 518       Bool   cu_has_loclists_base;
 519       /* --- Needed so we can add stuff to the string table. --- */
 520       struct _DebugInfo* di;
 521       /* --- State of the hash table of g_abbv (i.e. parsed abbreviations)
 522              technically makes this struct not const.  --- */
 523       abbv_state abbv;
 524
 525       /* True if this came from .debug_types; otherwise it came from
 526          .debug_info.  */
 527       Bool is_type_unit;
 528       /* For a unit coming from .debug_types, these hold the TU's type
 529          signature and the uncooked DIE offset of the TU's signatured
 530          type.  For a unit coming from .debug_info, these are unused.  */
 531       ULong type_signature;
 532       ULong type_offset;
 533
 534       /* Signatured type hash; computed once and then shared by all
 535          CUs.  */
 536       VgHashTable *signature_types;
 537
 538       /* True if this came from alternate .debug_info; otherwise
 539          it came from normal .debug_info or .debug_types.  */
 540       Bool is_alt_info;
 541    }
 542    CUConst;
 543
 544
 545 /* Return the cooked value of DIE depending on whether CC represents a
 546    .debug_types unit.  To cook a DIE, we pretend that the .debug_info,
 547    .debug_types and optional alternate .debug_info sections form
 548    a contiguous whole, so that DIEs coming from .debug_types are numbered
 549    starting at the end of .debug_info and DIEs coming from alternate
 550    .debug_info are numbered starting at the end of .debug_types.  */
 551 static UWord cook_die( const CUConst* cc, UWord die )
 552 {
 553    if (cc->is_type_unit)
 554       die += cc->types_cuOff_bias;
 555    else if (cc->is_alt_info)
 556       die += cc->alt_cuOff_bias;
 557    return die;
 558 }
 559
 560 /* Like cook_die, but understand that DIEs coming from a
 561    DW_FORM_ref_sig8 reference are already cooked.  Also, handle
 562    DW_FORM_GNU_ref_alt from within primary .debug_info or .debug_types
 563    as reference to alternate .debug_info.  */
 564 static UWord cook_die_using_form( const CUConst *cc, UWord die, DW_FORM form)
 565 {
 566    if (form == DW_FORM_ref_sig8)
 567       return die;
 568    if (form == DW_FORM_GNU_ref_alt)
 569       return die + cc->alt_cuOff_bias;
 570    return cook_die( cc, die );
 571 }
 572
 573 /* Return the uncooked offset of DIE and set *TYPE_FLAG to true if the DIE
 574    came from the .debug_types section and *ALT_FLAG to true if the DIE
 575    came from alternate .debug_info section.  */
 576 static UWord uncook_die( const CUConst *cc, UWord die, /*OUT*/Bool *type_flag,
 577                          Bool *alt_flag )
 578 {
 579    *alt_flag = False;
 580    *type_flag = False;
 581    /* The use of escn_debug_{info,types}.szB seems safe to me even if
 582       escn_debug_{info,types} are DiSlice_INVALID (meaning the
 583       sections were not found), because DiSlice_INVALID.szB is always
 584       zero.  That said, it seems unlikely we'd ever get here if
 585       .debug_info or .debug_types were missing. */
 586    if (die >= cc->escn_debug_info.szB) {
 587       if (die >= cc->escn_debug_info.szB + cc->escn_debug_types.szB) {
 588          *alt_flag = True;
 589          die -= cc->escn_debug_info.szB + cc->escn_debug_types.szB;
 590       } else {
 591          *type_flag = True;
 592          die -= cc->escn_debug_info.szB;
 593       }
 594    }
 595    return die;
 596 }
 597
 598 /* Return an entry from .debug_addr with the given index.
 599    Call one of the variants below that do error-checking. */
 600 static ULong get_debug_addr_entry_common( ULong index, const CUConst* cc )
 601 {
 602    vg_assert(cc->cu_has_addr_base);
 603    /* We make the same word-size assumption as DW_FORM_addr. */
 604    UWord addr_pos = cc->cu_addr_base + index * sizeof(UWord);
 605    Cursor cur;
 606    init_Cursor( &cur, cc->escn_debug_addr, addr_pos, cc->barf,
 607                 "get_debug_addr_entry_common: index points outside .debug_addr" );
 608    return (ULong)(UWord)get_UWord(&cur);
 609 }
 610
 611 static ULong get_debug_addr_entry_form( ULong index, const CUConst* cc,
 612                                         DW_FORM form )
 613 {
 614    if(!cc->cu_has_addr_base) {
 615       VG_(printf)(
 616          "get_debug_addr_entry_form: %u (%s) without DW_AT_addr_base\n",
 617          form, ML_(pp_DW_FORM)(form));
 618       cc->barf("get_debug_addr_entry_form: DW_AT_addr_base not set");
 619    }
 620    return get_debug_addr_entry_common( index, cc );
 621 }
 622
 623 static ULong get_debug_addr_entry_lle( ULong index, const CUConst* cc,
 624                                        DW_LLE entry )
 625 {
 626    if(!cc->cu_has_addr_base) {
 627       VG_(printf)(
 628          "get_debug_addr_entry_lle: %u (%s) without DW_AT_addr_base\n",
 629          entry, ML_(pp_DW_LLE)(entry));
 630       cc->barf("get_debug_addr_entry_lle: DW_AT_addr_base not set");
 631    }
 632    return get_debug_addr_entry_common( index, cc );
 633 }
 634
 635 static ULong get_debug_addr_entry_rle( ULong index, const CUConst* cc,
 636                                        DW_RLE entry )
 637 {
 638    if(!cc->cu_has_addr_base) {
 639       VG_(printf)(
 640          "get_debug_addr_entry_rle: %u (%s) without DW_AT_addr_base\n",
 641          entry, ML_(pp_DW_RLE)(entry));
 642       cc->barf("get_debug_addr_entry_rle: DW_AT_addr_base not set");
 643    }
 644    return get_debug_addr_entry_common( index, cc );
 645 }
 646
 647 /*------------------------------------------------------------*/
 648 /*---                                                      ---*/
 649 /*--- Helper functions for Guarded Expressions             ---*/
 650 /*---                                                      ---*/
 651 /*------------------------------------------------------------*/
 652
 653 /* Parse the location list starting at img-offset 'debug_loc_offset'
 654    in .debug_loc.  Results are biased with 'svma_of_referencing_CU'
 655    and so I believe are correct SVMAs for the object as a whole.  This
 656    function allocates the UChar*, and the caller must deallocate it.
 657    The resulting block is in so-called Guarded-Expression format.
 658
 659    Guarded-Expression format is similar but not identical to the DWARF3
 660    location-list format.  The format of each returned block is:
 661
 662       UChar biasMe;
 663       UChar isEnd;
 664       followed by zero or more of
 665
 666       (Addr aMin;  Addr aMax;  UShort nbytes;  ..bytes..;  UChar isEnd)
 667
  668    '..bytes..' is a standard DWARF3 location expression which is
 669    valid when aMin <= pc <= aMax (possibly after suitable biasing).
 670
 671    The number of bytes in '..bytes..' is nbytes.
 672
 673    The end of the sequence is marked by an isEnd == 1 value.  All
 674    previous isEnd values must be zero.
 675
 676    biasMe is 1 if the aMin/aMax fields need this DebugInfo's
  677    text_bias added before use, and 0 if this is not necessary
  678    (the GX is ready to go).
 679
 680    Hence the block can be quickly parsed and is self-describing.  Note
 681    that aMax is 1 less than the corresponding value in a DWARF3
 682    location list.  Zero length ranges, with aMax == aMin-1, are not
 683    allowed.
 684 */
 685 /* 2008-sept-12: moved ML_(pp_GX) from here to d3basics.c, where
 686    it more logically belongs. */
 687
 688
 689 /* Apply a text bias to a GX. */
 690 static void bias_GX ( /*MOD*/GExpr* gx, const DebugInfo* di )
 691 {
 692    UShort nbytes;
 693    UChar* p = &gx->payload[0];
 694    UChar* pA;
 695    UChar  uc;
 696    uc = *p++; /*biasMe*/
 697    if (uc == 0)
 698       return;
 699    vg_assert(uc == 1);
 700    p[-1] = 0; /* mark it as done */
 701    while (True) {
 702       uc = *p++;
 703       if (uc == 1)
 704          break; /*isEnd*/
 705       vg_assert(uc == 0);
 706       /* t-bias aMin */
 707       pA = (UChar*)p;
 708       ML_(write_Addr)(pA, ML_(read_Addr)(pA) + di->text_debug_bias);
 709       p += sizeof(Addr);
 710       /* t-bias aMax */
 711       pA = (UChar*)p;
 712       ML_(write_Addr)(pA, ML_(read_Addr)(pA) + di->text_debug_bias);
 713       p += sizeof(Addr);
 714       /* nbytes, and actual expression */
 715       nbytes = ML_(read_UShort)(p); p += sizeof(UShort);
 716       p += nbytes;
 717    }
 718 }
 719
 720 __attribute__((noinline))
 721 static GExpr* make_singleton_GX ( DiCursor block, ULong nbytes )
 722 {
 723    SizeT  bytesReqd;
 724    GExpr* gx;
 725    UChar *p, *pstart;
 726
 727    vg_assert(sizeof(UWord) == sizeof(Addr));
 728    vg_assert(nbytes <= 0xFFFF); /* else we overflow the nbytes field */
 729    bytesReqd
 730       =   sizeof(UChar)  /*biasMe*/    + sizeof(UChar) /*!isEnd*/
 731         + sizeof(UWord)  /*aMin*/      + sizeof(UWord) /*aMax*/
 732         + sizeof(UShort) /*nbytes*/    + (SizeT)nbytes
 733         + sizeof(UChar); /*isEnd*/
 734
 735    gx = ML_(dinfo_zalloc)( "di.readdwarf3.msGX.1",
 736                            sizeof(GExpr) + bytesReqd );
 737
 738    p = pstart = &gx->payload[0];
 739
 740    p = ML_(write_UChar)(p, 0);        /*biasMe*/
 741    p = ML_(write_UChar)(p, 0);        /*!isEnd*/
 742    p = ML_(write_Addr)(p, 0);         /*aMin*/
 743    p = ML_(write_Addr)(p, ~0);        /*aMax*/
 744    p = ML_(write_UShort)(p, nbytes);  /*nbytes*/
 745    ML_(cur_read_get)(p, block, nbytes); p += nbytes;
 746    p = ML_(write_UChar)(p, 1);        /*isEnd*/
 747
 748    vg_assert( (SizeT)(p - pstart) == bytesReqd);
 749    vg_assert( &gx->payload[bytesReqd]
 750               == ((UChar*)gx) + sizeof(GExpr) + bytesReqd );
 751
 752    return gx;
 753 }
 754
 755 __attribute__((noinline))
 756 static GExpr* make_general_GX ( const CUConst* cc,
 757                                 Bool     td3,
 758                                 ULong    offset,
 759                                 Addr     svma_of_referencing_CU )
 760 {
 761    Bool      done;
 762    Addr      base;
 763    Cursor    loc;
 764    XArray*   xa; /* XArray of UChar */
 765    GExpr*    gx;
 766    Word      nbytes;
 767    Bool      addBase = cc->version < 5;
 768
 769    vg_assert(sizeof(UWord) == sizeof(Addr));
 770    if (cc->version < 5 && (!ML_(sli_is_valid)(cc->escn_debug_loc)
 771                            || cc->escn_debug_loc.szB == 0))
 772       cc->barf("make_general_GX: .debug_loc is empty/missing");
 773    if (cc->version >= 5 && (!ML_(sli_is_valid)(cc->escn_debug_loclists)
 774                            || cc->escn_debug_loclists.szB == 0))
 775       cc->barf("make_general_GX: .debug_loclists is empty/missing");
 776
 777    if (cc->version < 5)
 778       init_Cursor( &loc, cc->escn_debug_loc, 0, cc->barf,
 779                    "Overrun whilst reading .debug_loc section(2)" );
 780    else
 781       init_Cursor( &loc, cc->escn_debug_loclists, 0, cc->barf,
 782                    "Overrun whilst reading .debug_loclists section(2)" );
 783    set_position_of_Cursor( &loc, offset );
 784
 785    TRACE_D3("make_general_GX (offset = %llu, ioff = %llu) {\n",
 786             offset, get_DiCursor_from_Cursor(&loc).ioff );
 787
 788    /* Who frees this xa?  It is freed before this fn exits. */
 789    xa = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.mgGX.1",
 790                     ML_(dinfo_free),
 791                     sizeof(UChar) );
 792
 793    { UChar c = 1; /*biasMe*/ VG_(addBytesToXA)( xa, &c, sizeof(c) ); }
 794
 795    base = 0;
 796    done = False;
 797    while (!done) {
 798       Bool  acquire;
 799       UWord len;
 800       UWord w1;
 801       UWord w2;
 802       if (cc->version < 5) {
 803          /* Read a (host-)word pair.  This is something of a hack since
 804             the word size to read is really dictated by the ELF file;
 805             however, we assume we're reading a file with the same
 806             word-sizeness as the host.  Reasonably enough. */
 807          w1 = get_UWord( &loc );
 808          w2 = get_UWord( &loc );
 809
 810          TRACE_D3("   %08lx %08lx\n", w1, w2);
 811          if (w1 == 0 && w2 == 0) {
 812             done = True;
 813             break; /* end of list */
 814          }
 815
 816          if (w1 == -1UL) {
 817             /* new value for 'base' */
 818             base = w2;
 819             continue;
 820          }
 821          /* else a location expression follows */
 822          len = (UWord)get_UShort( &loc );
 823       } else {
 824          w1 = 0;
 825          w2 = 0;
 826          len = 0;
 827          DW_LLE r = get_UChar( &loc );
 828          switch (r) {
 829          case DW_LLE_end_of_list:
 830             done = True;
 831             break;
 832          case DW_LLE_base_address:
 833             base = get_UWord( &loc );
 834             break;
 835          case DW_LLE_start_length:
 836             w1 = get_UWord( &loc );
 837             w2 = w1 + get_ULEB128( &loc );
 838             len = get_ULEB128( &loc );
 839             break;
 840          case DW_LLE_offset_pair:
 841             w1 = base + get_ULEB128( &loc );
 842             w2 = base + get_ULEB128( &loc );
 843             len = get_ULEB128( &loc );
 844             break;
 845          case DW_LLE_start_end:
 846             w1 = get_UWord ( &loc );
 847             w2 = get_UWord ( &loc );
 848             len = get_ULEB128( &loc );
 849             break;
 850          case DW_LLE_GNU_view_pair:
 851             get_ULEB128( &loc );
 852             get_ULEB128( &loc );
 853             break;
 854          case DW_LLE_base_addressx:
 855             base = get_debug_addr_entry_lle( get_ULEB128( &loc ), cc,
 856                                              DW_LLE_base_addressx );
 857             break;
 858          case DW_LLE_startx_endx:
 859             w1 = get_debug_addr_entry_lle( get_ULEB128( &loc ), cc,
 860                                            DW_LLE_startx_endx );
 861             w2 = get_debug_addr_entry_lle( get_ULEB128( &loc ), cc,
 862                                            DW_LLE_startx_endx );
 863             len = get_ULEB128( &loc );
 864             break;
 865          case DW_LLE_startx_length:
 866             w1 = get_debug_addr_entry_lle( get_ULEB128( &loc ), cc,
 867                                            DW_LLE_startx_length );
 868             w2 = w1 + get_ULEB128( &loc );
 869             len = get_ULEB128( &loc );
 870             break;
 871          case DW_LLE_default_location:
 872          default:
 873             cc->barf( "Unhandled or unknown loclists entry" );
 874             done = True;
 875          }
 876       }
 877
 878       /* else enumerate [w1+base, w2+base) */
 879       /* w2 is 1 past end of range, as per D3 defn for "DW_AT_high_pc"
 880          (sec 2.17.2) */
 881       if (w1 > w2) {
 882          TRACE_D3("negative range is for .debug_loc expr at "
 883                   "file offset %llu\n",
 884                   offset);
 885          cc->barf( "negative range in .debug_loc section" );
 886       }
 887
 888       /* ignore zero length ranges */
 889       acquire = w1 < w2;
 890
 891       if (acquire) {
 892          UWord  w;
 893          UShort s;
 894          UChar  c;
 895          c = 0; /* !isEnd*/
 896          VG_(addBytesToXA)( xa, &c, sizeof(c) );
 897          w = w1    + (addBase ? base : 0) + svma_of_referencing_CU;
 898          VG_(addBytesToXA)( xa, &w, sizeof(w) );
 899          w = w2 -1 + (addBase ? base : 0) + svma_of_referencing_CU;
 900          VG_(addBytesToXA)( xa, &w, sizeof(w) );
 901          s = (UShort)len;
 902          VG_(addBytesToXA)( xa, &s, sizeof(s) );
 903       }
 904
 905       while (len > 0) {
 906          UChar byte = get_UChar( &loc );
 907          TRACE_D3("%02x", (UInt)byte);
 908          if (acquire)
 909             VG_(addBytesToXA)( xa, &byte, 1 );
 910          len--;
 911       }
 912       TRACE_D3("\n");
 913    }
 914
 915    { UChar c = 1; /*isEnd*/ VG_(addBytesToXA)( xa, &c, sizeof(c) ); }
 916
 917    nbytes = VG_(sizeXA)( xa );
 918    vg_assert(nbytes >= 1);
 919
 920    gx = ML_(dinfo_zalloc)( "di.readdwarf3.mgGX.2", sizeof(GExpr) + nbytes );
 921    VG_(memcpy)( &gx->payload[0], (UChar*)VG_(indexXA)(xa,0), nbytes );
 922    vg_assert( &gx->payload[nbytes]
 923               == ((UChar*)gx) + sizeof(GExpr) + nbytes );
 924
 925    VG_(deleteXA)( xa );
 926
 927    TRACE_D3("}\n");
 928
 929    return gx;
 930 }
 931
 932
 933 /*------------------------------------------------------------*/
 934 /*---                                                      ---*/
 935 /*--- Helper functions for range lists and CU headers      ---*/
 936 /*---                                                      ---*/
 937 /*------------------------------------------------------------*/
 938
/* Denotes an address range.  Both aMin and aMax are included in the
   range; hence a complete range is (0, ~0) and an empty range is any
   (X, X-1) for X > 0. */
typedef
   struct { Addr aMin; Addr aMax; } /* inclusive lower / upper bound */
   AddrRange;
 945
 946
 947 /* Generate an arbitrary structural total ordering on
 948    XArray* of AddrRange. */
 949 static Word cmp__XArrays_of_AddrRange ( const XArray* rngs1,
 950                                         const XArray* rngs2 )
 951 {
 952    Word n1, n2, i;
 953    vg_assert(rngs1 && rngs2);
 954    n1 = VG_(sizeXA)( rngs1 );
 955    n2 = VG_(sizeXA)( rngs2 );
 956    if (n1 < n2) return -1;
 957    if (n1 > n2) return 1;
 958    for (i = 0; i < n1; i++) {
 959       AddrRange* rng1 = (AddrRange*)VG_(indexXA)( rngs1, i );
 960       AddrRange* rng2 = (AddrRange*)VG_(indexXA)( rngs2, i );
 961       if (rng1->aMin < rng2->aMin) return -1;
 962       if (rng1->aMin > rng2->aMin) return 1;
 963       if (rng1->aMax < rng2->aMax) return -1;
 964       if (rng1->aMax > rng2->aMax) return 1;
 965    }
 966    return 0;
 967 }
 968
 969
 970 __attribute__((noinline))
 971 static XArray* /* of AddrRange */ empty_range_list ( void )
 972 {
 973    XArray* xa; /* XArray of AddrRange */
 974    /* Who frees this xa?  varstack_preen() does. */
 975    xa = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.erl.1",
 976                     ML_(dinfo_free),
 977                     sizeof(AddrRange) );
 978    return xa;
 979 }
 980
 981
 982 __attribute__((noinline))
 983 static XArray* unitary_range_list ( Addr aMin, Addr aMax )
 984 {
 985    XArray*   xa;
 986    AddrRange pair;
 987    vg_assert(aMin <= aMax);
 988    /* Who frees this xa?  varstack_preen() does. */
 989    xa = VG_(newXA)( ML_(dinfo_zalloc),  "di.readdwarf3.url.1",
 990                     ML_(dinfo_free),
 991                     sizeof(AddrRange) );
 992    pair.aMin = aMin;
 993    pair.aMax = aMax;
 994    VG_(addToXA)( xa, &pair );
 995    return xa;
 996 }
 997
 998
 999 /* Enumerate the address ranges starting at img-offset
1000    'debug_ranges_offset' in .debug_ranges.  Results are biased with
1001    'svma_of_referencing_CU' and so I believe are correct SVMAs for the
1002    object as a whole.  This function allocates the XArray, and the
1003    caller must deallocate it. */
1004 __attribute__((noinline))
1005 static XArray* /* of AddrRange */
1006 get_range_list ( const CUConst* cc,
1007                  Bool     td3,
1008                  UWord    debug_ranges_offset,
1009                  Addr     svma_of_referencing_CU )
1010 {
1011    Addr      base;
1012    Cursor    ranges;
1013    XArray*   xa; /* XArray of AddrRange */
1014    AddrRange pair;
1015
1016    if (cc->version < 5 && (!ML_(sli_is_valid)(cc->escn_debug_ranges)
1017                            || cc->escn_debug_ranges.szB == 0))
1018       cc->barf("get_range_list: .debug_ranges is empty/missing");
1019    if (cc->version >= 5 && (!ML_(sli_is_valid)(cc->escn_debug_rnglists)
1020                             || cc->escn_debug_rnglists.szB == 0))
1021       cc->barf("get_range_list: .debug_rnglists is empty/missing");
1022
1023    if (cc->version < 5)
1024       init_Cursor( &ranges, cc->escn_debug_ranges, 0, cc->barf,
1025                    "Overrun whilst reading .debug_ranges section(2)" );
1026    else
1027       init_Cursor( &ranges, cc->escn_debug_rnglists, 0, cc->barf,
1028                    "Overrun whilst reading .debug_rnglists section(2)" );
1029
1030    set_position_of_Cursor( &ranges, debug_ranges_offset );
1031
1032    /* Who frees this xa?  varstack_preen() does. */
1033    xa = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.grl.1", ML_(dinfo_free),
1034                     sizeof(AddrRange) );
1035    base = 0;
1036    if (cc->version < 5) {
1037       while (True) {
1038          /* Read a (host-)word pair.  This is something of a hack since
1039             the word size to read is really dictated by the ELF file;
1040             however, we assume we're reading a file with the same
1041             word-sizeness as the host.  Reasonably enough. */
1042          UWord w1 = get_UWord( &ranges );
1043          UWord w2 = get_UWord( &ranges );
1044
1045          if (w1 == 0 && w2 == 0)
1046             break; /* end of list. */
1047
1048          if (w1 == -1UL) {
1049             /* new value for 'base' */
1050             base = w2;
1051             continue;
1052          }
1053
1054          /* else enumerate [w1+base, w2+base) */
1055          /* w2 is 1 past end of range, as per D3 defn for "DW_AT_high_pc"
1056             (sec 2.17.2) */
1057          if (w1 > w2)
1058             cc->barf( "negative range in .debug_ranges section" );
1059          if (w1 < w2) {
1060             pair.aMin = w1     + base + svma_of_referencing_CU;
1061             pair.aMax = w2 - 1 + base + svma_of_referencing_CU;
1062             vg_assert(pair.aMin <= pair.aMax);
1063             VG_(addToXA)( xa, &pair );
1064          }
1065       }
1066    } else {
1067       Bool done = False;
1068       while (!done) {
1069          UWord w1 = 0;
1070          UWord w2 = 0;
1071          DW_RLE r = get_UChar( &ranges );
1072          switch (r) {
1073          case DW_RLE_end_of_list:
1074             done = True;
1075             break;
1076          case DW_RLE_base_address:
1077             base = get_UWord( &ranges );
1078             break;
1079          case DW_RLE_start_length:
1080             w1 = get_UWord( &ranges );
1081             w2 = w1 + get_ULEB128( &ranges );
1082             break;
1083          case DW_RLE_offset_pair:
1084             w1 = base + get_ULEB128( &ranges );
1085             w2 = base + get_ULEB128( &ranges );
1086             break;
1087          case DW_RLE_start_end:
1088             w1 = get_UWord ( &ranges );
1089             w2 = get_UWord ( &ranges );
1090             break;
1091          case DW_RLE_base_addressx:
1092             base = get_debug_addr_entry_rle( get_ULEB128( &ranges ), cc,
1093                                              DW_RLE_base_addressx );
1094             break;
1095          case DW_RLE_startx_endx:
1096             w1 = get_debug_addr_entry_rle( get_ULEB128( &ranges ), cc,
1097                                            DW_RLE_startx_endx );
1098             w2 = get_debug_addr_entry_rle( get_ULEB128( &ranges ), cc,
1099                                            DW_RLE_startx_endx );
1100             break;
1101          case DW_RLE_startx_length:
1102             w1 = get_debug_addr_entry_rle( get_ULEB128( &ranges ), cc,
1103                                            DW_RLE_startx_length );
1104             w2 = w1 + get_ULEB128( &ranges );
1105             break;
1106          default:
1107             cc->barf( "Unhandled or unknown range list entry" );
1108             done = True;
1109          }
1110          if (w1 > w2)
1111             cc->barf( "negative range in .debug_rnglists section" );
1112          if (w1 < w2) {
1113             pair.aMin = w1     + svma_of_referencing_CU;
1114             pair.aMax = w2 - 1 + svma_of_referencing_CU;
1115             vg_assert(pair.aMin <= pair.aMax);
1116             VG_(addToXA)( xa, &pair );
1117          }
1118       }
1119    }
1120    return xa;
1121 }
1122
/* Sentinel size.  find_ht_abbvs compares the result of get_Form_szB
   against this value and stores it in skip_szB; presumably it marks
   a form whose size is not fixed -- confirm against get_Form_szB. */
#define VARSZ_FORM 0xffffffff
/* Forward declaration; needed by find_ht_abbvs below.  The
   definition is elsewhere in this file. */
static UInt get_Form_szB (const CUConst* cc, DW_FORM form );
1125
1126 /* Initialises the hash table of abbreviations.  This only sets up the abbv
1127    Cursor and hash table, but does not try to read any abbrevs yes. The actual
1128    reading of abbrevs will be done by get_abbv by calling find_ht_abbvs on
1129    demand if a requested abbrev code isn't in the hash table yet. When using the
1130    inline parser a lot of abbrevs will not be needed so reading everything
1131    upfront will often waste time and memory.  */
1132 static void init_ht_abbvs (CUConst* cc, ULong debug_abbrev_offset,
1133                            Bool td3)
1134 {
1135    Cursor *c = &cc->abbv.c;
1136    init_Cursor( c, cc->debug_abbv, 0, cc->barf,
1137                "Overrun whilst parsing .debug_abbrev section(2)" );
1138    cc->abbv.ht_abbvs = VG_(HT_construct) ("di.readdwarf3.ht_abbvs");
1139    cc->abbv.debug_abbrev_offset = debug_abbrev_offset;
1140    cc->abbv.done = False;
1141 }
1142
1143 static g_abbv *find_ht_abbvs (CUConst* cc, ULong abbv_code,
1144                               Bool td3)
1145 {
1146    Cursor *c;
1147    g_abbv *ta; // temporary abbreviation, reallocated if needed.
1148    UInt ta_nf_maxE; // max nr of pairs in ta.nf[], doubled when reallocated.
1149    UInt ta_nf_n;    // nr of pairs in ta->nf that are initialised.
1150    g_abbv *ht_ta; // abbv to insert in hash table.
1151    Int i;
1152
1153    #define SZ_G_ABBV(_nf_szE) (sizeof(g_abbv) + _nf_szE * sizeof(name_form))
1154
1155    ta_nf_maxE = 10; // starting with enough for 9 pairs+terminating pair.
1156    ta = ML_(dinfo_zalloc) ("di.readdwarf3.ht_ta_nf", SZ_G_ABBV(ta_nf_maxE));
1157
1158    c = &cc->abbv.c;
1159    while (True) {
1160       ht_ta = NULL;
1161       ta->abbv_code = get_ULEB128( c );
1162       if (ta->abbv_code == 0) {
1163          cc->abbv.done = True;
1164          break; /* end of the table */
1165       }
1166
1167       ta->atag = get_ULEB128( c );
1168       ta->has_children = get_UChar( c );
1169       ta_nf_n = 0;
1170       while (True) {
1171          if (ta_nf_n >= ta_nf_maxE) {
1172             g_abbv *old_ta = ta;
1173             ta = ML_(dinfo_zalloc) ("di.readdwarf3.ht_ta_nf",
1174                                     SZ_G_ABBV(2 * ta_nf_maxE));
1175             ta_nf_maxE = 2 * ta_nf_maxE;
1176             VG_(memcpy) (ta, old_ta, SZ_G_ABBV(ta_nf_n));
1177             ML_(dinfo_free) (old_ta);
1178          }
1179          ta->nf[ta_nf_n].at_name = get_ULEB128( c );
1180          ta->nf[ta_nf_n].at_form = get_ULEB128( c );
1181          if (ta->nf[ta_nf_n].at_form == DW_FORM_implicit_const)
1182             ta->nf[ta_nf_n].at_val = get_SLEB128( c );
1183          if (ta->nf[ta_nf_n].at_name == 0 && ta->nf[ta_nf_n].at_form == 0) {
1184             ta_nf_n++;
1185             break;
1186          }
1187         ta_nf_n++;
1188       }
1189
1190       // Initialises the skip_szB/next_nf elements : an element at position
1191       // i must contain the sum of its own size + the sizes of all elements
1192       // following i till either the next variable size element, the next
1193       // sibling element or the end of the DIE.
1194       ta->nf[ta_nf_n - 1].skip_szB = 0;
1195       ta->nf[ta_nf_n - 1].next_nf = 0;
1196       for (i = ta_nf_n - 2; i >= 0; i--) {
1197          const UInt form_szB = get_Form_szB (cc, (DW_FORM)ta->nf[i].at_form);
1198
1199          if (ta->nf[i+1].at_name == DW_AT_sibling
1200              || ta->nf[i+1].skip_szB == VARSZ_FORM) {
1201             ta->nf[i].skip_szB = form_szB;
1202             ta->nf[i].next_nf  = i+1;
1203          } else if (form_szB == VARSZ_FORM) {
1204             ta->nf[i].skip_szB = form_szB;
1205             ta->nf[i].next_nf  = i+1;
1206          } else {
1207             ta->nf[i].skip_szB = ta->nf[i+1].skip_szB + form_szB;
1208             ta->nf[i].next_nf  = ta->nf[i+1].next_nf;
1209          }
1210       }
1211
1212       ht_ta = ML_(dinfo_zalloc) ("di.readdwarf3.ht_ta", SZ_G_ABBV(ta_nf_n));
1213       VG_(memcpy) (ht_ta, ta, SZ_G_ABBV(ta_nf_n));
1214       VG_(HT_add_node) ( cc->abbv.ht_abbvs, ht_ta );
1215       if (TD3) {
1216          TRACE_D3("  Adding abbv_code %lu TAG  %s [%s] nf %u ",
1217                   ht_ta->abbv_code, ML_(pp_DW_TAG)(ht_ta->atag),
1218                   ML_(pp_DW_children)(ht_ta->has_children),
1219                   ta_nf_n);
1220          TRACE_D3("  ");
1221          for (i = 0; i < ta_nf_n; i++)
1222             TRACE_D3("[%u,%u] ", ta->nf[i].skip_szB, ta->nf[i].next_nf);
1223          TRACE_D3("\n");
1224       }
1225       if (ht_ta->abbv_code == abbv_code)
1226          break;
1227    }
1228
1229    ML_(dinfo_free) (ta);
1230    #undef SZ_G_ABBV
1231
1232    return ht_ta;
1233 }
1234
1235 static g_abbv* get_abbv (CUConst* cc, ULong abbv_code,
1236                          Bool td3)
1237 {
1238    g_abbv *abbv;
1239
1240    abbv = VG_(HT_lookup) (cc->abbv.ht_abbvs, abbv_code);
1241    if (!abbv && !cc->abbv.done)
1242       abbv = find_ht_abbvs (cc, abbv_code, td3);
1243    if (!abbv)
1244       cc->barf ("abbv_code not found in ht_abbvs table");
1245
1246    return abbv;
1247 }
1248
1249 /* Parse the Compilation Unit header indicated at 'c' and
1250    initialise 'cc' accordingly. */
1251 static __attribute__((noinline))
1252 void parse_CU_Header ( /*OUT*/CUConst* cc,
1253                        Bool td3,
1254                        Cursor* c,
1255                        DiSlice escn_debug_abbv,
1256                        abbv_state last_abbv,
1257                        Bool type_unit,
1258                        Bool alt_info )
1259 {
1260    UChar  address_size, unit_type;
1261    ULong  debug_abbrev_offset;
1262
1263    VG_(memset)(cc, 0, sizeof(*cc));
1264    vg_assert(c && c->barf);
1265    cc->barf = c->barf;
1266
1267    /* initial_length field */
1268    cc->unit_length
1269       = get_Initial_Length( &cc->is_dw64, c,
1270            "parse_CU_Header: invalid initial-length field" );
1271
1272    TRACE_D3("   Length:        %llu\n", cc->unit_length );
1273
1274    /* version */
1275    cc->version = get_UShort( c );
1276    if (cc->version != 2 && cc->version != 3 && cc->version != 4
1277        && cc->version != 5)
1278       cc->barf( "parse_CU_Header: "
1279                 "is neither DWARF2 nor DWARF3 nor DWARF4 nor DWARF5" );
1280    TRACE_D3("   Version:       %d\n", (Int)cc->version );
1281
1282    /* unit type */
1283    if (cc->version >= 5) {
1284       unit_type = get_UChar( c );
1285       address_size = get_UChar( c );
1286    } else {
1287       unit_type = type_unit ? DW_UT_type : DW_UT_compile;
1288       address_size = 0; /* Will be read later. */
1289    }
1290
1291    /* debug_abbrev_offset */
1292    debug_abbrev_offset = get_Dwarfish_UWord( c, cc->is_dw64 );
1293    if (debug_abbrev_offset >= escn_debug_abbv.szB)
1294       cc->barf( "parse_CU_Header: invalid debug_abbrev_offset" );
1295    TRACE_D3("   Abbrev Offset: %llu\n", debug_abbrev_offset );
1296
1297    /* address size.  If this isn't equal to the host word size, just
1298       give up.  This makes it safe to assume elsewhere that
1299       DW_FORM_addr and DW_FORM_ref_addr can be treated as a host
1300       word. */
1301    if (cc->version < 5)
1302       address_size = get_UChar( c );
1303
1304    if (address_size != sizeof(void*))
1305       cc->barf( "parse_CU_Header: invalid address_size" );
1306    TRACE_D3("   Pointer Size:  %d\n", (Int)address_size );
1307
1308    cc->is_type_unit = type_unit;
1309    cc->is_alt_info = alt_info;
1310
1311    if (type_unit || (cc->version >= 5 && (unit_type == DW_UT_type
1312                                           || unit_type == DW_UT_split_type))) {
1313       cc->type_signature = get_ULong( c );
1314       cc->type_offset = get_Dwarfish_UWord( c, cc->is_dw64 );
1315    }
1316
1317    if (cc->version >= 5 && (unit_type == DW_UT_skeleton
1318                             || unit_type == DW_UT_split_compile)) {
1319       /* dwo_id = */ get_ULong( c );
1320    }
1321
1322    /* Set up cc->debug_abbv to point to the relevant table for this
1323       CU.  Set its .szB so that at least we can't read off the end of
1324       the debug_abbrev section -- potentially (and quite likely) too
1325       big, if this isn't the last table in the section, but at least
1326       it's safe.
1327
1328       This amounts to taking debug_abbv_escn and moving the start
1329       position along by debug_abbrev_offset bytes, hence forming a
1330       smaller DiSlice which has the same end point.  Since we checked
1331       just above that debug_abbrev_offset is less than the size of
1332       debug_abbv_escn, this should leave us with a nonempty slice. */
1333    vg_assert(debug_abbrev_offset < escn_debug_abbv.szB);
1334    cc->debug_abbv      = escn_debug_abbv;
1335    cc->debug_abbv.ioff += debug_abbrev_offset;
1336    cc->debug_abbv.szB  -= debug_abbrev_offset;
1337
1338    if (last_abbv.ht_abbvs != NULL
1339        && debug_abbrev_offset == last_abbv.debug_abbrev_offset) {
1340       cc->abbv = last_abbv;
1341    } else {
1342       if (last_abbv.ht_abbvs != NULL)
1343          VG_(HT_destruct) (last_abbv.ht_abbvs, ML_(dinfo_free));
1344       init_ht_abbvs(cc, debug_abbrev_offset, td3);
1345    }
1346 }
1347
/* This represents a single signatured type.  It maps a type signature
   (a ULong) to a cooked DIE offset.  Objects of this type are stored
   in the type signature hash table.

   NOTE(review): 'next' and 'data' come first, presumably to match
   the node layout the hash table expects -- confirm against the hash
   table implementation before reordering fields. */
typedef
   struct D3SignatureType {
      /* Chain link; followed directly by lookup_signatured_type. */
      struct D3SignatureType *next;
      /* The signature cast to UWord; the same value is passed as the
         key to VG_(HT_lookup). */
      UWord data;
      /* The full 64-bit signature, compared on lookup. */
      ULong type_signature;
      /* Cooked DIE offset of the type. */
      UWord die;
   }
   D3SignatureType;
1359
1360 /* Record a signatured type in the hash table.  */
1361 static void record_signatured_type ( VgHashTable *tab,
1362                                      ULong type_signature,
1363                                      UWord die )
1364 {
1365    D3SignatureType *dstype = ML_(dinfo_zalloc) ( "di.readdwarf3.sigtype",
1366                                                  sizeof(D3SignatureType) );
1367    dstype->data = (UWord) type_signature;
1368    dstype->type_signature = type_signature;
1369    dstype->die = die;
1370    VG_(HT_add_node) ( tab, dstype );
1371 }
1372
1373 /* Given a type signature hash table and a type signature, return the
1374    cooked DIE offset of the type.  If the type cannot be found, call
1375    BARF.  */
1376 static UWord lookup_signatured_type ( const VgHashTable *tab,
1377                                       ULong type_signature,
1378                                       void (*barf)( const HChar* ) __attribute__((noreturn)) )
1379 {
1380    D3SignatureType *dstype = VG_(HT_lookup) ( tab, (UWord) type_signature );
1381    /* This may be unwarranted chumminess with the hash table
1382       implementation.  */
1383    while ( dstype != NULL && dstype->type_signature != type_signature)
1384       dstype = dstype->next;
1385    if (dstype == NULL) {
1386       barf("lookup_signatured_type: could not find signatured type");
1387       /*NOTREACHED*/
1388       vg_assert(0);
1389    }
1390    return dstype->die;
1391 }
1392
1393
/* Represents Form data.  If szB is 1/2/4/8 then the result is in the
   lowest 1/2/4/8 bytes of u.val.  If szB is zero or negative then the
   result is an image section beginning at u.cur and with size -szB.
   No other szB values are allowed. */
typedef
   struct {
      Long szB; // 1, 2, 4, 8 or non-positive values only.
      // Which member is valid is selected by szB, as described above.
      union { ULong val; DiCursor cur; } u;
   }
   FormContents;
1404
1405 // Read data for get_Form_contents() from .debug_addr for the 'index' entry.
1406 static void get_Form_contents_addr( /*OUT*/FormContents* cts, DW_FORM form,
1407                                     ULong index, const CUConst* cc, Bool td3 )
1408 {
1409    cts->u.val = get_debug_addr_entry_form( index, cc, form );
1410    cts->szB   = sizeof(UWord);
1411    TRACE_D3("0x%lx", (UWord)cts->u.val);
1412 }
1413
1414 // Read data for get_Form_contents() from .debug_str for the given offset.
1415 static void get_Form_contents_str( /*OUT*/FormContents* cts, DW_FORM form,
1416                                     UWord offset, const CUConst* cc, Bool td3 )
1417 {
1418    if (!ML_(sli_is_valid)(cc->escn_debug_str)
1419        || offset >= cc->escn_debug_str.szB) {
1420       VG_(printf)(
1421          "get_Form_contents_str: %u (%s) points outside .debug_str\n",
1422          form, ML_(pp_DW_FORM)(form));
1423       cc->barf("get_Form_contents_str: index points outside .debug_str");
1424    }
1425    /* FIXME: check the entire string lies inside debug_str,
1426       not just the first byte of it. */
1427    DiCursor str
1428       = ML_(cur_plus)( ML_(cur_from_sli)(cc->escn_debug_str), offset );
1429    if (TD3) {
1430       HChar* tmp = ML_(cur_read_strdup)(str, "di.getFC.1");
1431       TRACE_D3("(indirect string, offset: 0x%lx): %s", offset, tmp);
1432       ML_(dinfo_free)(tmp);
1433    }
1434    cts->u.cur = str;
1435    cts->szB   = - (Long)(1 + (ULong)ML_(cur_strlen)(str));
1436 }
1437
1438 static inline UInt sizeof_Dwarfish_UWord (Bool is_dw64)
1439 {
1440    if (is_dw64)
1441       return sizeof(ULong);
1442    else
1443       return sizeof(UInt);
1444 }
1445
1446 // Read data for get_Form_contents() from .debug_str_offsets for the 'index' entry.
1447 static void get_Form_contents_str_offsets( /*OUT*/FormContents* cts, DW_FORM form,
1448                                     ULong index, const CUConst* cc, Bool td3 )
1449 {
1450    if(!cc->cu_has_str_offsets_base) {
1451       VG_(printf)(
1452          "get_Form_contents_str_offsets: %u (%s) without DW_AT_str_offsets_base\n",
1453          form, ML_(pp_DW_FORM)(form));
1454       cc->barf("get_Form_contents_str_offsets: DW_AT_str_offsets_base not set");
1455    }
1456    UWord str_offsets_pos = cc->cu_str_offsets_base
1457                            + index * sizeof_Dwarfish_UWord (cc->is_dw64);
1458    Cursor cur;
1459    init_Cursor( &cur, cc->escn_debug_str_offsets, str_offsets_pos, cc->barf,
1460                 "get_Form_contents_str_offsets: index "
1461                 "points outside .debug_str_offsets" );
1462    if (TD3) {
1463       HChar* tmp = ML_(cur_read_strdup)(get_DiCursor_from_Cursor(&cur), "di.getFC.1");
1464       TRACE_D3("(indirect string offset, offset: 0x%lx): %s", str_offsets_pos, tmp);
1465       ML_(dinfo_free)(tmp);
1466    }
1467    get_Form_contents_str( cts, form, get_Dwarfish_UWord(&cur, cc->is_dw64), cc, td3 );
1468 }
1469
1470 /* From 'c', get the Form data into 'cts'.  Either it gets a 1/2/4/8
1471    byte scalar value, or (a reference to) zero or more bytes starting
1472    at a DiCursor.*/
1473 static
1474 void get_Form_contents ( /*OUT*/FormContents* cts,
1475                          const CUConst* cc, Cursor* c,
1476                          Bool td3, const name_form *abbv )
1477 {
1478    DW_FORM form = abbv->at_form;
1479    VG_(bzero_inline)(cts, sizeof(*cts));
1480    // !!! keep switch in sync with get_Form_szB. The nr of characters read below
1481    // must be computed similarly in get_Form_szB.
1482    // The consistency is verified in trace_DIE.
1483    switch (form) {
1484       case DW_FORM_data1:
1485          cts->u.val = (ULong)(UChar)get_UChar(c);
1486          cts->szB   = 1;
1487          TRACE_D3("%u", (UInt)cts->u.val);
1488          break;
1489       case DW_FORM_data2:
1490          cts->u.val = (ULong)(UShort)get_UShort(c);
1491          cts->szB   = 2;
1492          TRACE_D3("%u", (UInt)cts->u.val);
1493          break;
1494       case DW_FORM_data4:
1495          cts->u.val = (ULong)(UInt)get_UInt(c);
1496          cts->szB   = 4;
1497          TRACE_D3("%u", (UInt)cts->u.val);
1498          break;
1499       case DW_FORM_data8:
1500          cts->u.val = get_ULong(c);
1501          cts->szB   = 8;
1502          TRACE_D3("%llu", cts->u.val);
1503          break;
1504      case DW_FORM_data16: {
1505          /* This is more like a block than an integral value.  */
1506          ULong    u64b;
1507          DiCursor data16 = get_DiCursor_from_Cursor(c);
1508          TRACE_D3("data16: ");
1509          for (u64b = 16; u64b > 0; u64b--) {
1510             UChar u8 = get_UChar(c);
1511             TRACE_D3("%x ", (UInt)u8);
1512          }
1513          cts->u.cur = data16;
1514          cts->szB   = - (Long)16;
1515          break;
1516          }
1517       case DW_FORM_sec_offset:
1518          cts->u.val = (ULong)get_Dwarfish_UWord( c, cc->is_dw64 );
1519          cts->szB   = cc->is_dw64 ? 8 : 4;
1520          TRACE_D3("%llu", cts->u.val);
1521          break;
1522       case DW_FORM_rnglistx: {
1523          if(!cc->cu_has_rnglists_base) {
1524             cc->barf("get_Form_contents: DW_FORM_rnglistsx"
1525                      " without DW_AT_rnglists_base");
1526          }
1527          /* Convert index to offset pointing to the offsets list. */
1528          ULong index = get_ULEB128(c);
1529          ULong offset_to_offset = cc->cu_rnglists_base + index * sizeof_Dwarfish_UWord( cc->is_dw64 );
1530          /* And read the offset value from there. */
1531          Cursor cur;
1532          init_Cursor( &cur, cc->escn_debug_rnglists, offset_to_offset, cc->barf,
1533                       "get_Form_contents: index points outside .debug_rnglists" );
1534          cts->u.val = cc->cu_rnglists_base + get_Dwarfish_UWord(&cur, cc->is_dw64);
1535          cts->szB   = 8;
1536          TRACE_D3("%llu", cts->u.val);
1537          break;
1538       }
1539       case DW_FORM_loclistx: {
1540          if(!cc->cu_has_loclists_base) {
1541             cc->barf("get_Form_contents: DW_FORM_loclistsx"
1542                      " without DW_AT_loclists_base");
1543          }
1544          /* Convert index to offset pointing to the offsets list. */
1545          ULong index = get_ULEB128(c);
1546          ULong offset_to_offset = cc->cu_loclists_base + index * sizeof_Dwarfish_UWord( cc->is_dw64 );
1547          /* And read the offset value from there. */
1548          Cursor cur;
1549          init_Cursor( &cur, cc->escn_debug_loclists, offset_to_offset, cc->barf,
1550                       "get_Form_contents: index points outside .debug_loclists" );
1551          cts->u.val = cc->cu_loclists_base + get_Dwarfish_UWord(&cur, cc->is_dw64);
1552          cts->szB   = 8;
1553          TRACE_D3("%llu", cts->u.val);
1554          break;
1555       }
1556       case DW_FORM_sdata:
1557          cts->u.val = (ULong)(Long)get_SLEB128(c);
1558          cts->szB   = 8;
1559          TRACE_D3("%llu", cts->u.val);
1560          break;
1561       case DW_FORM_udata:
1562          cts->u.val = (ULong)(Long)get_ULEB128(c);
1563          cts->szB   = 8;
1564          TRACE_D3("%llu", cts->u.val);
1565          break;
1566       case DW_FORM_addr:
1567          /* note, this is a hack.  DW_FORM_addr is defined as getting
1568             a word the size of the target machine as defined by the
1569             address_size field in the CU Header.  However,
1570             parse_CU_Header() rejects all inputs except those for
1571             which address_size == sizeof(Word), hence we can just
1572             treat it as a (host) Word.  */
1573          cts->u.val = (ULong)(UWord)get_UWord(c);
1574          cts->szB   = sizeof(UWord);
1575          TRACE_D3("0x%lx", (UWord)cts->u.val);
1576          break;
1577
1578       case DW_FORM_ref_addr:
1579          /* We make the same word-size assumption as DW_FORM_addr. */
1580          /* What does this really mean?  From D3 Sec 7.5.4,
1581             description of "reference", it would appear to reference
1582             some other DIE, by specifying the offset from the
1583             beginning of a .debug_info section.  The D3 spec mentions
1584             that this might be in some other shared object and
1585             executable.  But I don't see how the name of the other
1586             object/exe is specified.
1587
1588             At least for the DW_FORM_ref_addrs created by icc11, the
1589             references seem to be within the same object/executable.
1590             So for the moment we merely range-check, to see that they
1591             actually do specify a plausible offset within this
1592             object's .debug_info, and return the value unchanged.
1593
1594             In DWARF 2, DW_FORM_ref_addr is address-sized, but in
1595             DWARF 3 and later, it is offset-sized.
1596          */
1597          if (cc->version == 2) {
1598             cts->u.val = (ULong)(UWord)get_UWord(c);
1599             cts->szB   = sizeof(UWord);
1600          } else {
1601             cts->u.val = get_Dwarfish_UWord(c, cc->is_dw64);
1602             cts->szB   = cc->is_dw64 ? sizeof(ULong) : sizeof(UInt);
1603          }
1604          TRACE_D3("0x%lx", (UWord)cts->u.val);
1605          if (0) VG_(printf)("DW_FORM_ref_addr 0x%lx\n", (UWord)cts->u.val);
1606          if (/* the following is surely impossible, but ... */
1607              !ML_(sli_is_valid)(cc->escn_debug_info)
1608              || cts->u.val >= (ULong)cc->escn_debug_info.szB) {
1609             /* Hmm.  Offset is nonsensical for this object's .debug_info
1610                section.  Be safe and reject it. */
1611             cc->barf("get_Form_contents: DW_FORM_ref_addr points "
1612                      "outside .debug_info");
1613          }
1614          break;
1615
1616       case DW_FORM_strp: {
1617          /* this is an offset into .debug_str */
1618          UWord uw = (UWord)get_Dwarfish_UWord( c, cc->is_dw64 );
1619          get_Form_contents_str( cts, form, uw, cc, td3 );
1620          break;
1621       }
1622       case DW_FORM_line_strp: {
1623          /* this is an offset into .debug_line_str */
1624          UWord uw = (UWord)get_Dwarfish_UWord( c, cc->is_dw64 );
1625          if (!ML_(sli_is_valid)(cc->escn_debug_line_str)
1626              || uw >= cc->escn_debug_line_str.szB)
1627             cc->barf("get_Form_contents: DW_FORM_line_strp "
1628                      "points outside .debug_line_str");
1629          /* FIXME: check the entire string lies inside debug_line_str,
1630             not just the first byte of it. */
1631          DiCursor line_str
1632             = ML_(cur_plus)( ML_(cur_from_sli)(cc->escn_debug_line_str), uw );
1633          if (TD3) {
1634             HChar* tmp = ML_(cur_read_strdup)(line_str, "di.getFC.1.5");
1635             TRACE_D3("(indirect line string, offset: 0x%lx): %s", uw, tmp);
1636             ML_(dinfo_free)(tmp);
1637          }
1638          cts->u.cur = line_str;
1639          cts->szB   = - (Long)(1 + (ULong)ML_(cur_strlen)(line_str));
1640          break;
1641       }
1642       case DW_FORM_string: {
1643          DiCursor str = get_AsciiZ(c);
1644          if (TD3) {
1645             HChar* tmp = ML_(cur_read_strdup)(str, "di.getFC.2");
1646             TRACE_D3("%s", tmp);
1647             ML_(dinfo_free)(tmp);
1648          }
1649          cts->u.cur = str;
1650          /* strlen is safe because get_AsciiZ already 'vetted' the
1651             entire string */
1652          cts->szB   = - (Long)(1 + (ULong)ML_(cur_strlen)(str));
1653          break;
1654       }
1655       case DW_FORM_ref1: {
1656          UChar u8   = get_UChar(c);
1657          UWord res  = cc->cu_start_offset + (UWord)u8;
1658          cts->u.val = (ULong)res;
1659          cts->szB   = sizeof(UWord);
1660          TRACE_D3("<%lx>", res);
1661          break;
1662       }
1663       case DW_FORM_ref2: {
1664          UShort u16 = get_UShort(c);
1665          UWord  res = cc->cu_start_offset + (UWord)u16;
1666          cts->u.val = (ULong)res;
1667          cts->szB   = sizeof(UWord);
1668          TRACE_D3("<%lx>", res);
1669          break;
1670       }
1671       case DW_FORM_ref4: {
1672          UInt  u32  = get_UInt(c);
1673          UWord res  = cc->cu_start_offset + (UWord)u32;
1674          cts->u.val = (ULong)res;
1675          cts->szB   = sizeof(UWord);
1676          TRACE_D3("<%lx>", res);
1677          break;
1678       }
1679       case DW_FORM_ref8: {
1680          ULong u64  = get_ULong(c);
1681          UWord res  = cc->cu_start_offset + (UWord)u64;
1682          cts->u.val = (ULong)res;
1683          cts->szB   = sizeof(UWord);
1684          TRACE_D3("<%lx>", res);
1685          break;
1686       }
1687       case DW_FORM_ref_udata: {
1688          ULong u64  = get_ULEB128(c);
1689          UWord res  = cc->cu_start_offset + (UWord)u64;
1690          cts->u.val = (ULong)res;
1691          cts->szB   = sizeof(UWord);
1692          TRACE_D3("<%lx>", res);
1693          break;
1694       }
1695       case DW_FORM_flag: {
1696          UChar u8 = get_UChar(c);
1697          TRACE_D3("%u", (UInt)u8);
1698          cts->u.val = (ULong)u8;
1699          cts->szB   = 1;
1700          break;
1701       }
1702       case DW_FORM_flag_present:
1703          TRACE_D3("1");
1704          cts->u.val = 1;
1705          cts->szB   = 1;
1706          break;
1707       case DW_FORM_implicit_const:
1708          cts->u.val = (ULong)abbv->at_val;
1709          cts->szB   = 8;
1710          TRACE_D3("%llu", cts->u.val);
1711          break;
1712       case DW_FORM_block1: {
1713          ULong    u64b;
1714          ULong    u64   = (ULong)get_UChar(c);
1715          DiCursor block = get_DiCursor_from_Cursor(c);
1716          TRACE_D3("%llu byte block: ", u64);
1717          for (u64b = u64; u64b > 0; u64b--) {
1718             UChar u8 = get_UChar(c);
1719             TRACE_D3("%x ", (UInt)u8);
1720          }
1721          cts->u.cur = block;
1722          cts->szB   = - (Long)u64;
1723          break;
1724       }
1725       case DW_FORM_block2: {
1726          ULong    u64b;
1727          ULong    u64   = (ULong)get_UShort(c);
1728          DiCursor block = get_DiCursor_from_Cursor(c);
1729          TRACE_D3("%llu byte block: ", u64);
1730          for (u64b = u64; u64b > 0; u64b--) {
1731             UChar u8 = get_UChar(c);
1732             TRACE_D3("%x ", (UInt)u8);
1733          }
1734          cts->u.cur = block;
1735          cts->szB   = - (Long)u64;
1736          break;
1737       }
1738       case DW_FORM_block4: {
1739          ULong    u64b;
1740          ULong    u64   = (ULong)get_UInt(c);
1741          DiCursor block = get_DiCursor_from_Cursor(c);
1742          TRACE_D3("%llu byte block: ", u64);
1743          for (u64b = u64; u64b > 0; u64b--) {
1744             UChar u8 = get_UChar(c);
1745             TRACE_D3("%x ", (UInt)u8);
1746          }
1747          cts->u.cur = block;
1748          cts->szB   = - (Long)u64;
1749          break;
1750       }
1751       case DW_FORM_exprloc:
1752       case DW_FORM_block: {
1753          ULong    u64b;
1754          ULong    u64   = (ULong)get_ULEB128(c);
1755          DiCursor block = get_DiCursor_from_Cursor(c);
1756          TRACE_D3("%llu byte block: ", u64);
1757          for (u64b = u64; u64b > 0; u64b--) {
1758             UChar u8 = get_UChar(c);
1759             TRACE_D3("%x ", (UInt)u8);
1760          }
1761          cts->u.cur = block;
1762          cts->szB   = - (Long)u64;
1763          break;
1764       }
1765       case DW_FORM_ref_sig8: {
1766          ULong  u64b;
1767          ULong  signature = get_ULong (c);
1768          ULong  work = signature;
1769          TRACE_D3("8 byte signature: ");
1770          for (u64b = 8; u64b > 0; u64b--) {
1771             UChar u8 = work & 0xff;
1772             TRACE_D3("%x ", (UInt)u8);
1773             work >>= 8;
1774          }
1775
1776          /* cc->signature_types is only built/initialised when
1777             VG_(clo_read_var_info) is set. In this case,
1778             the DW_FORM_ref_sig8 can be looked up.
1779             But we can also arrive here when only reading inline info
1780             and VG_(clo_trace_symtab) is set. In such a case,
1781             we cannot lookup the DW_FORM_ref_sig8, we rather assign
1782             a dummy value. This is a kludge, but otherwise,
1783             the 'dwarf inline info reader' tracing would have to
1784             do type processing/reading. It is better to avoid
1785             adding significant 'real' processing only due to tracing. */
1786          if (VG_(clo_read_var_info)) {
1787             /* Due to the way that the hash table is constructed, the
1788                resulting DIE offset here is already "cooked".  See
1789                cook_die_using_form.  */
1790             cts->u.val = lookup_signatured_type (cc->signature_types, signature,
1791                                                  c->barf);
1792          } else {
1793             vg_assert (td3);
1794             vg_assert (VG_(clo_read_inline_info));
1795             TRACE_D3("<not dereferencing signature type>");
1796             cts->u.val = 0; /* Assign a dummy/rubbish value */
1797          }
1798          cts->szB   = sizeof(UWord);
1799          break;
1800       }
1801       case DW_FORM_indirect: {
1802          /* Urgh, this is ugly and somewhat unclear how it works
1803             with DW_FORM_implicit_const. HACK.  */
1804          name_form nfi = *abbv;
1805          nfi.at_form = (DW_FORM)get_ULEB128(c);
1806          get_Form_contents (cts, cc, c, td3, &nfi);
1807          return;
1808       }
1809
1810       case DW_FORM_GNU_ref_alt:
1811          cts->u.val = get_Dwarfish_UWord(c, cc->is_dw64);
1812          cts->szB   = cc->is_dw64 ? sizeof(ULong) : sizeof(UInt);
1813          TRACE_D3("0x%lx", (UWord)cts->u.val);
1814          if (0) VG_(printf)("DW_FORM_GNU_ref_alt 0x%lx\n", (UWord)cts->u.val);
1815          if (/* the following is surely impossible, but ... */
1816              !ML_(sli_is_valid)(cc->escn_debug_info_alt))
1817             cc->barf("get_Form_contents: DW_FORM_GNU_ref_addr used, "
1818                      "but no alternate .debug_info");
1819          else if (cts->u.val >= (ULong)cc->escn_debug_info_alt.szB) {
1820             /* Hmm.  Offset is nonsensical for this object's .debug_info
1821                section.  Be safe and reject it. */
1822             cc->barf("get_Form_contents: DW_FORM_GNU_ref_addr points "
1823                      "outside alternate .debug_info");
1824          }
1825          break;
1826
1827       case DW_FORM_GNU_strp_alt: {
1828          /* this is an offset into alternate .debug_str */
1829          SizeT uw = (UWord)get_Dwarfish_UWord( c, cc->is_dw64 );
1830          if (!ML_(sli_is_valid)(cc->escn_debug_str_alt))
1831             cc->barf("get_Form_contents: DW_FORM_GNU_strp_alt used, "
1832                      "but no alternate .debug_str");
1833          else if (uw >= cc->escn_debug_str_alt.szB)
1834             cc->barf("get_Form_contents: DW_FORM_GNU_strp_alt "
1835                      "points outside alternate .debug_str");
1836          /* FIXME: check the entire string lies inside debug_str,
1837             not just the first byte of it. */
1838          DiCursor str
1839             = ML_(cur_plus)( ML_(cur_from_sli)(cc->escn_debug_str_alt), uw);
1840          if (TD3) {
1841             HChar* tmp = ML_(cur_read_strdup)(str, "di.getFC.3");
1842             TRACE_D3("(indirect alt string, offset: 0x%lx): %s", uw, tmp);
1843             ML_(dinfo_free)(tmp);
1844          }
1845          cts->u.cur = str;
1846          cts->szB   = - (Long)(1 + (ULong)ML_(cur_strlen)(str));
1847          break;
1848       }
1849
1850       case DW_FORM_addrx: {
1851          /* this is an offset into .debug_addr */
1852          ULong index = (ULong)(Long)get_ULEB128(c);
1853          get_Form_contents_addr(cts, form, index, cc, td3);
1854          break;
1855       }
1856       case DW_FORM_addrx1: {
1857          /* this is an offset into .debug_addr */
1858          ULong index = (ULong)get_UChar(c);
1859          get_Form_contents_addr(cts, form, index, cc, td3);
1860          break;
1861       }
1862       case DW_FORM_addrx2: {
1863          /* this is an offset into .debug_addr */
1864          ULong index = (ULong)get_UShort(c);
1865          get_Form_contents_addr(cts, form, index, cc, td3);
1866          break;
1867       }
1868       case DW_FORM_addrx3: {
1869          /* this is an offset into .debug_addr */
1870          ULong index = (ULong)get_UInt3(c);
1871          get_Form_contents_addr(cts, form, index, cc, td3);
1872          break;
1873       }
1874       case DW_FORM_addrx4: {
1875          /* this is an offset into .debug_addr */
1876          ULong index = (ULong)get_UInt(c);
1877          get_Form_contents_addr(cts, form, index, cc, td3);
1878          break;
1879       }
1880       case DW_FORM_strx: {
1881          /* this is an offset into .debug_str_offsets */
1882          ULong index = (ULong)(Long)get_ULEB128(c);
1883          get_Form_contents_str_offsets(cts, form, index, cc, td3);
1884          break;
1885       }
1886       case DW_FORM_strx1: {
1887          /* this is an offset into .debug_str_offsets */
1888          ULong index = get_UChar(c);
1889          get_Form_contents_str_offsets(cts, form, index, cc, td3);
1890          break;
1891       }
1892       case DW_FORM_strx2: {
1893          /* this is an offset into .debug_str_offsets */
1894          ULong index = (ULong)get_UShort(c);
1895          get_Form_contents_str_offsets(cts, form, index, cc, td3);
1896          break;
1897       }
1898       case DW_FORM_strx3: {
1899          /* this is an offset into .debug_str_offsets */
1900          ULong index = (ULong)get_UInt3(c);
1901          get_Form_contents_str_offsets(cts, form, index, cc, td3);
1902          break;
1903       }
1904       case DW_FORM_strx4: {
1905          /* this is an offset into .debug_str_offsets */
1906          ULong index = (ULong)get_UInt(c);
1907          get_Form_contents_str_offsets(cts, form, index, cc, td3);
1908          break;
1909       }
1910
1911       default:
1912          VG_(printf)(
1913             "get_Form_contents: unhandled %u (%s) at <%llx>\n",
1914             form, ML_(pp_DW_FORM)(form), get_position_of_Cursor(c));
1915          c->barf("get_Form_contents: unhandled DW_FORM");
1916    }
1917 }
1918
1919 #define VARSZ_FORM 0xffffffff
1920 /* If the form is a fixed length form, return the nr of bytes for this form.
1921    If the form is a variable length form, return VARSZ_FORM. */
1922 static
1923 UInt get_Form_szB (const CUConst* cc, DW_FORM form )
1924 {
1925    // !!! keep switch in sync with get_Form_contents : the nr of bytes
1926    // read from a cursor by get_Form_contents must be returned by
1927    // the below switch.
1928    // The consistency is verified in trace_DIE.
1929    switch (form) {
1930       case DW_FORM_data1: return 1;
1931       case DW_FORM_data2: return 2;
1932       case DW_FORM_data4: return 4;
1933       case DW_FORM_data8: return 8;
1934       case DW_FORM_data16: return 16;
1935       case DW_FORM_sec_offset:
1936          if (cc->is_dw64)
1937             return 8;
1938          else
1939             return 4;
1940       case DW_FORM_rnglistx:
1941       case DW_FORM_loclistx:
1942          return VARSZ_FORM;
1943       case DW_FORM_sdata:
1944          return VARSZ_FORM;
1945       case DW_FORM_udata:
1946          return VARSZ_FORM;
1947       case DW_FORM_addr: // See hack in get_Form_contents
1948          return sizeof(UWord);
1949       case DW_FORM_ref_addr: // See hack in get_Form_contents
1950          if (cc->version == 2)
1951             return sizeof(UWord);
1952          else
1953             return sizeof_Dwarfish_UWord (cc->is_dw64);
1954       case DW_FORM_strp:
1955       case DW_FORM_line_strp:
1956          return sizeof_Dwarfish_UWord (cc->is_dw64);
1957       case DW_FORM_string:
1958          return VARSZ_FORM;
1959       case DW_FORM_ref1:
1960          return 1;
1961       case DW_FORM_ref2:
1962          return 2;
1963       case DW_FORM_ref4:
1964          return 4;
1965       case DW_FORM_ref8:
1966          return 8;
1967       case DW_FORM_ref_udata:
1968          return VARSZ_FORM;
1969       case DW_FORM_flag:
1970          return 1;
1971       case DW_FORM_flag_present:
1972          return 0; // !!! special case, no data.
1973       case DW_FORM_block1:
1974          return VARSZ_FORM;
1975       case DW_FORM_block2:
1976          return VARSZ_FORM;
1977       case DW_FORM_block4:
1978          return VARSZ_FORM;
1979       case DW_FORM_exprloc:
1980       case DW_FORM_block:
1981          return VARSZ_FORM;
1982       case DW_FORM_ref_sig8:
1983          return 8;
1984       case DW_FORM_indirect:
1985          return VARSZ_FORM;
1986       case DW_FORM_GNU_ref_alt:
1987          return sizeof_Dwarfish_UWord(cc->is_dw64);
1988       case DW_FORM_GNU_strp_alt:
1989          return sizeof_Dwarfish_UWord(cc->is_dw64);
1990       case DW_FORM_implicit_const:
1991          return 0; /* Value inside abbrev. */
1992       case DW_FORM_addrx:
1993          return VARSZ_FORM;
1994       case DW_FORM_strx:
1995          return VARSZ_FORM;
1996       case DW_FORM_addrx1:
1997       case DW_FORM_strx1:
1998          return 1;
1999       case DW_FORM_addrx2:
2000       case DW_FORM_strx2:
2001          return 2;
2002       case DW_FORM_addrx3:
2003       case DW_FORM_strx3:
2004          return 3;
2005       case DW_FORM_addrx4:
2006       case DW_FORM_strx4:
2007          return 4;
2008       default:
2009          VG_(printf)(
2010             "get_Form_szB: unhandled %u (%s)\n",
2011             form, ML_(pp_DW_FORM)(form));
2012          cc->barf("get_Form_contents: unhandled DW_FORM");
2013    }
2014 }
2015
2016 /* Skip a DIE as described by abbv.
2017    If the DIE has a sibling, *sibling is set to the skipped DIE sibling value. */
2018 static
2019 void skip_DIE (UWord  *sibling,
2020                Cursor* c_die,
2021                const g_abbv *abbv,
2022                const CUConst* cc)
2023 {
2024    UInt nf_i;
2025    FormContents cts;
2026    nf_i = 0;
2027    while (True) {
2028       if (abbv->nf[nf_i].at_name == DW_AT_sibling) {
2029          get_Form_contents( &cts, cc, c_die, False /*td3*/,
2030                             &abbv->nf[nf_i] );
2031          if ( cts.szB > 0 )
2032             *sibling = cts.u.val;
2033          nf_i++;
2034       } else if (abbv->nf[nf_i].skip_szB == VARSZ_FORM) {
2035          DW_FORM form = abbv->nf[nf_i].at_form;
2036          if(form == DW_FORM_addrx || form == DW_FORM_strx
2037             || form == DW_FORM_rnglistx || form == DW_FORM_loclistx) {
2038             /* Skip without interpreting them, they may depend on e.g.
2039                DW_AT_addr_base that has not been read yet. */
2040             (void) get_ULEB128(c_die);
2041          } else
2042             get_Form_contents( &cts, cc, c_die, False /*td3*/,
2043                                &abbv->nf[nf_i] );
2044          nf_i++;
2045       } else {
2046          advance_position_of_Cursor (c_die, (ULong)abbv->nf[nf_i].skip_szB);
2047          nf_i = abbv->nf[nf_i].next_nf;
2048       }
2049       if (nf_i == 0)
2050          break;
2051    }
2052 }
2053
2054
2055 /*------------------------------------------------------------*/
2056 /*---                                                      ---*/
2057 /*--- Parsing of variable-related DIEs                     ---*/
2058 /*---                                                      ---*/
2059 /*------------------------------------------------------------*/
2060
/* Record describing one variable.  NOTE(review): presumably these are
   the temporary, per-variable records accumulated while parsing
   variable-related DIEs -- confirm against the code that creates
   them. */
typedef
   struct _TempVar {
      const HChar*  name; /* in DebugInfo's .strpool */
      /* Represent ranges economically.  nRanges is the number of
         ranges.  Cases:
         0: .rngOneMin .rngOneMax .rngMany are all zero
         1: .rngOneMin .rngOneMax hold the range; .rngMany is NULL
         2: .rngOneMin .rngOneMax are zero; .rngMany holds the ranges.
            (NOTE(review): presumably this case stands for any number
            of ranges above one -- confirm against the code that
            fills in this field.)
         This is merely an optimisation to avoid having to allocate
         and free the XArray in the common (98%) of cases where there
         is zero or one address ranges. */
      UWord   nRanges;
      Addr    rngOneMin;
      Addr    rngOneMax;
      XArray* rngMany; /* of AddrRange.  NON-UNIQUE PTR in AR_DINFO. */
      /* Do not free .rngMany, since many TempVars will have the same
         value.  Instead the associated storage is to be freed by
         deleting 'rangetree', which stores a single copy of each
         range. */
      /* --- */
      Int     level;
      UWord   typeR; /* a cuOff */
      GExpr*  gexpr; /* for this variable */
      GExpr*  fbGX;  /* to find the frame base of the enclosing fn, if
                        any */
      UInt    fndn_ix; /* declaring file/dirname index in fndnpool, or 0 */
      Int     fLine; /* declaring file line number, or zero */
      /* offset in .debug_info, so that abstract instances can be
         found to satisfy references from concrete instances. */
      UWord   dioff;
      UWord   absOri; /* so the absOri fields refer to dioff fields
                         in some other, related TempVar. */
   }
   TempVar;
2095
/* State of the variable parser: a stack of scopes, held as four
   parallel arrays indexed by stack position (0 .. sp). */
typedef
   struct {
      /* Contains the range stack: a stack of address ranges, one
         stack entry for each nested scope.

         Some scope entries are created by function definitions
         (DW_TAG_subprogram), and for those, we also note the GExpr
         derived from its DW_AT_frame_base attribute, if any.
         Consequently it should be possible to find, for any
         variable's DIE, the GExpr for the containing function's
         DW_AT_frame_base by scanning back through the stack to find
         the nearest entry associated with a function.  This somewhat
         elaborate scheme is provided so as to make it possible to
         obtain the correct DW_AT_frame_base expression even in the
         presence of nested functions (or to be more precise, in the
         presence of nested DW_TAG_subprogram DIEs).
      */
      Int     sp; /* [sp] is innermost active entry; sp==-1 for empty
                     stack */
      /* Number of entries currently allocated in each of the four
         arrays below.  varstack_push enlarges all four together when
         the stack is full. */
      Int     stack_size;
      /* Per-entry address ranges.  The XArray of an entry is deleted
         by varstack_preen when that entry is popped. */
      XArray **ranges; /* XArray of AddrRange */
      Int     *level;  /* D3 DIE levels */
      Bool    *isFunc; /* from DW_TAG_subprogram? */
      GExpr  **fbGX;   /* if isFunc, contains the FB expr, else NULL */
   }
   D3VarParser;
2122
2123 /* Completely initialise a variable parser object */
2124 static void
2125 var_parser_init ( D3VarParser *parser )
2126 {
2127    parser->sp = -1;
2128    parser->stack_size = 0;
2129    parser->ranges = NULL;
2130    parser->level  = NULL;
2131    parser->isFunc = NULL;
2132    parser->fbGX = NULL;
2133 }
2134
2135 /* Release any memory hanging off a variable parser object */
2136 static void
2137 var_parser_release ( D3VarParser *parser )
2138 {
2139    ML_(dinfo_free)( parser->ranges );
2140    ML_(dinfo_free)( parser->level );
2141    ML_(dinfo_free)( parser->isFunc );
2142    ML_(dinfo_free)( parser->fbGX );
2143 }
2144
2145 static void varstack_show ( const D3VarParser* parser, const HChar* str )
2146 {
2147    Word i, j;
2148    VG_(printf)("  varstack (%s) {\n", str);
2149    for (i = 0; i <= parser->sp; i++) {
2150       XArray* xa = parser->ranges[i];
2151       vg_assert(xa);
2152       VG_(printf)("    [%ld] (level %d)", i, parser->level[i]);
2153       if (parser->isFunc[i]) {
2154          VG_(printf)(" (fbGX=%p)", parser->fbGX[i]);
2155       } else {
2156          vg_assert(parser->fbGX[i] == NULL);
2157       }
2158       VG_(printf)(": ");
2159       if (VG_(sizeXA)( xa ) == 0) {
2160          VG_(printf)("** empty PC range array **");
2161       } else {
2162          for (j = 0; j < VG_(sizeXA)( xa ); j++) {
2163             AddrRange* range = (AddrRange*) VG_(indexXA)( xa, j );
2164             vg_assert(range);
2165             VG_(printf)("[%#lx,%#lx] ", range->aMin, range->aMax);
2166          }
2167       }
2168       VG_(printf)("\n");
2169    }
2170    VG_(printf)("  }\n");
2171 }
2172
2173 /* Remove from the stack, all entries with .level > 'level' */
2174 static
2175 void varstack_preen ( D3VarParser* parser, Bool td3, Int level )
2176 {
2177    Bool changed = False;
2178    vg_assert(parser->sp < parser->stack_size);
2179    while (True) {
2180       vg_assert(parser->sp >= -1);
2181       if (parser->sp == -1) break;
2182       if (parser->level[parser->sp] <= level) break;
2183       if (0)
2184          TRACE_D3("BBBBAAAA varstack_pop [newsp=%d]\n", parser->sp-1);
2185       vg_assert(parser->ranges[parser->sp]);
2186       /* Who allocated this xa?  get_range_list() or
2187          unitary_range_list(). */
2188       VG_(deleteXA)( parser->ranges[parser->sp] );
2189       parser->sp--;
2190       changed = True;
2191    }
2192    if (changed && td3)
2193       varstack_show( parser, "after preen" );
2194 }
2195
2196 static void varstack_push ( const CUConst* cc,
2197                             D3VarParser* parser,
2198                             Bool td3,
2199                             XArray* ranges, Int level,
2200                             Bool    isFunc, GExpr* fbGX ) {
2201    if (0)
2202    TRACE_D3("BBBBAAAA varstack_push[newsp=%d]: %d  %p\n",
2203             parser->sp+1, level, ranges);
2204
2205    /* First we need to zap everything >= 'level', as we are about to
2206       replace any previous entry at 'level', so .. */
2207    varstack_preen(parser, /*td3*/False, level-1);
2208
2209    vg_assert(parser->sp >= -1);
2210    vg_assert(parser->sp < parser->stack_size);
2211    if (parser->sp == parser->stack_size - 1) {
2212       parser->stack_size += 48;
2213       parser->ranges =
2214          ML_(dinfo_realloc)("di.readdwarf3.varpush.1", parser->ranges,
2215                             parser->stack_size * sizeof parser->ranges[0]);
2216       parser->level =
2217          ML_(dinfo_realloc)("di.readdwarf3.varpush.2", parser->level,
2218                             parser->stack_size * sizeof parser->level[0]);
2219       parser->isFunc =
2220          ML_(dinfo_realloc)("di.readdwarf3.varpush.3", parser->isFunc,
2221                             parser->stack_size * sizeof parser->isFunc[0]);
2222       parser->fbGX =
2223          ML_(dinfo_realloc)("di.readdwarf3.varpush.4", parser->fbGX,
2224                             parser->stack_size * sizeof parser->fbGX[0]);
2225    }
2226    if (parser->sp >= 0)
2227       vg_assert(parser->level[parser->sp] < level);
2228    parser->sp++;
2229    vg_assert(ranges != NULL);
2230    if (!isFunc) vg_assert(fbGX == NULL);
2231    parser->ranges[parser->sp] = ranges;
2232    parser->level[parser->sp]  = level;
2233    parser->isFunc[parser->sp] = isFunc;
2234    parser->fbGX[parser->sp]   = fbGX;
2235    if (TD3)
2236       varstack_show( parser, "after push" );
2237 }
2238
2239
2240 /* cts is derived from a DW_AT_location and so refers either to a
2241    location expression or to a location list.  Figure out which, and
2242    in both cases bundle the expression or location list into a
2243    so-called GExpr (guarded expression). */
2244 __attribute__((noinline))
2245 static GExpr* get_GX ( const CUConst* cc, Bool td3, const FormContents* cts )
2246 {
2247    GExpr* gexpr = NULL;
2248    if (cts->szB < 0) {
2249       /* represents a non-empty in-line location expression, and
2250          cts->u.cur points at the image bytes */
2251       gexpr = make_singleton_GX( cts->u.cur, (ULong)(- cts->szB) );
2252    }
2253    else
2254    if (cts->szB > 0) {
2255       /* represents a location list.  cts->u.val is the offset of it
2256          in .debug_loc. */
2257       if (!cc->cu_svma_known)
2258          cc->barf("get_GX: location list, but CU svma is unknown");
2259       gexpr = make_general_GX( cc, td3, cts->u.val, cc->cu_svma );
2260    }
2261    else {
2262       vg_assert(0); /* else caller is bogus */
2263    }
2264    return gexpr;
2265 }
2266
2267 static
2268 HChar * get_line_str (struct _DebugInfo* di, Bool is_dw64,
2269                       Cursor *data, const UInt form,
2270                       DiSlice debugstr_img, DiSlice debuglinestr_img)
2271 {
2272    HChar *str = NULL;
2273    switch (form) {
2274    case DW_FORM_string: {
2275       DiCursor distr = get_AsciiZ(data);
2276       str = ML_(cur_step_strdup)(&distr, "di.gls.string");
2277       break;
2278    }
2279    case DW_FORM_strp: {
2280       UWord uw = (UWord)get_Dwarfish_UWord( data, is_dw64 );
2281       DiCursor distr
2282          = ML_(cur_plus)( ML_(cur_from_sli)(debugstr_img), uw );
2283       str = ML_(cur_read_strdup)(distr, "di.gls.strp");
2284       break;
2285    }
2286    case DW_FORM_line_strp: {
2287       UWord uw = (UWord)get_Dwarfish_UWord( data, is_dw64 );
2288       DiCursor distr
2289          = ML_(cur_plus)( ML_(cur_from_sli)(debuglinestr_img), uw );
2290       str = ML_(cur_read_strdup)(distr, "di.gls.line_strp");
2291       break;
2292    }
2293    default:
2294       ML_(symerr)(di, True,
2295                   "Unknown path string FORM in .debug_line");
2296       break;
2297    }
2298    return str;
2299 }
2300
2301 static
2302 Int get_line_ndx (struct _DebugInfo* di,
2303                   Cursor *data, const UInt form)
2304 {
2305    Int res = 0;
2306    switch (form) {
2307    case DW_FORM_data1:
2308       res = get_UChar(data);
2309       break;
2310    case DW_FORM_data2:
2311       res = get_UShort(data);
2312       break;
2313    case DW_FORM_udata:
2314       res = get_ULEB128(data);
2315       break;
2316    default:
2317       ML_(symerr)(di, True,
2318                   "Unknown directory_index value FORM in .debug_line");
2319       break;
2320    }
2321    return res;
2322 }
2323
2324 static
2325 void skip_line_form (struct _DebugInfo* di, Bool is_dw64,
2326                          Cursor *d, const UInt form)
2327 {
2328    switch (form) {
2329    case DW_FORM_block: {
2330       ULong len = get_ULEB128(d);
2331       advance_position_of_Cursor (d, len);
2332       break;
2333    }
2334    case DW_FORM_block1: {
2335       UChar len = get_UChar(d);
2336       advance_position_of_Cursor (d, len);
2337       break;
2338    }
2339    case DW_FORM_block2: {
2340       UShort len = get_UShort(d);
2341       advance_position_of_Cursor (d, len);
2342       break;
2343    }
2344    case DW_FORM_block4: {
2345       UInt len = get_UInt(d);
2346       advance_position_of_Cursor (d, len);
2347       break;
2348    }
2349    case DW_FORM_flag:
2350    case DW_FORM_data1:
2351       advance_position_of_Cursor (d, 1);
2352       break;
2353    case DW_FORM_data2:
2354       advance_position_of_Cursor (d, 2);
2355       break;
2356    case DW_FORM_data4:
2357       advance_position_of_Cursor (d, 4);
2358       break;
2359    case DW_FORM_data8:
2360       advance_position_of_Cursor (d, 8);
2361       break;
2362    case DW_FORM_data16:
2363       advance_position_of_Cursor (d, 16);
2364       break;
2365    case DW_FORM_string:
2366       (void)get_AsciiZ (d);
2367       break;
2368    case DW_FORM_strp:
2369    case DW_FORM_line_strp:
2370    case DW_FORM_sec_offset:
2371       advance_position_of_Cursor (d, is_dw64 ? 8 : 4);
2372       break;
2373    case DW_FORM_udata:
2374       (void)get_ULEB128(d);
2375       break;
2376    case DW_FORM_sdata:
2377       (void)get_SLEB128(d);
2378       break;
2379    default:
2380       ML_(symerr)(di, True, "Unknown FORM in .debug_line");
2381       break;
2382    }
2383 }
2384
2385 /* Returns an xarray* of directory names (indexed by the dwarf dirname
2386    integer).
2387    If 'compdir' is NULL, entry [0] will be set to "."
2388    otherwise entry [0] is set to compdir.
2389    Entry [0] basically means "the current directory of the compilation",
2390    whatever that means, according to the DWARF3 spec.
2391    FIXME??? readdwarf3.c/readdwarf.c have a lot of duplicated code */
2392 static
2393 XArray* read_dirname_xa (DebugInfo* di, UShort version, const HChar *compdir,
2394                          Cursor *c, const CUConst *cc,
2395                          Bool td3 )
2396 {
2397    XArray*        dirname_xa;   /* xarray of HChar* dirname */
2398    const HChar*   dirname;
2399    UInt           compdir_len;
2400
2401    dirname_xa = VG_(newXA) (ML_(dinfo_zalloc), "di.rdxa.1", ML_(dinfo_free),
2402                             sizeof(HChar*) );
2403
2404    if (compdir == NULL) {
2405       dirname = ".";
2406       compdir_len = 1;
2407    } else {
2408       dirname = compdir;
2409       compdir_len = VG_(strlen)(compdir);
2410    }
2411
2412    /* For version 5, the compdir is the first (zero) entry. */
2413    if (version < 5)
2414       VG_(addToXA) (dirname_xa, &dirname);
2415
2416    if (version < 5) {
2417       TRACE_D3("The Directory Table%s\n",
2418                peek_UChar(c) == 0 ? " is empty." : ":" );
2419
2420       while (peek_UChar(c) != 0) {
2421
2422          DiCursor cur = get_AsciiZ(c);
2423          HChar* data_str = ML_(cur_read_strdup)( cur, "dirname_xa.1" );
2424          TRACE_D3("  %s\n", data_str);
2425
2426          /* If data_str[0] is '/', then 'data' is an absolute path and we
2427             don't mess with it.  Otherwise, construct the
2428             path 'compdir' ++ "/" ++ 'data'. */
2429
2430          if (data_str[0] != '/'
2431              /* not an absolute path */
2432              && compdir
2433              /* actually got something sensible for compdir */
2434              && compdir_len)
2435          {
2436             SizeT  len = compdir_len + 1 + VG_(strlen)(data_str);
2437             HChar *buf = ML_(dinfo_zalloc)("dirname_xa.2", len + 1);
2438
2439             VG_(strcpy)(buf, compdir);
2440             VG_(strcat)(buf, "/");
2441             VG_(strcat)(buf, data_str);
2442
2443             dirname = ML_(addStr)(di, buf, len);
2444             VG_(addToXA) (dirname_xa, &dirname);
2445             if (0) VG_(printf)("rel path  %s\n", buf);
2446             ML_(dinfo_free)(buf);
2447          } else {
2448             /* just use 'data'. */
2449             dirname = ML_(addStr)(di,data_str,-1);
2450             VG_(addToXA) (dirname_xa, &dirname);
2451             if (0) VG_(printf)("abs path  %s\n", data_str);
2452          }
2453
2454          ML_(dinfo_free)(data_str);
2455       }
2456    } else {
2457       UChar forms[256];
2458       UChar p_ndx = 0;
2459       UInt directories_count;
2460       UChar directory_entry_format_count;
2461       UInt n;
2462       DiSlice debugstr_img = cc->escn_debug_str;
2463       DiSlice debuglinestr_img = cc->escn_debug_line_str;
2464
2465       directory_entry_format_count = get_UChar(c);
2466       for (n = 0; n < directory_entry_format_count; n++) {
2467          UInt lnct = get_ULEB128(c);
2468          UInt form = get_ULEB128(c);
2469          if (lnct == DW_LNCT_path)
2470             p_ndx = n;
2471          forms[n] = form;
2472       }
2473       directories_count = get_ULEB128(c);
2474       TRACE_D3("The Directory Table%s\n",
2475                directories_count == 0 ? " is empty." : ":" );
2476
2477       for (n = 0; n < directories_count; n++) {
2478          UInt f;
2479          for (f = 0; f < directory_entry_format_count; f++) {
2480             UInt form = forms[f];
2481             if (f == p_ndx) {
2482                HChar *data_str = get_line_str (di, cc->is_dw64, c, form,
2483                                                debugstr_img,
2484                                                debuglinestr_img);
2485                TRACE_D3("  %s\n", data_str);
2486
2487                /* If data_str[0] is '/', then 'data' is an absolute path and we
2488                   don't mess with it.  Otherwise, construct the
2489                   path 'compdir' ++ "/" ++ 'data'. */
2490
2491                if (data_str[0] != '/'
2492                    /* not an absolute path */
2493                    && compdir
2494                    /* actually got something sensible for compdir */
2495                    && compdir_len)
2496                {
2497                   SizeT  len = compdir_len + 1 + VG_(strlen)(data_str);
2498                   HChar *buf = ML_(dinfo_zalloc)("dirname_xa.2", len + 1);
2499
2500                   VG_(strcpy)(buf, compdir);
2501                   VG_(strcat)(buf, "/");
2502                   VG_(strcat)(buf, data_str);
2503
2504                   dirname = ML_(addStr)(di, buf, len);
2505                   VG_(addToXA) (dirname_xa, &dirname);
2506                   if (0) VG_(printf)("rel path  %s\n", buf);
2507                   ML_(dinfo_free)(buf);
2508                } else {
2509                   /* just use 'data'. */
2510                   dirname = ML_(addStr)(di,data_str,-1);
2511                   VG_(addToXA) (dirname_xa, &dirname);
2512                   if (0) VG_(printf)("abs path  %s\n", data_str);
2513                }
2514
2515                ML_(dinfo_free)(data_str);
2516             } else {
2517                skip_line_form (di, cc->is_dw64, c, form);
2518             }
2519          }
2520       }
2521    }
2522
2523    TRACE_D3 ("\n");
2524
2525    if (version < 5 && get_UChar (c) != 0) {
2526       ML_(symerr)(NULL, True,
2527                   "could not get NUL at end of DWARF directory table");
2528       VG_(deleteXA)(dirname_xa);
2529       return NULL;
2530    }
2531
2532    return dirname_xa;
2533 }
2534
2535 static
2536 void read_filename_table( /*MOD*/XArray* /* of UInt* */ fndn_ix_Table,
2537                           const HChar* compdir,
2538                           const CUConst* cc, ULong debug_line_offset,
2539                           Bool td3 )
2540 {
2541    Bool   is_dw64;
2542    Cursor c;
2543    Word   i;
2544    UShort version;
2545    UChar  opcode_base;
2546    const HChar* str;
2547    XArray* dirname_xa;   /* xarray of HChar* dirname */
2548    ULong  dir_xa_ix;     /* Index in dirname_xa, as read from dwarf info. */
2549    const HChar* dirname;
2550    UInt   fndn_ix;
2551
2552    vg_assert(fndn_ix_Table && cc && cc->barf);
2553    if (!ML_(sli_is_valid)(cc->escn_debug_line)
2554        || cc->escn_debug_line.szB <= debug_line_offset) {
2555       cc->barf("read_filename_table: .debug_line is missing?");
2556    }
2557
2558    init_Cursor( &c, cc->escn_debug_line, debug_line_offset, cc->barf,
2559                 "Overrun whilst reading .debug_line section(1)" );
2560
2561    /* unit_length = */
2562    get_Initial_Length( &is_dw64, &c,
2563                        "read_filename_table: invalid initial-length field" );
2564    version = get_UShort( &c );
2565    if (version != 2 && version != 3 && version != 4 && version != 5)
2566      cc->barf("read_filename_table: Only DWARF version 2, 3, 4 and 5 "
2567               "line info is currently supported.");
2568    if (version >= 5) {
2569       /* addrs_size = */ get_UChar( &c );
2570       /* seg_size =   */ get_UChar( &c );
2571    }
2572    /*header_length              = (ULong)*/ get_Dwarfish_UWord( &c, is_dw64 );
2573    /*minimum_instruction_length = */ get_UChar( &c );
2574    if (version >= 4)
2575       /*maximum_operations_per_insn = */ get_UChar( &c );
2576    /*default_is_stmt            = */ get_UChar( &c );
2577    /*line_base                  = (Char)*/ get_UChar( &c );
2578    /*line_range                 = */ get_UChar( &c );
2579    opcode_base                = get_UChar( &c );
2580    /* skip over "standard_opcode_lengths" */
2581    for (i = 1; i < (Word)opcode_base; i++)
2582      (void)get_UChar( &c );
2583
2584    dirname_xa = read_dirname_xa(cc->di, version, compdir, &c, cc, td3);
2585
2586    /* Read and record the file names table */
2587    vg_assert( VG_(sizeXA)( fndn_ix_Table ) == 0 );
2588    if (version < 5) {
2589       /* Add a dummy index-zero entry.  DWARF3 numbers its files
2590          from 1, for some reason. */
2591       fndn_ix = ML_(addFnDn) ( cc->di, "<unknown_file>", NULL );
2592       VG_(addToXA)( fndn_ix_Table, &fndn_ix );
2593       while (peek_UChar(&c) != 0) {
2594          DiCursor cur = get_AsciiZ(&c);
2595          str = ML_(addStrFromCursor)( cc->di, cur );
2596          dir_xa_ix = get_ULEB128( &c );
2597          if (dirname_xa != NULL && dir_xa_ix < VG_(sizeXA) (dirname_xa))
2598             dirname = *(HChar**)VG_(indexXA) ( dirname_xa, dir_xa_ix );
2599          else
2600             dirname = NULL;
2601          fndn_ix = ML_(addFnDn)( cc->di, str, dirname);
2602          TRACE_D3("  read_filename_table: %ld fndn_ix %u %s %s\n",
2603                   VG_(sizeXA)(fndn_ix_Table), fndn_ix,
2604                   dirname, str);
2605          VG_(addToXA)( fndn_ix_Table, &fndn_ix );
2606          (void)get_ULEB128( &c ); /* skip last mod time */
2607          (void)get_ULEB128( &c ); /* file size */
2608       }
2609    } else {
2610       UChar forms[256];
2611       UChar p_ndx = 0, d_ndx = 0;
2612       UInt file_names_count;
2613       UChar file_names_entry_format_count;
2614       UInt n;
2615       DiSlice debugstr_img = cc->escn_debug_str;
2616       DiSlice debuglinestr_img = cc->escn_debug_line_str;
2617       file_names_entry_format_count = get_UChar( &c );
2618       for (n = 0; n < file_names_entry_format_count; n++) {
2619          UInt lnct = get_ULEB128( &c );
2620          UInt form = get_ULEB128( &c );
2621          if (lnct == DW_LNCT_path)
2622             p_ndx = n;
2623          if (lnct == DW_LNCT_directory_index)
2624             d_ndx = n;
2625          forms[n] = form;
2626       }
2627       file_names_count = get_ULEB128( &c );
2628       for (n = 0; n < file_names_count; n++) {
2629          UInt f;
2630          dir_xa_ix  = 0;
2631          str = NULL;
2632          for (f = 0; f < file_names_entry_format_count; f++) {
2633             UInt form = forms[f];
2634             if (f == p_ndx)
2635                str = get_line_str (cc->di, cc->is_dw64, &c, form,
2636                                    debugstr_img, debuglinestr_img);
2637             else if (f == d_ndx)
2638                dir_xa_ix = get_line_ndx (cc->di, &c, form);
2639             else
2640                skip_line_form (cc->di, cc->is_dw64, &c, form);
2641          }
2642
2643          if (dirname_xa != NULL
2644              && dir_xa_ix >= 0 && dir_xa_ix < VG_(sizeXA) (dirname_xa))
2645             dirname = *(HChar**)VG_(indexXA) ( dirname_xa, dir_xa_ix );
2646          else
2647             dirname = NULL;
2648          fndn_ix = ML_(addFnDn)( cc->di, str, dirname);
2649          TRACE_D3("  read_filename_table: %ld fndn_ix %u %s %s\n",
2650                   VG_(sizeXA)(fndn_ix_Table), fndn_ix,
2651                   dirname, str);
2652          VG_(addToXA)( fndn_ix_Table, &fndn_ix );
2653       }
2654    }
2655    /* We're done!  The rest of it is not interesting. */
2656    if (dirname_xa != NULL)
2657       VG_(deleteXA)(dirname_xa);
2658 }
2659
2660 /* setup_cu_svma to be called when a cu is found at level 0,
2661    to establish the cu_svma. */
2662 static void setup_cu_svma(CUConst* cc, Bool have_lo, Addr ip_lo, Bool td3)
2663 {
2664    Addr cu_svma;
2665    /* We have potentially more than one type of parser parsing the
2666       dwarf information. At least currently, each parser establishes
2667       the cu_svma. So, in case cu_svma_known, we check that the same
2668       result is obtained by the 2nd parsing of the cu.
2669
2670       Alternatively, we could reset cu_svma_known after each parsing
2671       and then check that we only see a single DW_TAG_compile_unit DIE
2672       at level 0, DWARF3 only allows exactly one top level DIE per
2673       CU. */
2674
2675    if (have_lo)
2676       cu_svma = ip_lo;
2677    else {
2678       /* Now, it may be that this DIE doesn't tell us the CU's
2679          SVMA, by way of not having a DW_AT_low_pc.  That's OK --
2680          the CU doesn't *have* to have its SVMA specified.
2681
2682          But as per last para D3 spec sec 3.1.1 ("Normal and
2683          Partial Compilation Unit Entries", "If the base address
2684          (viz, the SVMA) is undefined, then any DWARF entry of
2685          structure defined interms of the base address of that
2686          compilation unit is not valid.".  So that means, if whilst
2687          processing the children of this top level DIE (or their
2688          children, etc) we see a DW_AT_range, and cu_svma_known is
2689          False, then the DIE that contains it is (per the spec)
2690          invalid, and we can legitimately stop and complain. */
2691       /* .. whereas The Reality is, simply assume the SVMA is zero
2692          if it isn't specified. */
2693       cu_svma = 0;
2694    }
2695
2696    if (cc->cu_svma_known) {
2697       vg_assert (cu_svma == cc->cu_svma);
2698    } else {
2699       cc->cu_svma_known = True;
2700       cc->cu_svma = cu_svma;
2701       if (0)
2702          TRACE_D3("setup_cu_svma: acquire CU_SVMA of %p\n", (void*) cc->cu_svma);
2703    }
2704 }
2705
2706 /* Setup info from DW_AT_addr_base, DW_AT_str_offsets_base, DW_AT_rnglists_base
2707    and DW_AT_loclists_base. This needs to be done early, because other DW_AT_*
2708    info may depend on it. */
2709 static void setup_cu_bases(CUConst* cc, Cursor* c_die, const g_abbv* abbv)
2710 {
2711    FormContents cts;
2712    UInt nf_i;
2713    ULong saved_c_pos;
2714    if(cc->cu_has_addr_base && cc->cu_has_str_offsets_base
2715       && cc->cu_has_rnglists_base && cc->cu_has_loclists_base)
2716       return;
2717    saved_c_pos = get_position_of_Cursor(c_die);
2718    nf_i = 0;
2719    while (!cc->cu_has_addr_base || !cc->cu_has_str_offsets_base
2720       || !cc->cu_has_rnglists_base || !cc->cu_has_loclists_base) {
2721       DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
2722       DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
2723       const name_form *nf = &abbv->nf[nf_i];
2724       if (attr == 0 && form == 0)
2725          break;
2726       if (attr != DW_AT_addr_base && attr != DW_AT_str_offsets_base
2727           && attr != DW_AT_rnglists_base && attr != DW_AT_loclists_base) {
2728          const UInt form_szB = get_Form_szB (cc, form);
2729          if (form_szB == VARSZ_FORM) {
2730             if(form == DW_FORM_addrx || form == DW_FORM_strx
2731                || form == DW_FORM_rnglistx || form == DW_FORM_loclistx) {
2732                /* Skip without interpreting them, they depend on *_base. */
2733                (void) get_ULEB128(c_die);
2734             } else {
2735                /* Need to read the contents of this one to skip it. */
2736                get_Form_contents( &cts, cc, c_die, False /*td3*/,
2737                                   &abbv->nf[nf_i] );
2738             }
2739          } else {
2740             /* Skip without even reading it, as it may depend on *_base. */
2741             advance_position_of_Cursor (c_die, form_szB);
2742          }
2743          nf_i++;
2744          continue;
2745       }
2746       get_Form_contents( &cts, cc, c_die, False/*td3*/, nf );
2747       if (attr == DW_AT_addr_base && cts.szB > 0) {
2748          Addr addr_base = cts.u.val;
2749          if (cc->cu_has_addr_base)
2750             vg_assert (addr_base == cc->cu_addr_base);
2751          else {
2752             cc->cu_has_addr_base = True;
2753             cc->cu_addr_base = addr_base;
2754          }
2755       }
2756       if (attr == DW_AT_str_offsets_base && cts.szB > 0) {
2757          Addr str_offsets_base = cts.u.val;
2758          if (cc->cu_has_str_offsets_base)
2759             vg_assert (str_offsets_base == cc->cu_str_offsets_base);
2760          else {
2761             cc->cu_has_str_offsets_base = True;
2762             cc->cu_str_offsets_base = str_offsets_base;
2763          }
2764        }
2765       if (attr == DW_AT_rnglists_base && cts.szB > 0) {
2766          Addr rnglists_base = cts.u.val;
2767          if (cc->cu_has_rnglists_base)
2768             vg_assert (rnglists_base == cc->cu_rnglists_base);
2769          else {
2770             cc->cu_has_rnglists_base = True;
2771             cc->cu_rnglists_base = rnglists_base;
2772          }
2773        }
2774       if (attr == DW_AT_loclists_base && cts.szB > 0) {
2775          Addr loclists_base = cts.u.val;
2776          if (cc->cu_has_loclists_base)
2777             vg_assert (loclists_base == cc->cu_loclists_base);
2778          else {
2779             cc->cu_has_loclists_base = True;
2780             cc->cu_loclists_base = loclists_base;
2781          }
2782        }
2783        nf_i++;
2784    }
2785    set_position_of_Cursor(c_die, saved_c_pos);
2786 }
2787
2788 static void trace_DIE(
2789    DW_TAG dtag,
2790    UWord posn,
2791    Int level,
2792    UWord saved_die_c_offset,
2793    const g_abbv *abbv,
2794    const CUConst* cc)
2795 {
2796    Cursor c;
2797    FormContents cts;
2798    UWord sibling = 0;
2799    UInt nf_i;
2800    Bool  debug_types_flag;
2801    Bool  alt_flag;
2802    Cursor check_skip;
2803    UWord check_sibling = 0;
2804
2805    posn = uncook_die( cc, posn, &debug_types_flag, &alt_flag );
2806    init_Cursor (&c,
2807                 debug_types_flag ? cc->escn_debug_types :
2808                 alt_flag ? cc->escn_debug_info_alt : cc->escn_debug_info,
2809                 saved_die_c_offset, cc->barf,
2810                 "Overrun trace_DIE");
2811    check_skip = c;
2812    VG_(printf)(" <%d><%lx>: Abbrev Number: %llu (%s)%s%s\n",
2813                level, posn, (ULong) abbv->abbv_code, ML_(pp_DW_TAG)( dtag ),
2814                debug_types_flag ? " (in .debug_types)" : "",
2815                alt_flag ? " (in alternate .debug_info)" : "");
2816    nf_i = 0;
2817    while (True) {
2818       DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
2819       DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
2820       const name_form *nf = &abbv->nf[nf_i];
2821       nf_i++;
2822       if (attr == 0 && form == 0) break;
2823       VG_(printf)("     %-18s: ", ML_(pp_DW_AT)(attr));
2824       /* Get the form contents, so as to print them */
2825       get_Form_contents( &cts, cc, &c, True, nf );
2826       if (attr == DW_AT_sibling && cts.szB > 0) {
2827          sibling = cts.u.val;
2828       }
2829       VG_(printf)("\t\n");
2830    }
2831
2832    /* Verify that skipping a DIE gives the same displacement as
2833       tracing (i.e. reading) a DIE. If there is an inconsistency in
2834       the nr of bytes read by get_Form_contents and get_Form_szB, this
2835       should be detected by the below. Using --trace-symtab=yes
2836       --read-var-info=yes will ensure all DIEs are systematically
2837       verified. */
2838    skip_DIE (&check_sibling, &check_skip, abbv, cc);
2839    vg_assert (check_sibling == sibling);
2840    vg_assert (get_position_of_Cursor (&check_skip)
2841               == get_position_of_Cursor (&c));
2842 }
2843
2844 __attribute__((noreturn))
2845 static void dump_bad_die_and_barf(
2846    const HChar *whichparser,
2847    DW_TAG dtag,
2848    UWord posn,
2849    Int level,
2850    Cursor* c_die,
2851    UWord saved_die_c_offset,
2852    const g_abbv *abbv,
2853    const CUConst* cc)
2854 {
2855    trace_DIE (dtag, posn, level, saved_die_c_offset, abbv, cc);
2856    VG_(printf)("%s:\n", whichparser);
2857    cc->barf("confused by the above DIE");
2858 }
2859
2860 __attribute__((noinline))
2861 static void bad_DIE_confusion(int linenr)
2862 {
2863    VG_(printf)("\nparse DIE(readdwarf3.c:%d): confused by:\n", linenr);
2864 }
2865 #define goto_bad_DIE do {bad_DIE_confusion(__LINE__); goto bad_DIE;} while (0)
2866
2867 /* Reset the fndn_ix_Table.  When we come across the top level DIE for a CU we
2868    will copy all the file names out of the .debug_line img area and use this
2869    table to look up the copies when we later see filename numbers in
2870    DW_TAG_variables etc. The table can be be reused between parsers (var and
2871    inline) and between CUs. So we keep a copy of the last one parsed. Call
2872    reset_fndn_ix_table before reading a new one from a new offset.  */
2873 static
2874 void reset_fndn_ix_table (XArray** fndn_ix_Table, ULong *debug_line_offset,
2875                           ULong new_offset)
2876 {
2877    vg_assert (new_offset == -1
2878               || *debug_line_offset != new_offset);
2879    Int size = *fndn_ix_Table == NULL ? 0 : VG_(sizeXA) (*fndn_ix_Table);
2880    if (size > 0) {
2881       VG_(deleteXA) (*fndn_ix_Table);
2882       *fndn_ix_Table = NULL;
2883    }
2884    if (*fndn_ix_Table == NULL)
2885       *fndn_ix_Table = VG_(newXA)( ML_(dinfo_zalloc),
2886                                    "di.readdwarf3.reset_ix_table",
2887                                    ML_(dinfo_free),
2888                                    sizeof(UInt) );
2889    *debug_line_offset = new_offset;
2890 }
2891
2892 __attribute__((noinline))
2893 static void parse_var_DIE (
2894    /*MOD*/WordFM* /* of (XArray* of AddrRange, void) */ rangestree,
2895    /*MOD*/XArray* /* of TempVar* */ tempvars,
2896    /*MOD*/XArray* /* of GExpr* */ gexprs,
2897    /*MOD*/D3VarParser* parser,
2898    XArray** fndn_ix_Table,
2899    ULong *debug_line_offset,
2900    DW_TAG dtag,
2901    UWord posn,
2902    Int level,
2903    Cursor* c_die,
2904    const g_abbv *abbv,
2905    CUConst* cc,
2906    Bool td3
2907 )
2908 {
2909    FormContents cts;
2910    UInt nf_i;
2911
2912    UWord saved_die_c_offset  = get_position_of_Cursor( c_die );
2913
2914    varstack_preen( parser, td3, level-1 );
2915
2916    if (dtag == DW_TAG_compile_unit
2917        || dtag == DW_TAG_type_unit
2918        || dtag == DW_TAG_partial_unit
2919        || dtag == DW_TAG_skeleton_unit) {
2920       Bool have_lo    = False;
2921       Bool have_hi1   = False;
2922       Bool hiIsRelative = False;
2923       Bool have_range = False;
2924       Addr ip_lo    = 0;
2925       Addr ip_hi1   = 0;
2926       Addr rangeoff = 0;
2927       const HChar *compdir = NULL;
2928
2929       if (level == 0)
2930          setup_cu_bases(cc, c_die, abbv);
2931       nf_i = 0;
2932       while (True) {
2933          DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
2934          DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
2935          const name_form *nf = &abbv->nf[nf_i];
2936          nf_i++;
2937          if (attr == 0 && form == 0) break;
2938          get_Form_contents( &cts, cc, c_die, False/*td3*/, nf );
2939          if (attr == DW_AT_low_pc && cts.szB > 0) {
2940             ip_lo   = cts.u.val;
2941             have_lo = True;
2942          }
2943          if (attr == DW_AT_high_pc && cts.szB > 0) {
2944             ip_hi1   = cts.u.val;
2945             have_hi1 = True;
2946             if (form != DW_FORM_addr)
2947                hiIsRelative = True;
2948          }
2949          if (attr == DW_AT_ranges && cts.szB > 0) {
2950             rangeoff   = cts.u.val;
2951             have_range = True;
2952          }
2953          if (attr == DW_AT_comp_dir) {
2954             if (cts.szB >= 0)
2955                cc->barf("parse_var_DIE compdir: expecting indirect string");
2956             HChar *str = ML_(cur_read_strdup)( cts.u.cur,
2957                                                "parse_var_DIE.compdir" );
2958             compdir = ML_(addStr)(cc->di, str, -1);
2959             ML_(dinfo_free) (str);
2960          }
2961          if (attr == DW_AT_stmt_list && cts.szB > 0) {
2962             if (cts.u.val != *debug_line_offset) {
2963                reset_fndn_ix_table( fndn_ix_Table, debug_line_offset,
2964                                     cts.u.val );
2965                read_filename_table( *fndn_ix_Table, compdir,
2966                                     cc, cts.u.val, td3 );
2967             }
2968          }
2969       }
2970       if (have_lo && have_hi1 && hiIsRelative)
2971          ip_hi1 += ip_lo;
2972
2973       /* Now, does this give us an opportunity to find this
2974          CU's svma? */
2975       if (level == 0)
2976          setup_cu_svma(cc, have_lo, ip_lo, td3);
2977
2978       /* Do we have something that looks sane? */
2979       if (have_lo && have_hi1 && (!have_range)) {
2980          if (ip_lo < ip_hi1)
2981             varstack_push( cc, parser, td3,
2982                            unitary_range_list(ip_lo, ip_hi1 - 1),
2983                            level,
2984                            False/*isFunc*/, NULL/*fbGX*/ );
2985          else if (ip_lo == 0 && ip_hi1 == 0)
2986             /* CU has no code, presumably?
2987                Such situations have been encountered for code
2988                compiled with -ffunction-sections -fdata-sections
2989                and linked with --gc-sections. Completely
2990                eliminated CU gives such 0 lo/hi pc. Similarly
2991                to a CU which has no lo/hi/range pc, we push
2992                an empty range list. */
2993             varstack_push( cc, parser, td3,
2994                            empty_range_list(),
2995                            level,
2996                            False/*isFunc*/, NULL/*fbGX*/ );
2997       } else
2998       if ((!have_lo) && (!have_hi1) && have_range) {
2999          varstack_push( cc, parser, td3,
3000                         get_range_list( cc, td3,
3001                                         rangeoff, cc->cu_svma ),
3002                         level,
3003                         False/*isFunc*/, NULL/*fbGX*/ );
3004       } else
3005       if ((!have_lo) && (!have_hi1) && (!have_range)) {
3006          /* CU has no code, presumably? */
3007          varstack_push( cc, parser, td3,
3008                         empty_range_list(),
3009                         level,
3010                         False/*isFunc*/, NULL/*fbGX*/ );
3011       } else
3012       if (have_lo && (!have_hi1) && have_range && ip_lo == 0) {
3013          /* broken DIE created by gcc-4.3.X ?  Ignore the
3014             apparently-redundant DW_AT_low_pc and use the DW_AT_ranges
3015             instead. */
3016          varstack_push( cc, parser, td3,
3017                         get_range_list( cc, td3,
3018                                         rangeoff, cc->cu_svma ),
3019                         level,
3020                         False/*isFunc*/, NULL/*fbGX*/ );
3021       } else {
3022          if (0) VG_(printf)("I got hlo %d hhi1 %d hrange %d\n",
3023                             (Int)have_lo, (Int)have_hi1, (Int)have_range);
3024          goto_bad_DIE;
3025       }
3026    }
3027
3028    if (dtag == DW_TAG_lexical_block || dtag == DW_TAG_subprogram) {
3029       Bool   have_lo    = False;
3030       Bool   have_hi1   = False;
3031       Bool   have_range = False;
3032       Bool   hiIsRelative = False;
3033       Addr   ip_lo      = 0;
3034       Addr   ip_hi1     = 0;
3035       Addr   rangeoff   = 0;
3036       Bool   isFunc     = dtag == DW_TAG_subprogram;
3037       GExpr* fbGX       = NULL;
3038       nf_i = 0;
3039       while (True) {
3040          DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
3041          DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
3042          const name_form *nf = &abbv->nf[nf_i];
3043          nf_i++;
3044          if (attr == 0 && form == 0) break;
3045          get_Form_contents( &cts, cc, c_die, False/*td3*/, nf );
3046          if (attr == DW_AT_low_pc && cts.szB > 0) {
3047             ip_lo   = cts.u.val;
3048             have_lo = True;
3049          }
3050          if (attr == DW_AT_high_pc && cts.szB > 0) {
3051             ip_hi1   = cts.u.val;
3052             have_hi1 = True;
3053             if (form != DW_FORM_addr)
3054                hiIsRelative = True;
3055          }
3056          if (attr == DW_AT_ranges && cts.szB > 0) {
3057             rangeoff   = cts.u.val;
3058             have_range = True;
3059          }
3060          if (isFunc
3061              && attr == DW_AT_frame_base
3062              && cts.szB != 0 /* either scalar or nonempty block */) {
3063             fbGX = get_GX( cc, False/*td3*/, &cts );
3064             vg_assert(fbGX);
3065             VG_(addToXA)(gexprs, &fbGX);
3066          }
3067       }
3068       if (have_lo && have_hi1 && hiIsRelative)
3069          ip_hi1 += ip_lo;
3070       /* Do we have something that looks sane? */
3071       if (dtag == DW_TAG_subprogram
3072           && (!have_lo) && (!have_hi1) && (!have_range)) {
3073          /* This is legit - ignore it. Sec 3.3.3: "A subroutine entry
3074             representing a subroutine declaration that is not also a
3075             definition does not have code address or range
3076             attributes." */
3077       } else
3078       if (dtag == DW_TAG_lexical_block
3079           && (!have_lo) && (!have_hi1) && (!have_range)) {
3080          /* I believe this is legit, and means the lexical block
3081             contains no insns (whatever that might mean).  Ignore. */
3082       } else
3083       if (have_lo && have_hi1 && (!have_range)) {
3084          /* This scope supplies just a single address range. */
3085          if (ip_lo < ip_hi1)
3086             varstack_push( cc, parser, td3,
3087                            unitary_range_list(ip_lo, ip_hi1 - 1),
3088                            level, isFunc, fbGX );
3089       } else
3090       if ((!have_lo) && (!have_hi1) && have_range) {
3091          /* This scope supplies multiple address ranges via the use of
3092             a range list. */
3093          varstack_push( cc, parser, td3,
3094                         get_range_list( cc, td3,
3095                                         rangeoff, cc->cu_svma ),
3096                         level, isFunc, fbGX );
3097       } else
3098       if (have_lo && (!have_hi1) && (!have_range)) {
3099          /* This scope is bogus.  The D3 spec sec 3.4 (Lexical Block
3100             Entries) says fairly clearly that a scope must have either
3101             _range or (_low_pc and _high_pc). */
3102          /* The spec is a bit ambiguous though.  Perhaps a single byte
3103             range is intended?  See sec 2.17 (Code Addresses And Ranges) */
3104          /* This case is here because icc9 produced this:
3105          <2><13bd>: DW_TAG_lexical_block
3106             DW_AT_decl_line   : 5229
3107             DW_AT_decl_column : 37
3108             DW_AT_decl_file   : 1
3109             DW_AT_low_pc      : 0x401b03
3110          */
         /* Ignore (seems safer than pushing a single byte range) */
3112       } else
3113          goto_bad_DIE;
3114    }
3115
3116    if (dtag == DW_TAG_variable || dtag == DW_TAG_formal_parameter) {
3117       const  HChar* name = NULL;
3118       UWord  typeR       = D3_INVALID_CUOFF;
3119       Bool   global      = False;
3120       GExpr* gexpr       = NULL;
3121       Int    n_attrs     = 0;
3122       UWord  abs_ori     = (UWord)D3_INVALID_CUOFF;
3123       Int    lineNo      = 0;
3124       UInt   fndn_ix     = 0;
3125       nf_i = 0;
3126       while (True) {
3127          DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
3128          DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
3129          const name_form *nf = &abbv->nf[nf_i];
3130          nf_i++;
3131          if (attr == 0 && form == 0) break;
3132          get_Form_contents( &cts, cc, c_die, False/*td3*/, nf );
3133          n_attrs++;
3134          if (attr == DW_AT_name && cts.szB < 0) {
3135             name = ML_(addStrFromCursor)( cc->di, cts.u.cur );
3136          }
3137          if (attr == DW_AT_location
3138              && cts.szB != 0 /* either scalar or nonempty block */) {
3139             gexpr = get_GX( cc, False/*td3*/, &cts );
3140             vg_assert(gexpr);
3141             VG_(addToXA)(gexprs, &gexpr);
3142          }
3143          if (attr == DW_AT_type && cts.szB > 0) {
3144             typeR = cook_die_using_form( cc, cts.u.val, form );
3145          }
3146          if (attr == DW_AT_external && cts.szB > 0 && cts.u.val > 0) {
3147             global = True;
3148          }
3149          if (attr == DW_AT_abstract_origin && cts.szB > 0) {
3150             abs_ori = (UWord)cts.u.val;
3151          }
3152          if (attr == DW_AT_declaration && cts.szB > 0 && cts.u.val > 0) {
3153             /*declaration = True;*/
3154          }
3155          if (attr == DW_AT_decl_line && cts.szB > 0) {
3156             lineNo = (Int)cts.u.val;
3157          }
3158          if (attr == DW_AT_decl_file && cts.szB > 0) {
3159             Int ftabIx = (Int)cts.u.val;
3160             if (ftabIx >= 1
3161                 && ftabIx < VG_(sizeXA)( *fndn_ix_Table )) {
3162                fndn_ix = *(UInt*)VG_(indexXA)( *fndn_ix_Table, ftabIx );
3163             }
3164             if (0) VG_(printf)("XXX filename fndn_ix = %u %s\n", fndn_ix,
3165                                ML_(fndn_ix2filename) (cc->di, fndn_ix));
3166          }
3167       }
3168       if (!global && dtag == DW_TAG_variable && level == 1) {
3169          /* Case of a static variable. It is better to declare
3170             it global as the variable is not really related to
3171             a PC range, as its address can be used by program
3172             counters outside of the ranges where it is visible . */
3173          global = True;
3174       }
3175
3176       /* We'll collect it under if one of the following three
3177          conditions holds:
3178          (1) has location and type    -> completed
3179          (2) has type only            -> is an abstract instance
3180          (3) has location and abs_ori -> is a concrete instance
3181          Name, fndn_ix and line number are all optional frills.
3182       */
3183       if ( /* 1 */ (gexpr && typeR != D3_INVALID_CUOFF)
3184            /* 2 */ || (typeR != D3_INVALID_CUOFF)
3185            /* 3 */ || (gexpr && abs_ori != (UWord)D3_INVALID_CUOFF) ) {
3186
3187          /* Add this variable to the list of interesting looking
3188             variables.  Crucially, note along with it the address
3189             range(s) associated with the variable, which for locals
3190             will be the address ranges at the top of the varparser's
3191             stack. */
3192          GExpr*   fbGX = NULL;
3193          Word     i, nRanges;
3194          const XArray*  /* of AddrRange */ xa;
3195          TempVar* tv;
3196          /* Stack can't be empty; we put a dummy entry on it for the
3197             entire address range before starting with the DIEs for
3198             this CU. */
3199          vg_assert(parser->sp >= 0);
3200
3201          /* If this is a local variable (non-global), try to find
3202             the GExpr for the DW_AT_frame_base of the containing
3203             function.  It should have been pushed on the stack at the
3204             time we encountered its DW_TAG_subprogram DIE, so the way
3205             to find it is to scan back down the stack looking for it.
3206             If there isn't an enclosing stack entry marked 'isFunc'
3207             then we must be seeing variable or formal param DIEs
3208             outside of a function, so we deem the Dwarf to be
3209             malformed if that happens.  Note that the fbGX may be NULL
3210             if the containing DT_TAG_subprogram didn't supply a
3211             DW_AT_frame_base -- that's OK, but there must actually be
3212             a containing DW_TAG_subprogram. */
3213          if (!global) {
3214             Bool found = False;
3215             for (i = parser->sp; i >= 0; i--) {
3216                if (parser->isFunc[i]) {
3217                   fbGX = parser->fbGX[i];
3218                   found = True;
3219                   break;
3220                }
3221             }
3222             if (!found) {
3223                if (0 && VG_(clo_verbosity) >= 0) {
3224                   VG_(message)(Vg_DebugMsg,
3225                      "warning: parse_var_DIE: non-global variable "
3226                      "outside DW_TAG_subprogram\n");
3227                }
3228                /* goto_bad_DIE; */
3229                /* This seems to happen a lot.  Just ignore it -- if,
3230                   when we come to evaluation of the location (guarded)
3231                   expression, it requires a frame base value, and
3232                   there's no expression for that, then evaluation as a
3233                   whole will fail.  Harmless - a bit of a waste of
3234                   cycles but nothing more. */
3235             }
3236          }
3237
3238          /* re "global ? 0 : parser->sp" (twice), if the var is
3239             marked 'global' then we must put it at the global scope,
3240             as only the global scope (level 0) covers the entire PC
3241             address space.  It is asserted elsewhere that level 0
3242             always covers the entire address space. */
3243          xa = parser->ranges[global ? 0 : parser->sp];
3244          nRanges = VG_(sizeXA)(xa);
3245          vg_assert(nRanges >= 0);
3246
3247          tv = ML_(dinfo_zalloc)( "di.readdwarf3.pvD.1", sizeof(TempVar) );
3248          tv->name   = name;
3249          tv->level  = global ? 0 : parser->sp;
3250          tv->typeR  = typeR;
3251          tv->gexpr  = gexpr;
3252          tv->fbGX   = fbGX;
3253          tv->fndn_ix= fndn_ix;
3254          tv->fLine  = lineNo;
3255          tv->dioff  = posn;
3256          tv->absOri = abs_ori;
3257
3258          /* See explanation on definition of type TempVar for the
3259             reason for this elaboration. */
3260          tv->nRanges = nRanges;
3261          tv->rngOneMin = 0;
3262          tv->rngOneMax = 0;
3263          tv->rngMany = NULL;
3264          if (nRanges == 1) {
3265             AddrRange* range = VG_(indexXA)(xa, 0);
3266             tv->rngOneMin = range->aMin;
3267             tv->rngOneMax = range->aMax;
3268          }
3269          else if (nRanges > 1) {
3270             /* See if we already have a range list which is
3271                structurally identical.  If so, use that; if not, clone
3272                this one, and add it to our collection. */
3273             UWord keyW, valW;
3274             if (VG_(lookupFM)( rangestree, &keyW, &valW, (UWord)xa )) {
3275                XArray* old = (XArray*)keyW;
3276                vg_assert(valW == 0);
3277                vg_assert(old != xa);
3278                tv->rngMany = old;
3279             } else {
3280                XArray* cloned = VG_(cloneXA)( "di.readdwarf3.pvD.2", xa );
3281                tv->rngMany = cloned;
3282                VG_(addToFM)( rangestree, (UWord)cloned, 0 );
3283             }
3284          }
3285
3286          VG_(addToXA)( tempvars, &tv );
3287
3288          TRACE_D3("  Recording this variable, with %ld PC range(s)\n",
3289                   VG_(sizeXA)(xa) );
3290          /* collect stats on how effective the ->ranges special
3291             casing is */
3292          if (0) {
3293             static Int ntot=0, ngt=0;
3294             ntot++;
3295             if (tv->rngMany) ngt++;
3296             if (0 == (ntot % 100000))
3297                VG_(printf)("XXXX %d tot, %d cloned\n", ntot, ngt);
3298          }
3299
3300       }
3301
3302       /* Here are some other weird cases seen in the wild:
3303
3304             We have a variable with a name and a type, but no
3305             location.  I guess that's a sign that it has been
3306             optimised away.  Ignore it.  Here's an example:
3307
3308             static Int lc_compar(void* n1, void* n2) {
3309                MC_Chunk* mc1 = *(MC_Chunk**)n1;
3310                MC_Chunk* mc2 = *(MC_Chunk**)n2;
3311                return (mc1->data < mc2->data ? -1 : 1);
3312             }
3313
3314             Both mc1 and mc2 are like this
3315             <2><5bc>: Abbrev Number: 21 (DW_TAG_variable)
3316                 DW_AT_name        : mc1
3317                 DW_AT_decl_file   : 1
3318                 DW_AT_decl_line   : 216
3319                 DW_AT_type        : <5d3>
3320
3321             whereas n1 and n2 do have locations specified.
3322
3323             ---------------------------------------------
3324
3325             We see a DW_TAG_formal_parameter with a type, but
3326             no name and no location.  It's probably part of a function type
3327             construction, thusly, hence ignore it:
3328          <1><2b4>: Abbrev Number: 12 (DW_TAG_subroutine_type)
3329              DW_AT_sibling     : <2c9>
3330              DW_AT_prototyped  : 1
3331              DW_AT_type        : <114>
3332          <2><2be>: Abbrev Number: 13 (DW_TAG_formal_parameter)
3333              DW_AT_type        : <13e>
3334          <2><2c3>: Abbrev Number: 13 (DW_TAG_formal_parameter)
3335              DW_AT_type        : <133>
3336
3337             ---------------------------------------------
3338
3339             Is very minimal, like this:
3340             <4><81d>: Abbrev Number: 44 (DW_TAG_variable)
3341                 DW_AT_abstract_origin: <7ba>
3342             What that signifies I have no idea.  Ignore.
3343
3344             ----------------------------------------------
3345
3346             Is very minimal, like this:
3347             <200f>: DW_TAG_formal_parameter
3348                 DW_AT_abstract_ori: <1f4c>
3349                 DW_AT_location    : 13440
3350             What that signifies I have no idea.  Ignore.
3351             It might be significant, though: the variable at least
3352             has a location and so might exist somewhere.
3353             Maybe we should handle this.
3354
3355             ---------------------------------------------
3356
3357             <22407>: DW_TAG_variable
3358               DW_AT_name        : (indirect string, offset: 0x6579):
3359                                   vgPlain_trampoline_stuff_start
3360               DW_AT_decl_file   : 29
3361               DW_AT_decl_line   : 56
3362               DW_AT_external    : 1
3363               DW_AT_declaration : 1
3364
3365             Nameless and typeless variable that has a location?  Who
3366             knows.  Not me.
3367             <2><3d178>: Abbrev Number: 22 (DW_TAG_variable)
3368                  DW_AT_location    : 9 byte block: 3 c0 c7 13 38 0 0 0 0
3369                                      (DW_OP_addr: 3813c7c0)
3370
3371             No, really.  Check it out.  gcc is quite simply borked.
3372             <3><168cc>: Abbrev Number: 141 (DW_TAG_variable)
3373             // followed by no attributes, and the next DIE is a sibling,
3374             // not a child
3375             */
3376    }
3377    return;
3378
3379   bad_DIE:
3380    dump_bad_die_and_barf("parse_var_DIE", dtag, posn, level,
3381                          c_die, saved_die_c_offset,
3382                          abbv,
3383                          cc);
3384    /*NOTREACHED*/
3385 }
3386
3387 typedef
3388    struct {
3389       UWord sibling; // sibling of the last read DIE (if it has a sibling).
3390    }
3391    D3InlParser;
3392
3393 /* Return the function name corresponding to absori.
3394
3395    absori is a 'cooked' reference to a DIE, i.e. absori can be either
3396    in cc->escn_debug_info or in cc->escn_debug_info_alt.
3397    get_inlFnName will uncook absori.
3398
3399    The returned value is a (permanent) string in DebugInfo's .strchunks.
3400
3401    LIMITATION: absori must point in the CU of cc. If absori points
3402    in another CU, returns "UnknownInlinedFun".
3403
3404    Here are the problems to retrieve the fun name if absori is in
3405    another CU:  the DIE reading code cannot properly extract data from
3406    another CU, as the abbv code retrieved in the other CU cannot be
3407    translated in an abbreviation. Reading data from the alternate debug
3408    info also gives problems as the string reference is also in the alternate
3409    file, but when reading the alt DIE, the string form is a 'local' string,
3410    but cannot be read in the current CU, but must be read in the alt CU.
3411    See bug 338803 comment#3 and attachment for a failed attempt to handle
3412    these problems (failed because with the patch, only one alt abbrev hash
3413    table is kept, while we must handle all abbreviations in all CUs
3414    referenced by an absori (being a reference to an alt CU, or a previous
3415    or following CU). */
3416 static const HChar* get_inlFnName (Int absori, CUConst* cc, Bool td3)
3417 {
3418    Cursor c;
3419    const g_abbv *abbv;
3420    ULong  atag, abbv_code;
3421    UInt   has_children;
3422    UWord  posn;
3423    Bool type_flag, alt_flag;
3424    const HChar *ret = NULL;
3425    FormContents cts;
3426    UInt nf_i;
3427
3428    /* Some inlined subroutine call dwarf entries do not have the abstract
3429       origin attribute, resulting in absori being 0 (see callers of
3430       get_inlFnName). This is observed at least with gcc 6.3.0 when compiling
3431       valgrind with lto. So, in case we have a 0 absori, do not report an
3432       error, instead, rather return an unknown inlined function. */
3433    if (absori == 0) {
3434       static Bool absori0_reported = False;
3435       if (!absori0_reported && VG_(clo_verbosity) > 1) {
3436          VG_(message)(Vg_DebugMsg,
3437                       "Warning: inlined fn name without absori\n"
3438                       "is shown as UnknownInlinedFun\n");
3439          absori0_reported = True;
3440       }
3441       TRACE_D3(" <get_inlFnName>: absori is not set");
3442       return ML_(addStr)(cc->di, "UnknownInlinedFun", -1);
3443    }
3444
3445    posn = uncook_die( cc, absori, &type_flag, &alt_flag);
3446    if (type_flag)
3447       cc->barf("get_inlFnName: uncooked absori in type debug info");
3448
3449    /* LIMITATION: check we are in the same CU.
3450       If not, return unknown inlined function name. */
3451    /* if crossing between alt debug info<>normal info
3452           or posn not in the cu range,
3453       then it is in another CU. */
3454    if (alt_flag != cc->is_alt_info
3455        || posn < cc->cu_start_offset
3456        || posn >= cc->cu_start_offset + cc->unit_length) {
3457       static Bool reported = False;
3458       if (!reported && VG_(clo_verbosity) > 1) {
3459          VG_(message)(Vg_DebugMsg,
3460                       "Warning: cross-CU LIMITATION: some inlined fn names\n"
3461                       "might be shown as UnknownInlinedFun\n");
3462          reported = True;
3463       }
3464       TRACE_D3(" <get_inlFnName><%lx>: cross-CU LIMITATION", posn);
3465       return ML_(addStr)(cc->di, "UnknownInlinedFun", -1);
3466    }
3467
3468    init_Cursor (&c, cc->escn_debug_info, posn, cc->barf,
3469                 "Overrun get_inlFnName absori");
3470
3471    abbv_code = get_ULEB128( &c );
3472    abbv      = get_abbv ( cc, abbv_code, td3);
3473    atag      = abbv->atag;
3474    TRACE_D3(" <get_inlFnName><%lx>: Abbrev Number: %llu (%s)\n",
3475             posn, abbv_code, ML_(pp_DW_TAG)( atag ) );
3476
3477    if (atag == 0)
3478       cc->barf("get_inlFnName: invalid zero tag on DIE");
3479
3480    has_children = abbv->has_children;
3481    if (has_children != DW_children_no && has_children != DW_children_yes)
3482       cc->barf("get_inlFnName: invalid has_children value");
3483
3484    if (atag != DW_TAG_subprogram)
3485       cc->barf("get_inlFnName: absori not a subprogram");
3486
3487    nf_i = 0;
3488    while (True) {
3489       DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
3490       DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
3491       const name_form *nf = &abbv->nf[nf_i];
3492       nf_i++;
3493       if (attr == 0 && form == 0) break;
3494       get_Form_contents( &cts, cc, &c, False/*td3*/, nf );
3495       if (attr == DW_AT_name) {
3496          HChar *fnname;
3497          if (cts.szB >= 0)
3498             cc->barf("get_inlFnName: expecting indirect string");
3499          fnname = ML_(cur_read_strdup)( cts.u.cur,
3500                                         "get_inlFnName.1" );
3501          ret = ML_(addStr)(cc->di, fnname, -1);
3502          ML_(dinfo_free) (fnname);
3503          break; /* Name found, get out of the loop, as this has priority over
3504                  DW_AT_specification. */
3505       }
3506       if (attr == DW_AT_specification) {
3507          UWord cdie;
3508
3509          if (cts.szB == 0)
3510             cc->barf("get_inlFnName: AT specification missing");
3511
3512          /* The recursive call to get_inlFnName will uncook its arg.
3513             So, we need to cook it here, so as to reference the
3514             correct section (e.g. the alt info). */
3515          cdie = cook_die_using_form(cc, (UWord)cts.u.val, form);
3516
3517          /* hoping that there is no loop */
3518          ret = get_inlFnName (cdie, cc, td3);
3519          /* Unclear if having both DW_AT_specification and DW_AT_name is
3520             possible but in any case, we do not break here.
3521             If we find later on a DW_AT_name, it will override the name found
3522             in the DW_AT_specification.*/
3523       }
3524    }
3525
3526    if (ret)
3527       return ret;
3528    else {
3529       TRACE_D3("AbsOriFnNameNotFound");
3530       return ML_(addStr)(cc->di, "AbsOriFnNameNotFound", -1);
3531    }
3532 }
3533
3534 /* Returns True if the (possibly) childrens of the current DIE are interesting
3535    to parse. Returns False otherwise.
3536    If the current DIE has a sibling, the non interesting children can
3537    maybe be skipped (if the DIE has a DW_AT_sibling).  */
3538 __attribute__((noinline))
3539 static Bool parse_inl_DIE (
3540    /*MOD*/D3InlParser* parser,
3541    XArray** fndn_ix_Table,
3542    ULong *debug_line_offset,
3543    DW_TAG dtag,
3544    UWord posn,
3545    Int level,
3546    Cursor* c_die,
3547    const g_abbv *abbv,
3548    CUConst* cc,
3549    Bool td3
3550 )
3551 {
3552    FormContents cts;
3553    UInt nf_i;
3554
3555    UWord saved_die_c_offset  = get_position_of_Cursor( c_die );
3556
3557    /* Get info about DW_TAG_compile_unit and DW_TAG_partial_unit which in theory
3558       could also contain inlined fn calls, if they cover an address range.  */
3559    Bool unit_has_addrs = False;
3560    if (dtag == DW_TAG_compile_unit || dtag == DW_TAG_partial_unit
3561        || dtag == DW_TAG_skeleton_unit) {
3562       Bool have_lo    = False;
3563       Addr ip_lo    = 0;
3564       const HChar *compdir = NULL;
3565       Bool has_stmt_list = False;
3566       ULong cu_line_offset = 0;
3567
3568       if (level == 0)
3569          setup_cu_bases(cc, c_die, abbv);
3570       nf_i = 0;
3571       while (True) {
3572          DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
3573          DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
3574          const name_form *nf = &abbv->nf[nf_i];
3575          nf_i++;
3576          if (attr == 0 && form == 0) break;
3577          get_Form_contents( &cts, cc, c_die, False/*td3*/, nf );
3578          if (attr == DW_AT_low_pc && cts.szB > 0) {
3579             ip_lo   = cts.u.val;
3580             have_lo = True;
3581             unit_has_addrs = True;
3582          }
3583          if (attr == DW_AT_ranges && cts.szB > 0)
3584             unit_has_addrs = True;
3585          if (attr == DW_AT_comp_dir) {
3586             if (cts.szB >= 0)
3587                cc->barf("parse_inl_DIE compdir: expecting indirect string");
3588             HChar *str = ML_(cur_read_strdup)( cts.u.cur,
3589                                                "parse_inl_DIE.compdir" );
3590             compdir = ML_(addStr)(cc->di, str, -1);
3591             ML_(dinfo_free) (str);
3592          }
3593          if (attr == DW_AT_stmt_list && cts.szB > 0) {
3594             has_stmt_list = True;
3595             cu_line_offset = cts.u.val;
3596          }
3597          if (attr == DW_AT_sibling && cts.szB > 0) {
3598             parser->sibling = cts.u.val;
3599          }
3600       }
3601       if (level == 0) {
3602          setup_cu_svma (cc, have_lo, ip_lo, td3);
3603          if (has_stmt_list && unit_has_addrs
3604             && *debug_line_offset != cu_line_offset) {
3605             reset_fndn_ix_table ( fndn_ix_Table, debug_line_offset,
3606                                   cu_line_offset );
3607             read_filename_table( *fndn_ix_Table, compdir,
3608                                  cc, cu_line_offset, td3 );
3609          }
3610       }
3611    }
3612
3613    if (dtag == DW_TAG_inlined_subroutine) {
3614       Bool   have_lo    = False;
3615       Bool   have_hi1   = False;
3616       Bool   have_range = False;
3617       Bool   hiIsRelative = False;
3618       Addr   ip_lo      = 0;
3619       Addr   ip_hi1     = 0;
3620       Addr   rangeoff   = 0;
3621       UInt   caller_fndn_ix = 0;
3622       Int caller_lineno = 0;
3623       Int inlinedfn_abstract_origin = 0;
3624       // 0 will be interpreted as no abstract origin by get_inlFnName
3625
3626       nf_i = 0;
3627       while (True) {
3628          DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
3629          DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
3630          const name_form *nf = &abbv->nf[nf_i];
3631          nf_i++;
3632          if (attr == 0 && form == 0) break;
3633          get_Form_contents( &cts, cc, c_die, False/*td3*/, nf );
3634          if (attr == DW_AT_call_file && cts.szB > 0) {
3635             Int ftabIx = (Int)cts.u.val;
3636             if (ftabIx >= 1
3637                 && ftabIx < VG_(sizeXA)( *fndn_ix_Table )) {
3638                caller_fndn_ix = *(UInt*)
3639                           VG_(indexXA)( *fndn_ix_Table, ftabIx );
3640             }
3641             if (0) VG_(printf)("XXX caller_fndn_ix = %u %s\n", caller_fndn_ix,
3642                                ML_(fndn_ix2filename) (cc->di, caller_fndn_ix));
3643          }
3644          if (attr == DW_AT_call_line && cts.szB > 0) {
3645             caller_lineno = cts.u.val;
3646          }
3647
3648          if (attr == DW_AT_abstract_origin  && cts.szB > 0) {
3649             inlinedfn_abstract_origin
3650                = cook_die_using_form (cc, (UWord)cts.u.val, form);
3651          }
3652
3653          if (attr == DW_AT_low_pc && cts.szB > 0) {
3654             ip_lo   = cts.u.val;
3655             have_lo = True;
3656          }
3657          if (attr == DW_AT_high_pc && cts.szB > 0) {
3658             ip_hi1   = cts.u.val;
3659             have_hi1 = True;
3660             if (form != DW_FORM_addr)
3661                hiIsRelative = True;
3662          }
3663          if (attr == DW_AT_ranges && cts.szB > 0) {
3664             rangeoff   = cts.u.val;
3665             have_range = True;
3666          }
3667          if (attr == DW_AT_sibling && cts.szB > 0) {
3668             parser->sibling = cts.u.val;
3669          }
3670       }
3671       if (have_lo && have_hi1 && hiIsRelative)
3672          ip_hi1 += ip_lo;
3673       /* Do we have something that looks sane? */
3674       if (dtag == DW_TAG_inlined_subroutine
3675           && (!have_lo) && (!have_hi1) && (!have_range)) {
3676          /* Seems strange. How can an inlined subroutine have
3677             no code ? */
3678          goto_bad_DIE;
3679       } else
3680       if (have_lo && have_hi1 && (!have_range)) {
3681          /* This inlined call is just a single address range. */
3682          if (ip_lo < ip_hi1) {
3683             /* Apply text debug biasing */
3684             ip_lo += cc->di->text_debug_bias;
3685             ip_hi1 += cc->di->text_debug_bias;
3686             ML_(addInlInfo) (cc->di,
3687                              ip_lo, ip_hi1,
3688                              get_inlFnName (inlinedfn_abstract_origin, cc, td3),
3689                              caller_fndn_ix,
3690                              caller_lineno, level);
3691          }
3692       } else if (have_range) {
3693          /* This inlined call is several address ranges. */
3694          XArray *ranges;
3695          Word j;
3696          const HChar *inlfnname =
3697             get_inlFnName (inlinedfn_abstract_origin, cc, td3);
3698
3699          /* Ranges are biased for the inline info using the same logic
3700             as what is used for biasing ranges for the var info, for which
3701             ranges are read using cc->cu_svma (see parse_var_DIE).
3702             Then text_debug_bias is added when a (non global) var
3703             is recorded (see just before the call to ML_(addVar)) */
3704          ranges = get_range_list( cc, td3,
3705                                   rangeoff, cc->cu_svma );
3706          for (j = 0; j < VG_(sizeXA)( ranges ); j++) {
3707             AddrRange* range = (AddrRange*) VG_(indexXA)( ranges, j );
3708             ML_(addInlInfo) (cc->di,
3709                              range->aMin   + cc->di->text_debug_bias,
3710                              range->aMax+1 + cc->di->text_debug_bias,
3711                              // aMax+1 as range has its last bound included
3712                              // while ML_(addInlInfo) expects last bound not
3713                              // included.
3714                              inlfnname,
3715                              caller_fndn_ix,
3716                              caller_lineno, level);
3717          }
3718          VG_(deleteXA)( ranges );
3719       } else
3720          goto_bad_DIE;
3721    }
3722
3723    // Only recursively parse the (possible) children for the DIE which
3724    // might maybe contain a DW_TAG_inlined_subroutine:
3725    Bool ret = (unit_has_addrs
3726                || dtag == DW_TAG_lexical_block || dtag == DW_TAG_subprogram
3727                || dtag == DW_TAG_inlined_subroutine || dtag == DW_TAG_namespace);
3728    return ret;
3729
3730   bad_DIE:
3731    dump_bad_die_and_barf("parse_inl_DIE", dtag, posn, level,
3732                          c_die, saved_die_c_offset,
3733                          abbv,
3734                          cc);
3735    /*NOTREACHED*/
3736 }
3737
3738
3739 /*------------------------------------------------------------*/
3740 /*---                                                      ---*/
3741 /*--- Parsing of type-related DIEs                         ---*/
3742 /*---                                                      ---*/
3743 /*------------------------------------------------------------*/
3744
3745 typedef
3746    struct {
3747       /* What source language?  'A'=Ada83/95,
3748                                 'C'=C/C++,
3749                                 'F'=Fortran,
3750                                 '?'=other
3751          Established once per compilation unit. */
3752       UChar language;
3753       /* A stack of types which are currently under construction */
3754       Int   sp; /* [sp] is innermost active entry; sp==-1 for empty
3755                    stack */
3756       Int   stack_size;
3757       /* Note that the TyEnts in qparentE are temporary copies of the
3758          ones accumulating in the main tyent array.  So it is not safe
3759          to free up anything on them when popping them off the stack
3760          (iow, it isn't safe to use TyEnt__make_EMPTY on them).  Just
3761          memset them to zero when done. */
3762       TyEnt *qparentE; /* parent TyEnts */
3763       Int   *qlevel;
3764    }
3765    D3TypeParser;
3766
3767 /* Completely initialise a type parser object */
3768 static void
3769 type_parser_init ( D3TypeParser *parser )
3770 {
3771    parser->sp = -1;
3772    parser->language = '?';
3773    parser->stack_size = 0;
3774    parser->qparentE = NULL;
3775    parser->qlevel   = NULL;
3776 }
3777
3778 /* Release any memory hanging off a type parser object */
3779 static void
3780 type_parser_release ( D3TypeParser *parser )
3781 {
3782    ML_(dinfo_free)( parser->qparentE );
3783    ML_(dinfo_free)( parser->qlevel );
3784 }
3785
3786 static void typestack_show ( const D3TypeParser* parser, const HChar* str )
3787 {
3788    Word i;
3789    VG_(printf)("  typestack (%s) {\n", str);
3790    for (i = 0; i <= parser->sp; i++) {
3791       VG_(printf)("    [%ld] (level %d): ", i, parser->qlevel[i]);
3792       ML_(pp_TyEnt)( &parser->qparentE[i] );
3793       VG_(printf)("\n");
3794    }
3795    VG_(printf)("  }\n");
3796 }
3797
3798 /* Remove from the stack, all entries with .level > 'level' */
3799 static
3800 void typestack_preen ( D3TypeParser* parser, Bool td3, Int level )
3801 {
3802    Bool changed = False;
3803    vg_assert(parser->sp < parser->stack_size);
3804    while (True) {
3805       vg_assert(parser->sp >= -1);
3806       if (parser->sp == -1) break;
3807       if (parser->qlevel[parser->sp] <= level) break;
3808       if (0)
3809          TRACE_D3("BBBBAAAA typestack_pop [newsp=%d]\n", parser->sp-1);
3810       vg_assert(ML_(TyEnt__is_type)(&parser->qparentE[parser->sp]));
3811       parser->sp--;
3812       changed = True;
3813    }
3814    if (changed && td3)
3815       typestack_show( parser, "after preen" );
3816 }
3817
3818 static Bool typestack_is_empty ( const D3TypeParser* parser )
3819 {
3820    vg_assert(parser->sp >= -1 && parser->sp < parser->stack_size);
3821    return parser->sp == -1;
3822 }
3823
3824 static void typestack_push ( const CUConst* cc,
3825                              D3TypeParser* parser,
3826                              Bool td3,
3827                              const TyEnt* parentE, Int level )
3828 {
3829    if (0)
3830    TRACE_D3("BBBBAAAA typestack_push[newsp=%d]: %d  %05lx\n",
3831             parser->sp+1, level, parentE->cuOff);
3832
3833    /* First we need to zap everything >= 'level', as we are about to
3834       replace any previous entry at 'level', so .. */
3835    typestack_preen(parser, /*td3*/False, level-1);
3836
3837    vg_assert(parser->sp >= -1);
3838    vg_assert(parser->sp < parser->stack_size);
3839    if (parser->sp == parser->stack_size - 1) {
3840       parser->stack_size += 16;
3841       parser->qparentE =
3842          ML_(dinfo_realloc)("di.readdwarf3.typush.1", parser->qparentE,
3843                             parser->stack_size * sizeof parser->qparentE[0]);
3844       parser->qlevel =
3845          ML_(dinfo_realloc)("di.readdwarf3.typush.2", parser->qlevel,
3846                             parser->stack_size * sizeof parser->qlevel[0]);
3847    }
3848    if (parser->sp >= 0)
3849       vg_assert(parser->qlevel[parser->sp] < level);
3850    parser->sp++;
3851    vg_assert(parentE);
3852    vg_assert(ML_(TyEnt__is_type)(parentE));
3853    vg_assert(parentE->cuOff != D3_INVALID_CUOFF);
3854    parser->qparentE[parser->sp] = *parentE;
3855    parser->qlevel[parser->sp]  = level;
3856    if (TD3)
3857       typestack_show( parser, "after push" );
3858 }
3859
3860 /* True if the subrange type being parsed gives the bounds of an array. */
3861 static Bool subrange_type_denotes_array_bounds ( const D3TypeParser* parser,
3862                                                  DW_TAG dtag ) {
3863    vg_assert(dtag == DW_TAG_subrange_type);
3864    /* If we don't know the language, assume false.  */
3865    if (parser->language == '?')
3866       return False;
3867    /* For most languages, a subrange_type dtag always gives the
3868       bounds of an array.
3869       For Ada, there are additional conditions as a subrange_type
3870       is also used for other purposes. */
3871    if (parser->language != 'A')
3872       /* not Ada, so it definitely denotes an array bound. */
3873       return True;
3874    else
3875       /* Extra constraints for Ada: it only denotes an array bound if .. */
3876       return (! typestack_is_empty(parser)
3877               && parser->qparentE[parser->sp].tag == Te_TyArray);
3878 }
3879
3880 /* True if the form is one of the forms supported to give an array bound.
3881    For some arrays (scope local arrays with variable size),
3882    a DW_FORM_ref4 was used, and was wrongly used as the bound value.
3883    So, refuse the forms that are known to give a problem. */
3884 static Bool form_expected_for_bound ( DW_FORM form ) {
3885    if (form == DW_FORM_ref1
3886        || form == DW_FORM_ref2
3887        || form == DW_FORM_ref4
3888        || form == DW_FORM_ref8)
3889       return False;
3890
3891    return True;
3892 }
3893
3894 /* Parse a type-related DIE.  'parser' holds the current parser state.
3895    'admin' is where the completed types are dumped.  'dtag' is the tag
3896    for this DIE.  'c_die' points to the start of the data fields (FORM
3897    stuff) for the DIE.  abbv is the parsed abbreviation which describes
3898    the DIE.
3899
3900    We may find the DIE uninteresting, in which case we should ignore
3901    it.
3902
3903    What happens: the DIE is examined.  If uninteresting, it is ignored.
3904    Otherwise, the DIE gives rise to two things:
3905
3906    (1) the offset of this DIE in the CU -- the cuOffset, a UWord
3907    (2) a TyAdmin structure, which holds the type, or related stuff
3908
3909    (2) is added at the end of 'tyadmins', at some index, say 'i'.
3910
3911    A pair (cuOffset, i) is added to 'tydict'.
3912
3913    Hence 'tyadmins' holds the actual type entities, and 'tydict' holds
3914    a mapping from cuOffset to the index of the corresponding entry in
3915    'tyadmins'.
3916
3917    When resolving a cuOffset to a TyAdmin, first look up the cuOffset
3918    in the tydict (by binary search).  This gives an index into
3919    tyadmins, and the required entity lives in tyadmins at that index.
3920 */
3921 __attribute__((noinline))
3922 static void parse_type_DIE ( /*MOD*/XArray* /* of TyEnt */ tyents,
3923                              /*MOD*/D3TypeParser* parser,
3924                              DW_TAG dtag,
3925                              UWord posn,
3926                              Int level,
3927                              Cursor* c_die,
3928                              const g_abbv *abbv,
3929                              CUConst* cc,
3930                              Bool td3 )
3931 {
3932    FormContents cts;
3933    UInt nf_i;
3934    TyEnt typeE;
3935    TyEnt atomE;
3936    TyEnt fieldE;
3937    TyEnt boundE;
3938
3939    UWord saved_die_c_offset  = get_position_of_Cursor( c_die );
3940
3941    VG_(memset)( &typeE,  0xAA, sizeof(typeE) );
3942    VG_(memset)( &atomE,  0xAA, sizeof(atomE) );
3943    VG_(memset)( &fieldE, 0xAA, sizeof(fieldE) );
3944    VG_(memset)( &boundE, 0xAA, sizeof(boundE) );
3945
3946    /* If we've returned to a level at or above any previously noted
3947       parent, un-note it, so we don't believe we're still collecting
3948       its children. */
3949    typestack_preen( parser, td3, level-1 );
3950
3951    if (dtag == DW_TAG_compile_unit
3952        || dtag == DW_TAG_type_unit
3953        || dtag == DW_TAG_partial_unit
3954        || dtag == DW_TAG_skeleton_unit) {
3955       if (level == 0)
3956          setup_cu_bases(cc, c_die, abbv);
3957       /* See if we can find DW_AT_language, since it is important for
3958          establishing array bounds (see DW_TAG_subrange_type below in
3959          this fn) */
3960       nf_i = 0;
3961       while (True) {
3962          DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
3963          DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
3964          const name_form *nf = &abbv->nf[nf_i];
3965          nf_i++;
3966          if (attr == 0 && form == 0) break;
3967          get_Form_contents( &cts, cc, c_die, False/*td3*/, nf );
3968          if (attr != DW_AT_language)
3969             continue;
3970          if (cts.szB <= 0)
3971            goto_bad_DIE;
3972          switch (cts.u.val) {
3973             case DW_LANG_C89: case DW_LANG_C:
3974             case DW_LANG_C_plus_plus: case DW_LANG_ObjC:
3975             case DW_LANG_ObjC_plus_plus: case DW_LANG_UPC:
3976             case DW_LANG_Upc: case DW_LANG_C99: case DW_LANG_C11:
3977             case DW_LANG_C_plus_plus_11: case DW_LANG_C_plus_plus_14:
3978                parser->language = 'C'; break;
3979             case DW_LANG_Fortran77: case DW_LANG_Fortran90:
3980             case DW_LANG_Fortran95: case DW_LANG_Fortran03:
3981             case DW_LANG_Fortran08:
3982                parser->language = 'F'; break;
3983             case DW_LANG_Ada83: case DW_LANG_Ada95:
3984                parser->language = 'A'; break;
3985             case DW_LANG_Cobol74:
3986             case DW_LANG_Cobol85: case DW_LANG_Pascal83:
3987             case DW_LANG_Modula2: case DW_LANG_Java:
3988             case DW_LANG_PLI:
3989             case DW_LANG_D: case DW_LANG_Python: case DW_LANG_Go:
3990             case DW_LANG_Mips_Assembler:
3991                parser->language = '?'; break;
3992             default:
3993                goto_bad_DIE;
3994          }
3995       }
3996    }
3997
3998    if (dtag == DW_TAG_base_type) {
3999       /* We can pick up a new base type any time. */
4000       VG_(memset)(&typeE, 0, sizeof(typeE));
4001       typeE.cuOff = D3_INVALID_CUOFF;
4002       typeE.tag   = Te_TyBase;
4003       nf_i = 0;
4004       while (True) {
4005          DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
4006          DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
4007          const name_form *nf = &abbv->nf[nf_i];
4008          nf_i++;
4009          if (attr == 0 && form == 0) break;
4010          get_Form_contents( &cts, cc, c_die, False/*td3*/, nf );
4011          if (attr == DW_AT_name && cts.szB < 0) {
4012             typeE.Te.TyBase.name
4013                = ML_(cur_read_strdup)( cts.u.cur,
4014                                        "di.readdwarf3.ptD.base_type.1" );
4015          }
4016          if (attr == DW_AT_byte_size && cts.szB > 0) {
4017             typeE.Te.TyBase.szB = cts.u.val;
4018          }
4019          if (attr == DW_AT_encoding && cts.szB > 0) {
4020             switch (cts.u.val) {
4021                case DW_ATE_unsigned: case DW_ATE_unsigned_char:
4022                case DW_ATE_UTF: /* since DWARF4, e.g. char16_t from C++ */
4023                case DW_ATE_boolean:/* FIXME - is this correct? */
4024                case DW_ATE_unsigned_fixed:
4025                   typeE.Te.TyBase.enc = 'U'; break;
4026                case DW_ATE_signed: case DW_ATE_signed_char:
4027                case DW_ATE_signed_fixed:
4028                   typeE.Te.TyBase.enc = 'S'; break;
4029                case DW_ATE_float:
4030                   typeE.Te.TyBase.enc = 'F'; break;
4031                case DW_ATE_complex_float:
4032                   typeE.Te.TyBase.enc = 'C'; break;
4033                default:
4034                   goto_bad_DIE;
4035             }
4036          }
4037       }
4038
4039       /* Invent a name if it doesn't have one.  gcc-4.3
4040          -ftree-vectorize is observed to emit nameless base types. */
4041       if (!typeE.Te.TyBase.name)
4042          typeE.Te.TyBase.name
4043             = ML_(dinfo_strdup)( "di.readdwarf3.ptD.base_type.2",
4044                                  "<anon_base_type>" );
4045
4046       /* Do we have something that looks sane? */
4047       if (/* must have a name */
4048           typeE.Te.TyBase.name == NULL
4049           /* and a plausible size.  Yes, really 32: "complex long
4050              double" apparently has size=32 */
4051           || typeE.Te.TyBase.szB < 0 || typeE.Te.TyBase.szB > 32
4052           /* and a plausible encoding */
4053           || (typeE.Te.TyBase.enc != 'U'
4054               && typeE.Te.TyBase.enc != 'S'
4055               && typeE.Te.TyBase.enc != 'F'
4056               && typeE.Te.TyBase.enc != 'C'))
4057          goto_bad_DIE;
4058       /* Last minute hack: if we see this
4059          <1><515>: DW_TAG_base_type
4060              DW_AT_byte_size   : 0
4061              DW_AT_encoding    : 5
4062              DW_AT_name        : void
4063          convert it into a real Void type. */
4064       if (typeE.Te.TyBase.szB == 0
4065           && 0 == VG_(strcmp)("void", typeE.Te.TyBase.name)) {
4066          ML_(TyEnt__make_EMPTY)(&typeE);
4067          typeE.tag = Te_TyVoid;
4068          typeE.Te.TyVoid.isFake = False; /* it's a real one! */
4069       }
4070
4071       goto acquire_Type;
4072    }
4073
4074    /*
4075     * An example of DW_TAG_rvalue_reference_type:
4076     *
4077     * $ readelf --debug-dump /usr/lib/debug/usr/lib/libstdc++.so.6.0.16.debug
4078     *  <1><1014>: Abbrev Number: 55 (DW_TAG_rvalue_reference_type)
4079     *     <1015>   DW_AT_byte_size   : 4
4080     *     <1016>   DW_AT_type        : <0xe52>
4081     */
4082    if (dtag == DW_TAG_pointer_type || dtag == DW_TAG_reference_type
4083        || dtag == DW_TAG_ptr_to_member_type
4084        || dtag == DW_TAG_rvalue_reference_type) {
4085       /* This seems legit for _pointer_type and _reference_type.  I
4086          don't know if rolling _ptr_to_member_type in here really is
4087          legit, but it's better than not handling it at all. */
4088       VG_(memset)(&typeE, 0, sizeof(typeE));
4089       typeE.cuOff = D3_INVALID_CUOFF;
4090       switch (dtag) {
4091       case DW_TAG_pointer_type:
4092          typeE.tag = Te_TyPtr;
4093          break;
4094       case DW_TAG_reference_type:
4095          typeE.tag = Te_TyRef;
4096          break;
4097       case DW_TAG_ptr_to_member_type:
4098          typeE.tag = Te_TyPtrMbr;
4099          break;
4100       case DW_TAG_rvalue_reference_type:
4101          typeE.tag = Te_TyRvalRef;
4102          break;
4103       default:
4104          vg_assert(False);
4105       }
4106       /* target type defaults to void */
4107       typeE.Te.TyPorR.typeR = D3_FAKEVOID_CUOFF;
4108       /* These four type kinds don't *have* to specify their size, in
4109          which case we assume it's a machine word.  But if they do
4110          specify it, it must be a machine word :-)  This probably
4111          assumes that the word size of the Dwarf3 we're reading is the
4112          same size as that on the machine.  gcc appears to give a size
4113          whereas icc9 doesn't. */
4114       typeE.Te.TyPorR.szB = sizeof(UWord);
4115       nf_i = 0;
4116       while (True) {
4117          DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
4118          DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
4119          const name_form *nf = &abbv->nf[nf_i];
4120          nf_i++;
4121          if (attr == 0 && form == 0) break;
4122          get_Form_contents( &cts, cc, c_die, False/*td3*/, nf );
4123          if (attr == DW_AT_byte_size && cts.szB > 0) {
4124             typeE.Te.TyPorR.szB = cts.u.val;
4125          }
4126          if (attr == DW_AT_type && cts.szB > 0) {
4127             typeE.Te.TyPorR.typeR
4128                = cook_die_using_form( cc, (UWord)cts.u.val, form );
4129          }
4130       }
4131       /* Do we have something that looks sane? */
4132       if (typeE.Te.TyPorR.szB != sizeof(UWord))
4133          goto_bad_DIE;
4134       else
4135          goto acquire_Type;
4136    }
4137
4138    if (dtag == DW_TAG_enumeration_type) {
4139       /* Create a new Type to hold the results. */
4140       VG_(memset)(&typeE, 0, sizeof(typeE));
4141       typeE.cuOff = posn;
4142       typeE.tag   = Te_TyEnum;
4143       Bool is_decl = False;
4144       typeE.Te.TyEnum.atomRs
4145          = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ptD.enum_type.1",
4146                        ML_(dinfo_free),
4147                        sizeof(UWord) );
4148       nf_i=0;
4149       while (True) {
4150          DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
4151          DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
4152          const name_form *nf = &abbv->nf[nf_i];
4153          nf_i++;
4154          if (attr == 0 && form == 0) break;
4155          get_Form_contents( &cts, cc, c_die, False/*td3*/, nf );
4156          if (attr == DW_AT_name && cts.szB < 0) {
4157             typeE.Te.TyEnum.name
4158                = ML_(cur_read_strdup)( cts.u.cur,
4159                                        "di.readdwarf3.pTD.enum_type.2" );
4160          }
4161          if (attr == DW_AT_byte_size && cts.szB > 0) {
4162             typeE.Te.TyEnum.szB = cts.u.val;
4163          }
4164          if (attr == DW_AT_declaration) {
4165             is_decl = True;
4166          }
4167       }
4168
4169       if (!typeE.Te.TyEnum.name)
4170          typeE.Te.TyEnum.name
4171             = ML_(dinfo_strdup)( "di.readdwarf3.pTD.enum_type.3",
4172                                  "<anon_enum_type>" );
4173
4174       /* Do we have something that looks sane? */
4175       if (typeE.Te.TyEnum.szB == 0
4176           /* we must know the size */
4177           /* but not for Ada, which uses such dummy
4178              enumerations as helper for gdb ada mode.
4179              Also GCC allows incomplete enums as GNU extension.
4180              http://gcc.gnu.org/onlinedocs/gcc/Incomplete-Enums.html
4181              These are marked as DW_AT_declaration and won't have
4182              a size. They can only be used in declaration or as
4183              pointer types.  You can't allocate variables or storage
4184              using such an enum type. (Also GCC seems to have a bug
4185              that will put such an enumeration_type into a .debug_types
4186              unit which should only contain complete types.) */
4187           && (parser->language != 'A' && !is_decl)) {
4188          goto_bad_DIE;
4189       }
4190
4191       /* On't stack! */
4192       typestack_push( cc, parser, td3, &typeE, level );
4193       goto acquire_Type;
4194    }
4195
4196    /* gcc (GCC) 4.4.0 20081017 (experimental) occasionally produces
4197       DW_TAG_enumerator with only a DW_AT_name but no
4198       DW_AT_const_value.  This is in violation of the Dwarf3 standard,
4199       and appears to be a new "feature" of gcc - versions 4.3.x and
4200       earlier do not appear to do this.  So accept DW_TAG_enumerator
4201       which only have a name but no value.  An example:
4202
4203       <1><180>: Abbrev Number: 6 (DW_TAG_enumeration_type)
4204          <181>   DW_AT_name        : (indirect string, offset: 0xda70):
4205                                      QtMsgType
4206          <185>   DW_AT_byte_size   : 4
4207          <186>   DW_AT_decl_file   : 14
4208          <187>   DW_AT_decl_line   : 1480
4209          <189>   DW_AT_sibling     : <0x1a7>
4210       <2><18d>: Abbrev Number: 7 (DW_TAG_enumerator)
4211          <18e>   DW_AT_name        : (indirect string, offset: 0x9e18):
4212                                      QtDebugMsg
4213       <2><192>: Abbrev Number: 7 (DW_TAG_enumerator)
4214          <193>   DW_AT_name        : (indirect string, offset: 0x1505f):
4215                                      QtWarningMsg
4216       <2><197>: Abbrev Number: 7 (DW_TAG_enumerator)
4217          <198>   DW_AT_name        : (indirect string, offset: 0x16f4a):
4218                                      QtCriticalMsg
4219       <2><19c>: Abbrev Number: 7 (DW_TAG_enumerator)
4220          <19d>   DW_AT_name        : (indirect string, offset: 0x156dd):
4221                                      QtFatalMsg
4222       <2><1a1>: Abbrev Number: 7 (DW_TAG_enumerator)
4223          <1a2>   DW_AT_name        : (indirect string, offset: 0x13660):
4224                                      QtSystemMsg
4225    */
4226    if (dtag == DW_TAG_enumerator) {
4227       VG_(memset)( &atomE, 0, sizeof(atomE) );
4228       atomE.cuOff = posn;
4229       atomE.tag   = Te_Atom;
4230       nf_i = 0;
4231       while (True) {
4232          DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
4233          DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
4234          const name_form *nf = &abbv->nf[nf_i];
4235          nf_i++;
4236          if (attr == 0 && form == 0) break;
4237          get_Form_contents( &cts, cc, c_die, False/*td3*/, nf );
4238          if (attr == DW_AT_name && cts.szB < 0) {
4239             atomE.Te.Atom.name
4240               = ML_(cur_read_strdup)( cts.u.cur,
4241                                       "di.readdwarf3.pTD.enumerator.1" );
4242          }
4243          if (attr == DW_AT_const_value && cts.szB > 0) {
4244             atomE.Te.Atom.value      = cts.u.val;
4245             atomE.Te.Atom.valueKnown = True;
4246          }
4247       }
4248       /* Do we have something that looks sane? */
4249       if (atomE.Te.Atom.name == NULL)
4250          goto_bad_DIE;
4251       /* Do we have a plausible parent? */
4252       if (typestack_is_empty(parser)) goto_bad_DIE;
4253       vg_assert(ML_(TyEnt__is_type)(&parser->qparentE[parser->sp]));
4254       vg_assert(parser->qparentE[parser->sp].cuOff != D3_INVALID_CUOFF);
4255       if (level != parser->qlevel[parser->sp]+1) goto_bad_DIE;
4256       if (parser->qparentE[parser->sp].tag != Te_TyEnum) goto_bad_DIE;
4257       /* Record this child in the parent */
4258       vg_assert(parser->qparentE[parser->sp].Te.TyEnum.atomRs);
4259       VG_(addToXA)( parser->qparentE[parser->sp].Te.TyEnum.atomRs,
4260                     &atomE );
4261       /* And record the child itself */
4262       goto acquire_Atom;
4263    }
4264
4265    /* Treat DW_TAG_class_type as if it was a DW_TAG_structure_type.  I
4266       don't know if this is correct, but it at least makes this reader
4267       usable for gcc-4.3 produced Dwarf3. */
4268    if (dtag == DW_TAG_structure_type || dtag == DW_TAG_class_type
4269        || dtag == DW_TAG_union_type) {
4270       Bool have_szB = False;
4271       Bool is_decl  = False;
4272       Bool is_spec  = False;
4273       /* Create a new Type to hold the results. */
4274       VG_(memset)(&typeE, 0, sizeof(typeE));
4275       typeE.cuOff = posn;
4276       typeE.tag   = Te_TyStOrUn;
4277       typeE.Te.TyStOrUn.name = NULL;
4278       typeE.Te.TyStOrUn.typeR = D3_INVALID_CUOFF;
4279       typeE.Te.TyStOrUn.fieldRs
4280          = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.pTD.struct_type.1",
4281                        ML_(dinfo_free),
4282                        sizeof(UWord) );
4283       typeE.Te.TyStOrUn.complete = True;
4284       typeE.Te.TyStOrUn.isStruct = dtag == DW_TAG_structure_type
4285                                    || dtag == DW_TAG_class_type;
4286       nf_i = 0;
4287       while (True) {
4288          DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
4289          DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
4290          const name_form *nf = &abbv->nf[nf_i];
4291          nf_i++;
4292          if (attr == 0 && form == 0) break;
4293          get_Form_contents( &cts, cc, c_die, False/*td3*/, nf );
4294          if (attr == DW_AT_name && cts.szB < 0) {
4295             typeE.Te.TyStOrUn.name
4296                = ML_(cur_read_strdup)( cts.u.cur,
4297                                        "di.readdwarf3.ptD.struct_type.2" );
4298          }
4299          if (attr == DW_AT_byte_size && cts.szB >= 0) {
4300             typeE.Te.TyStOrUn.szB = cts.u.val;
4301             have_szB = True;
4302          }
4303          if (attr == DW_AT_declaration && cts.szB > 0 && cts.u.val > 0) {
4304             is_decl = True;
4305          }
4306          if (attr == DW_AT_specification && cts.szB > 0 && cts.u.val > 0) {
4307             is_spec = True;
4308          }
4309          if (attr == DW_AT_signature && form == DW_FORM_ref_sig8
4310              && cts.szB > 0) {
4311             have_szB = True;
4312             typeE.Te.TyStOrUn.szB = 8;
4313             typeE.Te.TyStOrUn.typeR
4314                = cook_die_using_form( cc, (UWord)cts.u.val, form );
4315          }
4316       }
4317       /* Do we have something that looks sane? */
4318       if (is_decl && (!is_spec)) {
4319          /* It's a DW_AT_declaration.  We require the name but
4320             nothing else. */
4321          /* JRS 2012-06-28: following discussion w/ tromey, if the
4322             type doesn't have name, just make one up, and accept it.
4323             It might be referred to by other DIEs, so ignoring it
4324             doesn't seem like a safe option. */
4325          if (typeE.Te.TyStOrUn.name == NULL)
4326             typeE.Te.TyStOrUn.name
4327                = ML_(dinfo_strdup)( "di.readdwarf3.ptD.struct_type.3",
4328                                     "<anon_struct_type>" );
4329          typeE.Te.TyStOrUn.complete = False;
4330          /* JRS 2009 Aug 10: <possible kludge>? */
4331          /* Push this tyent on the stack, even though it's incomplete.
4332             It appears that gcc-4.4 on Fedora 11 will sometimes create
4333             DW_TAG_member entries for it, and so we need to have a
4334             plausible parent present in order for that to work.  See
4335             #200029 comments 8 and 9. */
4336          typestack_push( cc, parser, td3, &typeE, level );
4337          /* </possible kludge> */
4338          goto acquire_Type;
4339       }
4340       if ((!is_decl) /* && (!is_spec) */) {
4341          /* this is the common, ordinary case */
4342          /* The name can be present, or not */
4343          if (!have_szB) {
4344             /* We must know the size.
4345                But in Ada, record with discriminants might have no size.
4346                But in C, VLA in the middle of a struct (gcc extension)
4347                might have no size.
4348                Instead, some GNAT dwarf extensions and/or dwarf entries
4349                allow to calculate the struct size at runtime.
4350                We cannot do that (yet?) so, the temporary kludge is to use
4351                a small size. */
4352             typeE.Te.TyStOrUn.szB = 1;
4353          }
4354          /* On't stack! */
4355          typestack_push( cc, parser, td3, &typeE, level );
4356          goto acquire_Type;
4357       }
4358       else {
4359          /* don't know how to handle any other variants just now */
4360          goto_bad_DIE;
4361       }
4362    }
4363
4364    if (dtag == DW_TAG_member) {
4365       /* Acquire member entries for both DW_TAG_structure_type and
4366          DW_TAG_union_type.  They differ minorly, in that struct
4367          members must have a DW_AT_data_member_location expression
4368          whereas union members must not. */
4369       Bool parent_is_struct;
4370       Bool is_artificial = False;
4371       VG_(memset)( &fieldE, 0, sizeof(fieldE) );
4372       fieldE.cuOff = posn;
4373       fieldE.tag   = Te_Field;
4374       fieldE.Te.Field.typeR = D3_INVALID_CUOFF;
4375       nf_i = 0;
4376       while (True) {
4377          DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
4378          DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
4379          const name_form *nf = &abbv->nf[nf_i];
4380          nf_i++;
4381          if (attr == 0 && form == 0) break;
4382          get_Form_contents( &cts, cc, c_die, False/*td3*/, nf );
4383          if (attr == DW_AT_name && cts.szB < 0) {
4384             fieldE.Te.Field.name
4385                = ML_(cur_read_strdup)( cts.u.cur,
4386                                        "di.readdwarf3.ptD.member.1" );
4387          }
4388          if (attr == DW_AT_type && cts.szB > 0) {
4389             fieldE.Te.Field.typeR
4390                = cook_die_using_form( cc, (UWord)cts.u.val, form );
4391          }
4392          /* There are 2 different cases for DW_AT_data_member_location.
4393             If it is a constant class attribute, it contains byte offset
4394             from the beginning of the containing entity.
4395             Otherwise it is a location expression.  */
4396          if (attr == DW_AT_data_member_location && cts.szB > 0) {
4397             fieldE.Te.Field.nLoc = -1;
4398             fieldE.Te.Field.pos.offset = cts.u.val;
4399          }
4400          if (attr == DW_AT_data_member_location && cts.szB <= 0) {
4401             fieldE.Te.Field.nLoc = (UWord)(-cts.szB);
4402             fieldE.Te.Field.pos.loc
4403                = ML_(cur_read_memdup)( cts.u.cur,
4404                                        (SizeT)fieldE.Te.Field.nLoc,
4405                                        "di.readdwarf3.ptD.member.2" );
4406          }
4407          if (attr == DW_AT_artificial && cts.u.val == 1)
4408             is_artificial = True;
4409       }
4410       /* Skip artificial members, they might not behave as expected.  */
4411       if (is_artificial)
4412          goto no_location;
4413       /* Do we have a plausible parent? */
4414       if (typestack_is_empty(parser)) goto_bad_DIE;
4415       vg_assert(ML_(TyEnt__is_type)(&parser->qparentE[parser->sp]));
4416       vg_assert(parser->qparentE[parser->sp].cuOff != D3_INVALID_CUOFF);
4417       if (level != parser->qlevel[parser->sp]+1) goto_bad_DIE;
4418       if (parser->qparentE[parser->sp].tag != Te_TyStOrUn) goto_bad_DIE;
4419       /* Do we have something that looks sane?  If this is a member of a
4420          struct, we must have a location expression; but if a member
4421          of a union that is irrelevant (D3 spec sec 5.6.6).  We ought
4422          to reject in the latter case, but some compilers have been
4423          observed to emit constant-zero expressions.  So just ignore
4424          them. */
4425       parent_is_struct
4426          = parser->qparentE[parser->sp].Te.TyStOrUn.isStruct;
4427       if (!fieldE.Te.Field.name)
4428          fieldE.Te.Field.name
4429             = ML_(dinfo_strdup)( "di.readdwarf3.ptD.member.3",
4430                                  "<anon_field>" );
4431       if (fieldE.Te.Field.typeR == D3_INVALID_CUOFF)
4432          goto_bad_DIE;
4433       if (fieldE.Te.Field.nLoc) {
4434          if (!parent_is_struct) {
4435             /* If this is a union type, pretend we haven't seen the data
4436                member location expression, as it is by definition
4437                redundant (it must be zero). */
4438             if (fieldE.Te.Field.nLoc > 0)
4439                ML_(dinfo_free)(fieldE.Te.Field.pos.loc);
4440             fieldE.Te.Field.pos.loc = NULL;
4441             fieldE.Te.Field.nLoc = 0;
4442          }
4443          /* Record this child in the parent */
4444          fieldE.Te.Field.isStruct = parent_is_struct;
4445          vg_assert(parser->qparentE[parser->sp].Te.TyStOrUn.fieldRs);
4446          VG_(addToXA)( parser->qparentE[parser->sp].Te.TyStOrUn.fieldRs,
4447                        &posn );
4448          /* And record the child itself */
4449          goto acquire_Field;
4450       } else {
4451          /* Member with no location - this can happen with static
4452             const members in C++ code which are compile time constants
4453             that do not exist in the class. They're not of any interest
4454             to us so we ignore them. */
4455         no_location:
4456          ML_(TyEnt__make_EMPTY)(&fieldE);
4457       }
4458    }
4459
4460    if (dtag == DW_TAG_array_type) {
4461       VG_(memset)(&typeE, 0, sizeof(typeE));
4462       typeE.cuOff = posn;
4463       typeE.tag   = Te_TyArray;
4464       typeE.Te.TyArray.typeR = D3_INVALID_CUOFF;
4465       typeE.Te.TyArray.boundRs
4466          = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ptD.array_type.1",
4467                        ML_(dinfo_free),
4468                        sizeof(UWord) );
4469       nf_i = 0;
4470       while (True) {
4471          DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
4472          DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
4473          const name_form *nf = &abbv->nf[nf_i];
4474          nf_i++;
4475          if (attr == 0 && form == 0) break;
4476          get_Form_contents( &cts, cc, c_die, False/*td3*/, nf );
4477          if (attr == DW_AT_type && cts.szB > 0) {
4478             typeE.Te.TyArray.typeR
4479                = cook_die_using_form( cc, (UWord)cts.u.val, form );
4480          }
4481       }
4482       if (typeE.Te.TyArray.typeR == D3_INVALID_CUOFF)
4483          goto_bad_DIE;
4484       /* On't stack! */
4485       typestack_push( cc, parser, td3, &typeE, level );
4486       goto acquire_Type;
4487    }
4488
4489    /* this is a subrange type defining the bounds of an array. */
4490    if (dtag == DW_TAG_subrange_type
4491        && subrange_type_denotes_array_bounds(parser, dtag)) {
4492       Bool have_lower = False;
4493       Bool have_upper = False;
4494       Bool have_count = False;
4495       Long lower = 0;
4496       Long upper = 0;
4497       Long count = 0;
4498
4499       switch (parser->language) {
4500          case 'C': have_lower = True;  lower = 0; break;
4501          case 'F': have_lower = True;  lower = 1; break;
4502          case '?': have_lower = False; break;
4503          case 'A': have_lower = False; break;
4504          default:  vg_assert(0); /* assured us by handling of
4505                                     DW_TAG_compile_unit in this fn */
4506       }
4507
4508       VG_(memset)( &boundE, 0, sizeof(boundE) );
4509       boundE.cuOff = D3_INVALID_CUOFF;
4510       boundE.tag   = Te_Bound;
4511       nf_i = 0;
4512       while (True) {
4513          DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
4514          DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
4515          const name_form *nf = &abbv->nf[nf_i];
4516          nf_i++;
4517          if (attr == 0 && form == 0) break;
4518          get_Form_contents( &cts, cc, c_die, False/*td3*/, nf );
4519          if (attr == DW_AT_lower_bound && cts.szB > 0
4520              && form_expected_for_bound (form)) {
4521             lower      = (Long)cts.u.val;
4522             have_lower = True;
4523          }
4524          if (attr == DW_AT_upper_bound && cts.szB > 0
4525              && form_expected_for_bound (form)) {
4526             upper      = (Long)cts.u.val;
4527             have_upper = True;
4528          }
4529          if (attr == DW_AT_count && cts.szB > 0) {
4530             count    = (Long)cts.u.val;
4531             have_count = True;
4532          }
4533       }
4534       /* FIXME: potentially skip the rest if no parent present, since
4535          it could be the case that this subrange type is free-standing
4536          (not being used to describe the bounds of a containing array
4537          type) */
4538       /* Do we have a plausible parent? */
4539       if (typestack_is_empty(parser)) goto_bad_DIE;
4540       vg_assert(ML_(TyEnt__is_type)(&parser->qparentE[parser->sp]));
4541       vg_assert(parser->qparentE[parser->sp].cuOff != D3_INVALID_CUOFF);
4542       if (level != parser->qlevel[parser->sp]+1) goto_bad_DIE;
4543       if (parser->qparentE[parser->sp].tag != Te_TyArray) goto_bad_DIE;
4544
4545       /* Figure out if we have a definite range or not */
4546       if (have_lower && have_upper && (!have_count)) {
4547          boundE.Te.Bound.knownL = True;
4548          boundE.Te.Bound.knownU = True;
4549          boundE.Te.Bound.boundL = lower;
4550          boundE.Te.Bound.boundU = upper;
4551       }
4552       else if (have_lower && (!have_upper) && (!have_count)) {
4553          boundE.Te.Bound.knownL = True;
4554          boundE.Te.Bound.knownU = False;
4555          boundE.Te.Bound.boundL = lower;
4556          boundE.Te.Bound.boundU = 0;
4557       }
4558       else if ((!have_lower) && have_upper && (!have_count)) {
4559          boundE.Te.Bound.knownL = False;
4560          boundE.Te.Bound.knownU = True;
4561          boundE.Te.Bound.boundL = 0;
4562          boundE.Te.Bound.boundU = upper;
4563       }
4564       else if ((!have_lower) && (!have_upper) && (!have_count)) {
4565          boundE.Te.Bound.knownL = False;
4566          boundE.Te.Bound.knownU = False;
4567          boundE.Te.Bound.boundL = 0;
4568          boundE.Te.Bound.boundU = 0;
4569       } else if (have_lower && (!have_upper) && (have_count)) {
4570          boundE.Te.Bound.knownL = True;
4571          boundE.Te.Bound.knownU = True;
4572          boundE.Te.Bound.boundL = lower;
4573          boundE.Te.Bound.boundU = lower + count - 1;
4574       } else {
4575          /* FIXME: handle more cases */
4576          goto_bad_DIE;
4577       }
4578
4579       /* Record this bound in the parent */
4580       boundE.cuOff = posn;
4581       vg_assert(parser->qparentE[parser->sp].Te.TyArray.boundRs);
4582       VG_(addToXA)( parser->qparentE[parser->sp].Te.TyArray.boundRs,
4583                     &boundE.cuOff );
4584       /* And record the child itself */
4585       goto acquire_Bound;
4586    }
4587
4588    /* typedef or subrange_type other than array bounds. */
4589    if (dtag == DW_TAG_typedef
4590        || (dtag == DW_TAG_subrange_type
4591            && !subrange_type_denotes_array_bounds(parser, dtag))) {
4592       /* subrange_type other than array bound is only for Ada. */
4593       vg_assert (dtag == DW_TAG_typedef || (parser->language == 'A'
4594                                             || parser->language == '?'));
4595       /* We can pick up a new typedef/subrange_type any time. */
4596       VG_(memset)(&typeE, 0, sizeof(typeE));
4597       typeE.cuOff = D3_INVALID_CUOFF;
4598       typeE.tag   = Te_TyTyDef;
4599       typeE.Te.TyTyDef.name = NULL;
4600       typeE.Te.TyTyDef.typeR = D3_INVALID_CUOFF;
4601       nf_i = 0;
4602       while (True) {
4603          DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
4604          DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
4605          const name_form *nf = &abbv->nf[nf_i];
4606          nf_i++;
4607          if (attr == 0 && form == 0) break;
4608          get_Form_contents( &cts, cc, c_die, False/*td3*/, nf );
4609          if (attr == DW_AT_name && cts.szB < 0) {
4610             typeE.Te.TyTyDef.name
4611                = ML_(cur_read_strdup)( cts.u.cur,
4612                                        "di.readdwarf3.ptD.typedef.1" );
4613          }
4614          if (attr == DW_AT_type && cts.szB > 0) {
4615             typeE.Te.TyTyDef.typeR
4616                = cook_die_using_form( cc, (UWord)cts.u.val, form );
4617          }
4618       }
4619       /* Do we have something that looks sane?
4620          gcc gnat Ada generates minimal typedef
4621          such as the below
4622          <6><91cc>: DW_TAG_typedef
4623             DW_AT_abstract_ori: <9066>
4624          g++ for OMP can generate artificial functions that have
4625          parameters that refer to pointers to unnamed typedefs.
4626          See https://bugs.kde.org/show_bug.cgi?id=273475
4627          So we cannot require a name for a DW_TAG_typedef.
4628       */
4629       goto acquire_Type;
4630    }
4631
4632    if (dtag == DW_TAG_subroutine_type) {
4633       /* function type? just record that one fact and ask no
4634          further questions. */
4635       VG_(memset)(&typeE, 0, sizeof(typeE));
4636       typeE.cuOff = D3_INVALID_CUOFF;
4637       typeE.tag   = Te_TyFn;
4638       goto acquire_Type;
4639    }
4640
4641    if (dtag == DW_TAG_volatile_type || dtag == DW_TAG_const_type
4642        || dtag == DW_TAG_restrict_type || dtag == DW_TAG_atomic_type) {
4643       Int have_ty = 0;
4644       VG_(memset)(&typeE, 0, sizeof(typeE));
4645       typeE.cuOff = D3_INVALID_CUOFF;
4646       typeE.tag   = Te_TyQual;
4647       typeE.Te.TyQual.qual
4648          = (dtag == DW_TAG_volatile_type ? 'V'
4649             : (dtag == DW_TAG_const_type ? 'C'
4650                : (dtag == DW_TAG_restrict_type ? 'R' : 'A')));
4651       /* target type defaults to 'void' */
4652       typeE.Te.TyQual.typeR = D3_FAKEVOID_CUOFF;
4653       nf_i = 0;
4654       while (True) {
4655          DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
4656          DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
4657          const name_form *nf = &abbv->nf[nf_i];
4658          nf_i++;
4659          if (attr == 0 && form == 0) break;
4660          get_Form_contents( &cts, cc, c_die, False/*td3*/, nf );
4661          if (attr == DW_AT_type && cts.szB > 0) {
4662             typeE.Te.TyQual.typeR
4663                = cook_die_using_form( cc, (UWord)cts.u.val, form );
4664             have_ty++;
4665          }
4666       }
4667       /* gcc sometimes generates DW_TAG_const/volatile_type without
4668          DW_AT_type and GDB appears to interpret the type as 'const
4669          void' (resp. 'volatile void').  So just allow it .. */
4670       if (have_ty == 1 || have_ty == 0)
4671          goto acquire_Type;
4672       else
4673          goto_bad_DIE;
4674    }
4675
4676    /*
4677     * Treat DW_TAG_unspecified_type as type void. An example of DW_TAG_unspecified_type:
4678     *
4679     * $ readelf --debug-dump /usr/lib/debug/usr/lib/libstdc++.so.6.0.16.debug
4680     *  <1><10d4>: Abbrev Number: 53 (DW_TAG_unspecified_type)
4681     *     <10d5>   DW_AT_name        : (indirect string, offset: 0xdb7): decltype(nullptr)
4682     */
4683    if (dtag == DW_TAG_unspecified_type) {
4684       VG_(memset)(&typeE, 0, sizeof(typeE));
4685       typeE.cuOff           = D3_INVALID_CUOFF;
4686       typeE.tag             = Te_TyQual;
4687       typeE.Te.TyQual.typeR = D3_FAKEVOID_CUOFF;
4688       goto acquire_Type;
4689    }
4690
4691    /* else ignore this DIE */
4692    return;
4693    /*NOTREACHED*/
4694
4695   acquire_Type:
4696    if (0) VG_(printf)("YYYY Acquire Type\n");
4697    vg_assert(ML_(TyEnt__is_type)( &typeE ));
4698    vg_assert(typeE.cuOff == D3_INVALID_CUOFF || typeE.cuOff == posn);
4699    typeE.cuOff = posn;
4700    VG_(addToXA)( tyents, &typeE );
4701    return;
4702    /*NOTREACHED*/
4703
4704   acquire_Atom:
4705    if (0) VG_(printf)("YYYY Acquire Atom\n");
4706    vg_assert(atomE.tag == Te_Atom);
4707    vg_assert(atomE.cuOff == D3_INVALID_CUOFF || atomE.cuOff == posn);
4708    atomE.cuOff = posn;
4709    VG_(addToXA)( tyents, &atomE );
4710    return;
4711    /*NOTREACHED*/
4712
4713   acquire_Field:
4714    /* For union members, Expr should be absent */
4715    if (0) VG_(printf)("YYYY Acquire Field\n");
4716    vg_assert(fieldE.tag == Te_Field);
4717    vg_assert(fieldE.Te.Field.nLoc <= 0 || fieldE.Te.Field.pos.loc != NULL);
4718    vg_assert(fieldE.Te.Field.nLoc != 0 || fieldE.Te.Field.pos.loc == NULL);
4719    if (fieldE.Te.Field.isStruct) {
4720       vg_assert(fieldE.Te.Field.nLoc != 0);
4721    } else {
4722       vg_assert(fieldE.Te.Field.nLoc == 0);
4723    }
4724    vg_assert(fieldE.cuOff == D3_INVALID_CUOFF || fieldE.cuOff == posn);
4725    fieldE.cuOff = posn;
4726    VG_(addToXA)( tyents, &fieldE );
4727    return;
4728    /*NOTREACHED*/
4729
4730   acquire_Bound:
4731    if (0) VG_(printf)("YYYY Acquire Bound\n");
4732    vg_assert(boundE.tag == Te_Bound);
4733    vg_assert(boundE.cuOff == D3_INVALID_CUOFF || boundE.cuOff == posn);
4734    boundE.cuOff = posn;
4735    VG_(addToXA)( tyents, &boundE );
4736    return;
4737    /*NOTREACHED*/
4738
4739   bad_DIE:
4740    dump_bad_die_and_barf("parse_type_DIE", dtag, posn, level,
4741                          c_die, saved_die_c_offset,
4742                          abbv,
4743                          cc);
4744    /*NOTREACHED*/
4745 }
4746
4747
4748 /*------------------------------------------------------------*/
4749 /*---                                                      ---*/
4750 /*--- Compression of type DIE information                  ---*/
4751 /*---                                                      ---*/
4752 /*------------------------------------------------------------*/
4753
4754 static UWord chase_cuOff ( Bool* changed,
4755                            const XArray* /* of TyEnt */ ents,
4756                            TyEntIndexCache* ents_cache,
4757                            UWord cuOff )
4758 {
4759    TyEnt* ent;
4760    ent = ML_(TyEnts__index_by_cuOff)( ents, ents_cache, cuOff );
4761
4762    if (!ent) {
4763       if (VG_(clo_verbosity) > 1)
4764          VG_(printf)("chase_cuOff: no entry for 0x%05lx\n", cuOff);
4765       *changed = False;
4766       return cuOff;
4767    }
4768
4769    vg_assert(ent->tag != Te_EMPTY);
4770    if (ent->tag != Te_INDIR) {
4771       *changed = False;
4772       return cuOff;
4773    } else {
4774       vg_assert(ent->Te.INDIR.indR < cuOff);
4775       *changed = True;
4776       return ent->Te.INDIR.indR;
4777    }
4778 }
4779
4780 static
4781 void chase_cuOffs_in_XArray ( Bool* changed,
4782                               const XArray* /* of TyEnt */ ents,
4783                               TyEntIndexCache* ents_cache,
4784                               /*MOD*/XArray* /* of UWord */ cuOffs )
4785 {
4786    Bool b2 = False;
4787    Word i, n = VG_(sizeXA)( cuOffs );
4788    for (i = 0; i < n; i++) {
4789       Bool   b = False;
4790       UWord* p = VG_(indexXA)( cuOffs, i );
4791       *p = chase_cuOff( &b, ents, ents_cache, *p );
4792       if (b)
4793          b2 = True;
4794    }
4795    *changed = b2;
4796 }
4797
4798 static Bool TyEnt__subst_R_fields ( const XArray* /* of TyEnt */ ents,
4799                                     TyEntIndexCache* ents_cache,
4800                                     /*MOD*/TyEnt* te )
4801 {
4802    Bool b, changed = False;
4803    switch (te->tag) {
4804       case Te_EMPTY:
4805          break;
4806       case Te_INDIR:
4807          te->Te.INDIR.indR
4808             = chase_cuOff( &b, ents, ents_cache, te->Te.INDIR.indR );
4809          if (b) changed = True;
4810          break;
4811       case Te_UNKNOWN:
4812          break;
4813       case Te_Atom:
4814          break;
4815       case Te_Field:
4816          te->Te.Field.typeR
4817             = chase_cuOff( &b, ents, ents_cache, te->Te.Field.typeR );
4818          if (b) changed = True;
4819          break;
4820       case Te_Bound:
4821          break;
4822       case Te_TyBase:
4823          break;
4824       case Te_TyPtr:
4825       case Te_TyRef:
4826       case Te_TyPtrMbr:
4827       case Te_TyRvalRef:
4828          te->Te.TyPorR.typeR
4829             = chase_cuOff( &b, ents, ents_cache, te->Te.TyPorR.typeR );
4830          if (b) changed = True;
4831          break;
4832       case Te_TyTyDef:
4833          te->Te.TyTyDef.typeR
4834             = chase_cuOff( &b, ents, ents_cache, te->Te.TyTyDef.typeR );
4835          if (b) changed = True;
4836          break;
4837       case Te_TyStOrUn:
4838          chase_cuOffs_in_XArray( &b, ents, ents_cache, te->Te.TyStOrUn.fieldRs );
4839          if (b) changed = True;
4840          break;
4841       case Te_TyEnum:
4842          chase_cuOffs_in_XArray( &b, ents, ents_cache, te->Te.TyEnum.atomRs );
4843          if (b) changed = True;
4844          break;
4845       case Te_TyArray:
4846          te->Te.TyArray.typeR
4847             = chase_cuOff( &b, ents, ents_cache, te->Te.TyArray.typeR );
4848          if (b) changed = True;
4849          chase_cuOffs_in_XArray( &b, ents, ents_cache, te->Te.TyArray.boundRs );
4850          if (b) changed = True;
4851          break;
4852       case Te_TyFn:
4853          break;
4854       case Te_TyQual:
4855          te->Te.TyQual.typeR
4856             = chase_cuOff( &b, ents, ents_cache, te->Te.TyQual.typeR );
4857          if (b) changed = True;
4858          break;
4859       case Te_TyVoid:
4860          break;
4861       default:
4862          ML_(pp_TyEnt)(te);
4863          vg_assert(0);
4864    }
4865    return changed;
4866 }
4867
4868 /* Make a pass over 'ents'.  For each tyent, inspect the target of any
4869    'R' or 'Rs' fields (those which refer to other tyents), and replace
4870    any which point to INDIR nodes with the target of the indirection
4871    (which should not itself be an indirection).  In summary, this
4872    routine shorts out all references to indirection nodes. */
4873 static
4874 Word dedup_types_substitution_pass ( /*MOD*/XArray* /* of TyEnt */ ents,
4875                                      TyEntIndexCache* ents_cache )
4876 {
4877    Word i, n, nChanged = 0;
4878    Bool b;
4879    n = VG_(sizeXA)( ents );
4880    for (i = 0; i < n; i++) {
4881       TyEnt* ent = VG_(indexXA)( ents, i );
4882       vg_assert(ent->tag != Te_EMPTY);
4883       /* We have to substitute everything, even indirections, so as to
4884          ensure that chains of indirections don't build up. */
4885       b = TyEnt__subst_R_fields( ents, ents_cache, ent );
4886       if (b)
4887          nChanged++;
4888    }
4889
4890    return nChanged;
4891 }
4892
4893
4894 /* Make a pass over 'ents', building a dictionary of TyEnts as we go.
4895    Look up each new tyent in the dictionary in turn.  If it is already
4896    in the dictionary, replace this tyent with an indirection to the
4897    existing one, and delete any malloc'd stuff hanging off this one.
4898    In summary, this routine commons up all tyents that are identical
4899    as defined by TyEnt__cmp_by_all_except_cuOff. */
4900 static
4901 Word dedup_types_commoning_pass ( /*MOD*/XArray* /* of TyEnt */ ents )
4902 {
4903    Word    n, i, nDeleted;
4904    WordFM* dict; /* TyEnt* -> void */
4905    TyEnt*  ent;
4906    UWord   keyW, valW;
4907
4908    dict = VG_(newFM)(
4909              ML_(dinfo_zalloc), "di.readdwarf3.dtcp.1",
4910              ML_(dinfo_free),
4911              (Word(*)(UWord,UWord)) ML_(TyEnt__cmp_by_all_except_cuOff)
4912           );
4913
4914    nDeleted = 0;
4915    n = VG_(sizeXA)( ents );
4916    for (i = 0; i < n; i++) {
4917       ent = VG_(indexXA)( ents, i );
4918       vg_assert(ent->tag != Te_EMPTY);
4919
4920       /* Ignore indirections, although check that they are
4921          not forming a cycle. */
4922       if (ent->tag == Te_INDIR) {
4923          vg_assert(ent->Te.INDIR.indR < ent->cuOff);
4924          continue;
4925       }
4926
4927       keyW = valW = 0;
4928       if (VG_(lookupFM)( dict, &keyW, &valW, (UWord)ent )) {
4929          /* it's already in the dictionary. */
4930          TyEnt* old = (TyEnt*)keyW;
4931          vg_assert(valW == 0);
4932          vg_assert(old != ent);
4933          vg_assert(old->tag != Te_INDIR);
4934          /* since we are traversing the array in increasing order of
4935             cuOff: */
4936          vg_assert(old->cuOff < ent->cuOff);
4937          /* So anyway, dump this entry and replace it with an
4938             indirection to the one in the dictionary.  Note that the
4939             assertion above guarantees that we cannot create cycles of
4940             indirections, since we are always creating an indirection
4941             to a tyent with a cuOff lower than this one. */
4942          ML_(TyEnt__make_EMPTY)( ent );
4943          ent->tag = Te_INDIR;
4944          ent->Te.INDIR.indR = old->cuOff;
4945          nDeleted++;
4946       } else {
4947          /* not in dictionary; add it and keep going. */
4948          VG_(addToFM)( dict, (UWord)ent, 0 );
4949       }
4950    }
4951
4952    VG_(deleteFM)( dict, NULL, NULL );
4953
4954    return nDeleted;
4955 }
4956
4957
4958 static
4959 void dedup_types ( Bool td3,
4960                    /*MOD*/XArray* /* of TyEnt */ ents,
4961                    TyEntIndexCache* ents_cache )
4962 {
4963    Word m, n, i, nDel, nSubst, nThresh;
4964    if (0) td3 = True;
4965
4966    n = VG_(sizeXA)( ents );
4967
4968    /* If a commoning pass and a substitution pass both make fewer than
4969       this many changes, just stop.  It's pointless to burn up CPU
4970       time trying to compress the last 1% or so out of the array. */
4971    nThresh = n / 200;
4972
4973    /* First we must sort .ents by its .cuOff fields, so we
4974       can index into it. */
4975    VG_(setCmpFnXA)( ents, (XACmpFn_t) ML_(TyEnt__cmp_by_cuOff_only) );
4976    VG_(sortXA)( ents );
4977
4978    /* Now repeatedly do commoning and substitution passes over
4979       the array, until there are no more changes. */
4980    do {
4981       nDel   = dedup_types_commoning_pass ( ents );
4982       nSubst = dedup_types_substitution_pass ( ents, ents_cache );
4983       vg_assert(nDel >= 0 && nSubst >= 0);
4984       TRACE_D3("   %ld deletions, %ld substitutions\n", nDel, nSubst);
4985    } while (nDel > nThresh || nSubst > nThresh);
4986
4987    /* Sanity check: all INDIR nodes should point at a non-INDIR thing.
4988       In fact this should be true at the end of every loop iteration
4989       above (a commoning pass followed by a substitution pass), but
4990       checking it on every iteration is excessively expensive.  Note,
4991       this loop also computes 'm' for the stats printing below it. */
4992    m = 0;
4993    n = VG_(sizeXA)( ents );
4994    for (i = 0; i < n; i++) {
4995       TyEnt *ent, *ind;
4996       ent = VG_(indexXA)( ents, i );
4997       if (ent->tag != Te_INDIR) continue;
4998       m++;
4999       ind = ML_(TyEnts__index_by_cuOff)( ents, ents_cache,
5000                                          ent->Te.INDIR.indR );
5001       vg_assert(ind);
5002       vg_assert(ind->tag != Te_INDIR);
5003    }
5004
5005    TRACE_D3("Overall: %ld before, %ld after\n", n, n-m);
5006 }
5007
5008
5009 /*------------------------------------------------------------*/
5010 /*---                                                      ---*/
5011 /*--- Resolution of references to type DIEs                ---*/
5012 /*---                                                      ---*/
5013 /*------------------------------------------------------------*/
5014
5015 /* Make a pass through the (temporary) variables array.  Examine the
5016    type of each variable, check is it found, and chase any Te_INDIRs.
5017    Postcondition is: each variable has a typeR field that refers to a
5018    valid type in tyents, or a Te_UNKNOWN, and is certainly guaranteed
5019    not to refer to a Te_INDIR.  (This is so that we can throw all the
5020    Te_INDIRs away later). */
5021
5022 __attribute__((noinline))
5023 static void resolve_variable_types (
5024                void (*barf)( const HChar* ) __attribute__((noreturn)),
5025                /*R-O*/XArray* /* of TyEnt */ ents,
5026                /*MOD*/TyEntIndexCache* ents_cache,
5027                /*MOD*/XArray* /* of TempVar* */ vars
5028             )
5029 {
5030    Word i, n;
5031    n = VG_(sizeXA)( vars );
5032    for (i = 0; i < n; i++) {
5033       TempVar* var = *(TempVar**)VG_(indexXA)( vars, i );
5034       /* This is the stated type of the variable.  But it might be
5035          an indirection, so be careful. */
5036       TyEnt* ent = ML_(TyEnts__index_by_cuOff)( ents, ents_cache,
5037                                                 var->typeR );
5038       if (ent && ent->tag == Te_INDIR) {
5039          ent = ML_(TyEnts__index_by_cuOff)( ents, ents_cache,
5040                                             ent->Te.INDIR.indR );
5041          vg_assert(ent);
5042          vg_assert(ent->tag != Te_INDIR);
5043       }
5044
5045       /* Deal first with "normal" cases */
5046       if (ent && ML_(TyEnt__is_type)(ent)) {
5047          var->typeR = ent->cuOff;
5048          continue;
5049       }
5050
5051       /* If there's no ent, it probably we did not manage to read a
5052          type at the cuOffset which is stated as being this variable's
5053          type.  Maybe a deficiency in parse_type_DIE.  Complain. */
5054       if (ent == NULL) {
5055          VG_(printf)("\n: Invalid cuOff = 0x%05lx\n", var->typeR );
5056          barf("resolve_variable_types: "
5057               "cuOff does not refer to a known type");
5058       }
5059       vg_assert(ent);
5060       /* If ent has any other tag, something bad happened, along the
5061          lines of var->typeR not referring to a type at all. */
5062       vg_assert(ent->tag == Te_UNKNOWN);
5063       /* Just accept it; the type will be useless, but at least keep
5064          going. */
5065       var->typeR = ent->cuOff;
5066    }
5067 }
5068
5069
5070 /*------------------------------------------------------------*/
5071 /*---                                                      ---*/
5072 /*--- Parsing of Compilation Units                         ---*/
5073 /*---                                                      ---*/
5074 /*------------------------------------------------------------*/
5075
5076 static Int cmp_TempVar_by_dioff ( const void* v1, const void* v2 ) {
5077    const TempVar* t1 = *(const TempVar *const *)v1;
5078    const TempVar* t2 = *(const TempVar *const *)v2;
5079    if (t1->dioff < t2->dioff) return -1;
5080    if (t1->dioff > t2->dioff) return 1;
5081    return 0;
5082 }
5083
5084 static void read_DIE (
5085    /*MOD*/WordFM* /* of (XArray* of AddrRange, void) */ rangestree,
5086    /*MOD*/XArray* /* of TyEnt */ tyents,
5087    /*MOD*/XArray* /* of TempVar* */ tempvars,
5088    /*MOD*/XArray* /* of GExpr* */ gexprs,
5089    /*MOD*/D3TypeParser* typarser,
5090    /*MOD*/D3VarParser* varparser,
5091    /*MOD*/D3InlParser* inlparser,
5092    XArray** fndn_ix_Table,
5093    ULong *debug_line_offset,
5094    Cursor* c, Bool td3, CUConst* cc, Int level
5095 )
5096 {
5097    const g_abbv *abbv;
5098    ULong  atag, abbv_code;
5099    UWord  posn;
5100    UInt   has_children;
5101    UWord  start_die_c_offset;
5102    UWord  after_die_c_offset;
5103    // If the DIE we will parse has a sibling and the parser(s) are
5104    // all indicating that parse_children is not necessary, then
5105    // we will skip the children by jumping to the sibling of this DIE
5106    // (if it has a sibling).
5107    UWord  sibling = 0;
5108    Bool   parse_children = False;
5109
5110    /* --- Deal with this DIE --- */
5111    posn      = cook_die( cc, get_position_of_Cursor( c ) );
5112    abbv_code = get_ULEB128( c );
5113    abbv = get_abbv(cc, abbv_code, td3);
5114    atag      = abbv->atag;
5115
5116    if (TD3) {
5117       TRACE_D3("\n");
5118       trace_DIE ((DW_TAG)atag, posn, level,
5119                  get_position_of_Cursor( c ), abbv, cc);
5120    }
5121
5122    if (atag == 0)
5123       cc->barf("read_DIE: invalid zero tag on DIE");
5124
5125    has_children = abbv->has_children;
5126    if (has_children != DW_children_no && has_children != DW_children_yes)
5127       cc->barf("read_DIE: invalid has_children value");
5128
5129    /* We're set up to look at the fields of this DIE.  Hand it off to
5130       any parser(s) that want to see it.  Since they will in general
5131       advance the DIE cursor, remember the current settings so that we
5132       can then back up. . */
5133    start_die_c_offset  = get_position_of_Cursor( c );
5134    after_die_c_offset  = 0; // set to c position if a parser has read the DIE.
5135
5136    if (VG_(clo_read_var_info)) {
5137       parse_type_DIE( tyents,
5138                       typarser,
5139                       (DW_TAG)atag,
5140                       posn,
5141                       level,
5142                       c,     /* DIE cursor */
5143                       abbv,  /* abbrev */
5144                       cc,
5145                       td3 );
5146       if (get_position_of_Cursor( c ) != start_die_c_offset) {
5147          after_die_c_offset = get_position_of_Cursor( c );
5148          set_position_of_Cursor( c, start_die_c_offset );
5149       }
5150
5151       parse_var_DIE( rangestree,
5152                      tempvars,
5153                      gexprs,
5154                      varparser,
5155                      fndn_ix_Table,
5156                      debug_line_offset,
5157                      (DW_TAG)atag,
5158                      posn,
5159                      level,
5160                      c,     /* DIE cursor */
5161                      abbv,  /* abbrev */
5162                      cc,
5163                      td3 );
5164       if (get_position_of_Cursor( c ) != start_die_c_offset) {
5165          after_die_c_offset = get_position_of_Cursor( c );
5166          set_position_of_Cursor( c, start_die_c_offset );
5167       }
5168
5169       parse_children = True;
5170       // type and var parsers do not have logic to skip childrens and establish
5171       // the value of sibling.
5172    }
5173
5174    if (VG_(clo_read_inline_info)) {
5175       inlparser->sibling = 0;
5176       parse_children =
5177          parse_inl_DIE( inlparser,
5178                         fndn_ix_Table,
5179                         debug_line_offset,
5180                         (DW_TAG)atag,
5181                         posn,
5182                         level,
5183                         c,     /* DIE cursor */
5184                         abbv, /* abbrev */
5185                         cc,
5186                         td3 )
5187          || parse_children;
5188       if (get_position_of_Cursor( c ) != start_die_c_offset) {
5189          after_die_c_offset = get_position_of_Cursor( c );
5190          // Last parser, no need to reset the cursor to start_die_c_offset.
5191       }
5192       if (sibling == 0)
5193          sibling = inlparser->sibling;
5194       vg_assert (inlparser->sibling == 0 || inlparser->sibling == sibling);
5195    }
5196
5197    /* Top level CU DIE, but we don't want to read anything else, just skip
5198       to the end and return.  */
5199    if (level == 0 && !parse_children) {
5200       UWord cu_size_including_IniLen = (cc->unit_length
5201                                         + (cc->is_dw64 ? 12 : 4));
5202       set_position_of_Cursor( c, (cc->cu_start_offset
5203                                   + cu_size_including_IniLen));
5204       return;
5205    }
5206
5207    if (after_die_c_offset > 0) {
5208       // DIE was read by a parser above, so we know where the DIE ends.
5209       set_position_of_Cursor( c, after_die_c_offset );
5210    } else {
5211       /* No parser has parsed this DIE. So, we need to skip the DIE,
5212          in order to read the next DIE.
5213          At the same time, establish sibling value if the DIE has one. */
5214       TRACE_D3("    uninteresting DIE -> skipping ...\n");
5215       skip_DIE (&sibling, c, abbv, cc);
5216    }
5217
5218    /* --- Now recurse into its children, if any
5219       and the parsing of the children is requested by a parser --- */
5220    if (has_children == DW_children_yes) {
5221       if (parse_children || sibling == 0) {
5222          if (0) TRACE_D3("BEGIN children of level %d\n", level);
5223          while (True) {
5224             atag = peek_ULEB128( c );
5225             if (atag == 0) break;
5226             if (parse_children) {
5227                read_DIE( rangestree, tyents, tempvars, gexprs,
5228                          typarser, varparser, inlparser,
5229                          fndn_ix_Table, debug_line_offset,
5230                          c, td3, cc, level+1 );
5231             } else {
5232                Int skip_level = level + 1;
5233                while (True) {
5234                   atag = peek_ULEB128( c );
5235                   if (atag == 0) {
5236                      skip_level--;
5237                      if (skip_level == level) break;
5238                      /* Eat the terminating zero and continue skipping the
5239                         children one level up.  */
5240                      atag = get_ULEB128( c );
5241                      vg_assert(atag == 0);
5242                      continue;
5243                   }
5244
5245                   abbv_code = get_ULEB128( c );
5246                   abbv = get_abbv(cc, abbv_code, td3);
5247                   sibling = 0;
5248                   skip_DIE (&sibling, c, abbv, cc);
5249                   if (abbv->has_children) {
5250                      if (sibling == 0)
5251                         skip_level++;
5252                      else
5253                         set_position_of_Cursor( c, sibling );
5254                   }
5255                }
5256             }
5257          }
5258          /* Now we need to eat the terminating zero */
5259          atag = get_ULEB128( c );
5260          vg_assert(atag == 0);
5261          if (0) TRACE_D3("END children of level %d\n", level);
5262       } else {
5263          // We can skip the childrens, by jumping to the sibling
5264          TRACE_D3("    SKIPPING DIE's children,"
5265                   "jumping to sibling <%d><%lx>\n",
5266                   level, sibling);
5267          set_position_of_Cursor( c, sibling );
5268       }
5269    }
5270
5271 }
5272
5273 static void trace_debug_loc (const DebugInfo* di,
5274                              __attribute__((noreturn)) void (*barf)( const HChar* ),
5275                              DiSlice escn_debug_loc)
5276 {
5277 #if 0
5278    /* This doesn't work properly because it assumes all entries are
5279       packed end to end, with no holes.  But that doesn't always
5280       appear to be the case, so it loses sync.  And the D3 spec
5281       doesn't appear to require a no-hole situation either. */
5282    /* Display .debug_loc */
5283    Addr  dl_base;
5284    UWord dl_offset;
5285    Cursor loc; /* for showing .debug_loc */
5286    Bool td3 = di->trace_symtab;
5287
5288    TRACE_SYMTAB("\n");
5289    TRACE_SYMTAB("\n------ The contents of .debug_loc ------\n");
5290    TRACE_SYMTAB("    Offset   Begin    End      Expression\n");
5291    if (ML_(sli_is_valid)(escn_debug_loc)) {
5292       init_Cursor( &loc, escn_debug_loc, 0, barf,
5293                    "Overrun whilst reading .debug_loc section(1)" );
5294       dl_base = 0;
5295       dl_offset = 0;
5296       while (True) {
5297          UWord  w1, w2;
5298          UWord  len;
5299          if (is_at_end_Cursor( &loc ))
5300             break;
5301
5302          /* Read a (host-)word pair.  This is something of a hack since
5303             the word size to read is really dictated by the ELF file;
5304             however, we assume we're reading a file with the same
5305             word-sizeness as the host.  Reasonably enough. */
5306          w1 = get_UWord( &loc );
5307          w2 = get_UWord( &loc );
5308
5309          if (w1 == 0 && w2 == 0) {
5310             /* end of list.  reset 'base' */
5311             TRACE_D3("    %08lx <End of list>\n", dl_offset);
5312             dl_base = 0;
5313             dl_offset = get_position_of_Cursor( &loc );
5314             continue;
5315          }
5316
5317          if (w1 == -1UL) {
5318             /* new value for 'base' */
5319             TRACE_D3("    %08lx %16lx %08lx (base address)\n",
5320                      dl_offset, w1, w2);
5321             dl_base = w2;
5322             continue;
5323          }
5324
5325          /* else a location expression follows */
5326          TRACE_D3("    %08lx %08lx %08lx ",
5327                   dl_offset, w1 + dl_base, w2 + dl_base);
5328          len = (UWord)get_UShort( &loc );
5329          while (len > 0) {
5330             UChar byte = get_UChar( &loc );
5331             TRACE_D3("%02x", (UInt)byte);
5332             len--;
5333          }
5334          TRACE_SYMTAB("\n");
5335       }
5336    }
5337 #endif
5338 }
5339
5340 static void trace_debug_ranges (const DebugInfo* di,
5341                                 __attribute__((noreturn)) void (*barf)( const HChar* ),
5342                                 DiSlice escn_debug_ranges)
5343 {
5344    Cursor ranges; /* for showing .debug_ranges */
5345    Addr  dr_base;
5346    UWord dr_offset;
5347    Bool td3 = di->trace_symtab;
5348
5349    /* Display .debug_ranges */
5350    TRACE_SYMTAB("\n");
5351    TRACE_SYMTAB("\n------ The contents of .debug_ranges ------\n");
5352    TRACE_SYMTAB("    Offset   Begin    End\n");
5353    if (ML_(sli_is_valid)(escn_debug_ranges)) {
5354       init_Cursor( &ranges, escn_debug_ranges, 0, barf,
5355                    "Overrun whilst reading .debug_ranges section(1)" );
5356       dr_base = 0;
5357       dr_offset = 0;
5358       while (True) {
5359          UWord  w1, w2;
5360
5361          if (is_at_end_Cursor( &ranges ))
5362             break;
5363
5364          /* Read a (host-)word pair.  This is something of a hack since
5365             the word size to read is really dictated by the ELF file;
5366             however, we assume we're reading a file with the same
5367             word-sizeness as the host.  Reasonably enough. */
5368          w1 = get_UWord( &ranges );
5369          w2 = get_UWord( &ranges );
5370
5371          if (w1 == 0 && w2 == 0) {
5372             /* end of list.  reset 'base' */
5373             TRACE_D3("    %08lx <End of list>\n", dr_offset);
5374             dr_base = 0;
5375             dr_offset = get_position_of_Cursor( &ranges );
5376             continue;
5377          }
5378
5379          if (w1 == -1UL) {
5380             /* new value for 'base' */
5381             TRACE_D3("    %08lx %16lx %08lx (base address)\n",
5382                      dr_offset, w1, w2);
5383             dr_base = w2;
5384             continue;
5385          }
5386
5387          /* else a range [w1+base, w2+base) is denoted */
5388          TRACE_D3("    %08lx %08lx %08lx\n",
5389                   dr_offset, w1 + dr_base, w2 + dr_base);
5390       }
5391    }
5392 }
5393
5394 static void trace_debug_abbrev (const DebugInfo* di,
5395                                 __attribute__((noreturn)) void (*barf)( const HChar* ),
5396                                 DiSlice escn_debug_abbv)
5397 {
5398    Cursor abbv; /* for showing .debug_abbrev */
5399    Bool td3 = di->trace_symtab;
5400
5401    /* Display .debug_abbrev */
5402    TRACE_SYMTAB("\n");
5403    TRACE_SYMTAB("\n------ The contents of .debug_abbrev ------\n");
5404    if (ML_(sli_is_valid)(escn_debug_abbv)) {
5405       init_Cursor( &abbv, escn_debug_abbv, 0, barf,
5406                    "Overrun whilst reading .debug_abbrev section" );
5407       while (True) {
5408          if (is_at_end_Cursor( &abbv ))
5409             break;
5410          /* Read one abbreviation table */
5411          TRACE_D3("  Number TAG\n");
5412          while (True) {
5413             ULong atag;
5414             UInt  has_children;
5415             ULong acode = get_ULEB128( &abbv );
5416             if (acode == 0) break; /* end of the table */
5417             atag = get_ULEB128( &abbv );
5418             has_children = get_UChar( &abbv );
5419             TRACE_D3("   %llu      %s    [%s]\n",
5420                      acode, ML_(pp_DW_TAG)(atag),
5421                             ML_(pp_DW_children)(has_children));
5422             while (True) {
5423                ULong at_name = get_ULEB128( &abbv );
5424                ULong at_form = get_ULEB128( &abbv );
5425                if (at_form == DW_FORM_implicit_const) {
5426                   /* Long at_val = */ get_SLEB128 ( &abbv );
5427                }
5428                if (at_name == 0 && at_form == 0) break;
5429                TRACE_D3("    %-18s %s\n",
5430                         ML_(pp_DW_AT)(at_name), ML_(pp_DW_FORM)(at_form));
5431             }
5432          }
5433       }
5434    }
5435 }
5436
5437 static
5438 void new_dwarf3_reader_wrk (
5439    DebugInfo* di,
5440    __attribute__((noreturn)) void (*barf)( const HChar* ),
5441    DiSlice escn_debug_info,      DiSlice escn_debug_types,
5442    DiSlice escn_debug_abbv,      DiSlice escn_debug_line,
5443    DiSlice escn_debug_str,       DiSlice escn_debug_ranges,
5444    DiSlice escn_debug_rnglists,  DiSlice escn_debug_loclists,
5445    DiSlice escn_debug_loc,       DiSlice escn_debug_info_alt,
5446    DiSlice escn_debug_abbv_alt,  DiSlice escn_debug_line_alt,
5447    DiSlice escn_debug_str_alt,   DiSlice escn_debug_line_str,
5448    DiSlice escn_debug_addr,      DiSlice escn_debug_str_offsets
5449 )
5450 {
5451    XArray* /* of TyEnt */     tyents = NULL;
5452    XArray* /* of TyEnt */     tyents_to_keep = NULL;
5453    XArray* /* of GExpr* */    gexprs = NULL;
5454    XArray* /* of TempVar* */  tempvars = NULL;
5455    WordFM* /* of (XArray* of AddrRange, void) */ rangestree = NULL;
5456    TyEntIndexCache* tyents_cache = NULL;
5457    TyEntIndexCache* tyents_to_keep_cache = NULL;
5458    TempVar *varp, *varp2;
5459    GExpr* gexpr;
5460    Cursor info; /* primary cursor for parsing .debug_info */
5461    D3TypeParser typarser;
5462    D3VarParser varparser;
5463    D3InlParser inlparser;
5464    XArray* /* of UInt */ fndn_ix_Table = NULL;
5465    ULong debug_line_offset = (ULong) -1;
5466    Word  i, j, n;
5467    Bool td3 = di->trace_symtab;
5468    XArray* /* of TempVar* */ dioff_lookup_tab;
5469    Int pass;
5470    VgHashTable *signature_types = NULL;
5471
5472    /* Display/trace various information, if requested. */
5473    if (TD3) {
5474       trace_debug_loc    (di, barf, escn_debug_loc);
5475       trace_debug_ranges (di, barf, escn_debug_ranges);
5476       trace_debug_abbrev (di, barf, escn_debug_abbv);
5477       TRACE_SYMTAB("\n");
5478    }
5479
5480    /* Zero out all parsers. Parsers will really be initialised
5481       according to VG_(clo_read_*_info). */
5482    VG_(memset)( &inlparser, 0, sizeof(inlparser) );
5483
5484    if (VG_(clo_read_var_info)) {
5485       /* We'll park the harvested type information in here.  Also create
5486          a fake "void" entry with offset D3_FAKEVOID_CUOFF, so we always
5487          have at least one type entry to refer to.  D3_FAKEVOID_CUOFF is
5488          huge and presumably will not occur in any valid DWARF3 file --
5489          it would need to have a .debug_info section 4GB long for that to
5490          happen.  These type entries end up in the DebugInfo. */
5491       tyents = VG_(newXA)( ML_(dinfo_zalloc),
5492                            "di.readdwarf3.ndrw.1 (TyEnt temp array)",
5493                            ML_(dinfo_free), sizeof(TyEnt) );
5494       { TyEnt tyent;
5495         VG_(memset)(&tyent, 0, sizeof(tyent));
5496         tyent.tag   = Te_TyVoid;
5497         tyent.cuOff = D3_FAKEVOID_CUOFF;
5498         tyent.Te.TyVoid.isFake = True;
5499         VG_(addToXA)( tyents, &tyent );
5500       }
5501       { TyEnt tyent;
5502         VG_(memset)(&tyent, 0, sizeof(tyent));
5503         tyent.tag   = Te_UNKNOWN;
5504         tyent.cuOff = D3_INVALID_CUOFF;
5505         VG_(addToXA)( tyents, &tyent );
5506       }
5507
5508       /* This is a tree used to unique-ify the range lists that are
5509          manufactured by parse_var_DIE.  References to the keys in the
5510          tree wind up in .rngMany fields in TempVars.  We'll need to
5511          delete this tree, and the XArrays attached to it, at the end of
5512          this function. */
5513       rangestree = VG_(newFM)( ML_(dinfo_zalloc),
5514                                "di.readdwarf3.ndrw.2 (rangestree)",
5515                                ML_(dinfo_free),
5516                                (Word(*)(UWord,UWord))cmp__XArrays_of_AddrRange );
5517
5518       /* List of variables we're accumulating.  These don't end up in the
5519          DebugInfo; instead their contents are handed to ML_(addVar) and
5520          the list elements are then deleted. */
5521       tempvars = VG_(newXA)( ML_(dinfo_zalloc),
5522                              "di.readdwarf3.ndrw.3 (TempVar*s array)",
5523                              ML_(dinfo_free),
5524                              sizeof(TempVar*) );
5525
5526       /* List of GExprs we're accumulating.  These wind up in the
5527          DebugInfo. */
5528       gexprs = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ndrw.4",
5529                            ML_(dinfo_free), sizeof(GExpr*) );
5530
5531       /* We need a D3TypeParser to keep track of partially constructed
5532          types.  It'll be discarded as soon as we've completed the CU,
5533          since the resulting information is tipped in to 'tyents' as it
5534          is generated. */
5535       type_parser_init(&typarser);
5536
5537       var_parser_init(&varparser);
5538
5539       signature_types = VG_(HT_construct) ("signature_types");
5540    }
5541
5542    /* Do an initial pass to scan the .debug_types section, if any, and
5543       fill in the signatured types hash table.  This lets us handle
5544       mapping from a type signature to a (cooked) DIE offset directly
5545       in get_Form_contents.  */
5546    if (VG_(clo_read_var_info) && ML_(sli_is_valid)(escn_debug_types)) {
5547       init_Cursor( &info, escn_debug_types, 0, barf,
5548                    "Overrun whilst reading .debug_types section" );
5549       TRACE_D3("\n------ Collecting signatures from "
5550                ".debug_types section ------\n");
5551
5552       abbv_state last_abbv;
5553       last_abbv.debug_abbrev_offset = (ULong) -1;
5554       last_abbv.ht_abbvs = NULL;
5555       while (True) {
5556          UWord   cu_start_offset, cu_offset_now;
5557          CUConst cc;
5558
5559          cu_start_offset = get_position_of_Cursor( &info );
5560          TRACE_D3("\n");
5561          TRACE_D3("  Compilation Unit @ offset 0x%lx:\n", cu_start_offset);
5562          /* parse_CU_header initialises the CU's abbv hash table.  */
5563          parse_CU_Header( &cc, td3, &info, escn_debug_abbv,
5564                           last_abbv, True, False );
5565
5566          /* Needed by cook_die.  */
5567          cc.types_cuOff_bias = escn_debug_info.szB;
5568
5569          record_signatured_type( signature_types, cc.type_signature,
5570                                  cook_die( &cc, cc.type_offset ));
5571
5572          /* Until proven otherwise we assume we don't need the icc9
5573             workaround in this case; see the DIE-reading loop below
5574             for details.  */
5575          cu_offset_now = (cu_start_offset + cc.unit_length
5576                           + (cc.is_dw64 ? 12 : 4));
5577
5578          last_abbv = cc.abbv;
5579
5580          if (cu_offset_now >= escn_debug_types.szB) {
5581             break;
5582          }
5583
5584          set_position_of_Cursor ( &info, cu_offset_now );
5585       }
5586       if (last_abbv.ht_abbvs != NULL)
5587          VG_(HT_destruct) (last_abbv.ht_abbvs, ML_(dinfo_free));
5588    }
5589
5590    /* Perform three DIE-reading passes.  The first pass reads DIEs from
5591       alternate .debug_info (if any), the second pass reads DIEs from
5592       .debug_info, and the third pass reads DIEs from .debug_types.
5593       Moving the body of this loop into a separate function would
5594       require a large number of arguments to be passed in, so it is
5595       kept inline instead.  */
5596    for (pass = 0; pass < 3; ++pass) {
5597       ULong section_size;
5598
5599       if (pass == 0) {
5600          if (!ML_(sli_is_valid)(escn_debug_info_alt))
5601             continue;
5602          /* Now loop over the Compilation Units listed in the alternate
5603             .debug_info section (see D3SPEC sec 7.5) paras 1 and 2.
5604             Each compilation unit contains a Compilation Unit Header
5605             followed by precisely one DW_TAG_compile_unit or
5606             DW_TAG_partial_unit DIE. */
5607          init_Cursor( &info, escn_debug_info_alt, 0, barf,
5608                       "Overrun whilst reading alternate .debug_info section" );
5609          section_size = escn_debug_info_alt.szB;
5610
5611          /* Keep track of the last line table we have seen,
5612             it might turn up again.  */
5613          reset_fndn_ix_table(&fndn_ix_Table, &debug_line_offset, (ULong) -1);
5614
5615          TRACE_D3("\n------ Parsing alternate .debug_info section ------\n");
5616       } else if (pass == 1) {
5617          /* Now loop over the Compilation Units listed in the .debug_info
5618             section (see D3SPEC sec 7.5) paras 1 and 2.  Each compilation
5619             unit contains a Compilation Unit Header followed by precisely
5620             one DW_TAG_compile_unit or DW_TAG_partial_unit DIE. */
5621          init_Cursor( &info, escn_debug_info, 0, barf,
5622                       "Overrun whilst reading .debug_info section" );
5623          section_size = escn_debug_info.szB;
5624
5625          /* Keep track of the last line table we have seen,
5626             it might turn up again.  */
5627          reset_fndn_ix_table(&fndn_ix_Table, &debug_line_offset, (ULong) -1);
5628
5629          TRACE_D3("\n------ Parsing .debug_info section ------\n");
5630       } else {
5631          if (!ML_(sli_is_valid)(escn_debug_types))
5632             continue;
5633          if (!VG_(clo_read_var_info))
5634             continue; // Types not needed when only reading inline info.
5635          init_Cursor( &info, escn_debug_types, 0, barf,
5636                       "Overrun whilst reading .debug_types section" );
5637          section_size = escn_debug_types.szB;
5638
5639          /* Keep track of the last line table we have seen,
5640             it might turn up again.  */
5641          reset_fndn_ix_table(&fndn_ix_Table, &debug_line_offset, (ULong) -1);
5642
5643          TRACE_D3("\n------ Parsing .debug_types section ------\n");
5644       }
5645
5646       abbv_state last_abbv;
5647       last_abbv.debug_abbrev_offset = (ULong) -1;
5648       last_abbv.ht_abbvs = NULL;
5649       while (True) {
5650          ULong   cu_start_offset, cu_offset_now;
5651          CUConst cc;
5652          /* It may be that the stated size of this CU is larger than the
5653             amount of stuff actually in it.  icc9 seems to generate CUs
5654             thusly.  We use these variables to figure out if this is
5655             indeed the case, and if so how many bytes we need to skip to
5656             get to the start of the next CU.  Not skipping those bytes
5657             causes us to misidentify the start of the next CU, and it all
5658             goes badly wrong after that (not surprisingly). */
5659          UWord cu_size_including_IniLen, cu_amount_used;
5660
5661          /* It seems icc9 finishes the DIE info before debug_info_sz
5662             bytes have been used up.  So be flexible, and declare the
5663             sequence complete if there is not enough remaining bytes to
5664             hold even the smallest conceivable CU header.  (11 bytes I
5665             reckon). */
5666          /* JRS 23Jan09: I suspect this is no longer necessary now that
5667             the code below contains a 'while (cu_amount_used <
5668             cu_size_including_IniLen ...'  style loop, which skips over
5669             any leftover bytes at the end of a CU in the case where the
5670             CU's stated size is larger than its actual size (as
5671             determined by reading all its DIEs).  However, for prudence,
5672             I'll leave the following test in place.  I can't see that a
5673             CU header can be smaller than 11 bytes, so I don't think
5674             there's any harm possible through the test -- it just adds
5675             robustness. */
5676          Word avail = get_remaining_length_Cursor( &info );
5677          if (avail < 11) {
5678             if (avail > 0)
5679                TRACE_D3("new_dwarf3_reader_wrk: warning: "
5680                         "%ld unused bytes after end of DIEs\n", avail);
5681             break;
5682          }
5683
5684          if (VG_(clo_read_var_info)) {
5685             /* Check the varparser's stack is in a sane state. */
5686             vg_assert(varparser.sp == -1);
5687             /* Check the typarser's stack is in a sane state. */
5688             vg_assert(typarser.sp == -1);
5689          }
5690
5691          cu_start_offset = get_position_of_Cursor( &info );
5692          TRACE_D3("\n");
5693          TRACE_D3("  Compilation Unit @ offset 0x%llx:\n", cu_start_offset);
5694          /* parse_CU_header initialises the CU's hashtable of abbvs ht_abbvs */
5695          if (pass == 0) {
5696             parse_CU_Header( &cc, td3, &info, escn_debug_abbv_alt,
5697                              last_abbv, False, True );
5698          } else {
5699             parse_CU_Header( &cc, td3, &info, escn_debug_abbv,
5700                              last_abbv, pass == 2, False );
5701          }
5702          cc.escn_debug_str      = pass == 0 ? escn_debug_str_alt
5703                                             : escn_debug_str;
5704          cc.escn_debug_ranges   = escn_debug_ranges;
5705          cc.escn_debug_rnglists = escn_debug_rnglists;
5706          cc.escn_debug_loclists = escn_debug_loclists;
5707          cc.escn_debug_loc      = escn_debug_loc;
5708          cc.escn_debug_line     = pass == 0 ? escn_debug_line_alt
5709                                             : escn_debug_line;
5710          cc.escn_debug_info     = pass == 0 ? escn_debug_info_alt
5711                                             : escn_debug_info;
5712          cc.escn_debug_types    = escn_debug_types;
5713          cc.escn_debug_info_alt = escn_debug_info_alt;
5714          cc.escn_debug_str_alt  = escn_debug_str_alt;
5715          cc.escn_debug_line_str = escn_debug_line_str;
5716          cc.escn_debug_addr     = escn_debug_addr;
5717          cc.escn_debug_str_offsets = escn_debug_str_offsets;
5718          cc.types_cuOff_bias    = escn_debug_info.szB;
5719          cc.alt_cuOff_bias      = escn_debug_info.szB + escn_debug_types.szB;
5720          cc.cu_start_offset     = cu_start_offset;
5721          cc.cu_addr_base        = 0;
5722          cc.cu_has_addr_base    = False;
5723          cc.cu_str_offsets_base = 0;
5724          cc.cu_has_str_offsets_base = False;
5725          cc.cu_rnglists_base = 0;
5726          cc.cu_has_rnglists_base = False;
5727          cc.cu_loclists_base = 0;
5728          cc.cu_has_loclists_base = False;
5729          cc.di = di;
5730          /* The CU's svma can be deduced by looking at the AT_low_pc
5731             value in the top level TAG_compile_unit, which is the topmost
5732             DIE.  We'll leave it for the 'varparser' to acquire that info
5733             and fill it in -- since it is the only party to want to know
5734             it. */
5735          cc.cu_svma_known = False;
5736          cc.cu_svma       = 0;
5737
5738          if (VG_(clo_read_var_info)) {
5739             cc.signature_types = signature_types;
5740
5741             /* Create a fake outermost-level range covering the entire
5742                address range.  So we always have *something* to catch all
5743                variable declarations. */
5744             varstack_push( &cc, &varparser, td3,
5745                            unitary_range_list(0UL, ~0UL),
5746                            -1, False/*isFunc*/, NULL/*fbGX*/ );
5747
5748          }
5749
5750          /* Now read the one-and-only top-level DIE for this CU. */
5751          vg_assert(!VG_(clo_read_var_info) || varparser.sp == 0);
5752          read_DIE( rangestree,
5753                    tyents, tempvars, gexprs,
5754                    &typarser, &varparser, &inlparser,
5755                    &fndn_ix_Table, &debug_line_offset,
5756                    &info, td3, &cc, 0 );
5757
5758          cu_offset_now = get_position_of_Cursor( &info );
5759
5760          if (0) VG_(printf)("Travelled: %llu  size %llu\n",
5761                             cu_offset_now - cc.cu_start_offset,
5762                             cc.unit_length + (cc.is_dw64 ? 12 : 4));
5763
5764          /* How big the CU claims it is .. */
5765          cu_size_including_IniLen = cc.unit_length + (cc.is_dw64 ? 12 : 4);
5766          /* .. vs how big we have found it to be */
5767          cu_amount_used = cu_offset_now - cc.cu_start_offset;
5768
5769          if (1) TRACE_D3("offset now %llu, d-i-size %llu\n",
5770                          cu_offset_now, section_size);
5771          if (cu_offset_now > section_size)
5772             barf("toplevel DIEs beyond end of CU");
5773
5774          /* If the CU is bigger than it claims to be, we've got a serious
5775             problem. */
5776          if (cu_amount_used > cu_size_including_IniLen)
5777             barf("CU's actual size appears to be larger than it claims it is");
5778
5779          /* If the CU is smaller than it claims to be, we need to skip some
5780             bytes.  Loop updates cu_offset_new and cu_amount_used. */
5781          while (cu_amount_used < cu_size_including_IniLen
5782                 && get_remaining_length_Cursor( &info ) > 0) {
5783             if (0) VG_(printf)("SKIP\n");
5784             (void)get_UChar( &info );
5785             cu_offset_now = get_position_of_Cursor( &info );
5786             cu_amount_used = cu_offset_now - cc.cu_start_offset;
5787          }
5788
5789          if (VG_(clo_read_var_info)) {
5790             /* Preen to level -2.  DIEs have level >= 0 so -2 cannot occur
5791                anywhere else at all.  Our fake the-entire-address-space
5792                range is at level -1, so preening to -2 should completely
5793                empty the stack out. */
5794             TRACE_D3("\n");
5795             varstack_preen( &varparser, td3, -2 );
5796             /* Similarly, empty the type stack out. */
5797             typestack_preen( &typarser, td3, -2 );
5798          }
5799
5800          last_abbv = cc.abbv;
5801
5802          if (cu_offset_now == section_size)
5803             break;
5804          /* else keep going */
5805       }
5806       if (last_abbv.ht_abbvs != NULL)
5807          VG_(HT_destruct) (last_abbv.ht_abbvs, ML_(dinfo_free));
5808    }
5809
5810    if (fndn_ix_Table != NULL)
5811       VG_(deleteXA)(fndn_ix_Table);
5812
5813    if (VG_(clo_read_var_info)) {
5814       /* From here on we're post-processing the stuff we got
5815          out of the .debug_info section. */
5816       if (TD3) {
5817          TRACE_D3("\n");
5818          ML_(pp_TyEnts)(tyents, "Initial type entity (TyEnt) array");
5819          TRACE_D3("\n");
5820          TRACE_D3("------ Compressing type entries ------\n");
5821       }
5822
5823       tyents_cache = ML_(dinfo_zalloc)( "di.readdwarf3.ndrw.6",
5824                                         sizeof(TyEntIndexCache) );
5825       ML_(TyEntIndexCache__invalidate)( tyents_cache );
5826       dedup_types( td3, tyents, tyents_cache );
5827       if (TD3) {
5828          TRACE_D3("\n");
5829          ML_(pp_TyEnts)(tyents, "After type entity (TyEnt) compression");
5830       }
5831
5832       TRACE_D3("\n");
5833       TRACE_D3("------ Resolving the types of variables ------\n" );
5834       resolve_variable_types( barf, tyents, tyents_cache, tempvars );
5835
5836       /* Copy all the non-INDIR tyents into a new table.  For large
5837          .so's, about 90% of the tyents will by now have been resolved to
5838          INDIRs, and we no longer need them, and so don't need to store
5839          them. */
5840       tyents_to_keep
5841          = VG_(newXA)( ML_(dinfo_zalloc),
5842                        "di.readdwarf3.ndrw.7 (TyEnt to-keep array)",
5843                        ML_(dinfo_free), sizeof(TyEnt) );
5844       n = VG_(sizeXA)( tyents );
5845       for (i = 0; i < n; i++) {
5846          TyEnt* ent = VG_(indexXA)( tyents, i );
5847          if (ent->tag != Te_INDIR)
5848             VG_(addToXA)( tyents_to_keep, ent );
5849       }
5850
5851       VG_(deleteXA)( tyents );
5852       tyents = NULL;
5853       ML_(dinfo_free)( tyents_cache );
5854       tyents_cache = NULL;
5855
5856       /* Sort tyents_to_keep so we can lookup in it.  A complete (if
5857          minor) waste of time, since tyents itself is sorted, but
5858          necessary since VG_(lookupXA) refuses to cooperate if we
5859          don't. */
5860       VG_(setCmpFnXA)( tyents_to_keep, (XACmpFn_t) ML_(TyEnt__cmp_by_cuOff_only) );
5861       VG_(sortXA)( tyents_to_keep );
5862
5863       /* Enable cacheing on tyents_to_keep */
5864       tyents_to_keep_cache
5865          = ML_(dinfo_zalloc)( "di.readdwarf3.ndrw.8",
5866                               sizeof(TyEntIndexCache) );
5867       ML_(TyEntIndexCache__invalidate)( tyents_to_keep_cache );
5868
5869       /* And record the tyents in the DebugInfo.  We do this before
5870          starting to hand variables to ML_(addVar), since if ML_(addVar)
5871          wants to do debug printing (of the types of said vars) then it
5872          will need the tyents.*/
5873       vg_assert(!di->admin_tyents);
5874       di->admin_tyents = tyents_to_keep;
5875
5876       /* Bias all the location expressions. */
5877       TRACE_D3("\n");
5878       TRACE_D3("------ Biasing the location expressions ------\n" );
5879
5880       n = VG_(sizeXA)( gexprs );
5881       for (i = 0; i < n; i++) {
5882          gexpr = *(GExpr**)VG_(indexXA)( gexprs, i );
5883          bias_GX( gexpr, di );
5884       }
5885
5886       TRACE_D3("\n");
5887       TRACE_D3("------ Acquired the following variables: ------\n\n");
5888
5889       /* Park (pointers to) all the vars in an XArray, so we can look up
5890          abstract origins quickly.  The array is sorted (hence, looked-up
5891          by) the .dioff fields.  Since the .dioffs should be in strictly
5892          ascending order, there is no need to sort the array after
5893          construction.  The ascendingness is however asserted for. */
5894       dioff_lookup_tab
5895          = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ndrw.9",
5896                        ML_(dinfo_free),
5897                        sizeof(TempVar*) );
5898
5899       n = VG_(sizeXA)( tempvars );
5900       Word first_primary_var = 0;
5901       for (first_primary_var = 0;
5902            escn_debug_info_alt.szB/*really?*/ && first_primary_var < n;
5903            first_primary_var++) {
5904          varp = *(TempVar**)VG_(indexXA)( tempvars, first_primary_var );
5905          if (varp->dioff < escn_debug_info.szB + escn_debug_types.szB)
5906             break;
5907       }
5908       for (i = 0; i < n; i++) {
5909          varp = *(TempVar**)VG_(indexXA)( tempvars, (i + first_primary_var) % n );
5910          if (i > first_primary_var) {
5911             varp2 = *(TempVar**)VG_(indexXA)( tempvars,
5912                                               (i + first_primary_var - 1) % n );
5913             /* why should this hold?  Only, I think, because we've
5914                constructed the array by reading .debug_info sequentially,
5915                and so the array .dioff fields should reflect that, and be
5916                strictly ascending. */
5917             vg_assert(varp2->dioff < varp->dioff);
5918          }
5919          VG_(addToXA)( dioff_lookup_tab, &varp );
5920       }
5921       VG_(setCmpFnXA)( dioff_lookup_tab, cmp_TempVar_by_dioff );
5922       VG_(sortXA)( dioff_lookup_tab ); /* POINTLESS; FIXME: rm */
5923
5924       /* Now visit each var.  Collect up as much info as possible for
5925          each var and hand it to ML_(addVar). */
5926       n = VG_(sizeXA)( tempvars );
5927       for (j = 0; j < n; j++) {
5928          TyEnt* ent;
5929          varp = *(TempVar**)VG_(indexXA)( tempvars, j );
5930
5931          /* Possibly show .. */
5932          if (TD3) {
5933             VG_(printf)("<%lx> addVar: level %d: %s :: ",
5934                         varp->dioff,
5935                         varp->level,
5936                         varp->name ? varp->name : "<anon_var>" );
5937             if (varp->typeR) {
5938                ML_(pp_TyEnt_C_ishly)( tyents_to_keep, varp->typeR );
5939             } else {
5940                VG_(printf)("NULL");
5941             }
5942             VG_(printf)("\n  Loc=");
5943             if (varp->gexpr) {
5944                ML_(pp_GX)(varp->gexpr);
5945             } else {
5946                VG_(printf)("NULL");
5947             }
5948             VG_(printf)("\n");
5949             if (varp->fbGX) {
5950                VG_(printf)("  FrB=");
5951                ML_(pp_GX)( varp->fbGX );
5952                VG_(printf)("\n");
5953             } else {
5954                VG_(printf)("  FrB=none\n");
5955             }
5956             VG_(printf)("  declared at: %u %s:%d\n",
5957                         varp->fndn_ix,
5958                         ML_(fndn_ix2filename) (di, varp->fndn_ix),
5959                         varp->fLine );
5960             if (varp->absOri != (UWord)D3_INVALID_CUOFF)
5961                VG_(printf)("  abstract origin: <%lx>\n", varp->absOri);
5962          }
5963
5964          /* Skip variables which have no location.  These must be
5965             abstract instances; they are useless as-is since with no
5966             location they have no specified memory location.  They will
5967             presumably be referred to via the absOri fields of other
5968             variables. */
5969          if (!varp->gexpr) {
5970             TRACE_D3("  SKIP (no location)\n\n");
5971             continue;
5972          }
5973
5974          /* So it has a location, at least.  If it refers to some other
5975             entry through its absOri field, pull in further info through
5976             that. */
5977          if (varp->absOri != (UWord)D3_INVALID_CUOFF) {
5978             Bool found;
5979             Word ixFirst, ixLast;
5980             TempVar key;
5981             TempVar* keyp = &key;
5982             TempVar *varAI;
5983             VG_(memset)(&key, 0, sizeof(key)); /* not necessary */
5984             key.dioff = varp->absOri; /* this is what we want to find */
5985             found = VG_(lookupXA)( dioff_lookup_tab, &keyp,
5986                                    &ixFirst, &ixLast );
5987             if (!found) {
5988                /* barf("DW_AT_abstract_origin can't be resolved"); */
5989                TRACE_D3("  SKIP (DW_AT_abstract_origin can't be resolved)\n\n");
5990                continue;
5991             }
5992             /* If the following fails, there is more than one entry with
5993                the same dioff.  Which can't happen. */
5994             vg_assert(ixFirst == ixLast);
5995             varAI = *(TempVar**)VG_(indexXA)( dioff_lookup_tab, ixFirst );
5996             /* stay sane */
5997             vg_assert(varAI);
5998             vg_assert(varAI->dioff == varp->absOri);
5999
6000             /* Copy what useful info we can. */
6001             if (varAI->typeR && !varp->typeR)
6002                varp->typeR = varAI->typeR;
6003             if (varAI->name && !varp->name)
6004                varp->name = varAI->name;
6005             if (varAI->fndn_ix && !varp->fndn_ix)
6006                varp->fndn_ix = varAI->fndn_ix;
6007             if (varAI->fLine > 0 && varp->fLine == 0)
6008                varp->fLine = varAI->fLine;
6009          }
6010
6011          /* Give it a name if it doesn't have one. */
6012          if (!varp->name)
6013             varp->name = ML_(addStr)( di, "<anon_var>", -1 );
6014
6015          /* So now does it have enough info to be useful? */
6016          /* NOTE: re typeR: this is a hack.  If typeR is Te_UNKNOWN then
6017             the type didn't get resolved.  Really, in that case
6018             something's broken earlier on, and should be fixed, rather
6019             than just skipping the variable. */
6020          ent = ML_(TyEnts__index_by_cuOff)( tyents_to_keep,
6021                                             tyents_to_keep_cache,
6022                                             varp->typeR );
6023          /* The next two assertions should be guaranteed by
6024             our previous call to resolve_variable_types. */
6025          vg_assert(ent);
6026          vg_assert(ML_(TyEnt__is_type)(ent) || ent->tag == Te_UNKNOWN);
6027
6028          if (ent->tag == Te_UNKNOWN) continue;
6029
6030          vg_assert(varp->gexpr);
6031          vg_assert(varp->name);
6032          vg_assert(varp->typeR);
6033          vg_assert(varp->level >= 0);
6034
6035          /* Ok.  So we're going to keep it.  Call ML_(addVar) once for
6036             each address range in which the variable exists. */
6037          TRACE_D3("  ACQUIRE for range(s) ");
6038          { AddrRange  oneRange;
6039            AddrRange* varPcRanges;
6040            Word       nVarPcRanges;
6041            /* Set up to iterate over address ranges, however
6042               represented. */
6043            if (varp->nRanges == 0 || varp->nRanges == 1) {
6044               vg_assert(!varp->rngMany);
6045               if (varp->nRanges == 0) {
6046                  vg_assert(varp->rngOneMin == 0);
6047                  vg_assert(varp->rngOneMax == 0);
6048               }
6049               nVarPcRanges = varp->nRanges;
6050               oneRange.aMin = varp->rngOneMin;
6051               oneRange.aMax = varp->rngOneMax;
6052               varPcRanges = &oneRange;
6053            } else {
6054               vg_assert(varp->rngMany);
6055               vg_assert(varp->rngOneMin == 0);
6056               vg_assert(varp->rngOneMax == 0);
6057               nVarPcRanges = VG_(sizeXA)(varp->rngMany);
6058               vg_assert(nVarPcRanges >= 2);
6059               vg_assert(nVarPcRanges == (Word)varp->nRanges);
6060               varPcRanges = VG_(indexXA)(varp->rngMany, 0);
6061            }
6062            if (varp->level == 0)
6063               vg_assert( nVarPcRanges == 1 );
6064            /* and iterate */
6065            for (i = 0; i < nVarPcRanges; i++) {
6066               Addr pcMin = varPcRanges[i].aMin;
6067               Addr pcMax = varPcRanges[i].aMax;
6068               vg_assert(pcMin <= pcMax);
6069               /* Level 0 is the global address range.  So at level 0 we
6070                  don't want to bias pcMin/pcMax; but at all other levels
6071                  we do since those are derived from svmas in the Dwarf
6072                  we're reading.  Be paranoid ... */
6073               if (varp->level == 0) {
6074                  vg_assert(pcMin == (Addr)0);
6075                  vg_assert(pcMax == ~(Addr)0);
6076               } else {
6077                  /* vg_assert(pcMin > (Addr)0);
6078                     No .. we can legitimately expect to see ranges like
6079                     0x0-0x11D (pre-biasing, of course). */
6080                  vg_assert(pcMax < ~(Addr)0);
6081               }
6082
6083               /* Apply text biasing, for non-global variables. */
6084               if (varp->level > 0) {
6085                  pcMin += di->text_debug_bias;
6086                  pcMax += di->text_debug_bias;
6087               }
6088
6089               if (i > 0 && (i%2) == 0)
6090                  TRACE_D3("\n                       ");
6091               TRACE_D3("[%#lx,%#lx] ", pcMin, pcMax );
6092
6093               ML_(addVar)(
6094                  di, varp->level,
6095                      pcMin, pcMax,
6096                      varp->name,  varp->typeR,
6097                      varp->gexpr, varp->fbGX,
6098                      varp->fndn_ix, varp->fLine, td3
6099               );
6100            }
6101          }
6102
6103          TRACE_D3("\n\n");
6104          /* and move on to the next var */
6105       }
6106
6107       /* Now free all the TempVars */
6108       n = VG_(sizeXA)( tempvars );
6109       for (i = 0; i < n; i++) {
6110          varp = *(TempVar**)VG_(indexXA)( tempvars, i );
6111          ML_(dinfo_free)(varp);
6112       }
6113       VG_(deleteXA)( tempvars );
6114       tempvars = NULL;
6115
6116       /* and the temp lookup table */
6117       VG_(deleteXA)( dioff_lookup_tab );
6118
6119       /* and the ranges tree.  Note that we need to also free the XArrays
6120          which constitute the keys, hence pass VG_(deleteXA) as a
6121          key-finalizer. */
6122       VG_(deleteFM)( rangestree, (void(*)(UWord))VG_(deleteXA), NULL );
6123
6124       /* and the tyents_to_keep cache */
6125       ML_(dinfo_free)( tyents_to_keep_cache );
6126       tyents_to_keep_cache = NULL;
6127
6128       /* And the signatured type hash.  */
6129       VG_(HT_destruct) ( signature_types, ML_(dinfo_free) );
6130
6131       /* record the GExprs in di so they can be freed later */
6132       vg_assert(!di->admin_gexprs);
6133       di->admin_gexprs = gexprs;
6134    }
6135
6136    // Free up dynamically allocated memory
6137    if (VG_(clo_read_var_info)) {
6138       type_parser_release(&typarser);
6139       var_parser_release(&varparser);
6140    }
6141 }
6142
6143
6144 /*------------------------------------------------------------*/
6145 /*---                                                      ---*/
6146 /*--- The "new" DWARF3 reader -- top level control logic   ---*/
6147 /*---                                                      ---*/
6148 /*------------------------------------------------------------*/
6149
/* Jump-buffer state shared by ML_(new_dwarf3_reader) and barf().

   d3rd_jmpbuf_valid  - True only between the point where
                        ML_(new_dwarf3_reader) arms d3rd_jmpbuf and
                        the point where the read finishes or fails;
                        barf() asserts it before jumping.
   d3rd_jmpbuf_reason - failure message stored by barf() just before
                        it jumps; NULL whenever no failure is pending.
   d3rd_jmpbuf        - the context saved by VG_MINIMAL_SETJMP in
                        ML_(new_dwarf3_reader) and jumped to by
                        VG_MINIMAL_LONGJMP in barf(). */
6150 static Bool               d3rd_jmpbuf_valid  = False;
6151 static const HChar*       d3rd_jmpbuf_reason = NULL;
6152 static VG_MINIMAL_JMP_BUF(d3rd_jmpbuf);
6153
/* Abandon the DWARF3 read that is currently in progress.

   Stores 'reason' in d3rd_jmpbuf_reason and then longjmps to the
   context saved in d3rd_jmpbuf by ML_(new_dwarf3_reader), which
   passes the reason on to ML_(symerr).  Never returns to the caller.

   reason - human-readable failure message.  Only the pointer is
            kept (no copy is made), so the string must stay valid
            until the handler has used it.  Must not be NULL: the
            handler in ML_(new_dwarf3_reader) asserts that a reason
            was recorded.

   Asserts that d3rd_jmpbuf_valid is set, i.e. that there is a saved
   context to jump to. */
6154 static __attribute__((noreturn)) void barf ( const HChar* reason ) {
6155    vg_assert(d3rd_jmpbuf_valid);
6156    d3rd_jmpbuf_reason = reason;
6157    VG_MINIMAL_LONGJMP(d3rd_jmpbuf);
6158    /*NOTREACHED*/
        /* Backstop for the noreturn attribute should the jump above
           ever fall through. */
6159    vg_assert(0);
6160 }
6161
6162
/* Top-level entry point of the DWARF3 reader.

   Runs new_dwarf3_reader_wrk under a setjmp/longjmp guard.  barf is
   handed to the worker as its failure callback, so a failure
   reported through it unwinds straight back into this function
   instead of returning normally.

   di      - the DebugInfo being read into.  Its trace_symtab field
             is copied into td3, and it is the object against which
             a failure is reported via ML_(symerr).
   escn_*  - DiSlices for the individual debug sections.  They are
             not inspected here; all sixteen are passed through
             unchanged to new_dwarf3_reader_wrk.

   Returns nothing.  If the worker fails, the reason recorded by barf
   is handed to ML_(symerr).  On both paths d3rd_jmpbuf_valid and
   d3rd_jmpbuf_reason are reset before returning.

   Uses the file-static jump-buffer state and asserts on entry that
   it is idle, so calls must not nest or overlap. */
6163 void
6164 ML_(new_dwarf3_reader) (
6165    DebugInfo* di,
6166    DiSlice escn_debug_info,      DiSlice escn_debug_types,
6167    DiSlice escn_debug_abbv,      DiSlice escn_debug_line,
6168    DiSlice escn_debug_str,       DiSlice escn_debug_ranges,
6169    DiSlice escn_debug_rnglists,  DiSlice escn_debug_loclists,
6170    DiSlice escn_debug_loc,       DiSlice escn_debug_info_alt,
6171    DiSlice escn_debug_abbv_alt,  DiSlice escn_debug_line_alt,
6172    DiSlice escn_debug_str_alt,   DiSlice escn_debug_line_str,
6173    DiSlice escn_debug_addr,      DiSlice escn_debug_str_offsets
6174 )
6175 {
        /* NOTE(review): both locals are presumably volatile so that
           they can be read reliably after control comes back here
           via longjmp; td3 is presumably the flag that the TRACE_D3
           macro tests -- confirm against the macro definition. */
6176    volatile Int  jumped;
6177    volatile Bool td3 = di->trace_symtab;
6178
6179    /* Run the _wrk function to read the dwarf3.  If it succeeds, it
6180       just returns normally.  If there is any failure, it longjmp's
6181       back here, having first set d3rd_jmpbuf_reason to something
6182       useful. */
        /* The shared state must be idle on entry: no armed jump
           buffer and no pending failure reason. */
6183    vg_assert(d3rd_jmpbuf_valid  == False);
6184    vg_assert(d3rd_jmpbuf_reason == NULL);
6185
        /* Arm the jump buffer before the worker runs: barf asserts
           d3rd_jmpbuf_valid before it jumps.  'jumped' is 0 on the
           initial pass and non-zero when barf has jumped back. */
6186    d3rd_jmpbuf_valid = True;
6187    jumped = VG_MINIMAL_SETJMP(d3rd_jmpbuf);
6188    if (jumped == 0) {
6189       /* try this ... */
6190       new_dwarf3_reader_wrk( di, barf,
6191                              escn_debug_info,     escn_debug_types,
6192                              escn_debug_abbv,     escn_debug_line,
6193                              escn_debug_str,      escn_debug_ranges,
6194                              escn_debug_rnglists, escn_debug_loclists,
6195                              escn_debug_loc,      escn_debug_info_alt,
6196                              escn_debug_abbv_alt, escn_debug_line_alt,
6197                              escn_debug_str_alt,  escn_debug_line_str,
6198                              escn_debug_addr,     escn_debug_str_offsets );
           /* The worker returned normally: disarm the jump buffer. */
6199       d3rd_jmpbuf_valid = False;
6200       TRACE_D3("\n------ .debug_info reading was successful ------\n");
6201    } else {
6202       /* It longjmp'd. */
6203       d3rd_jmpbuf_valid = False;
6204       /* Can't longjump without giving some sort of reason. */
6205       vg_assert(d3rd_jmpbuf_reason != NULL);
6206
6207       TRACE_D3("\n------ .debug_info reading failed ------\n");
6208
           /* Report the reason that barf recorded before jumping. */
6209       ML_(symerr)(di, True, d3rd_jmpbuf_reason);
6210    }
6211
        /* Return the shared state to idle so that the entry assertions
           hold on the next call.  d3rd_jmpbuf_valid has already been
           cleared on both branches above; the reason is cleared only
           here. */
6212    d3rd_jmpbuf_valid  = False;
6213    d3rd_jmpbuf_reason = NULL;
6214 }
6215
6216
6217
6218 /* --- Unused code fragments which might be useful one day. --- */
6219
6220 #if 0
        /* Disabled fragment; it is never compiled.  It walks the
           .debug_aranges data purely for tracing: for each set it
           prints the header fields and then every (address, length)
           pair.  Nothing it reads is stored anywhere.  It relies on
           a cursor 'aranges' and on debug_aranges_img /
           debug_aranges_sz, none of which are declared within the
           fragment. */
6221    /* Read the arange tables */
6222    TRACE_SYMTAB("\n");
        /* NOTE(review): the banner below says ".debug_arange" while
           the cursor's overrun message says ".debug_aranges". */
6223    TRACE_SYMTAB("\n------ The contents of .debug_arange ------\n");
6224    init_Cursor( &aranges, debug_aranges_img,
6225                 debug_aranges_sz, 0, barf,
6226                 "Overrun whilst reading .debug_aranges section" );
        /* One iteration per set; stops when the cursor is exhausted. */
6227    while (True) {
6228       ULong  len, d_i_offset;
6229       Bool   is64;
6230       UShort version;
6231       UChar  asize, segsize;
6232
6233       if (is_at_end_Cursor( &aranges ))
6234          break;
6235       /* Read one arange thingy */
6236       /* initial_length field */
6237       len = get_Initial_Length( &is64, &aranges,
6238                "in .debug_aranges: invalid initial-length field" );
           /* Remaining header fields, read in the order in which they
              are traced below.  NOTE(review): len and segsize are
              only printed; neither is used to bound or steer the
              reads that follow. */
6239       version    = get_UShort( &aranges );
6240       d_i_offset = get_Dwarfish_UWord( &aranges, is64 );
6241       asize      = get_UChar( &aranges );
6242       segsize    = get_UChar( &aranges );
6243       TRACE_D3("  Length:                   %llu\n", len);
6244       TRACE_D3("  Version:                  %d\n", (Int)version);
6245       TRACE_D3("  Offset into .debug_info:  %llx\n", d_i_offset);
6246       TRACE_D3("  Pointer Size:             %d\n", (Int)asize);
6247       TRACE_D3("  Segment Size:             %d\n", (Int)segsize);
6248       TRACE_D3("\n");
6249       TRACE_D3("    Address            Length\n");
6250
           /* Skip bytes until the cursor position is a multiple of
              2 * asize.  NOTE(review): asize is read from the section
              data; a value of 0 would make this modulus a division
              by zero -- guard it before re-enabling this code. */
6251       while ((get_position_of_Cursor( &aranges ) % (2 * asize)) > 0) {
6252          (void)get_UChar( & aranges );
6253       }
           /* Print (address, length) pairs until the all-zero pair
              that ends the set.  The pairs are read in the wide form
              only when asize is 8. */
6254       while (True) {
6255          ULong address = get_Dwarfish_UWord( &aranges, asize==8 );
6256          ULong length = get_Dwarfish_UWord( &aranges, asize==8 );
6257          TRACE_D3("    0x%016llx 0x%llx\n", address, length);
6258          if (address == 0 && length == 0) break;
6259       }
6260    }
6261    TRACE_SYMTAB("\n");
6262 #endif
6263
6264 #endif // defined(VGO_linux) || defined(VGO_darwin) || defined(VGO_solaris) || defined(VGO_freebsd)
6265
6266 /*--------------------------------------------------------------------*/
6267 /*--- end                                                          ---*/
6268 /*--------------------------------------------------------------------*/