/* -*- mode: C; c-basic-offset: 3; -*- */

/*--------------------------------------------------------------------*/
/*--- MemCheck: Maintain bitmaps of memory, tracking the           ---*/
/*--- accessibility (A) and validity (V) status of each byte.      ---*/
/*--------------------------------------------------------------------*/
/*
   This file is part of MemCheck, a heavyweight Valgrind tool for
   detecting memory errors.

   Copyright (C) 2000-2017 Julian Seward

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/
#include "pub_tool_basics.h"
#include "pub_tool_aspacemgr.h"
#include "pub_tool_gdbserver.h"
#include "pub_tool_poolalloc.h"
#include "pub_tool_hashtable.h"     // For mc_include.h
#include "pub_tool_libcbase.h"
#include "pub_tool_libcassert.h"
#include "pub_tool_libcprint.h"
#include "pub_tool_machine.h"
#include "pub_tool_mallocfree.h"
#include "pub_tool_options.h"
#include "pub_tool_oset.h"
#include "pub_tool_rangemap.h"
#include "pub_tool_replacemalloc.h"
#include "pub_tool_tooliface.h"
#include "pub_tool_threadstate.h"
#include "pub_tool_xarray.h"
#include "pub_tool_xtree.h"
#include "pub_tool_xtmemory.h"

#include "mc_include.h"
#include "memcheck.h"   /* for client requests */
/* Set to 1 to enable handwritten assembly helpers on targets for
   which it is supported. */
#define ENABLE_ASSEMBLY_HELPERS 1

/* Set to 1 to do a little more sanity checking */
#define VG_DEBUG_MEMORY 0

#define DEBUG(fmt, args...) //VG_(printf)(fmt, ## args)
static void ocache_sarp_Set_Origins ( Addr, UWord, UInt ); /* fwds */
static void ocache_sarp_Clear_Origins ( Addr, UWord ); /* fwds */
/*------------------------------------------------------------*/
/*--- Fast-case knobs                                       ---*/
/*------------------------------------------------------------*/

// Comment these out to disable the fast cases (don't just set them to zero).

#define PERF_FAST_LOADV    1
#define PERF_FAST_STOREV   1

#define PERF_FAST_SARP     1

#define PERF_FAST_STACK    1
#define PERF_FAST_STACK2   1

/* Change this to 1 to enable assertions on origin tracking cache fast
   paths */
#define OC_ENABLE_ASSERTIONS 0
/*------------------------------------------------------------*/
/*--- Comments on the origin tracking implementation        ---*/
/*------------------------------------------------------------*/

/* See detailed comment entitled
   AN OVERVIEW OF THE ORIGIN TRACKING IMPLEMENTATION
   which is contained further on in this file. */


/*------------------------------------------------------------*/
/*--- V bits and A bits                                      ---*/
/*------------------------------------------------------------*/
/* Conceptually, every byte value has 8 V bits, which track whether Memcheck
   thinks the corresponding value bit is defined.  And every memory byte
   has an A bit, which tracks whether Memcheck thinks the program can access
   it safely (ie. it's mapped, and has at least one of the RWX permission bits
   set).  So every N-bit register is shadowed with N V bits, and every memory
   byte is shadowed with 8 V bits and one A bit.

   In the implementation, we use two forms of compression (compressed V bits
   and distinguished secondary maps) to avoid the 9-bit-per-byte overhead
   as far as possible.

   Memcheck also tracks extra information about each heap block that is
   allocated, for detecting memory leaks and other purposes.
*/
/*------------------------------------------------------------*/
/*--- Basic A/V bitmap representation.                      ---*/
/*------------------------------------------------------------*/

/* All reads and writes are checked against a memory map (a.k.a. shadow
   memory), which records the state of all memory in the process.

   On 32-bit machines the memory map is organised as follows.
   The top 16 bits of an address are used to index into a top-level
   map table, containing 65536 entries.  Each entry is a pointer to a
   second-level map, which records the accessibility and validity
   permissions for the 65536 bytes indexed by the lower 16 bits of the
   address.  Each byte is represented by two bits (details are below).  So
   each second-level map contains 16384 bytes.  This two-level arrangement
   conveniently divides the 4G address space into 64k lumps, each size 64k
   bytes.

   All entries in the primary (top-level) map must point to a valid
   secondary (second-level) map.  Since many of the 64kB chunks will
   have the same status for every bit -- ie. noaccess (for unused
   address space) or entirely addressable and defined (for code segments) --
   there are three distinguished secondary maps, which indicate 'noaccess',
   'undefined' and 'defined'.  For these uniform 64kB chunks, the primary
   map entry points to the relevant distinguished map.  In practice,
   typically more than half of the addressable memory is represented with
   the 'undefined' or 'defined' distinguished secondary map, so it gives a
   good saving.  It also lets us set the V+A bits of large address regions
   quickly in set_address_range_perms().

   On 64-bit machines it's more complicated.  If we followed the same basic
   scheme we'd have a four-level table which would require too many memory
   accesses.  So instead the top-level map table has 2^20 entries (indexed
   using bits 16..35 of the address); this covers the bottom 64GB.  Any
   accesses above 64GB are handled with a slow, sparse auxiliary table.
   Valgrind's address space manager tries very hard to keep things below
   this 64GB barrier so that performance doesn't suffer too much.

   Note that this file has a lot of different functions for reading and
   writing shadow memory.  Only a couple are strictly necessary (eg.
   get_vabits2 and set_vabits2), most are just specialised for specific
   common cases to improve performance.

   Aside: the V+A bits are less precise than they could be -- we have no way
   of marking memory as read-only.  It would be great if we could add an
   extra state VA_BITSn_READONLY.  But then we'd have 5 different states,
   which requires 2.3 bits to hold, and there's no way to do that elegantly
   -- we'd have to double up to 4 bits of metadata per byte, which doesn't
   seem worth it.
*/
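
/* Illustrative sketch (added for exposition, not part of the original
   source, and compiled out): how an address is carved up under the
   two-level scheme described above.  The real lookups are done by
   get_secmap_ptr() and the SM_OFF() macro further down. */
#if 0
static void example_decompose_address ( Addr a )
{
   UWord primary_index  = a >> 16;             /* which secondary map     */
   UWord byte_in_secmap = a & 0xffff;          /* byte within that 64kB   */
   UWord vabits8_index  = byte_in_secmap >> 2; /* 4 bytes per vabits8     */
   /* On 64-bit targets, addresses whose primary_index is too large for
      primary_map[] are instead routed through the auxiliary table; see
      find_or_alloc_in_auxmap(). */
   (void)primary_index; (void)vabits8_index;
}
#endif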
/* --------------- Basic configuration --------------- */

/* Only change this.  N_PRIMARY_MAP *must* be a power of 2. */

#if VG_WORDSIZE == 4

/* cover the entire address space */
#  define N_PRIMARY_BITS  16

#else

/* Just handle the first 128G fast and the rest via auxiliary
   primaries.  If you change this, Memcheck will assert at startup.
   See the definition of UNALIGNED_OR_HIGH for extensive comments. */
#  define N_PRIMARY_BITS  21

#endif


/* Do not change this. */
#define N_PRIMARY_MAP  ( ((UWord)1) << N_PRIMARY_BITS)

/* Do not change this. */
#define MAX_PRIMARY_ADDRESS (Addr)((((Addr)65536) * N_PRIMARY_MAP)-1)
/* --------------- Secondary maps --------------- */

// Each byte of memory conceptually has an A bit, which indicates its
// addressability, and 8 V bits, which indicate its definedness.
//
// But because very few bytes are partially defined, we can use a nice
// compression scheme to reduce the size of shadow memory.  Each byte of
// memory has 2 bits which indicate its state (ie. V+A bits):
//
//   00:  noaccess    (unaddressable but treated as fully defined)
//   01:  undefined   (addressable and fully undefined)
//   10:  defined     (addressable and fully defined)
//   11:  partdefined (addressable and partially defined)
//
// In the "partdefined" case, we use a secondary table to store the V bits.
// Each entry in the secondary-V-bits table maps a byte address to its 8 V
// bits.
//
// We store the compressed V+A bits in 8-bit chunks, ie. the V+A bits for
// four bytes (32 bits) of memory are in each chunk.  Hence the name
// "vabits8".  This lets us get the V+A bits for four bytes at a time
// easily (without having to do any shifting and/or masking), and that is a
// very common operation.  (Note that although each vabits8 chunk
// is 8 bits in size, it represents 32 bits of memory.)
//
// The representation is "inverse" little-endian... each 4 bytes of
// memory is represented by a 1 byte value, where:
//
// - the status of byte (a+0) is held in bits [1..0]
// - the status of byte (a+1) is held in bits [3..2]
// - the status of byte (a+2) is held in bits [5..4]
// - the status of byte (a+3) is held in bits [7..6]
//
// It's "inverse" because endianness normally describes a mapping from
// value bits to memory addresses; in this case the mapping is inverted.
// Ie. instead of particular value bits being held in certain addresses, in
// this case certain addresses are represented by particular value bits.
// See insert_vabits2_into_vabits8() for an example.
//
// But note that we don't compress the V bits stored in registers; they
// need to be explicit to make the shadow operations possible.  Therefore
// when moving values between registers and memory we need to convert
// between the expanded in-register format and the compressed in-memory
// format.  This isn't so difficult, it just requires careful attention in a
// few places.
240 // These represent eight bits of memory.
241 #define VA_BITS2_NOACCESS 0x0 // 00b
242 #define VA_BITS2_UNDEFINED 0x1 // 01b
243 #define VA_BITS2_DEFINED 0x2 // 10b
244 #define VA_BITS2_PARTDEFINED 0x3 // 11b
246 // These represent 16 bits of memory.
247 #define VA_BITS4_NOACCESS 0x0 // 00_00b
248 #define VA_BITS4_UNDEFINED 0x5 // 01_01b
249 #define VA_BITS4_DEFINED 0xa // 10_10b
251 // These represent 32 bits of memory.
252 #define VA_BITS8_NOACCESS 0x00 // 00_00_00_00b
253 #define VA_BITS8_UNDEFINED 0x55 // 01_01_01_01b
254 #define VA_BITS8_DEFINED 0xaa // 10_10_10_10b
256 // These represent 64 bits of memory.
257 #define VA_BITS16_NOACCESS 0x0000 // 00_00_00_00b x 2
258 #define VA_BITS16_UNDEFINED 0x5555 // 01_01_01_01b x 2
259 #define VA_BITS16_DEFINED 0xaaaa // 10_10_10_10b x 2
261 // These represent 128 bits of memory.
262 #define VA_BITS32_UNDEFINED 0x55555555 // 01_01_01_01b x 4
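
/* Worked example (added for exposition, not part of the original source,
   and compiled out).  With 'a' 4-aligned, suppose the bytes a..a+3 are
   defined, undefined, defined and noaccess respectively.  The packed
   chunk is then 0x26 (binary 00_10_01_10, most significant pair = a+3). */
#if 0
static void example_vabits8_packing ( void )
{
   Addr  a       = 0x1000;   /* any 4-aligned address (hypothetical) */
   UChar vabits8 = 0x00;
   insert_vabits2_into_vabits8(a+0, VA_BITS2_DEFINED,   &vabits8);
   insert_vabits2_into_vabits8(a+1, VA_BITS2_UNDEFINED, &vabits8);
   insert_vabits2_into_vabits8(a+2, VA_BITS2_DEFINED,   &vabits8);
   insert_vabits2_into_vabits8(a+3, VA_BITS2_NOACCESS,  &vabits8);
   tl_assert(vabits8 == 0x26);
   tl_assert(extract_vabits2_from_vabits8(a+1, vabits8)
             == VA_BITS2_UNDEFINED);
}
#endif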
#define SM_CHUNKS             16384    // Each SM covers 64k of memory.
#define SM_OFF(aaa)           (((aaa) & 0xffff) >> 2)
#define SM_OFF_16(aaa)        (((aaa) & 0xffff) >> 3)

// Paranoia:  it's critical for performance that the requested inlining
// occurs.  So try extra hard.
#define INLINE    inline __attribute__((always_inline))
static INLINE Addr start_of_this_sm ( Addr a ) {
   return (a & (~SM_MASK));
}
static INLINE Bool is_start_of_sm ( Addr a ) {
   return (start_of_this_sm(a) == a);
}

STATIC_ASSERT(SM_CHUNKS % 2 == 0);

typedef
   union {
      UChar  vabits8[SM_CHUNKS];
      UShort vabits16[SM_CHUNKS/2];
   }
   SecMap;
// 3 distinguished secondary maps, one for no-access, one for
// accessible but undefined, and one for accessible and defined.
// Distinguished secondaries may never be modified.
#define SM_DIST_NOACCESS   0
#define SM_DIST_UNDEFINED  1
#define SM_DIST_DEFINED    2

static SecMap sm_distinguished[3];

static INLINE Bool is_distinguished_sm ( SecMap* sm ) {
   return sm >= &sm_distinguished[0] && sm <= &sm_distinguished[2];
}

// Forward declaration
static void update_SM_counts(SecMap* oldSM, SecMap* newSM);
/* dist_sm points to one of our three distinguished secondaries.  Make
   a copy of it so that we can write to it.
*/
static SecMap* copy_for_writing ( SecMap* dist_sm )
{
   SecMap* new_sm;
   tl_assert(dist_sm == &sm_distinguished[0]
          || dist_sm == &sm_distinguished[1]
          || dist_sm == &sm_distinguished[2]);

   new_sm = VG_(am_shadow_alloc)(sizeof(SecMap));
   if (new_sm == NULL)
      VG_(out_of_memory_NORETURN)( "memcheck:allocate new SecMap",
                                   sizeof(SecMap) );
   VG_(memcpy)(new_sm, dist_sm, sizeof(SecMap));
   update_SM_counts(dist_sm, new_sm);
   return new_sm;
}
/* --------------- Stats --------------- */

static Int   n_issued_SMs      = 0;
static Int   n_deissued_SMs    = 0;
static Int   n_noaccess_SMs    = N_PRIMARY_MAP; // start with many noaccess DSMs
static Int   n_undefined_SMs   = 0;
static Int   n_defined_SMs     = 0;
static Int   n_non_DSM_SMs     = 0;
static Int   max_noaccess_SMs  = 0;
static Int   max_undefined_SMs = 0;
static Int   max_defined_SMs   = 0;
static Int   max_non_DSM_SMs   = 0;

/* # searches initiated in auxmap_L1, and # base cmps required */
static ULong n_auxmap_L1_searches  = 0;
static ULong n_auxmap_L1_cmps      = 0;
/* # of searches that missed in auxmap_L1 and therefore had to
   be handed to auxmap_L2.  And the number of nodes inserted. */
static ULong n_auxmap_L2_searches  = 0;
static ULong n_auxmap_L2_nodes     = 0;

static Int   n_sanity_cheap     = 0;
static Int   n_sanity_expensive = 0;

static Int   n_secVBit_nodes   = 0;
static Int   max_secVBit_nodes = 0;
static void update_SM_counts(SecMap* oldSM, SecMap* newSM)
{
   if      (oldSM == &sm_distinguished[SM_DIST_NOACCESS ]) n_noaccess_SMs --;
   else if (oldSM == &sm_distinguished[SM_DIST_UNDEFINED]) n_undefined_SMs--;
   else if (oldSM == &sm_distinguished[SM_DIST_DEFINED  ]) n_defined_SMs  --;
   else                                                  { n_non_DSM_SMs  --;
                                                            n_deissued_SMs++; }

   if      (newSM == &sm_distinguished[SM_DIST_NOACCESS ]) n_noaccess_SMs ++;
   else if (newSM == &sm_distinguished[SM_DIST_UNDEFINED]) n_undefined_SMs++;
   else if (newSM == &sm_distinguished[SM_DIST_DEFINED  ]) n_defined_SMs  ++;
   else                                                  { n_non_DSM_SMs  ++;
                                                            n_issued_SMs  ++; }

   if (n_noaccess_SMs  > max_noaccess_SMs ) max_noaccess_SMs  = n_noaccess_SMs;
   if (n_undefined_SMs > max_undefined_SMs) max_undefined_SMs = n_undefined_SMs;
   if (n_defined_SMs   > max_defined_SMs  ) max_defined_SMs   = n_defined_SMs;
   if (n_non_DSM_SMs   > max_non_DSM_SMs  ) max_non_DSM_SMs   = n_non_DSM_SMs;
}
/* --------------- Primary maps --------------- */

/* The main primary map.  This covers some initial part of the address
   space, addresses 0 .. (N_PRIMARY_MAP << 16)-1.  The rest of it is
   handled using the auxiliary primary map.
*/
static SecMap* primary_map[N_PRIMARY_MAP];
/* An entry in the auxiliary primary map.  base must be a 64k-aligned
   value, and sm points at the relevant secondary map.  As with the
   main primary map, the secondary may be either a real secondary, or
   one of the three distinguished secondaries.  DO NOT CHANGE THIS
   LAYOUT: the first word has to be the key for OSet fast lookups.
*/
typedef
   struct {
      Addr    base;
      SecMap* sm;
   }
   AuxMapEnt;

/* Tunable parameter: How big is the L1 queue? */
#define N_AUXMAP_L1 24

/* Tunable parameter: How far along the L1 queue to insert
   entries resulting from L2 lookups? */
#define AUXMAP_L1_INSERT_IX 12

static struct {
          Addr       base;
          AuxMapEnt* ent; // pointer to the matching auxmap_L2 node
       }
       auxmap_L1[N_AUXMAP_L1];

static OSet* auxmap_L2 = NULL;
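
/* Illustrative sketch (added for exposition, not part of the original
   source, and compiled out): the overall lookup discipline for high
   addresses.  The L1 array acts as a small front cache that is scanned
   linearly and reorganised on hits; misses fall through to the sorted
   auxmap_L2 OSet, and a successful L2 result is re-inserted into L1 at
   AUXMAP_L1_INSERT_IX.  See maybe_find_in_auxmap() for the real code. */
#if 0
static AuxMapEnt* example_auxmap_lookup ( Addr a )
{
   Word i;
   a &= ~(Addr)0xFFFF;                     /* keys are 64k-aligned */
   for (i = 0; i < N_AUXMAP_L1; i++)       /* front cache */
      if (auxmap_L1[i].base == a)
         return auxmap_L1[i].ent;
   AuxMapEnt key;
   key.base = a;
   key.sm   = NULL;
   return VG_(OSetGen_Lookup)(auxmap_L2, &key);   /* full table */
}
#endif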
static void init_auxmap_L1_L2 ( void )
{
   Word i;
   for (i = 0; i < N_AUXMAP_L1; i++) {
      auxmap_L1[i].base = 0;
      auxmap_L1[i].ent  = NULL;
   }

   tl_assert(0 == offsetof(AuxMapEnt,base));
   tl_assert(sizeof(Addr) == sizeof(void*));
   auxmap_L2 = VG_(OSetGen_Create)( /*keyOff*/  offsetof(AuxMapEnt,base),
                                    /*fastCmp*/ NULL,
                                    VG_(malloc), "mc.iaLL.1", VG_(free) );
}
/* Check representation invariants; if OK return NULL; else a
   descriptive bit of text.  Also return the number of
   non-distinguished secondary maps referred to from the auxiliary
   primary maps. */

static const HChar* check_auxmap_L1_L2_sanity ( Word* n_secmaps_found )
{
   Word i, j;
   /* On a 32-bit platform, the L2 and L1 tables should
      both remain empty forever.

      On a 64-bit platform:
      In the L2 table:
       all .base & 0xFFFF == 0
       all .base > MAX_PRIMARY_ADDRESS
      In the L1 table:
       all .base & 0xFFFF == 0
       all (.base > MAX_PRIMARY_ADDRESS
            and .ent points to an AuxMapEnt with the same .base)
           or
           (.base == 0 and .ent == NULL)
   */
   *n_secmaps_found = 0;
   if (sizeof(void*) == 4) {
      /* 32-bit platform */
      if (VG_(OSetGen_Size)(auxmap_L2) != 0)
         return "32-bit: auxmap_L2 is non-empty";
      for (i = 0; i < N_AUXMAP_L1; i++)
         if (auxmap_L1[i].base != 0 || auxmap_L1[i].ent != NULL)
            return "32-bit: auxmap_L1 is non-empty";
   } else {
      /* 64-bit platform */
      UWord elems_seen = 0;
      AuxMapEnt *elem, *res;
      AuxMapEnt key;
      /* L2 table */
      VG_(OSetGen_ResetIter)(auxmap_L2);
      while ( (elem = VG_(OSetGen_Next)(auxmap_L2)) ) {
         elems_seen++;
         if (0 != (elem->base & (Addr)0xFFFF))
            return "64-bit: nonzero .base & 0xFFFF in auxmap_L2";
         if (elem->base <= MAX_PRIMARY_ADDRESS)
            return "64-bit: .base <= MAX_PRIMARY_ADDRESS in auxmap_L2";
         if (elem->sm == NULL)
            return "64-bit: .sm in _L2 is NULL";
         if (!is_distinguished_sm(elem->sm))
            (*n_secmaps_found)++;
      }
      if (elems_seen != n_auxmap_L2_nodes)
         return "64-bit: disagreement on number of elems in _L2";
      /* Check L1-L2 correspondence */
      for (i = 0; i < N_AUXMAP_L1; i++) {
         if (auxmap_L1[i].base == 0 && auxmap_L1[i].ent == NULL)
            continue;
         if (0 != (auxmap_L1[i].base & (Addr)0xFFFF))
            return "64-bit: nonzero .base & 0xFFFF in auxmap_L1";
         if (auxmap_L1[i].base <= MAX_PRIMARY_ADDRESS)
            return "64-bit: .base <= MAX_PRIMARY_ADDRESS in auxmap_L1";
         if (auxmap_L1[i].ent == NULL)
            return "64-bit: .ent is NULL in auxmap_L1";
         if (auxmap_L1[i].ent->base != auxmap_L1[i].base)
            return "64-bit: _L1 and _L2 bases are inconsistent";
         /* Look it up in auxmap_L2. */
         key.base = auxmap_L1[i].base;
         key.sm   = 0;
         res = VG_(OSetGen_Lookup)(auxmap_L2, &key);
         if (res == NULL)
            return "64-bit: _L1 .base not found in _L2";
         if (res != auxmap_L1[i].ent)
            return "64-bit: _L1 .ent disagrees with _L2 entry";
      }
      /* Check L1 contains no duplicates */
      for (i = 0; i < N_AUXMAP_L1; i++) {
         if (auxmap_L1[i].base == 0)
            continue;
         for (j = i+1; j < N_AUXMAP_L1; j++) {
            if (auxmap_L1[j].base == 0)
               continue;
            if (auxmap_L1[j].base == auxmap_L1[i].base)
               return "64-bit: duplicate _L1 .base entries";
         }
      }
   }
   return NULL; /* ok */
}
static void insert_into_auxmap_L1_at ( Word rank, AuxMapEnt* ent )
{
   Word i;
   tl_assert(ent);
   tl_assert(rank >= 0 && rank < N_AUXMAP_L1);
   for (i = N_AUXMAP_L1-1; i > rank; i--)
      auxmap_L1[i] = auxmap_L1[i-1];
   auxmap_L1[rank].base = ent->base;
   auxmap_L1[rank].ent  = ent;
}
static INLINE AuxMapEnt* maybe_find_in_auxmap ( Addr a )
{
   AuxMapEnt  key;
   AuxMapEnt* res;
   Word       i;

   tl_assert(a > MAX_PRIMARY_ADDRESS);
   a &= ~(Addr)0xFFFF;

   /* First search the front-cache, which is a self-organising
      list containing the most popular entries. */

   if (LIKELY(auxmap_L1[0].base == a))
      return auxmap_L1[0].ent;
   if (LIKELY(auxmap_L1[1].base == a)) {
      Addr       t_base = auxmap_L1[0].base;
      AuxMapEnt* t_ent  = auxmap_L1[0].ent;
      auxmap_L1[0].base = auxmap_L1[1].base;
      auxmap_L1[0].ent  = auxmap_L1[1].ent;
      auxmap_L1[1].base = t_base;
      auxmap_L1[1].ent  = t_ent;
      return auxmap_L1[0].ent;
   }

   n_auxmap_L1_searches++;

   for (i = 0; i < N_AUXMAP_L1; i++) {
      if (auxmap_L1[i].base == a) {
         break;
      }
   }
   tl_assert(i >= 0 && i <= N_AUXMAP_L1);

   n_auxmap_L1_cmps += (ULong)(i+1);

   if (i < N_AUXMAP_L1) {
      if (i > 0) {
         Addr       t_base = auxmap_L1[i-1].base;
         AuxMapEnt* t_ent  = auxmap_L1[i-1].ent;
         auxmap_L1[i-1].base = auxmap_L1[i-0].base;
         auxmap_L1[i-1].ent  = auxmap_L1[i-0].ent;
         auxmap_L1[i-0].base = t_base;
         auxmap_L1[i-0].ent  = t_ent;
         i--;
      }
      return auxmap_L1[i].ent;
   }

   n_auxmap_L2_searches++;

   /* First see if we already have it. */
   key.base = a;
   key.sm   = 0;

   res = VG_(OSetGen_Lookup)(auxmap_L2, &key);
   if (res)
      insert_into_auxmap_L1_at( AUXMAP_L1_INSERT_IX, res );

   return res;
}
static AuxMapEnt* find_or_alloc_in_auxmap ( Addr a )
{
   AuxMapEnt *nyu, *res;

   /* First see if we already have it. */
   res = maybe_find_in_auxmap( a );
   if (LIKELY(res))
      return res;

   /* Ok, there's no entry in the secondary map, so we'll have
      to allocate one. */
   a &= ~(Addr)0xFFFF;

   nyu = (AuxMapEnt*) VG_(OSetGen_AllocNode)( auxmap_L2, sizeof(AuxMapEnt) );
   nyu->base = a;
   nyu->sm   = &sm_distinguished[SM_DIST_NOACCESS];
   VG_(OSetGen_Insert)( auxmap_L2, nyu );
   insert_into_auxmap_L1_at( AUXMAP_L1_INSERT_IX, nyu );
   n_auxmap_L2_nodes++;
   return nyu;
}
/* --------------- SecMap fundamentals --------------- */

// In all these, 'low' means it's definitely in the main primary map,
// 'high' means it's definitely in the auxiliary table.

static INLINE UWord get_primary_map_low_offset ( Addr a )
{
   UWord pm_off = a >> 16;
   return pm_off;
}

static INLINE SecMap** get_secmap_low_ptr ( Addr a )
{
   UWord pm_off = a >> 16;
#  if VG_DEBUG_MEMORY >= 1
   tl_assert(pm_off < N_PRIMARY_MAP);
#  endif
   return &primary_map[ pm_off ];
}

static INLINE SecMap** get_secmap_high_ptr ( Addr a )
{
   AuxMapEnt* am = find_or_alloc_in_auxmap(a);
   return &am->sm;
}

static INLINE SecMap** get_secmap_ptr ( Addr a )
{
   return ( a <= MAX_PRIMARY_ADDRESS
          ? get_secmap_low_ptr(a)
          : get_secmap_high_ptr(a));
}

static INLINE SecMap* get_secmap_for_reading_low ( Addr a )
{
   return *get_secmap_low_ptr(a);
}

static INLINE SecMap* get_secmap_for_reading_high ( Addr a )
{
   return *get_secmap_high_ptr(a);
}

static INLINE SecMap* get_secmap_for_writing_low(Addr a)
{
   SecMap** p = get_secmap_low_ptr(a);
   if (UNLIKELY(is_distinguished_sm(*p)))
      *p = copy_for_writing(*p);
   return *p;
}

static INLINE SecMap* get_secmap_for_writing_high ( Addr a )
{
   SecMap** p = get_secmap_high_ptr(a);
   if (UNLIKELY(is_distinguished_sm(*p)))
      *p = copy_for_writing(*p);
   return *p;
}

/* Produce the secmap for 'a', either from the primary map or by
   ensuring there is an entry for it in the aux primary map.  The
   secmap may be a distinguished one as the caller will only want to
   be able to read it.
*/
static INLINE SecMap* get_secmap_for_reading ( Addr a )
{
   return ( a <= MAX_PRIMARY_ADDRESS
          ? get_secmap_for_reading_low (a)
          : get_secmap_for_reading_high(a) );
}

/* Produce the secmap for 'a', either from the primary map or by
   ensuring there is an entry for it in the aux primary map.  The
   secmap may not be a distinguished one, since the caller will want
   to be able to write it.  If it is a distinguished secondary, make a
   writable copy of it, install it, and return the copy instead.  (COW
   semantics).
*/
static INLINE SecMap* get_secmap_for_writing ( Addr a )
{
   return ( a <= MAX_PRIMARY_ADDRESS
          ? get_secmap_for_writing_low (a)
          : get_secmap_for_writing_high(a) );
}

/* If 'a' has a SecMap, produce it.  Else produce NULL.  But don't
   allocate one if one doesn't already exist.  This is used by the
   leak checker.
*/
static SecMap* maybe_get_secmap_for ( Addr a )
{
   if (a <= MAX_PRIMARY_ADDRESS) {
      return get_secmap_for_reading_low(a);
   } else {
      AuxMapEnt* am = maybe_find_in_auxmap(a);
      return am ? am->sm : NULL;
   }
}
/* --------------- Fundamental functions --------------- */

static INLINE
void insert_vabits2_into_vabits8 ( Addr a, UChar vabits2, UChar* vabits8 )
{
   UInt shift =  (a & 3)   << 1;        // shift by 0, 2, 4, or 6
   *vabits8  &= ~(0x3      << shift);   // mask out the two old bits
   *vabits8  |=  (vabits2  << shift);   // mask  in the two new bits
}

static INLINE
void insert_vabits4_into_vabits8 ( Addr a, UChar vabits4, UChar* vabits8 )
{
   UInt shift;
   tl_assert(VG_IS_2_ALIGNED(a));       // Must be 2-aligned
   shift      =  (a & 2)   << 1;        // shift by 0 or 4
   *vabits8  &= ~(0xf      << shift);   // mask out the four old bits
   *vabits8  |=  (vabits4  << shift);   // mask  in the four new bits
}

static INLINE
UChar extract_vabits2_from_vabits8 ( Addr a, UChar vabits8 )
{
   UInt shift = (a & 3) << 1;           // shift by 0, 2, 4, or 6
   vabits8 >>= shift;                   // shift the two bits to the bottom
   return 0x3 & vabits8;                // mask out the rest
}

static INLINE
UChar extract_vabits4_from_vabits8 ( Addr a, UChar vabits8 )
{
   UInt shift;
   tl_assert(VG_IS_2_ALIGNED(a));       // Must be 2-aligned
   shift = (a & 2) << 1;                // shift by 0 or 4
   vabits8 >>= shift;                   // shift the four bits to the bottom
   return 0xf & vabits8;                // mask out the rest
}

// Note that these four are only used in slow cases.  The fast cases do
// clever things like combine the auxmap check (in
// get_secmap_{read,writ}able) with alignment checks.

// Any time this function is called, if it is possible that vabits2
// is equal to VA_BITS2_PARTDEFINED, then the corresponding entry in the
// sec-V-bits table must also be set!
static INLINE
void set_vabits2 ( Addr a, UChar vabits2 )
{
   SecMap* sm       = get_secmap_for_writing(a);
   UWord   sm_off   = SM_OFF(a);
   insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) );
}

static INLINE
UChar get_vabits2 ( Addr a )
{
   SecMap* sm       = get_secmap_for_reading(a);
   UWord   sm_off   = SM_OFF(a);
   UChar   vabits8  = sm->vabits8[sm_off];
   return extract_vabits2_from_vabits8(a, vabits8);
}

// Any time this function is called, if it is possible that any of the
// 4 2-bit fields in vabits8 are equal to VA_BITS2_PARTDEFINED, then the
// corresponding entry(s) in the sec-V-bits table must also be set!
static INLINE
UChar get_vabits8_for_aligned_word32 ( Addr a )
{
   SecMap* sm       = get_secmap_for_reading(a);
   UWord   sm_off   = SM_OFF(a);
   UChar   vabits8  = sm->vabits8[sm_off];
   return vabits8;
}

static INLINE
void set_vabits8_for_aligned_word32 ( Addr a, UChar vabits8 )
{
   SecMap* sm       = get_secmap_for_writing(a);
   UWord   sm_off   = SM_OFF(a);
   sm->vabits8[sm_off] = vabits8;
}
// Forward declarations
static UWord get_sec_vbits8(Addr a);
static void  set_sec_vbits8(Addr a, UWord vbits8);

// Returns False if there was an addressability error.
static INLINE
Bool set_vbits8 ( Addr a, UChar vbits8 )
{
   Bool  ok      = True;
   UChar vabits2 = get_vabits2(a);
   if ( VA_BITS2_NOACCESS != vabits2 ) {
      // Addressable.  Convert in-register format to in-memory format.
      // Also remove any existing sec V bit entry for the byte if no
      // longer necessary.
      if      ( V_BITS8_DEFINED   == vbits8 ) { vabits2 = VA_BITS2_DEFINED;   }
      else if ( V_BITS8_UNDEFINED == vbits8 ) { vabits2 = VA_BITS2_UNDEFINED; }
      else                                    { vabits2 = VA_BITS2_PARTDEFINED;
                                                set_sec_vbits8(a, vbits8);  }
      set_vabits2(a, vabits2);

   } else {
      // Unaddressable!  Do nothing -- when writing to unaddressable
      // memory it acts as a black hole, and the V bits can never be seen
      // again.  So we don't have to write them at all.
      ok = False;
   }
   return ok;
}

// Returns False if there was an addressability error.  In that case, we put
// all defined bits into vbits8.
static INLINE
Bool get_vbits8 ( Addr a, UChar* vbits8 )
{
   Bool  ok      = True;
   UChar vabits2 = get_vabits2(a);

   // Convert the in-memory format to in-register format.
   if      ( VA_BITS2_DEFINED   == vabits2 ) { *vbits8 = V_BITS8_DEFINED;   }
   else if ( VA_BITS2_UNDEFINED == vabits2 ) { *vbits8 = V_BITS8_UNDEFINED; }
   else if ( VA_BITS2_NOACCESS  == vabits2 ) {
      *vbits8 = V_BITS8_DEFINED;    // Make V bits defined!
      ok = False;
   } else {
      tl_assert( VA_BITS2_PARTDEFINED == vabits2 );
      *vbits8 = get_sec_vbits8(a);
   }
   return ok;
}
/* --------------- Secondary V bit table ------------ */

// This table holds the full V bit pattern for partially-defined bytes
// (PDBs) that are represented by VA_BITS2_PARTDEFINED in the main shadow
// memory.
//
// Note: the nodes in this table can become stale.  Eg. if you write a PDB,
// then overwrite the same address with a fully defined byte, the sec-V-bit
// node will not necessarily be removed.  This is because checking for
// whether removal is necessary would slow down the fast paths.
//
// To avoid the stale nodes building up too much, we periodically (once the
// table reaches a certain size) garbage collect (GC) the table by
// traversing it and evicting any nodes not having PDB.
// If more than a certain proportion of nodes survived, we increase the
// table size so that GCs occur less often.
//
// This policy is designed to avoid bad table bloat in the worst case where
// a program creates huge numbers of stale PDBs -- we would get this bloat
// if we had no GC -- while handling well the case where a node becomes
// stale but shortly afterwards is rewritten with a PDB and so becomes
// non-stale again (which happens quite often, eg. in perf/bz2).  If we just
// removed all stale nodes as soon as possible, we would just end up
// re-adding a lot of them later again.  The "sufficiently stale" approach
// avoids this.  (If a program has many live PDBs, performance will just
// suck, there's no way around that.)
//
// Further comments, JRS 14 Feb 2012.  It turns out that the policy of
// holding on to stale entries for 2 GCs before discarding them can lead
// to massive space leaks.  So we're changing to an arrangement where
// lines are evicted as soon as they are observed to be stale during a
// GC.  This also has a side benefit of allowing the sufficiently_stale
// field to be removed from the SecVBitNode struct, reducing its size by
// 8 bytes, which is a substantial space saving considering that the
// struct was previously 32 or so bytes, on a 64 bit target.
//
// In order to try and mitigate the problem that the "sufficiently stale"
// heuristic was designed to avoid, the table size is allowed to drift
// up ("DRIFTUP") slowly to 80000, even if the residency is low.  This
// means that nodes will exist in the table longer on average, and hopefully
// will be deleted and re-added less frequently.
//
// The previous scaling up mechanism (now called STEPUP) is retained:
// if residency exceeds 50%, the table is scaled up, although by a
// factor sqrt(2) rather than 2 as before.  This effectively doubles the
// frequency of GCs when there are many PDBs and reduces the tendency of
// stale PDBs to reside for long periods in the table.
static OSet* secVBitTable;

// Stats
static ULong sec_vbits_new_nodes = 0;
static ULong sec_vbits_updates   = 0;

// This must be a power of two;  this is checked in mc_pre_clo_init().
// The size chosen here is a trade-off:  if the nodes are bigger (ie. cover
// a larger address range) they take more space but we can get multiple
// partially-defined bytes in one if they are close to each other, reducing
// the number of total nodes.  In practice sometimes they are clustered (eg.
// perf/bz2 repeatedly writes then reads more than 20,000 in a contiguous
// row), but often not.  So we choose something intermediate.
#define BYTES_PER_SEC_VBIT_NODE      16
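
// Worked example (added for exposition, not part of the original source):
// with BYTES_PER_SEC_VBIT_NODE == 16, the byte at address 0x1003 lives in
// the node keyed by VG_ROUNDDN(0x1003, 16) == 0x1000, at index
// 0x1003 % 16 == 3 of that node's vbits8[] array.  See get_sec_vbits8()
// and set_sec_vbits8() below.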
// We make the table bigger by a factor of STEPUP_GROWTH_FACTOR if
// more than this many nodes survive a GC.
#define STEPUP_SURVIVOR_PROPORTION  0.5
#define STEPUP_GROWTH_FACTOR        1.414213562

// If the above heuristic doesn't apply, then we may make the table
// slightly bigger, by a factor of DRIFTUP_GROWTH_FACTOR, if more than
// this many nodes survive a GC, _and_ the total table size does
// not exceed a fixed limit.  The numbers are somewhat arbitrary, but
// work tolerably well on long Firefox runs.  The scaleup ratio of 1.5%
// effectively, although gradually, reduces residency and increases time
// between GCs for programs with small numbers of PDBs.  The 80000 limit
// effectively limits the table size to around 2MB for programs with
// small numbers of PDBs, whilst giving a reasonably long lifetime to
// entries, to try and reduce the costs resulting from deleting and
// re-adding of entries.
#define DRIFTUP_SURVIVOR_PROPORTION 0.15
#define DRIFTUP_GROWTH_FACTOR       1.015
#define DRIFTUP_MAX_SIZE            80000
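
// Worked example of the resizing rules above (added for exposition, not
// part of the original source): starting from the initial limit of 1000
// nodes (secVBitLimit below), a GC in which more than 500 nodes (50%)
// survive multiplies the limit by ~1.414 (STEPUP), giving 1414; otherwise,
// if more than 150 nodes (15%) survive and the limit is still below 80000,
// the limit is multiplied by 1.015 (DRIFTUP), giving 1015.  See
// gcSecVBitTable().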
// We GC the table when it gets this many nodes in it, ie. it's effectively
// the table size.  It can change.
static Int  secVBitLimit = 1000;

// The number of GCs done, used to age sec-V-bit nodes for eviction.
// Because it's unsigned, wrapping doesn't matter -- the right answer will
// come out anyway.
static UInt GCs_done = 0;

typedef
   struct {
      Addr  a;
      UChar vbits8[BYTES_PER_SEC_VBIT_NODE];
   }
   SecVBitNode;
static OSet* createSecVBitTable(void)
{
   OSet* newSecVBitTable;
   newSecVBitTable = VG_(OSetGen_Create_With_Pool)
      ( offsetof(SecVBitNode, a),
        NULL, // use fast comparisons
        VG_(malloc), "mc.cSVT.1 (sec VBit table)",
        VG_(free),
        1000,
        sizeof(SecVBitNode));
   return newSecVBitTable;
}
static void gcSecVBitTable(void)
{
   OSet*        secVBitTable2;
   SecVBitNode* n;
   Int          i, n_nodes = 0, n_survivors = 0;

   GCs_done++;

   // Create the new table.
   secVBitTable2 = createSecVBitTable();

   // Traverse the table, moving fresh nodes into the new table.
   VG_(OSetGen_ResetIter)(secVBitTable);
   while ( (n = VG_(OSetGen_Next)(secVBitTable)) ) {
      // Keep node if any of its bytes are non-stale.  Using
      // get_vabits2() for the lookup is not very efficient, but I don't
      // think it matters.
      for (i = 0; i < BYTES_PER_SEC_VBIT_NODE; i++) {
         if (VA_BITS2_PARTDEFINED == get_vabits2(n->a + i)) {
            // Found a non-stale byte, so keep =>
            // Insert a copy of the node into the new table.
            SecVBitNode* n2 =
               VG_(OSetGen_AllocNode)(secVBitTable2, sizeof(SecVBitNode));
            *n2 = *n;
            VG_(OSetGen_Insert)(secVBitTable2, n2);
            break;
         }
      }
   }

   // Get the before and after sizes.
   n_nodes     = VG_(OSetGen_Size)(secVBitTable);
   n_survivors = VG_(OSetGen_Size)(secVBitTable2);

   // Destroy the old table, and put the new one in its place.
   VG_(OSetGen_Destroy)(secVBitTable);
   secVBitTable = secVBitTable2;

   if (VG_(clo_verbosity) > 1 && n_nodes != 0) {
      VG_(message)(Vg_DebugMsg, "memcheck GC: %d nodes, %d survivors (%.1f%%)\n",
                   n_nodes, n_survivors, n_survivors * 100.0 / n_nodes);
   }

   // Increase table size if necessary.
   if ((Double)n_survivors
       > ((Double)secVBitLimit * STEPUP_SURVIVOR_PROPORTION)) {
      secVBitLimit = (Int)((Double)secVBitLimit * (Double)STEPUP_GROWTH_FACTOR);
      if (VG_(clo_verbosity) > 1)
         VG_(message)(Vg_DebugMsg,
                      "memcheck GC: %d new table size (stepup)\n",
                      secVBitLimit);
   }
   else
   if (secVBitLimit < DRIFTUP_MAX_SIZE
       && (Double)n_survivors
          > ((Double)secVBitLimit * DRIFTUP_SURVIVOR_PROPORTION)) {
      secVBitLimit = (Int)((Double)secVBitLimit * (Double)DRIFTUP_GROWTH_FACTOR);
      if (VG_(clo_verbosity) > 1)
         VG_(message)(Vg_DebugMsg,
                      "memcheck GC: %d new table size (driftup)\n",
                      secVBitLimit);
   }
}
static UWord get_sec_vbits8(Addr a)
{
   Addr         aAligned = VG_ROUNDDN(a, BYTES_PER_SEC_VBIT_NODE);
   Int          amod     = a % BYTES_PER_SEC_VBIT_NODE;
   SecVBitNode* n        = VG_(OSetGen_Lookup)(secVBitTable, &aAligned);
   UChar        vbits8;
   tl_assert2(n, "get_sec_vbits8: no node for address %p (%p)\n", aAligned, a);
   // Shouldn't be fully defined or fully undefined -- those cases shouldn't
   // make it to the secondary V bits table.
   vbits8 = n->vbits8[amod];
   tl_assert(V_BITS8_DEFINED != vbits8 && V_BITS8_UNDEFINED != vbits8);
   return vbits8;
}
static void set_sec_vbits8(Addr a, UWord vbits8)
{
   Addr         aAligned = VG_ROUNDDN(a, BYTES_PER_SEC_VBIT_NODE);
   Int          i, amod  = a % BYTES_PER_SEC_VBIT_NODE;
   SecVBitNode* n        = VG_(OSetGen_Lookup)(secVBitTable, &aAligned);
   // Shouldn't be fully defined or fully undefined -- those cases shouldn't
   // make it to the secondary V bits table.
   tl_assert(V_BITS8_DEFINED != vbits8 && V_BITS8_UNDEFINED != vbits8);
   if (n) {
      n->vbits8[amod] = vbits8;     // update
      sec_vbits_updates++;
   } else {
      // Do a table GC if necessary.  Nb: do this before creating and
      // inserting the new node, to avoid erroneously GC'ing the new node.
      if (secVBitLimit == VG_(OSetGen_Size)(secVBitTable)) {
         gcSecVBitTable();
      }

      // New node:  assign the specific byte, make the rest invalid (they
      // should never be read as-is, but be cautious).
      n = VG_(OSetGen_AllocNode)(secVBitTable, sizeof(SecVBitNode));
      n->a = aAligned;
      for (i = 0; i < BYTES_PER_SEC_VBIT_NODE; i++) {
         n->vbits8[i] = V_BITS8_UNDEFINED;
      }
      n->vbits8[amod] = vbits8;

      // Insert the new node.
      VG_(OSetGen_Insert)(secVBitTable, n);
      sec_vbits_new_nodes++;

      n_secVBit_nodes = VG_(OSetGen_Size)(secVBitTable);
      if (n_secVBit_nodes > max_secVBit_nodes)
         max_secVBit_nodes = n_secVBit_nodes;
   }
}
/* --------------- Endianness helpers --------------- */

/* Returns the offset in memory of the byteno-th most significant byte
   in a wordszB-sized word, given the specified endianness. */
static INLINE UWord byte_offset_w ( UWord wordszB, Bool bigendian,
                                    UWord byteno ) {
   return bigendian ? (wordszB-1-byteno) : byteno;
}
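
/* Worked example (added for exposition, not part of the original source):
   for a 4-byte word, byte_offset_w(4, True/*bigendian*/, 0) == 3 and
   byte_offset_w(4, False, 0) == 0, so the slow-path load/store loops below
   can walk a word's bytes in significance order while still touching the
   correct memory addresses on either endianness. */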
/* --------------- Ignored address ranges --------------- */

/* Denotes the address-error-reportability status for address ranges:
   IAR_NotIgnored:  the usual case -- report errors in this range
   IAR_CommandLine: don't report errors -- from command line setting
   IAR_ClientReq:   don't report errors -- from client request
*/
typedef
   enum { IAR_INVALID=99,
          IAR_NotIgnored,
          IAR_CommandLine,
          IAR_ClientReq }
   IARKind;

static const HChar* showIARKind ( IARKind iark )
{
   switch (iark) {
      case IAR_INVALID:     return "INVALID";
      case IAR_NotIgnored:  return "NotIgnored";
      case IAR_CommandLine: return "CommandLine";
      case IAR_ClientReq:   return "ClientReq";
      default:              return "???";
   }
}

// RangeMap<IARKind>
static RangeMap* gIgnoredAddressRanges = NULL;
static void init_gIgnoredAddressRanges ( void )
{
   if (LIKELY(gIgnoredAddressRanges != NULL))
      return;
   gIgnoredAddressRanges = VG_(newRangeMap)( VG_(malloc), "mc.igIAR.1",
                                             VG_(free), IAR_NotIgnored );
}
Bool MC_(in_ignored_range) ( Addr a )
{
   if (LIKELY(gIgnoredAddressRanges == NULL))
      return False;
   UWord how     = IAR_INVALID;
   UWord key_min = ~(UWord)0;
   UWord key_max =  (UWord)0;
   VG_(lookupRangeMap)(&key_min, &key_max, &how, gIgnoredAddressRanges, a);
   tl_assert(key_min <= a && a <= key_max);
   switch (how) {
      case IAR_NotIgnored:  return False;
      case IAR_CommandLine: return True;
      case IAR_ClientReq:   return True;
      default: break; /* invalid */
   }
   VG_(tool_panic)("MC_(in_ignore_range)");
   /*NOTREACHED*/
}
Bool MC_(in_ignored_range_below_sp) ( Addr sp, Addr a, UInt szB )
{
   if (LIKELY(!MC_(clo_ignore_range_below_sp)))
      return False;
   tl_assert(szB >= 1 && szB <= 32);
   tl_assert(MC_(clo_ignore_range_below_sp__first_offset)
             > MC_(clo_ignore_range_below_sp__last_offset));
   Addr range_lo = sp - MC_(clo_ignore_range_below_sp__first_offset);
   Addr range_hi = sp - MC_(clo_ignore_range_below_sp__last_offset);
   if (range_lo >= range_hi) {
      /* Bizarre.  We have a wraparound situation.  What should we do? */
      return False; // Play safe
   } else {
      /* This is the expected case. */
      if (range_lo <= a && a + szB - 1 <= range_hi)
         return True;
      else
         return False;
   }
   /*NOTREACHED*/
   tl_assert(0);
}
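
/* Worked example (added for exposition, not part of the original source;
   the offsets used here are hypothetical, not defaults): if the two
   clo_ignore_range_below_sp offsets are 1024 (first) and 1 (last), then
   for a stack pointer sp the ignored window is [sp-1024, sp-1], and an
   access (a, szB) is ignored only if it lies entirely inside that
   window. */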
/* Parse two Addrs (in hex) separated by a dash, or fail. */

static Bool parse_Addr_pair ( const HChar** ppc, Addr* result1, Addr* result2 )
{
   Bool ok = VG_(parse_Addr) (ppc, result1);
   if (!ok)
      return False;
   if (**ppc != '-')
      return False;
   (*ppc)++;
   ok = VG_(parse_Addr) (ppc, result2);
   if (!ok)
      return False;
   return True;
}

/* Parse two UInts (32 bit unsigned, in decimal) separated by a dash,
   or fail. */

static Bool parse_UInt_pair ( const HChar** ppc, UInt* result1, UInt* result2 )
{
   Bool ok = VG_(parse_UInt) (ppc, result1);
   if (!ok)
      return False;
   if (**ppc != '-')
      return False;
   (*ppc)++;
   ok = VG_(parse_UInt) (ppc, result2);
   if (!ok)
      return False;
   return True;
}
/* Parse a set of ranges separated by commas into 'ignoreRanges', or
   fail.  If they are valid, add them to the global set of ignored
   ranges. */
static Bool parse_ignore_ranges ( const HChar* str0 )
{
   init_gIgnoredAddressRanges();
   const HChar*  str = str0;
   const HChar** ppc = &str;
   while (1) {
      Addr start = ~(Addr)0;
      Addr end   = (Addr)0;
      Bool ok    = parse_Addr_pair(ppc, &start, &end);
      if (!ok)
         return False;
      if (start > end)
         return False;
      VG_(bindRangeMap)( gIgnoredAddressRanges, start, end, IAR_CommandLine );
      if (**ppc == 0)
         return True;
      if (**ppc != ',')
         return False;
      (*ppc)++;
   }
   /*NOTREACHED*/
   return False;
}
/* Add or remove [start, +len) from the set of ignored ranges. */
static Bool modify_ignore_ranges ( Bool addRange, Addr start, Addr len )
{
   init_gIgnoredAddressRanges();
   const Bool verbose = (VG_(clo_verbosity) > 1);
   if (len == 0) {
      return False;
   }
   if (addRange) {
      VG_(bindRangeMap)(gIgnoredAddressRanges,
                        start, start+len-1, IAR_ClientReq);
      if (verbose)
         VG_(dmsg)("memcheck: modify_ignore_ranges: add %p %p\n",
                   (void*)start, (void*)(start+len-1));
   } else {
      VG_(bindRangeMap)(gIgnoredAddressRanges,
                        start, start+len-1, IAR_NotIgnored);
      if (verbose)
         VG_(dmsg)("memcheck: modify_ignore_ranges: del %p %p\n",
                   (void*)start, (void*)(start+len-1));
   }
   if (verbose) {
      VG_(dmsg)("memcheck: now have %u ranges:\n",
                VG_(sizeRangeMap)(gIgnoredAddressRanges));
      UInt i;
      for (i = 0; i < VG_(sizeRangeMap)(gIgnoredAddressRanges); i++) {
         UWord val     = IAR_INVALID;
         UWord key_min = ~(UWord)0;
         UWord key_max = (UWord)0;
         VG_(indexRangeMap)( &key_min, &key_max, &val,
                             gIgnoredAddressRanges, i );
         VG_(dmsg)("memcheck:   [%u]  %016lx-%016lx  %s\n",
                   i, key_min, key_max, showIARKind(val));
      }
   }
   return True;
}
/* --------------- Load/store slow cases. --------------- */

static
__attribute__((noinline))
void mc_LOADV_128_or_256_slow ( /*OUT*/ULong* res,
                                Addr a, SizeT nBits, Bool bigendian )
{
   ULong  pessim[4];     /* only used when p-l-ok=yes */
   SSizeT szB            = nBits / 8;
   SSizeT szL            = szB / 8;  /* Size in Longs (64-bit units) */
   SSizeT i, j;          /* Must be signed. */
   SizeT  n_addrs_bad = 0;
   Addr   ai;
   UChar  vbits8;
   Bool   ok;

   /* Code below assumes load size is a power of two and at least 64
      bits. */
   tl_assert((szB & (szB-1)) == 0 && szL > 0);

   /* If this triggers, you probably just need to increase the size of
      the pessim array. */
   tl_assert(szL <= sizeof(pessim) / sizeof(pessim[0]));

   for (j = 0; j < szL; j++) {
      pessim[j] = V_BITS64_DEFINED;
      res[j]    = V_BITS64_UNDEFINED;
   }

   /* Make up a result V word, which contains the loaded data for
      valid addresses and Defined for invalid addresses.  Iterate over
      the bytes in the word, from the most significant down to the
      least.  The vbits to return are calculated into vbits128.  Also
      compute the pessimising value to be used when
      --partial-loads-ok=yes.  n_addrs_bad is redundant (the relevant
      info can be gleaned from the pessim array) but is used as a
      cross-check. */
   for (j = szL-1; j >= 0; j--) {
      ULong vbits64    = V_BITS64_UNDEFINED;
      ULong pessim64   = V_BITS64_DEFINED;
      UWord long_index = byte_offset_w(szL, bigendian, j);
      for (i = 8-1; i >= 0; i--) {
         PROF_EVENT(MCPE_LOADV_128_OR_256_SLOW_LOOP);
         ai = a + 8*long_index + byte_offset_w(8, bigendian, i);
         ok = get_vbits8(ai, &vbits8);
         vbits64 <<= 8;
         vbits64 |= vbits8;
         if (!ok) n_addrs_bad++;
         pessim64 <<= 8;
         pessim64 |= (ok ? V_BITS8_DEFINED : V_BITS8_UNDEFINED);
      }
      res[long_index]    = vbits64;
      pessim[long_index] = pessim64;
   }

   /* In the common case, all the addresses involved are valid, so we
      just return the computed V bits and have done. */
   if (LIKELY(n_addrs_bad == 0))
      return;

   /* If there's no possibility of getting a partial-loads-ok
      exemption, report the error and quit. */
   if (!MC_(clo_partial_loads_ok)) {
      MC_(record_address_error)( VG_(get_running_tid)(), a, szB, False );
      return;
   }

   /* The partial-loads-ok exemption might apply.  Find out if it
      does.  If so, don't report an addressing error, but do return
      Undefined for the bytes that are out of range, so as to avoid
      false negatives.  If it doesn't apply, just report an addressing
      error in the usual way. */

   /* Some code steps along byte strings in aligned chunks
      even when there is only a partially defined word at the end (eg,
      optimised strlen).  This is allowed by the memory model of
      modern machines, since an aligned load cannot span two pages and
      thus cannot "partially fault".

      Therefore, a load from a partially-addressible place is allowed
      if all of the following hold:
      - the command-line flag is set [by default, it isn't]
      - it's an aligned load
      - at least one of the addresses in the word *is* valid

      Since this suppresses the addressing error, we avoid false
      negatives by marking bytes undefined when they come from an
      invalid address.
   */

   /* "at least one of the addresses is invalid" */
   ok = False;
   for (j = 0; j < szL; j++)
      ok |= pessim[j] != V_BITS64_DEFINED;
   tl_assert(ok);

   if (0 == (a & (szB - 1)) && n_addrs_bad < szB) {
      /* Exemption applies.  Use the previously computed pessimising
         value and return the combined result, but don't flag an
         addressing error.  The pessimising value is Defined for valid
         addresses and Undefined for invalid addresses. */
      /* for assumption that doing bitwise or implements UifU */
      tl_assert(V_BIT_UNDEFINED == 1 && V_BIT_DEFINED == 0);
      /* (really need "UifU" here...)
         vbits[j] UifU= pessim[j]  (is pessimised by it, iow) */
      for (j = szL-1; j >= 0; j--)
         res[j] |= pessim[j];
      return;
   }

   /* Exemption doesn't apply.  Flag an addressing error in the normal
      way. */
   MC_(record_address_error)( VG_(get_running_tid)(), a, szB, False );
}
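
/* Worked example of the pessimising OR above (added for exposition, not
   part of the original source): suppose an aligned load covers three
   valid bytes and one unaddressable byte.  Each valid byte contributes
   its real V bits to 'res' and V_BITS8_DEFINED (all zeroes) to 'pessim';
   the bad byte contributes V_BITS8_DEFINED to 'res' (get_vbits8 fakes it
   as defined) and V_BITS8_UNDEFINED (all ones) to 'pessim'.  OR-ing
   'pessim' into 'res' therefore forces exactly the bytes from bad
   addresses to Undefined while leaving the genuinely loaded V bits alone
   -- the "UifU" mentioned in the comments. */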
static
__attribute__((noinline))
__attribute__((used))
VG_REGPARM(3) /* make sure we're using a fixed calling convention, since
                 this function may get called from hand written assembly. */
ULong mc_LOADVn_slow ( Addr a, SizeT nBits, Bool bigendian )
{
   PROF_EVENT(MCPE_LOADVN_SLOW);

   /* ------------ BEGIN semi-fast cases ------------ */
   /* These deal quickly-ish with the common auxiliary primary map
      cases on 64-bit platforms.  Are merely a speedup hack; can be
      omitted without loss of correctness/functionality.  Note that in
      both cases the "sizeof(void*) == 8" causes these cases to be
      folded out by compilers on 32-bit platforms.  These are derived
      from LOADV64 and LOADV32.
   */
   if (LIKELY(sizeof(void*) == 8
              && nBits == 64 && VG_IS_8_ALIGNED(a))) {
      SecMap* sm       = get_secmap_for_reading(a);
      UWord   sm_off16 = SM_OFF_16(a);
      UWord   vabits16 = sm->vabits16[sm_off16];
      if (LIKELY(vabits16 == VA_BITS16_DEFINED))
         return V_BITS64_DEFINED;
      if (LIKELY(vabits16 == VA_BITS16_UNDEFINED))
         return V_BITS64_UNDEFINED;
      /* else fall into the slow case */
   }
   if (LIKELY(sizeof(void*) == 8
              && nBits == 32 && VG_IS_4_ALIGNED(a))) {
      SecMap* sm      = get_secmap_for_reading(a);
      UWord   sm_off  = SM_OFF(a);
      UWord   vabits8 = sm->vabits8[sm_off];
      if (LIKELY(vabits8 == VA_BITS8_DEFINED))
         return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_DEFINED);
      if (LIKELY(vabits8 == VA_BITS8_UNDEFINED))
         return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_UNDEFINED);
      /* else fall into slow case */
   }
   /* ------------ END semi-fast cases ------------ */

   ULong  vbits64     = V_BITS64_UNDEFINED; /* result */
   ULong  pessim64    = V_BITS64_DEFINED;   /* only used when p-l-ok=yes */
   SSizeT szB         = nBits / 8;
   SSizeT i;          /* Must be signed. */
   SizeT  n_addrs_bad = 0;
   Addr   ai;
   UChar  vbits8;
   Bool   ok;

   tl_assert(nBits == 64 || nBits == 32 || nBits == 16 || nBits == 8);

   /* Make up a 64-bit result V word, which contains the loaded data
      for valid addresses and Defined for invalid addresses.  Iterate
      over the bytes in the word, from the most significant down to
      the least.  The vbits to return are calculated into vbits64.
      Also compute the pessimising value to be used when
      --partial-loads-ok=yes.  n_addrs_bad is redundant (the relevant
      info can be gleaned from pessim64) but is used as a
      cross-check. */
   for (i = szB-1; i >= 0; i--) {
      PROF_EVENT(MCPE_LOADVN_SLOW_LOOP);
      ai = a + byte_offset_w(szB, bigendian, i);
      ok = get_vbits8(ai, &vbits8);
      vbits64 <<= 8;
      vbits64 |= vbits8;
      if (!ok) n_addrs_bad++;
      pessim64 <<= 8;
      pessim64 |= (ok ? V_BITS8_DEFINED : V_BITS8_UNDEFINED);
   }

   /* In the common case, all the addresses involved are valid, so we
      just return the computed V bits and have done. */
   if (LIKELY(n_addrs_bad == 0))
      return vbits64;

   /* If there's no possibility of getting a partial-loads-ok
      exemption, report the error and quit. */
   if (!MC_(clo_partial_loads_ok)) {
      MC_(record_address_error)( VG_(get_running_tid)(), a, szB, False );
      return vbits64;
   }

   /* The partial-loads-ok exemption might apply.  Find out if it
      does.  If so, don't report an addressing error, but do return
      Undefined for the bytes that are out of range, so as to avoid
      false negatives.  If it doesn't apply, just report an addressing
      error in the usual way. */

   /* Some code steps along byte strings in aligned word-sized chunks
      even when there is only a partially defined word at the end (eg,
      optimised strlen).  This is allowed by the memory model of
      modern machines, since an aligned load cannot span two pages and
      thus cannot "partially fault", despite such behaviour being
      declared undefined by ANSI C/C++.

      Therefore, a load from a partially-addressible place is allowed
      if all of the following hold:
      - the command-line flag is set [by default, it isn't]
      - it's a word-sized, word-aligned load
      - at least one of the addresses in the word *is* valid

      Since this suppresses the addressing error, we avoid false
      negatives by marking bytes undefined when they come from an
      invalid address.
   */

   /* "at least one of the addresses is invalid" */
   tl_assert(pessim64 != V_BITS64_DEFINED);

   if (szB == VG_WORDSIZE && VG_IS_WORD_ALIGNED(a)
       && n_addrs_bad < VG_WORDSIZE) {
      /* Exemption applies.  Use the previously computed pessimising
         value for vbits64 and return the combined result, but don't
         flag an addressing error.  The pessimising value is Defined
         for valid addresses and Undefined for invalid addresses. */
      /* for assumption that doing bitwise or implements UifU */
      tl_assert(V_BIT_UNDEFINED == 1 && V_BIT_DEFINED == 0);
      /* (really need "UifU" here...)
         vbits64 UifU= pessim64  (is pessimised by it, iow) */
      vbits64 |= pessim64;
      return vbits64;
   }

   /* Also, it appears that gcc generates string-stepping code in
      32-bit chunks on 64 bit platforms.  So, also grant an exception
      for this case.  Note that the first clause of the conditional
      (VG_WORDSIZE == 8) is known at compile time, so the whole clause
      will get folded out in 32 bit builds. */
   if (VG_WORDSIZE == 8
       && VG_IS_4_ALIGNED(a) && nBits == 32 && n_addrs_bad < 4) {
      tl_assert(V_BIT_UNDEFINED == 1 && V_BIT_DEFINED == 0);
      /* (really need "UifU" here...)
         vbits64 UifU= pessim64  (is pessimised by it, iow) */
      vbits64 |= pessim64;
      /* Mark the upper 32 bits as undefined, just to be on the safe
         side. */
      vbits64 |= (((ULong)V_BITS32_UNDEFINED) << 32);
      return vbits64;
   }

   /* Exemption doesn't apply.  Flag an addressing error in the normal
      way. */
   MC_(record_address_error)( VG_(get_running_tid)(), a, szB, False );

   return vbits64;
}
static
__attribute__((noinline))
void mc_STOREVn_slow ( Addr a, SizeT nBits, ULong vbytes, Bool bigendian )
{
   SizeT szB = nBits / 8;
   SizeT i, n_addrs_bad = 0;
   UChar vbits8;
   Addr  ai;
   Bool  ok;

   PROF_EVENT(MCPE_STOREVN_SLOW);

   /* ------------ BEGIN semi-fast cases ------------ */
   /* These deal quickly-ish with the common auxiliary primary map
      cases on 64-bit platforms.  Are merely a speedup hack; can be
      omitted without loss of correctness/functionality.  Note that in
      both cases the "sizeof(void*) == 8" causes these cases to be
      folded out by compilers on 32-bit platforms.  The logic below
      is somewhat similar to some cases extensively commented in
      MC_(helperc_STOREV8).
   */
   if (LIKELY(sizeof(void*) == 8
              && nBits == 64 && VG_IS_8_ALIGNED(a))) {
      SecMap* sm       = get_secmap_for_reading(a);
      UWord   sm_off16 = SM_OFF_16(a);
      UWord   vabits16 = sm->vabits16[sm_off16];
      if (LIKELY( !is_distinguished_sm(sm) &&
                  (VA_BITS16_DEFINED   == vabits16 ||
                   VA_BITS16_UNDEFINED == vabits16) )) {
         /* Handle common case quickly: a is suitably aligned, */
         /* is mapped, and is addressible. */
         // Convert full V-bits in register to compact 2-bit form.
         if (LIKELY(V_BITS64_DEFINED == vbytes)) {
            sm->vabits16[sm_off16] = VA_BITS16_DEFINED;
            return;
         } else if (V_BITS64_UNDEFINED == vbytes) {
            sm->vabits16[sm_off16] = VA_BITS16_UNDEFINED;
            return;
         }
         /* else fall into the slow case */
      }
      /* else fall into the slow case */
   }
   if (LIKELY(sizeof(void*) == 8
              && nBits == 32 && VG_IS_4_ALIGNED(a))) {
      SecMap* sm      = get_secmap_for_reading(a);
      UWord   sm_off  = SM_OFF(a);
      UWord   vabits8 = sm->vabits8[sm_off];
      if (LIKELY( !is_distinguished_sm(sm) &&
                  (VA_BITS8_DEFINED   == vabits8 ||
                   VA_BITS8_UNDEFINED == vabits8) )) {
         /* Handle common case quickly: a is suitably aligned, */
         /* is mapped, and is addressible. */
         // Convert full V-bits in register to compact 2-bit form.
         if (LIKELY(V_BITS32_DEFINED == (vbytes & 0xFFFFFFFF))) {
            sm->vabits8[sm_off] = VA_BITS8_DEFINED;
            return;
         } else if (V_BITS32_UNDEFINED == (vbytes & 0xFFFFFFFF)) {
            sm->vabits8[sm_off] = VA_BITS8_UNDEFINED;
            return;
         }
         /* else fall into the slow case */
      }
      /* else fall into the slow case */
   }
   /* ------------ END semi-fast cases ------------ */

   tl_assert(nBits == 64 || nBits == 32 || nBits == 16 || nBits == 8);

   /* Dump vbytes in memory, iterating from least to most significant
      byte.  At the same time establish addressibility of the location. */
   for (i = 0; i < szB; i++) {
      PROF_EVENT(MCPE_STOREVN_SLOW_LOOP);
      ai     = a + byte_offset_w(szB, bigendian, i);
      vbits8 = vbytes & 0xff;
      ok     = set_vbits8(ai, vbits8);
      if (!ok) n_addrs_bad++;
      vbytes >>= 8;
   }

   /* If an address error has happened, report it. */
   if (n_addrs_bad > 0)
      MC_(record_address_error)( VG_(get_running_tid)(), a, szB, True );
}
1603 /*------------------------------------------------------------*/
1604 /*--- Setting permissions over address ranges. ---*/
1605 /*------------------------------------------------------------*/
1607 static void set_address_range_perms ( Addr a
, SizeT lenT
, UWord vabits16
,
1610 UWord sm_off
, sm_off16
;
1611 UWord vabits2
= vabits16
& 0x3;
1612 SizeT lenA
, lenB
, len_to_next_secmap
;
1616 SecMap
* example_dsm
;
1618 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS
);
1620 /* Check the V+A bits make sense. */
1621 tl_assert(VA_BITS16_NOACCESS
== vabits16
||
1622 VA_BITS16_UNDEFINED
== vabits16
||
1623 VA_BITS16_DEFINED
== vabits16
);
1625 // This code should never write PDBs; ensure this. (See comment above
1627 tl_assert(VA_BITS2_PARTDEFINED
!= vabits2
);
1632 if (lenT
> 256 * 1024 * 1024) {
1633 if (VG_(clo_verbosity
) > 0 && !VG_(clo_xml
)) {
1634 const HChar
* s
= "unknown???";
1635 if (vabits16
== VA_BITS16_NOACCESS
) s
= "noaccess";
1636 if (vabits16
== VA_BITS16_UNDEFINED
) s
= "undefined";
1637 if (vabits16
== VA_BITS16_DEFINED
) s
= "defined";
1638 VG_(message
)(Vg_UserMsg
, "Warning: set address range perms: "
1639 "large range [0x%lx, 0x%lx) (%s)\n",
1644 #ifndef PERF_FAST_SARP
1645 /*------------------ debug-only case ------------------ */
1647 // Endianness doesn't matter here because all bytes are being set to
1649 // Nb: We don't have to worry about updating the sec-V-bits table
1650 // after these set_vabits2() calls because this code never writes
1651 // VA_BITS2_PARTDEFINED values.
1653 for (i
= 0; i
< lenT
; i
++) {
1654 set_vabits2(a
+ i
, vabits2
);
1660 /*------------------ standard handling ------------------ */
1662 /* Get the distinguished secondary that we might want
1663 to use (part of the space-compression scheme). */
1664 example_dsm
= &sm_distinguished
[dsm_num
];
1666 // We have to handle ranges covering various combinations of partial and
1667 // whole sec-maps. Here is how parts 1, 2 and 3 are used in each case.
1668 // Cases marked with a '*' are common.
1672 // * one partial sec-map (p) 1
1673 // - one whole sec-map (P) 2
1675 // * two partial sec-maps (pp) 1,3
1676 // - one partial, one whole sec-map (pP) 1,2
1677 // - one whole, one partial sec-map (Pp) 2,3
1678 // - two whole sec-maps (PP) 2,2
1680 // * one partial, one whole, one partial (pPp) 1,2,3
1681 // - one partial, two whole (pPP) 1,2,2
1682 // - two whole, one partial (PPp) 2,2,3
1683 // - three whole (PPP) 2,2,2
1685 // * one partial, N-2 whole, one partial (pP...Pp) 1,2...2,3
1686 // - one partial, N-1 whole (pP...PP) 1,2...2,2
1687 // - N-1 whole, one partial (PP...Pp) 2,2...2,3
1688 // - N whole (PP...PP) 2,2...2,3
1690 // Break up total length (lenT) into two parts: length in the first
1691 // sec-map (lenA), and the rest (lenB); lenT == lenA + lenB.
1692 aNext
= start_of_this_sm(a
) + SM_SIZE
;
1693 len_to_next_secmap
= aNext
- a
;
1694 if ( lenT
<= len_to_next_secmap
) {
1695 // Range entirely within one sec-map. Covers almost all cases.
1696 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_SINGLE_SECMAP
);
1699 } else if (is_start_of_sm(a
)) {
1700 // Range spans at least one whole sec-map, and starts at the beginning
1701 // of a sec-map; skip to Part 2.
1702 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_STARTOF_SECMAP
);
1707 // Range spans two or more sec-maps, first one is partial.
1708 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_MULTIPLE_SECMAPS
);
1709 lenA
= len_to_next_secmap
;

   //------------------------------------------------------------------------
   // Part 1: Deal with the first sec_map.  Most of the time the range will be
   // entirely within a sec_map and this part alone will suffice.  Also,
   // doing it this way lets us avoid repeatedly testing for the crossing of
   // a sec-map boundary within these loops.
   //------------------------------------------------------------------------

   // If it's distinguished, make it undistinguished if necessary.
   sm_ptr = get_secmap_ptr(a);
   if (is_distinguished_sm(*sm_ptr)) {
      if (*sm_ptr == example_dsm) {
         // Sec-map already has the V+A bits that we want, so skip.
         PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM1_QUICK);
         a    = aNext;
         lenA = 0;
      } else {
         PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM1);
         *sm_ptr = copy_for_writing(*sm_ptr);
      }
   }
   sm = *sm_ptr;

   // 1 byte steps
   while (True) {
      if (VG_IS_8_ALIGNED(a)) break;
      if (lenA < 1)           break;
      PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_LOOP1A);
      sm_off = SM_OFF(a);
      insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) );
      a    += 1;
      lenA -= 1;
   }
   // 8-aligned, 8 byte steps
   while (True) {
      if (lenA < 8) break;
      PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_LOOP8A);
      sm_off16 = SM_OFF_16(a);
      sm->vabits16[sm_off16] = vabits16;
      a    += 8;
      lenA -= 8;
   }
   // 1 byte steps
   while (True) {
      if (lenA < 1) break;
      PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_LOOP1B);
      sm_off = SM_OFF(a);
      insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) );
      a    += 1;
      lenA -= 1;
   }

   // We've finished the first sec-map.  Is that it?
   if (lenB == 0)
      return;

   //------------------------------------------------------------------------
   // Part 2: Fast-set entire sec-maps at a time.
   //------------------------------------------------------------------------
  part2:
   // 64KB-aligned, 64KB steps.
   // Nb: we can reach here with lenB < SM_SIZE
   tl_assert(0 == lenA);
   while (True) {
      if (lenB < SM_SIZE) break;
      tl_assert(is_start_of_sm(a));
      PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_LOOP64K);
      sm_ptr = get_secmap_ptr(a);
      if (!is_distinguished_sm(*sm_ptr)) {
         PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_LOOP64K_FREE_DIST_SM);
         // Free the non-distinguished sec-map that we're replacing.  This
         // case happens moderately often, enough to be worthwhile.
         SysRes sres = VG_(am_munmap_valgrind)((Addr)*sm_ptr, sizeof(SecMap));
         tl_assert2(! sr_isError(sres), "SecMap valgrind munmap failure\n");
      }
      update_SM_counts(*sm_ptr, example_dsm);
      // Make the sec-map entry point to the example DSM
      *sm_ptr = example_dsm;
      lenB -= SM_SIZE;
      a    += SM_SIZE;
   }

   // We've finished the whole sec-maps.  Is that it?
   if (lenB == 0)
      return;

   //------------------------------------------------------------------------
   // Part 3: Finish off the final partial sec-map, if necessary.
   //------------------------------------------------------------------------

   tl_assert(is_start_of_sm(a) && lenB < SM_SIZE);

   // If it's distinguished, make it undistinguished if necessary.
   sm_ptr = get_secmap_ptr(a);
   if (is_distinguished_sm(*sm_ptr)) {
      if (*sm_ptr == example_dsm) {
         // Sec-map already has the V+A bits that we want, so stop.
         PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM2_QUICK);
         return;
      } else {
         PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM2);
         *sm_ptr = copy_for_writing(*sm_ptr);
      }
   }
   sm = *sm_ptr;

   // 8-aligned, 8 byte steps
   while (True) {
      if (lenB < 8) break;
      PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_LOOP8B);
      sm_off16 = SM_OFF_16(a);
      sm->vabits16[sm_off16] = vabits16;
      a    += 8;
      lenB -= 8;
   }
   // 1 byte steps
   while (True) {
      if (lenB < 1) return;
      PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_LOOP1C);
      sm_off = SM_OFF(a);
      insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) );
      a    += 1;
      lenB -= 1;
   }
}

/* --- Set permissions for arbitrary address ranges --- */

void MC_(make_mem_noaccess) ( Addr a, SizeT len )
{
   PROF_EVENT(MCPE_MAKE_MEM_NOACCESS);
   DEBUG("MC_(make_mem_noaccess)(%p, %lu)\n", a, len);
   set_address_range_perms ( a, len, VA_BITS16_NOACCESS, SM_DIST_NOACCESS );
   if (UNLIKELY( MC_(clo_mc_level) == 3 ))
      ocache_sarp_Clear_Origins ( a, len );
}

static void make_mem_undefined ( Addr a, SizeT len )
{
   PROF_EVENT(MCPE_MAKE_MEM_UNDEFINED);
   DEBUG("make_mem_undefined(%p, %lu)\n", a, len);
   set_address_range_perms ( a, len, VA_BITS16_UNDEFINED, SM_DIST_UNDEFINED );
}

void MC_(make_mem_undefined_w_otag) ( Addr a, SizeT len, UInt otag )
{
   PROF_EVENT(MCPE_MAKE_MEM_UNDEFINED_W_OTAG);
   DEBUG("MC_(make_mem_undefined)(%p, %lu)\n", a, len);
   set_address_range_perms ( a, len, VA_BITS16_UNDEFINED, SM_DIST_UNDEFINED );
   if (UNLIKELY( MC_(clo_mc_level) == 3 ))
      ocache_sarp_Set_Origins ( a, len, otag );
}

static
void make_mem_undefined_w_tid_and_okind ( Addr a, SizeT len,
                                          ThreadId tid, UInt okind )
{
   UInt        ecu;
   ExeContext* here;
   /* VG_(record_ExeContext) checks for validity of tid, and asserts
      if it is invalid.  So no need to do it here. */
   tl_assert(okind <= 3);
   here = VG_(record_ExeContext)( tid, 0/*first_ip_delta*/ );
   tl_assert(here);
   ecu = VG_(get_ECU_from_ExeContext)(here);
   tl_assert(VG_(is_plausible_ECU)(ecu));
   MC_(make_mem_undefined_w_otag) ( a, len, ecu | okind );
}

static
void mc_new_mem_w_tid_make_ECU ( Addr a, SizeT len, ThreadId tid )
{
   make_mem_undefined_w_tid_and_okind ( a, len, tid, MC_OKIND_UNKNOWN );
}

static
void mc_new_mem_w_tid_no_ECU ( Addr a, SizeT len, ThreadId tid )
{
   MC_(make_mem_undefined_w_otag) ( a, len, MC_OKIND_UNKNOWN );
}

void MC_(make_mem_defined) ( Addr a, SizeT len )
{
   PROF_EVENT(MCPE_MAKE_MEM_DEFINED);
   DEBUG("MC_(make_mem_defined)(%p, %lu)\n", a, len);
   set_address_range_perms ( a, len, VA_BITS16_DEFINED, SM_DIST_DEFINED );
   if (UNLIKELY( MC_(clo_mc_level) == 3 ))
      ocache_sarp_Clear_Origins ( a, len );
}

__attribute__((unused))
static void make_mem_defined_w_tid ( Addr a, SizeT len, ThreadId tid )
{
   MC_(make_mem_defined)(a, len);
}

/* For each byte in [a,a+len), if the byte is addressable, make it be
   defined, but if it isn't addressable, leave it alone.  In other
   words a version of MC_(make_mem_defined) that doesn't mess with
   addressability.  Low-performance implementation. */
static void make_mem_defined_if_addressable ( Addr a, SizeT len )
{
   SizeT i;
   UChar vabits2;
   DEBUG("make_mem_defined_if_addressable(%p, %llu)\n", a, (ULong)len);
   for (i = 0; i < len; i++) {
      vabits2 = get_vabits2( a+i );
      if (LIKELY(VA_BITS2_NOACCESS != vabits2)) {
         set_vabits2(a+i, VA_BITS2_DEFINED);
         if (UNLIKELY(MC_(clo_mc_level) >= 3)) {
            MC_(helperc_b_store1)( a+i, 0 );   /* clear the origin tag */
         }
      }
   }
}

/* Similarly (needed for mprotect handling ..) */
static void make_mem_defined_if_noaccess ( Addr a, SizeT len )
{
   SizeT i;
   UChar vabits2;
   DEBUG("make_mem_defined_if_noaccess(%p, %llu)\n", a, (ULong)len);
   for (i = 0; i < len; i++) {
      vabits2 = get_vabits2( a+i );
      if (LIKELY(VA_BITS2_NOACCESS == vabits2)) {
         set_vabits2(a+i, VA_BITS2_DEFINED);
         if (UNLIKELY(MC_(clo_mc_level) >= 3)) {
            MC_(helperc_b_store1)( a+i, 0 );   /* clear the origin tag */
         }
      }
   }
}

/* --- Block-copy permissions (needed for implementing realloc() and
       sys_mremap). --- */

void MC_(copy_address_range_state) ( Addr src, Addr dst, SizeT len )
{
   SizeT i, j;
   UChar vabits2, vabits8;
   Bool  aligned, nooverlap;

   DEBUG("MC_(copy_address_range_state)\n");
   PROF_EVENT(MCPE_COPY_ADDRESS_RANGE_STATE);

   if (len == 0 || src == dst)
      return;

   aligned   = VG_IS_4_ALIGNED(src) && VG_IS_4_ALIGNED(dst);
   nooverlap = src+len <= dst || dst+len <= src;

   if (nooverlap && aligned) {

      /* Vectorised fast case, when no overlap and suitably aligned */
      /* vector loop */
      i = 0;
      while (len >= 4) {
         vabits8 = get_vabits8_for_aligned_word32( src+i );
         set_vabits8_for_aligned_word32( dst+i, vabits8 );
         if (LIKELY(VA_BITS8_DEFINED == vabits8
                    || VA_BITS8_UNDEFINED == vabits8
                    || VA_BITS8_NOACCESS == vabits8)) {
            /* do nothing */
         } else {
            /* have to copy secondary map info */
            if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+0 ))
               set_sec_vbits8( dst+i+0, get_sec_vbits8( src+i+0 ) );
            if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+1 ))
               set_sec_vbits8( dst+i+1, get_sec_vbits8( src+i+1 ) );
            if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+2 ))
               set_sec_vbits8( dst+i+2, get_sec_vbits8( src+i+2 ) );
            if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+3 ))
               set_sec_vbits8( dst+i+3, get_sec_vbits8( src+i+3 ) );
         }
         i += 4;
         len -= 4;
      }
      /* fixup loop */
      while (len >= 1) {
         vabits2 = get_vabits2( src+i );
         set_vabits2( dst+i, vabits2 );
         if (VA_BITS2_PARTDEFINED == vabits2) {
            set_sec_vbits8( dst+i, get_sec_vbits8( src+i ) );
         }
         i++;
         len--;
      }

   } else {

      /* We have to do things the slow way */
      if (src < dst) {
         for (i = 0, j = len-1; i < len; i++, j--) {
            PROF_EVENT(MCPE_COPY_ADDRESS_RANGE_STATE_LOOP1);
            vabits2 = get_vabits2( src+j );
            set_vabits2( dst+j, vabits2 );
            if (VA_BITS2_PARTDEFINED == vabits2) {
               set_sec_vbits8( dst+j, get_sec_vbits8( src+j ) );
            }
         }
      }

      if (src > dst) {
         for (i = 0; i < len; i++) {
            PROF_EVENT(MCPE_COPY_ADDRESS_RANGE_STATE_LOOP2);
            vabits2 = get_vabits2( src+i );
            set_vabits2( dst+i, vabits2 );
            if (VA_BITS2_PARTDEFINED == vabits2) {
               set_sec_vbits8( dst+i, get_sec_vbits8( src+i ) );
            }
         }
      }
   }
}

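/* Editor's illustrative sketch (not part of the original source): how a
   hypothetical realloc-style resize path might drive the copier above.
   Only MC_(copy_address_range_state) is a real Memcheck function here;
   the helper name and its arguments are assumptions for illustration. */
#if 0
static void example_copy_state_on_resize ( Addr old_start, SizeT old_szB,
                                            Addr new_start, SizeT new_szB )
{
   SizeT common = old_szB < new_szB ? old_szB : new_szB;
   /* Carry the A/V state (including sec-V-bits for partially-defined
      bytes) of the surviving bytes over to the new block. */
   MC_(copy_address_range_state) ( old_start, new_start, common );
   /* Any extra bytes in an enlarged block would then be painted undefined
      by the caller, e.g. via MC_(make_mem_undefined_w_otag). */
}
#endif
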
/*------------------------------------------------------------*/
/*--- Origin tracking stuff - cache basics                  ---*/
/*------------------------------------------------------------*/

/* AN OVERVIEW OF THE ORIGIN TRACKING IMPLEMENTATION
   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

   Note that this implementation draws inspiration from the "origin
   tracking by value piggybacking" scheme described in "Tracking Bad
   Apples: Reporting the Origin of Null and Undefined Value Errors"
   (Michael Bond, Nicholas Nethercote, Stephen Kent, Samuel Guyer,
   Kathryn McKinley, OOPSLA07, Montreal, Oct 2007) but in fact it is
   implemented completely differently.

   Origin tags and ECUs -- about the shadow values
   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

   This implementation tracks the defining point of all uninitialised
   values using so called "origin tags", which are 32-bit integers,
   rather than using the values themselves to encode the origins.  The
   latter, so-called "value piggybacking", is what the OOPSLA07 paper
   describes.

   Origin tags, as tracked by the machinery below, are 32-bit unsigned
   ints (UInts), regardless of the machine's word size.  Each tag
   comprises an upper 30-bit ECU field and a lower 2-bit 'kind' field.
   The ECU field is a number given out by m_execontext and has a 1-1
   mapping with ExeContext*s.  An ECU can be used directly as an origin
   tag (otag), but in fact we want to put additional information in the
   'kind' field to indicate roughly where the tag came from.  This helps
   print more understandable error messages for the user -- it has no
   other purpose.  In summary:

   * Both ECUs and origin tags are represented as 32-bit words

   * m_execontext and the core-tool interface deal purely in ECUs.
     They have no knowledge of origin tags - that is a purely
     Memcheck-internal matter.

   * all valid ECUs have the lowest 2 bits zero and at least
     one of the upper 30 bits nonzero (see VG_(is_plausible_ECU))

   * to convert from an ECU to an otag, OR in one of the MC_OKIND_
     constants defined in mc_include.h.

   * to convert an otag back to an ECU, AND it with ~3

   One important fact is that no valid otag is zero.  A zero otag is
   used by the implementation to indicate "no origin", which could
   mean that either the value is defined, or it is undefined but the
   implementation somehow managed to lose the origin.

   The ECU used for memory created by malloc etc is derived from the
   stack trace at the time the malloc etc happens.  This means the
   mechanism can show the exact allocation point for heap-created
   uninitialised values.

   In contrast, it is simply too expensive to create a complete
   backtrace for each stack allocation.  Therefore we merely use a
   depth-1 backtrace for stack allocations, which can be done once at
   translation time, rather than N times at run time.  The result of
   this is that, for stack created uninitialised values, Memcheck can
   only show the allocating function, and not what called it.
   Furthermore, compilers tend to move the stack pointer just once at
   the start of the function, to allocate all locals, and so in fact
   the stack origin almost always simply points to the opening brace
   of the function.  Net result is, for stack origins, the mechanism
   can tell you in which function the undefined value was created, but
   that's all.  Users will need to carefully check all locals in the
   relevant function.

   Shadowing registers and memory
   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

   Memory is shadowed using a two level cache structure (ocacheL1 and
   ocacheL2).  Memory references are first directed to ocacheL1.  This
   is a traditional 2-way set associative cache with 32-byte lines and
   approximate LRU replacement within each set.

   A naive implementation would require storing one 32 bit otag for
   each byte of memory covered, a 4:1 space overhead.  Instead, there
   is one otag for every 4 bytes of memory covered, plus a 4-bit mask
   that shows which of the 4 bytes have that shadow value and which
   have a shadow value of zero (indicating no origin).  Hence a lot of
   space is saved, but the cost is that only one different origin per
   4 bytes of address space can be represented.  This is a source of
   imprecision, but how much of a problem it really is remains to be
   seen.

   A cache line that contains all zeroes ("no origins") contains no
   useful information, and can be ejected from the L1 cache "for
   free", in the sense that a read miss on the L1 causes a line of
   zeroes to be installed.  However, ejecting a line containing
   nonzeroes risks losing origin information permanently.  In order to
   prevent such lossage, ejected nonzero lines are placed in a
   secondary cache (ocacheL2), which is an OSet (AVL tree) of cache
   lines.  This can grow arbitrarily large, and so should ensure that
   Memcheck runs out of memory in preference to losing useful origin
   info due to cache size limitations.

   Shadowing registers is a bit tricky, because the shadow values are
   32 bits, regardless of the size of the register.  That gives a
   problem for registers smaller than 32 bits.  The solution is to
   find spaces in the guest state that are unused, and use those to
   shadow guest state fragments smaller than 32 bits.  For example, on
   ppc32/64, each vector register is 16 bytes long.  If 4 bytes of the
   shadow are allocated for the register's otag, then there are still
   12 bytes left over which could be used to shadow 3 other values.

   This implies there is some non-obvious mapping from guest state
   (start,length) pairs to the relevant shadow offset (for the origin
   tags).  And it is unfortunately guest-architecture specific.  The
   mapping is contained in mc_machine.c, which is quite lengthy but
   straightforward.

   Instrumenting the IR
   ~~~~~~~~~~~~~~~~~~~~

   Instrumentation is largely straightforward, and done by the
   functions schemeE and schemeS in mc_translate.c.  These generate
   code for handling the origin tags of expressions (E) and statements
   (S) respectively.  The rather strange names are a reference to the
   "compilation schemes" shown in Simon Peyton Jones' book "The
   Implementation of Functional Programming Languages" (Prentice Hall,
   1987, see
   http://research.microsoft.com/~simonpj/papers/slpj-book-1987/index.htm).

   schemeS merely arranges to move shadow values around the guest
   state to track the incoming IR.  schemeE is largely trivial too.
   The only significant point is how to compute the otag corresponding
   to binary (or ternary, quaternary, etc) operator applications.  The
   rule is simple: just take whichever value is larger (32-bit
   unsigned max).  Constants get the special value zero.  Hence this
   rule always propagates a nonzero (known) otag in preference to a
   zero (unknown, or more likely, value-is-defined) tag, as we want.
   If two different undefined values are inputs to a binary operator
   application, then which is propagated is arbitrary, but that
   doesn't matter, since the program is erroneous in using either of
   the values, and so there's no point in attempting to propagate
   both.

   Since constants are abstracted to (otag) zero, much of the
   instrumentation code can be folded out without difficulty by the
   generic post-instrumentation IR cleanup pass, using these rules:
   Max32U(0,x) -> x, Max32U(x,0) -> x, Max32U(x,y) where x and y are
   constants is evaluated at JIT time.  And the resulting dead code
   removal.  In practice this causes surprisingly few Max32Us to
   survive through to backend code generation.

   Integration with the V-bits machinery
   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

   This is again largely straightforward.  Mostly the otag and V bits
   stuff are independent.  The only point of interaction is when the V
   bits instrumenter creates a call to a helper function to report an
   uninitialised value error -- in that case it must first use schemeE
   to get hold of the origin tag expression for the value, and pass
   that to the helper too.

   There is the usual stuff to do with setting address range
   permissions.  When memory is painted undefined, we must also know
   the origin tag to paint with, which involves some tedious plumbing,
   particularly to do with the fast case stack handlers.  When memory
   is painted defined or noaccess then the origin tags must be forced
   to zero.

   One of the goals of the implementation was to ensure that the
   non-origin tracking mode isn't slowed down at all.  To do this,
   various functions to do with memory permissions setting (again,
   mostly pertaining to the stack) are duplicated for the with- and
   without-otag case.

   Dealing with stack redzones, and the NIA cache
   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

   This is one of the few non-obvious parts of the implementation.

   Some ABIs (amd64-ELF, ppc64-ELF, ppc32/64-XCOFF) define a small
   reserved area below the stack pointer, that can be used as scratch
   space by compiler generated code for functions.  In the Memcheck
   sources this is referred to as the "stack redzone".  The important
   thing here is that such redzones are considered volatile across
   function calls and returns.  So Memcheck takes care to mark them as
   undefined for each call and return, on the afflicted platforms.
   Past experience shows this is essential in order to get reliable
   messages about uninitialised values that come from the stack.

   So the question is, when we paint a redzone undefined, what origin
   tag should we use for it?  Consider a function f() calling g().  If
   we paint the redzone using an otag derived from the ExeContext of
   the CALL/BL instruction in f, then any errors in g causing it to
   use uninitialised values that happen to lie in the redzone, will be
   reported as having their origin in f.  Which is highly confusing.

   The same applies for returns: if, on a return, we paint the redzone
   using an origin tag derived from the ExeContext of the RET/BLR
   instruction in g, then any later errors in f causing it to use
   uninitialised values in the redzone, will be reported as having
   their origin in g.  Which is just as confusing.

   To do it right, in both cases we need to use an origin tag which
   pertains to the instruction which dynamically follows the CALL/BL
   or RET/BLR.  In short, one derived from the NIA - the "next
   instruction address".

   To make this work, Memcheck's redzone-painting helper,
   MC_(helperc_MAKE_STACK_UNINIT), now takes a third argument, the
   NIA.  It converts the NIA to a 1-element ExeContext, and uses that
   ExeContext's ECU as the basis for the otag used to paint the
   redzone.  The expensive part of this is converting an NIA into an
   ECU, since this happens once for every call and every return.  So
   we use a simple 511-line, 2-way set associative cache
   (nia_to_ecu_cache) to cache the mappings, and that knocks most of
   the cost of this off.

   Further background comments
   ~~~~~~~~~~~~~~~~~~~~~~~~~~~

   > Question: why is otag a UInt?  Wouldn't a UWord be better?  Isn't
   > it really just the address of the relevant ExeContext?

   Well, it's not the address, but a value which has a 1-1 mapping
   with ExeContexts, and is guaranteed not to be zero, since zero
   denotes (to memcheck) "unknown origin or defined value".  So these
   UInts are just numbers starting at 4 and incrementing by 4; each
   ExeContext is given a number when it is created.  (*** NOTE this
   confuses otags and ECUs; see comments above ***).

   Making these otags 32-bit regardless of the machine's word size
   makes the 64-bit implementation easier (next para).  And it doesn't
   really limit us in any way, since for the tags to overflow would
   require that the program somehow caused 2^30-1 different
   ExeContexts to be created, in which case it is probably in deep
   trouble.  Not to mention V will have soaked up many tens of
   gigabytes of memory merely to store them all.

   So having 64-bit origins doesn't really buy you anything, and has
   the following downsides:

   Suppose that instead, an otag is a UWord.  This would mean that, on
   a 64-bit machine:

   1. It becomes hard to shadow any element of guest state which is
      smaller than 8 bytes.  To do so means you'd need to find some
      8-byte-sized hole in the guest state which you don't want to
      shadow, and use that instead to hold the otag.  On ppc64, the
      condition code register(s) are split into 20 UChar sized pieces,
      all of which need to be tracked (guest_XER_SO .. guest_CR7_0)
      and so that would entail finding 160 bytes somewhere else in the
      guest state.

      Even on x86, I want to track origins for %AH .. %DH (bits 15:8
      of %EAX .. %EDX) that are separate from %AL .. %DL (bits 7:0 of
      same) and so I had to look for 4 untracked otag-sized areas in
      the guest state to make that possible.

      The same problem exists of course when origin tags are only 32
      bits, but it's less extreme.

   2. (More compelling) it doubles the size of the origin shadow
      memory.  Given that the shadow memory is organised as a fixed
      size cache, and that accuracy of tracking is limited by origins
      falling out the cache due to space conflicts, this isn't good.

   > Another question: is the origin tracking perfect, or are there
   > cases where it fails to determine an origin?

   It is imperfect for at least the following reasons, and
   probably more:

   * Insufficient capacity in the origin cache.  When a line is
     evicted from the cache it is gone forever, and so subsequent
     queries for the line produce zero, indicating no origin
     information.  Interestingly, a line containing all zeroes can be
     evicted "free" from the cache, since it contains no useful
     information, so there is scope perhaps for some cleverer cache
     management schemes.  (*** NOTE, with the introduction of the
     second level origin tag cache, ocacheL2, this is no longer a
     problem. ***)

   * The origin cache only stores one otag per 32-bits of address
     space, plus 4 bits indicating which of the 4 bytes has that tag
     and which are considered defined.  The result is that if two
     undefined bytes in the same word are stored in memory, the first
     stored byte's origin will be lost and replaced by the origin for
     the second byte.

   * Nonzero origin tags for defined values.  Consider a binary
     operator application op(x,y).  Suppose y is undefined (and so has
     a valid nonzero origin tag), and x is defined, but erroneously
     has a nonzero origin tag (defined values should have tag zero).
     If the erroneous tag has a numeric value greater than y's tag,
     then the rule for propagating origin tags through binary
     operations, which is simply to take the unsigned max of the two
     tags, will erroneously propagate x's tag rather than y's.

   * Some obscure uses of x86/amd64 byte registers can cause lossage
     or confusion of origins.  %AH .. %DH are treated as different
     from, and unrelated to, their parent registers, %EAX .. %EDX.
     So some weird sequences like

        movb undefined-value, %AH
        movb defined-value, %AL
        .. use %AX or %EAX ..

     will cause the origin attributed to %AH to be ignored, since %AL,
     %AX, %EAX are treated as the same register, and %AH as a
     completely separate one.

   But having said all that, it actually seems to work fairly well in
   practice.
*/

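/* Editor's illustrative sketch (not part of the original source): the
   ECU <-> otag conversions described above, written out explicitly.
   MC_OKIND_* and VG_(is_plausible_ECU) are the real names used by
   Memcheck; the 'example_*' helpers themselves are hypothetical. */
#if 0
static UInt example_ecu_to_otag ( UInt ecu, UInt okind )
{
   /* A valid ECU has its lowest 2 bits zero, so the kind fits there. */
   tl_assert(VG_(is_plausible_ECU)(ecu));
   tl_assert(okind <= 3);      /* the MC_OKIND_ values fit in 2 bits */
   return ecu | okind;         /* e.g. ecu | MC_OKIND_STACK */
}

static UInt example_otag_to_ecu ( UInt otag )
{
   return otag & ~3u;          /* strip the 2-bit kind field */
}
#endif
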
static UWord stats_ocacheL1_find           = 0;
static UWord stats_ocacheL1_found_at_1     = 0;
static UWord stats_ocacheL1_found_at_N     = 0;
static UWord stats_ocacheL1_misses         = 0;
static UWord stats_ocacheL1_lossage        = 0;
static UWord stats_ocacheL1_movefwds       = 0;

static UWord stats__ocacheL2_refs          = 0;
static UWord stats__ocacheL2_misses        = 0;
static UWord stats__ocacheL2_n_nodes_max   = 0;

/* Cache of 32-bit values, one every 32 bits of address space */

#define OC_BITS_PER_LINE 5
#define OC_W32S_PER_LINE (1 << (OC_BITS_PER_LINE - 2))

static INLINE UWord oc_line_offset ( Addr a ) {
   return (a >> 2) & (OC_W32S_PER_LINE - 1);
}
static INLINE Bool is_valid_oc_tag ( Addr tag ) {
   return 0 == (tag & ((1 << OC_BITS_PER_LINE) - 1));
}

#define OC_LINES_PER_SET 2

#define OC_N_SET_BITS    20
#define OC_N_SETS        (1 << OC_N_SET_BITS)

/* These settings give:
   64 bit host: ocache:  100,663,296 sizeB  67,108,864 useful
   32 bit host: ocache:   92,274,688 sizeB  67,108,864 useful
*/

#define OC_MOVE_FORWARDS_EVERY_BITS 7

typedef
   struct {
      Addr  tag;
      UInt  w32[OC_W32S_PER_LINE];
      UChar descr[OC_W32S_PER_LINE];
   }
   OCacheLine;

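/* Editor's worked example (not part of the original source): how the
   "sizeB" / "useful" figures quoted above follow from the geometry.
   Assumes the layout above has no padding, which holds on common
   64-bit hosts (8 + 8*4 + 8*1 = 48 bytes per line). */
#if 0
static void example_ocache_size_check ( void )
{
   UWord lines  = (UWord)OC_N_SETS * OC_LINES_PER_SET;      /* 2,097,152 */
   UWord sizeB  = lines * sizeof(OCacheLine);               /* 100,663,296 on 64-bit */
   UWord useful = lines * OC_W32S_PER_LINE * sizeof(UInt);  /* 67,108,864 */
   VG_(printf)("ocache: %lu sizeB %lu useful\n", sizeB, useful);
}
#endif
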
/* Classify and also sanity-check 'line'.  Return 'e' (empty) if not
   in use, 'n' (nonzero) if it contains at least one valid origin tag,
   and 'z' if all the represented tags are zero. */
static UChar classify_OCacheLine ( OCacheLine* line )
{
   UWord i;
   if (line->tag == 1/*invalid*/)
      return 'e'; /* EMPTY */
   tl_assert(is_valid_oc_tag(line->tag));
   for (i = 0; i < OC_W32S_PER_LINE; i++) {
      tl_assert(0 == ((~0xF) & line->descr[i]));
      if (line->w32[i] > 0 && line->descr[i] > 0)
         return 'n'; /* NONZERO - contains useful info */
   }
   return 'z'; /* ZERO - no useful info */
}

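/* Editor's illustrative sketch (not part of the original source): how a
   single byte's origin is read back out of the w32[]/descr[] encoding.
   Bit b of descr[g] says whether byte b of 4-byte group g carries the
   group's shared otag; a clear bit means "no origin".  The helper name
   is hypothetical; the layout is the one defined above. */
#if 0
static UInt example_get_otag_for_byte ( const OCacheLine* line, Addr a )
{
   UWord group   = oc_line_offset(a);   /* which 4-byte group in the line */
   UWord byteoff = a & 3;               /* which byte within that group */
   if (line->descr[group] & (1 << byteoff))
      return line->w32[group];          /* byte shares the group's otag */
   return 0;                            /* zero otag == no origin */
}
#endif
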
typedef
   struct {
      OCacheLine line[OC_LINES_PER_SET];
   }
   OCacheSet;

typedef
   struct {
      OCacheSet set[OC_N_SETS];
   }
   OCache;

static OCache* ocacheL1 = NULL;
static UWord   ocacheL1_event_ctr = 0;

static void init_ocacheL2 ( void ); /* fwds */
static void init_OCache ( void )
{
   UWord line, set;
   tl_assert(MC_(clo_mc_level) >= 3);
   tl_assert(ocacheL1 == NULL);
   ocacheL1 = VG_(am_shadow_alloc)(sizeof(OCache));
   if (ocacheL1 == NULL) {
      VG_(out_of_memory_NORETURN)( "memcheck:allocating ocacheL1",
                                   sizeof(OCache) );
   }
   tl_assert(ocacheL1 != NULL);
   for (set = 0; set < OC_N_SETS; set++) {
      for (line = 0; line < OC_LINES_PER_SET; line++) {
         ocacheL1->set[set].line[line].tag = 1/*invalid*/;
      }
   }
   init_ocacheL2();
}

static void moveLineForwards ( OCacheSet* set, UWord lineno )
{
   OCacheLine tmp;
   stats_ocacheL1_movefwds++;
   tl_assert(lineno > 0 && lineno < OC_LINES_PER_SET);
   tmp = set->line[lineno-1];
   set->line[lineno-1] = set->line[lineno];
   set->line[lineno] = tmp;
}

static void zeroise_OCacheLine ( OCacheLine* line, Addr tag ) {
   UWord i;
   for (i = 0; i < OC_W32S_PER_LINE; i++) {
      line->w32[i]   = 0; /* NO ORIGIN */
      line->descr[i] = 0; /* REALLY REALLY NO ORIGIN! */
   }
   line->tag = tag;
}

//////////////////////////////////////////////////////////////
//// OCache backing store

static OSet* ocacheL2 = NULL;

static void* ocacheL2_malloc ( const HChar* cc, SizeT szB ) {
   return VG_(malloc)(cc, szB);
}
static void ocacheL2_free ( void* v ) {
   VG_(free)( v );
}

/* Stats: # nodes currently in tree */
static UWord stats__ocacheL2_n_nodes = 0;

static void init_ocacheL2 ( void )
{
   tl_assert(!ocacheL2);
   tl_assert(sizeof(Word) == sizeof(Addr)); /* since OCacheLine.tag :: Addr */
   tl_assert(0 == offsetof(OCacheLine,tag));
   ocacheL2
      = VG_(OSetGen_Create)( offsetof(OCacheLine,tag),
                             NULL, /* fast cmp */
                             ocacheL2_malloc, "mc.ioL2", ocacheL2_free );
   stats__ocacheL2_n_nodes = 0;
}

/* Find line with the given tag in the tree, or NULL if not found. */
static OCacheLine* ocacheL2_find_tag ( Addr tag )
{
   OCacheLine* line;
   tl_assert(is_valid_oc_tag(tag));
   stats__ocacheL2_refs++;
   line = VG_(OSetGen_Lookup)( ocacheL2, &tag );
   return line;
}

/* Delete the line with the given tag from the tree, if it is present, and
   free up the associated memory. */
static void ocacheL2_del_tag ( Addr tag )
{
   OCacheLine* line;
   tl_assert(is_valid_oc_tag(tag));
   stats__ocacheL2_refs++;
   line = VG_(OSetGen_Remove)( ocacheL2, &tag );
   if (line) {
      VG_(OSetGen_FreeNode)(ocacheL2, line);
      tl_assert(stats__ocacheL2_n_nodes > 0);
      stats__ocacheL2_n_nodes--;
   }
}

/* Add a copy of the given line to the tree.  It must not already be
   present. */
static void ocacheL2_add_line ( OCacheLine* line )
{
   OCacheLine* copy;
   tl_assert(is_valid_oc_tag(line->tag));
   copy = VG_(OSetGen_AllocNode)( ocacheL2, sizeof(OCacheLine) );
   *copy = *line;
   stats__ocacheL2_refs++;
   VG_(OSetGen_Insert)( ocacheL2, copy );
   stats__ocacheL2_n_nodes++;
   if (stats__ocacheL2_n_nodes > stats__ocacheL2_n_nodes_max)
      stats__ocacheL2_n_nodes_max = stats__ocacheL2_n_nodes;
}

//////////////////////////////////////////////////////////////

__attribute__((noinline))
static OCacheLine* find_OCacheLine_SLOW ( Addr a )
{
   OCacheLine *victim, *inL2;
   UChar c;
   UWord line;
   UWord setno   = (a >> OC_BITS_PER_LINE) & (OC_N_SETS - 1);
   UWord tagmask = ~((1 << OC_BITS_PER_LINE) - 1);
   UWord tag     = a & tagmask;
   tl_assert(setno >= 0 && setno < OC_N_SETS);

   /* we already tried line == 0; skip therefore. */
   for (line = 1; line < OC_LINES_PER_SET; line++) {
      if (ocacheL1->set[setno].line[line].tag == tag) {
         if (line == 1) {
            stats_ocacheL1_found_at_1++;
         } else {
            stats_ocacheL1_found_at_N++;
         }
         if (UNLIKELY(0 == (ocacheL1_event_ctr++
                            & ((1<<OC_MOVE_FORWARDS_EVERY_BITS)-1)))) {
            moveLineForwards( &ocacheL1->set[setno], line );
            line--;
         }
         return &ocacheL1->set[setno].line[line];
      }
   }

   /* A miss.  Use the last slot.  Implicitly this means we're
      ejecting the line in the last slot. */
   stats_ocacheL1_misses++;
   tl_assert(line == OC_LINES_PER_SET);
   line--;
   tl_assert(line > 0);

   /* First, move the to-be-ejected line to the L2 cache. */
   victim = &ocacheL1->set[setno].line[line];
   c      = classify_OCacheLine(victim);
   switch (c) {
      case 'e':
         /* the line is empty (has invalid tag); ignore it. */
         break;
      case 'z':
         /* line contains zeroes.  We must ensure the backing store is
            updated accordingly, either by copying the line there
            verbatim, or by ensuring it isn't present there.  We
            choose the latter on the basis that it reduces the size of
            the backing store. */
         ocacheL2_del_tag( victim->tag );
         break;
      case 'n':
         /* line contains at least one real, useful origin.  Copy it
            to the backing store. */
         stats_ocacheL1_lossage++;
         inL2 = ocacheL2_find_tag( victim->tag );
         if (inL2) {
            *inL2 = *victim;
         } else {
            ocacheL2_add_line( victim );
         }
         break;
      default: tl_assert(0);
   }

   /* Now we must reload the L1 cache from the backing tree, if
      possible. */
   tl_assert(tag != victim->tag); /* stay sane */
   inL2 = ocacheL2_find_tag( tag );
   if (inL2) {
      /* We're in luck.  It's in the L2. */
      ocacheL1->set[setno].line[line] = *inL2;
   } else {
      /* Missed at both levels of the cache hierarchy.  We have to
         declare it as full of zeroes (unknown origins). */
      stats__ocacheL2_misses++;
      zeroise_OCacheLine( &ocacheL1->set[setno].line[line], tag );
   }

   /* Move it one forwards */
   moveLineForwards( &ocacheL1->set[setno], line );
   line--;

   return &ocacheL1->set[setno].line[line];
}

static INLINE OCacheLine* find_OCacheLine ( Addr a )
{
   UWord setno   = (a >> OC_BITS_PER_LINE) & (OC_N_SETS - 1);
   UWord tagmask = ~((1 << OC_BITS_PER_LINE) - 1);
   UWord tag     = a & tagmask;

   stats_ocacheL1_find++;

   if (OC_ENABLE_ASSERTIONS) {
      tl_assert(setno >= 0 && setno < OC_N_SETS);
      tl_assert(0 == (tag & (4 * OC_W32S_PER_LINE - 1)));
   }

   if (LIKELY(ocacheL1->set[setno].line[0].tag == tag)) {
      return &ocacheL1->set[setno].line[0];
   }

   return find_OCacheLine_SLOW( a );
}

static INLINE
void set_aligned_word64_Origin_to_undef ( Addr a, UInt otag )
{
   //// BEGIN inlined, specialised version of MC_(helperc_b_store8)
   //// Set the origins for a+0 .. a+7
   { OCacheLine* line;
     UWord lineoff = oc_line_offset(a);
     if (OC_ENABLE_ASSERTIONS) {
        tl_assert(lineoff >= 0
                  && lineoff < OC_W32S_PER_LINE -1/*'cos 8-aligned*/);
     }
     line = find_OCacheLine( a );
     line->descr[lineoff+0] = 0xF;
     line->descr[lineoff+1] = 0xF;
     line->w32[lineoff+0]   = otag;
     line->w32[lineoff+1]   = otag;
   }
   //// END inlined, specialised version of MC_(helperc_b_store8)
}

/*------------------------------------------------------------*/
/*--- Aligned fast case permission setters,                ---*/
/*--- for dealing with stacks                               ---*/
/*------------------------------------------------------------*/

/*--------------------- 32-bit ---------------------*/

/* Nb: by "aligned" here we mean 4-byte aligned */

static INLINE
void make_aligned_word32_undefined ( Addr a )
{
   PROF_EVENT(MCPE_MAKE_ALIGNED_WORD32_UNDEFINED);

#ifndef PERF_FAST_STACK2
   make_mem_undefined(a, 4);
#else
   {
      UWord   sm_off;
      SecMap* sm;

      if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) {
         PROF_EVENT(MCPE_MAKE_ALIGNED_WORD32_UNDEFINED_SLOW);
         make_mem_undefined(a, 4);
         return;
      }

      sm                  = get_secmap_for_writing_low(a);
      sm_off              = SM_OFF(a);
      sm->vabits8[sm_off] = VA_BITS8_UNDEFINED;
   }
#endif
}

static INLINE
void make_aligned_word32_undefined_w_otag ( Addr a, UInt otag )
{
   make_aligned_word32_undefined(a);
   //// BEGIN inlined, specialised version of MC_(helperc_b_store4)
   //// Set the origins for a+0 .. a+3
   { OCacheLine* line;
     UWord lineoff = oc_line_offset(a);
     if (OC_ENABLE_ASSERTIONS) {
        tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
     }
     line = find_OCacheLine( a );
     line->descr[lineoff] = 0xF;
     line->w32[lineoff]   = otag;
   }
   //// END inlined, specialised version of MC_(helperc_b_store4)
}

static INLINE
void make_aligned_word32_noaccess ( Addr a )
{
   PROF_EVENT(MCPE_MAKE_ALIGNED_WORD32_NOACCESS);

#ifndef PERF_FAST_STACK2
   MC_(make_mem_noaccess)(a, 4);
#else
   {
      UWord   sm_off;
      SecMap* sm;

      if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) {
         PROF_EVENT(MCPE_MAKE_ALIGNED_WORD32_NOACCESS_SLOW);
         MC_(make_mem_noaccess)(a, 4);
         return;
      }

      sm                  = get_secmap_for_writing_low(a);
      sm_off              = SM_OFF(a);
      sm->vabits8[sm_off] = VA_BITS8_NOACCESS;

      //// BEGIN inlined, specialised version of MC_(helperc_b_store4)
      //// Clear the origins for a+0 .. a+3.
      if (UNLIKELY( MC_(clo_mc_level) == 3 )) {
         OCacheLine* line;
         UWord lineoff = oc_line_offset(a);
         if (OC_ENABLE_ASSERTIONS) {
            tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
         }
         line = find_OCacheLine( a );
         line->descr[lineoff] = 0;
      }
      //// END inlined, specialised version of MC_(helperc_b_store4)
   }
#endif
}

/*--------------------- 64-bit ---------------------*/

/* Nb: by "aligned" here we mean 8-byte aligned */

static INLINE
void make_aligned_word64_undefined ( Addr a )
{
   PROF_EVENT(MCPE_MAKE_ALIGNED_WORD64_UNDEFINED);

#ifndef PERF_FAST_STACK2
   make_mem_undefined(a, 8);
#else
   {
      UWord   sm_off16;
      SecMap* sm;

      if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) {
         PROF_EVENT(MCPE_MAKE_ALIGNED_WORD64_UNDEFINED_SLOW);
         make_mem_undefined(a, 8);
         return;
      }

      sm       = get_secmap_for_writing_low(a);
      sm_off16 = SM_OFF_16(a);
      sm->vabits16[sm_off16] = VA_BITS16_UNDEFINED;
   }
#endif
}

static INLINE
void make_aligned_word64_undefined_w_otag ( Addr a, UInt otag )
{
   make_aligned_word64_undefined(a);
   //// BEGIN inlined, specialised version of MC_(helperc_b_store8)
   //// Set the origins for a+0 .. a+7
   { OCacheLine* line;
     UWord lineoff = oc_line_offset(a);
     tl_assert(lineoff >= 0
               && lineoff < OC_W32S_PER_LINE -1/*'cos 8-aligned*/);
     line = find_OCacheLine( a );
     line->descr[lineoff+0] = 0xF;
     line->descr[lineoff+1] = 0xF;
     line->w32[lineoff+0]   = otag;
     line->w32[lineoff+1]   = otag;
   }
   //// END inlined, specialised version of MC_(helperc_b_store8)
}

static INLINE
void make_aligned_word64_noaccess ( Addr a )
{
   PROF_EVENT(MCPE_MAKE_ALIGNED_WORD64_NOACCESS);

#ifndef PERF_FAST_STACK2
   MC_(make_mem_noaccess)(a, 8);
#else
   {
      UWord   sm_off16;
      SecMap* sm;

      if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) {
         PROF_EVENT(MCPE_MAKE_ALIGNED_WORD64_NOACCESS_SLOW);
         MC_(make_mem_noaccess)(a, 8);
         return;
      }

      sm       = get_secmap_for_writing_low(a);
      sm_off16 = SM_OFF_16(a);
      sm->vabits16[sm_off16] = VA_BITS16_NOACCESS;

      //// BEGIN inlined, specialised version of MC_(helperc_b_store8)
      //// Clear the origins for a+0 .. a+7.
      if (UNLIKELY( MC_(clo_mc_level) == 3 )) {
         OCacheLine* line;
         UWord lineoff = oc_line_offset(a);
         tl_assert(lineoff >= 0
                   && lineoff < OC_W32S_PER_LINE -1/*'cos 8-aligned*/);
         line = find_OCacheLine( a );
         line->descr[lineoff+0] = 0;
         line->descr[lineoff+1] = 0;
      }
      //// END inlined, specialised version of MC_(helperc_b_store8)
   }
#endif
}

/*------------------------------------------------------------*/
/*--- Stack pointer adjustment                              ---*/
/*------------------------------------------------------------*/

#ifdef PERF_FAST_STACK
#  define MAYBE_USED
#else
#  define MAYBE_USED __attribute__((unused))
#endif

/*--------------- adjustment by 4 bytes ---------------*/

MAYBE_USED
static void VG_REGPARM(2) mc_new_mem_stack_4_w_ECU(Addr new_SP, UInt ecu)
{
   UInt otag = ecu | MC_OKIND_STACK;
   PROF_EVENT(MCPE_NEW_MEM_STACK_4);
   if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP, otag );
   } else {
      MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 4, otag );
   }
}

MAYBE_USED
static void VG_REGPARM(1) mc_new_mem_stack_4(Addr new_SP)
{
   PROF_EVENT(MCPE_NEW_MEM_STACK_4);
   if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
   } else {
      make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 4 );
   }
}

MAYBE_USED
static void VG_REGPARM(1) mc_die_mem_stack_4(Addr new_SP)
{
   PROF_EVENT(MCPE_DIE_MEM_STACK_4);
   if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4 );
   } else {
      MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-4, 4 );
   }
}

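/* Editor's illustrative sketch (not part of the original source): handlers
   like the three above are normally registered with the Valgrind core via
   the tool interface, so that SP adjustments of exactly 4 bytes hit the
   specialised fast paths.  The VG_(track_*) hooks come from the core's
   tool interface (pub_tool_tooliface.h); the surrounding function is
   hypothetical and the real wiring is done elsewhere in Memcheck's
   initialisation. */
#if 0
static void example_register_stack_4_handlers ( Bool track_origins )
{
   if (track_origins)
      VG_(track_new_mem_stack_4_w_ECU)( mc_new_mem_stack_4_w_ECU );
   else
      VG_(track_new_mem_stack_4)( mc_new_mem_stack_4 );
   VG_(track_die_mem_stack_4)( mc_die_mem_stack_4 );
}
#endif
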
/*--------------- adjustment by 8 bytes ---------------*/

MAYBE_USED
static void VG_REGPARM(2) mc_new_mem_stack_8_w_ECU(Addr new_SP, UInt ecu)
{
   UInt otag = ecu | MC_OKIND_STACK;
   PROF_EVENT(MCPE_NEW_MEM_STACK_8);
   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP, otag );
   } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP, otag );
      make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4, otag );
   } else {
      MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 8, otag );
   }
}

MAYBE_USED
static void VG_REGPARM(1) mc_new_mem_stack_8(Addr new_SP)
{
   PROF_EVENT(MCPE_NEW_MEM_STACK_8);
   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
   } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
      make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4 );
   } else {
      make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 8 );
   }
}

MAYBE_USED
static void VG_REGPARM(1) mc_die_mem_stack_8(Addr new_SP)
{
   PROF_EVENT(MCPE_DIE_MEM_STACK_8);
   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8 );
   } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8 );
      make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4 );
   } else {
      MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-8, 8 );
   }
}

/*--------------- adjustment by 12 bytes ---------------*/

MAYBE_USED
static void VG_REGPARM(2) mc_new_mem_stack_12_w_ECU(Addr new_SP, UInt ecu)
{
   UInt otag = ecu | MC_OKIND_STACK;
   PROF_EVENT(MCPE_NEW_MEM_STACK_12);
   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP, otag );
      make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8, otag );
   } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      /* from previous test we don't have 8-alignment at offset +0,
         hence must have 8 alignment at offsets +4/-4.  Hence safe to
         do 4 at +0 and then 8 at +4. */
      make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4, otag );
   } else {
      MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 12, otag );
   }
}

MAYBE_USED
static void VG_REGPARM(1) mc_new_mem_stack_12(Addr new_SP)
{
   PROF_EVENT(MCPE_NEW_MEM_STACK_12);
   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
      make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
   } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      /* from previous test we don't have 8-alignment at offset +0,
         hence must have 8 alignment at offsets +4/-4.  Hence safe to
         do 4 at +0 and then 8 at +4. */
      make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4 );
   } else {
      make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 12 );
   }
}

MAYBE_USED
static void VG_REGPARM(1) mc_die_mem_stack_12(Addr new_SP)
{
   PROF_EVENT(MCPE_DIE_MEM_STACK_12);
   /* Note the -12 in the test */
   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP-12 )) {
      /* We have 8-alignment at -12, hence ok to do 8 at -12 and 4 at
         -4. */
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 );
      make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4  );
   } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      /* We have 4-alignment at +0, but we don't have 8-alignment at
         -12.  So we must have 8-alignment at -8.  Hence do 4 at -12
         and then 8 at -8. */
      make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8  );
   } else {
      MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-12, 12 );
   }
}

/*--------------- adjustment by 16 bytes ---------------*/

MAYBE_USED
static void VG_REGPARM(2) mc_new_mem_stack_16_w_ECU(Addr new_SP, UInt ecu)
{
   UInt otag = ecu | MC_OKIND_STACK;
   PROF_EVENT(MCPE_NEW_MEM_STACK_16);
   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      /* Have 8-alignment at +0, hence do 8 at +0 and 8 at +8. */
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8, otag );
   } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      /* Have 4 alignment at +0 but not 8; hence 8 must be at +4.
         Hence do 4 at +0, 8 at +4, 4 at +12. */
      make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4 , otag );
      make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+12, otag );
   } else {
      MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 16, otag );
   }
}

MAYBE_USED
static void VG_REGPARM(1) mc_new_mem_stack_16(Addr new_SP)
{
   PROF_EVENT(MCPE_NEW_MEM_STACK_16);
   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      /* Have 8-alignment at +0, hence do 8 at +0 and 8 at +8. */
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
   } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      /* Have 4 alignment at +0 but not 8; hence 8 must be at +4.
         Hence do 4 at +0, 8 at +4, 4 at +12. */
      make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4 );
      make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+12 );
   } else {
      make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 16 );
   }
}

MAYBE_USED
static void VG_REGPARM(1) mc_die_mem_stack_16(Addr new_SP)
{
   PROF_EVENT(MCPE_DIE_MEM_STACK_16);
   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      /* Have 8-alignment at +0, hence do 8 at -16 and 8 at -8. */
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8  );
   } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      /* 8 alignment must be at -12.  Do 4 at -16, 8 at -12, 4 at -4. */
      make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 );
      make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4  );
   } else {
      MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-16, 16 );
   }
}

/*--------------- adjustment by 32 bytes ---------------*/

MAYBE_USED
static void VG_REGPARM(2) mc_new_mem_stack_32_w_ECU(Addr new_SP, UInt ecu)
{
   UInt otag = ecu | MC_OKIND_STACK;
   PROF_EVENT(MCPE_NEW_MEM_STACK_32);
   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      /* Straightforward */
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8 , otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24, otag );
   } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      /* 8 alignment must be at +4.  Hence do 8 at +4,+12,+20 and 4 at
         +0 and +28. */
      make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4 , otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+12, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+20, otag );
      make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+28, otag );
   } else {
      MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 32, otag );
   }
}

MAYBE_USED
static void VG_REGPARM(1) mc_new_mem_stack_32(Addr new_SP)
{
   PROF_EVENT(MCPE_NEW_MEM_STACK_32);
   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      /* Straightforward */
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
   } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      /* 8 alignment must be at +4.  Hence do 8 at +4,+12,+20 and 4 at
         +0 and +28. */
      make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+12 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+20 );
      make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+28 );
   } else {
      make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 32 );
   }
}

MAYBE_USED
static void VG_REGPARM(1) mc_die_mem_stack_32(Addr new_SP)
{
   PROF_EVENT(MCPE_DIE_MEM_STACK_32);
   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      /* Straightforward */
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
   } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      /* 8 alignment must be at -4 etc.  Hence do 8 at -12,-20,-28 and
         4 at -32 and -4. */
      make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-28 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-20 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 );
      make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4  );
   } else {
      MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-32, 32 );
   }
}

/*--------------- adjustment by 112 bytes ---------------*/

MAYBE_USED
static void VG_REGPARM(2) mc_new_mem_stack_112_w_ECU(Addr new_SP, UInt ecu)
{
   UInt otag = ecu | MC_OKIND_STACK;
   PROF_EVENT(MCPE_NEW_MEM_STACK_112);
   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8 , otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag );
   } else {
      MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 112, otag );
   }
}

MAYBE_USED
static void VG_REGPARM(1) mc_new_mem_stack_112(Addr new_SP)
{
   PROF_EVENT(MCPE_NEW_MEM_STACK_112);
   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 );
   } else {
      make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 112 );
   }
}

MAYBE_USED
static void VG_REGPARM(1) mc_die_mem_stack_112(Addr new_SP)
{
   PROF_EVENT(MCPE_DIE_MEM_STACK_112);
   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112);
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104);
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
   } else {
      MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-112, 112 );
   }
}

3186 /*--------------- adjustment by 128 bytes ---------------*/
3189 static void VG_REGPARM(2) mc_new_mem_stack_128_w_ECU(Addr new_SP
, UInt ecu
)
3191 UInt otag
= ecu
| MC_OKIND_STACK
;
   PROF_EVENT(MCPE_NEW_MEM_STACK_128);
   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP,     otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8,   otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16,  otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24,  otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32,  otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40,  otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48,  otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56,  otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64,  otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72,  otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80,  otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88,  otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96,  otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+112, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+120, otag );
   } else {
      MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 128, otag );
   }
}

static void VG_REGPARM(1) mc_new_mem_stack_128(Addr new_SP)
{
   PROF_EVENT(MCPE_NEW_MEM_STACK_128);
   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP      );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8    );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16   );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24   );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32   );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40   );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48   );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56   );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64   );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72   );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80   );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88   );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96   );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104  );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+112  );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+120  );
   } else {
      make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 128 );
   }
}

static void VG_REGPARM(1) mc_die_mem_stack_128(Addr new_SP)
{
   PROF_EVENT(MCPE_DIE_MEM_STACK_128);
   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-128 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-120 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96  );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88  );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80  );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72  );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64  );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56  );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48  );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40  );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32  );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24  );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16  );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8  );
   } else {
      MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-128, 128 );
   }
}
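
/* All of the fixed-size handlers above and below follow the same
   pattern: for a given SP delta (here 128 bytes, i.e. 16 aligned
   64-bit words), if SP minus the redzone is 8-aligned, the area is
   painted one aligned word at a time, which is much cheaper than the
   generic byte-granular routines; otherwise the handler falls back to
   MC_(make_mem_undefined_w_otag) / make_mem_undefined /
   MC_(make_mem_noaccess) for the whole range. */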
/*--------------- adjustment by 144 bytes ---------------*/

static void VG_REGPARM(2) mc_new_mem_stack_144_w_ECU(Addr new_SP, UInt ecu)
{
   UInt otag = ecu | MC_OKIND_STACK;
   PROF_EVENT(MCPE_NEW_MEM_STACK_144);
   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP,     otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8,   otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16,  otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24,  otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32,  otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40,  otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48,  otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56,  otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64,  otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72,  otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80,  otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88,  otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96,  otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+112, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+120, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+128, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+136, otag );
   } else {
      MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 144, otag );
   }
}

static void VG_REGPARM(1) mc_new_mem_stack_144(Addr new_SP)
{
   PROF_EVENT(MCPE_NEW_MEM_STACK_144);
   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP      );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8    );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16   );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24   );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32   );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40   );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48   );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56   );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64   );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72   );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80   );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88   );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96   );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104  );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+112  );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+120  );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+128  );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+136  );
   } else {
      make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 144 );
   }
}

static void VG_REGPARM(1) mc_die_mem_stack_144(Addr new_SP)
{
   PROF_EVENT(MCPE_DIE_MEM_STACK_144);
   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-144 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-136 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-128 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-120 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96  );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88  );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80  );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72  );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64  );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56  );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48  );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40  );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32  );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24  );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16  );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8  );
   } else {
      MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-144, 144 );
   }
}
/*--------------- adjustment by 160 bytes ---------------*/

static void VG_REGPARM(2) mc_new_mem_stack_160_w_ECU(Addr new_SP, UInt ecu)
{
   UInt otag = ecu | MC_OKIND_STACK;
   PROF_EVENT(MCPE_NEW_MEM_STACK_160);
   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP,     otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8,   otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16,  otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24,  otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32,  otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40,  otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48,  otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56,  otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64,  otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72,  otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80,  otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88,  otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96,  otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+112, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+120, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+128, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+136, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+144, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+152, otag );
   } else {
      MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 160, otag );
   }
}

static void VG_REGPARM(1) mc_new_mem_stack_160(Addr new_SP)
{
   PROF_EVENT(MCPE_NEW_MEM_STACK_160);
   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP      );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8    );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16   );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24   );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32   );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40   );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48   );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56   );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64   );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72   );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80   );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88   );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96   );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104  );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+112  );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+120  );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+128  );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+136  );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+144  );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+152  );
   } else {
      make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 160 );
   }
}

static void VG_REGPARM(1) mc_die_mem_stack_160(Addr new_SP)
{
   PROF_EVENT(MCPE_DIE_MEM_STACK_160);
   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-160 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-152 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-144 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-136 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-128 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-120 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96  );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88  );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80  );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72  );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64  );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56  );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48  );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40  );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32  );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24  );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16  );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8  );
   } else {
      MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-160, 160 );
   }
}
/*--------------- adjustment by N bytes ---------------*/

static void mc_new_mem_stack_w_ECU ( Addr a, SizeT len, UInt ecu )
{
   UInt otag = ecu | MC_OKIND_STACK;
   PROF_EVENT(MCPE_NEW_MEM_STACK);
   MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + a, len, otag );
}

static void mc_new_mem_stack ( Addr a, SizeT len )
{
   PROF_EVENT(MCPE_NEW_MEM_STACK);
   make_mem_undefined ( -VG_STACK_REDZONE_SZB + a, len );
}

static void mc_die_mem_stack ( Addr a, SizeT len )
{
   PROF_EVENT(MCPE_DIE_MEM_STACK);
   MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + a, len );
}
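
/* Note on the -VG_STACK_REDZONE_SZB adjustment used by all of the
   stack handlers above: the marked range is shifted down by the size
   of the ABI-defined stack redzone (zero on targets without one), so
   that the redzone below SP is repainted along with the area actually
   gained or lost by the SP change. */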
/* The AMD64 ABI says:

   "The 128-byte area beyond the location pointed to by %rsp is considered
    to be reserved and shall not be modified by signal or interrupt
    handlers.  Therefore, functions may use this area for temporary data
    that is not needed across function calls.  In particular, leaf functions
    may use this area for their entire stack frame, rather than adjusting
    the stack pointer in the prologue and epilogue.  This area is known as
    the red zone."

   So after any call or return we need to mark this redzone as containing
   undefined values.

   Consider this:  we're in function f.  f calls g.  g moves rsp down
   modestly (say 16 bytes) and writes stuff all over the red zone, making it
   defined.  g returns.  f is buggy and reads from parts of the red zone
   that it didn't write on.  But because g filled that area in, f is going
   to be picking up defined V bits and so any errors from reading bits of
   the red zone it didn't write, will be missed.  The only solution I could
   think of was to make the red zone undefined when g returns to f.

   This is in accordance with the ABI, which makes it clear the redzone
   is volatile across function calls.

   The problem occurs the other way round too: f could fill the RZ up
   with defined values and g could mistakenly read them.  So the RZ
   also needs to be nuked on function calls.
*/
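
/* Concretely: if f stashes a temporary below %rsp (in the red zone),
   calls g, and then reads the temporary back, that read is a bug --
   the ABI makes the red zone dead across the call.  Because the
   MC_(helperc_MAKE_STACK_UNINIT_*) helpers below repaint the red zone
   as undefined at calls and returns, Memcheck reports such a read as a
   use of uninitialised data even if g happened to overwrite the area
   with defined values in the meantime. */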
/* Here's a simple cache to hold nia -> ECU mappings.  It could be
   improved so as to have a lower miss rate. */

static UWord stats__nia_cache_queries = 0;
static UWord stats__nia_cache_misses  = 0;

typedef
   struct { UWord nia0; UWord ecu0;   /* nia0 maps to ecu0 */
            UWord nia1; UWord ecu1; } /* nia1 maps to ecu1 */
   WCacheEnt;

#define N_NIA_TO_ECU_CACHE 511

static WCacheEnt nia_to_ecu_cache[N_NIA_TO_ECU_CACHE];

static void init_nia_to_ecu_cache ( void )
{
   UWord       i;
   Addr        zero_addr = 0;
   ExeContext* zero_ec;
   UInt        zero_ecu;
   /* Fill all the slots with an entry for address zero, and the
      relevant otags accordingly.  Hence the cache is initially filled
      with valid data. */
   zero_ec = VG_(make_depth_1_ExeContext_from_Addr)(zero_addr);
   tl_assert(zero_ec);
   zero_ecu = VG_(get_ECU_from_ExeContext)(zero_ec);
   tl_assert(VG_(is_plausible_ECU)(zero_ecu));
   for (i = 0; i < N_NIA_TO_ECU_CACHE; i++) {
      nia_to_ecu_cache[i].nia0 = zero_addr;
      nia_to_ecu_cache[i].ecu0 = zero_ecu;
      nia_to_ecu_cache[i].nia1 = zero_addr;
      nia_to_ecu_cache[i].ecu1 = zero_ecu;
   }
}

static inline UInt convert_nia_to_ecu ( Addr nia )
{
   UWord       i;
   UInt        ecu;
   ExeContext* ec;

   tl_assert( sizeof(nia_to_ecu_cache[0].nia1) == sizeof(nia) );

   stats__nia_cache_queries++;
   i = nia % N_NIA_TO_ECU_CACHE;
   tl_assert(i >= 0 && i < N_NIA_TO_ECU_CACHE);

   if (LIKELY( nia_to_ecu_cache[i].nia0 == nia ))
      return nia_to_ecu_cache[i].ecu0;

   if (LIKELY( nia_to_ecu_cache[i].nia1 == nia )) {
#     define SWAP(_w1,_w2) { UWord _t = _w1; _w1 = _w2; _w2 = _t; }
      SWAP( nia_to_ecu_cache[i].nia0, nia_to_ecu_cache[i].nia1 );
      SWAP( nia_to_ecu_cache[i].ecu0, nia_to_ecu_cache[i].ecu1 );
#     undef SWAP
      return nia_to_ecu_cache[i].ecu0;
   }

   stats__nia_cache_misses++;
   ec = VG_(make_depth_1_ExeContext_from_Addr)(nia);
   tl_assert(ec);
   ecu = VG_(get_ECU_from_ExeContext)(ec);
   tl_assert(VG_(is_plausible_ECU)(ecu));

   nia_to_ecu_cache[i].nia1 = nia_to_ecu_cache[i].nia0;
   nia_to_ecu_cache[i].ecu1 = nia_to_ecu_cache[i].ecu0;

   nia_to_ecu_cache[i].nia0 = nia;
   nia_to_ecu_cache[i].ecu0 = (UWord)ecu;
   return ecu;
}
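
/* Worked example of the cache policy above: suppose nia A and nia B
   both hash to slot i (A % 511 == B % 511 == i).  A lookup of A that
   finds A in .nia0 returns .ecu0 immediately.  A lookup of B that
   finds B in .nia1 swaps the two ways, so B becomes the most recently
   used entry in .nia0/.ecu0.  A miss makes a depth-1 ExeContext for
   the new nia, demotes the old way-0 entry to way 1 (evicting the old
   way-1 entry) and installs the new pair in way 0. */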
/* This marks the stack as addressible but undefined, after a call or
   return for a target that has an ABI defined stack redzone.  It
   happens quite a lot and needs to be fast.  This is the version for
   origin tracking.  The non-origin-tracking version is below. */
void MC_(helperc_MAKE_STACK_UNINIT_w_o) ( Addr base, UWord len, Addr nia )
{
   PROF_EVENT(MCPE_MAKE_STACK_UNINIT_W_O);
   if (0)
      VG_(printf)("helperc_MAKE_STACK_UNINIT_w_o (%#lx,%lu,nia=%#lx)\n",
                  base, len, nia );

   UInt ecu = convert_nia_to_ecu ( nia );
   tl_assert(VG_(is_plausible_ECU)(ecu));

   UInt otag = ecu | MC_OKIND_STACK;

#  if 0
   /* Slow(ish) version, which is fairly easily seen to be correct.
   */
   if (LIKELY( VG_IS_8_ALIGNED(base) && len==128 )) {
      make_aligned_word64_undefined_w_otag(base +   0, otag);
      make_aligned_word64_undefined_w_otag(base +   8, otag);
      make_aligned_word64_undefined_w_otag(base +  16, otag);
      make_aligned_word64_undefined_w_otag(base +  24, otag);

      make_aligned_word64_undefined_w_otag(base +  32, otag);
      make_aligned_word64_undefined_w_otag(base +  40, otag);
      make_aligned_word64_undefined_w_otag(base +  48, otag);
      make_aligned_word64_undefined_w_otag(base +  56, otag);

      make_aligned_word64_undefined_w_otag(base +  64, otag);
      make_aligned_word64_undefined_w_otag(base +  72, otag);
      make_aligned_word64_undefined_w_otag(base +  80, otag);
      make_aligned_word64_undefined_w_otag(base +  88, otag);

      make_aligned_word64_undefined_w_otag(base +  96, otag);
      make_aligned_word64_undefined_w_otag(base + 104, otag);
      make_aligned_word64_undefined_w_otag(base + 112, otag);
      make_aligned_word64_undefined_w_otag(base + 120, otag);
   } else {
      MC_(make_mem_undefined_w_otag)(base, len, otag);
   }
#  endif

   /* Idea is: go fast when
         * 8-aligned and length is 128
         * the sm is available in the main primary map
         * the address range falls entirely with a single secondary map
      If all those conditions hold, just update the V+A bits by writing
      directly into the vabits array.  (If the sm was distinguished, this
      will make a copy and then write to it.)
   */
   if (LIKELY( len == 128 && VG_IS_8_ALIGNED(base) )) {
      /* Now we know the address range is suitably sized and aligned. */
      UWord a_lo = (UWord)(base);
      UWord a_hi = (UWord)(base + 128 - 1);
      tl_assert(a_lo < a_hi);             // paranoia: detect overflow
      if (LIKELY(a_hi <= MAX_PRIMARY_ADDRESS)) {
         /* Now we know the entire range is within the main primary map. */
         UWord pm_off_lo = get_primary_map_low_offset(a_lo);
         UWord pm_off_hi = get_primary_map_low_offset(a_hi);
         if (LIKELY(pm_off_lo == pm_off_hi)) {
            /* Now we know that the entire address range falls within a
               single secondary map, and that that secondary 'lives' in
               the main primary map. */
            SecMap* sm      = get_secmap_for_writing_low(a_lo);
            UWord   v_off16 = SM_OFF_16(a_lo);
            UShort* p       = &sm->vabits16[v_off16];
            p[ 0] = VA_BITS16_UNDEFINED;
            p[ 1] = VA_BITS16_UNDEFINED;
            p[ 2] = VA_BITS16_UNDEFINED;
            p[ 3] = VA_BITS16_UNDEFINED;
            p[ 4] = VA_BITS16_UNDEFINED;
            p[ 5] = VA_BITS16_UNDEFINED;
            p[ 6] = VA_BITS16_UNDEFINED;
            p[ 7] = VA_BITS16_UNDEFINED;
            p[ 8] = VA_BITS16_UNDEFINED;
            p[ 9] = VA_BITS16_UNDEFINED;
            p[10] = VA_BITS16_UNDEFINED;
            p[11] = VA_BITS16_UNDEFINED;
            p[12] = VA_BITS16_UNDEFINED;
            p[13] = VA_BITS16_UNDEFINED;
            p[14] = VA_BITS16_UNDEFINED;
            p[15] = VA_BITS16_UNDEFINED;
            set_aligned_word64_Origin_to_undef( base + 8 * 0,  otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 1,  otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 2,  otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 3,  otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 4,  otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 5,  otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 6,  otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 7,  otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 8,  otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 9,  otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 10, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 11, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 12, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 13, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 14, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 15, otag );
            return;
         }
      }
   }

   /* 288 bytes (36 ULongs) is the magic value for ELF ppc64. */
   if (LIKELY( len == 288 && VG_IS_8_ALIGNED(base) )) {
      /* Now we know the address range is suitably sized and aligned. */
      UWord a_lo = (UWord)(base);
      UWord a_hi = (UWord)(base + 288 - 1);
      tl_assert(a_lo < a_hi);             // paranoia: detect overflow
      if (a_hi <= MAX_PRIMARY_ADDRESS) {
         UWord pm_off_lo = get_primary_map_low_offset(a_lo);
         UWord pm_off_hi = get_primary_map_low_offset(a_hi);
         if (LIKELY(pm_off_lo == pm_off_hi)) {
            /* Now we know that the entire address range falls within a
               single secondary map, and that that secondary 'lives' in
               the main primary map. */
            SecMap* sm      = get_secmap_for_writing_low(a_lo);
            UWord   v_off16 = SM_OFF_16(a_lo);
            UShort* p       = &sm->vabits16[v_off16];
            p[ 0] = VA_BITS16_UNDEFINED;
            p[ 1] = VA_BITS16_UNDEFINED;
            p[ 2] = VA_BITS16_UNDEFINED;
            p[ 3] = VA_BITS16_UNDEFINED;
            p[ 4] = VA_BITS16_UNDEFINED;
            p[ 5] = VA_BITS16_UNDEFINED;
            p[ 6] = VA_BITS16_UNDEFINED;
            p[ 7] = VA_BITS16_UNDEFINED;
            p[ 8] = VA_BITS16_UNDEFINED;
            p[ 9] = VA_BITS16_UNDEFINED;
            p[10] = VA_BITS16_UNDEFINED;
            p[11] = VA_BITS16_UNDEFINED;
            p[12] = VA_BITS16_UNDEFINED;
            p[13] = VA_BITS16_UNDEFINED;
            p[14] = VA_BITS16_UNDEFINED;
            p[15] = VA_BITS16_UNDEFINED;
            p[16] = VA_BITS16_UNDEFINED;
            p[17] = VA_BITS16_UNDEFINED;
            p[18] = VA_BITS16_UNDEFINED;
            p[19] = VA_BITS16_UNDEFINED;
            p[20] = VA_BITS16_UNDEFINED;
            p[21] = VA_BITS16_UNDEFINED;
            p[22] = VA_BITS16_UNDEFINED;
            p[23] = VA_BITS16_UNDEFINED;
            p[24] = VA_BITS16_UNDEFINED;
            p[25] = VA_BITS16_UNDEFINED;
            p[26] = VA_BITS16_UNDEFINED;
            p[27] = VA_BITS16_UNDEFINED;
            p[28] = VA_BITS16_UNDEFINED;
            p[29] = VA_BITS16_UNDEFINED;
            p[30] = VA_BITS16_UNDEFINED;
            p[31] = VA_BITS16_UNDEFINED;
            p[32] = VA_BITS16_UNDEFINED;
            p[33] = VA_BITS16_UNDEFINED;
            p[34] = VA_BITS16_UNDEFINED;
            p[35] = VA_BITS16_UNDEFINED;
            set_aligned_word64_Origin_to_undef( base + 8 * 0,  otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 1,  otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 2,  otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 3,  otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 4,  otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 5,  otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 6,  otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 7,  otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 8,  otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 9,  otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 10, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 11, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 12, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 13, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 14, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 15, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 16, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 17, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 18, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 19, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 20, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 21, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 22, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 23, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 24, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 25, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 26, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 27, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 28, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 29, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 30, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 31, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 32, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 33, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 34, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 35, otag );
            return;
         }
      }
   }

   /* else fall into slow case */
   MC_(make_mem_undefined_w_otag)(base, len, otag);
}
/* This is a version of MC_(helperc_MAKE_STACK_UNINIT_w_o) that is
   specialised for the non-origin-tracking case. */
void MC_(helperc_MAKE_STACK_UNINIT_no_o) ( Addr base, UWord len )
{
   PROF_EVENT(MCPE_MAKE_STACK_UNINIT_NO_O);
   if (0)
      VG_(printf)("helperc_MAKE_STACK_UNINIT_no_o (%#lx,%lu)\n",
                  base, len );

#  if 0
   /* Slow(ish) version, which is fairly easily seen to be correct.
   */
   if (LIKELY( VG_IS_8_ALIGNED(base) && len==128 )) {
      make_aligned_word64_undefined(base +   0);
      make_aligned_word64_undefined(base +   8);
      make_aligned_word64_undefined(base +  16);
      make_aligned_word64_undefined(base +  24);

      make_aligned_word64_undefined(base +  32);
      make_aligned_word64_undefined(base +  40);
      make_aligned_word64_undefined(base +  48);
      make_aligned_word64_undefined(base +  56);

      make_aligned_word64_undefined(base +  64);
      make_aligned_word64_undefined(base +  72);
      make_aligned_word64_undefined(base +  80);
      make_aligned_word64_undefined(base +  88);

      make_aligned_word64_undefined(base +  96);
      make_aligned_word64_undefined(base + 104);
      make_aligned_word64_undefined(base + 112);
      make_aligned_word64_undefined(base + 120);
   } else {
      make_mem_undefined(base, len);
   }
#  endif

   /* Idea is: go fast when
         * 8-aligned and length is 128
         * the sm is available in the main primary map
         * the address range falls entirely with a single secondary map
      If all those conditions hold, just update the V+A bits by writing
      directly into the vabits array.  (If the sm was distinguished, this
      will make a copy and then write to it.)
   */
   if (LIKELY( len == 128 && VG_IS_8_ALIGNED(base) )) {
      /* Now we know the address range is suitably sized and aligned. */
      UWord a_lo = (UWord)(base);
      UWord a_hi = (UWord)(base + 128 - 1);
      tl_assert(a_lo < a_hi);             // paranoia: detect overflow
      if (LIKELY(a_hi <= MAX_PRIMARY_ADDRESS)) {
         /* Now we know the entire range is within the main primary map. */
         UWord pm_off_lo = get_primary_map_low_offset(a_lo);
         UWord pm_off_hi = get_primary_map_low_offset(a_hi);
         if (LIKELY(pm_off_lo == pm_off_hi)) {
            /* Now we know that the entire address range falls within a
               single secondary map, and that that secondary 'lives' in
               the main primary map. */
            SecMap* sm      = get_secmap_for_writing_low(a_lo);
            UWord   v_off16 = SM_OFF_16(a_lo);
            UShort* p       = &sm->vabits16[v_off16];
            p[ 0] = VA_BITS16_UNDEFINED;
            p[ 1] = VA_BITS16_UNDEFINED;
            p[ 2] = VA_BITS16_UNDEFINED;
            p[ 3] = VA_BITS16_UNDEFINED;
            p[ 4] = VA_BITS16_UNDEFINED;
            p[ 5] = VA_BITS16_UNDEFINED;
            p[ 6] = VA_BITS16_UNDEFINED;
            p[ 7] = VA_BITS16_UNDEFINED;
            p[ 8] = VA_BITS16_UNDEFINED;
            p[ 9] = VA_BITS16_UNDEFINED;
            p[10] = VA_BITS16_UNDEFINED;
            p[11] = VA_BITS16_UNDEFINED;
            p[12] = VA_BITS16_UNDEFINED;
            p[13] = VA_BITS16_UNDEFINED;
            p[14] = VA_BITS16_UNDEFINED;
            p[15] = VA_BITS16_UNDEFINED;
            return;
         }
      }
   }

   /* 288 bytes (36 ULongs) is the magic value for ELF ppc64. */
   if (LIKELY( len == 288 && VG_IS_8_ALIGNED(base) )) {
      /* Now we know the address range is suitably sized and aligned. */
      UWord a_lo = (UWord)(base);
      UWord a_hi = (UWord)(base + 288 - 1);
      tl_assert(a_lo < a_hi);             // paranoia: detect overflow
      if (a_hi <= MAX_PRIMARY_ADDRESS) {
         UWord pm_off_lo = get_primary_map_low_offset(a_lo);
         UWord pm_off_hi = get_primary_map_low_offset(a_hi);
         if (LIKELY(pm_off_lo == pm_off_hi)) {
            /* Now we know that the entire address range falls within a
               single secondary map, and that that secondary 'lives' in
               the main primary map. */
            SecMap* sm      = get_secmap_for_writing_low(a_lo);
            UWord   v_off16 = SM_OFF_16(a_lo);
            UShort* p       = &sm->vabits16[v_off16];
            p[ 0] = VA_BITS16_UNDEFINED;
            p[ 1] = VA_BITS16_UNDEFINED;
            p[ 2] = VA_BITS16_UNDEFINED;
            p[ 3] = VA_BITS16_UNDEFINED;
            p[ 4] = VA_BITS16_UNDEFINED;
            p[ 5] = VA_BITS16_UNDEFINED;
            p[ 6] = VA_BITS16_UNDEFINED;
            p[ 7] = VA_BITS16_UNDEFINED;
            p[ 8] = VA_BITS16_UNDEFINED;
            p[ 9] = VA_BITS16_UNDEFINED;
            p[10] = VA_BITS16_UNDEFINED;
            p[11] = VA_BITS16_UNDEFINED;
            p[12] = VA_BITS16_UNDEFINED;
            p[13] = VA_BITS16_UNDEFINED;
            p[14] = VA_BITS16_UNDEFINED;
            p[15] = VA_BITS16_UNDEFINED;
            p[16] = VA_BITS16_UNDEFINED;
            p[17] = VA_BITS16_UNDEFINED;
            p[18] = VA_BITS16_UNDEFINED;
            p[19] = VA_BITS16_UNDEFINED;
            p[20] = VA_BITS16_UNDEFINED;
            p[21] = VA_BITS16_UNDEFINED;
            p[22] = VA_BITS16_UNDEFINED;
            p[23] = VA_BITS16_UNDEFINED;
            p[24] = VA_BITS16_UNDEFINED;
            p[25] = VA_BITS16_UNDEFINED;
            p[26] = VA_BITS16_UNDEFINED;
            p[27] = VA_BITS16_UNDEFINED;
            p[28] = VA_BITS16_UNDEFINED;
            p[29] = VA_BITS16_UNDEFINED;
            p[30] = VA_BITS16_UNDEFINED;
            p[31] = VA_BITS16_UNDEFINED;
            p[32] = VA_BITS16_UNDEFINED;
            p[33] = VA_BITS16_UNDEFINED;
            p[34] = VA_BITS16_UNDEFINED;
            p[35] = VA_BITS16_UNDEFINED;
            return;
         }
      }
   }

   /* else fall into slow case */
   make_mem_undefined(base, len);
}
/* And this is an even more specialised case, for the case where there
   is no origin tracking, and the length is 128. */
void MC_(helperc_MAKE_STACK_UNINIT_128_no_o) ( Addr base )
{
   PROF_EVENT(MCPE_MAKE_STACK_UNINIT_128_NO_O);
   if (0)
      VG_(printf)("helperc_MAKE_STACK_UNINIT_128_no_o (%#lx)\n", base );

#  if 0
   /* Slow(ish) version, which is fairly easily seen to be correct.
   */
   if (LIKELY( VG_IS_8_ALIGNED(base) )) {
      make_aligned_word64_undefined(base +   0);
      make_aligned_word64_undefined(base +   8);
      make_aligned_word64_undefined(base +  16);
      make_aligned_word64_undefined(base +  24);

      make_aligned_word64_undefined(base +  32);
      make_aligned_word64_undefined(base +  40);
      make_aligned_word64_undefined(base +  48);
      make_aligned_word64_undefined(base +  56);

      make_aligned_word64_undefined(base +  64);
      make_aligned_word64_undefined(base +  72);
      make_aligned_word64_undefined(base +  80);
      make_aligned_word64_undefined(base +  88);

      make_aligned_word64_undefined(base +  96);
      make_aligned_word64_undefined(base + 104);
      make_aligned_word64_undefined(base + 112);
      make_aligned_word64_undefined(base + 120);
   } else {
      make_mem_undefined(base, 128);
   }
#  endif

   /* Idea is: go fast when
         * 16-aligned and length is 128
         * the sm is available in the main primary map
         * the address range falls entirely with a single secondary map
      If all those conditions hold, just update the V+A bits by writing
      directly into the vabits array.  (If the sm was distinguished, this
      will make a copy and then write to it.)

      Typically this applies to amd64 'ret' instructions, since RSP is
      16-aligned (0 % 16) after the instruction (per the amd64-ELF ABI).
   */
   if (LIKELY( VG_IS_16_ALIGNED(base) )) {
      /* Now we know the address range is suitably sized and aligned. */
      UWord a_lo = (UWord)(base);
      UWord a_hi = (UWord)(base + 128 - 1);
      /* FIXME: come up with a sane story on the wraparound case
         (which of course cannot happen, but still..) */
      /* tl_assert(a_lo < a_hi); */           // paranoia: detect overflow
      if (LIKELY(a_hi <= MAX_PRIMARY_ADDRESS)) {
         /* Now we know the entire range is within the main primary map. */
         UWord pm_off_lo = get_primary_map_low_offset(a_lo);
         UWord pm_off_hi = get_primary_map_low_offset(a_hi);
         if (LIKELY(pm_off_lo == pm_off_hi)) {
            /* Now we know that the entire address range falls within a
               single secondary map, and that that secondary 'lives' in
               the main primary map. */
            PROF_EVENT(MCPE_MAKE_STACK_UNINIT_128_NO_O_ALIGNED_16);
            SecMap* sm    = get_secmap_for_writing_low(a_lo);
            UWord   v_off = SM_OFF(a_lo);
            UInt*   w32   = ASSUME_ALIGNED(UInt*, &sm->vabits8[v_off]);
            w32[ 0] = VA_BITS32_UNDEFINED;
            w32[ 1] = VA_BITS32_UNDEFINED;
            w32[ 2] = VA_BITS32_UNDEFINED;
            w32[ 3] = VA_BITS32_UNDEFINED;
            w32[ 4] = VA_BITS32_UNDEFINED;
            w32[ 5] = VA_BITS32_UNDEFINED;
            w32[ 6] = VA_BITS32_UNDEFINED;
            w32[ 7] = VA_BITS32_UNDEFINED;
            return;
         }
      }
   }

   /* The same, but for when base is 8 % 16, which is the situation
      with RSP for amd64-ELF immediately after call instructions.
   */
   if (LIKELY( VG_IS_16_ALIGNED(base+8) )) { // restricts to 8 aligned
      /* Now we know the address range is suitably sized and aligned. */
      UWord a_lo = (UWord)(base);
      UWord a_hi = (UWord)(base + 128 - 1);
      /* FIXME: come up with a sane story on the wraparound case
         (which of course cannot happen, but still..) */
      /* tl_assert(a_lo < a_hi); */           // paranoia: detect overflow
      if (LIKELY(a_hi <= MAX_PRIMARY_ADDRESS)) {
         /* Now we know the entire range is within the main primary map. */
         UWord pm_off_lo = get_primary_map_low_offset(a_lo);
         UWord pm_off_hi = get_primary_map_low_offset(a_hi);
         if (LIKELY(pm_off_lo == pm_off_hi)) {
            PROF_EVENT(MCPE_MAKE_STACK_UNINIT_128_NO_O_ALIGNED_8);
            /* Now we know that the entire address range falls within a
               single secondary map, and that that secondary 'lives' in
               the main primary map. */
            SecMap* sm      = get_secmap_for_writing_low(a_lo);
            UWord   v_off16 = SM_OFF_16(a_lo);
            UShort* w16     = &sm->vabits16[v_off16];
            UInt*   w32     = ASSUME_ALIGNED(UInt*, &w16[1]);
            /* The following assertion is commented out for obvious
               performance reasons, but was verified as valid when
               running the entire testsuite and also Firefox. */
            /* tl_assert(VG_IS_4_ALIGNED(w32)); */
            w16[ 0] = VA_BITS16_UNDEFINED; // w16[0]
            w32[ 0] = VA_BITS32_UNDEFINED; // w16[1,2]
            w32[ 1] = VA_BITS32_UNDEFINED; // w16[3,4]
            w32[ 2] = VA_BITS32_UNDEFINED; // w16[5,6]
            w32[ 3] = VA_BITS32_UNDEFINED; // w16[7,8]
            w32[ 4] = VA_BITS32_UNDEFINED; // w16[9,10]
            w32[ 5] = VA_BITS32_UNDEFINED; // w16[11,12]
            w32[ 6] = VA_BITS32_UNDEFINED; // w16[13,14]
            w16[15] = VA_BITS16_UNDEFINED; // w16[15]
            return;
         }
      }
   }

   /* else fall into slow case */
   PROF_EVENT(MCPE_MAKE_STACK_UNINIT_128_NO_O_SLOWCASE);
   make_mem_undefined(base, 128);
}
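
/* Arithmetic behind the fast paths above: each byte of client memory
   has 2 V+A bits, so 128 bytes of stack correspond to 256 bits = 32
   bytes of vabits, i.e. 16 UShorts (the p[0..15] / w16[...] stores) or
   8 UInts (the w32[0..7] stores).  Writing VA_BITS16_UNDEFINED /
   VA_BITS32_UNDEFINED therefore marks 8 or 16 client bytes per store
   respectively. */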
/*------------------------------------------------------------*/
/*--- Checking memory                                      ---*/
/*------------------------------------------------------------*/

typedef
   enum {
      MC_Ok = 5,
      MC_AddrErr = 6,
      MC_ValueErr = 7
   }
   MC_ReadResult;


/* Check permissions for address range.  If inadequate permissions
   exist, *bad_addr is set to the offending address, so the caller can
   know what it is. */

/* Returns True if [a .. a+len) is not addressible.  Otherwise,
   returns False, and if bad_addr is non-NULL, sets *bad_addr to
   indicate the lowest failing address.  Functions below are
   similar. */
Bool MC_(check_mem_is_noaccess) ( Addr a, SizeT len, Addr* bad_addr )
{
   SizeT i;
   UWord vabits2;

   PROF_EVENT(MCPE_CHECK_MEM_IS_NOACCESS);
   for (i = 0; i < len; i++) {
      PROF_EVENT(MCPE_CHECK_MEM_IS_NOACCESS_LOOP);
      vabits2 = get_vabits2(a);
      if (VA_BITS2_NOACCESS != vabits2) {
         if (bad_addr != NULL) *bad_addr = a;
         return False;
      }
      a++;
   }
   return True;
}

static Bool is_mem_addressable ( Addr a, SizeT len,
                                 /*OUT*/Addr* bad_addr )
{
   SizeT i;
   UWord vabits2;

   PROF_EVENT(MCPE_IS_MEM_ADDRESSABLE);
   for (i = 0; i < len; i++) {
      PROF_EVENT(MCPE_IS_MEM_ADDRESSABLE_LOOP);
      vabits2 = get_vabits2(a);
      if (VA_BITS2_NOACCESS == vabits2) {
         if (bad_addr != NULL) *bad_addr = a;
         return False;
      }
      a++;
   }
   return True;
}

static MC_ReadResult is_mem_defined ( Addr a, SizeT len,
                                      /*OUT*/Addr* bad_addr,
                                      /*OUT*/UInt* otag )
{
   SizeT i;
   UWord vabits2;

   PROF_EVENT(MCPE_IS_MEM_DEFINED);
   DEBUG("is_mem_defined\n");

   if (otag)     *otag = 0;
   if (bad_addr) *bad_addr = 0;
   for (i = 0; i < len; i++) {
      PROF_EVENT(MCPE_IS_MEM_DEFINED_LOOP);
      vabits2 = get_vabits2(a);
      if (VA_BITS2_DEFINED != vabits2) {
         // Error! Nb: Report addressability errors in preference to
         // definedness errors. And don't report definedness errors unless
         // --undef-value-errors=yes.
         if (bad_addr) {
            *bad_addr = a;
         }
         if (VA_BITS2_NOACCESS == vabits2) {
            return MC_AddrErr;
         }
         if (MC_(clo_mc_level) >= 2) {
            if (otag && MC_(clo_mc_level) == 3) {
               *otag = MC_(helperc_b_load1)( a );
            }
            return MC_ValueErr;
         }
      }
      a++;
   }
   return MC_Ok;
}


/* Like is_mem_defined but doesn't give up at the first uninitialised
   byte -- the entire range is always checked.  This is important for
   detecting errors in the case where a checked range strays into
   invalid memory, but that fact is not detected by the ordinary
   is_mem_defined(), because of an undefined section that precedes the
   out of range section, possibly as a result of an alignment hole in
   the checked data.  This version always checks the entire range and
   can report both a definedness and an accessibility error, if
   necessary. */
static void is_mem_defined_comprehensive (
               Addr a, SizeT len,
               /*OUT*/Bool* errorV,    /* is there a definedness err? */
               /*OUT*/Addr* bad_addrV, /* if so where? */
               /*OUT*/UInt* otagV,     /* and what's its otag? */
               /*OUT*/Bool* errorA,    /* is there an addressability err? */
               /*OUT*/Addr* bad_addrA  /* if so where? */
            )
{
   SizeT i;
   UWord vabits2;
   Bool  already_saw_errV = False;

   PROF_EVENT(MCPE_IS_MEM_DEFINED_COMPREHENSIVE);
   DEBUG("is_mem_defined_comprehensive\n");

   tl_assert(!(*errorV || *errorA));

   for (i = 0; i < len; i++) {
      PROF_EVENT(MCPE_IS_MEM_DEFINED_COMPREHENSIVE_LOOP);
      vabits2 = get_vabits2(a);
      switch (vabits2) {
         case VA_BITS2_DEFINED:
            a++;
            break;
         case VA_BITS2_UNDEFINED:
         case VA_BITS2_PARTDEFINED:
            if (!already_saw_errV) {
               *errorV    = True;
               *bad_addrV = a;
               if (MC_(clo_mc_level) == 3) {
                  *otagV = MC_(helperc_b_load1)( a );
               } else {
                  *otagV = 0;
               }
               already_saw_errV = True;
            }
            a++; /* keep going */
            break;
         case VA_BITS2_NOACCESS:
            *errorA    = True;
            *bad_addrA = a;
            return; /* give up now. */
         default:
            tl_assert(0);
      }
   }
}


/* Check a zero-terminated ascii string.  Tricky -- don't want to
   examine the actual bytes, to find the end, until we're sure it is
   safe to do so. */
static Bool mc_is_defined_asciiz ( Addr a, Addr* bad_addr, UInt* otag )
{
   UWord vabits2;

   PROF_EVENT(MCPE_IS_DEFINED_ASCIIZ);
   DEBUG("mc_is_defined_asciiz\n");

   if (otag)     *otag = 0;
   if (bad_addr) *bad_addr = 0;
   while (True) {
      PROF_EVENT(MCPE_IS_DEFINED_ASCIIZ_LOOP);
      vabits2 = get_vabits2(a);
      if (VA_BITS2_DEFINED != vabits2) {
         // Error! Nb: Report addressability errors in preference to
         // definedness errors. And don't report definedness errors unless
         // --undef-value-errors=yes.
         if (bad_addr) {
            *bad_addr = a;
         }
         if (VA_BITS2_NOACCESS == vabits2) {
            return MC_AddrErr;
         }
         if (MC_(clo_mc_level) >= 2) {
            if (otag && MC_(clo_mc_level) == 3) {
               *otag = MC_(helperc_b_load1)( a );
            }
            return MC_ValueErr;
         }
      }
      /* Ok, a is safe to read. */
      if (* ((UChar*)a) == 0) {
         return MC_Ok;
      }
      a++;
   }
}
/*------------------------------------------------------------*/
/*--- Memory event handlers                                ---*/
/*------------------------------------------------------------*/

static
void check_mem_is_addressable ( CorePart part, ThreadId tid, const HChar* s,
                                Addr base, SizeT size )
{
   Addr bad_addr;
   Bool ok = is_mem_addressable ( base, size, &bad_addr );

   if (!ok) {
      switch (part) {
      case Vg_CoreSysCall:
         MC_(record_memparam_error) ( tid, bad_addr,
                                      /*isAddrErr*/True, s, 0/*otag*/ );
         break;

      case Vg_CoreSignal:
         MC_(record_core_mem_error)( tid, s );
         break;

      default:
         VG_(tool_panic)("check_mem_is_addressable: unexpected CorePart");
      }
   }
}

static
void check_mem_is_defined ( CorePart part, ThreadId tid, const HChar* s,
                            Addr base, SizeT size )
{
   UInt otag = 0;
   Addr bad_addr;
   MC_ReadResult res = is_mem_defined ( base, size, &bad_addr, &otag );

   if (MC_Ok != res) {
      Bool isAddrErr = ( MC_AddrErr == res ? True : False );

      switch (part) {
      case Vg_CoreSysCall:
         MC_(record_memparam_error) ( tid, bad_addr, isAddrErr, s,
                                      isAddrErr ? 0 : otag );
         break;

      case Vg_CoreSysCallArgInMem:
         MC_(record_regparam_error) ( tid, s, otag );
         break;

      /* If we're being asked to jump to a silly address, record an error
         message before potentially crashing the entire system. */
      case Vg_CoreTranslate:
         MC_(record_jump_error)( tid, bad_addr );
         break;

      default:
         VG_(tool_panic)("check_mem_is_defined: unexpected CorePart");
      }
   }
}

static
void check_mem_is_defined_asciiz ( CorePart part, ThreadId tid,
                                   const HChar* s, Addr str )
{
   MC_ReadResult res;
   Addr bad_addr = 0;   // shut GCC up
   UInt otag = 0;

   tl_assert(part == Vg_CoreSysCall);
   res = mc_is_defined_asciiz ( (Addr)str, &bad_addr, &otag );
   if (MC_Ok != res) {
      Bool isAddrErr = ( MC_AddrErr == res ? True : False );
      MC_(record_memparam_error) ( tid, bad_addr, isAddrErr, s,
                                   isAddrErr ? 0 : otag );
   }
}
/* Handling of mmap and mprotect is not as simple as it seems.

   The underlying semantics are that memory obtained from mmap is
   always initialised, but may be inaccessible.  And changes to the
   protection of memory do not change its contents and hence not its
   definedness state.  Problem is we can't model
   inaccessible-but-with-some-definedness state; once we mark memory
   as inaccessible we lose all info about definedness, and so can't
   restore that if it is later made accessible again.

   One obvious thing to do is this:

      mmap/mprotect NONE  -> noaccess
      mmap/mprotect other -> defined

   The problem case here is: taking accessible memory, writing
   uninitialised data to it, mprotecting it NONE and later mprotecting
   it back to some accessible state causes the undefinedness to be
   lost.

   A better proposal is:

     (1) mmap NONE       ->  make noaccess
     (2) mmap other      ->  make defined

     (3) mprotect NONE   ->  # no change
     (4) mprotect other  ->  change any "noaccess" to "defined"

   (2) is OK because memory newly obtained from mmap really is defined
       (zeroed out by the kernel -- doing anything else would
       constitute a massive security hole.)

   (1) is OK because the only way to make the memory usable is via
       (4), in which case we also wind up correctly marking it all as
       defined.

   (3) is the weak case.  We choose not to change memory state.
       (presumably the range is in some mixture of "defined" and
       "undefined", viz, accessible but with arbitrary V bits).  Doing
       nothing means we retain the V bits, so that if the memory is
       later mprotected "other", the V bits remain unchanged, so there
       can be no false negatives.  The bad effect is that if there's
       an access in the area, then MC cannot warn; but at least we'll
       get a SEGV to show, so it's better than nothing.

   Consider the sequence (3) followed by (4).  Any memory that was
   "defined" or "undefined" previously retains its state (as
   required).  Any memory that was "noaccess" before can only have
   been made that way by (1), and so it's OK to change it to
   "defined".

   See https://bugs.kde.org/show_bug.cgi?id=205541
   and https://bugs.kde.org/show_bug.cgi?id=210268
*/
static
void mc_new_mem_mmap ( Addr a, SizeT len, Bool rr, Bool ww, Bool xx,
                       ULong di_handle )
{
   if (rr || ww || xx) {
      /* (2) mmap/mprotect other -> defined */
      MC_(make_mem_defined)(a, len);
   } else {
      /* (1) mmap/mprotect NONE -> noaccess */
      MC_(make_mem_noaccess)(a, len);
   }
}

static
void mc_new_mem_mprotect ( Addr a, SizeT len, Bool rr, Bool ww, Bool xx )
{
   if (rr || ww || xx) {
      /* (4) mprotect other -> change any "noaccess" to "defined" */
      make_mem_defined_if_noaccess(a, len);
   } else {
      /* (3) mprotect NONE -> # no change */
      /* do nothing */
   }
}

static
void mc_new_mem_startup( Addr a, SizeT len,
                         Bool rr, Bool ww, Bool xx, ULong di_handle )
{
   // Because code is defined, initialised variables get put in the data
   // segment and are defined, and uninitialised variables get put in the
   // bss segment and are auto-zeroed (and so defined).
   //
   // It's possible that there will be padding between global variables.
   // This will also be auto-zeroed, and marked as defined by Memcheck.  If
   // a program uses it, Memcheck will not complain.  This is arguably a
   // false negative, but it's a grey area -- the behaviour is defined (the
   // padding is zeroed) but it's probably not what the user intended.  And
   // we can't avoid it.
   //
   // Note: we generally ignore RWX permissions, because we can't track them
   // without requiring more than one A bit which would slow things down a
   // lot.  But on Darwin the 0th page is mapped but !R and !W and !X.
   // So we mark any such pages as "unaddressable".
   DEBUG("mc_new_mem_startup(%#lx, %llu, rr=%u, ww=%u, xx=%u)\n",
         a, (ULong)len, rr, ww, xx);
   mc_new_mem_mmap(a, len, rr, ww, xx, di_handle);
}

static
void mc_post_mem_write(CorePart part, ThreadId tid, Addr a, SizeT len)
{
   MC_(make_mem_defined)(a, len);
}
/*------------------------------------------------------------*/
/*--- Register event handlers                              ---*/
/*------------------------------------------------------------*/

/* Try and get a nonzero origin for the guest state section of thread
   tid characterised by (offset,size).  Return 0 if nothing to show
   for it. */
static UInt mb_get_origin_for_guest_offset ( ThreadId tid,
                                             Int offset, SizeT size )
{
   Int   sh2off;
   UInt  area[3];
   UInt  otag;
   sh2off = MC_(get_otrack_shadow_offset)( offset, size );
   if (sh2off == -1)
      return 0;  /* This piece of guest state is not tracked */
   tl_assert(sh2off >= 0);
   tl_assert(0 == (sh2off % 4));
   area[0] = 0x31313131;
   area[2] = 0x27272727;
   VG_(get_shadow_regs_area)( tid, (UChar *)&area[1], 2/*shadowno*/,sh2off,4 );
   tl_assert(area[0] == 0x31313131);
   tl_assert(area[2] == 0x27272727);
   otag = area[1];
   return otag;
}


/* When some chunk of guest state is written, mark the corresponding
   shadow area as valid.  This is used to initialise arbitrarily large
   chunks of guest state, hence the _SIZE value, which has to be as
   big as the biggest guest state.
*/
static void mc_post_reg_write ( CorePart part, ThreadId tid,
                                PtrdiffT offset, SizeT size)
{
#  define MAX_REG_WRITE_SIZE 1744
   UChar area[MAX_REG_WRITE_SIZE];
   tl_assert(size <= MAX_REG_WRITE_SIZE);
   VG_(memset)(area, V_BITS8_DEFINED, size);
   VG_(set_shadow_regs_area)( tid, 1/*shadowNo*/,offset,size, area );
#  undef MAX_REG_WRITE_SIZE
}

static
void mc_post_reg_write_clientcall ( ThreadId tid,
                                    PtrdiffT offset, SizeT size, Addr f)
{
   mc_post_reg_write(/*dummy*/0, tid, offset, size);
}

/* Look at the definedness of the guest's shadow state for
   [offset, offset+len).  If any part of that is undefined, record
   a parameter error.
*/
static void mc_pre_reg_read ( CorePart part, ThreadId tid, const HChar* s,
                              PtrdiffT offset, SizeT size)
{
   Int   i;
   Bool  bad;
   UInt  otag;

   UChar area[16];
   tl_assert(size <= 16);

   VG_(get_shadow_regs_area)( tid, area, 1/*shadowNo*/,offset,size );

   bad = False;
   for (i = 0; i < size; i++) {
      if (area[i] != V_BITS8_DEFINED) {
         bad = True;
         break;
      }
   }

   if (!bad)
      return;

   /* We've found some undefinedness.  See if we can also find an
      origin for it. */
   otag = mb_get_origin_for_guest_offset( tid, offset, size );
   MC_(record_regparam_error) ( tid, s, otag );
}
/*------------------------------------------------------------*/
/*--- Register-memory event handlers                       ---*/
/*------------------------------------------------------------*/

static void mc_copy_mem_to_reg ( CorePart part, ThreadId tid, Addr a,
                                 PtrdiffT guest_state_offset, SizeT size )
{
   SizeT i;
   UChar vbits8;
   Int   offset;
   UInt  d32;

   /* Slow loop. */
   for (i = 0; i < size; i++) {
      get_vbits8( a+i, &vbits8 );
      VG_(set_shadow_regs_area)( tid, 1/*shadowNo*/, guest_state_offset+i,
                                 1, &vbits8 );
   }

   if (MC_(clo_mc_level) != 3)
      return;

   /* Track origins. */
   offset = MC_(get_otrack_shadow_offset)( guest_state_offset, size );
   if (offset == -1)
      return;

   switch (size) {
   case 1:
      d32 = MC_(helperc_b_load1)( a );
      break;
   case 2:
      d32 = MC_(helperc_b_load2)( a );
      break;
   case 4:
      d32 = MC_(helperc_b_load4)( a );
      break;
   case 8:
      d32 = MC_(helperc_b_load8)( a );
      break;
   case 16:
      d32 = MC_(helperc_b_load16)( a );
      break;
   case 32:
      d32 = MC_(helperc_b_load32)( a );
      break;
   default:
      tl_assert(0);
   }

   VG_(set_shadow_regs_area)( tid, 2/*shadowNo*/, offset, 4, (UChar*)&d32 );
}

static void mc_copy_reg_to_mem ( CorePart part, ThreadId tid,
                                 PtrdiffT guest_state_offset, Addr a,
                                 SizeT size )
{
   SizeT i;
   UChar vbits8;
   Int   offset;
   UInt  d32;

   /* Slow loop. */
   for (i = 0; i < size; i++) {
      VG_(get_shadow_regs_area)( tid, &vbits8, 1/*shadowNo*/,
                                 guest_state_offset+i, 1 );
      set_vbits8( a+i, vbits8 );
   }

   if (MC_(clo_mc_level) != 3)
      return;

   /* Track origins. */
   offset = MC_(get_otrack_shadow_offset)( guest_state_offset, size );
   if (offset == -1)
      return;

   VG_(get_shadow_regs_area)( tid, (UChar*)&d32, 2/*shadowNo*/, offset, 4 );
   switch (size) {
   case 1:
      MC_(helperc_b_store1)( a, d32 );
      break;
   case 2:
      MC_(helperc_b_store2)( a, d32 );
      break;
   case 4:
      MC_(helperc_b_store4)( a, d32 );
      break;
   case 8:
      MC_(helperc_b_store8)( a, d32 );
      break;
   case 16:
      MC_(helperc_b_store16)( a, d32 );
      break;
   case 32:
      MC_(helperc_b_store32)( a, d32 );
      break;
   default:
      tl_assert(0);
   }
}
/*------------------------------------------------------------*/
/*--- Some static assertions                               ---*/
/*------------------------------------------------------------*/

/* The handwritten assembly helpers below have baked-in assumptions
   about various constant values.  These assertions attempt to make
   that a bit safer by checking those values and flagging changes that
   would make the assembly invalid.  Not perfect but it's better than
   nothing. */

STATIC_ASSERT(SM_CHUNKS * 4 == 65536);

STATIC_ASSERT(VA_BITS8_DEFINED   == 0xAA);
STATIC_ASSERT(VA_BITS8_UNDEFINED == 0x55);

STATIC_ASSERT(V_BITS32_DEFINED   == 0x00000000);
STATIC_ASSERT(V_BITS32_UNDEFINED == 0xFFFFFFFF);

STATIC_ASSERT(VA_BITS4_DEFINED   == 0xA);
STATIC_ASSERT(VA_BITS4_UNDEFINED == 0x5);

STATIC_ASSERT(V_BITS16_DEFINED   == 0x0000);
STATIC_ASSERT(V_BITS16_UNDEFINED == 0xFFFF);

STATIC_ASSERT(VA_BITS2_DEFINED   == 2);
STATIC_ASSERT(VA_BITS2_UNDEFINED == 1);

STATIC_ASSERT(V_BITS8_DEFINED   == 0x00);
STATIC_ASSERT(V_BITS8_UNDEFINED == 0xFF);
/*------------------------------------------------------------*/
/*--- Functions called directly from generated code:       ---*/
/*--- Load/store handlers.                                 ---*/
/*------------------------------------------------------------*/

/* Types:  LOADV32, LOADV16, LOADV8 are:
               UWord fn ( Addr a )
   so they return 32-bits on 32-bit machines and 64-bits on
   64-bit machines.  Addr has the same size as a host word.

   LOADV64 is always  ULong fn ( Addr a )

   Similarly for STOREV8, STOREV16, STOREV32, the supplied vbits
   are a UWord, and for STOREV64 they are a ULong.
*/

/* If any part of '_a' indicated by the mask is 1, either '_a' is not
   naturally '_sz/8'-aligned, or it exceeds the range covered by the
   primary map.  This is all very tricky (and important!), so let's
   work through the maths by hand (below), *and* assert for these
   values at startup. */
#define MASK(_szInBytes) \
   ( ~((0x10000UL-(_szInBytes)) | ((N_PRIMARY_MAP-1) << 16)) )

/* MASK only exists so as to define this macro. */
#define UNALIGNED_OR_HIGH(_a,_szInBits) \
   ((_a) & MASK((_szInBits>>3)))

/* On a 32-bit machine:

   N_PRIMARY_BITS          == 16, so
   N_PRIMARY_MAP           == 0x10000, so
   N_PRIMARY_MAP-1         == 0xFFFF, so
   (N_PRIMARY_MAP-1) << 16 == 0xFFFF0000, and so

   MASK(1) = ~ ( (0x10000 - 1) | 0xFFFF0000 )
           = ~ ( 0xFFFF | 0xFFFF0000 )
           = ~ 0xFFFF'FFFF
           = 0

   MASK(2) = ~ ( (0x10000 - 2) | 0xFFFF0000 )
           = ~ ( 0xFFFE | 0xFFFF0000 )
           = ~ 0xFFFF'FFFE
           = 1

   MASK(4) = ~ ( (0x10000 - 4) | 0xFFFF0000 )
           = ~ ( 0xFFFC | 0xFFFF0000 )
           = ~ 0xFFFF'FFFC
           = 3

   MASK(8) = ~ ( (0x10000 - 8) | 0xFFFF0000 )
           = ~ ( 0xFFF8 | 0xFFFF0000 )
           = ~ 0xFFFF'FFF8
           = 7

   Hence in the 32-bit case, "a & MASK(1/2/4/8)" is a nonzero value
   precisely when a is not 1/2/4/8-bytes aligned.  And obviously, for
   the 1-byte alignment case, it is always a zero value, since MASK(1)
   is zero.  All as expected.

   On a 64-bit machine, it's more complex, since we're testing
   simultaneously for misalignment and for the address being at or
   above 64G:

   N_PRIMARY_BITS          == 20, so
   N_PRIMARY_MAP           == 0x100000, so
   N_PRIMARY_MAP-1         == 0xFFFFF, so
   (N_PRIMARY_MAP-1) << 16 == 0xF'FFFF'0000, and so

   MASK(1) = ~ ( (0x10000 - 1) | 0xF'FFFF'0000 )
           = ~ ( 0xFFFF | 0xF'FFFF'0000 )
           = ~ 0xF'FFFF'FFFF
           = 0xFFFF'FFF0'0000'0000

   MASK(2) = ~ ( (0x10000 - 2) | 0xF'FFFF'0000 )
           = ~ ( 0xFFFE | 0xF'FFFF'0000 )
           = ~ 0xF'FFFF'FFFE
           = 0xFFFF'FFF0'0000'0001

   MASK(4) = ~ ( (0x10000 - 4) | 0xF'FFFF'0000 )
           = ~ ( 0xFFFC | 0xF'FFFF'0000 )
           = ~ 0xF'FFFF'FFFC
           = 0xFFFF'FFF0'0000'0003

   MASK(8) = ~ ( (0x10000 - 8) | 0xF'FFFF'0000 )
           = ~ ( 0xFFF8 | 0xF'FFFF'0000 )
           = ~ 0xF'FFFF'FFF8
           = 0xFFFF'FFF0'0000'0007
*/
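
/* A couple of spot checks of UNALIGNED_OR_HIGH on a 64-bit target,
   using MASK(8) = 0xFFFF'FFF0'0000'0007 from above:

      a = 0x1000          ->  a & MASK(8) == 0     (8-aligned, below 64G: fast path)
      a = 0x1004          ->  a & MASK(8) == 0x4   (not 8-aligned: slow path)
      a = 0x10'0000'0000  ->  a & MASK(8) != 0     (at 64G: slow path)
*/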
/*------------------------------------------------------------*/
/*--- LOADV256 and LOADV128                                ---*/
/*------------------------------------------------------------*/

static INLINE
void mc_LOADV_128_or_256 ( /*OUT*/ULong* res,
                           Addr a, SizeT nBits, Bool isBigEndian )
{
   PROF_EVENT(MCPE_LOADV_128_OR_256);

#ifndef PERF_FAST_LOADV
   mc_LOADV_128_or_256_slow( res, a, nBits, isBigEndian );
   return;
#else
   {
      UWord   sm_off16, vabits16, j;
      UWord   nBytes  = nBits / 8;
      UWord   nULongs = nBytes / 8;
      SecMap* sm;

      if (UNLIKELY( UNALIGNED_OR_HIGH(a,nBits) )) {
         PROF_EVENT(MCPE_LOADV_128_OR_256_SLOW1);
         mc_LOADV_128_or_256_slow( res, a, nBits, isBigEndian );
         return;
      }

      /* Handle common cases quickly: a (and a+8 and a+16 etc.) is
         suitably aligned, is mapped, and addressible. */
      for (j = 0; j < nULongs; j++) {
         sm       = get_secmap_for_reading_low(a + 8*j);
         sm_off16 = SM_OFF_16(a + 8*j);
         vabits16 = sm->vabits16[sm_off16];

         // Convert V bits from compact memory form to expanded
         // register form.
         if (LIKELY(vabits16 == VA_BITS16_DEFINED)) {
            res[j] = V_BITS64_DEFINED;
         } else if (LIKELY(vabits16 == VA_BITS16_UNDEFINED)) {
            res[j] = V_BITS64_UNDEFINED;
         } else {
            /* Slow case: some block of 8 bytes are not all-defined or
               all-undefined. */
            PROF_EVENT(MCPE_LOADV_128_OR_256_SLOW2);
            mc_LOADV_128_or_256_slow( res, a, nBits, isBigEndian );
            return;
         }
      }
   }
#endif
}

VG_REGPARM(2) void MC_(helperc_LOADV256be) ( /*OUT*/V256* res, Addr a )
{
   mc_LOADV_128_or_256(&res->w64[0], a, 256, True);
}
VG_REGPARM(2) void MC_(helperc_LOADV256le) ( /*OUT*/V256* res, Addr a )
{
   mc_LOADV_128_or_256(&res->w64[0], a, 256, False);
}

VG_REGPARM(2) void MC_(helperc_LOADV128be) ( /*OUT*/V128* res, Addr a )
{
   mc_LOADV_128_or_256(&res->w64[0], a, 128, True);
}
VG_REGPARM(2) void MC_(helperc_LOADV128le) ( /*OUT*/V128* res, Addr a )
{
   mc_LOADV_128_or_256(&res->w64[0], a, 128, False);
}
/*------------------------------------------------------------*/
/*--- LOADV64                                              ---*/
/*------------------------------------------------------------*/

static INLINE
ULong mc_LOADV64 ( Addr a, Bool isBigEndian )
{
   PROF_EVENT(MCPE_LOADV64);

#ifndef PERF_FAST_LOADV
   return mc_LOADVn_slow( a, 64, isBigEndian );
#else
   {
      UWord   sm_off16, vabits16;
      SecMap* sm;

      if (UNLIKELY( UNALIGNED_OR_HIGH(a,64) )) {
         PROF_EVENT(MCPE_LOADV64_SLOW1);
         return (ULong)mc_LOADVn_slow( a, 64, isBigEndian );
      }

      sm       = get_secmap_for_reading_low(a);
      sm_off16 = SM_OFF_16(a);
      vabits16 = sm->vabits16[sm_off16];

      // Handle common case quickly: a is suitably aligned, is mapped, and
      // addressible.
      // Convert V bits from compact memory form to expanded register form.
      if (LIKELY(vabits16 == VA_BITS16_DEFINED)) {
         return V_BITS64_DEFINED;
      } else if (LIKELY(vabits16 == VA_BITS16_UNDEFINED)) {
         return V_BITS64_UNDEFINED;
      } else {
         /* Slow case: the 8 bytes are not all-defined or all-undefined. */
         PROF_EVENT(MCPE_LOADV64_SLOW2);
         return mc_LOADVn_slow( a, 64, isBigEndian );
      }
   }
#endif
}

// Generic for all platforms
VG_REGPARM(1) ULong MC_(helperc_LOADV64be) ( Addr a )
{
   return mc_LOADV64(a, True);
}
// Non-generic assembly for arm32-linux
#if ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
    && defined(VGP_arm_linux)
__asm__( /* Derived from the 32 bit assembly helper */
".text                                  \n"
".align 2                               \n"
".global vgMemCheck_helperc_LOADV64le   \n"
".type   vgMemCheck_helperc_LOADV64le, %function \n"
"vgMemCheck_helperc_LOADV64le:          \n"
"      tst    r0, #7                    \n"
"      movw   r3, #:lower16:primary_map \n"
"      bne    .LLV64LEc4                \n" // if misaligned
"      lsr    r2, r0, #16               \n"
"      movt   r3, #:upper16:primary_map \n"
"      ldr    r2, [r3, r2, lsl #2]      \n"
"      uxth   r1, r0                    \n" // r1 is 0-(16)-0 X-(13)-X 000
"      movw   r3, #0xAAAA               \n"
"      lsr    r1, r1, #2                \n" // r1 is 0-(16)-0 00 X-(13)-X 0
"      ldrh   r1, [r2, r1]              \n"
"      cmp    r1, r3                    \n" // 0xAAAA == VA_BITS16_DEFINED
"      bne    .LLV64LEc0                \n" // if !all_defined
"      mov    r1, #0x0                  \n" // 0x0 == V_BITS32_DEFINED
"      mov    r0, #0x0                  \n" // 0x0 == V_BITS32_DEFINED
"      bx     lr                        \n"
".LLV64LEc0:                            \n"
"      movw   r3, #0x5555               \n"
"      cmp    r1, r3                    \n" // 0x5555 == VA_BITS16_UNDEFINED
"      bne    .LLV64LEc4                \n" // if !all_undefined
"      mov    r1, #0xFFFFFFFF           \n" // 0xFFFFFFFF == V_BITS32_UNDEFINED
"      mov    r0, #0xFFFFFFFF           \n" // 0xFFFFFFFF == V_BITS32_UNDEFINED
"      bx     lr                        \n"
".LLV64LEc4:                            \n"
"      push   {r4, lr}                  \n"
"      mov    r2, #0                    \n"
"      mov    r1, #64                   \n"
"      bl     mc_LOADVn_slow            \n"
"      pop    {r4, pc}                  \n"
".size vgMemCheck_helperc_LOADV64le, .-vgMemCheck_helperc_LOADV64le \n"
".previous\n"
);

#elif ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
      && (defined(VGP_x86_linux) || defined(VGP_x86_solaris))
__asm__(
".text\n"
".align 16\n"
".global vgMemCheck_helperc_LOADV64le\n"
".type   vgMemCheck_helperc_LOADV64le, @function\n"
"vgMemCheck_helperc_LOADV64le:\n"
"      test   $0x7,  %eax\n"
"      jne    .LLV64LE2\n"          /* jump if not aligned */
"      mov    %eax,  %ecx\n"
"      movzwl %ax,   %edx\n"
"      shr    $0x10, %ecx\n"
"      mov    primary_map(,%ecx,4), %ecx\n"
"      shr    $0x3,  %edx\n"
"      movzwl (%ecx,%edx,2), %edx\n"
"      cmp    $0xaaaa, %edx\n"
"      jne    .LLV64LE1\n"          /* jump if not all defined */
"      xor    %eax, %eax\n"         /* return 0 in edx:eax */
"      xor    %edx, %edx\n"
"      ret\n"
".LLV64LE1:\n"
"      cmp    $0x5555, %edx\n"
"      jne    .LLV64LE2\n"         /* jump if not all undefined */
"      or     $0xffffffff, %eax\n" /* else return all bits set in edx:eax */
"      or     $0xffffffff, %edx\n"
"      ret\n"
".LLV64LE2:\n"
"      xor    %ecx,  %ecx\n"  /* tail call to mc_LOADVn_slow(a, 64, 0) */
"      mov    $64,   %edx\n"
"      jmp    mc_LOADVn_slow\n"
".size vgMemCheck_helperc_LOADV64le, .-vgMemCheck_helperc_LOADV64le\n"
".previous\n"
);
4938 // Generic for all platforms except {arm32,x86}-linux and x86-solaris
4939 VG_REGPARM(1) ULong
MC_(helperc_LOADV64le
) ( Addr a
)
4941 return mc_LOADV64(a
, False
);
4945 /*------------------------------------------------------------*/
4946 /*--- STOREV64 ---*/
4947 /*------------------------------------------------------------*/
4950 void mc_STOREV64 ( Addr a
, ULong vbits64
, Bool isBigEndian
)
4952 PROF_EVENT(MCPE_STOREV64
);
4954 #ifndef PERF_FAST_STOREV
4955 // XXX: this slow case seems to be marginally faster than the fast case!
4956 // Investigate further.
4957 mc_STOREVn_slow( a
, 64, vbits64
, isBigEndian
);
4960 UWord sm_off16
, vabits16
;
4963 if (UNLIKELY( UNALIGNED_OR_HIGH(a
,64) )) {
4964 PROF_EVENT(MCPE_STOREV64_SLOW1
);
4965 mc_STOREVn_slow( a
, 64, vbits64
, isBigEndian
);
4969 sm
= get_secmap_for_reading_low(a
);
4970 sm_off16
= SM_OFF_16(a
);
4971 vabits16
= sm
->vabits16
[sm_off16
];
4973 // To understand the below cleverness, see the extensive comments
4974 // in MC_(helperc_STOREV8).
4975 if (LIKELY(V_BITS64_DEFINED
== vbits64
)) {
4976 if (LIKELY(vabits16
== (UShort
)VA_BITS16_DEFINED
)) {
4979 if (!is_distinguished_sm(sm
) && VA_BITS16_UNDEFINED
== vabits16
) {
4980 sm
->vabits16
[sm_off16
] = VA_BITS16_DEFINED
;
4983 PROF_EVENT(MCPE_STOREV64_SLOW2
);
4984 mc_STOREVn_slow( a
, 64, vbits64
, isBigEndian
);
4987 if (V_BITS64_UNDEFINED
== vbits64
) {
4988 if (vabits16
== (UShort
)VA_BITS16_UNDEFINED
) {
4991 if (!is_distinguished_sm(sm
) && VA_BITS16_DEFINED
== vabits16
) {
4992 sm
->vabits16
[sm_off16
] = VA_BITS16_UNDEFINED
;
4995 PROF_EVENT(MCPE_STOREV64_SLOW3
);
4996 mc_STOREVn_slow( a
, 64, vbits64
, isBigEndian
);
5000 PROF_EVENT(MCPE_STOREV64_SLOW4
);
5001 mc_STOREVn_slow( a
, 64, vbits64
, isBigEndian
);
5006 VG_REGPARM(1) void MC_(helperc_STOREV64be
) ( Addr a
, ULong vbits64
)
5008 mc_STOREV64(a
, vbits64
, True
);
5010 VG_REGPARM(1) void MC_(helperc_STOREV64le
) ( Addr a
, ULong vbits64
)
5012 mc_STOREV64(a
, vbits64
, False
);
5015 /*------------------------------------------------------------*/
5017 /*------------------------------------------------------------*/
5020 UWord
mc_LOADV32 ( Addr a
, Bool isBigEndian
)
5022 PROF_EVENT(MCPE_LOADV32
);
5024 #ifndef PERF_FAST_LOADV
5025 return (UWord
)mc_LOADVn_slow( a
, 32, isBigEndian
);
5028 UWord sm_off
, vabits8
;
5031 if (UNLIKELY( UNALIGNED_OR_HIGH(a
,32) )) {
5032 PROF_EVENT(MCPE_LOADV32_SLOW1
);
5033 return (UWord
)mc_LOADVn_slow( a
, 32, isBigEndian
);
5036 sm
= get_secmap_for_reading_low(a
);
5038 vabits8
= sm
->vabits8
[sm_off
];
5040 // Handle common case quickly: a is suitably aligned, is mapped, and the
5041 // entire word32 it lives in is addressible.
5042 // Convert V bits from compact memory form to expanded register form.
5043 // For 64-bit platforms, set the high 32 bits of retval to 1 (undefined).
5044 // Almost certainly not necessary, but be paranoid.
5045 if (LIKELY(vabits8
== VA_BITS8_DEFINED
)) {
5046 return ((UWord
)0xFFFFFFFF00000000ULL
| (UWord
)V_BITS32_DEFINED
);
5047 } else if (LIKELY(vabits8
== VA_BITS8_UNDEFINED
)) {
5048 return ((UWord
)0xFFFFFFFF00000000ULL
| (UWord
)V_BITS32_UNDEFINED
);
5050 /* Slow case: the 4 bytes are not all-defined or all-undefined. */
5051 PROF_EVENT(MCPE_LOADV32_SLOW2
);
5052 return (UWord
)mc_LOADVn_slow( a
, 32, isBigEndian
);
5058 // Generic for all platforms
5059 VG_REGPARM(1) UWord
MC_(helperc_LOADV32be
) ( Addr a
)
5061 return mc_LOADV32(a
, True
);
5064 // Non-generic assembly for arm32-linux
5065 #if ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
5066 && defined(VGP_arm_linux)
5067 __asm__( /* Derived from NCode template */
5070 ".global vgMemCheck_helperc_LOADV32le \n"
5071 ".type vgMemCheck_helperc_LOADV32le, %function \n"
5072 "vgMemCheck_helperc_LOADV32le: \n"
5073 " tst r0, #3 \n" // 1
5074 " movw r3, #:lower16:primary_map \n" // 1
5075 " bne .LLV32LEc4 \n" // 2 if misaligned
5076 " lsr r2, r0, #16 \n" // 3
5077 " movt r3, #:upper16:primary_map \n" // 3
5078 " ldr r2, [r3, r2, lsl #2] \n" // 4
5079 " uxth r1, r0 \n" // 4
5080 " ldrb r1, [r2, r1, lsr #2] \n" // 5
5081 " cmp r1, #0xAA \n" // 6 0xAA == VA_BITS8_DEFINED
5082 " bne .LLV32LEc0 \n" // 7 if !all_defined
5083 " mov r0, #0x0 \n" // 8 0x0 == V_BITS32_DEFINED
5086 " cmp r1, #0x55 \n" // 0x55 == VA_BITS8_UNDEFINED
5087 " bne .LLV32LEc4 \n" // if !all_undefined
5088 " mov r0, #0xFFFFFFFF \n" // 0xFFFFFFFF == V_BITS32_UNDEFINED
5094 " bl mc_LOADVn_slow \n"
5096 ".size vgMemCheck_helperc_LOADV32le, .-vgMemCheck_helperc_LOADV32le \n"
5100 #elif ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
5101 && (defined(VGP_x86_linux) || defined(VGP_x86_solaris))
5105 ".global vgMemCheck_helperc_LOADV32le\n"
5106 ".type vgMemCheck_helperc_LOADV32le, @function\n"
5107 "vgMemCheck_helperc_LOADV32le:\n"
5108 " test $0x3, %eax\n"
5109 " jnz .LLV32LE2\n" /* jump if misaligned */
5112 " mov primary_map(,%edx,4), %ecx\n"
5113 " movzwl %ax, %edx\n"
5115 " movzbl (%ecx,%edx,1), %edx\n"
5116 " cmp $0xaa, %edx\n" /* compare to VA_BITS8_DEFINED */
5117 " jne .LLV32LE1\n" /* jump if not completely defined */
5118 " xor %eax, %eax\n" /* else return V_BITS32_DEFINED */
5121 " cmp $0x55, %edx\n" /* compare to VA_BITS8_UNDEFINED */
5122 " jne .LLV32LE2\n" /* jump if not completely undefined */
5123 " or $0xffffffff, %eax\n" /* else return V_BITS32_UNDEFINED */
5126 " xor %ecx, %ecx\n" /* tail call mc_LOADVn_slow(a, 32, 0) */
5128 " jmp mc_LOADVn_slow\n"
5129 ".size vgMemCheck_helperc_LOADV32le, .-vgMemCheck_helperc_LOADV32le\n"
5134 // Generic for all platforms except {arm32,x86}-linux and x86-solaris
5135 VG_REGPARM(1) UWord
MC_(helperc_LOADV32le
) ( Addr a
)
5137 return mc_LOADV32(a
, False
);
5141 /*------------------------------------------------------------*/
5142 /*--- STOREV32 ---*/
5143 /*------------------------------------------------------------*/
5146 void mc_STOREV32 ( Addr a
, UWord vbits32
, Bool isBigEndian
)
5148 PROF_EVENT(MCPE_STOREV32
);
5150 #ifndef PERF_FAST_STOREV
5151 mc_STOREVn_slow( a
, 32, (ULong
)vbits32
, isBigEndian
);
5154 UWord sm_off
, vabits8
;
5157 if (UNLIKELY( UNALIGNED_OR_HIGH(a
,32) )) {
5158 PROF_EVENT(MCPE_STOREV32_SLOW1
);
5159 mc_STOREVn_slow( a
, 32, (ULong
)vbits32
, isBigEndian
);
5163 sm
= get_secmap_for_reading_low(a
);
5165 vabits8
= sm
->vabits8
[sm_off
];
5167 // To understand the below cleverness, see the extensive comments
5168 // in MC_(helperc_STOREV8).
5169 if (LIKELY(V_BITS32_DEFINED
== vbits32
)) {
5170 if (LIKELY(vabits8
== (UInt
)VA_BITS8_DEFINED
)) {
5173 if (!is_distinguished_sm(sm
) && VA_BITS8_UNDEFINED
== vabits8
) {
5174 sm
->vabits8
[sm_off
] = (UInt
)VA_BITS8_DEFINED
;
5177 PROF_EVENT(MCPE_STOREV32_SLOW2
);
5178 mc_STOREVn_slow( a
, 32, (ULong
)vbits32
, isBigEndian
);
5181 if (V_BITS32_UNDEFINED
== vbits32
) {
5182 if (vabits8
== (UInt
)VA_BITS8_UNDEFINED
) {
5185 if (!is_distinguished_sm(sm
) && VA_BITS8_DEFINED
== vabits8
) {
5186 sm
->vabits8
[sm_off
] = (UInt
)VA_BITS8_UNDEFINED
;
5189 PROF_EVENT(MCPE_STOREV32_SLOW3
);
5190 mc_STOREVn_slow( a
, 32, (ULong
)vbits32
, isBigEndian
);
5194 PROF_EVENT(MCPE_STOREV32_SLOW4
);
5195 mc_STOREVn_slow( a
, 32, (ULong
)vbits32
, isBigEndian
);
5200 VG_REGPARM(2) void MC_(helperc_STOREV32be
) ( Addr a
, UWord vbits32
)
5202 mc_STOREV32(a
, vbits32
, True
);
5204 VG_REGPARM(2) void MC_(helperc_STOREV32le
) ( Addr a
, UWord vbits32
)
5206 mc_STOREV32(a
, vbits32
, False
);
5209 /*------------------------------------------------------------*/
5211 /*------------------------------------------------------------*/
5214 UWord
mc_LOADV16 ( Addr a
, Bool isBigEndian
)
5216 PROF_EVENT(MCPE_LOADV16
);
5218 #ifndef PERF_FAST_LOADV
5219 return (UWord
)mc_LOADVn_slow( a
, 16, isBigEndian
);
5222 UWord sm_off
, vabits8
;
5225 if (UNLIKELY( UNALIGNED_OR_HIGH(a
,16) )) {
5226 PROF_EVENT(MCPE_LOADV16_SLOW1
);
5227 return (UWord
)mc_LOADVn_slow( a
, 16, isBigEndian
);
5230 sm
= get_secmap_for_reading_low(a
);
5232 vabits8
= sm
->vabits8
[sm_off
];
5233 // Handle common case quickly: a is suitably aligned, is mapped, and is
5235 // Convert V bits from compact memory form to expanded register form
5236 if (LIKELY(vabits8
== VA_BITS8_DEFINED
)) { return V_BITS16_DEFINED
; }
5237 else if (LIKELY(vabits8
== VA_BITS8_UNDEFINED
)) { return V_BITS16_UNDEFINED
; }
5239 // The 4 (yes, 4) bytes are not all-defined or all-undefined, check
5240 // the two sub-bytes.
5241 UChar vabits4
= extract_vabits4_from_vabits8(a
, vabits8
);
5242 if (vabits4
== VA_BITS4_DEFINED
) { return V_BITS16_DEFINED
; }
5243 else if (vabits4
== VA_BITS4_UNDEFINED
) { return V_BITS16_UNDEFINED
; }
5245 /* Slow case: the two bytes are not all-defined or all-undefined. */
5246 PROF_EVENT(MCPE_LOADV16_SLOW2
);
5247 return (UWord
)mc_LOADVn_slow( a
, 16, isBigEndian
);
5254 // Generic for all platforms
5255 VG_REGPARM(1) UWord
MC_(helperc_LOADV16be
) ( Addr a
)
5257 return mc_LOADV16(a
, True
);
5260 // Non-generic assembly for arm32-linux
5261 #if ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
5262 && defined(VGP_arm_linux)
5263 __asm__( /* Derived from NCode template */
5266 ".global vgMemCheck_helperc_LOADV16le \n"
5267 ".type vgMemCheck_helperc_LOADV16le, %function \n"
5268 "vgMemCheck_helperc_LOADV16le: \n" //
5270 " bne .LLV16LEc12 \n" // if misaligned
5271 " lsr r2, r0, #16 \n" // r2 = pri-map-ix
5272 " movw r3, #:lower16:primary_map \n" //
5273 " uxth r1, r0 \n" // r1 = sec-map-offB
5274 " movt r3, #:upper16:primary_map \n" //
5275 " ldr r2, [r3, r2, lsl #2] \n" // r2 = sec-map
5276 " ldrb r1, [r2, r1, lsr #2] \n" // r1 = sec-map-VABITS8
5277 " cmp r1, #0xAA \n" // r1 == VA_BITS8_DEFINED?
5278 " bne .LLV16LEc0 \n" // no, goto .LLV16LEc0
5280 " mov r0, #0xFFFFFFFF \n" //
5281 " lsl r0, r0, #16 \n" // V_BITS16_DEFINED | top16safe
5284 " cmp r1, #0x55 \n" // VA_BITS8_UNDEFINED
5285 " bne .LLV16LEc4 \n" //
5287 " mov r0, #0xFFFFFFFF \n" // V_BITS16_UNDEFINED | top16safe
5290 // r1 holds sec-map-VABITS8. r0 holds the address and is 2-aligned.
5291 // Extract the relevant 4 bits and inspect.
5292 " and r2, r0, #2 \n" // addr & 2
5293 " add r2, r2, r2 \n" // 2 * (addr & 2)
5294 " lsr r1, r1, r2 \n" // sec-map-VABITS8 >> (2 * (addr & 2))
5295 " and r1, r1, #15 \n" // (sec-map-VABITS8 >> (2 * (addr & 2))) & 15
5297 " cmp r1, #0xA \n" // VA_BITS4_DEFINED
5298 " beq .LLV16LEh9 \n" //
5300 " cmp r1, #0x5 \n" // VA_BITS4_UNDEFINED
5301 " beq .LLV16LEc2 \n" //
5303 ".LLV16LEc12: \n" //
5304 " push {r4, lr} \n" //
5306 " mov r1, #16 \n" //
5307 " bl mc_LOADVn_slow \n" //
5308 " pop {r4, pc} \n" //
5309 ".size vgMemCheck_helperc_LOADV16le, .-vgMemCheck_helperc_LOADV16le \n"
5313 #elif ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
5314 && (defined(VGP_x86_linux) || defined(VGP_x86_solaris))
5318 ".global vgMemCheck_helperc_LOADV16le\n"
5319 ".type vgMemCheck_helperc_LOADV16le, @function\n"
5320 "vgMemCheck_helperc_LOADV16le:\n"
5321 " test $0x1, %eax\n"
5322 " jne .LLV16LE5\n" /* jump if not aligned */
5324 " shr $0x10, %edx\n"
5325 " mov primary_map(,%edx,4), %ecx\n"
5326 " movzwl %ax, %edx\n"
5328 " movzbl (%ecx,%edx,1), %edx\n"/* edx = VA bits for 32bit */
5329 " cmp $0xaa, %edx\n" /* compare to VA_BITS8_DEFINED */
5330 " jne .LLV16LE2\n" /* jump if not all 32bits defined */
5332 " mov $0xffff0000,%eax\n" /* V_BITS16_DEFINED | top16safe */
5335 " cmp $0x55, %edx\n" /* compare to VA_BITS8_UNDEFINED */
5336 " jne .LLV16LE4\n" /* jump if not all 32bits undefined */
5338 " or $0xffffffff,%eax\n" /* V_BITS16_UNDEFINED | top16safe */
5347 " je .LLV16LE1\n" /* jump if all 16bits are defined */
5349 " je .LLV16LE3\n" /* jump if all 16bits are undefined */
5351 " xor %ecx, %ecx\n" /* tail call mc_LOADVn_slow(a, 16, 0) */
5353 " jmp mc_LOADVn_slow\n"
5354 ".size vgMemCheck_helperc_LOADV16le, .-vgMemCheck_helperc_LOADV16le \n"
5359 // Generic for all platforms except {arm32,x86}-linux and x86-solaris
5360 VG_REGPARM(1) UWord
MC_(helperc_LOADV16le
) ( Addr a
)
5362 return mc_LOADV16(a
, False
);
5366 /*------------------------------------------------------------*/
5367 /*--- STOREV16 ---*/
5368 /*------------------------------------------------------------*/
5370 /* True if the vabits4 in vabits8 indicate a and a+1 are accessible. */
5372 Bool
accessible_vabits4_in_vabits8 ( Addr a
, UChar vabits8
)
5375 tl_assert(VG_IS_2_ALIGNED(a
)); // Must be 2-aligned
5376 shift
= (a
& 2) << 1; // shift by 0 or 4
5377 vabits8
>>= shift
; // shift the four bits to the bottom
5378 // check 2 x vabits2 != VA_BITS2_NOACCESS
5379 return ((0x3 & vabits8
) != VA_BITS2_NOACCESS
)
5380 && ((0xc & vabits8
) != VA_BITS2_NOACCESS
<< 2);
5384 void mc_STOREV16 ( Addr a
, UWord vbits16
, Bool isBigEndian
)
5386 PROF_EVENT(MCPE_STOREV16
);
5388 #ifndef PERF_FAST_STOREV
5389 mc_STOREVn_slow( a
, 16, (ULong
)vbits16
, isBigEndian
);
5392 UWord sm_off
, vabits8
;
5395 if (UNLIKELY( UNALIGNED_OR_HIGH(a
,16) )) {
5396 PROF_EVENT(MCPE_STOREV16_SLOW1
);
5397 mc_STOREVn_slow( a
, 16, (ULong
)vbits16
, isBigEndian
);
5401 sm
= get_secmap_for_reading_low(a
);
5403 vabits8
= sm
->vabits8
[sm_off
];
5405 // To understand the below cleverness, see the extensive comments
5406 // in MC_(helperc_STOREV8).
5407 if (LIKELY(V_BITS16_DEFINED
== vbits16
)) {
5408 if (LIKELY(vabits8
== VA_BITS8_DEFINED
)) {
5411 if (!is_distinguished_sm(sm
)
5412 && accessible_vabits4_in_vabits8(a
, vabits8
)) {
5413 insert_vabits4_into_vabits8( a
, VA_BITS4_DEFINED
,
5414 &(sm
->vabits8
[sm_off
]) );
5417 PROF_EVENT(MCPE_STOREV16_SLOW2
);
5418 mc_STOREVn_slow( a
, 16, (ULong
)vbits16
, isBigEndian
);
5420 if (V_BITS16_UNDEFINED
== vbits16
) {
5421 if (vabits8
== VA_BITS8_UNDEFINED
) {
5424 if (!is_distinguished_sm(sm
)
5425 && accessible_vabits4_in_vabits8(a
, vabits8
)) {
5426 insert_vabits4_into_vabits8( a
, VA_BITS4_UNDEFINED
,
5427 &(sm
->vabits8
[sm_off
]) );
5430 PROF_EVENT(MCPE_STOREV16_SLOW3
);
5431 mc_STOREVn_slow( a
, 16, (ULong
)vbits16
, isBigEndian
);
5435 PROF_EVENT(MCPE_STOREV16_SLOW4
);
5436 mc_STOREVn_slow( a
, 16, (ULong
)vbits16
, isBigEndian
);
5442 VG_REGPARM(2) void MC_(helperc_STOREV16be
) ( Addr a
, UWord vbits16
)
5444 mc_STOREV16(a
, vbits16
, True
);
5446 VG_REGPARM(2) void MC_(helperc_STOREV16le
) ( Addr a
, UWord vbits16
)
5448 mc_STOREV16(a
, vbits16
, False
);
5451 /*------------------------------------------------------------*/
5453 /*------------------------------------------------------------*/
5455 /* Note: endianness is irrelevant for size == 1 */
5457 // Non-generic assembly for arm32-linux
5458 #if ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
5459 && defined(VGP_arm_linux)
5460 __asm__( /* Derived from NCode template */
5463 ".global vgMemCheck_helperc_LOADV8 \n"
5464 ".type vgMemCheck_helperc_LOADV8, %function \n"
5465 "vgMemCheck_helperc_LOADV8: \n" //
5466 " lsr r2, r0, #16 \n" // r2 = pri-map-ix
5467 " movw r3, #:lower16:primary_map \n" //
5468 " uxth r1, r0 \n" // r1 = sec-map-offB
5469 " movt r3, #:upper16:primary_map \n" //
5470 " ldr r2, [r3, r2, lsl #2] \n" // r2 = sec-map
5471 " ldrb r1, [r2, r1, lsr #2] \n" // r1 = sec-map-VABITS8
5472 " cmp r1, #0xAA \n" // r1 == VA_BITS8_DEFINED?
5473 " bne .LLV8c0 \n" // no, goto .LLV8c0
5475 " mov r0, #0xFFFFFF00 \n" // V_BITS8_DEFINED | top24safe
5478 " cmp r1, #0x55 \n" // VA_BITS8_UNDEFINED
5479 " bne .LLV8c4 \n" //
5481 " mov r0, #0xFFFFFFFF \n" // V_BITS8_UNDEFINED | top24safe
5484 // r1 holds sec-map-VABITS8
5485 // r0 holds the address. Extract the relevant 2 bits and inspect.
5486 " and r2, r0, #3 \n" // addr & 3
5487 " add r2, r2, r2 \n" // 2 * (addr & 3)
5488 " lsr r1, r1, r2 \n" // sec-map-VABITS8 >> (2 * (addr & 3))
5489 " and r1, r1, #3 \n" // (sec-map-VABITS8 >> (2 * (addr & 3))) & 3
5491 " cmp r1, #2 \n" // VA_BITS2_DEFINED
5492 " beq .LLV8h9 \n" //
5494 " cmp r1, #1 \n" // VA_BITS2_UNDEFINED
5495 " beq .LLV8c2 \n" //
5497 " push {r4, lr} \n" //
5500 " bl mc_LOADVn_slow \n" //
5501 " pop {r4, pc} \n" //
5502 ".size vgMemCheck_helperc_LOADV8, .-vgMemCheck_helperc_LOADV8 \n"
5506 /* Non-generic assembly for x86-linux */
5507 #elif ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
5508 && (defined(VGP_x86_linux) || defined(VGP_x86_solaris))
5512 ".global vgMemCheck_helperc_LOADV8\n"
5513 ".type vgMemCheck_helperc_LOADV8, @function\n"
5514 "vgMemCheck_helperc_LOADV8:\n"
5516 " shr $0x10, %edx\n"
5517 " mov primary_map(,%edx,4), %ecx\n"
5518 " movzwl %ax, %edx\n"
5520 " movzbl (%ecx,%edx,1), %edx\n"/* edx = VA bits for 32bit */
5521 " cmp $0xaa, %edx\n" /* compare to VA_BITS8_DEFINED? */
5522 " jne .LLV8LE2\n" /* jump if not defined */
5524 " mov $0xffffff00, %eax\n" /* V_BITS8_DEFINED | top24safe */
5527 " cmp $0x55, %edx\n" /* compare to VA_BITS8_UNDEFINED */
5528 " jne .LLV8LE4\n" /* jump if not all 32bits are undefined */
5530 " or $0xffffffff, %eax\n" /* V_BITS8_UNDEFINED | top24safe */
5539 " je .LLV8LE1\n" /* jump if all 8bits are defined */
5541 " je .LLV8LE3\n" /* jump if all 8bits are undefined */
5542 " xor %ecx, %ecx\n" /* tail call to mc_LOADVn_slow(a, 8, 0) */
5544 " jmp mc_LOADVn_slow\n"
5545 ".size vgMemCheck_helperc_LOADV8, .-vgMemCheck_helperc_LOADV8\n"
5550 // Generic for all platforms except {arm32,x86}-linux and x86-solaris
5552 UWord
MC_(helperc_LOADV8
) ( Addr a
)
5554 PROF_EVENT(MCPE_LOADV8
);
5556 #ifndef PERF_FAST_LOADV
5557 return (UWord
)mc_LOADVn_slow( a
, 8, False
/*irrelevant*/ );
5560 UWord sm_off
, vabits8
;
5563 if (UNLIKELY( UNALIGNED_OR_HIGH(a
,8) )) {
5564 PROF_EVENT(MCPE_LOADV8_SLOW1
);
5565 return (UWord
)mc_LOADVn_slow( a
, 8, False
/*irrelevant*/ );
5568 sm
= get_secmap_for_reading_low(a
);
5570 vabits8
= sm
->vabits8
[sm_off
];
5571 // Convert V bits from compact memory form to expanded register form
5572 // Handle common case quickly: a is mapped, and the entire
5573 // word32 it lives in is addressible.
5574 if (LIKELY(vabits8
== VA_BITS8_DEFINED
)) { return V_BITS8_DEFINED
; }
5575 else if (LIKELY(vabits8
== VA_BITS8_UNDEFINED
)) { return V_BITS8_UNDEFINED
; }
5577 // The 4 (yes, 4) bytes are not all-defined or all-undefined, check
5579 UChar vabits2
= extract_vabits2_from_vabits8(a
, vabits8
);
5580 if (vabits2
== VA_BITS2_DEFINED
) { return V_BITS8_DEFINED
; }
5581 else if (vabits2
== VA_BITS2_UNDEFINED
) { return V_BITS8_UNDEFINED
; }
5583 /* Slow case: the byte is not all-defined or all-undefined. */
5584 PROF_EVENT(MCPE_LOADV8_SLOW2
);
5585 return (UWord
)mc_LOADVn_slow( a
, 8, False
/*irrelevant*/ );
5593 /*------------------------------------------------------------*/
5595 /*------------------------------------------------------------*/
5598 void MC_(helperc_STOREV8
) ( Addr a
, UWord vbits8
)
5600 PROF_EVENT(MCPE_STOREV8
);
5602 #ifndef PERF_FAST_STOREV
5603 mc_STOREVn_slow( a
, 8, (ULong
)vbits8
, False
/*irrelevant*/ );
5606 UWord sm_off
, vabits8
;
5609 if (UNLIKELY( UNALIGNED_OR_HIGH(a
,8) )) {
5610 PROF_EVENT(MCPE_STOREV8_SLOW1
);
5611 mc_STOREVn_slow( a
, 8, (ULong
)vbits8
, False
/*irrelevant*/ );
5615 sm
= get_secmap_for_reading_low(a
);
5617 vabits8
= sm
->vabits8
[sm_off
];
5619 // Clevernesses to speed up storing V bits.
5620 // The 64/32/16 bit cases also have similar clevernesses, but it
5621 // works a little differently to the code below.
5623 // Cleverness 1: sometimes we don't have to write the shadow memory at
5624 // all, if we can tell that what we want to write is the same as what is
5625 // already there. These cases are marked below as "defined on defined" and
5626 // "undefined on undefined".
5629 // We also avoid to call mc_STOREVn_slow if the V bits can directly
5630 // be written in the secondary map. V bits can be directly written
5631 // if 4 conditions are respected:
5632 // * The address for which V bits are written is naturally aligned
5633 // on 1 byte for STOREV8 (this is always true)
5634 // on 2 bytes for STOREV16
5635 // on 4 bytes for STOREV32
5636 // on 8 bytes for STOREV64.
5637 // * V bits being written are either fully defined or fully undefined.
5638 // (for partially defined V bits, V bits cannot be directly written,
5639 // as the secondary vbits table must be maintained).
5640 // * the secmap is not distinguished (distinguished maps cannot be
5642 // * the memory corresponding to the V bits being written is
5643 // accessible (if one or more bytes are not accessible,
5644 // we must call mc_STOREVn_slow in order to report accessibility
5646 // Note that for STOREV32 and STOREV64, it is too expensive
5647 // to verify the accessibility of each byte for the benefit it
5648 // brings. Instead, a quicker check is done by comparing to
5649 // VA_BITS(8|16)_(UN)DEFINED. This guarantees accessibility,
5650 // but misses some opportunity of direct modifications.
5651 // Checking each byte accessibility was measured for
5652 // STOREV32+perf tests and was slowing down all perf tests.
5653 // The cases corresponding to cleverness 2 are marked below as
5655 if (LIKELY(V_BITS8_DEFINED
== vbits8
)) {
5656 if (LIKELY(vabits8
== VA_BITS8_DEFINED
)) {
5657 return; // defined on defined
5659 if (!is_distinguished_sm(sm
)
5660 && VA_BITS2_NOACCESS
!= extract_vabits2_from_vabits8(a
, vabits8
)) {
5662 insert_vabits2_into_vabits8( a
, VA_BITS2_DEFINED
,
5663 &(sm
->vabits8
[sm_off
]) );
5666 PROF_EVENT(MCPE_STOREV8_SLOW2
);
5667 mc_STOREVn_slow( a
, 8, (ULong
)vbits8
, False
/*irrelevant*/ );
5670 if (V_BITS8_UNDEFINED
== vbits8
) {
5671 if (vabits8
== VA_BITS8_UNDEFINED
) {
5672 return; // undefined on undefined
5674 if (!is_distinguished_sm(sm
)
5675 && (VA_BITS2_NOACCESS
5676 != extract_vabits2_from_vabits8(a
, vabits8
))) {
5678 insert_vabits2_into_vabits8( a
, VA_BITS2_UNDEFINED
,
5679 &(sm
->vabits8
[sm_off
]) );
5682 PROF_EVENT(MCPE_STOREV8_SLOW3
);
5683 mc_STOREVn_slow( a
, 8, (ULong
)vbits8
, False
/*irrelevant*/ );
5687 // Partially defined word
5688 PROF_EVENT(MCPE_STOREV8_SLOW4
);
5689 mc_STOREVn_slow( a
, 8, (ULong
)vbits8
, False
/*irrelevant*/ );
5695 /*------------------------------------------------------------*/
5696 /*--- Functions called directly from generated code: ---*/
5697 /*--- Value-check failure handlers. ---*/
5698 /*------------------------------------------------------------*/
5700 /* Call these ones when an origin is available ... */
5702 void MC_(helperc_value_check0_fail_w_o
) ( UWord origin
) {
5703 MC_(record_cond_error
) ( VG_(get_running_tid
)(), (UInt
)origin
);
5707 void MC_(helperc_value_check1_fail_w_o
) ( UWord origin
) {
5708 MC_(record_value_error
) ( VG_(get_running_tid
)(), 1, (UInt
)origin
);
5712 void MC_(helperc_value_check4_fail_w_o
) ( UWord origin
) {
5713 MC_(record_value_error
) ( VG_(get_running_tid
)(), 4, (UInt
)origin
);
5717 void MC_(helperc_value_check8_fail_w_o
) ( UWord origin
) {
5718 MC_(record_value_error
) ( VG_(get_running_tid
)(), 8, (UInt
)origin
);
5722 void MC_(helperc_value_checkN_fail_w_o
) ( HWord sz
, UWord origin
) {
5723 MC_(record_value_error
) ( VG_(get_running_tid
)(), (Int
)sz
, (UInt
)origin
);
5726 /* ... and these when an origin isn't available. */
5729 void MC_(helperc_value_check0_fail_no_o
) ( void ) {
5730 MC_(record_cond_error
) ( VG_(get_running_tid
)(), 0/*origin*/ );
5734 void MC_(helperc_value_check1_fail_no_o
) ( void ) {
5735 MC_(record_value_error
) ( VG_(get_running_tid
)(), 1, 0/*origin*/ );
5739 void MC_(helperc_value_check4_fail_no_o
) ( void ) {
5740 MC_(record_value_error
) ( VG_(get_running_tid
)(), 4, 0/*origin*/ );
5744 void MC_(helperc_value_check8_fail_no_o
) ( void ) {
5745 MC_(record_value_error
) ( VG_(get_running_tid
)(), 8, 0/*origin*/ );
5749 void MC_(helperc_value_checkN_fail_no_o
) ( HWord sz
) {
5750 MC_(record_value_error
) ( VG_(get_running_tid
)(), (Int
)sz
, 0/*origin*/ );
5754 /*------------------------------------------------------------*/
5755 /*--- Metadata get/set functions, for client requests. ---*/
5756 /*------------------------------------------------------------*/
5758 // Nb: this expands the V+A bits out into register-form V bits, even though
5759 // they're in memory. This is for backward compatibility, and because it's
5760 // probably what the user wants.
5762 /* Copy Vbits from/to address 'a'. Returns: 1 == OK, 2 == alignment
5763 error [no longer used], 3 == addressing error. */
5764 /* Nb: We used to issue various definedness/addressability errors from here,
5765 but we took them out because they ranged from not-very-helpful to
5766 downright annoying, and they complicated the error data structures. */
5767 static Int
mc_get_or_set_vbits_for_client (
5771 Bool setting
, /* True <=> set vbits, False <=> get vbits */
5772 Bool is_client_request
/* True <=> real user request
5773 False <=> internal call from gdbserver */
5780 /* Check that arrays are addressible before doing any getting/setting.
5781 vbits to be checked only for real user request. */
5782 for (i
= 0; i
< szB
; i
++) {
5783 if (VA_BITS2_NOACCESS
== get_vabits2(a
+ i
) ||
5784 (is_client_request
&& VA_BITS2_NOACCESS
== get_vabits2(vbits
+ i
))) {
5792 for (i
= 0; i
< szB
; i
++) {
5793 ok
= set_vbits8(a
+ i
, ((UChar
*)vbits
)[i
]);
5798 for (i
= 0; i
< szB
; i
++) {
5799 ok
= get_vbits8(a
+ i
, &vbits8
);
5801 ((UChar
*)vbits
)[i
] = vbits8
;
5803 if (is_client_request
)
5804 // The bytes in vbits[] have now been set, so mark them as such.
5805 MC_(make_mem_defined
)(vbits
, szB
);
5812 /*------------------------------------------------------------*/
5813 /*--- Detecting leaked (unreachable) malloc'd blocks. ---*/
5814 /*------------------------------------------------------------*/
5816 /* For the memory leak detector, say whether an entire 64k chunk of
5817 address space is possibly in use, or not. If in doubt return
5820 Bool
MC_(is_within_valid_secondary
) ( Addr a
)
5822 SecMap
* sm
= maybe_get_secmap_for ( a
);
5823 if (sm
== NULL
|| sm
== &sm_distinguished
[SM_DIST_NOACCESS
]) {
5824 /* Definitely not in use. */
5832 /* For the memory leak detector, say whether or not a given word
5833 address is to be regarded as valid. */
5834 Bool
MC_(is_valid_aligned_word
) ( Addr a
)
5836 tl_assert(sizeof(UWord
) == 4 || sizeof(UWord
) == 8);
5837 tl_assert(VG_IS_WORD_ALIGNED(a
));
5838 if (get_vabits8_for_aligned_word32 (a
) != VA_BITS8_DEFINED
)
5840 if (sizeof(UWord
) == 8) {
5841 if (get_vabits8_for_aligned_word32 (a
+ 4) != VA_BITS8_DEFINED
)
5844 if (UNLIKELY(MC_(in_ignored_range
)(a
)))
5851 /*------------------------------------------------------------*/
5852 /*--- Initialisation ---*/
5853 /*------------------------------------------------------------*/
5855 static void init_shadow_memory ( void )
5860 tl_assert(V_BIT_UNDEFINED
== 1);
5861 tl_assert(V_BIT_DEFINED
== 0);
5862 tl_assert(V_BITS8_UNDEFINED
== 0xFF);
5863 tl_assert(V_BITS8_DEFINED
== 0);
5865 /* Build the 3 distinguished secondaries */
5866 sm
= &sm_distinguished
[SM_DIST_NOACCESS
];
5867 for (i
= 0; i
< SM_CHUNKS
; i
++) sm
->vabits8
[i
] = VA_BITS8_NOACCESS
;
5869 sm
= &sm_distinguished
[SM_DIST_UNDEFINED
];
5870 for (i
= 0; i
< SM_CHUNKS
; i
++) sm
->vabits8
[i
] = VA_BITS8_UNDEFINED
;
5872 sm
= &sm_distinguished
[SM_DIST_DEFINED
];
5873 for (i
= 0; i
< SM_CHUNKS
; i
++) sm
->vabits8
[i
] = VA_BITS8_DEFINED
;
5875 /* Set up the primary map. */
5876 /* These entries gradually get overwritten as the used address
5878 for (i
= 0; i
< N_PRIMARY_MAP
; i
++)
5879 primary_map
[i
] = &sm_distinguished
[SM_DIST_NOACCESS
];
5881 /* Auxiliary primary maps */
5882 init_auxmap_L1_L2();
5884 /* auxmap_size = auxmap_used = 0;
5885 no ... these are statically initialised */
5887 /* Secondary V bit table */
5888 secVBitTable
= createSecVBitTable();
5892 /*------------------------------------------------------------*/
5893 /*--- Sanity check machinery (permanently engaged) ---*/
5894 /*------------------------------------------------------------*/
5896 static Bool
mc_cheap_sanity_check ( void )
5899 PROF_EVENT(MCPE_CHEAP_SANITY_CHECK
);
5900 /* Check for sane operating level */
5901 if (MC_(clo_mc_level
) < 1 || MC_(clo_mc_level
) > 3)
5903 /* nothing else useful we can rapidly check */
5907 static Bool
mc_expensive_sanity_check ( void )
5910 Word n_secmaps_found
;
5912 const HChar
* errmsg
;
5915 if (0) VG_(printf
)("expensive sanity check\n");
5918 n_sanity_expensive
++;
5919 PROF_EVENT(MCPE_EXPENSIVE_SANITY_CHECK
);
5921 /* Check for sane operating level */
5922 if (MC_(clo_mc_level
) < 1 || MC_(clo_mc_level
) > 3)
5925 /* Check that the 3 distinguished SMs are still as they should be. */
5927 /* Check noaccess DSM. */
5928 sm
= &sm_distinguished
[SM_DIST_NOACCESS
];
5929 for (i
= 0; i
< SM_CHUNKS
; i
++)
5930 if (sm
->vabits8
[i
] != VA_BITS8_NOACCESS
)
5933 /* Check undefined DSM. */
5934 sm
= &sm_distinguished
[SM_DIST_UNDEFINED
];
5935 for (i
= 0; i
< SM_CHUNKS
; i
++)
5936 if (sm
->vabits8
[i
] != VA_BITS8_UNDEFINED
)
5939 /* Check defined DSM. */
5940 sm
= &sm_distinguished
[SM_DIST_DEFINED
];
5941 for (i
= 0; i
< SM_CHUNKS
; i
++)
5942 if (sm
->vabits8
[i
] != VA_BITS8_DEFINED
)
5946 VG_(printf
)("memcheck expensive sanity: "
5947 "distinguished_secondaries have changed\n");
5951 /* If we're not checking for undefined value errors, the secondary V bit
5952 * table should be empty. */
5953 if (MC_(clo_mc_level
) == 1) {
5954 if (0 != VG_(OSetGen_Size
)(secVBitTable
))
5958 /* check the auxiliary maps, very thoroughly */
5959 n_secmaps_found
= 0;
5960 errmsg
= check_auxmap_L1_L2_sanity( &n_secmaps_found
);
5962 VG_(printf
)("memcheck expensive sanity, auxmaps:\n\t%s", errmsg
);
5966 /* n_secmaps_found is now the number referred to by the auxiliary
5967 primary map. Now add on the ones referred to by the main
5969 for (i
= 0; i
< N_PRIMARY_MAP
; i
++) {
5970 if (primary_map
[i
] == NULL
) {
5973 if (!is_distinguished_sm(primary_map
[i
]))
5978 /* check that the number of secmaps issued matches the number that
5979 are reachable (iow, no secmap leaks) */
5980 if (n_secmaps_found
!= (n_issued_SMs
- n_deissued_SMs
))
5984 VG_(printf
)("memcheck expensive sanity: "
5985 "apparent secmap leakage\n");
5990 VG_(printf
)("memcheck expensive sanity: "
5991 "auxmap covers wrong address space\n");
5995 /* there is only one pointer to each secmap (expensive) */
6000 /*------------------------------------------------------------*/
6001 /*--- Command line args ---*/
6002 /*------------------------------------------------------------*/
6004 /* 31 Aug 2015: Vectorised code is now so widespread that
6005 --partial-loads-ok needs to be enabled by default on all platforms.
6006 Not doing so causes lots of false errors. */
6007 Bool
MC_(clo_partial_loads_ok
) = True
;
6008 Long
MC_(clo_freelist_vol
) = 20*1000*1000LL;
6009 Long
MC_(clo_freelist_big_blocks
) = 1*1000*1000LL;
6010 LeakCheckMode
MC_(clo_leak_check
) = LC_Summary
;
6011 VgRes
MC_(clo_leak_resolution
) = Vg_HighRes
;
6012 UInt
MC_(clo_show_leak_kinds
) = R2S(Possible
) | R2S(Unreached
);
6013 UInt
MC_(clo_error_for_leak_kinds
) = R2S(Possible
) | R2S(Unreached
);
6014 UInt
MC_(clo_leak_check_heuristics
) = H2S(LchStdString
)
6017 | H2S( LchMultipleInheritance
);
6018 Bool
MC_(clo_xtree_leak
) = False
;
6019 const HChar
* MC_(clo_xtree_leak_file
) = "xtleak.kcg.%p";
6020 Bool
MC_(clo_workaround_gcc296_bugs
) = False
;
6021 Int
MC_(clo_malloc_fill
) = -1;
6022 Int
MC_(clo_free_fill
) = -1;
6023 KeepStacktraces
MC_(clo_keep_stacktraces
) = KS_alloc_and_free
;
6024 Int
MC_(clo_mc_level
) = 2;
6025 Bool
MC_(clo_show_mismatched_frees
) = True
;
6027 ExpensiveDefinednessChecks
6028 MC_(clo_expensive_definedness_checks
) = EdcAUTO
;
6030 Bool
MC_(clo_ignore_range_below_sp
) = False
;
6031 UInt
MC_(clo_ignore_range_below_sp__first_offset
) = 0;
6032 UInt
MC_(clo_ignore_range_below_sp__last_offset
) = 0;
6034 static const HChar
* MC_(parse_leak_heuristics_tokens
) =
6035 "-,stdstring,length64,newarray,multipleinheritance";
6036 /* The first heuristic value (LchNone) has no keyword, as this is
6037 a fake heuristic used to collect the blocks found without any
6040 static Bool
mc_process_cmd_line_options(const HChar
* arg
)
6042 const HChar
* tmp_str
;
6045 tl_assert( MC_(clo_mc_level
) >= 1 && MC_(clo_mc_level
) <= 3 );
6047 /* Set MC_(clo_mc_level):
6048 1 = A bit tracking only
6049 2 = A and V bit tracking, but no V bit origins
6050 3 = A and V bit tracking, and V bit origins
6052 Do this by inspecting --undef-value-errors= and
6053 --track-origins=. Reject the case --undef-value-errors=no
6054 --track-origins=yes as meaningless.
6056 if (0 == VG_(strcmp
)(arg
, "--undef-value-errors=no")) {
6057 if (MC_(clo_mc_level
) == 3) {
6060 MC_(clo_mc_level
) = 1;
6064 if (0 == VG_(strcmp
)(arg
, "--undef-value-errors=yes")) {
6065 if (MC_(clo_mc_level
) == 1)
6066 MC_(clo_mc_level
) = 2;
6069 if (0 == VG_(strcmp
)(arg
, "--track-origins=no")) {
6070 if (MC_(clo_mc_level
) == 3)
6071 MC_(clo_mc_level
) = 2;
6074 if (0 == VG_(strcmp
)(arg
, "--track-origins=yes")) {
6075 if (MC_(clo_mc_level
) == 1) {
6078 MC_(clo_mc_level
) = 3;
6083 if VG_BOOL_CLO(arg
, "--partial-loads-ok", MC_(clo_partial_loads_ok
)) {}
6084 else if VG_USET_CLO(arg
, "--errors-for-leak-kinds",
6085 MC_(parse_leak_kinds_tokens
),
6086 MC_(clo_error_for_leak_kinds
)) {}
6087 else if VG_USET_CLO(arg
, "--show-leak-kinds",
6088 MC_(parse_leak_kinds_tokens
),
6089 MC_(clo_show_leak_kinds
)) {}
6090 else if VG_USET_CLO(arg
, "--leak-check-heuristics",
6091 MC_(parse_leak_heuristics_tokens
),
6092 MC_(clo_leak_check_heuristics
)) {}
6093 else if (VG_BOOL_CLO(arg
, "--show-reachable", tmp_show
)) {
6095 MC_(clo_show_leak_kinds
) = MC_(all_Reachedness
)();
6097 MC_(clo_show_leak_kinds
) &= ~R2S(Reachable
);
6100 else if VG_BOOL_CLO(arg
, "--show-possibly-lost", tmp_show
) {
6102 MC_(clo_show_leak_kinds
) |= R2S(Possible
);
6104 MC_(clo_show_leak_kinds
) &= ~R2S(Possible
);
6107 else if VG_BOOL_CLO(arg
, "--workaround-gcc296-bugs",
6108 MC_(clo_workaround_gcc296_bugs
)) {}
6110 else if VG_BINT_CLO(arg
, "--freelist-vol", MC_(clo_freelist_vol
),
6111 0, 10*1000*1000*1000LL) {}
6113 else if VG_BINT_CLO(arg
, "--freelist-big-blocks",
6114 MC_(clo_freelist_big_blocks
),
6115 0, 10*1000*1000*1000LL) {}
6117 else if VG_XACT_CLO(arg
, "--leak-check=no",
6118 MC_(clo_leak_check
), LC_Off
) {}
6119 else if VG_XACT_CLO(arg
, "--leak-check=summary",
6120 MC_(clo_leak_check
), LC_Summary
) {}
6121 else if VG_XACT_CLO(arg
, "--leak-check=yes",
6122 MC_(clo_leak_check
), LC_Full
) {}
6123 else if VG_XACT_CLO(arg
, "--leak-check=full",
6124 MC_(clo_leak_check
), LC_Full
) {}
6126 else if VG_XACT_CLO(arg
, "--leak-resolution=low",
6127 MC_(clo_leak_resolution
), Vg_LowRes
) {}
6128 else if VG_XACT_CLO(arg
, "--leak-resolution=med",
6129 MC_(clo_leak_resolution
), Vg_MedRes
) {}
6130 else if VG_XACT_CLO(arg
, "--leak-resolution=high",
6131 MC_(clo_leak_resolution
), Vg_HighRes
) {}
6133 else if VG_STR_CLO(arg
, "--ignore-ranges", tmp_str
) {
6134 Bool ok
= parse_ignore_ranges(tmp_str
);
6136 VG_(message
)(Vg_DebugMsg
,
6137 "ERROR: --ignore-ranges: "
6138 "invalid syntax, or end <= start in range\n");
6141 if (gIgnoredAddressRanges
) {
6143 for (i
= 0; i
< VG_(sizeRangeMap
)(gIgnoredAddressRanges
); i
++) {
6144 UWord val
= IAR_INVALID
;
6145 UWord key_min
= ~(UWord
)0;
6146 UWord key_max
= (UWord
)0;
6147 VG_(indexRangeMap
)( &key_min
, &key_max
, &val
,
6148 gIgnoredAddressRanges
, i
);
6149 tl_assert(key_min
<= key_max
);
6150 UWord limit
= 0x4000000; /* 64M - entirely arbitrary limit */
6151 if (key_max
- key_min
> limit
&& val
== IAR_CommandLine
) {
6152 VG_(message
)(Vg_DebugMsg
,
6153 "ERROR: --ignore-ranges: suspiciously large range:\n");
6154 VG_(message
)(Vg_DebugMsg
,
6155 " 0x%lx-0x%lx (size %lu)\n", key_min
, key_max
,
6156 key_max
- key_min
+ 1);
6163 else if VG_STR_CLO(arg
, "--ignore-range-below-sp", tmp_str
) {
6164 /* This seems at first a bit weird, but: in order to imply
6165 a non-wrapped-around address range, the first offset needs to be
6166 larger than the second one. For example
6167 --ignore-range-below-sp=8192,8189
6168 would cause accesses to in the range [SP-8192, SP-8189] to be
6170 UInt offs1
= 0, offs2
= 0;
6171 Bool ok
= parse_UInt_pair(&tmp_str
, &offs1
, &offs2
);
6172 // Ensure we used all the text after the '=' sign.
6173 if (ok
&& *tmp_str
!= 0) ok
= False
;
6175 VG_(message
)(Vg_DebugMsg
,
6176 "ERROR: --ignore-range-below-sp: invalid syntax. "
6177 " Expected \"...=decimalnumber-decimalnumber\".\n");
6180 if (offs1
> 1000*1000 /*arbitrary*/ || offs2
> 1000*1000 /*ditto*/) {
6181 VG_(message
)(Vg_DebugMsg
,
6182 "ERROR: --ignore-range-below-sp: suspiciously large "
6183 "offset(s): %u and %u\n", offs1
, offs2
);
6186 if (offs1
<= offs2
) {
6187 VG_(message
)(Vg_DebugMsg
,
6188 "ERROR: --ignore-range-below-sp: invalid offsets "
6189 "(the first must be larger): %u and %u\n", offs1
, offs2
);
6192 tl_assert(offs1
> offs2
);
6193 if (offs1
- offs2
> 4096 /*arbitrary*/) {
6194 VG_(message
)(Vg_DebugMsg
,
6195 "ERROR: --ignore-range-below-sp: suspiciously large "
6196 "range: %u-%u (size %u)\n", offs1
, offs2
, offs1
- offs2
);
6199 MC_(clo_ignore_range_below_sp
) = True
;
6200 MC_(clo_ignore_range_below_sp__first_offset
) = offs1
;
6201 MC_(clo_ignore_range_below_sp__last_offset
) = offs2
;
6205 else if VG_BHEX_CLO(arg
, "--malloc-fill", MC_(clo_malloc_fill
), 0x00,0xFF) {}
6206 else if VG_BHEX_CLO(arg
, "--free-fill", MC_(clo_free_fill
), 0x00,0xFF) {}
6208 else if VG_XACT_CLO(arg
, "--keep-stacktraces=alloc",
6209 MC_(clo_keep_stacktraces
), KS_alloc
) {}
6210 else if VG_XACT_CLO(arg
, "--keep-stacktraces=free",
6211 MC_(clo_keep_stacktraces
), KS_free
) {}
6212 else if VG_XACT_CLO(arg
, "--keep-stacktraces=alloc-and-free",
6213 MC_(clo_keep_stacktraces
), KS_alloc_and_free
) {}
6214 else if VG_XACT_CLO(arg
, "--keep-stacktraces=alloc-then-free",
6215 MC_(clo_keep_stacktraces
), KS_alloc_then_free
) {}
6216 else if VG_XACT_CLO(arg
, "--keep-stacktraces=none",
6217 MC_(clo_keep_stacktraces
), KS_none
) {}
6219 else if VG_BOOL_CLO(arg
, "--show-mismatched-frees",
6220 MC_(clo_show_mismatched_frees
)) {}
6222 else if VG_XACT_CLO(arg
, "--expensive-definedness-checks=no",
6223 MC_(clo_expensive_definedness_checks
), EdcNO
) {}
6224 else if VG_XACT_CLO(arg
, "--expensive-definedness-checks=auto",
6225 MC_(clo_expensive_definedness_checks
), EdcAUTO
) {}
6226 else if VG_XACT_CLO(arg
, "--expensive-definedness-checks=yes",
6227 MC_(clo_expensive_definedness_checks
), EdcYES
) {}
6229 else if VG_BOOL_CLO(arg
, "--xtree-leak",
6230 MC_(clo_xtree_leak
)) {}
6231 else if VG_STR_CLO (arg
, "--xtree-leak-file",
6232 MC_(clo_xtree_leak_file
)) {}
6235 return VG_(replacement_malloc_process_cmd_line_option
)(arg
);
6241 VG_(fmsg_bad_option
)(arg
,
6242 "--track-origins=yes has no effect when --undef-value-errors=no.\n");
6245 static void mc_print_usage(void)
6248 " --leak-check=no|summary|full search for memory leaks at exit? [summary]\n"
6249 " --leak-resolution=low|med|high differentiation of leak stack traces [high]\n"
6250 " --show-leak-kinds=kind1,kind2,.. which leak kinds to show?\n"
6251 " [definite,possible]\n"
6252 " --errors-for-leak-kinds=kind1,kind2,.. which leak kinds are errors?\n"
6253 " [definite,possible]\n"
6254 " where kind is one of:\n"
6255 " definite indirect possible reachable all none\n"
6256 " --leak-check-heuristics=heur1,heur2,... which heuristics to use for\n"
6257 " improving leak search false positive [all]\n"
6258 " where heur is one of:\n"
6259 " stdstring length64 newarray multipleinheritance all none\n"
6260 " --show-reachable=yes same as --show-leak-kinds=all\n"
6261 " --show-reachable=no --show-possibly-lost=yes\n"
6262 " same as --show-leak-kinds=definite,possible\n"
6263 " --show-reachable=no --show-possibly-lost=no\n"
6264 " same as --show-leak-kinds=definite\n"
6265 " --xtree-leak=no|yes output leak result in xtree format? [no]\n"
6266 " --xtree-leak-file=<file> xtree leak report file [xtleak.kcg.%%p]\n"
6267 " --undef-value-errors=no|yes check for undefined value errors [yes]\n"
6268 " --track-origins=no|yes show origins of undefined values? [no]\n"
6269 " --partial-loads-ok=no|yes too hard to explain here; see manual [yes]\n"
6270 " --expensive-definedness-checks=no|auto|yes\n"
6271 " Use extra-precise definedness tracking [auto]\n"
6272 " --freelist-vol=<number> volume of freed blocks queue [20000000]\n"
6273 " --freelist-big-blocks=<number> releases first blocks with size>= [1000000]\n"
6274 " --workaround-gcc296-bugs=no|yes self explanatory [no]. Deprecated.\n"
6275 " Use --ignore-range-below-sp instead.\n"
6276 " --ignore-ranges=0xPP-0xQQ[,0xRR-0xSS] assume given addresses are OK\n"
6277 " --ignore-range-below-sp=<number>-<number> do not report errors for\n"
6278 " accesses at the given offsets below SP\n"
6279 " --malloc-fill=<hexnumber> fill malloc'd areas with given value\n"
6280 " --free-fill=<hexnumber> fill free'd areas with given value\n"
6281 " --keep-stacktraces=alloc|free|alloc-and-free|alloc-then-free|none\n"
6282 " stack trace(s) to keep for malloc'd/free'd areas [alloc-and-free]\n"
6283 " --show-mismatched-frees=no|yes show frees that don't match the allocator? [yes]\n"
6287 static void mc_print_debug_usage(void)
6295 /*------------------------------------------------------------*/
6296 /*--- Client blocks ---*/
6297 /*------------------------------------------------------------*/
6299 /* Client block management:
6301 This is managed as an expanding array of client block descriptors.
6302 Indices of live descriptors are issued to the client, so it can ask
6303 to free them later. Therefore we cannot slide live entries down
6304 over dead ones. Instead we must use free/inuse flags and scan for
6305 an empty slot at allocation time. This in turn means allocation is
6306 relatively expensive, so we hope this does not happen too often.
6308 An unused block has start == size == 0
6311 /* type CGenBlock is defined in mc_include.h */
6313 /* This subsystem is self-initialising. */
6314 static UWord cgb_size
= 0;
6315 static UWord cgb_used
= 0;
6316 static CGenBlock
* cgbs
= NULL
;
6318 /* Stats for this subsystem. */
6319 static ULong cgb_used_MAX
= 0; /* Max in use. */
6320 static ULong cgb_allocs
= 0; /* Number of allocs. */
6321 static ULong cgb_discards
= 0; /* Number of discards. */
6322 static ULong cgb_search
= 0; /* Number of searches. */
6325 /* Get access to the client block array. */
6326 void MC_(get_ClientBlock_array
)( /*OUT*/CGenBlock
** blocks
,
6327 /*OUT*/UWord
* nBlocks
)
6330 *nBlocks
= cgb_used
;
6335 Int
alloc_client_block ( void )
6338 CGenBlock
* cgbs_new
;
6342 for (i
= 0; i
< cgb_used
; i
++) {
6344 if (cgbs
[i
].start
== 0 && cgbs
[i
].size
== 0)
6348 /* Not found. Try to allocate one at the end. */
6349 if (cgb_used
< cgb_size
) {
6354 /* Ok, we have to allocate a new one. */
6355 tl_assert(cgb_used
== cgb_size
);
6356 sz_new
= (cgbs
== NULL
) ? 10 : (2 * cgb_size
);
6358 cgbs_new
= VG_(malloc
)( "mc.acb.1", sz_new
* sizeof(CGenBlock
) );
6359 for (i
= 0; i
< cgb_used
; i
++)
6360 cgbs_new
[i
] = cgbs
[i
];
6368 if (cgb_used
> cgb_used_MAX
)
6369 cgb_used_MAX
= cgb_used
;
6374 static void show_client_block_stats ( void )
6376 VG_(message
)(Vg_DebugMsg
,
6377 "general CBs: %llu allocs, %llu discards, %llu maxinuse, %llu search\n",
6378 cgb_allocs
, cgb_discards
, cgb_used_MAX
, cgb_search
6381 static void print_monitor_help ( void )
6386 "memcheck monitor commands:\n"
6387 " xb <addr> [<len>]\n"
6388 " prints validity bits for <len> (or 1) bytes at <addr>\n"
6389 " bit values 0 = valid, 1 = invalid, __ = unaddressable byte\n"
6390 " Then prints the bytes values below the corresponding validity bits\n"
6391 " in a layout similar to the gdb command 'x /<len>xb <addr>'\n"
6392 " Example: xb 0x8049c78 10\n"
6393 " get_vbits <addr> [<len>]\n"
6394 " Similar to xb, but only prints the validity bytes by group of 4.\n"
6395 " make_memory [noaccess|undefined\n"
6396 " |defined|Definedifaddressable] <addr> [<len>]\n"
6397 " mark <len> (or 1) bytes at <addr> with the given accessibility\n"
6398 " check_memory [addressable|defined] <addr> [<len>]\n"
6399 " check that <len> (or 1) bytes at <addr> have the given accessibility\n"
6400 " and outputs a description of <addr>\n"
6401 " leak_check [full*|summary|xtleak]\n"
6402 " [kinds kind1,kind2,...|reachable|possibleleak*|definiteleak]\n"
6403 " [heuristics heur1,heur2,...]\n"
6404 " [increased*|changed|any]\n"
6405 " [unlimited*|limited <max_loss_records_output>]\n"
6407 " xtleak produces an xtree full leak result in xtleak.kcg.%%p.%%n\n"
6408 " where kind is one of:\n"
6409 " definite indirect possible reachable all none\n"
6410 " where heur is one of:\n"
6411 " stdstring length64 newarray multipleinheritance all none*\n"
6412 " Examples: leak_check\n"
6413 " leak_check summary any\n"
6414 " leak_check full kinds indirect,possible\n"
6415 " leak_check full reachable any limited 100\n"
6416 " block_list <loss_record_nr>|<loss_record_nr_from>..<loss_record_nr_to>\n"
6417 " [unlimited*|limited <max_blocks>]\n"
6418 " [heuristics heur1,heur2,...]\n"
6419 " after a leak search, shows the list of blocks of <loss_record_nr>\n"
6420 " (or of the range <loss_record_nr_from>..<loss_record_nr_to>).\n"
6421 " With heuristics, only shows the blocks found via heur1,heur2,...\n"
6423 " who_points_at <addr> [<len>]\n"
6424 " shows places pointing inside <len> (default 1) bytes at <addr>\n"
6425 " (with len 1, only shows \"start pointers\" pointing exactly to <addr>,\n"
6426 " with len > 1, will also show \"interior pointers\")\n"
6427 " xtmemory [<filename>]\n"
6428 " dump xtree memory profile in <filename> (default xtmemory.kcg.%%p.%%n)\n"
6432 /* Print szB bytes at address, with a format similar to the gdb command
6434 res[i] == 1 indicates the corresponding byte is addressable. */
6435 static void gdb_xb (Addr address
, SizeT szB
, Int res
[])
6439 for (i
= 0; i
< szB
; i
++) {
6443 VG_(printf
) ("\n"); // Terminate previous line
6444 VG_(printf
) ("%p:", (void*)(address
+i
));
6447 VG_(printf
) ("\t0x%02x", *(UChar
*)(address
+i
));
6449 VG_(printf
) ("\t0x??");
6451 VG_(printf
) ("\n"); // Terminate previous line
6455 /* Returns the address of the next non space character,
6456 or address of the string terminator. */
6457 static HChar
* next_non_space (HChar
*s
)
6459 while (*s
&& *s
== ' ')
6464 /* Parse an integer slice, i.e. a single integer or a range of integer.
6466 <integer>[..<integer> ]
6467 (spaces are allowed before and/or after ..).
6468 Return True if range correctly parsed, False otherwise. */
6469 static Bool
VG_(parse_slice
) (HChar
* s
, HChar
** saveptr
,
6470 UInt
*from
, UInt
*to
)
6475 wl
= VG_(strtok_r
) (s
, " ", saveptr
);
6477 /* slice must start with an integer. */
6479 VG_(gdb_printf
) ("expecting integer or slice <from>..<to>\n");
6482 *from
= VG_(strtoull10
) (wl
, &endptr
);
6484 VG_(gdb_printf
) ("invalid integer or slice <from>..<to>\n");
6488 if (*endptr
== '\0' && *next_non_space(*saveptr
) != '.') {
6489 /* wl token is an integer terminating the string
6490 or else next token does not start with .
6491 In both cases, the slice is a single integer. */
6496 if (*endptr
== '\0') {
6497 // iii .. => get the next token
6498 wl
= VG_(strtok_r
) (NULL
, " .", saveptr
);
6501 if (*endptr
!= '.' && *(endptr
+1) != '.') {
6502 VG_(gdb_printf
) ("expecting slice <from>..<to>\n");
6505 if ( *(endptr
+2) == ' ') {
6506 // It must be iii.. jjj => get the next token
6507 wl
= VG_(strtok_r
) (NULL
, " .", saveptr
);
6509 // It must be iii..jjj
6514 *to
= VG_(strtoull10
) (wl
, &endptr
);
6515 if (*endptr
!= '\0') {
6516 VG_(gdb_printf
) ("missing/wrong 'to' of slice <from>..<to>\n");
6521 VG_(gdb_printf
) ("<from> cannot be bigger than <to> "
6522 "in slice <from>..<to>\n");
6529 /* return True if request recognised, False otherwise */
6530 static Bool
handle_gdb_monitor_command (ThreadId tid
, HChar
*req
)
6533 HChar s
[VG_(strlen
)(req
) + 1]; /* copy for strtok_r */
6536 VG_(strcpy
) (s
, req
);
6538 wcmd
= VG_(strtok_r
) (s
, " ", &ssaveptr
);
6539 /* NB: if possible, avoid introducing a new command below which
6540 starts with the same first letter(s) as an already existing
6541 command. This ensures a shorter abbreviation for the user. */
6542 switch (VG_(keyword_id
)
6543 ("help get_vbits leak_check make_memory check_memory "
6544 "block_list who_points_at xb xtmemory",
6545 wcmd
, kwd_report_duplicated_matches
)) {
6546 case -2: /* multiple matches */
6548 case -1: /* not found */
6551 print_monitor_help();
6553 case 1: { /* get_vbits */
6556 if (VG_(strtok_get_address_and_size
) (&address
, &szB
, &ssaveptr
)) {
6559 Int unaddressable
= 0;
6560 for (i
= 0; i
< szB
; i
++) {
6561 Int res
= mc_get_or_set_vbits_for_client
6562 (address
+i
, (Addr
) &vbits
, 1,
6563 False
, /* get them */
6564 False
/* is client request */ );
6565 /* we are before the first character on next line, print a \n. */
6566 if ((i
% 32) == 0 && i
!= 0)
6568 /* we are before the next block of 4 starts, print a space. */
6569 else if ((i
% 4) == 0 && i
!= 0)
6572 VG_(printf
) ("%02x", vbits
);
6574 tl_assert(3 == res
);
6580 if (unaddressable
) {
6582 ("Address %p len %lu has %d bytes unaddressable\n",
6583 (void *)address
, szB
, unaddressable
);
6588 case 2: { /* leak_check */
6590 LeakCheckParams lcp
;
6591 HChar
* xt_filename
= NULL
;
6595 lcp
.show_leak_kinds
= R2S(Possible
) | R2S(Unreached
);
6596 lcp
.errors_for_leak_kinds
= 0; // no errors for interactive leak search.
6598 lcp
.deltamode
= LCD_Increased
;
6599 lcp
.max_loss_records_output
= 999999999;
6600 lcp
.requested_by_monitor_command
= True
;
6601 lcp
.xt_filename
= NULL
;
6603 for (kw
= VG_(strtok_r
) (NULL
, " ", &ssaveptr
);
6605 kw
= VG_(strtok_r
) (NULL
, " ", &ssaveptr
)) {
6606 switch (VG_(keyword_id
)
6607 ("full summary xtleak "
6608 "kinds reachable possibleleak definiteleak "
6610 "increased changed any "
6611 "unlimited limited ",
6612 kw
, kwd_report_all
)) {
6613 case -2: err
++; break;
6614 case -1: err
++; break;
6616 lcp
.mode
= LC_Full
; break;
6617 case 1: /* summary */
6618 lcp
.mode
= LC_Summary
; break;
6619 case 2: /* xtleak */
6622 = VG_(expand_file_name
)("--xtleak-mc_main.c",
6623 "xtleak.kcg.%p.%n");
6624 lcp
.xt_filename
= xt_filename
;
6626 case 3: { /* kinds */
6627 wcmd
= VG_(strtok_r
) (NULL
, " ", &ssaveptr
);
6629 || !VG_(parse_enum_set
)(MC_(parse_leak_kinds_tokens
),
6632 &lcp
.show_leak_kinds
)) {
6633 VG_(gdb_printf
) ("missing or malformed leak kinds set\n");
6638 case 4: /* reachable */
6639 lcp
.show_leak_kinds
= MC_(all_Reachedness
)();
6641 case 5: /* possibleleak */
6643 = R2S(Possible
) | R2S(IndirectLeak
) | R2S(Unreached
);
6645 case 6: /* definiteleak */
6646 lcp
.show_leak_kinds
= R2S(Unreached
);
6648 case 7: { /* heuristics */
6649 wcmd
= VG_(strtok_r
) (NULL
, " ", &ssaveptr
);
6651 || !VG_(parse_enum_set
)(MC_(parse_leak_heuristics_tokens
),
6655 VG_(gdb_printf
) ("missing or malformed heuristics set\n");
6660 case 8: /* increased */
6661 lcp
.deltamode
= LCD_Increased
; break;
6662 case 9: /* changed */
6663 lcp
.deltamode
= LCD_Changed
; break;
6665 lcp
.deltamode
= LCD_Any
; break;
6666 case 11: /* unlimited */
6667 lcp
.max_loss_records_output
= 999999999; break;
6668 case 12: { /* limited */
6670 const HChar
* endptr
;
6672 wcmd
= VG_(strtok_r
) (NULL
, " ", &ssaveptr
);
6675 endptr
= "empty"; /* to report an error below */
6678 int_value
= VG_(strtoll10
) (wcmd
, &the_end
);
6681 if (*endptr
!= '\0')
6682 VG_(gdb_printf
) ("missing or malformed integer value\n");
6683 else if (int_value
> 0)
6684 lcp
.max_loss_records_output
= (UInt
) int_value
;
6686 VG_(gdb_printf
) ("max_loss_records_output must be >= 1,"
6687 " got %d\n", int_value
);
6695 MC_(detect_memory_leaks
)(tid
, &lcp
);
6696 if (xt_filename
!= NULL
)
6697 VG_(free
)(xt_filename
);
6701 case 3: { /* make_memory */
6704 Int kwdid
= VG_(keyword_id
)
6705 ("noaccess undefined defined Definedifaddressable",
6706 VG_(strtok_r
) (NULL
, " ", &ssaveptr
), kwd_report_all
);
6707 if (!VG_(strtok_get_address_and_size
) (&address
, &szB
, &ssaveptr
))
6712 case 0: MC_(make_mem_noaccess
) (address
, szB
); break;
6713 case 1: make_mem_undefined_w_tid_and_okind ( address
, szB
, tid
,
6714 MC_OKIND_USER
); break;
         case 2: MC_(make_mem_defined) ( address, szB ); break;
         case 3: make_mem_defined_if_addressable ( address, szB ); break;
         default: tl_assert(0);

   case 4: { /* check_memory */
      ExeContext* origin_ec;

      Int kwdid = VG_(keyword_id)
         ("addressable defined",
          VG_(strtok_r) (NULL, " ", &ssaveptr), kwd_report_all);
      if (!VG_(strtok_get_address_and_size) (&address, &szB, &ssaveptr))

         case 0: /* addressable */
            if (is_mem_addressable ( address, szB, &bad_addr ))
               VG_(printf) ("Address %p len %lu addressable\n",
                            (void *)address, szB);
                  ("Address %p len %lu not addressable:\nbad address %p\n",
                   (void *)address, szB, (void *) bad_addr);
            // Describe this (probably live) address with current epoch
            MC_(pp_describe_addr) (VG_(current_DiEpoch)(), address);
         case 1: /* defined */
            res = is_mem_defined ( address, szB, &bad_addr, &otag );
            if (MC_AddrErr == res)
                  ("Address %p len %lu not addressable:\nbad address %p\n",
                   (void *)address, szB, (void *) bad_addr);
            else if (MC_ValueErr == res) {
                  case MC_OKIND_STACK:
                     src = " was created by a stack allocation"; break;
                     src = " was created by a heap allocation"; break;
                     src = " was created by a client request"; break;
                  case MC_OKIND_UNKNOWN:
                  default: tl_assert(0);
                  ("Address %p len %lu not defined:\n"
                   "Uninitialised value at %p%s\n",
                   (void *)address, szB, (void *) bad_addr, src);
               if (VG_(is_plausible_ECU)(ecu)) {
                  origin_ec = VG_(get_ExeContext_from_ECU)( ecu );
                  VG_(pp_ExeContext)( origin_ec );
               VG_(printf) ("Address %p len %lu defined\n",
                            (void *)address, szB);
            // Describe this (probably live) address with current epoch
            MC_(pp_describe_addr) (VG_(current_DiEpoch)(), address);
         default: tl_assert(0);

   case 5: { /* block_list */
      UInt lr_nr_from = 0;

      if (VG_(parse_slice) (NULL, &ssaveptr, &lr_nr_from, &lr_nr_to)) {
         UInt limit_blocks = 999999999;
         UInt heuristics = 0;

         for (wl = VG_(strtok_r) (NULL, " ", &ssaveptr);
              wl = VG_(strtok_r) (NULL, " ", &ssaveptr)) {
            switch (VG_(keyword_id) ("unlimited limited heuristics ",
                                     wl, kwd_report_all)) {
               case -2: return True;
               case -1: return True;
               case 0: /* unlimited */
                  limit_blocks = 999999999; break;
               case 1: /* limited */
                  wcmd = VG_(strtok_r) (NULL, " ", &ssaveptr);
                     VG_(gdb_printf) ("missing integer value\n");
                  int_value = VG_(strtoll10) (wcmd, &the_end);
                  if (*the_end != '\0') {
                     VG_(gdb_printf) ("malformed integer value\n");
                  if (int_value <= 0) {
                     VG_(gdb_printf) ("max_blocks must be >= 1,"
                                      " got %d\n", int_value);
                  limit_blocks = (UInt) int_value;
               case 2: /* heuristics */
                  wcmd = VG_(strtok_r) (NULL, " ", &ssaveptr);
                      || !VG_(parse_enum_set)(MC_(parse_leak_heuristics_tokens),
                     VG_(gdb_printf) ("missing or malformed heuristics set\n");
         /* subtract 1 from lr_nr_from/lr_nr_to as what is shown to the user
            is 1 more than the index in lr_array. */
         if (lr_nr_from == 0 || ! MC_(print_block_list) (lr_nr_from-1,
            VG_(gdb_printf) ("invalid loss record nr\n");

   case 6: { /* who_points_at */
      if (!VG_(strtok_get_address_and_size) (&address, &szB, &ssaveptr))
      if (address == (Addr) 0) {
         VG_(gdb_printf) ("Cannot search who points at 0x0\n");
      MC_(who_points_at) (address, szB);

      if (VG_(strtok_get_address_and_size) (&address, &szB, &ssaveptr)) {
         Int unaddressable = 0;
         for (i = 0; i < szB; i++) {
            res[bnr] = mc_get_or_set_vbits_for_client
                          (address+i, (Addr) &vbits[bnr], 1,
                           False, /* get them */
                           False  /* is client request */ );
            /* We are going to print the first vabits of a new line.
               Terminate the previous line if needed: prints a line with the
               address and the data. */
               gdb_xb (address + i - 8, 8, res);
               VG_(printf) ("\t"); // To align VABITS with gdb_xb layout
            if (res[bnr] == 1) {
               VG_(printf) ("\t %02x", vbits[bnr]);
               tl_assert(3 == res[bnr]);
               VG_(printf) ("\t __");
         if (szB % 8 == 0 && szB > 0)
            gdb_xb (address + szB - 8, 8, res);
            gdb_xb (address + szB - szB % 8, szB % 8, res);
         if (unaddressable) {
               ("Address %p len %lu has %d bytes unaddressable\n",
                (void *)address, szB, unaddressable);

   case 8: { /* xtmemory */
      filename = VG_(strtok_r) (NULL, " ", &ssaveptr);
      MC_(xtmemory_report)(filename, False);
/*------------------------------------------------------------*/
/*--- Client requests                                      ---*/
/*------------------------------------------------------------*/

static Bool mc_handle_client_request ( ThreadId tid, UWord* arg, UWord* ret )
   if (!VG_IS_TOOL_USERREQ('M','C',arg[0])
       && VG_USERREQ__MALLOCLIKE_BLOCK != arg[0]
       && VG_USERREQ__RESIZEINPLACE_BLOCK != arg[0]
       && VG_USERREQ__FREELIKE_BLOCK   != arg[0]
       && VG_USERREQ__CREATE_MEMPOOL   != arg[0]
       && VG_USERREQ__DESTROY_MEMPOOL  != arg[0]
       && VG_USERREQ__MEMPOOL_ALLOC    != arg[0]
       && VG_USERREQ__MEMPOOL_FREE     != arg[0]
       && VG_USERREQ__MEMPOOL_TRIM     != arg[0]
       && VG_USERREQ__MOVE_MEMPOOL     != arg[0]
       && VG_USERREQ__MEMPOOL_CHANGE   != arg[0]
       && VG_USERREQ__MEMPOOL_EXISTS   != arg[0]
       && VG_USERREQ__GDB_MONITOR_COMMAND != arg[0]
       && VG_USERREQ__ENABLE_ADDR_ERROR_REPORTING_IN_RANGE != arg[0]
       && VG_USERREQ__DISABLE_ADDR_ERROR_REPORTING_IN_RANGE != arg[0])
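   /* Illustrative note (an editor's sketch, not from this file): in
      valgrind.h the owning tool of a client request is packed into the top
      16 bits of the request code, so Memcheck's own requests look like
      ('M' << 24) | ('C' << 16) | n, i.e. 0x4D43xxxx, and
      VG_IS_TOOL_USERREQ('M','C', arg[0]) presumably just compares those top
      bits.  The extra cases listed above are core requests defined in
      valgrind.h rather than memcheck.h, so they carry a different prefix but
      must still be handled here.  A rough sketch of the shape of the check:

         Bool looks_like_mc =
            (arg[0] & 0xffff0000) == ((UWord)('M' << 24) | ('C' << 16));
   */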
      case VG_USERREQ__CHECK_MEM_IS_ADDRESSABLE: {
         Bool ok = is_mem_addressable ( arg[1], arg[2], &bad_addr );
            MC_(record_user_error) ( tid, bad_addr, /*isAddrErr*/True, 0 );
         *ret = ok ? (UWord)NULL : bad_addr;

      case VG_USERREQ__CHECK_MEM_IS_DEFINED: {
         Bool errorV = False;
         Bool errorA = False;
         is_mem_defined_comprehensive(
            &errorV, &bad_addrV, &otagV, &errorA, &bad_addrA
            MC_(record_user_error) ( tid, bad_addrV,
                                     /*isAddrErr*/False, otagV );
            MC_(record_user_error) ( tid, bad_addrA,
                                     /*isAddrErr*/True, 0 );
         /* Return the lower of the two erring addresses, if any. */
         if (errorV && !errorA) {
         if (!errorV && errorA) {
         if (errorV && errorA) {
            *ret = bad_addrV < bad_addrA ? bad_addrV : bad_addrA;

      case VG_USERREQ__DO_LEAK_CHECK: {
         LeakCheckParams lcp;
         else if (arg[1] == 1)
            lcp.mode = LC_Summary;
            VG_(message)(Vg_UserMsg,
                         "Warning: unknown memcheck leak search mode\n");
         lcp.show_leak_kinds = MC_(clo_show_leak_kinds);
         lcp.errors_for_leak_kinds = MC_(clo_error_for_leak_kinds);
         lcp.heuristics = MC_(clo_leak_check_heuristics);
            lcp.deltamode = LCD_Any;
         else if (arg[2] == 1)
            lcp.deltamode = LCD_Increased;
         else if (arg[2] == 2)
            lcp.deltamode = LCD_Changed;
               "Warning: unknown memcheck leak search deltamode\n");
            lcp.deltamode = LCD_Any;
         lcp.max_loss_records_output = 999999999;
         lcp.requested_by_monitor_command = False;
         lcp.xt_filename = NULL;

         MC_(detect_memory_leaks)(tid, &lcp);
         *ret = 0; /* return value is meaningless */

      case VG_USERREQ__MAKE_MEM_NOACCESS:
         MC_(make_mem_noaccess) ( arg[1], arg[2] );

      case VG_USERREQ__MAKE_MEM_UNDEFINED:
         make_mem_undefined_w_tid_and_okind ( arg[1], arg[2], tid,

      case VG_USERREQ__MAKE_MEM_DEFINED:
         MC_(make_mem_defined) ( arg[1], arg[2] );

      case VG_USERREQ__MAKE_MEM_DEFINED_IF_ADDRESSABLE:
         make_mem_defined_if_addressable ( arg[1], arg[2] );

      case VG_USERREQ__CREATE_BLOCK: /* describe a block */
         if (arg[1] != 0 && arg[2] != 0) {
            i = alloc_client_block();
            /* VG_(printf)("allocated %d %p\n", i, cgbs); */
            cgbs[i].start = arg[1];
            cgbs[i].size  = arg[2];
            cgbs[i].desc  = VG_(strdup)("mc.mhcr.1", (HChar *)arg[3]);
            cgbs[i].where = VG_(record_ExeContext) ( tid, 0/*first_ip_delta*/ );

      case VG_USERREQ__DISCARD: /* discard */
             || arg[2] >= cgb_used ||
             (cgbs[arg[2]].start == 0 && cgbs[arg[2]].size == 0)) {
         tl_assert(arg[2] >= 0 && arg[2] < cgb_used);
         cgbs[arg[2]].start = cgbs[arg[2]].size = 0;
         VG_(free)(cgbs[arg[2]].desc);

      case VG_USERREQ__GET_VBITS:
         *ret = mc_get_or_set_vbits_for_client
                   ( arg[1], arg[2], arg[3],
                     False /* get them */,
                     True /* is client request */ );

      case VG_USERREQ__SET_VBITS:
         *ret = mc_get_or_set_vbits_for_client
                   ( arg[1], arg[2], arg[3],
                     True /* set them */,
                     True /* is client request */ );

      case VG_USERREQ__COUNT_LEAKS: { /* count leaked bytes */
         UWord** argp = (UWord**)arg;
         // MC_(bytes_leaked) et al were set by the last leak check (or zero
         // if no prior leak checks performed).
         *argp[1] = MC_(bytes_leaked) + MC_(bytes_indirect);
         *argp[2] = MC_(bytes_dubious);
         *argp[3] = MC_(bytes_reachable);
         *argp[4] = MC_(bytes_suppressed);
         // there is no argp[5]
         //*argp[5] = MC_(bytes_indirect);
         // XXX need to make *argp[1-4] defined; currently done in the
         // VALGRIND_COUNT_LEAKS_MACRO by initialising them to zero.
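         /* Illustrative client-side sketch (an editor's note, not part of
            this file): a program can trigger a leak check and then read
            these counters through the macros in memcheck.h.
            VALGRIND_COUNT_LEAKS fills in byte counts; the analogous
            VALGRIND_COUNT_LEAK_BLOCKS (handled in the next case) fills in
            block counts.

               #include <valgrind/memcheck.h>

               unsigned long leaked, dubious, reachable, suppressed;
               VALGRIND_DO_LEAK_CHECK;
               VALGRIND_COUNT_LEAKS(leaked, dubious, reachable, suppressed);
               // 'leaked' now holds definitely-lost plus indirectly-lost
               // bytes, matching the *argp[1] assignment above.
         */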
      case VG_USERREQ__COUNT_LEAK_BLOCKS: { /* count leaked blocks */
         UWord** argp = (UWord**)arg;
         // MC_(blocks_leaked) et al were set by the last leak check (or zero
         // if no prior leak checks performed).
         *argp[1] = MC_(blocks_leaked) + MC_(blocks_indirect);
         *argp[2] = MC_(blocks_dubious);
         *argp[3] = MC_(blocks_reachable);
         *argp[4] = MC_(blocks_suppressed);
         // there is no argp[5]
         //*argp[5] = MC_(blocks_indirect);
         // XXX need to make *argp[1-4] defined; currently done in the
         // VALGRIND_COUNT_LEAK_BLOCKS_MACRO by initialising them to zero.

      case VG_USERREQ__MALLOCLIKE_BLOCK: {
         Addr p         = (Addr)arg[1];
         SizeT sizeB    =       arg[2];
         Bool is_zeroed = (Bool)arg[4];

         MC_(new_block) ( tid, p, sizeB, /*ignored*/0, is_zeroed,
                          MC_AllocCustom, MC_(malloc_list) );
            MC_(make_mem_noaccess) ( p - rzB, rzB);
            MC_(make_mem_noaccess) ( p + sizeB, rzB);

      case VG_USERREQ__RESIZEINPLACE_BLOCK: {
         Addr p         = (Addr)arg[1];
         SizeT oldSizeB =       arg[2];
         SizeT newSizeB =       arg[3];

         MC_(handle_resizeInPlace) ( tid, p, oldSizeB, newSizeB, rzB );

      case VG_USERREQ__FREELIKE_BLOCK: {
         Addr p = (Addr)arg[1];

         MC_(handle_free) ( tid, p, rzB, MC_AllocCustom );
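      /* Illustrative client-side sketch (an editor's note, not part of this
         file): a custom allocator typically pairs these two requests via the
         macros in memcheck.h, so Memcheck tracks its blocks like heap
         blocks.  'rzB' is the redzone the allocator keeps around each block
         (0 if none).  carve_from_arena/return_to_arena are hypothetical
         helpers standing in for the allocator's own machinery.

            #include <valgrind/memcheck.h>

            void* my_alloc(size_t n) {
               void* p = carve_from_arena(n);              // hypothetical
               VALGRIND_MALLOCLIKE_BLOCK(p, n, /*rzB*/0, /*is_zeroed*/0);
               return p;
            }
            void my_free(void* p) {
               VALGRIND_FREELIKE_BLOCK(p, /*rzB*/0);
               return_to_arena(p);                         // hypothetical
            }
      */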
      case _VG_USERREQ__MEMCHECK_RECORD_OVERLAP_ERROR: {
         HChar* s  = (HChar*)arg[1];
         Addr  dst = (Addr) arg[2];
         Addr  src = (Addr) arg[3];
         SizeT len = (SizeT)arg[4];
         MC_(record_overlap_error)(tid, s, src, dst, len);

      case VG_USERREQ__CREATE_MEMPOOL: {
         Addr pool      = (Addr)arg[1];
         Bool is_zeroed = (Bool)arg[3];
         UInt flags     =       arg[4];

         // The create_mempool function does not know these mempool flags;
         // pass them as booleans.
         MC_(create_mempool) ( pool, rzB, is_zeroed,
                               (flags & VALGRIND_MEMPOOL_AUTO_FREE),
                               (flags & VALGRIND_MEMPOOL_METAPOOL) );

      case VG_USERREQ__DESTROY_MEMPOOL: {
         Addr pool = (Addr)arg[1];

         MC_(destroy_mempool) ( pool );

      case VG_USERREQ__MEMPOOL_ALLOC: {
         Addr pool = (Addr)arg[1];
         Addr addr = (Addr)arg[2];

         MC_(mempool_alloc) ( tid, pool, addr, size );

      case VG_USERREQ__MEMPOOL_FREE: {
         Addr pool = (Addr)arg[1];
         Addr addr = (Addr)arg[2];

         MC_(mempool_free) ( pool, addr );

      case VG_USERREQ__MEMPOOL_TRIM: {
         Addr pool = (Addr)arg[1];
         Addr addr = (Addr)arg[2];

         MC_(mempool_trim) ( pool, addr, size );

      case VG_USERREQ__MOVE_MEMPOOL: {
         Addr poolA = (Addr)arg[1];
         Addr poolB = (Addr)arg[2];

         MC_(move_mempool) ( poolA, poolB );

      case VG_USERREQ__MEMPOOL_CHANGE: {
         Addr pool  = (Addr)arg[1];
         Addr addrA = (Addr)arg[2];
         Addr addrB = (Addr)arg[3];

         MC_(mempool_change) ( pool, addrA, addrB, size );

      case VG_USERREQ__MEMPOOL_EXISTS: {
         Addr pool = (Addr)arg[1];

         *ret = (UWord) MC_(mempool_exists) ( pool );
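      /* Illustrative client-side sketch (an editor's note, not part of this
         file): the mempool requests above back the VALGRIND_*_MEMPOOL macros
         in memcheck.h, which let a pool allocator describe carve-outs from a
         big mapped region.  'pool' is any stable address used as a handle.

            #include <valgrind/memcheck.h>

            VALGRIND_CREATE_MEMPOOL(pool, /*rzB*/0, /*is_zeroed*/0);
            VALGRIND_MEMPOOL_ALLOC(pool, chunk_addr, chunk_size);
            VALGRIND_MEMPOOL_FREE(pool, chunk_addr);
            VALGRIND_DESTROY_MEMPOOL(pool);
      */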
      case VG_USERREQ__GDB_MONITOR_COMMAND: {
         Bool handled = handle_gdb_monitor_command (tid, (HChar*)arg[1]);

      case VG_USERREQ__DISABLE_ADDR_ERROR_REPORTING_IN_RANGE:
      case VG_USERREQ__ENABLE_ADDR_ERROR_REPORTING_IN_RANGE: {
            = arg[0] == VG_USERREQ__DISABLE_ADDR_ERROR_REPORTING_IN_RANGE;
            = modify_ignore_ranges(addRange, arg[1], arg[2]);

                      "Warning: unknown memcheck client request code %llx\n",
/*------------------------------------------------------------*/
/*--- Crude profiling machinery.                           ---*/
/*------------------------------------------------------------*/

// We track a number of interesting events (using PROF_EVENT)
// if MC_PROFILE_MEMORY is defined.

#ifdef MC_PROFILE_MEMORY

ULong MC_(event_ctr)[MCPE_LAST];

/* Event counter names.  Use the name of the function that increases the
   event counter.  Drop any MC_() and mc_ prefixes. */
static const HChar* MC_(event_ctr_name)[MCPE_LAST] = {
   [MCPE_LOADVN_SLOW] = "LOADVn_slow",
   [MCPE_LOADVN_SLOW_LOOP] = "LOADVn_slow_loop",
   [MCPE_STOREVN_SLOW] = "STOREVn_slow",
   [MCPE_STOREVN_SLOW_LOOP] = "STOREVn_slow(loop)",
   [MCPE_MAKE_ALIGNED_WORD32_UNDEFINED] = "make_aligned_word32_undefined",
   [MCPE_MAKE_ALIGNED_WORD32_UNDEFINED_SLOW] =
        "make_aligned_word32_undefined_slow",
   [MCPE_MAKE_ALIGNED_WORD64_UNDEFINED] = "make_aligned_word64_undefined",
   [MCPE_MAKE_ALIGNED_WORD64_UNDEFINED_SLOW] =
        "make_aligned_word64_undefined_slow",
   [MCPE_MAKE_ALIGNED_WORD32_NOACCESS] = "make_aligned_word32_noaccess",
   [MCPE_MAKE_ALIGNED_WORD32_NOACCESS_SLOW] =
        "make_aligned_word32_noaccess_slow",
   [MCPE_MAKE_ALIGNED_WORD64_NOACCESS] = "make_aligned_word64_noaccess",
   [MCPE_MAKE_ALIGNED_WORD64_NOACCESS_SLOW] =
        "make_aligned_word64_noaccess_slow",
   [MCPE_MAKE_MEM_NOACCESS] = "make_mem_noaccess",
   [MCPE_MAKE_MEM_UNDEFINED] = "make_mem_undefined",
   [MCPE_MAKE_MEM_UNDEFINED_W_OTAG] = "make_mem_undefined_w_otag",
   [MCPE_MAKE_MEM_DEFINED] = "make_mem_defined",
   [MCPE_CHEAP_SANITY_CHECK] = "cheap_sanity_check",
   [MCPE_EXPENSIVE_SANITY_CHECK] = "expensive_sanity_check",
   [MCPE_COPY_ADDRESS_RANGE_STATE] = "copy_address_range_state",
   [MCPE_COPY_ADDRESS_RANGE_STATE_LOOP1] = "copy_address_range_state(loop1)",
   [MCPE_COPY_ADDRESS_RANGE_STATE_LOOP2] = "copy_address_range_state(loop2)",
   [MCPE_CHECK_MEM_IS_NOACCESS] = "check_mem_is_noaccess",
   [MCPE_CHECK_MEM_IS_NOACCESS_LOOP] = "check_mem_is_noaccess(loop)",
   [MCPE_IS_MEM_ADDRESSABLE] = "is_mem_addressable",
   [MCPE_IS_MEM_ADDRESSABLE_LOOP] = "is_mem_addressable(loop)",
   [MCPE_IS_MEM_DEFINED] = "is_mem_defined",
   [MCPE_IS_MEM_DEFINED_LOOP] = "is_mem_defined(loop)",
   [MCPE_IS_MEM_DEFINED_COMPREHENSIVE] = "is_mem_defined_comprehensive",
   [MCPE_IS_MEM_DEFINED_COMPREHENSIVE_LOOP] =
        "is_mem_defined_comprehensive(loop)",
   [MCPE_IS_DEFINED_ASCIIZ] = "is_defined_asciiz",
   [MCPE_IS_DEFINED_ASCIIZ_LOOP] = "is_defined_asciiz(loop)",
   [MCPE_FIND_CHUNK_FOR_OLD] = "find_chunk_for_OLD",
   [MCPE_FIND_CHUNK_FOR_OLD_LOOP] = "find_chunk_for_OLD(loop)",
   [MCPE_SET_ADDRESS_RANGE_PERMS] = "set_address_range_perms",
   [MCPE_SET_ADDRESS_RANGE_PERMS_SINGLE_SECMAP] =
        "set_address_range_perms(single-secmap)",
   [MCPE_SET_ADDRESS_RANGE_PERMS_STARTOF_SECMAP] =
        "set_address_range_perms(startof-secmap)",
   [MCPE_SET_ADDRESS_RANGE_PERMS_MULTIPLE_SECMAPS] =
        "set_address_range_perms(multiple-secmaps)",
   [MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM1] =
        "set_address_range_perms(dist-sm1)",
   [MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM2] =
        "set_address_range_perms(dist-sm2)",
   [MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM1_QUICK] =
        "set_address_range_perms(dist-sm1-quick)",
   [MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM2_QUICK] =
        "set_address_range_perms(dist-sm2-quick)",
   [MCPE_SET_ADDRESS_RANGE_PERMS_LOOP1A] = "set_address_range_perms(loop1a)",
   [MCPE_SET_ADDRESS_RANGE_PERMS_LOOP1B] = "set_address_range_perms(loop1b)",
   [MCPE_SET_ADDRESS_RANGE_PERMS_LOOP1C] = "set_address_range_perms(loop1c)",
   [MCPE_SET_ADDRESS_RANGE_PERMS_LOOP8A] = "set_address_range_perms(loop8a)",
   [MCPE_SET_ADDRESS_RANGE_PERMS_LOOP8B] = "set_address_range_perms(loop8b)",
   [MCPE_SET_ADDRESS_RANGE_PERMS_LOOP64K] = "set_address_range_perms(loop64K)",
   [MCPE_SET_ADDRESS_RANGE_PERMS_LOOP64K_FREE_DIST_SM] =
        "set_address_range_perms(loop64K-free-dist-sm)",
   [MCPE_LOADV_128_OR_256_SLOW_LOOP] = "LOADV_128_or_256_slow(loop)",
   [MCPE_LOADV_128_OR_256] = "LOADV_128_or_256",
   [MCPE_LOADV_128_OR_256_SLOW1] = "LOADV_128_or_256-slow1",
   [MCPE_LOADV_128_OR_256_SLOW2] = "LOADV_128_or_256-slow2",
   [MCPE_LOADV64] = "LOADV64",
   [MCPE_LOADV64_SLOW1] = "LOADV64-slow1",
   [MCPE_LOADV64_SLOW2] = "LOADV64-slow2",
   [MCPE_STOREV64] = "STOREV64",
   [MCPE_STOREV64_SLOW1] = "STOREV64-slow1",
   [MCPE_STOREV64_SLOW2] = "STOREV64-slow2",
   [MCPE_STOREV64_SLOW3] = "STOREV64-slow3",
   [MCPE_STOREV64_SLOW4] = "STOREV64-slow4",
   [MCPE_LOADV32] = "LOADV32",
   [MCPE_LOADV32_SLOW1] = "LOADV32-slow1",
   [MCPE_LOADV32_SLOW2] = "LOADV32-slow2",
   [MCPE_STOREV32] = "STOREV32",
   [MCPE_STOREV32_SLOW1] = "STOREV32-slow1",
   [MCPE_STOREV32_SLOW2] = "STOREV32-slow2",
   [MCPE_STOREV32_SLOW3] = "STOREV32-slow3",
   [MCPE_STOREV32_SLOW4] = "STOREV32-slow4",
   [MCPE_LOADV16] = "LOADV16",
   [MCPE_LOADV16_SLOW1] = "LOADV16-slow1",
   [MCPE_LOADV16_SLOW2] = "LOADV16-slow2",
   [MCPE_STOREV16] = "STOREV16",
   [MCPE_STOREV16_SLOW1] = "STOREV16-slow1",
   [MCPE_STOREV16_SLOW2] = "STOREV16-slow2",
   [MCPE_STOREV16_SLOW3] = "STOREV16-slow3",
   [MCPE_STOREV16_SLOW4] = "STOREV16-slow4",
   [MCPE_LOADV8] = "LOADV8",
   [MCPE_LOADV8_SLOW1] = "LOADV8-slow1",
   [MCPE_LOADV8_SLOW2] = "LOADV8-slow2",
   [MCPE_STOREV8] = "STOREV8",
   [MCPE_STOREV8_SLOW1] = "STOREV8-slow1",
   [MCPE_STOREV8_SLOW2] = "STOREV8-slow2",
   [MCPE_STOREV8_SLOW3] = "STOREV8-slow3",
   [MCPE_STOREV8_SLOW4] = "STOREV8-slow4",
   [MCPE_NEW_MEM_STACK_4] = "new_mem_stack_4",
   [MCPE_NEW_MEM_STACK_8] = "new_mem_stack_8",
   [MCPE_NEW_MEM_STACK_12] = "new_mem_stack_12",
   [MCPE_NEW_MEM_STACK_16] = "new_mem_stack_16",
   [MCPE_NEW_MEM_STACK_32] = "new_mem_stack_32",
   [MCPE_NEW_MEM_STACK_112] = "new_mem_stack_112",
   [MCPE_NEW_MEM_STACK_128] = "new_mem_stack_128",
   [MCPE_NEW_MEM_STACK_144] = "new_mem_stack_144",
   [MCPE_NEW_MEM_STACK_160] = "new_mem_stack_160",
   [MCPE_DIE_MEM_STACK_4] = "die_mem_stack_4",
   [MCPE_DIE_MEM_STACK_8] = "die_mem_stack_8",
   [MCPE_DIE_MEM_STACK_12] = "die_mem_stack_12",
   [MCPE_DIE_MEM_STACK_16] = "die_mem_stack_16",
   [MCPE_DIE_MEM_STACK_32] = "die_mem_stack_32",
   [MCPE_DIE_MEM_STACK_112] = "die_mem_stack_112",
   [MCPE_DIE_MEM_STACK_128] = "die_mem_stack_128",
   [MCPE_DIE_MEM_STACK_144] = "die_mem_stack_144",
   [MCPE_DIE_MEM_STACK_160] = "die_mem_stack_160",
   [MCPE_NEW_MEM_STACK] = "new_mem_stack",
   [MCPE_DIE_MEM_STACK] = "die_mem_stack",
   [MCPE_MAKE_STACK_UNINIT_W_O] = "MAKE_STACK_UNINIT_w_o",
   [MCPE_MAKE_STACK_UNINIT_NO_O] = "MAKE_STACK_UNINIT_no_o",
   [MCPE_MAKE_STACK_UNINIT_128_NO_O] = "MAKE_STACK_UNINIT_128_no_o",
   [MCPE_MAKE_STACK_UNINIT_128_NO_O_ALIGNED_16]
      = "MAKE_STACK_UNINIT_128_no_o_aligned_16",
   [MCPE_MAKE_STACK_UNINIT_128_NO_O_ALIGNED_8]
      = "MAKE_STACK_UNINIT_128_no_o_aligned_8",
   [MCPE_MAKE_STACK_UNINIT_128_NO_O_SLOWCASE]
      = "MAKE_STACK_UNINIT_128_no_o_slowcase",

static void init_prof_mem ( void )
   Int i, name_count = 0;

   for (i = 0; i < MCPE_LAST; i++) {
      MC_(event_ctr)[i] = 0;
      if (MC_(event_ctr_name)[i] != NULL)

   /* Make sure every profiling event has a name */
   tl_assert(name_count == MCPE_LAST);

static void done_prof_mem ( void )
   Bool spaced = False;
   for (i = n = 0; i < MCPE_LAST; i++) {
      if (!spaced && (n % 10) == 0) {
      if (MC_(event_ctr)[i] > 0) {
         VG_(printf)( "prof mem event %3d: %11llu %s\n",
                      i, MC_(event_ctr)[i],
                      MC_(event_ctr_name)[i]);

static void init_prof_mem ( void ) { }
static void done_prof_mem ( void ) { }
/*------------------------------------------------------------*/
/*--- Origin tracking stuff                                ---*/
/*------------------------------------------------------------*/

/*--------------------------------------------*/
/*--- Origin tracking: load handlers       ---*/
/*--------------------------------------------*/

static INLINE UInt merge_origins ( UInt or1, UInt or2 ) {
   return or1 > or2 ? or1 : or2;

UWord VG_REGPARM(1) MC_(helperc_b_load1)( Addr a ) {
   UWord lineoff = oc_line_offset(a);
   UWord byteoff = a & 3; /* 0, 1, 2 or 3 */

   if (OC_ENABLE_ASSERTIONS) {
      tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);

   line = find_OCacheLine( a );

   descr = line->descr[lineoff];
   if (OC_ENABLE_ASSERTIONS) {
      tl_assert(descr < 0x10);

   if (LIKELY(0 == (descr & (1 << byteoff)))) {
      return line->w32[lineoff];

UWord VG_REGPARM(1) MC_(helperc_b_load2)( Addr a ) {
   UWord lineoff, byteoff;

   if (UNLIKELY(a & 1)) {
      /* Handle misaligned case, slowly. */
      UInt oLo = (UInt)MC_(helperc_b_load1)( a + 0 );
      UInt oHi = (UInt)MC_(helperc_b_load1)( a + 1 );
      return merge_origins(oLo, oHi);

   lineoff = oc_line_offset(a);
   byteoff = a & 3; /* 0 or 2 */

   if (OC_ENABLE_ASSERTIONS) {
      tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);

   line = find_OCacheLine( a );

   descr = line->descr[lineoff];
   if (OC_ENABLE_ASSERTIONS) {
      tl_assert(descr < 0x10);

   if (LIKELY(0 == (descr & (3 << byteoff)))) {
      return line->w32[lineoff];

UWord VG_REGPARM(1) MC_(helperc_b_load4)( Addr a ) {
   if (UNLIKELY(a & 3)) {
      /* Handle misaligned case, slowly. */
      UInt oLo = (UInt)MC_(helperc_b_load2)( a + 0 );
      UInt oHi = (UInt)MC_(helperc_b_load2)( a + 2 );
      return merge_origins(oLo, oHi);

   lineoff = oc_line_offset(a);
   if (OC_ENABLE_ASSERTIONS) {
      tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);

   line = find_OCacheLine( a );

   descr = line->descr[lineoff];
   if (OC_ENABLE_ASSERTIONS) {
      tl_assert(descr < 0x10);

   if (LIKELY(0 == descr)) {
      return line->w32[lineoff];

UWord VG_REGPARM(1) MC_(helperc_b_load8)( Addr a ) {
   UChar descrLo, descrHi, descr;

   if (UNLIKELY(a & 7)) {
      /* Handle misaligned case, slowly. */
      UInt oLo = (UInt)MC_(helperc_b_load4)( a + 0 );
      UInt oHi = (UInt)MC_(helperc_b_load4)( a + 4 );
      return merge_origins(oLo, oHi);

   lineoff = oc_line_offset(a);
   if (OC_ENABLE_ASSERTIONS) {
      tl_assert(lineoff == (lineoff & 6)); /*0,2,4,6*//*since 8-aligned*/

   line = find_OCacheLine( a );

   descrLo = line->descr[lineoff + 0];
   descrHi = line->descr[lineoff + 1];
   descr   = descrLo | descrHi;
   if (OC_ENABLE_ASSERTIONS) {
      tl_assert(descr < 0x10);

   if (LIKELY(0 == descr)) {
      return 0; /* both 32-bit chunks are defined */
      UInt oLo = descrLo == 0 ? 0 : line->w32[lineoff + 0];
      UInt oHi = descrHi == 0 ? 0 : line->w32[lineoff + 1];
      return merge_origins(oLo, oHi);

UWord VG_REGPARM(1) MC_(helperc_b_load16)( Addr a ) {
   UInt oLo   = (UInt)MC_(helperc_b_load8)( a + 0 );
   UInt oHi   = (UInt)MC_(helperc_b_load8)( a + 8 );
   UInt oBoth = merge_origins(oLo, oHi);
   return (UWord)oBoth;

UWord VG_REGPARM(1) MC_(helperc_b_load32)( Addr a ) {
   UInt oQ0  = (UInt)MC_(helperc_b_load8)( a + 0 );
   UInt oQ1  = (UInt)MC_(helperc_b_load8)( a + 8 );
   UInt oQ2  = (UInt)MC_(helperc_b_load8)( a + 16 );
   UInt oQ3  = (UInt)MC_(helperc_b_load8)( a + 24 );
   UInt oAll = merge_origins(merge_origins(oQ0, oQ1),
                             merge_origins(oQ2, oQ3));
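/* Illustrative note (an editor's sketch, not from this file): a partially
   defined doubleword load shows how origins combine.  Suppose in
   helperc_b_load8 the low 32-bit chunk is fully defined (descrLo == 0) while
   the high chunk has undefined bytes carrying a hypothetical origin tag
   0x1234.  Then oLo == 0, oHi == 0x1234, and merge_origins picks the larger
   value, so the load reports 0x1234.  A defined chunk thus contributes the
   "no origin" value 0 and never hides the origin of the undefined part; when
   both chunks are undefined, the numerically larger tag wins, presumably as
   an arbitrary but deterministic tie-break.  Sketch:

      UInt oLo = 0, oHi = 0x1234;
      UInt o   = merge_origins(oLo, oHi);   // == 0x1234
*/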
/*--------------------------------------------*/
/*--- Origin tracking: store handlers      ---*/
/*--------------------------------------------*/

void VG_REGPARM(2) MC_(helperc_b_store1)( Addr a, UWord d32 ) {
   UWord lineoff = oc_line_offset(a);
   UWord byteoff = a & 3; /* 0, 1, 2 or 3 */

   if (OC_ENABLE_ASSERTIONS) {
      tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);

   line = find_OCacheLine( a );

      line->descr[lineoff] &= ~(1 << byteoff);
      line->descr[lineoff] |= (1 << byteoff);
      line->w32[lineoff] = d32;

void VG_REGPARM(2) MC_(helperc_b_store2)( Addr a, UWord d32 ) {
   UWord lineoff, byteoff;

   if (UNLIKELY(a & 1)) {
      /* Handle misaligned case, slowly. */
      MC_(helperc_b_store1)( a + 0, d32 );
      MC_(helperc_b_store1)( a + 1, d32 );

   lineoff = oc_line_offset(a);
   byteoff = a & 3; /* 0 or 2 */

   if (OC_ENABLE_ASSERTIONS) {
      tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);

   line = find_OCacheLine( a );

      line->descr[lineoff] &= ~(3 << byteoff);
      line->descr[lineoff] |= (3 << byteoff);
      line->w32[lineoff] = d32;

void VG_REGPARM(2) MC_(helperc_b_store4)( Addr a, UWord d32 ) {
   if (UNLIKELY(a & 3)) {
      /* Handle misaligned case, slowly. */
      MC_(helperc_b_store2)( a + 0, d32 );
      MC_(helperc_b_store2)( a + 2, d32 );

   lineoff = oc_line_offset(a);
   if (OC_ENABLE_ASSERTIONS) {
      tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);

   line = find_OCacheLine( a );

      line->descr[lineoff] = 0;
      line->descr[lineoff] = 0xF;
      line->w32[lineoff] = d32;

void VG_REGPARM(2) MC_(helperc_b_store8)( Addr a, UWord d32 ) {
   if (UNLIKELY(a & 7)) {
      /* Handle misaligned case, slowly. */
      MC_(helperc_b_store4)( a + 0, d32 );
      MC_(helperc_b_store4)( a + 4, d32 );

   lineoff = oc_line_offset(a);
   if (OC_ENABLE_ASSERTIONS) {
      tl_assert(lineoff == (lineoff & 6)); /*0,2,4,6*//*since 8-aligned*/

   line = find_OCacheLine( a );

      line->descr[lineoff + 0] = 0;
      line->descr[lineoff + 1] = 0;
      line->descr[lineoff + 0] = 0xF;
      line->descr[lineoff + 1] = 0xF;
      line->w32[lineoff + 0] = d32;
      line->w32[lineoff + 1] = d32;

void VG_REGPARM(2) MC_(helperc_b_store16)( Addr a, UWord d32 ) {
   MC_(helperc_b_store8)( a + 0, d32 );
   MC_(helperc_b_store8)( a + 8, d32 );

void VG_REGPARM(2) MC_(helperc_b_store32)( Addr a, UWord d32 ) {
   MC_(helperc_b_store8)( a + 0, d32 );
   MC_(helperc_b_store8)( a + 8, d32 );
   MC_(helperc_b_store8)( a + 16, d32 );
   MC_(helperc_b_store8)( a + 24, d32 );

/*--------------------------------------------*/
/*--- Origin tracking: sarp handlers       ---*/
/*--------------------------------------------*/

__attribute__((noinline))
static void ocache_sarp_Set_Origins ( Addr a, UWord len, UInt otag ) {
   if ((a & 1) && len >= 1) {
      MC_(helperc_b_store1)( a, otag );
   if ((a & 2) && len >= 2) {
      MC_(helperc_b_store2)( a, otag );
      tl_assert(0 == (a & 3));
      MC_(helperc_b_store4)( a, otag );
      MC_(helperc_b_store2)( a, otag );
      MC_(helperc_b_store1)( a, otag );
   tl_assert(len == 0);
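/* Illustrative note (an editor's sketch, not from this file): the sequence
   above is the usual head/body/tail alignment split.  With hypothetical
   values a = 0x1003 and len = 9: a 1-byte store at 0x1003 reaches
   2-alignment, no 2-byte head is needed, the 4-aligned body is covered by
   4-byte stores at 0x1004 and 0x1008 (the loop around helperc_b_store4 is
   elided above), and no 2-byte or 1-byte tail remains, so the final
   tl_assert(len == 0) holds.  The head step presumably keeps the usual
   bookkeeping:

      if ((a & 1) && len >= 1) {
         MC_(helperc_b_store1)( a, otag );
         a += 1; len -= 1;
      }
*/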
__attribute__((noinline))
static void ocache_sarp_Clear_Origins ( Addr a, UWord len ) {
   if ((a & 1) && len >= 1) {
      MC_(helperc_b_store1)( a, 0 );
   if ((a & 2) && len >= 2) {
      MC_(helperc_b_store2)( a, 0 );
      tl_assert(0 == (a & 3));
      MC_(helperc_b_store4)( a, 0 );
      MC_(helperc_b_store2)( a, 0 );
      MC_(helperc_b_store1)( a, 0 );
   tl_assert(len == 0);
/*------------------------------------------------------------*/
/*--- Setup and finalisation                               ---*/
/*------------------------------------------------------------*/

static void mc_post_clo_init ( void )
   /* If we've been asked to emit XML, mash around various other
      options so as to constrain the output somewhat. */

   /* Extract as much info as possible from the leak checker. */
   MC_(clo_leak_check) = LC_Full;

   if (MC_(clo_freelist_big_blocks) >= MC_(clo_freelist_vol)
       && VG_(clo_verbosity) == 1 && !VG_(clo_xml)) {
      VG_(message)(Vg_UserMsg,
                   "Warning: --freelist-big-blocks value %lld has no effect\n"
                   "as it is >= to --freelist-vol value %lld\n",
                   MC_(clo_freelist_big_blocks),
                   MC_(clo_freelist_vol));

   if (MC_(clo_workaround_gcc296_bugs)
       && VG_(clo_verbosity) == 1 && !VG_(clo_xml)) {
         "Warning: --workaround-gcc296-bugs=yes is deprecated.\n"
         "Warning: Instead use: --ignore-range-below-sp=1024-1\n"

   tl_assert( MC_(clo_mc_level) >= 1 && MC_(clo_mc_level) <= 3 );

   if (MC_(clo_mc_level) == 3) {
      /* We're doing origin tracking. */
#     ifdef PERF_FAST_STACK
      VG_(track_new_mem_stack_4_w_ECU)   ( mc_new_mem_stack_4_w_ECU   );
      VG_(track_new_mem_stack_8_w_ECU)   ( mc_new_mem_stack_8_w_ECU   );
      VG_(track_new_mem_stack_12_w_ECU)  ( mc_new_mem_stack_12_w_ECU  );
      VG_(track_new_mem_stack_16_w_ECU)  ( mc_new_mem_stack_16_w_ECU  );
      VG_(track_new_mem_stack_32_w_ECU)  ( mc_new_mem_stack_32_w_ECU  );
      VG_(track_new_mem_stack_112_w_ECU) ( mc_new_mem_stack_112_w_ECU );
      VG_(track_new_mem_stack_128_w_ECU) ( mc_new_mem_stack_128_w_ECU );
      VG_(track_new_mem_stack_144_w_ECU) ( mc_new_mem_stack_144_w_ECU );
      VG_(track_new_mem_stack_160_w_ECU) ( mc_new_mem_stack_160_w_ECU );
      VG_(track_new_mem_stack_w_ECU)     ( mc_new_mem_stack_w_ECU     );
      VG_(track_new_mem_stack_signal)    ( mc_new_mem_w_tid_make_ECU  );
      /* Not doing origin tracking */
#     ifdef PERF_FAST_STACK
      VG_(track_new_mem_stack_4)   ( mc_new_mem_stack_4   );
      VG_(track_new_mem_stack_8)   ( mc_new_mem_stack_8   );
      VG_(track_new_mem_stack_12)  ( mc_new_mem_stack_12  );
      VG_(track_new_mem_stack_16)  ( mc_new_mem_stack_16  );
      VG_(track_new_mem_stack_32)  ( mc_new_mem_stack_32  );
      VG_(track_new_mem_stack_112) ( mc_new_mem_stack_112 );
      VG_(track_new_mem_stack_128) ( mc_new_mem_stack_128 );
      VG_(track_new_mem_stack_144) ( mc_new_mem_stack_144 );
      VG_(track_new_mem_stack_160) ( mc_new_mem_stack_160 );
      VG_(track_new_mem_stack)        ( mc_new_mem_stack        );
      VG_(track_new_mem_stack_signal) ( mc_new_mem_w_tid_no_ECU );

   // We assume that brk()/sbrk() does not initialise new memory.  Is this
   // accurate?  John Reiser says:
   //
   //   0) sbrk() can *decrease* process address space.  No zero fill is done
   //   for a decrease, not even the fragment on the high end of the last page
   //   that is beyond the new highest address.  For maximum safety and
   //   portability, then the bytes in the last page that reside above [the
   //   new] sbrk(0) should be considered to be uninitialized, but in practice
   //   it is exceedingly likely that they will retain their previous
   //   contents.
   //
   //   1) If an increase is large enough to require new whole pages, then
   //   those new whole pages (like all new pages) are zero-filled by the
   //   operating system.  So if sbrk(0) already is page aligned, then
   //   sbrk(PAGE_SIZE) *does* zero-fill the new memory.
   //
   //   2) Any increase that lies within an existing allocated page is not
   //   changed.  So if (x = sbrk(0)) is not page aligned, then
   //   sbrk(PAGE_SIZE) yields ((PAGE_SIZE -1) & -x) bytes which keep their
   //   existing contents, and an additional PAGE_SIZE bytes which are zeroed.
   //   ((PAGE_SIZE -1) & x) of them are "covered" by the sbrk(), and the rest
   //   of them come along for the ride because the operating system deals
   //   only in whole pages.  Again, for maximum safety and portability, then
   //   anything that lives above [the new] sbrk(0) should be considered
   //   uninitialized, but in practice will retain previous contents [zero in
   //   this case].
   //
   // A key property of sbrk/brk is that new whole pages that are supplied
   // by the operating system *do* get initialized to zero.
   //
   // As for the portability of all this:
   //
   // sbrk and brk are not POSIX.  However, any system that is a derivative
   // of *nix has sbrk and brk because too much software (such as
   // the Bourne shell) relies on the traditional memory map (.text,
   // .data+.bss, stack) and the existence of sbrk/brk.
   //
   // So we should arguably observe all this.  However:
   // - The current inaccuracy has caused maybe one complaint in seven years(?)
   // - Relying on the zeroed-ness of whole brk'd pages is pretty grotty... I
   //   doubt most programmers know the above information.
   // So I'm not terribly unhappy with marking it as undefined. --njn.
   //
   // [More: I think most of what John said only applies to sbrk().  It seems
   // that brk() always deals in whole pages.  And since this event deals
   // directly with brk(), not with sbrk(), perhaps it would be reasonable to
   // just mark all memory it allocates as defined.]
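   // Illustrative worked example (an editor's note, not part of the original
   // comment): take PAGE_SIZE = 4096 and a non-page-aligned break
   // x = 0x12345.  Then (PAGE_SIZE-1) & -x  =  0xFFF & -0x12345  =  0xCBB
   // =  3259, which is exactly the distance from x up to the next page
   // boundary at 0x13000.  So after sbrk(PAGE_SIZE), those 3259 bytes keep
   // whatever they already contained, and the freshly mapped page starting
   // at 0x13000 arrives zero-filled.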
#  if !defined(VGO_solaris)
   if (MC_(clo_mc_level) == 3)
      VG_(track_new_mem_brk)         ( mc_new_mem_w_tid_make_ECU );
      VG_(track_new_mem_brk)         ( mc_new_mem_w_tid_no_ECU );
   // On Solaris, brk memory has to be marked as defined, otherwise we get
   // many false positives.
   VG_(track_new_mem_brk)         ( make_mem_defined_w_tid );

   /* This origin tracking cache is huge (~100M), so only initialise
      it if we need to. */
   if (MC_(clo_mc_level) >= 3) {
      tl_assert(ocacheL1 != NULL);
      tl_assert(ocacheL2 != NULL);
      tl_assert(ocacheL1 == NULL);
      tl_assert(ocacheL2 == NULL);

   MC_(chunk_poolalloc) = VG_(newPA)
      (sizeof(MC_Chunk) + MC_(n_where_pointers)() * sizeof(ExeContext*),
       "mc.cMC.1 (MC_Chunk pools)",

   /* Do not check definedness of guest state if --undef-value-errors=no */
   if (MC_(clo_mc_level) >= 2)
      VG_(track_pre_reg_read) ( mc_pre_reg_read );

   if (VG_(clo_xtree_memory) == Vg_XTMemory_Full) {
      if (MC_(clo_keep_stacktraces) == KS_none
          || MC_(clo_keep_stacktraces) == KS_free)
         VG_(fmsg_bad_option)("--keep-stacktraces",
                              "To use --xtree-memory=full, you must"
                              " keep at least the alloc stacktrace\n");
      // Activate full xtree memory profiling.
      VG_(XTMemory_Full_init)(VG_(XT_filter_1top_and_maybe_below_main));
static void print_SM_info(const HChar* type, Int n_SMs)
   VG_(message)(Vg_DebugMsg,
      " memcheck: SMs: %s = %d (%luk, %luM)\n",
      n_SMs * sizeof(SecMap) / 1024UL,
      n_SMs * sizeof(SecMap) / (1024 * 1024UL) );
static void mc_print_stats (void)
   SizeT max_secVBit_szB, max_SMs_szB, max_shmem_szB;

   VG_(message)(Vg_DebugMsg, " memcheck: freelist: vol %lld length %lld\n",
                VG_(free_queue_volume), VG_(free_queue_length));
   VG_(message)(Vg_DebugMsg,
      " memcheck: sanity checks: %d cheap, %d expensive\n",
      n_sanity_cheap, n_sanity_expensive );
   VG_(message)(Vg_DebugMsg,
      " memcheck: auxmaps: %llu auxmap entries (%lluk, %lluM) in use\n",
      n_auxmap_L2_nodes * 64,
      n_auxmap_L2_nodes / 16 );
   VG_(message)(Vg_DebugMsg,
      " memcheck: auxmaps_L1: %llu searches, %llu cmps, ratio %llu:10\n",
      n_auxmap_L1_searches, n_auxmap_L1_cmps,
      (10ULL * n_auxmap_L1_cmps)
         / (n_auxmap_L1_searches ? n_auxmap_L1_searches : 1)
   VG_(message)(Vg_DebugMsg,
      " memcheck: auxmaps_L2: %llu searches, %llu nodes\n",
      n_auxmap_L2_searches, n_auxmap_L2_nodes

   print_SM_info("n_issued ", n_issued_SMs);
   print_SM_info("n_deissued ", n_deissued_SMs);
   print_SM_info("max_noaccess ", max_noaccess_SMs);
   print_SM_info("max_undefined", max_undefined_SMs);
   print_SM_info("max_defined ", max_defined_SMs);
   print_SM_info("max_non_DSM ", max_non_DSM_SMs);

   // Three DSMs, plus the non-DSM ones
   max_SMs_szB = (3 + max_non_DSM_SMs) * sizeof(SecMap);
   // The 3*sizeof(Word) bytes is the AVL node metadata size.
   // The VG_ROUNDUP is because the OSet pool allocator will/must align
   // the elements on pointer size.
   // Note that the pool allocator has some additional small overhead
   // which is not counted in the below.
   // Hardwiring this logic sucks, but I don't see how else to do it.
   max_secVBit_szB = max_secVBit_nodes *
         (3*sizeof(Word) + VG_ROUNDUP(sizeof(SecVBitNode), sizeof(void*)));
   max_shmem_szB = sizeof(primary_map) + max_SMs_szB + max_secVBit_szB;

   VG_(message)(Vg_DebugMsg,
      " memcheck: max sec V bit nodes: %d (%luk, %luM)\n",
      max_secVBit_nodes, max_secVBit_szB / 1024,
      max_secVBit_szB / (1024 * 1024));
   VG_(message)(Vg_DebugMsg,
      " memcheck: set_sec_vbits8 calls: %llu (new: %llu, updates: %llu)\n",
      sec_vbits_new_nodes + sec_vbits_updates,
      sec_vbits_new_nodes, sec_vbits_updates );
   VG_(message)(Vg_DebugMsg,
      " memcheck: max shadow mem size: %luk, %luM\n",
      max_shmem_szB / 1024, max_shmem_szB / (1024 * 1024));

   if (MC_(clo_mc_level) >= 3) {
      VG_(message)(Vg_DebugMsg,
                   " ocacheL1: %'12lu refs %'12lu misses (%'lu lossage)\n",
                   stats_ocacheL1_find,
                   stats_ocacheL1_misses,
                   stats_ocacheL1_lossage );
      VG_(message)(Vg_DebugMsg,
                   " ocacheL1: %'12lu at 0 %'12lu at 1\n",
                   stats_ocacheL1_find - stats_ocacheL1_misses
                      - stats_ocacheL1_found_at_1
                      - stats_ocacheL1_found_at_N,
                   stats_ocacheL1_found_at_1 );
      VG_(message)(Vg_DebugMsg,
                   " ocacheL1: %'12lu at 2+ %'12lu move-fwds\n",
                   stats_ocacheL1_found_at_N,
                   stats_ocacheL1_movefwds );
      VG_(message)(Vg_DebugMsg,
                   " ocacheL1: %'12lu sizeB %'12d useful\n",
                   (SizeT)sizeof(OCache),
                   4 * OC_W32S_PER_LINE * OC_LINES_PER_SET * OC_N_SETS );
      VG_(message)(Vg_DebugMsg,
                   " ocacheL2: %'12lu refs %'12lu misses\n",
                   stats__ocacheL2_refs,
                   stats__ocacheL2_misses );
      VG_(message)(Vg_DebugMsg,
                   " ocacheL2: %'9lu max nodes %'9lu curr nodes\n",
                   stats__ocacheL2_n_nodes_max,
                   stats__ocacheL2_n_nodes );
      VG_(message)(Vg_DebugMsg,
                   " niacache: %'12lu refs %'12lu misses\n",
                   stats__nia_cache_queries, stats__nia_cache_misses );
      tl_assert(ocacheL1 == NULL);
      tl_assert(ocacheL2 == NULL);
static void mc_fini ( Int exitcode )
   MC_(xtmemory_report) (VG_(clo_xtree_memory_file), True);
   MC_(print_malloc_stats)();

   if (MC_(clo_leak_check) != LC_Off) {
      LeakCheckParams lcp;
      HChar* xt_filename = NULL;
      lcp.mode = MC_(clo_leak_check);
      lcp.show_leak_kinds = MC_(clo_show_leak_kinds);
      lcp.heuristics = MC_(clo_leak_check_heuristics);
      lcp.errors_for_leak_kinds = MC_(clo_error_for_leak_kinds);
      lcp.deltamode = LCD_Any;
      lcp.max_loss_records_output = 999999999;
      lcp.requested_by_monitor_command = False;
      if (MC_(clo_xtree_leak)) {
         xt_filename = VG_(expand_file_name)("--xtree-leak-file",
                                             MC_(clo_xtree_leak_file));
         lcp.xt_filename = xt_filename;
         lcp.xt_filename = NULL;
      MC_(detect_memory_leaks)(1/*bogus ThreadId*/, &lcp);
      if (MC_(clo_xtree_leak))
         VG_(free)(xt_filename);
      if (VG_(clo_verbosity) == 1 && !VG_(clo_xml)) {
            "For a detailed leak analysis, rerun with: --leak-check=full\n"

   if (VG_(clo_verbosity) == 1 && !VG_(clo_xml)) {
      VG_(message)(Vg_UserMsg,
                   "For counts of detected and suppressed errors, rerun with: -v\n");

   if (MC_(any_value_errors) && !VG_(clo_xml) && VG_(clo_verbosity) >= 1
       && MC_(clo_mc_level) == 2) {
      VG_(message)(Vg_UserMsg,
                   "Use --track-origins=yes to see where "
                   "uninitialised values come from\n");

   /* Print a warning if any client-request generated ignore-ranges
      still exist.  It would be reasonable to expect that a properly
      written program would remove any such ranges before exiting, and
      since they are a bit on the dangerous side, let's comment.  By
      contrast ranges which are specified on the command line normally
      pertain to hardware mapped into the address space, and so we
      can't expect the client to have got rid of them. */
   if (gIgnoredAddressRanges) {
      for (i = 0; i < VG_(sizeRangeMap)(gIgnoredAddressRanges); i++) {
         UWord val     = IAR_INVALID;
         UWord key_min = ~(UWord)0;
         UWord key_max = (UWord)0;
         VG_(indexRangeMap)( &key_min, &key_max, &val,
                             gIgnoredAddressRanges, i );
         if (val != IAR_ClientReq)
         /* Print the offending range.  Also, if it is the first,
            print a banner before it. */
               "WARNING: exiting program has the following client-requested\n"
               "WARNING: address error disablement range(s) still in force,\n"
               "possibly as a result of some mistake in the use of the\n"
               "VALGRIND_{DISABLE,ENABLE}_ERROR_REPORTING_IN_RANGE macros.\n"
         VG_(umsg)(" [%u] 0x%016lx-0x%016lx %s\n",
                   i, key_min, key_max, showIARKind(val));

   VG_(message)(Vg_DebugMsg,
      "------ Valgrind's client block stats follow ---------------\n" );
   show_client_block_stats();
/* Mark the given addr/len unaddressable for the watchpoint implementation.
   The PointKind will be handled at access time. */
static Bool mc_mark_unaddressable_for_watchpoint (PointKind kind, Bool insert,
                                                  Addr addr, SizeT len)
   /* GDBTD this is somewhat fishy.  We might rather have to save the previous
      accessibility and definedness in gdbserver so as to allow restoring it
      properly.  Currently, we assume that the user only watches things
      which are properly addressable and defined */
      MC_(make_mem_noaccess) (addr, len);
      MC_(make_mem_defined)  (addr, len);
static void mc_pre_clo_init(void)
   VG_(details_name)            ("Memcheck");
   VG_(details_version)         (NULL);
   VG_(details_description)     ("a memory error detector");
   VG_(details_copyright_author)(
      "Copyright (C) 2002-2017, and GNU GPL'd, by Julian Seward et al.");
   VG_(details_bug_reports_to)  (VG_BUGS_TO);
   VG_(details_avg_translation_sizeB) ( 640 );

   VG_(basic_tool_funcs)          (mc_post_clo_init,

   VG_(needs_final_IR_tidy_pass)  ( MC_(final_tidy) );

   VG_(needs_core_errors)         ();
   VG_(needs_tool_errors)         (MC_(eq_Error),
                                   MC_(before_pp_Error),
                                   True,/*show TIDs for errors*/
                                   MC_(update_Error_extra),
                                   MC_(is_recognised_suppression),
                                   MC_(read_extra_suppression_info),
                                   MC_(error_matches_suppression),
                                   MC_(get_error_name),
                                   MC_(get_extra_suppression_info),
                                   MC_(print_extra_suppression_use),
                                   MC_(update_extra_suppression_use));
   VG_(needs_libc_freeres)        ();
   VG_(needs_cxx_freeres)         ();
   VG_(needs_command_line_options)(mc_process_cmd_line_options,
                                   mc_print_debug_usage);
   VG_(needs_client_requests)     (mc_handle_client_request);
   VG_(needs_sanity_checks)       (mc_cheap_sanity_check,
                                   mc_expensive_sanity_check);
   VG_(needs_print_stats)         (mc_print_stats);
   VG_(needs_info_location)       (MC_(pp_describe_addr));
   VG_(needs_malloc_replacement)  (MC_(malloc),
                                   MC_(__builtin_vec_new),
                                   MC_(__builtin_delete),
                                   MC_(__builtin_vec_delete),
                                   MC_(malloc_usable_size),
                                   MC_MALLOC_DEFAULT_REDZONE_SZB );
   MC_(Malloc_Redzone_SzB) = VG_(malloc_effective_client_redzone_size)();

   VG_(needs_xml_output)          ();

   VG_(track_new_mem_startup)     ( mc_new_mem_startup );

   // Handling of mmap and mprotect isn't simple (well, it is simple,
   // but the justification isn't.)  See comments above, just prior to
   VG_(track_new_mem_mmap)        ( mc_new_mem_mmap );
   VG_(track_change_mem_mprotect) ( mc_new_mem_mprotect );

   VG_(track_copy_mem_remap)      ( MC_(copy_address_range_state) );

   VG_(track_die_mem_stack_signal)( MC_(make_mem_noaccess) );
   VG_(track_die_mem_brk)         ( MC_(make_mem_noaccess) );
   VG_(track_die_mem_munmap)      ( MC_(make_mem_noaccess) );

   /* Defer the specification of the new_mem_stack functions to the
      post_clo_init function, since we need to first parse the command
      line before deciding which set to use. */

#  ifdef PERF_FAST_STACK
   VG_(track_die_mem_stack_4)     ( mc_die_mem_stack_4   );
   VG_(track_die_mem_stack_8)     ( mc_die_mem_stack_8   );
   VG_(track_die_mem_stack_12)    ( mc_die_mem_stack_12  );
   VG_(track_die_mem_stack_16)    ( mc_die_mem_stack_16  );
   VG_(track_die_mem_stack_32)    ( mc_die_mem_stack_32  );
   VG_(track_die_mem_stack_112)   ( mc_die_mem_stack_112 );
   VG_(track_die_mem_stack_128)   ( mc_die_mem_stack_128 );
   VG_(track_die_mem_stack_144)   ( mc_die_mem_stack_144 );
   VG_(track_die_mem_stack_160)   ( mc_die_mem_stack_160 );
   VG_(track_die_mem_stack)       ( mc_die_mem_stack     );

   VG_(track_ban_mem_stack)       ( MC_(make_mem_noaccess) );

   VG_(track_pre_mem_read)        ( check_mem_is_defined );
   VG_(track_pre_mem_read_asciiz) ( check_mem_is_defined_asciiz );
   VG_(track_pre_mem_write)       ( check_mem_is_addressable );
   VG_(track_post_mem_write)      ( mc_post_mem_write );

   VG_(track_post_reg_write)                  ( mc_post_reg_write );
   VG_(track_post_reg_write_clientcall_return)( mc_post_reg_write_clientcall );

   if (MC_(clo_mc_level) >= 2) {
      VG_(track_copy_mem_to_reg)  ( mc_copy_mem_to_reg );
      VG_(track_copy_reg_to_mem)  ( mc_copy_reg_to_mem );

   VG_(needs_watchpoint)          ( mc_mark_unaddressable_for_watchpoint );

   init_shadow_memory();
   // MC_(chunk_poolalloc) must be allocated in post_clo_init
   tl_assert(MC_(chunk_poolalloc) == NULL);
   MC_(malloc_list)  = VG_(HT_construct)( "MC_(malloc_list)" );
   MC_(mempool_list) = VG_(HT_construct)( "MC_(mempool_list)" );

   tl_assert( mc_expensive_sanity_check() );

   // {LOADV,STOREV}[8421] will all fail horribly if this isn't true.
   tl_assert(sizeof(UWord) == sizeof(Addr));
   // Call me paranoid.  I don't care.
   tl_assert(sizeof(void*) == sizeof(Addr));

   // BYTES_PER_SEC_VBIT_NODE must be a power of two.
   tl_assert(-1 != VG_(log2)(BYTES_PER_SEC_VBIT_NODE));

   /* This is small.  Always initialise it. */
   init_nia_to_ecu_cache();

   /* We can't initialise ocacheL1/ocacheL2 yet, since we don't know
      if we need to, since the command line args haven't been
      processed yet.  Hence defer it to mc_post_clo_init. */
   tl_assert(ocacheL1 == NULL);
   tl_assert(ocacheL2 == NULL);

   /* Check some important stuff.  See extensive comments above
      re UNALIGNED_OR_HIGH for background. */
#  if VG_WORDSIZE == 4
   tl_assert(sizeof(void*) == 4);
   tl_assert(sizeof(Addr)  == 4);
   tl_assert(sizeof(UWord) == 4);
   tl_assert(sizeof(Word)  == 4);
   tl_assert(MAX_PRIMARY_ADDRESS == 0xFFFFFFFFUL);
   tl_assert(MASK(1) == 0UL);
   tl_assert(MASK(2) == 1UL);
   tl_assert(MASK(4) == 3UL);
   tl_assert(MASK(8) == 7UL);
   tl_assert(VG_WORDSIZE == 8);
   tl_assert(sizeof(void*) == 8);
   tl_assert(sizeof(Addr)  == 8);
   tl_assert(sizeof(UWord) == 8);
   tl_assert(sizeof(Word)  == 8);
   tl_assert(MAX_PRIMARY_ADDRESS == 0x1FFFFFFFFFULL);
   tl_assert(MASK(1) == 0xFFFFFFE000000000ULL);
   tl_assert(MASK(2) == 0xFFFFFFE000000001ULL);
   tl_assert(MASK(4) == 0xFFFFFFE000000003ULL);
   tl_assert(MASK(8) == 0xFFFFFFE000000007ULL);
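   /* Illustrative note (an editor's sketch, not from this file): the values
      asserted above are all consistent with
      MASK(n) == ~MAX_PRIMARY_ADDRESS | (n-1).  For the 64-bit case,
      ~0x1FFFFFFFFF == 0xFFFFFFE000000000, and OR-ing in 7 gives the MASK(8)
      value checked above:

         MASK(8) == ~0x1FFFFFFFFFULL | 0x7 == 0xFFFFFFE000000007ULL

      So (a & MASK(n)) != 0 presumably means "a is not n-byte aligned, or a
      lies above MAX_PRIMARY_ADDRESS", which is the UNALIGNED_OR_HIGH test
      the surrounding comment refers to. */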
   /* Check some assertions to do with the instrumentation machinery. */
   MC_(do_instrumentation_startup_checks)();

   STATIC_ASSERT(sizeof(UWord) == sizeof(SizeT));

VG_DETERMINE_INTERFACE_VERSION(mc_pre_clo_init)
/*--------------------------------------------------------------------*/
/*--- end                                                mc_main.c ---*/
/*--------------------------------------------------------------------*/