/* -*- mode: C; c-basic-offset: 3; -*- */

/*--------------------------------------------------------------------*/
/*--- MemCheck: Maintain bitmaps of memory, tracking the          ---*/
/*--- accessibility (A) and validity (V) status of each byte.     ---*/
/*--------------------------------------------------------------------*/
/*
   This file is part of MemCheck, a heavyweight Valgrind tool for
   detecting memory errors.

   Copyright (C) 2000-2017 Julian Seward

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, see <http://www.gnu.org/licenses/>.

   The GNU General Public License is contained in the file COPYING.
*/
#include "pub_tool_basics.h"
#include "pub_tool_aspacemgr.h"
#include "pub_tool_gdbserver.h"
#include "pub_tool_poolalloc.h"
#include "pub_tool_hashtable.h"     // For mc_include.h
#include "pub_tool_libcbase.h"
#include "pub_tool_libcassert.h"
#include "pub_tool_libcprint.h"
#include "pub_tool_machine.h"
#include "pub_tool_mallocfree.h"
#include "pub_tool_options.h"
#include "pub_tool_oset.h"
#include "pub_tool_rangemap.h"
#include "pub_tool_replacemalloc.h"
#include "pub_tool_tooliface.h"
#include "pub_tool_threadstate.h"
#include "pub_tool_xarray.h"
#include "pub_tool_xtree.h"
#include "pub_tool_xtmemory.h"

#include "mc_include.h"
#include "memcheck.h" /* for client requests */
/* Set to 1 to do a little more sanity checking */
#define VG_DEBUG_MEMORY 0

#define DEBUG(fmt, args...) //VG_(printf)(fmt, ## args)
static void ocache_sarp_Set_Origins ( Addr, UWord, UInt ); /* fwds */
static void ocache_sarp_Clear_Origins ( Addr, UWord ); /* fwds */
/*------------------------------------------------------------*/
/*--- Fast-case knobs                                      ---*/
/*------------------------------------------------------------*/

// Comment these out to disable the fast cases (don't just set them to zero).

/* PERF_FAST_LOADV is in mc_include.h */
#define PERF_FAST_STOREV 1

#define PERF_FAST_SARP 1

#define PERF_FAST_STACK 1
#define PERF_FAST_STACK2 1
/* Change this to 1 to enable assertions on origin tracking cache fast
   paths */
#define OC_ENABLE_ASSERTIONS 0
/* Change this to 1 for experimental, higher precision origin tracking
   8- and 16-bit store handling. */
#define OC_PRECISION_STORE 1
/*------------------------------------------------------------*/
/*--- Comments on the origin tracking implementation       ---*/
/*------------------------------------------------------------*/

/* See detailed comment entitled
   AN OVERVIEW OF THE ORIGIN TRACKING IMPLEMENTATION
   which is contained further on in this file. */
/*------------------------------------------------------------*/
/*--- V bits and A bits                                    ---*/
/*------------------------------------------------------------*/

/* Conceptually, every byte value has 8 V bits, which track whether Memcheck
   thinks the corresponding value bit is defined.  And every memory byte
   has an A bit, which tracks whether Memcheck thinks the program can access
   it safely (ie. it's mapped, and has at least one of the RWX permission bits
   set).  So every N-bit register is shadowed with N V bits, and every memory
   byte is shadowed with 8 V bits and one A bit.

   In the implementation, we use two forms of compression (compressed V bits
   and distinguished secondary maps) to avoid the 9-bit-per-byte overhead
   as much as possible.

   Memcheck also tracks extra information about each heap block that is
   allocated, for detecting memory leaks and other purposes.
*/
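
/* To make the saving concrete: a naive shadow of 8 V bits plus one A bit
   costs 9 bits of metadata per byte tracked -- more shadow than memory.
   The compressed scheme described below needs only 2 bits per byte in the
   common cases, and the distinguished secondary maps make uniform 64kB
   chunks cost almost nothing at all. */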
/*------------------------------------------------------------*/
/*--- Basic A/V bitmap representation.                     ---*/
/*------------------------------------------------------------*/

/* All reads and writes are checked against a memory map (a.k.a. shadow
   memory), which records the state of all memory in the process.

   On 32-bit machines the memory map is organised as follows.
   The top 16 bits of an address are used to index into a top-level
   map table, containing 65536 entries.  Each entry is a pointer to a
   second-level map, which records the accessibility and validity
   permissions for the 65536 bytes indexed by the lower 16 bits of the
   address.  Each byte is represented by two bits (details are below).  So
   each second-level map contains 16384 bytes.  This two-level arrangement
   conveniently divides the 4G address space into 64k lumps, each size 64k
   bytes.

   All entries in the primary (top-level) map must point to a valid
   secondary (second-level) map.  Since many of the 64kB chunks will
   have the same status for every bit -- ie. noaccess (for unused
   address space) or entirely addressable and defined (for code segments) --
   there are three distinguished secondary maps, which indicate 'noaccess',
   'undefined' and 'defined'.  For these uniform 64kB chunks, the primary
   map entry points to the relevant distinguished map.  In practice,
   typically more than half of the addressable memory is represented with
   the 'undefined' or 'defined' distinguished secondary map, so it gives a
   good saving.  It also lets us set the V+A bits of large address regions
   quickly in set_address_range_perms().

   On 64-bit machines it's more complicated.  If we followed the same basic
   scheme we'd have a four-level table which would require too many memory
   accesses.  So instead the top-level map table has 2^20 entries (indexed
   using bits 16..35 of the address); this covers the bottom 64GB.  Any
   accesses above 64GB are handled with a slow, sparse auxiliary table.
   Valgrind's address space manager tries very hard to keep things below
   this 64GB barrier so that performance doesn't suffer too much.

   Note that this file has a lot of different functions for reading and
   writing shadow memory.  Only a couple are strictly necessary (eg.
   get_vabits2 and set_vabits2), most are just specialised for specific
   common cases to improve performance.

   Aside: the V+A bits are less precise than they could be -- we have no way
   of marking memory as read-only.  It would be great if we could add an
   extra state VA_BITSn_READONLY.  But then we'd have 5 different states,
   which requires 2.3 bits to hold, and there's no way to do that elegantly
   -- we'd have to double up to 4 bits of metadata per byte, which doesn't
   seem worth it.
*/
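
/* Illustrative example of the 32-bit organisation: for a = 0x12345678,
   the top 16 bits 0x1234 index the primary map, and the low 16 bits
   0x5678 locate the byte's two state bits within the selected
   second-level map. */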
/* --------------- Basic configuration --------------- */

/* Only change this.  N_PRIMARY_MAP *must* be a power of 2. */

#if VG_WORDSIZE == 4

/* cover the entire address space */
#  define N_PRIMARY_BITS  16

#else

/* Just handle the first 128G fast and the rest via auxiliary
   primaries.  If you change this, Memcheck will assert at startup.
   See the definition of UNALIGNED_OR_HIGH for extensive comments. */
#  define N_PRIMARY_BITS  21

#endif
/* Do not change this. */
#define N_PRIMARY_MAP  ( ((UWord)1) << N_PRIMARY_BITS)

/* Do not change this. */
#define MAX_PRIMARY_ADDRESS (Addr)((((Addr)65536) * N_PRIMARY_MAP)-1)
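
/* Worked example: with N_PRIMARY_BITS = 21, N_PRIMARY_MAP is 2^21, so
   MAX_PRIMARY_ADDRESS is 2^21 * 65536 - 1 = 0x1FFFFFFFFF and the main
   primary map covers the bottom 128GB; anything above that is handled
   via the auxiliary primary map. */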
/* --------------- Secondary maps --------------- */

// Each byte of memory conceptually has an A bit, which indicates its
// addressability, and 8 V bits, which indicate its definedness.
//
// But because very few bytes are partially defined, we can use a nice
// compression scheme to reduce the size of shadow memory.  Each byte of
// memory has 2 bits which indicate its state (ie. V+A bits):
//
//   00:  noaccess    (unaddressable but treated as fully defined)
//   01:  undefined   (addressable and fully undefined)
//   10:  defined     (addressable and fully defined)
//   11:  partdefined (addressable and partially defined)
//
// In the "partdefined" case, we use a secondary table to store the V bits.
// Each entry in the secondary-V-bits table maps a byte address to its 8 V
// bits.
//
// We store the compressed V+A bits in 8-bit chunks, ie. the V+A bits for
// four bytes (32 bits) of memory are in each chunk.  Hence the name
// "vabits8".  This lets us get the V+A bits for four bytes at a time
// easily (without having to do any shifting and/or masking), and that is a
// very common operation.  (Note that although each vabits8 chunk
// is 8 bits in size, it represents 32 bits of memory.)
// The representation is "inverse" little-endian... each 4 bytes of
// memory is represented by a 1 byte value, where:
//
// - the status of byte (a+0) is held in bits [1..0]
// - the status of byte (a+1) is held in bits [3..2]
// - the status of byte (a+2) is held in bits [5..4]
// - the status of byte (a+3) is held in bits [7..6]
//
// It's "inverse" because endianness normally describes a mapping from
// value bits to memory addresses;  in this case the mapping is inverted.
// Ie. instead of particular value bits being held in certain addresses, in
// this case certain addresses are represented by particular value bits.
// See insert_vabits2_into_vabits8() for an example.
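//
// Illustrative example: if the bytes at a+0, a+1, a+2 and a+3 are
// respectively defined, undefined, noaccess and partdefined, the vabits8
// chunk is (0x3 << 6) | (0x0 << 4) | (0x1 << 2) | (0x2 << 0) == 0xC6,
// ie. 11_00_01_10b, reading from byte a+3 down to byte a+0.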
// But note that we don't compress the V bits stored in registers;  they
// need to be explicit to make the shadow operations possible.  Therefore
// when moving values between registers and memory we need to convert
// between the expanded in-register format and the compressed in-memory
// format.  This isn't so difficult, it just requires careful attention in a
// few places.
// These represent eight bits of memory.
#define VA_BITS2_NOACCESS     0x0      // 00b
#define VA_BITS2_UNDEFINED    0x1      // 01b
#define VA_BITS2_DEFINED      0x2      // 10b
#define VA_BITS2_PARTDEFINED  0x3      // 11b

// These represent 16 bits of memory.
#define VA_BITS4_NOACCESS     0x0      // 00_00b
#define VA_BITS4_UNDEFINED    0x5      // 01_01b
#define VA_BITS4_DEFINED      0xa      // 10_10b

// These represent 32 bits of memory.
#define VA_BITS8_NOACCESS     0x00     // 00_00_00_00b
#define VA_BITS8_UNDEFINED    0x55     // 01_01_01_01b
#define VA_BITS8_DEFINED      0xaa     // 10_10_10_10b

// These represent 64 bits of memory.
#define VA_BITS16_NOACCESS    0x0000   // 00_00_00_00b x 2
#define VA_BITS16_UNDEFINED   0x5555   // 01_01_01_01b x 2
#define VA_BITS16_DEFINED     0xaaaa   // 10_10_10_10b x 2

// These represent 128 bits of memory.
#define VA_BITS32_UNDEFINED   0x55555555  // 01_01_01_01b x 4
#define SM_CHUNKS             16384    // Each SM covers 64k of memory.
#define SM_OFF(aaa)           (((aaa) & 0xffff) >> 2)
#define SM_OFF_16(aaa)        (((aaa) & 0xffff) >> 3)
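
// Worked example: for a = 0x4000567B the low 16 bits are 0x567B, so
// SM_OFF(a) == 0x567B >> 2 == 0x159E indexes the vabits8 chunk holding
// this byte's two V+A bits, and SM_OFF_16(a) == 0x567B >> 3 == 0xACF is
// the corresponding vabits16 index used by aligned 8-byte accesses.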
// Paranoia:  it's critical for performance that the requested inlining
// occurs.  So try extra hard.
#define INLINE    inline __attribute__((always_inline))
static INLINE Addr start_of_this_sm ( Addr a ) {
   return (a & (~SM_MASK));
}
static INLINE Bool is_start_of_sm ( Addr a ) {
   return (start_of_this_sm(a) == a);
}
STATIC_ASSERT(SM_CHUNKS % 2 == 0);

typedef
   union {
      UChar  vabits8[SM_CHUNKS];
      UShort vabits16[SM_CHUNKS/2];
   }
   SecMap;
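
// Hence each SecMap occupies SM_CHUNKS == 16k bytes while shadowing 64k
// bytes of address space, and the union view lets the fast paths read or
// write the V+A bits of an aligned 8-byte access as a single UShort.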
// 3 distinguished secondary maps, one for no-access, one for
// accessible but undefined, and one for accessible and defined.
// Distinguished secondaries may never be modified.
#define SM_DIST_NOACCESS   0
#define SM_DIST_UNDEFINED  1
#define SM_DIST_DEFINED    2

static SecMap sm_distinguished[3];
static INLINE Bool is_distinguished_sm ( SecMap* sm ) {
   return sm >= &sm_distinguished[0] && sm <= &sm_distinguished[2];
}
// Forward declaration
static void update_SM_counts(SecMap* oldSM, SecMap* newSM);
/* dist_sm points to one of our three distinguished secondaries.  Make
   a copy of it so that we can write to it.
*/
static SecMap* copy_for_writing ( SecMap* dist_sm )
{
   SecMap* new_sm;
   tl_assert(dist_sm == &sm_distinguished[0]
             || dist_sm == &sm_distinguished[1]
             || dist_sm == &sm_distinguished[2]);

   SysRes sres = VG_(am_shadow_alloc)(sizeof(SecMap));
   if (sr_isError(sres))
      VG_(out_of_memory_NORETURN)( "memcheck:allocate new SecMap",
                                   sizeof(SecMap), sr_Err(sres) );
   new_sm = (void *)(Addr)sr_Res(sres);
   VG_(memcpy)(new_sm, dist_sm, sizeof(SecMap));
   update_SM_counts(dist_sm, new_sm);
   return new_sm;
}
/* --------------- Stats --------------- */

static Int   n_issued_SMs      = 0;
static Int   n_deissued_SMs    = 0;
static Int   n_noaccess_SMs    = N_PRIMARY_MAP; // start with many noaccess DSMs
static Int   n_undefined_SMs   = 0;
static Int   n_defined_SMs     = 0;
static Int   n_non_DSM_SMs     = 0;
static Int   max_noaccess_SMs  = 0;
static Int   max_undefined_SMs = 0;
static Int   max_defined_SMs   = 0;
static Int   max_non_DSM_SMs   = 0;
/* # searches initiated in auxmap_L1, and # base cmps required */
static ULong n_auxmap_L1_searches  = 0;
static ULong n_auxmap_L1_cmps      = 0;
/* # of searches that missed in auxmap_L1 and therefore had to
   be handed to auxmap_L2.  And the number of nodes inserted. */
static ULong n_auxmap_L2_searches  = 0;
static ULong n_auxmap_L2_nodes     = 0;

static Int   n_sanity_cheap     = 0;
static Int   n_sanity_expensive = 0;

static Int   n_secVBit_nodes   = 0;
static Int   max_secVBit_nodes = 0;
static void update_SM_counts(SecMap* oldSM, SecMap* newSM)
{
   if      (oldSM == &sm_distinguished[SM_DIST_NOACCESS ]) n_noaccess_SMs --;
   else if (oldSM == &sm_distinguished[SM_DIST_UNDEFINED]) n_undefined_SMs--;
   else if (oldSM == &sm_distinguished[SM_DIST_DEFINED  ]) n_defined_SMs  --;
   else                                                  { n_non_DSM_SMs  --;
                                                           n_deissued_SMs ++; }

   if      (newSM == &sm_distinguished[SM_DIST_NOACCESS ]) n_noaccess_SMs ++;
   else if (newSM == &sm_distinguished[SM_DIST_UNDEFINED]) n_undefined_SMs++;
   else if (newSM == &sm_distinguished[SM_DIST_DEFINED  ]) n_defined_SMs  ++;
   else                                                  { n_non_DSM_SMs  ++;
                                                           n_issued_SMs   ++; }

   if (n_noaccess_SMs  > max_noaccess_SMs ) max_noaccess_SMs  = n_noaccess_SMs;
   if (n_undefined_SMs > max_undefined_SMs) max_undefined_SMs = n_undefined_SMs;
   if (n_defined_SMs   > max_defined_SMs  ) max_defined_SMs   = n_defined_SMs;
   if (n_non_DSM_SMs   > max_non_DSM_SMs  ) max_non_DSM_SMs   = n_non_DSM_SMs;
}
/* --------------- Primary maps --------------- */

/* The main primary map.  This covers some initial part of the address
   space, addresses 0 .. (N_PRIMARY_MAP << 16)-1.  The rest of it is
   handled using the auxiliary primary map.
*/
#if ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
    && (defined(VGP_arm_linux) \
        || defined(VGP_x86_linux) || defined(VGP_x86_solaris) || defined(VGP_x86_freebsd))
/* mc_main_asm.c needs visibility on a few things declared in this file.
   MC_MAIN_STATIC allows them to be defined static where that is ok, i.e. on
   platforms that are not using hand-coded asm statements. */
#define MC_MAIN_STATIC
#else
#define MC_MAIN_STATIC static
#endif
MC_MAIN_STATIC SecMap* primary_map[N_PRIMARY_MAP];
/* An entry in the auxiliary primary map.  base must be a 64k-aligned
   value, and sm points at the relevant secondary map.  As with the
   main primary map, the secondary may be either a real secondary, or
   one of the three distinguished secondaries.  DO NOT CHANGE THIS
   LAYOUT: the first word has to be the key for OSet fast lookups.
*/
typedef
   struct {
      Addr    base;
      SecMap* sm;
   }
   AuxMapEnt;

/* Tunable parameter: How big is the L1 queue? */
#define N_AUXMAP_L1 24

/* Tunable parameter: How far along the L1 queue to insert
   entries resulting from L2 lookups? */
#define AUXMAP_L1_INSERT_IX 12
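
/* (Presumed rationale for the insertion point: entries promoted from L2
   enter the queue in the middle rather than at the front, so a new entry
   must be re-used a few times before it can displace the genuinely hot
   entries at the head of the self-organising list.) */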
static struct {
          Addr       base;
          AuxMapEnt* ent; // pointer to the matching auxmap_L2 node
       }
       auxmap_L1[N_AUXMAP_L1];

static OSet* auxmap_L2 = NULL;
static void init_auxmap_L1_L2 ( void )
{
   Word i;
   for (i = 0; i < N_AUXMAP_L1; i++) {
      auxmap_L1[i].base = 0;
      auxmap_L1[i].ent  = NULL;
   }

   tl_assert(0 == offsetof(AuxMapEnt,base));
   tl_assert(sizeof(Addr) == sizeof(void*));
   auxmap_L2 = VG_(OSetGen_Create)( /*keyOff*/ offsetof(AuxMapEnt,base),
                                    /*fastCmp*/ NULL,
                                    VG_(malloc), "mc.iaLL.1", VG_(free) );
}
/* Check representation invariants; if OK return NULL; else a
   descriptive bit of text.  Also return the number of
   non-distinguished secondary maps referred to from the auxiliary
   primary map. */

static const HChar* check_auxmap_L1_L2_sanity ( Word* n_secmaps_found )
{
   Word i, j;
   /* On a 32-bit platform, the L2 and L1 tables should
      both remain empty forever.

      On a 64-bit platform:
      In the L2 table:
       all .base & 0xFFFF == 0
       all .base > MAX_PRIMARY_ADDRESS
      In the L1 table:
       all .base & 0xFFFF == 0
       all (.base > MAX_PRIMARY_ADDRESS
            and .ent points to an AuxMapEnt with the same .base)
           or
           (.base == 0 and .ent == NULL)
   */
   *n_secmaps_found = 0;
   if (sizeof(void*) == 4) {
      /* 32-bit platform */
      if (VG_(OSetGen_Size)(auxmap_L2) != 0)
         return "32-bit: auxmap_L2 is non-empty";
      for (i = 0; i < N_AUXMAP_L1; i++)
         if (auxmap_L1[i].base != 0 || auxmap_L1[i].ent != NULL)
            return "32-bit: auxmap_L1 is non-empty";
   } else {
      /* 64-bit platform */
      UWord elems_seen = 0;
      AuxMapEnt *elem, *res;
      AuxMapEnt key;
      /* L2 table */
      VG_(OSetGen_ResetIter)(auxmap_L2);
      while ( (elem = VG_(OSetGen_Next)(auxmap_L2)) ) {
         elems_seen++;
         if (0 != (elem->base & (Addr)0xFFFF))
            return "64-bit: nonzero .base & 0xFFFF in auxmap_L2";
         if (elem->base <= MAX_PRIMARY_ADDRESS)
            return "64-bit: .base <= MAX_PRIMARY_ADDRESS in auxmap_L2";
         if (elem->sm == NULL)
            return "64-bit: .sm in _L2 is NULL";
         if (!is_distinguished_sm(elem->sm))
            (*n_secmaps_found)++;
      }
      if (elems_seen != n_auxmap_L2_nodes)
         return "64-bit: disagreement on number of elems in _L2";
      /* Check L1-L2 correspondence */
      for (i = 0; i < N_AUXMAP_L1; i++) {
         if (auxmap_L1[i].base == 0 && auxmap_L1[i].ent == NULL)
            continue;
         if (0 != (auxmap_L1[i].base & (Addr)0xFFFF))
            return "64-bit: nonzero .base & 0xFFFF in auxmap_L1";
         if (auxmap_L1[i].base <= MAX_PRIMARY_ADDRESS)
            return "64-bit: .base <= MAX_PRIMARY_ADDRESS in auxmap_L1";
         if (auxmap_L1[i].ent == NULL)
            return "64-bit: .ent is NULL in auxmap_L1";
         if (auxmap_L1[i].ent->base != auxmap_L1[i].base)
            return "64-bit: _L1 and _L2 bases are inconsistent";
         /* Look it up in auxmap_L2. */
         key.base = auxmap_L1[i].base;
         key.sm   = NULL;
         res = VG_(OSetGen_Lookup)(auxmap_L2, &key);
         if (res == NULL)
            return "64-bit: _L1 .base not found in _L2";
         if (res != auxmap_L1[i].ent)
            return "64-bit: _L1 .ent disagrees with _L2 entry";
      }
      /* Check L1 contains no duplicates */
      for (i = 0; i < N_AUXMAP_L1; i++) {
         if (auxmap_L1[i].base == 0)
            continue;
         for (j = i+1; j < N_AUXMAP_L1; j++) {
            if (auxmap_L1[j].base == 0)
               continue;
            if (auxmap_L1[j].base == auxmap_L1[i].base)
               return "64-bit: duplicate _L1 .base entries";
         }
      }
   }
   return NULL; /* ok */
}
static void insert_into_auxmap_L1_at ( Word rank, AuxMapEnt* ent )
{
   Word i;
   tl_assert(ent);
   tl_assert(rank >= 0 && rank < N_AUXMAP_L1);
   for (i = N_AUXMAP_L1-1; i > rank; i--)
      auxmap_L1[i] = auxmap_L1[i-1];
   auxmap_L1[rank].base = ent->base;
   auxmap_L1[rank].ent  = ent;
}
static INLINE AuxMapEnt* maybe_find_in_auxmap ( Addr a )
{
   AuxMapEnt  key;
   AuxMapEnt* res;
   Word       i;

   tl_assert(a > MAX_PRIMARY_ADDRESS);
   a &= ~(Addr)0xFFFF;

   /* First search the front-cache, which is a self-organising
      list containing the most popular entries. */

   if (LIKELY(auxmap_L1[0].base == a))
      return auxmap_L1[0].ent;
   if (LIKELY(auxmap_L1[1].base == a)) {
      Addr       t_base = auxmap_L1[0].base;
      AuxMapEnt* t_ent  = auxmap_L1[0].ent;
      auxmap_L1[0].base = auxmap_L1[1].base;
      auxmap_L1[0].ent  = auxmap_L1[1].ent;
      auxmap_L1[1].base = t_base;
      auxmap_L1[1].ent  = t_ent;
      return auxmap_L1[0].ent;
   }

   n_auxmap_L1_searches++;

   for (i = 0; i < N_AUXMAP_L1; i++) {
      if (auxmap_L1[i].base == a) {
         break;
      }
   }
   tl_assert(i >= 0 && i <= N_AUXMAP_L1);

   n_auxmap_L1_cmps += (ULong)(i+1);

   if (i < N_AUXMAP_L1) {
      if (i > 0) {
         Addr       t_base = auxmap_L1[i-1].base;
         AuxMapEnt* t_ent  = auxmap_L1[i-1].ent;
         auxmap_L1[i-1].base = auxmap_L1[i-0].base;
         auxmap_L1[i-1].ent  = auxmap_L1[i-0].ent;
         auxmap_L1[i-0].base = t_base;
         auxmap_L1[i-0].ent  = t_ent;
         i--;
      }
      return auxmap_L1[i].ent;
   }

   n_auxmap_L2_searches++;

   /* First see if we already have it. */
   key.base = a;
   key.sm   = 0;

   res = VG_(OSetGen_Lookup)(auxmap_L2, &key);
   if (res)
      insert_into_auxmap_L1_at( AUXMAP_L1_INSERT_IX, res );
   return res;
}
static AuxMapEnt* find_or_alloc_in_auxmap ( Addr a )
{
   AuxMapEnt *nyu, *res;

   /* First see if we already have it. */
   res = maybe_find_in_auxmap( a );
   if (LIKELY(res))
      return res;

   /* Ok, there's no entry in the secondary map, so we'll have
      to allocate one. */
   a &= ~(Addr)0xFFFF;

   nyu = (AuxMapEnt*) VG_(OSetGen_AllocNode)( auxmap_L2, sizeof(AuxMapEnt) );
   nyu->base = a;
   nyu->sm   = &sm_distinguished[SM_DIST_NOACCESS];
   VG_(OSetGen_Insert)( auxmap_L2, nyu );
   insert_into_auxmap_L1_at( AUXMAP_L1_INSERT_IX, nyu );
   n_auxmap_L2_nodes++;
   return nyu;
}
/* --------------- SecMap fundamentals --------------- */

// In all these, 'low' means it's definitely in the main primary map,
// 'high' means it's definitely in the auxiliary table.
static INLINE UWord get_primary_map_low_offset ( Addr a )
{
   UWord pm_off = a >> 16;
   return pm_off;
}
static INLINE SecMap** get_secmap_low_ptr ( Addr a )
{
   UWord pm_off = a >> 16;
#  if VG_DEBUG_MEMORY >= 1
   tl_assert(pm_off < N_PRIMARY_MAP);
#  endif
   return &primary_map[ pm_off ];
}
static INLINE SecMap** get_secmap_high_ptr ( Addr a )
{
   AuxMapEnt* am = find_or_alloc_in_auxmap(a);
   return &am->sm;
}
static INLINE SecMap** get_secmap_ptr ( Addr a )
{
   return ( a <= MAX_PRIMARY_ADDRESS
          ? get_secmap_low_ptr(a)
          : get_secmap_high_ptr(a));
}
static INLINE SecMap* get_secmap_for_reading_low ( Addr a )
{
   return *get_secmap_low_ptr(a);
}

static INLINE SecMap* get_secmap_for_reading_high ( Addr a )
{
   return *get_secmap_high_ptr(a);
}
static INLINE SecMap* get_secmap_for_writing_low(Addr a)
{
   SecMap** p = get_secmap_low_ptr(a);
   if (UNLIKELY(is_distinguished_sm(*p)))
      *p = copy_for_writing(*p);
   return *p;
}

static INLINE SecMap* get_secmap_for_writing_high ( Addr a )
{
   SecMap** p = get_secmap_high_ptr(a);
   if (UNLIKELY(is_distinguished_sm(*p)))
      *p = copy_for_writing(*p);
   return *p;
}
/* Produce the secmap for 'a', either from the primary map or by
   ensuring there is an entry for it in the aux primary map.  The
   secmap may be a distinguished one as the caller will only want to
   be able to read it.
*/
static INLINE SecMap* get_secmap_for_reading ( Addr a )
{
   return ( a <= MAX_PRIMARY_ADDRESS
          ? get_secmap_for_reading_low (a)
          : get_secmap_for_reading_high(a) );
}
/* Produce the secmap for 'a', either from the primary map or by
   ensuring there is an entry for it in the aux primary map.  The
   secmap may not be a distinguished one, since the caller will want
   to be able to write it.  If it is a distinguished secondary, make a
   writable copy of it, install it, and return the copy instead.  (COW
   semantics).
*/
static INLINE SecMap* get_secmap_for_writing ( Addr a )
{
   return ( a <= MAX_PRIMARY_ADDRESS
          ? get_secmap_for_writing_low (a)
          : get_secmap_for_writing_high(a) );
}
/* If 'a' has a SecMap, produce it.  Else produce NULL.  But don't
   allocate one if one doesn't already exist.  This is used by the
   leak checker.
*/
static SecMap* maybe_get_secmap_for ( Addr a )
{
   if (a <= MAX_PRIMARY_ADDRESS) {
      return get_secmap_for_reading_low(a);
   } else {
      AuxMapEnt* am = maybe_find_in_auxmap(a);
      return am ? am->sm : NULL;
   }
}
/* --------------- Fundamental functions --------------- */

static INLINE
void insert_vabits2_into_vabits8 ( Addr a, UChar vabits2, UChar* vabits8 )
{
   UInt shift =  (a & 3)  << 1;        // shift by 0, 2, 4, or 6
   *vabits8  &= ~(0x3     << shift);   // mask out the two old bits
   *vabits8  |=  (vabits2 << shift);   // mask in the two new bits
}
static INLINE
void insert_vabits4_into_vabits8 ( Addr a, UChar vabits4, UChar* vabits8 )
{
   UInt shift;
   tl_assert(VG_IS_2_ALIGNED(a));      // Must be 2-aligned
   shift     =  (a & 2)   << 1;        // shift by 0 or 4
   *vabits8 &= ~(0xf      << shift);   // mask out the four old bits
   *vabits8 |=  (vabits4  << shift);   // mask in the four new bits
}
static INLINE
UChar extract_vabits2_from_vabits8 ( Addr a, UChar vabits8 )
{
   UInt shift = (a & 3) << 1;          // shift by 0, 2, 4, or 6
   vabits8 >>= shift;                  // shift the two bits to the bottom
   return 0x3 & vabits8;               // mask out the rest
}
static INLINE
UChar extract_vabits4_from_vabits8 ( Addr a, UChar vabits8 )
{
   UInt shift;
   tl_assert(VG_IS_2_ALIGNED(a));      // Must be 2-aligned
   shift = (a & 2) << 1;               // shift by 0 or 4
   vabits8 >>= shift;                  // shift the four bits to the bottom
   return 0xf & vabits8;               // mask out the rest
}
// Note that these four are only used in slow cases.  The fast cases do
// clever things like combine the auxmap check (in
// get_secmap_{read,writ}able) with alignment checks.

// *** WARNING! ***
// Any time this function is called, if it is possible that vabits2
// is equal to VA_BITS2_PARTDEFINED, then the corresponding entry in the
// sec-V-bits table must also be set!
static INLINE
void set_vabits2 ( Addr a, UChar vabits2 )
{
   SecMap* sm       = get_secmap_for_writing(a);
   UWord   sm_off   = SM_OFF(a);
   insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) );
}
static INLINE
UChar get_vabits2 ( Addr a )
{
   SecMap* sm       = get_secmap_for_reading(a);
   UWord   sm_off   = SM_OFF(a);
   UChar   vabits8  = sm->vabits8[sm_off];
   return extract_vabits2_from_vabits8(a, vabits8);
}
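
// Illustrative usage (a sketch, not code called anywhere): marking one
// byte as undefined and reading its state back would be
//    set_vabits2(a, VA_BITS2_UNDEFINED);
//    tl_assert(get_vabits2(a) == VA_BITS2_UNDEFINED);
// with get_secmap_for_writing() transparently COW-copying a distinguished
// secondary map if 'a' currently lives in one.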
// *** WARNING! ***
// Any time this function is called, if it is possible that any of the
// 4 2-bit fields in vabits8 are equal to VA_BITS2_PARTDEFINED, then the
// corresponding entry(s) in the sec-V-bits table must also be set!
static INLINE
UChar get_vabits8_for_aligned_word32 ( Addr a )
{
   SecMap* sm       = get_secmap_for_reading(a);
   UWord   sm_off   = SM_OFF(a);
   UChar   vabits8  = sm->vabits8[sm_off];
   return vabits8;
}
static INLINE
void set_vabits8_for_aligned_word32 ( Addr a, UChar vabits8 )
{
   SecMap* sm       = get_secmap_for_writing(a);
   UWord   sm_off   = SM_OFF(a);
   sm->vabits8[sm_off] = vabits8;
}
// Forward declarations
static UWord get_sec_vbits8(Addr a);
static void  set_sec_vbits8(Addr a, UWord vbits8);
// Returns False if there was an addressability error.
static INLINE
Bool set_vbits8 ( Addr a, UChar vbits8 )
{
   Bool  ok      = True;
   UChar vabits2 = get_vabits2(a);
   if ( VA_BITS2_NOACCESS != vabits2 ) {
      // Addressable.  Convert in-register format to in-memory format.
      // Also remove any existing sec V bit entry for the byte if no
      // longer necessary.
      if      ( V_BITS8_DEFINED   == vbits8 ) { vabits2 = VA_BITS2_DEFINED;   }
      else if ( V_BITS8_UNDEFINED == vbits8 ) { vabits2 = VA_BITS2_UNDEFINED; }
      else    {                                 vabits2 = VA_BITS2_PARTDEFINED;
                                                set_sec_vbits8(a, vbits8);  }
      set_vabits2(a, vabits2);

   } else {
      // Unaddressable!  Do nothing -- when writing to unaddressable
      // memory it acts as a black hole, and the V bits can never be seen
      // again.  So we don't have to write them at all.
      ok = False;
   }
   return ok;
}
// Returns False if there was an addressability error.  In that case, we put
// all defined bits into vbits8.
static INLINE
Bool get_vbits8 ( Addr a, UChar* vbits8 )
{
   Bool  ok      = True;
   UChar vabits2 = get_vabits2(a);

   // Convert the in-memory format to in-register format.
   if      ( VA_BITS2_DEFINED   == vabits2 ) { *vbits8 = V_BITS8_DEFINED;   }
   else if ( VA_BITS2_UNDEFINED == vabits2 ) { *vbits8 = V_BITS8_UNDEFINED; }
   else if ( VA_BITS2_NOACCESS  == vabits2 ) {
      *vbits8 = V_BITS8_DEFINED;    // Make V bits defined!
      ok = False;
   } else {
      tl_assert( VA_BITS2_PARTDEFINED == vabits2 );
      *vbits8 = get_sec_vbits8(a);
   }
   return ok;
}
/* --------------- Secondary V bit table ------------ */

// This table holds the full V bit pattern for partially-defined bytes
// (PDBs) that are represented by VA_BITS2_PARTDEFINED in the main shadow
// memory.
//
// Note: the nodes in this table can become stale.  Eg. if you write a PDB,
// then overwrite the same address with a fully defined byte, the sec-V-bit
// node will not necessarily be removed.  This is because checking for
// whether removal is necessary would slow down the fast paths.
//
// To avoid the stale nodes building up too much, we periodically (once the
// table reaches a certain size) garbage collect (GC) the table by
// traversing it and evicting any nodes which no longer contain a PDB.
// If more than a certain proportion of nodes survived, we increase the
// table size so that GCs occur less often.
//
// This policy is designed to avoid bad table bloat in the worst case where
// a program creates huge numbers of stale PDBs -- we would get this bloat
// if we had no GC -- while handling well the case where a node becomes
// stale but shortly afterwards is rewritten with a PDB and so becomes
// non-stale again (which happens quite often, eg. in perf/bz2).  If we
// removed all stale nodes as soon as possible, we would just end up
// re-adding a lot of them later.  The "sufficiently stale" approach avoids
// this.  (If a program has many live PDBs, performance will just suck,
// there's no way around that.)
//
// Further comments, JRS 14 Feb 2012.  It turns out that the policy of
// holding on to stale entries for 2 GCs before discarding them can lead
// to massive space leaks.  So we are changing to an arrangement where
// lines are evicted as soon as they are observed to be stale during a
// GC.  This also has a side benefit of allowing the sufficiently_stale
// field to be removed from the SecVBitNode struct, reducing its size by
// 8 bytes, which is a substantial space saving considering that the
// struct was previously 32 or so bytes, on a 64 bit target.
//
// In order to try and mitigate the problem that the "sufficiently stale"
// heuristic was designed to avoid, the table size is allowed to drift
// up ("DRIFTUP") slowly to 80000, even if the residency is low.  This
// means that nodes will exist in the table longer on average, and hopefully
// will be deleted and re-added less frequently.
//
// The previous scaling up mechanism (now called STEPUP) is retained:
// if residency exceeds 50%, the table is scaled up, although by a
// factor sqrt(2) rather than 2 as before.  This effectively doubles the
// frequency of GCs when there are many PDBs and reduces the tendency of
// stale PDBs to reside for long periods in the table.
static OSet* secVBitTable;

// Stats
static ULong sec_vbits_new_nodes = 0;
static ULong sec_vbits_updates   = 0;
// This must be a power of two;  this is checked in mc_pre_clo_init().
// The size chosen here is a trade-off:  if the nodes are bigger (ie. cover
// a larger address range) they take more space but we can get multiple
// partially-defined bytes in one if they are close to each other, reducing
// the number of total nodes.  In practice sometimes they are clustered (eg.
// perf/bz2 repeatedly writes then reads more than 20,000 in a contiguous
// row), but often not.  So we choose something intermediate.
#define BYTES_PER_SEC_VBIT_NODE     16
// We make the table bigger by a factor of STEPUP_GROWTH_FACTOR if
// more than this many nodes survive a GC.
#define STEPUP_SURVIVOR_PROPORTION  0.5
#define STEPUP_GROWTH_FACTOR        1.414213562

// If the above heuristic doesn't apply, then we may make the table
// slightly bigger, by a factor of DRIFTUP_GROWTH_FACTOR, if more than
// this many nodes survive a GC, _and_ the total table size does
// not exceed a fixed limit.  The numbers are somewhat arbitrary, but
// work tolerably well on long Firefox runs.  The scaleup ratio of 1.5%
// gradually but effectively reduces residency and increases the time
// between GCs for programs with small numbers of PDBs.  The 80000 limit
// effectively limits the table size to around 2MB for programs with
// small numbers of PDBs, whilst giving a reasonably long lifetime to
// entries, to try and reduce the costs resulting from deleting and
// re-adding of entries.
#define DRIFTUP_SURVIVOR_PROPORTION 0.15
#define DRIFTUP_GROWTH_FACTOR       1.015
#define DRIFTUP_MAX_SIZE            80000
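
// Worked example of the sizing policy: with secVBitLimit at its initial
// 1000, a GC leaving more than 500 survivors (50%) STEPs the limit UP to
// about 1000 * 1.414 == 1414, whereas one leaving, say, 200 survivors
// (more than 15%) merely DRIFTs it UP to 1000 * 1.015 == 1015.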
// We GC the table when it gets this many nodes in it, ie. it's effectively
// the table size.  It can change.
static Int  secVBitLimit = 1000;

// The number of GCs done, used to age sec-V-bit nodes for eviction.
// Because it's unsigned, wrapping doesn't matter -- the right answer will
// come out anyway.
static UInt GCs_done = 0;

typedef
   struct {
      Addr  a;
      UChar vbits8[BYTES_PER_SEC_VBIT_NODE];
   }
   SecVBitNode;
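
// Illustrative example: with BYTES_PER_SEC_VBIT_NODE == 16, a PDB at
// address 0x1003 lives in the node keyed by VG_ROUNDDN(0x1003, 16) ==
// 0x1000, at index 0x1003 % 16 == 3 of that node's vbits8[] array.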
static OSet* createSecVBitTable(void)
{
   OSet* newSecVBitTable;
   newSecVBitTable = VG_(OSetGen_Create_With_Pool)
      ( offsetof(SecVBitNode, a),
        NULL, // use fast comparisons
        VG_(malloc), "mc.cSVT.1 (sec VBit table)",
        VG_(free),
        1000,
        sizeof(SecVBitNode));
   return newSecVBitTable;
}
static void gcSecVBitTable(void)
{
   OSet*        secVBitTable2;
   SecVBitNode* n;
   Int          i, n_nodes = 0, n_survivors = 0;

   GCs_done++;

   // Create the new table.
   secVBitTable2 = createSecVBitTable();

   // Traverse the table, moving fresh nodes into the new table.
   VG_(OSetGen_ResetIter)(secVBitTable);
   while ( (n = VG_(OSetGen_Next)(secVBitTable)) ) {
      // Keep node if any of its bytes are non-stale.  Using
      // get_vabits2() for the lookup is not very efficient, but I don't
      // think it matters.
      for (i = 0; i < BYTES_PER_SEC_VBIT_NODE; i++) {
         if (VA_BITS2_PARTDEFINED == get_vabits2(n->a + i)) {
            // Found a non-stale byte, so keep =>
            // Insert a copy of the node into the new table.
            SecVBitNode* n2 =
               VG_(OSetGen_AllocNode)(secVBitTable2, sizeof(SecVBitNode));
            *n2 = *n;
            VG_(OSetGen_Insert)(secVBitTable2, n2);
            break;
         }
      }
   }

   // Get the before and after sizes.
   n_nodes     = VG_(OSetGen_Size)(secVBitTable);
   n_survivors = VG_(OSetGen_Size)(secVBitTable2);

   // Destroy the old table, and put the new one in its place.
   VG_(OSetGen_Destroy)(secVBitTable);
   secVBitTable = secVBitTable2;

   if (VG_(clo_verbosity) > 1 && n_nodes != 0) {
      VG_(message)(Vg_DebugMsg, "memcheck GC: %d nodes, %d survivors (%.1f%%)\n",
                   n_nodes, n_survivors, n_survivors * 100.0 / n_nodes);
   }

   // Increase table size if necessary.
   if ((Double)n_survivors
       > ((Double)secVBitLimit * STEPUP_SURVIVOR_PROPORTION)) {
      secVBitLimit = (Int)((Double)secVBitLimit * (Double)STEPUP_GROWTH_FACTOR);
      if (VG_(clo_verbosity) > 1)
         VG_(message)(Vg_DebugMsg,
                      "memcheck GC: %d new table size (stepup)\n",
                      secVBitLimit);
   }
   else
   if (secVBitLimit < DRIFTUP_MAX_SIZE
       && (Double)n_survivors
          > ((Double)secVBitLimit * DRIFTUP_SURVIVOR_PROPORTION)) {
      secVBitLimit = (Int)((Double)secVBitLimit * (Double)DRIFTUP_GROWTH_FACTOR);
      if (VG_(clo_verbosity) > 1)
         VG_(message)(Vg_DebugMsg,
                      "memcheck GC: %d new table size (driftup)\n",
                      secVBitLimit);
   }
}
static UWord get_sec_vbits8(Addr a)
{
   Addr         aAligned = VG_ROUNDDN(a, BYTES_PER_SEC_VBIT_NODE);
   Int          amod     = a % BYTES_PER_SEC_VBIT_NODE;
   SecVBitNode* n        = VG_(OSetGen_Lookup)(secVBitTable, &aAligned);
   UChar        vbits8;
   tl_assert2(n, "get_sec_vbits8: no node for address %p (%p)\n", aAligned, a);
   // Shouldn't be fully defined or fully undefined -- those cases shouldn't
   // make it to the secondary V bits table.
   vbits8 = n->vbits8[amod];
   tl_assert(V_BITS8_DEFINED != vbits8 && V_BITS8_UNDEFINED != vbits8);
   return vbits8;
}
static void set_sec_vbits8(Addr a, UWord vbits8)
{
   Addr         aAligned = VG_ROUNDDN(a, BYTES_PER_SEC_VBIT_NODE);
   Int          i, amod  = a % BYTES_PER_SEC_VBIT_NODE;
   SecVBitNode* n        = VG_(OSetGen_Lookup)(secVBitTable, &aAligned);
   // Shouldn't be fully defined or fully undefined -- those cases shouldn't
   // make it to the secondary V bits table.
   tl_assert(V_BITS8_DEFINED != vbits8 && V_BITS8_UNDEFINED != vbits8);
   if (n) {
      n->vbits8[amod] = vbits8;     // update
      sec_vbits_updates++;
   } else {
      // Do a table GC if necessary.  Nb: do this before creating and
      // inserting the new node, to avoid erroneously GC'ing the new node.
      if (secVBitLimit == VG_(OSetGen_Size)(secVBitTable)) {
         gcSecVBitTable();
      }

      // New node:  assign the specific byte, make the rest invalid (they
      // should never be read as-is, but be cautious).
      n = VG_(OSetGen_AllocNode)(secVBitTable, sizeof(SecVBitNode));
      n->a = aAligned;
      for (i = 0; i < BYTES_PER_SEC_VBIT_NODE; i++) {
         n->vbits8[i] = V_BITS8_UNDEFINED;
      }
      n->vbits8[amod] = vbits8;

      // Insert the new node.
      VG_(OSetGen_Insert)(secVBitTable, n);
      sec_vbits_new_nodes++;

      n_secVBit_nodes = VG_(OSetGen_Size)(secVBitTable);
      if (n_secVBit_nodes > max_secVBit_nodes)
         max_secVBit_nodes = n_secVBit_nodes;
   }
}
/* --------------- Endianness helpers --------------- */

/* Returns the offset in memory of the byteno-th least significant byte
   in a wordszB-sized word, given the specified endianness. */
static INLINE UWord byte_offset_w ( UWord wordszB, Bool bigendian,
                                    UWord byteno ) {
   return bigendian ? (wordszB-1-byteno) : byteno;
}
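
/* Illustrative example: for a 4-byte word, byte_offset_w(4, False, 0)
   == 0, since on little-endian targets the least significant byte sits
   at the lowest address, while byte_offset_w(4, True, 0) == 3, since on
   big-endian targets it sits at the highest. */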
/* --------------- Ignored address ranges --------------- */

/* Denotes the address-error-reportability status for address ranges:
      IAR_NotIgnored:  the usual case -- report errors in this range
      IAR_CommandLine: don't report errors -- from command line setting
      IAR_ClientReq:   don't report errors -- from client request
*/
typedef
   enum { IAR_INVALID=99,
          IAR_NotIgnored,
          IAR_CommandLine,
          IAR_ClientReq }
   IARKind;

static const HChar* showIARKind ( IARKind iark )
{
   switch (iark) {
      case IAR_INVALID:     return "INVALID";
      case IAR_NotIgnored:  return "NotIgnored";
      case IAR_CommandLine: return "CommandLine";
      case IAR_ClientReq:   return "ClientReq";
      default:              return "???";
   }
}
1109 static RangeMap
* gIgnoredAddressRanges
= NULL
;
1111 static void init_gIgnoredAddressRanges ( void )
1113 if (LIKELY(gIgnoredAddressRanges
!= NULL
))
1115 gIgnoredAddressRanges
= VG_(newRangeMap
)( VG_(malloc
), "mc.igIAR.1",
1116 VG_(free
), IAR_NotIgnored
);
Bool MC_(in_ignored_range) ( Addr a )
{
   if (LIKELY(gIgnoredAddressRanges == NULL))
      return False;
   UWord how     = IAR_INVALID;
   UWord key_min = ~(UWord)0;
   UWord key_max = (UWord)0;
   VG_(lookupRangeMap)(&key_min, &key_max, &how, gIgnoredAddressRanges, a);
   tl_assert(key_min <= a && a <= key_max);
   switch (how) {
      case IAR_NotIgnored:  return False;
      case IAR_CommandLine: return True;
      case IAR_ClientReq:   return True;
      default: break; /* invalid */
   }
   VG_(tool_panic)("MC_(in_ignore_range)");
}
Bool MC_(in_ignored_range_below_sp) ( Addr sp, Addr a, UInt szB )
{
   if (LIKELY(!MC_(clo_ignore_range_below_sp)))
      return False;
   tl_assert(szB >= 1 && szB <= 32);
   tl_assert(MC_(clo_ignore_range_below_sp__first_offset)
             > MC_(clo_ignore_range_below_sp__last_offset));
   Addr range_lo = sp - MC_(clo_ignore_range_below_sp__first_offset);
   Addr range_hi = sp - MC_(clo_ignore_range_below_sp__last_offset);
   if (range_lo >= range_hi) {
      /* Bizarre.  We have a wraparound situation.  What should we do? */
      return False; // Play safe
   } else {
      /* This is the expected case. */
      if (range_lo <= a && a + szB - 1 <= range_hi)
         return True;
      else
         return False;
   }
   /*NOTREACHED*/
   tl_assert(0);
}
/* Parse two Addrs (in hex) separated by a dash, or fail. */

static Bool parse_Addr_pair ( const HChar** ppc, Addr* result1, Addr* result2 )
{
   Bool ok = VG_(parse_Addr) (ppc, result1);
   if (!ok)
      return False;
   if (**ppc != '-')
      return False;
   (*ppc)++;
   ok = VG_(parse_Addr) (ppc, result2);
   if (!ok)
      return False;
   return True;
}
/* Parse two UInts (32 bit unsigned, in decimal) separated by a dash,
   or fail. */

static Bool parse_UInt_pair ( const HChar** ppc, UInt* result1, UInt* result2 )
{
   Bool ok = VG_(parse_UInt) (ppc, result1);
   if (!ok)
      return False;
   if (**ppc != '-')
      return False;
   (*ppc)++;
   ok = VG_(parse_UInt) (ppc, result2);
   if (!ok)
      return False;
   return True;
}
/* Parse a set of ranges separated by commas into 'ignoreRanges', or
   fail.  If they are valid, add them to the global set of ignored
   ranges. */
static Bool parse_ignore_ranges ( const HChar* str0 )
{
   init_gIgnoredAddressRanges();
   const HChar*  str = str0;
   const HChar** ppc = &str;
   while (1) {
      Addr start = ~(Addr)0;
      Addr end   = (Addr)0;
      Bool ok    = parse_Addr_pair(ppc, &start, &end);
      if (!ok)
         return False;
      if (start > end)
         return False;
      VG_(bindRangeMap)( gIgnoredAddressRanges, start, end, IAR_CommandLine );
      if (**ppc == 0)
         return True;
      if (**ppc != ',')
         return False;
      (*ppc)++;
   }
   /*NOTREACHED*/
   return False;
}
/* Add or remove [start, +len) from the set of ignored ranges. */
static Bool modify_ignore_ranges ( Bool addRange, Addr start, Addr len )
{
   init_gIgnoredAddressRanges();
   const Bool verbose = (VG_(clo_verbosity) > 1);
   if (len == 0) {
      return False;
   }
   if (addRange) {
      VG_(bindRangeMap)(gIgnoredAddressRanges,
                        start, start+len-1, IAR_ClientReq);
      if (verbose)
         VG_(dmsg)("memcheck: modify_ignore_ranges: add %p %p\n",
                   (void*)start, (void*)(start+len-1));
   } else {
      VG_(bindRangeMap)(gIgnoredAddressRanges,
                        start, start+len-1, IAR_NotIgnored);
      if (verbose)
         VG_(dmsg)("memcheck: modify_ignore_ranges: del %p %p\n",
                   (void*)start, (void*)(start+len-1));
   }
   if (verbose) {
      VG_(dmsg)("memcheck: now have %u ranges:\n",
                VG_(sizeRangeMap)(gIgnoredAddressRanges));
      UInt i;
      for (i = 0; i < VG_(sizeRangeMap)(gIgnoredAddressRanges); i++) {
         UWord val     = IAR_INVALID;
         UWord key_min = ~(UWord)0;
         UWord key_max = (UWord)0;
         VG_(indexRangeMap)( &key_min, &key_max, &val,
                             gIgnoredAddressRanges, i );
         VG_(dmsg)("memcheck: [%u]  %016lx-%016lx  %s\n",
                   i, key_min, key_max, showIARKind(val));
      }
   }
   return True;
}
/* --------------- Load/store slow cases. --------------- */

static
__attribute__((noinline))
void mc_LOADV_128_or_256_slow ( /*OUT*/ULong* res,
                                Addr a, SizeT nBits, Bool bigendian )
{
   ULong  pessim[4];     /* only used when p-l-ok=yes */
   SSizeT szB            = nBits / 8;
   SSizeT szL            = szB / 8;  /* Size in Longs (64-bit units) */
   SSizeT i, j;          /* Must be signed. */
   SizeT  n_addrs_bad = 0;
   Addr   ai;
   UChar  vbits8;
   Bool   ok;

   /* Code below assumes load size is a power of two and at least 64
      bits. */
   tl_assert((szB & (szB-1)) == 0 && szL > 0);

   /* If this triggers, you probably just need to increase the size of
      the pessim array. */
   tl_assert(szL <= sizeof(pessim) / sizeof(pessim[0]));

   for (j = 0; j < szL; j++) {
      pessim[j] = V_BITS64_DEFINED;
      res[j] = V_BITS64_UNDEFINED;
   }

   /* Make up a result V word, which contains the loaded data for
      valid addresses and Defined for invalid addresses.  Iterate over
      the bytes in the word, from the most significant down to the
      least.  The vbits to return are calculated into vbits128.  Also
      compute the pessimising value to be used when
      --partial-loads-ok=yes.  n_addrs_bad is redundant (the relevant
      info can be gleaned from the pessim array) but is used as a
      cross-check. */
   for (j = szL-1; j >= 0; j--) {
      ULong vbits64    = V_BITS64_UNDEFINED;
      ULong pessim64   = V_BITS64_DEFINED;
      UWord long_index = byte_offset_w(szL, bigendian, j);
      for (i = 8-1; i >= 0; i--) {
         PROF_EVENT(MCPE_LOADV_128_OR_256_SLOW_LOOP);
         ai = a + 8*long_index + byte_offset_w(8, bigendian, i);
         ok = get_vbits8(ai, &vbits8);
         vbits64 <<= 8;
         vbits64 |= vbits8;
         if (!ok) n_addrs_bad++;
         pessim64 <<= 8;
         pessim64 |= (ok ? V_BITS8_DEFINED : V_BITS8_UNDEFINED);
      }
      res[long_index] = vbits64;
      pessim[long_index] = pessim64;
   }

   /* In the common case, all the addresses involved are valid, so we
      just return the computed V bits and have done. */
   if (LIKELY(n_addrs_bad == 0))
      return;

   /* If there's no possibility of getting a partial-loads-ok
      exemption, report the error and quit. */
   if (!MC_(clo_partial_loads_ok)) {
      MC_(record_address_error)( VG_(get_running_tid)(), a, szB, False );
      return;
   }

   /* The partial-loads-ok exemption might apply.  Find out if it
      does.  If so, don't report an addressing error, but do return
      Undefined for the bytes that are out of range, so as to avoid
      false negatives.  If it doesn't apply, just report an addressing
      error in the usual way. */

   /* Some code steps along byte strings in aligned chunks
      even when there is only a partially defined word at the end (eg,
      optimised strlen).  This is allowed by the memory model of
      modern machines, since an aligned load cannot span two pages and
      thus cannot "partially fault".

      Therefore, a load from a partially-addressable place is allowed
      if all of the following hold:
      - the command-line flag is set [by default, it isn't]
      - it's an aligned load
      - at least one of the addresses in the word *is* valid

      Since this suppresses the addressing error, we avoid false
      negatives by marking bytes undefined when they come from an
      invalid address.
   */

   /* "at least one of the addresses is invalid" */
   ok = False;
   for (j = 0; j < szL; j++)
      ok |= pessim[j] != V_BITS64_DEFINED;
   tl_assert(ok);

#  if defined(VGP_s390x_linux)
   tl_assert(szB == 16); // s390 doesn't have > 128 bit SIMD
   /* OK if all loaded bytes are from the same page. */
   Bool alignedOK = ((a & 0xfff) <= 0x1000 - szB);
#  elif defined(VGA_ppc64be) || defined(VGA_ppc64le)
   /* lxvd2x might generate an unaligned 128 bit vector load. */
   Bool alignedOK = (szB == 16);
#  else
   /* OK if the address is aligned by the load size. */
   Bool alignedOK = (0 == (a & (szB - 1)));
#  endif

   if (alignedOK && n_addrs_bad < szB) {
      /* Exemption applies.  Use the previously computed pessimising
         value and return the combined result, but don't flag an
         addressing error.  The pessimising value is Defined for valid
         addresses and Undefined for invalid addresses. */
      /* for assumption that doing bitwise or implements UifU */
      tl_assert(V_BIT_UNDEFINED == 1 && V_BIT_DEFINED == 0);
      /* (really need "UifU" here...)
         vbits[j] UifU= pessim[j]  (is pessimised by it, iow) */
      for (j = szL-1; j >= 0; j--)
         res[j] |= pessim[j];
      return;
   }

   /* Exemption doesn't apply.  Flag an addressing error in the normal
      way. */
   MC_(record_address_error)( VG_(get_running_tid)(), a, szB, False );
}
MC_MAIN_STATIC
__attribute__((noinline))
__attribute__((used))
VG_REGPARM(3) /* make sure we're using a fixed calling convention, since
                 this function may get called from hand written assembly. */
ULong mc_LOADVn_slow ( Addr a, SizeT nBits, Bool bigendian );
MC_MAIN_STATIC
__attribute__((noinline))
__attribute__((used))
VG_REGPARM(3) /* make sure we're using a fixed calling convention, since
                 this function may get called from hand written assembly. */
ULong mc_LOADVn_slow ( Addr a, SizeT nBits, Bool bigendian )
{
   PROF_EVENT(MCPE_LOADVN_SLOW);

   /* ------------ BEGIN semi-fast cases ------------ */
   /* These deal quickly-ish with the common auxiliary primary map
      cases on 64-bit platforms.  Are merely a speedup hack; can be
      omitted without loss of correctness/functionality.  Note that in
      both cases the "sizeof(void*) == 8" causes these cases to be
      folded out by compilers on 32-bit platforms.  These are derived
      from LOADV64 and LOADV32.
   */
#  if defined(VGA_mips64) && defined(VGABI_N32)
   if (LIKELY(sizeof(void*) == 4 && nBits == 64 && VG_IS_8_ALIGNED(a)))
#  else
   if (LIKELY(sizeof(void*) == 8 && nBits == 64 && VG_IS_8_ALIGNED(a)))
#  endif
   {
      SecMap* sm       = get_secmap_for_reading(a);
      UWord   sm_off16 = SM_OFF_16(a);
      UWord   vabits16 = sm->vabits16[sm_off16];
      if (LIKELY(vabits16 == VA_BITS16_DEFINED))
         return V_BITS64_DEFINED;
      if (LIKELY(vabits16 == VA_BITS16_UNDEFINED))
         return V_BITS64_UNDEFINED;
      /* else fall into the slow case */
   }
#  if defined(VGA_mips64) && defined(VGABI_N32)
   if (LIKELY(sizeof(void*) == 4 && nBits == 32 && VG_IS_4_ALIGNED(a)))
#  else
   if (LIKELY(sizeof(void*) == 8 && nBits == 32 && VG_IS_4_ALIGNED(a)))
#  endif
   {
      SecMap* sm      = get_secmap_for_reading(a);
      UWord   sm_off  = SM_OFF(a);
      UWord   vabits8 = sm->vabits8[sm_off];
      if (LIKELY(vabits8 == VA_BITS8_DEFINED))
         return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_DEFINED);
      if (LIKELY(vabits8 == VA_BITS8_UNDEFINED))
         return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_UNDEFINED);
      /* else fall into slow case */
   }
   /* ------------ END semi-fast cases ------------ */

   ULong  vbits64     = V_BITS64_UNDEFINED; /* result */
   ULong  pessim64    = V_BITS64_DEFINED;   /* only used when p-l-ok=yes */
   SSizeT szB         = nBits / 8;
   SSizeT i;          /* Must be signed. */
   SizeT  n_addrs_bad = 0;
   Addr   ai;
   UChar  vbits8;
   Bool   ok;

   tl_assert(nBits == 64 || nBits == 32 || nBits == 16 || nBits == 8);

   /* Make up a 64-bit result V word, which contains the loaded data
      for valid addresses and Defined for invalid addresses.  Iterate
      over the bytes in the word, from the most significant down to
      the least.  The vbits to return are calculated into vbits64.
      Also compute the pessimising value to be used when
      --partial-loads-ok=yes.  n_addrs_bad is redundant (the relevant
      info can be gleaned from pessim64) but is used as a
      cross-check. */
   for (i = szB-1; i >= 0; i--) {
      PROF_EVENT(MCPE_LOADVN_SLOW_LOOP);
      ai = a + byte_offset_w(szB, bigendian, i);
      ok = get_vbits8(ai, &vbits8);
      vbits64 <<= 8;
      vbits64 |= vbits8;
      if (!ok) n_addrs_bad++;
      pessim64 <<= 8;
      pessim64 |= (ok ? V_BITS8_DEFINED : V_BITS8_UNDEFINED);
   }

   /* In the common case, all the addresses involved are valid, so we
      just return the computed V bits and have done. */
   if (LIKELY(n_addrs_bad == 0))
      return vbits64;

   /* If there's no possibility of getting a partial-loads-ok
      exemption, report the error and quit. */
   if (!MC_(clo_partial_loads_ok)) {
      MC_(record_address_error)( VG_(get_running_tid)(), a, szB, False );
      return vbits64;
   }

   /* The partial-loads-ok exemption might apply.  Find out if it
      does.  If so, don't report an addressing error, but do return
      Undefined for the bytes that are out of range, so as to avoid
      false negatives.  If it doesn't apply, just report an addressing
      error in the usual way. */

   /* Some code steps along byte strings in aligned word-sized chunks
      even when there is only a partially defined word at the end (eg,
      optimised strlen).  This is allowed by the memory model of
      modern machines, since an aligned load cannot span two pages and
      thus cannot "partially fault".  Despite such behaviour being
      declared undefined by ANSI C/C++.

      Therefore, a load from a partially-addressable place is allowed
      if all of the following hold:
      - the command-line flag is set [by default, it isn't]
      - it's a word-sized, word-aligned load
      - at least one of the addresses in the word *is* valid

      Since this suppresses the addressing error, we avoid false
      negatives by marking bytes undefined when they come from an
      invalid address.
   */

   /* "at least one of the addresses is invalid" */
   tl_assert(pessim64 != V_BITS64_DEFINED);

#  if defined(VGA_mips64) && defined(VGABI_N32)
   if (szB == VG_WORDSIZE * 2 && VG_IS_WORD_ALIGNED(a)
       && n_addrs_bad < VG_WORDSIZE * 2)
#  elif defined(VGA_ppc64be) || defined(VGA_ppc64le)
   /* On power unaligned loads of words are OK. */
   if (szB == VG_WORDSIZE && n_addrs_bad < VG_WORDSIZE)
#  else
   if (szB == VG_WORDSIZE && VG_IS_WORD_ALIGNED(a)
       && n_addrs_bad < VG_WORDSIZE)
#  endif
   {
      /* Exemption applies.  Use the previously computed pessimising
         value for vbits64 and return the combined result, but don't
         flag an addressing error.  The pessimising value is Defined
         for valid addresses and Undefined for invalid addresses. */
      /* for assumption that doing bitwise or implements UifU */
      tl_assert(V_BIT_UNDEFINED == 1 && V_BIT_DEFINED == 0);
      /* (really need "UifU" here...)
         vbits64 UifU= pessim64  (is pessimised by it, iow) */
      vbits64 |= pessim64;
      return vbits64;
   }

   /* Also, it appears that gcc generates string-stepping code in
      32-bit chunks on 64 bit platforms.  So, also grant an exception
      for this case.  Note that the first clause of the conditional
      (VG_WORDSIZE == 8) is known at compile time, so the whole clause
      will get folded out in 32 bit builds. */
#  if defined(VGA_mips64) && defined(VGABI_N32)
   if (VG_WORDSIZE == 4
       && VG_IS_4_ALIGNED(a) && nBits == 32 && n_addrs_bad < 4)
#  else
   if (VG_WORDSIZE == 8
       && VG_IS_4_ALIGNED(a) && nBits == 32 && n_addrs_bad < 4)
#  endif
   {
      tl_assert(V_BIT_UNDEFINED == 1 && V_BIT_DEFINED == 0);
      /* (really need "UifU" here...)
         vbits64 UifU= pessim64  (is pessimised by it, iow) */
      vbits64 |= pessim64;
      /* Mark the upper 32 bits as undefined, just to be on the safe
         side. */
      vbits64 |= (((ULong)V_BITS32_UNDEFINED) << 32);
      return vbits64;
   }

   /* Exemption doesn't apply.  Flag an addressing error in the normal
      way. */
   MC_(record_address_error)( VG_(get_running_tid)(), a, szB, False );

   return vbits64;
}
static
__attribute__((noinline))
void mc_STOREVn_slow ( Addr a, SizeT nBits, ULong vbytes, Bool bigendian )
{
   SizeT szB = nBits / 8;
   SizeT i, n_addrs_bad = 0;
   UChar vbits8;
   Addr  ai;
   Bool  ok;

   PROF_EVENT(MCPE_STOREVN_SLOW);

   /* ------------ BEGIN semi-fast cases ------------ */
   /* These deal quickly-ish with the common auxiliary primary map
      cases on 64-bit platforms.  Are merely a speedup hack; can be
      omitted without loss of correctness/functionality.  Note that in
      both cases the "sizeof(void*) == 8" causes these cases to be
      folded out by compilers on 32-bit platforms.  The logic below
      is somewhat similar to some cases extensively commented in
      MC_(helperc_STOREV8).
   */
#  if defined(VGA_mips64) && defined(VGABI_N32)
   if (LIKELY(sizeof(void*) == 4 && nBits == 64 && VG_IS_8_ALIGNED(a)))
#  else
   if (LIKELY(sizeof(void*) == 8 && nBits == 64 && VG_IS_8_ALIGNED(a)))
#  endif
   {
      SecMap* sm       = get_secmap_for_reading(a);
      UWord   sm_off16 = SM_OFF_16(a);
      UWord   vabits16 = sm->vabits16[sm_off16];
      if (LIKELY( !is_distinguished_sm(sm) &&
                  (VA_BITS16_DEFINED   == vabits16 ||
                   VA_BITS16_UNDEFINED == vabits16) )) {
         /* Handle common case quickly: a is suitably aligned, */
         /* is mapped, and is addressable. */
         // Convert full V-bits in register to compact 2-bit form.
         if (LIKELY(V_BITS64_DEFINED == vbytes)) {
            sm->vabits16[sm_off16] = VA_BITS16_DEFINED;
            return;
         } else if (V_BITS64_UNDEFINED == vbytes) {
            sm->vabits16[sm_off16] = VA_BITS16_UNDEFINED;
            return;
         }
         /* else fall into the slow case */
      }
      /* else fall into the slow case */
   }
#  if defined(VGA_mips64) && defined(VGABI_N32)
   if (LIKELY(sizeof(void*) == 4 && nBits == 32 && VG_IS_4_ALIGNED(a)))
#  else
   if (LIKELY(sizeof(void*) == 8 && nBits == 32 && VG_IS_4_ALIGNED(a)))
#  endif
   {
      SecMap* sm      = get_secmap_for_reading(a);
      UWord   sm_off  = SM_OFF(a);
      UWord   vabits8 = sm->vabits8[sm_off];
      if (LIKELY( !is_distinguished_sm(sm) &&
                  (VA_BITS8_DEFINED   == vabits8 ||
                   VA_BITS8_UNDEFINED == vabits8) )) {
         /* Handle common case quickly: a is suitably aligned, */
         /* is mapped, and is addressable. */
         // Convert full V-bits in register to compact 2-bit form.
         if (LIKELY(V_BITS32_DEFINED == (vbytes & 0xFFFFFFFF))) {
            sm->vabits8[sm_off] = VA_BITS8_DEFINED;
            return;
         } else if (V_BITS32_UNDEFINED == (vbytes & 0xFFFFFFFF)) {
            sm->vabits8[sm_off] = VA_BITS8_UNDEFINED;
            return;
         }
         /* else fall into the slow case */
      }
      /* else fall into the slow case */
   }
   /* ------------ END semi-fast cases ------------ */

   tl_assert(nBits == 64 || nBits == 32 || nBits == 16 || nBits == 8);

   /* Dump vbytes in memory, iterating from least to most significant
      byte.  At the same time establish addressability of the location. */
   for (i = 0; i < szB; i++) {
      PROF_EVENT(MCPE_STOREVN_SLOW_LOOP);
      ai     = a + byte_offset_w(szB, bigendian, i);
      vbits8 = vbytes & 0xff;
      ok     = set_vbits8(ai, vbits8);
      if (!ok) n_addrs_bad++;
      vbytes >>= 8;
   }

   /* If an address error has happened, report it. */
   if (n_addrs_bad > 0)
      MC_(record_address_error)( VG_(get_running_tid)(), a, szB, True );
}
/*------------------------------------------------------------*/
/*--- Setting permissions over address ranges.             ---*/
/*------------------------------------------------------------*/
static void set_address_range_perms ( Addr a, SizeT lenT, UWord vabits16,
                                      UWord dsm_num )
{
   UWord    sm_off, sm_off16;
   UWord    vabits2 = vabits16 & 0x3;
   SizeT    lenA, lenB, len_to_next_secmap;
   Addr     aNext;
   SecMap*  sm;
   SecMap** sm_ptr;
   SecMap*  example_dsm;

   PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS);

   /* Check the V+A bits make sense. */
   tl_assert(VA_BITS16_NOACCESS  == vabits16 ||
             VA_BITS16_UNDEFINED == vabits16 ||
             VA_BITS16_DEFINED   == vabits16);

   // This code should never write PDBs;  ensure this.  (See comment above
   // set_vabits2().)
   tl_assert(VA_BITS2_PARTDEFINED != vabits2);

   if (lenT == 0)
      return;

   if (lenT > 256 * 1024 * 1024) {
      if (VG_(clo_verbosity) > 0 && !VG_(clo_xml)) {
         const HChar* s = "unknown???";
         if (vabits16 == VA_BITS16_NOACCESS ) s = "noaccess";
         if (vabits16 == VA_BITS16_UNDEFINED) s = "undefined";
         if (vabits16 == VA_BITS16_DEFINED  ) s = "defined";
         VG_(message)(Vg_UserMsg, "Warning: set address range perms: "
                                  "large range [0x%lx, 0x%lx) (%s)\n",
                                  a, a + lenT, s);
      }
   }
#ifndef PERF_FAST_SARP
   /*------------------ debug-only case ------------------ */
   {
      // Endianness doesn't matter here because all bytes are being set to
      // the same value.
      // Nb: We don't have to worry about updating the sec-V-bits table
      // after these set_vabits2() calls because this code never writes
      // VA_BITS2_PARTDEFINED values.
      SizeT i;
      for (i = 0; i < lenT; i++) {
         set_vabits2(a + i, vabits2);
      }
      return;
   }
#endif
1722 /* Get the distinguished secondary that we might want
1723 to use (part of the space-compression scheme). */
1724 example_dsm
= &sm_distinguished
[dsm_num
];
1726 // We have to handle ranges covering various combinations of partial and
1727 // whole sec-maps. Here is how parts 1, 2 and 3 are used in each case.
1728 // Cases marked with a '*' are common.
1732 // * one partial sec-map (p) 1
1733 // - one whole sec-map (P) 2
1735 // * two partial sec-maps (pp) 1,3
1736 // - one partial, one whole sec-map (pP) 1,2
1737 // - one whole, one partial sec-map (Pp) 2,3
1738 // - two whole sec-maps (PP) 2,2
1740 // * one partial, one whole, one partial (pPp) 1,2,3
1741 // - one partial, two whole (pPP) 1,2,2
1742 // - two whole, one partial (PPp) 2,2,3
1743 // - three whole (PPP) 2,2,2
1745 // * one partial, N-2 whole, one partial (pP...Pp) 1,2...2,3
1746 // - one partial, N-1 whole (pP...PP) 1,2...2,2
1747 // - N-1 whole, one partial (PP...Pp) 2,2...2,3
1748 // - N whole (PP...PP) 2,2...2,3
1750 // Break up total length (lenT) into two parts: length in the first
1751 // sec-map (lenA), and the rest (lenB); lenT == lenA + lenB.
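   //
   // For example (illustrative figures only): with 64KB sec-maps
   // (SM_SIZE == 0x10000), a == 0x5000FFF0 and lenT == 0x30 give
   // aNext == 0x50010000 and len_to_next_secmap == 0x10, hence
   // lenA == 0x10 and lenB == 0x20.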
   aNext = start_of_this_sm(a) + SM_SIZE;
   len_to_next_secmap = aNext - a;
   if ( lenT <= len_to_next_secmap ) {
      // Range entirely within one sec-map.  Covers almost all cases.
      PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_SINGLE_SECMAP);
      lenA = lenT;
      lenB = 0;
   } else if (is_start_of_sm(a)) {
      // Range spans at least one whole sec-map, and starts at the beginning
      // of a sec-map; skip to Part 2.
      PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_STARTOF_SECMAP);
      lenA = 0;
      lenB = lenT;
      goto part2;
   } else {
      // Range spans two or more sec-maps, first one is partial.
      PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_MULTIPLE_SECMAPS);
      lenA = len_to_next_secmap;
      lenB = lenT - lenA;
   }

   //------------------------------------------------------------------------
   // Part 1: Deal with the first sec_map.  Most of the time the range will be
   // entirely within a sec_map and this part alone will suffice.  Also,
   // doing it this way lets us avoid repeatedly testing for the crossing of
   // a sec-map boundary within these loops.
   //------------------------------------------------------------------------

   // If it's distinguished, make it undistinguished if necessary.
   sm_ptr = get_secmap_ptr(a);
   if (is_distinguished_sm(*sm_ptr)) {
      if (*sm_ptr == example_dsm) {
         // Sec-map already has the V+A bits that we want, so skip.
         PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM1_QUICK);
         a    = aNext;
         lenA = 0;
      } else {
         PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM1);
         *sm_ptr = copy_for_writing(*sm_ptr);
      }
   }
   sm = *sm_ptr;

   // 1 byte steps until 8-aligned
   while (True) {
      if (VG_IS_8_ALIGNED(a)) break;
      if (lenA < 1)           break;
      PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_LOOP1A);
      sm_off = SM_OFF(a);
      insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) );
      a    += 1;
      lenA -= 1;
   }
   // 8-aligned, 8 byte steps
   while (True) {
      if (lenA < 8) break;
      PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_LOOP8A);
      sm_off16 = SM_OFF_16(a);
      sm->vabits16[sm_off16] = vabits16;
      a    += 8;
      lenA -= 8;
   }
   // 1 byte steps
   while (True) {
      if (lenA < 1) break;
      PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_LOOP1B);
      sm_off = SM_OFF(a);
      insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) );
      a    += 1;
      lenA -= 1;
   }

   // We've finished the first sec-map.  Is that it?
   if (lenB == 0)
      return;

   //------------------------------------------------------------------------
   // Part 2: Fast-set entire sec-maps at a time.
   //------------------------------------------------------------------------
  part2:
   // 64KB-aligned, 64KB steps.
   // Nb: we can reach here with lenB < SM_SIZE
   tl_assert(0 == lenA);
   while (True) {
      if (lenB < SM_SIZE) break;
      tl_assert(is_start_of_sm(a));
      PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_LOOP64K);
      sm_ptr = get_secmap_ptr(a);
      if (!is_distinguished_sm(*sm_ptr)) {
         PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_LOOP64K_FREE_DIST_SM);
         // Free the non-distinguished sec-map that we're replacing.  This
         // case happens moderately often, enough to be worthwhile.
         SysRes sres = VG_(am_munmap_valgrind)((Addr)*sm_ptr, sizeof(SecMap));
         tl_assert2(! sr_isError(sres), "SecMap valgrind munmap failure\n");
      }
      update_SM_counts(*sm_ptr, example_dsm);
      // Make the sec-map entry point to the example DSM
      *sm_ptr = example_dsm;
      lenB -= SM_SIZE;
      a    += SM_SIZE;
   }

   // We've finished the whole sec-maps.  Is that it?
   if (lenB == 0)
      return;

   //------------------------------------------------------------------------
   // Part 3: Finish off the final partial sec-map, if necessary.
   //------------------------------------------------------------------------

   tl_assert(is_start_of_sm(a) && lenB < SM_SIZE);

   // If it's distinguished, make it undistinguished if necessary.
   sm_ptr = get_secmap_ptr(a);
   if (is_distinguished_sm(*sm_ptr)) {
      if (*sm_ptr == example_dsm) {
         // Sec-map already has the V+A bits that we want, so stop.
         PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM2_QUICK);
         return;
      } else {
         PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM2);
         *sm_ptr = copy_for_writing(*sm_ptr);
      }
   }
   sm = *sm_ptr;

   // 8-aligned, 8 byte steps
   while (True) {
      if (lenB < 8) break;
      PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_LOOP8B);
      sm_off16 = SM_OFF_16(a);
      sm->vabits16[sm_off16] = vabits16;
      a    += 8;
      lenB -= 8;
   }
   // 1 byte steps
   while (True) {
      if (lenB < 1) return;
      PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_LOOP1C);
      sm_off = SM_OFF(a);
      insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) );
      a    += 1;
      lenB -= 1;
   }
}

/* --- Set permissions for arbitrary address ranges --- */

void MC_(make_mem_noaccess) ( Addr a, SizeT len )
{
   PROF_EVENT(MCPE_MAKE_MEM_NOACCESS);
   DEBUG("MC_(make_mem_noaccess)(%p, %lu)\n", a, len);
   set_address_range_perms ( a, len, VA_BITS16_NOACCESS, SM_DIST_NOACCESS );
   if (UNLIKELY( MC_(clo_mc_level) == 3 ))
      ocache_sarp_Clear_Origins ( a, len );
}

static void make_mem_undefined ( Addr a, SizeT len )
{
   PROF_EVENT(MCPE_MAKE_MEM_UNDEFINED);
   DEBUG("make_mem_undefined(%p, %lu)\n", a, len);
   set_address_range_perms ( a, len, VA_BITS16_UNDEFINED, SM_DIST_UNDEFINED );
}

void MC_(make_mem_undefined_w_otag) ( Addr a, SizeT len, UInt otag )
{
   PROF_EVENT(MCPE_MAKE_MEM_UNDEFINED_W_OTAG);
   DEBUG("MC_(make_mem_undefined)(%p, %lu)\n", a, len);
   set_address_range_perms ( a, len, VA_BITS16_UNDEFINED, SM_DIST_UNDEFINED );
   if (UNLIKELY( MC_(clo_mc_level) == 3 ))
      ocache_sarp_Set_Origins ( a, len, otag );
}

static
void make_mem_undefined_w_tid_and_okind ( Addr a, SizeT len,
                                          ThreadId tid, UInt okind )
{
   UInt        ecu;
   ExeContext* here;
   /* VG_(record_ExeContext) checks for validity of tid, and asserts
      if it is invalid.  So no need to do it here. */
   tl_assert(okind <= 3);
   here = VG_(record_ExeContext)( tid, 0/*first_ip_delta*/ );
   tl_assert(here);
   ecu = VG_(get_ECU_from_ExeContext)(here);
   tl_assert(VG_(is_plausible_ECU)(ecu));
   MC_(make_mem_undefined_w_otag) ( a, len, ecu | okind );
}

static
void mc_new_mem_w_tid_make_ECU ( Addr a, SizeT len, ThreadId tid )
{
   make_mem_undefined_w_tid_and_okind ( a, len, tid, MC_OKIND_UNKNOWN );
}

static
void mc_new_mem_w_tid_no_ECU ( Addr a, SizeT len, ThreadId tid )
{
   MC_(make_mem_undefined_w_otag) ( a, len, MC_OKIND_UNKNOWN );
}

void MC_(make_mem_defined) ( Addr a, SizeT len )
{
   PROF_EVENT(MCPE_MAKE_MEM_DEFINED);
   DEBUG("MC_(make_mem_defined)(%p, %lu)\n", a, len);
   set_address_range_perms ( a, len, VA_BITS16_DEFINED, SM_DIST_DEFINED );
   if (UNLIKELY( MC_(clo_mc_level) == 3 ))
      ocache_sarp_Clear_Origins ( a, len );
}

__attribute__((unused))
static void make_mem_defined_w_tid ( Addr a, SizeT len, ThreadId tid )
{
   MC_(make_mem_defined)(a, len);
}

/* For each byte in [a,a+len), if the byte is addressable, make it be
   defined, but if it isn't addressable, leave it alone.  In other
   words a version of MC_(make_mem_defined) that doesn't mess with
   addressability.  Low-performance implementation. */
static void make_mem_defined_if_addressable ( Addr a, SizeT len )
{
   SizeT i;
   UChar vabits2;
   DEBUG("make_mem_defined_if_addressable(%p, %llu)\n", a, (ULong)len);
   for (i = 0; i < len; i++) {
      vabits2 = get_vabits2( a+i );
      if (LIKELY(VA_BITS2_NOACCESS != vabits2)) {
         set_vabits2(a+i, VA_BITS2_DEFINED);
         if (UNLIKELY(MC_(clo_mc_level) >= 3)) {
            MC_(helperc_b_store1)( a+i, 0 ); /* clear the origin tag */
         }
      }
   }
}

/* Similarly (needed for mprotect handling ..) */
static void make_mem_defined_if_noaccess ( Addr a, SizeT len )
{
   SizeT i;
   UChar vabits2;
   DEBUG("make_mem_defined_if_noaccess(%p, %llu)\n", a, (ULong)len);
   for (i = 0; i < len; i++) {
      vabits2 = get_vabits2( a+i );
      if (LIKELY(VA_BITS2_NOACCESS == vabits2)) {
         set_vabits2(a+i, VA_BITS2_DEFINED);
         if (UNLIKELY(MC_(clo_mc_level) >= 3)) {
            MC_(helperc_b_store1)( a+i, 0 ); /* clear the origin tag */
         }
      }
   }
}

/* --- Block-copy permissions (needed for implementing realloc() and
       sbrk()). --- */

void MC_(copy_address_range_state) ( Addr src, Addr dst, SizeT len )
{
   SizeT i, j;
   UChar vabits2, vabits8;
   Bool  aligned, nooverlap;

   DEBUG("MC_(copy_address_range_state)\n");
   PROF_EVENT(MCPE_COPY_ADDRESS_RANGE_STATE);

   if (len == 0 || src == dst)
      return;

   aligned   = VG_IS_4_ALIGNED(src) && VG_IS_4_ALIGNED(dst);
   nooverlap = src+len <= dst || dst+len <= src;

   if (nooverlap && aligned) {

      /* Vectorised fast case, when no overlap and suitably aligned */
      /* vector loop */
      i = 0;
      while (len >= 4) {
         vabits8 = get_vabits8_for_aligned_word32( src+i );
         set_vabits8_for_aligned_word32( dst+i, vabits8 );
         if (LIKELY(VA_BITS8_DEFINED   == vabits8
                    || VA_BITS8_UNDEFINED == vabits8
                    || VA_BITS8_NOACCESS  == vabits8)) {
            /* do nothing */
         } else {
            /* have to copy secondary map info */
            if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+0 ))
               set_sec_vbits8( dst+i+0, get_sec_vbits8( src+i+0 ) );
            if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+1 ))
               set_sec_vbits8( dst+i+1, get_sec_vbits8( src+i+1 ) );
            if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+2 ))
               set_sec_vbits8( dst+i+2, get_sec_vbits8( src+i+2 ) );
            if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+3 ))
               set_sec_vbits8( dst+i+3, get_sec_vbits8( src+i+3 ) );
         }
         i   += 4;
         len -= 4;
      }
      /* fixup loop */
      while (len >= 1) {
         vabits2 = get_vabits2( src+i );
         set_vabits2( dst+i, vabits2 );
         if (VA_BITS2_PARTDEFINED == vabits2) {
            set_sec_vbits8( dst+i, get_sec_vbits8( src+i ) );
         }
         i++;
         len--;
      }

   } else {

      /* We have to do things the slow way */
      if (src < dst) {
         for (i = 0, j = len-1; i < len; i++, j--) {
            PROF_EVENT(MCPE_COPY_ADDRESS_RANGE_STATE_LOOP1);
            vabits2 = get_vabits2( src+j );
            set_vabits2( dst+j, vabits2 );
            if (VA_BITS2_PARTDEFINED == vabits2) {
               set_sec_vbits8( dst+j, get_sec_vbits8( src+j ) );
            }
         }
      }

      if (src > dst) {
         for (i = 0; i < len; i++) {
            PROF_EVENT(MCPE_COPY_ADDRESS_RANGE_STATE_LOOP2);
            vabits2 = get_vabits2( src+i );
            set_vabits2( dst+i, vabits2 );
            if (VA_BITS2_PARTDEFINED == vabits2) {
               set_sec_vbits8( dst+i, get_sec_vbits8( src+i ) );
            }
         }
      }
   }
}

/*------------------------------------------------------------*/
/*--- Origin tracking stuff - cache basics                 ---*/
/*------------------------------------------------------------*/

/* AN OVERVIEW OF THE ORIGIN TRACKING IMPLEMENTATION
   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

   Note that this implementation draws inspiration from the "origin
   tracking by value piggybacking" scheme described in "Tracking Bad
   Apples: Reporting the Origin of Null and Undefined Value Errors"
   (Michael Bond, Nicholas Nethercote, Stephen Kent, Samuel Guyer,
   Kathryn McKinley, OOPSLA07, Montreal, Oct 2007) but in fact it is
   implemented completely differently.

   Origin tags and ECUs -- about the shadow values
   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

   This implementation tracks the defining point of all uninitialised
   values using so called "origin tags", which are 32-bit integers,
   rather than using the values themselves to encode the origins.  The
   latter, so-called "value piggybacking", is what the OOPSLA07 paper
   describes.

   Origin tags, as tracked by the machinery below, are 32-bit unsigned
   ints (UInts), regardless of the machine's word size.  Each tag
   comprises an upper 30-bit ECU field and a lower 2-bit 'kind' field.
   The ECU field is a number given out by m_execontext and has a 1-1
   mapping with ExeContext*s.  An ECU can be used directly as an origin
   tag (otag), but in fact we want to put additional information in the
   'kind' field to indicate roughly where the tag came from.  This
   helps print more understandable error messages for the user -- it
   has no other purpose.  In summary:

   * Both ECUs and origin tags are represented as 32-bit words

   * m_execontext and the core-tool interface deal purely in ECUs.
     They have no knowledge of origin tags - that is a purely
     Memcheck-internal matter.

   * all valid ECUs have the lowest 2 bits zero and at least
     one of the upper 30 bits nonzero (see VG_(is_plausible_ECU))

   * to convert from an ECU to an otag, OR in one of the MC_OKIND_
     constants defined in mc_include.h.

   * to convert an otag back to an ECU, AND it with ~3
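
   For example (an illustrative sketch, not code from this file;
   MC_OKIND_HEAP stands for any of the MC_OKIND_ constants):

      UInt ecu  = VG_(get_ECU_from_ExeContext)( ec );  // low 2 bits zero
      UInt otag = ecu | MC_OKIND_HEAP;                 // ECU -> otag
      UInt back = otag & ~3;                           // otag -> ECU; back == ecu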

   One important fact is that no valid otag is zero.  A zero otag is
   used by the implementation to indicate "no origin", which could
   mean that either the value is defined, or it is undefined but the
   implementation somehow managed to lose the origin.

   The ECU used for memory created by malloc etc is derived from the
   stack trace at the time the malloc etc happens.  This means the
   mechanism can show the exact allocation point for heap-created
   uninitialised values.

   In contrast, it is simply too expensive to create a complete
   backtrace for each stack allocation.  Therefore we merely use a
   depth-1 backtrace for stack allocations, which can be done once at
   translation time, rather than N times at run time.  The result of
   this is that, for stack created uninitialised values, Memcheck can
   only show the allocating function, and not what called it.
   Furthermore, compilers tend to move the stack pointer just once at
   the start of the function, to allocate all locals, and so in fact
   the stack origin almost always simply points to the opening brace
   of the function.  Net result is, for stack origins, the mechanism
   can tell you in which function the undefined value was created, but
   that's all.  Users will need to carefully check all locals in the
   specified function.

   Shadowing registers and memory
   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

   Memory is shadowed using a two level cache structure (ocacheL1 and
   ocacheL2).  Memory references are first directed to ocacheL1.  This
   is a traditional 2-way set associative cache with 32-byte lines and
   approximate LRU replacement within each set.

   A naive implementation would require storing one 32 bit otag for
   each byte of memory covered, a 4:1 space overhead.  Instead, there
   is one otag for every 4 bytes of memory covered, plus a 4-bit mask
   that shows which of the 4 bytes have that shadow value and which
   have a shadow value of zero (indicating no origin).  Hence a lot of
   space is saved, but the cost is that only one different origin per
   4 bytes of address space can be represented.  This is a source of
   imprecision, but how much of a problem it really is remains to be
   seen.
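
   For example (illustrative only): if, in an aligned 4-byte group,
   bytes 0 and 3 are undefined with origin 0x12345668 and bytes 1 and
   2 are defined, the group is shadowed by

      w32   = 0x12345668    (the single otag for the group)
      descr = 0x9           (binary 1001: bytes 0 and 3 carry that otag)

   and if byte 1 is later made undefined with a different origin, that
   new origin overwrites w32, so the origin of bytes 0 and 3 is lost.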

   A cache line that contains all zeroes ("no origins") contains no
   useful information, and can be ejected from the L1 cache "for
   free", in the sense that a read miss on the L1 causes a line of
   zeroes to be installed.  However, ejecting a line containing
   nonzeroes risks losing origin information permanently.  In order to
   prevent such lossage, ejected nonzero lines are placed in a
   secondary cache (ocacheL2), which is an OSet (AVL tree) of cache
   lines.  This can grow arbitrarily large, and so should ensure that
   Memcheck runs out of memory in preference to losing useful origin
   info due to cache size limitations.

   Shadowing registers is a bit tricky, because the shadow values are
   32 bits, regardless of the size of the register.  That gives a
   problem for registers smaller than 32 bits.  The solution is to
   find spaces in the guest state that are unused, and use those to
   shadow guest state fragments smaller than 32 bits.  For example, on
   ppc32/64, each vector register is 16 bytes long.  If 4 bytes of the
   shadow are allocated for the register's otag, then there are still
   12 bytes left over which could be used to shadow 3 other values.

   This implies there is some non-obvious mapping from guest state
   (start,length) pairs to the relevant shadow offset (for the origin
   tags).  And it is unfortunately guest-architecture specific.  The
   mapping is contained in mc_machine.c, which is quite lengthy but
   straightforward.

   Instrumenting the IR
   ~~~~~~~~~~~~~~~~~~~~

   Instrumentation is largely straightforward, and done by the
   functions schemeE and schemeS in mc_translate.c.  These generate
   code for handling the origin tags of expressions (E) and statements
   (S) respectively.  The rather strange names are a reference to the
   "compilation schemes" shown in Simon Peyton Jones' book "The
   Implementation of Functional Programming Languages" (Prentice Hall,
   1987, see
   http://research.microsoft.com/~simonpj/papers/slpj-book-1987/index.htm).

   schemeS merely arranges to move shadow values around the guest
   state to track the incoming IR.  schemeE is largely trivial too.
   The only significant point is how to compute the otag corresponding
   to binary (or ternary, quaternary, etc) operator applications.  The
   rule is simple: just take whichever value is larger (32-bit
   unsigned max).  Constants get the special value zero.  Hence this
   rule always propagates a nonzero (known) otag in preference to a
   zero (unknown, or more likely, value-is-defined) tag, as we want.
   If two different undefined values are inputs to a binary operator
   application, then which is propagated is arbitrary, but that
   doesn't matter, since the program is erroneous in using either of
   the values, and so there's no point in attempting to propagate
   both.

   Since constants are abstracted to (otag) zero, much of the
   instrumentation code can be folded out without difficulty by the
   generic post-instrumentation IR cleanup pass, using these rules:
   Max32U(0,x) -> x, Max32U(x,0) -> x, Max32U(x,y) where x and y are
   constants is evaluated at JIT time.  And the resulting dead code
   removal.  In practice this causes surprisingly few Max32Us to
   survive through to backend code generation.
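
   Illustratively (a sketch, not actual IR from mc_translate.c): the
   otag for Add32(x,y) is computed as

      Max32U( otag(x), otag(y) )

   so if x is a constant (otag 0) and y carries otag 0x5008, the
   cleanup pass folds Max32U(0, 0x5008) down to just 0x5008, and the
   Max32U disappears from the generated code entirely.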

   Integration with the V-bits machinery
   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

   This is again largely straightforward.  Mostly the otag and V bits
   stuff are independent.  The only point of interaction is when the V
   bits instrumenter creates a call to a helper function to report an
   uninitialised value error -- in that case it must first use schemeE
   to get hold of the origin tag expression for the value, and pass
   that to the helper too.

   There is the usual stuff to do with setting address range
   permissions.  When memory is painted undefined, we must also know
   the origin tag to paint with, which involves some tedious plumbing,
   particularly to do with the fast case stack handlers.  When memory
   is painted defined or noaccess then the origin tags must be forced
   to zero.

   One of the goals of the implementation was to ensure that the
   non-origin tracking mode isn't slowed down at all.  To do this,
   various functions to do with memory permissions setting (again,
   mostly pertaining to the stack) are duplicated for the with- and
   without-otag cases.

   Dealing with stack redzones, and the NIA cache
   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

   This is one of the few non-obvious parts of the implementation.

   Some ABIs (amd64-ELF, ppc64-ELF, ppc32/64-XCOFF) define a small
   reserved area below the stack pointer, that can be used as scratch
   space by compiler generated code for functions.  In the Memcheck
   sources this is referred to as the "stack redzone".  The important
   thing here is that such redzones are considered volatile across
   function calls and returns.  So Memcheck takes care to mark them as
   undefined for each call and return, on the afflicted platforms.
   Past experience shows this is essential in order to get reliable
   messages about uninitialised values that come from the stack.

   So the question is, when we paint a redzone undefined, what origin
   tag should we use for it?  Consider a function f() calling g().  If
   we paint the redzone using an otag derived from the ExeContext of
   the CALL/BL instruction in f, then any errors in g causing it to
   use uninitialised values that happen to lie in the redzone, will be
   reported as having their origin in f.  Which is highly confusing.

   The same applies for returns: if, on a return, we paint the redzone
   using an origin tag derived from the ExeContext of the RET/BLR
   instruction in g, then any later errors in f causing it to use
   uninitialised values in the redzone, will be reported as having
   their origin in g.  Which is just as confusing.

   To do it right, in both cases we need to use an origin tag which
   pertains to the instruction which dynamically follows the CALL/BL
   or RET/BLR.  In short, one derived from the NIA - the "next
   instruction address".

   To make this work, Memcheck's redzone-painting helper,
   MC_(helperc_MAKE_STACK_UNINIT), now takes a third argument, the
   NIA.  It converts the NIA to a 1-element ExeContext, and uses that
   ExeContext's ECU as the basis for the otag used to paint the
   redzone.  The expensive part of this is converting an NIA into an
   ECU, since this happens once for every call and every return.  So
   we use a simple 511-line, 2-way set associative cache
   (nia_to_ecu_cache) to cache the mappings, and that knocks most of
   the cost of that out.
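
   Illustratively (a sketch; the helper name convert_nia_to_ecu is an
   assumption here):

      UInt ecu  = convert_nia_to_ecu( nia );  // hits nia_to_ecu_cache
      UInt otag = ecu | MC_OKIND_STACK;
      // ... then paint the redzone undefined using 'otag' ...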

   Further background comments
   ~~~~~~~~~~~~~~~~~~~~~~~~~~~

   > Question: why is otag a UInt?  Wouldn't a UWord be better?  Isn't
   > it really just the address of the relevant ExeContext?

   Well, it's not the address, but a value which has a 1-1 mapping
   with ExeContexts, and is guaranteed not to be zero, since zero
   denotes (to memcheck) "unknown origin or defined value".  So these
   UInts are just numbers starting at 4 and incrementing by 4; each
   ExeContext is given a number when it is created.  (*** NOTE this
   confuses otags and ECUs; see comments above ***).

   Making these otags 32-bit regardless of the machine's word size
   makes the 64-bit implementation easier (next para).  And it doesn't
   really limit us in any way, since for the tags to overflow would
   require that the program somehow caused 2^30-1 different
   ExeContexts to be created, in which case it is probably in deep
   trouble.  Not to mention V will have soaked up many tens of
   gigabytes of memory merely to store them all.

   So having 64-bit origins doesn't really buy you anything, and has
   the following downsides:

   Suppose that instead, an otag is a UWord.  This would mean that, on
   a 64-bit target,

   1. It becomes hard to shadow any element of guest state which is
      smaller than 8 bytes.  To do so means you'd need to find some
      8-byte-sized hole in the guest state which you don't want to
      shadow, and use that instead to hold the otag.  On ppc64, the
      condition code register(s) are split into 20 UChar sized pieces,
      all of which need to be tracked (guest_XER_SO .. guest_CR7_0)
      and so that would entail finding 160 bytes somewhere else in the
      guest state.

      Even on x86, I want to track origins for %AH .. %DH (bits 15:8
      of %EAX .. %EDX) that are separate from %AL .. %DL (bits 7:0 of
      same) and so I had to look for 4 untracked otag-sized areas in
      the guest state to make that possible.

      The same problem exists of course when origin tags are only 32
      bits, but it's less extreme.

   2. (More compelling) it doubles the size of the origin shadow
      memory.  Given that the shadow memory is organised as a fixed
      size cache, and that accuracy of tracking is limited by origins
      falling out the cache due to space conflicts, this isn't good.

   > Another question: is the origin tracking perfect, or are there
   > cases where it fails to determine an origin?

   It is imperfect for at least the following reasons, and probably
   more:

   * Insufficient capacity in the origin cache.  When a line is
     evicted from the cache it is gone forever, and so subsequent
     queries for the line produce zero, indicating no origin
     information.  Interestingly, a line containing all zeroes can be
     evicted "free" from the cache, since it contains no useful
     information, so there is scope perhaps for some cleverer cache
     management schemes.  (*** NOTE, with the introduction of the
     second level origin tag cache, ocacheL2, this is no longer a
     problem. ***)

   * The origin cache only stores one otag per 32-bits of address
     space, plus 4 bits indicating which of the 4 bytes has that tag
     and which are considered defined.  The result is that if two
     undefined bytes in the same word are stored in memory, the first
     stored byte's origin will be lost and replaced by the origin for
     the second byte.

   * Nonzero origin tags for defined values.  Consider a binary
     operator application op(x,y).  Suppose y is undefined (and so has
     a valid nonzero origin tag), and x is defined, but erroneously
     has a nonzero origin tag (defined values should have tag zero).
     If the erroneous tag has a numeric value greater than y's tag,
     then the rule for propagating origin tags through binary
     operations, which is simply to take the unsigned max of the two
     tags, will erroneously propagate x's tag rather than y's.

   * Some obscure uses of x86/amd64 byte registers can cause lossage
     or confusion of origins.  %AH .. %DH are treated as different
     from, and unrelated to, their parent registers, %EAX .. %EDX.
     So some weird sequences like

        movb undefined-value, %AH
        movb defined-value, %AL
        .. use %AX or %EAX ..

     will cause the origin attributed to %AH to be ignored, since %AL,
     %AX, %EAX are treated as the same register, and %AH as a
     completely separate one.

   But having said all that, it actually seems to work fairly well in
   practice.
*/

static UWord stats_ocacheL1_find           = 0;
static UWord stats_ocacheL1_found_at_1     = 0;
static UWord stats_ocacheL1_found_at_N     = 0;
static UWord stats_ocacheL1_misses         = 0;
static UWord stats_ocacheL1_lossage        = 0;
static UWord stats_ocacheL1_movefwds       = 0;

static UWord stats__ocacheL2_finds         = 0;
static UWord stats__ocacheL2_adds          = 0;
static UWord stats__ocacheL2_dels          = 0;
static UWord stats__ocacheL2_misses        = 0;
static UWord stats__ocacheL2_n_nodes_max   = 0;

/* Cache of 32-bit values, one every 32 bits of address space */

#define OC_BITS_PER_LINE 5
#define OC_W32S_PER_LINE (1 << (OC_BITS_PER_LINE - 2))

static INLINE UWord oc_line_offset ( Addr a ) {
   return (a >> 2) & (OC_W32S_PER_LINE - 1);
}
static INLINE Bool is_valid_oc_tag ( Addr tag ) {
   return 0 == (tag & ((1 << OC_BITS_PER_LINE) - 1));
}
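
/* For example (illustrative figures only): with OC_BITS_PER_LINE == 5 we
   have OC_W32S_PER_LINE == 8, so a line covers 32 bytes and holds 8 otags.
   For a == 0x1234567B the line tag is a & ~31 == 0x12345660 and
   oc_line_offset(a) == 6, i.e. the otag covering bytes 0x12345678 ..
   0x1234567B lives in w32[6] of that line. */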

#define OC_LINES_PER_SET 2

#define OC_N_SET_BITS    20
#define OC_N_SETS        (1 << OC_N_SET_BITS)

/* These settings give:
   64 bit host: ocache:  100,663,296 sizeB  67,108,864 useful
   32 bit host: ocache:   92,274,688 sizeB  67,108,864 useful
*/
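
/* Arithmetic behind those figures (a sketch): an OCacheLine (defined below)
   is an Addr tag plus a 40-byte payload (8 UInt w32s + 8 UChar descrs),
   i.e. 48 bytes on a 64-bit host and 44 on a 32-bit host.  With (1 << 20)
   sets of 2 lines each, that is 2097152 * 48 == 100,663,296 or
   2097152 * 44 == 92,274,688 bytes, of which the 2097152 * 2 * 32 ==
   67,108,864 bytes of covered address space are the "useful" part. */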

#define OC_MOVE_FORWARDS_EVERY_BITS 7

/* Originally (pre Dec 2021) it was the case that this code had a
   parameterizable cache line size, set by changing OC_BITS_PER_LINE.
   However, as a result of the speedup fixes necessitated by bug 446103, that
   is no longer really the case, and much of the L1 and L2 cache code has been
   tuned specifically for the case OC_BITS_PER_LINE == 5 (that is, the line
   size is 32 bytes).  Changing that would require a bunch of re-tuning
   effort.  So let's set it in stone for now. */
STATIC_ASSERT(OC_BITS_PER_LINE == 5);
STATIC_ASSERT(OC_LINES_PER_SET == 2);

/* Fundamentally we want an OCacheLine structure (see below) as follows:

      struct {
         Addr  tag;
         UInt  w32  [OC_W32S_PER_LINE];
         UChar descr[OC_W32S_PER_LINE];
      }

   However, in various places, we want to set the w32[] and descr[] arrays to
   zero, or check if they are zero.  This can be a very hot path (per bug
   446103).  So, instead, we have a union which is either those two arrays
   (OCacheLine_Main) or simply an array of ULongs (OCacheLine_W64s).  For the
   set-zero/test-zero operations, the OCacheLine_W64s are used.
*/

// To ensure that OCacheLine.descr[] will fit in an integral number of ULongs.
STATIC_ASSERT(0 == (OC_W32S_PER_LINE % 8));

#define OC_W64S_PER_MAIN /* "MAIN" meaning "struct OCacheLine_Main" */ \
        (OC_W32S_PER_LINE / 2  /* covers OCacheLine_Main.w32[] */ \
         + OC_W32S_PER_LINE / 8) /* covers OCacheLine_Main.descr[] */
STATIC_ASSERT(OC_W64S_PER_MAIN == 5);

typedef
   ULong OCacheLine_W64s[OC_W64S_PER_MAIN];

typedef
   struct {
      UInt  w32  [OC_W32S_PER_LINE];
      UChar descr[OC_W32S_PER_LINE];
   }
   OCacheLine_Main;

STATIC_ASSERT(sizeof(OCacheLine_W64s) == sizeof(OCacheLine_Main));

typedef
   struct {
      Addr tag;
      union {
         OCacheLine_W64s w64s;
         OCacheLine_Main main;
      } u;
   }
   OCacheLine;

/* Classify and also sanity-check 'line'.  Return 'e' (empty) if not
   in use, 'n' (nonzero) if it contains at least one valid origin tag,
   and 'z' if all the represented tags are zero. */
static inline UChar classify_OCacheLine ( OCacheLine* line )
{
   UWord i;
   if (line->tag == 1/*invalid*/)
      return 'e'; /* EMPTY */
   tl_assert(is_valid_oc_tag(line->tag));

   // BEGIN fast special-case of the test loop below.  This will detect
   // zero-ness (case 'z') for a subset of cases that the loop below will,
   // but more quickly.
   if (OC_W64S_PER_MAIN == 5) {
      if (line->u.w64s[0] == 0
          && line->u.w64s[1] == 0 && line->u.w64s[2] == 0
          && line->u.w64s[3] == 0 && line->u.w64s[4] == 0) {
         return 'z';
      }
   } else {
      tl_assert2(0, "unsupported line size (classify_OCacheLine)");
   }
   // END fast special-case of the test loop below.

   for (i = 0; i < OC_W32S_PER_LINE; i++) {
      tl_assert(0 == ((~0xF) & line->u.main.descr[i]));
      if (line->u.main.w32[i] > 0 && line->u.main.descr[i] > 0)
         return 'n'; /* NONZERO - contains useful info */
   }
   return 'z'; /* ZERO - no useful info */
}

typedef
   struct {
      OCacheLine line[OC_LINES_PER_SET];
   }
   OCacheSet;

typedef
   struct {
      OCacheSet set[OC_N_SETS];
   }
   OCache;

static OCache* ocacheL1 = NULL;
static UWord   ocacheL1_event_ctr = 0;

static void init_ocacheL2 ( void ); /* fwds */
static void init_OCache ( void )
{
   UWord line, set;
   tl_assert(MC_(clo_mc_level) >= 3);
   tl_assert(ocacheL1 == NULL);
   SysRes sres = VG_(am_shadow_alloc)(sizeof(OCache));
   if (sr_isError(sres)) {
      VG_(out_of_memory_NORETURN)( "memcheck:allocating ocacheL1",
                                   sizeof(OCache), sr_Err(sres) );
   }
   ocacheL1 = (void *)(Addr)sr_Res(sres);
   tl_assert(ocacheL1 != NULL);
   for (set = 0; set < OC_N_SETS; set++) {
      for (line = 0; line < OC_LINES_PER_SET; line++) {
         ocacheL1->set[set].line[line].tag = 1/*invalid*/;
      }
   }
   init_ocacheL2();
}

static inline void moveLineForwards ( OCacheSet* set, UWord lineno )
{
   OCacheLine tmp;
   stats_ocacheL1_movefwds++;
   tl_assert(lineno > 0 && lineno < OC_LINES_PER_SET);
   tmp = set->line[lineno-1];
   set->line[lineno-1] = set->line[lineno];
   set->line[lineno] = tmp;
}

static inline void zeroise_OCacheLine ( OCacheLine* line, Addr tag ) {
   UWord i;
   if (OC_W32S_PER_LINE == 8) {
      // BEGIN fast special-case of the loop below
      tl_assert(OC_W64S_PER_MAIN == 5);
      line->u.w64s[0] = 0;
      line->u.w64s[1] = 0;
      line->u.w64s[2] = 0;
      line->u.w64s[3] = 0;
      line->u.w64s[4] = 0;
      // END fast special-case of the loop below
   } else {
      tl_assert2(0, "unsupported line size (zeroise_OCacheLine)");
      for (i = 0; i < OC_W32S_PER_LINE; i++) {
         line->u.main.w32[i]   = 0; /* NO ORIGIN */
         line->u.main.descr[i] = 0; /* REALLY REALLY NO ORIGIN! */
      }
   }
   line->tag = tag;
}

//////////////////////////////////////////////////////////////
//// OCache backing store

// The backing store for ocacheL1 is, conceptually, an AVL tree of lines that
// got ejected from the L1 (a "victim cache"), and which actually contain
// useful info -- that is, for which classify_OCacheLine would return 'n' and
// no other value.  However, the tree can grow large, and searching/updating
// it can be hot paths.  Hence we "take out" 12 significant bits of the key by
// having 4096 trees, and select one using HASH_OCACHE_TAG.
//
// What that hash function returns isn't important so long as it is a pure
// function of the tag values, and is < 4096.  However, it is critical for
// performance of long SARPs.  Hence the extra shift of 11 bits.  This means
// each tree conceptually is assigned to contiguous sequences of 2048 lines in
// the "line address space", giving some locality of reference when scanning
// linearly through address space, as is done by a SARP.  Changing that 11 to
// 0 gives terrible performance on long SARPs, presumably because each new
// line is in a different tree, hence we wind up thrashing the (CPU's) caches.
//
// On 32-bit targets, we have to be a bit careful not to shift out so many
// bits that not all 2^12 trees get used.  That leads to the constraint
// (OC_BITS_PER_LINE + 11 + 12) < 32.  Note that the 11 is the only thing we
// can change here.  In this case we have OC_BITS_PER_LINE == 5, hence the
// inequality is (28 < 32) and so we're good.
//
// The value 11 was determined empirically from various Firefox runs.  10 or
// 12 also work pretty well.

static OSet* ocachesL2[4096];

STATIC_ASSERT((OC_BITS_PER_LINE + 11 + 12) < 32);
static inline UInt HASH_OCACHE_TAG ( Addr tag ) {
   return (UInt)((tag >> (OC_BITS_PER_LINE + 11)) & 0xFFF);
}
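
/* For example (illustrative figures only): with OC_BITS_PER_LINE == 5 the
   shift is 16, so tag 0x12345660 selects tree (0x12345660 >> 16) & 0xFFF
   == 0x234, and every tag in [0x12340000, 0x1234FFFF] -- 2048 consecutive
   32-byte lines -- lands in the same tree. */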

static void* ocacheL2_malloc ( const HChar* cc, SizeT szB ) {
   return VG_(malloc)(cc, szB);
}
static void ocacheL2_free ( void* v ) {
   VG_(free)( v );
}

/* Stats: # nodes currently in tree */
static UWord stats__ocacheL2_n_nodes = 0;

static void init_ocacheL2 ( void )
{
   tl_assert(sizeof(Word) == sizeof(Addr)); /* since OCacheLine.tag :: Addr */
   tl_assert(0 == offsetof(OCacheLine,tag));
   for (UInt i = 0; i < 4096; i++) {
      tl_assert(!ocachesL2[i]);
      ocachesL2[i]
         = VG_(OSetGen_Create)( offsetof(OCacheLine,tag),
                                NULL, /* fast cmp */
                                ocacheL2_malloc, "mc.ioL2", ocacheL2_free );
   }
   stats__ocacheL2_n_nodes = 0;
}

/* Find line with the given tag in the tree, or NULL if not found. */
static inline OCacheLine* ocacheL2_find_tag ( Addr tag )
{
   OCacheLine* line;
   tl_assert(is_valid_oc_tag(tag));
   stats__ocacheL2_finds++;
   OSet* oset = ocachesL2[HASH_OCACHE_TAG(tag)];
   line = VG_(OSetGen_Lookup)( oset, &tag );
   return line;
}

/* Delete the line with the given tag from the tree, if it is present, and
   free up the associated memory. */
static void ocacheL2_del_tag ( Addr tag )
{
   OCacheLine* line;
   tl_assert(is_valid_oc_tag(tag));
   stats__ocacheL2_dels++;
   OSet* oset = ocachesL2[HASH_OCACHE_TAG(tag)];
   line = VG_(OSetGen_Remove)( oset, &tag );
   if (line) {
      VG_(OSetGen_FreeNode)(oset, line);
      tl_assert(stats__ocacheL2_n_nodes > 0);
      stats__ocacheL2_n_nodes--;
   }
}

/* Add a copy of the given line to the tree.  It must not already be
   present. */
static void ocacheL2_add_line ( OCacheLine* line )
{
   OCacheLine* copy;
   tl_assert(is_valid_oc_tag(line->tag));
   OSet* oset = ocachesL2[HASH_OCACHE_TAG(line->tag)];
   copy = VG_(OSetGen_AllocNode)( oset, sizeof(OCacheLine) );
   *copy = *line;
   stats__ocacheL2_adds++;
   VG_(OSetGen_Insert)( oset, copy );
   stats__ocacheL2_n_nodes++;
   if (stats__ocacheL2_n_nodes > stats__ocacheL2_n_nodes_max)
      stats__ocacheL2_n_nodes_max = stats__ocacheL2_n_nodes;
}

//////////////////////////////////////////////////////////////

__attribute__((noinline))
static OCacheLine* find_OCacheLine_SLOW ( Addr a )
{
   OCacheLine *victim, *inL2;
   UChar c;
   UWord line;
   UWord setno   = (a >> OC_BITS_PER_LINE) & (OC_N_SETS - 1);
   UWord tagmask = ~((1 << OC_BITS_PER_LINE) - 1);
   UWord tag     = a & tagmask;
   tl_assert(setno >= 0 && setno < OC_N_SETS);

   /* we already tried line == 0; skip therefore. */
   for (line = 1; line < OC_LINES_PER_SET; line++) {
      if (ocacheL1->set[setno].line[line].tag == tag) {
         if (line == 1) {
            stats_ocacheL1_found_at_1++;
         } else {
            stats_ocacheL1_found_at_N++;
         }
         if (UNLIKELY(0 == (ocacheL1_event_ctr++
                            & ((1<<OC_MOVE_FORWARDS_EVERY_BITS)-1)))) {
            moveLineForwards( &ocacheL1->set[setno], line );
            line--;
         }
         return &ocacheL1->set[setno].line[line];
      }
   }

   /* A miss.  Use the last slot.  Implicitly this means we're
      ejecting the line in the last slot. */
   stats_ocacheL1_misses++;
   tl_assert(line == OC_LINES_PER_SET);
   line--;
   tl_assert(line > 0);

   /* First, move the to-be-ejected line to the L2 cache. */
   victim = &ocacheL1->set[setno].line[line];
   c = classify_OCacheLine(victim);
   switch (c) {
      case 'e':
         /* the line is empty (has invalid tag); ignore it. */
         break;
      case 'z':
         /* line contains zeroes.  We must ensure the backing store is
            updated accordingly, either by copying the line there
            verbatim, or by ensuring it isn't present there.  We
            choose the latter on the basis that it reduces the size of
            the backing store. */
         ocacheL2_del_tag( victim->tag );
         break;
      case 'n':
         /* line contains at least one real, useful origin.  Copy it
            to the backing store. */
         stats_ocacheL1_lossage++;
         inL2 = ocacheL2_find_tag( victim->tag );
         if (inL2) {
            *inL2 = *victim;
         } else {
            ocacheL2_add_line( victim );
         }
         break;
      default:
         tl_assert(0);
   }

   /* Now we must reload the L1 cache from the backing tree, if
      possible. */
   tl_assert(tag != victim->tag); /* stay sane */
   inL2 = ocacheL2_find_tag( tag );
   if (inL2) {
      /* We're in luck.  It's in the L2. */
      ocacheL1->set[setno].line[line] = *inL2;
   } else {
      /* Missed at both levels of the cache hierarchy.  We have to
         declare it as full of zeroes (unknown origins). */
      stats__ocacheL2_misses++;
      zeroise_OCacheLine( &ocacheL1->set[setno].line[line], tag );
   }

   /* Move it one forwards */
   moveLineForwards( &ocacheL1->set[setno], line );
   line--;

   return &ocacheL1->set[setno].line[line];
}

static INLINE OCacheLine* find_OCacheLine ( Addr a )
{
   UWord setno   = (a >> OC_BITS_PER_LINE) & (OC_N_SETS - 1);
   UWord tagmask = ~((1 << OC_BITS_PER_LINE) - 1);
   UWord tag     = a & tagmask;

   stats_ocacheL1_find++;

   if (OC_ENABLE_ASSERTIONS) {
      tl_assert(setno >= 0 && setno < OC_N_SETS);
      tl_assert(0 == (tag & (4 * OC_W32S_PER_LINE - 1)));
   }

   if (LIKELY(ocacheL1->set[setno].line[0].tag == tag)) {
      return &ocacheL1->set[setno].line[0];
   }

   return find_OCacheLine_SLOW( a );
}

static INLINE
void set_aligned_word64_Origin_to_undef ( Addr a, UInt otag )
{
   //// BEGIN inlined, specialised version of MC_(helperc_b_store8)
   //// Set the origins for a+0 .. a+7
   { OCacheLine* line;
     UWord lineoff = oc_line_offset(a);
     if (OC_ENABLE_ASSERTIONS) {
        tl_assert(lineoff >= 0
                  && lineoff < OC_W32S_PER_LINE -1/*'cos 8-aligned*/);
     }
     line = find_OCacheLine( a );
     line->u.main.descr[lineoff+0] = 0xF;
     line->u.main.descr[lineoff+1] = 0xF;
     line->u.main.w32[lineoff+0]   = otag;
     line->u.main.w32[lineoff+1]   = otag;
   }
   //// END inlined, specialised version of MC_(helperc_b_store8)
}

/*------------------------------------------------------------*/
/*--- Aligned fast case permission setters,                ---*/
/*--- for dealing with stacks                              ---*/
/*------------------------------------------------------------*/

/*--------------------- 32-bit ---------------------*/

/* Nb: by "aligned" here we mean 4-byte aligned */

static INLINE void make_aligned_word32_undefined ( Addr a )
{
   PROF_EVENT(MCPE_MAKE_ALIGNED_WORD32_UNDEFINED);

#ifndef PERF_FAST_STACK2
   make_mem_undefined(a, 4);
#else
   {
      UWord   sm_off;
      SecMap* sm;

      if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) {
         PROF_EVENT(MCPE_MAKE_ALIGNED_WORD32_UNDEFINED_SLOW);
         make_mem_undefined(a, 4);
         return;
      }

      sm                  = get_secmap_for_writing_low(a);
      sm_off              = SM_OFF(a);
      sm->vabits8[sm_off] = VA_BITS8_UNDEFINED;
   }
#endif
}

static INLINE
void make_aligned_word32_undefined_w_otag ( Addr a, UInt otag )
{
   make_aligned_word32_undefined(a);
   //// BEGIN inlined, specialised version of MC_(helperc_b_store4)
   //// Set the origins for a+0 .. a+3
   { OCacheLine* line;
     UWord lineoff = oc_line_offset(a);
     if (OC_ENABLE_ASSERTIONS) {
        tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
     }
     line = find_OCacheLine( a );
     line->u.main.descr[lineoff] = 0xF;
     line->u.main.w32[lineoff]   = otag;
   }
   //// END inlined, specialised version of MC_(helperc_b_store4)
}

static INLINE
void make_aligned_word32_noaccess ( Addr a )
{
   PROF_EVENT(MCPE_MAKE_ALIGNED_WORD32_NOACCESS);

#ifndef PERF_FAST_STACK2
   MC_(make_mem_noaccess)(a, 4);
#else
   {
      UWord   sm_off;
      SecMap* sm;

      if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) {
         PROF_EVENT(MCPE_MAKE_ALIGNED_WORD32_NOACCESS_SLOW);
         MC_(make_mem_noaccess)(a, 4);
         return;
      }

      sm                  = get_secmap_for_writing_low(a);
      sm_off              = SM_OFF(a);
      sm->vabits8[sm_off] = VA_BITS8_NOACCESS;

      //// BEGIN inlined, specialised version of MC_(helperc_b_store4)
      //// Set the origins for a+0 .. a+3.
      if (UNLIKELY( MC_(clo_mc_level) == 3 )) {
         OCacheLine* line;
         UWord lineoff = oc_line_offset(a);
         if (OC_ENABLE_ASSERTIONS) {
            tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
         }
         line = find_OCacheLine( a );
         line->u.main.descr[lineoff] = 0;
      }
      //// END inlined, specialised version of MC_(helperc_b_store4)
   }
#endif
}

/*--------------------- 64-bit ---------------------*/

/* Nb: by "aligned" here we mean 8-byte aligned */

static INLINE void make_aligned_word64_undefined ( Addr a )
{
   PROF_EVENT(MCPE_MAKE_ALIGNED_WORD64_UNDEFINED);

#ifndef PERF_FAST_STACK2
   make_mem_undefined(a, 8);
#else
   {
      UWord   sm_off16;
      SecMap* sm;

      if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) {
         PROF_EVENT(MCPE_MAKE_ALIGNED_WORD64_UNDEFINED_SLOW);
         make_mem_undefined(a, 8);
         return;
      }

      sm                     = get_secmap_for_writing_low(a);
      sm_off16               = SM_OFF_16(a);
      sm->vabits16[sm_off16] = VA_BITS16_UNDEFINED;
   }
#endif
}

static INLINE
void make_aligned_word64_undefined_w_otag ( Addr a, UInt otag )
{
   make_aligned_word64_undefined(a);
   //// BEGIN inlined, specialised version of MC_(helperc_b_store8)
   //// Set the origins for a+0 .. a+7
   { OCacheLine* line;
     UWord lineoff = oc_line_offset(a);
     tl_assert(lineoff >= 0
               && lineoff < OC_W32S_PER_LINE -1/*'cos 8-aligned*/);
     line = find_OCacheLine( a );
     line->u.main.descr[lineoff+0] = 0xF;
     line->u.main.descr[lineoff+1] = 0xF;
     line->u.main.w32[lineoff+0]   = otag;
     line->u.main.w32[lineoff+1]   = otag;
   }
   //// END inlined, specialised version of MC_(helperc_b_store8)
}

static INLINE
void make_aligned_word64_noaccess ( Addr a )
{
   PROF_EVENT(MCPE_MAKE_ALIGNED_WORD64_NOACCESS);

#ifndef PERF_FAST_STACK2
   MC_(make_mem_noaccess)(a, 8);
#else
   {
      UWord   sm_off16;
      SecMap* sm;

      if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) {
         PROF_EVENT(MCPE_MAKE_ALIGNED_WORD64_NOACCESS_SLOW);
         MC_(make_mem_noaccess)(a, 8);
         return;
      }

      sm                     = get_secmap_for_writing_low(a);
      sm_off16               = SM_OFF_16(a);
      sm->vabits16[sm_off16] = VA_BITS16_NOACCESS;

      //// BEGIN inlined, specialised version of MC_(helperc_b_store8)
      //// Clear the origins for a+0 .. a+7.
      if (UNLIKELY( MC_(clo_mc_level) == 3 )) {
         OCacheLine* line;
         UWord lineoff = oc_line_offset(a);
         tl_assert(lineoff >= 0
                   && lineoff < OC_W32S_PER_LINE -1/*'cos 8-aligned*/);
         line = find_OCacheLine( a );
         line->u.main.descr[lineoff+0] = 0;
         line->u.main.descr[lineoff+1] = 0;
      }
      //// END inlined, specialised version of MC_(helperc_b_store8)
   }
#endif
}

/*------------------------------------------------------------*/
/*--- Stack pointer adjustment                             ---*/
/*------------------------------------------------------------*/

#ifdef PERF_FAST_STACK
#  define MAYBE_USED
#else
#  define MAYBE_USED __attribute__((unused))
#endif

/*--------------- adjustment by 4 bytes ---------------*/

MAYBE_USED
static void VG_REGPARM(2) mc_new_mem_stack_4_w_ECU(Addr new_SP, UInt ecu)
{
   UInt otag = ecu | MC_OKIND_STACK;
   PROF_EVENT(MCPE_NEW_MEM_STACK_4);
   if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP, otag );
   } else {
      MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 4, otag );
   }
}

MAYBE_USED
static void VG_REGPARM(1) mc_new_mem_stack_4(Addr new_SP)
{
   PROF_EVENT(MCPE_NEW_MEM_STACK_4);
   if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
   } else {
      make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 4 );
   }
}

MAYBE_USED
static void VG_REGPARM(1) mc_die_mem_stack_4(Addr new_SP)
{
   PROF_EVENT(MCPE_DIE_MEM_STACK_4);
   if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4 );
   } else {
      MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-4, 4 );
   }
}

/*--------------- adjustment by 8 bytes ---------------*/

MAYBE_USED
static void VG_REGPARM(2) mc_new_mem_stack_8_w_ECU(Addr new_SP, UInt ecu)
{
   UInt otag = ecu | MC_OKIND_STACK;
   PROF_EVENT(MCPE_NEW_MEM_STACK_8);
   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP, otag );
   } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP, otag );
      make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4, otag );
   } else {
      MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 8, otag );
   }
}

MAYBE_USED
static void VG_REGPARM(1) mc_new_mem_stack_8(Addr new_SP)
{
   PROF_EVENT(MCPE_NEW_MEM_STACK_8);
   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
   } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
      make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4 );
   } else {
      make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 8 );
   }
}

MAYBE_USED
static void VG_REGPARM(1) mc_die_mem_stack_8(Addr new_SP)
{
   PROF_EVENT(MCPE_DIE_MEM_STACK_8);
   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8 );
   } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8 );
      make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4 );
   } else {
      MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-8, 8 );
   }
}

/*--------------- adjustment by 12 bytes ---------------*/

MAYBE_USED
static void VG_REGPARM(2) mc_new_mem_stack_12_w_ECU(Addr new_SP, UInt ecu)
{
   UInt otag = ecu | MC_OKIND_STACK;
   PROF_EVENT(MCPE_NEW_MEM_STACK_12);
   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP, otag );
      make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8, otag );
   } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      /* from previous test we don't have 8-alignment at offset +0,
         hence must have 8 alignment at offsets +4/-4.  Hence safe to
         do 4 at +0 and then 8 at +4. */
      make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4, otag );
   } else {
      MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 12, otag );
   }
}

MAYBE_USED
static void VG_REGPARM(1) mc_new_mem_stack_12(Addr new_SP)
{
   PROF_EVENT(MCPE_NEW_MEM_STACK_12);
   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
      make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
   } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      /* from previous test we don't have 8-alignment at offset +0,
         hence must have 8 alignment at offsets +4/-4.  Hence safe to
         do 4 at +0 and then 8 at +4. */
      make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4 );
   } else {
      make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 12 );
   }
}

MAYBE_USED
static void VG_REGPARM(1) mc_die_mem_stack_12(Addr new_SP)
{
   PROF_EVENT(MCPE_DIE_MEM_STACK_12);
   /* Note the -12 in the test */
   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP-12 )) {
      /* We have 8-alignment at -12, hence ok to do 8 at -12 and 4 at
         -4. */
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 );
      make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4 );
   } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      /* We have 4-alignment at +0, but we don't have 8-alignment at
         -12.  So we must have 8-alignment at -8.  Hence do 4 at -12
         and then 8 at -8. */
      make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8 );
   } else {
      MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-12, 12 );
   }
}

/*--------------- adjustment by 16 bytes ---------------*/

MAYBE_USED
static void VG_REGPARM(2) mc_new_mem_stack_16_w_ECU(Addr new_SP, UInt ecu)
{
   UInt otag = ecu | MC_OKIND_STACK;
   PROF_EVENT(MCPE_NEW_MEM_STACK_16);
   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      /* Have 8-alignment at +0, hence do 8 at +0 and 8 at +8. */
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8, otag );
   } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      /* Have 4 alignment at +0 but not 8; hence 8 must be at +4.
         Hence do 4 at +0, 8 at +4, 4 at +12. */
      make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4 , otag );
      make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+12, otag );
   } else {
      MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 16, otag );
   }
}

MAYBE_USED
static void VG_REGPARM(1) mc_new_mem_stack_16(Addr new_SP)
{
   PROF_EVENT(MCPE_NEW_MEM_STACK_16);
   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      /* Have 8-alignment at +0, hence do 8 at +0 and 8 at +8. */
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
   } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      /* Have 4 alignment at +0 but not 8; hence 8 must be at +4.
         Hence do 4 at +0, 8 at +4, 4 at +12. */
      make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4 );
      make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+12 );
   } else {
      make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 16 );
   }
}

MAYBE_USED
static void VG_REGPARM(1) mc_die_mem_stack_16(Addr new_SP)
{
   PROF_EVENT(MCPE_DIE_MEM_STACK_16);
   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      /* Have 8-alignment at +0, hence do 8 at -16 and 8 at -8. */
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8 );
   } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      /* 8 alignment must be at -12.  Do 4 at -16, 8 at -12, 4 at -4. */
      make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 );
      make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4 );
   } else {
      MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-16, 16 );
   }
}

/*--------------- adjustment by 32 bytes ---------------*/

MAYBE_USED
static void VG_REGPARM(2) mc_new_mem_stack_32_w_ECU(Addr new_SP, UInt ecu)
{
   UInt otag = ecu | MC_OKIND_STACK;
   PROF_EVENT(MCPE_NEW_MEM_STACK_32);
   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      /* Straightforward */
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8 , otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24, otag );
   } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      /* 8 alignment must be at +4.  Hence do 8 at +4,+12,+20 and 4 at
         +0 and +28. */
      make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4 , otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+12, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+20, otag );
      make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+28, otag );
   } else {
      MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 32, otag );
   }
}

MAYBE_USED
static void VG_REGPARM(1) mc_new_mem_stack_32(Addr new_SP)
{
   PROF_EVENT(MCPE_NEW_MEM_STACK_32);
   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      /* Straightforward */
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
   } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      /* 8 alignment must be at +4.  Hence do 8 at +4,+12,+20 and 4 at
         +0 and +28. */
      make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+12 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+20 );
      make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+28 );
   } else {
      make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 32 );
   }
}

MAYBE_USED
static void VG_REGPARM(1) mc_die_mem_stack_32(Addr new_SP)
{
   PROF_EVENT(MCPE_DIE_MEM_STACK_32);
   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      /* Straightforward */
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
   } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      /* 8 alignment must be at -4 etc.  Hence do 8 at -12,-20,-28 and
         4 at -32 and -4. */
      make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-28 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-20 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 );
      make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4 );
   } else {
      MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-32, 32 );
   }
}
/*--------------- adjustment by 112 bytes ---------------*/

static void VG_REGPARM(2) mc_new_mem_stack_112_w_ECU(Addr new_SP, UInt ecu)
{
   UInt otag = ecu | MC_OKIND_STACK;
   PROF_EVENT(MCPE_NEW_MEM_STACK_112);
   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP,     otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8,   otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16,  otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24,  otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32,  otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40,  otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48,  otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56,  otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64,  otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72,  otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80,  otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88,  otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96,  otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag );
   } else {
      MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 112, otag );
   }
}
static void VG_REGPARM(1) mc_new_mem_stack_112(Addr new_SP)
{
   PROF_EVENT(MCPE_NEW_MEM_STACK_112);
   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP     );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8   );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16  );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24  );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32  );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40  );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48  );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56  );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64  );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72  );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80  );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88  );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96  );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 );
   } else {
      make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 112 );
   }
}
static void VG_REGPARM(1) mc_die_mem_stack_112(Addr new_SP)
{
   PROF_EVENT(MCPE_DIE_MEM_STACK_112);
   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96  );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88  );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80  );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72  );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64  );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56  );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48  );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40  );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32  );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24  );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16  );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8   );
   } else {
      MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-112, 112 );
   }
}
/*--------------- adjustment by 128 bytes ---------------*/

static void VG_REGPARM(2) mc_new_mem_stack_128_w_ECU(Addr new_SP, UInt ecu)
{
   UInt otag = ecu | MC_OKIND_STACK;
   PROF_EVENT(MCPE_NEW_MEM_STACK_128);
   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP,     otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8,   otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16,  otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24,  otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32,  otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40,  otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48,  otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56,  otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64,  otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72,  otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80,  otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88,  otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96,  otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+112, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+120, otag );
   } else {
      MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 128, otag );
   }
}
static void VG_REGPARM(1) mc_new_mem_stack_128(Addr new_SP)
{
   PROF_EVENT(MCPE_NEW_MEM_STACK_128);
   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP     );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8   );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16  );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24  );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32  );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40  );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48  );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56  );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64  );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72  );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80  );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88  );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96  );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+112 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+120 );
   } else {
      make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 128 );
   }
}
static void VG_REGPARM(1) mc_die_mem_stack_128(Addr new_SP)
{
   PROF_EVENT(MCPE_DIE_MEM_STACK_128);
   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-128 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-120 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96  );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88  );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80  );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72  );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64  );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56  );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48  );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40  );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32  );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24  );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16  );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8   );
   } else {
      MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-128, 128 );
   }
}
/*--------------- adjustment by 144 bytes ---------------*/

static void VG_REGPARM(2) mc_new_mem_stack_144_w_ECU(Addr new_SP, UInt ecu)
{
   UInt otag = ecu | MC_OKIND_STACK;
   PROF_EVENT(MCPE_NEW_MEM_STACK_144);
   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP,     otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8,   otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16,  otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24,  otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32,  otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40,  otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48,  otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56,  otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64,  otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72,  otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80,  otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88,  otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96,  otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+112, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+120, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+128, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+136, otag );
   } else {
      MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 144, otag );
   }
}
static void VG_REGPARM(1) mc_new_mem_stack_144(Addr new_SP)
{
   PROF_EVENT(MCPE_NEW_MEM_STACK_144);
   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP     );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8   );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16  );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24  );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32  );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40  );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48  );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56  );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64  );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72  );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80  );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88  );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96  );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+112 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+120 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+128 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+136 );
   } else {
      make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 144 );
   }
}
static void VG_REGPARM(1) mc_die_mem_stack_144(Addr new_SP)
{
   PROF_EVENT(MCPE_DIE_MEM_STACK_144);
   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-144 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-136 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-128 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-120 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96  );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88  );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80  );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72  );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64  );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56  );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48  );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40  );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32  );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24  );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16  );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8   );
   } else {
      MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-144, 144 );
   }
}
/*--------------- adjustment by 160 bytes ---------------*/

static void VG_REGPARM(2) mc_new_mem_stack_160_w_ECU(Addr new_SP, UInt ecu)
{
   UInt otag = ecu | MC_OKIND_STACK;
   PROF_EVENT(MCPE_NEW_MEM_STACK_160);
   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP,     otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8,   otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16,  otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24,  otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32,  otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40,  otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48,  otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56,  otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64,  otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72,  otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80,  otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88,  otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96,  otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+112, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+120, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+128, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+136, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+144, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+152, otag );
   } else {
      MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 160, otag );
   }
}
static void VG_REGPARM(1) mc_new_mem_stack_160(Addr new_SP)
{
   PROF_EVENT(MCPE_NEW_MEM_STACK_160);
   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP     );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8   );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16  );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24  );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32  );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40  );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48  );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56  );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64  );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72  );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80  );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88  );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96  );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+112 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+120 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+128 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+136 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+144 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+152 );
   } else {
      make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 160 );
   }
}
static void VG_REGPARM(1) mc_die_mem_stack_160(Addr new_SP)
{
   PROF_EVENT(MCPE_DIE_MEM_STACK_160);
   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-160 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-152 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-144 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-136 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-128 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-120 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96  );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88  );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80  );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72  );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64  );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56  );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48  );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40  );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32  );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24  );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16  );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8   );
   } else {
      MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-160, 160 );
   }
}
/*--------------- adjustment by N bytes ---------------*/

static void mc_new_mem_stack_w_ECU ( Addr a, SizeT len, UInt ecu )
{
   UInt otag = ecu | MC_OKIND_STACK;
   PROF_EVENT(MCPE_NEW_MEM_STACK);
   MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + a, len, otag );
}

static void mc_new_mem_stack ( Addr a, SizeT len )
{
   PROF_EVENT(MCPE_NEW_MEM_STACK);
   make_mem_undefined ( -VG_STACK_REDZONE_SZB + a, len );
}

static void mc_die_mem_stack ( Addr a, SizeT len )
{
   PROF_EVENT(MCPE_DIE_MEM_STACK);
   MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + a, len );
}
/* The AMD64 ABI says:

   "The 128-byte area beyond the location pointed to by %rsp is considered
    to be reserved and shall not be modified by signal or interrupt
    handlers.  Therefore, functions may use this area for temporary data
    that is not needed across function calls.  In particular, leaf functions
    may use this area for their entire stack frame, rather than adjusting
    the stack pointer in the prologue and epilogue.  This area is known as
    the red zone."

   So after any call or return we need to mark this redzone as containing
   undefined values.

   Consider this:  we're in function f.  f calls g.  g moves rsp down
   modestly (say 16 bytes) and writes stuff all over the red zone, making it
   defined.  g returns.  f is buggy and reads from parts of the red zone
   that it didn't write on.  But because g filled that area in, f is going
   to be picking up defined V bits and so any errors from reading bits of
   the red zone it didn't write, will be missed.  The only solution I could
   think of was to make the red zone undefined when g returns to f.

   This is in accordance with the ABI, which makes it clear the redzone
   is volatile across function calls.

   The problem occurs the other way round too: f could fill the RZ up
   with defined values and g could mistakenly read them.  So the RZ
   also needs to be nuked on function calls.
*/
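
/* A minimal sketch of the scenario just described (hypothetical code,
   purely for illustration -- 'f', 'g', 't' and 'x' are made-up names):

      static long g ( void ) {
         long t[4];           // g's stores land in what is, from f's
         t[0] = 1;            // point of view, f's red zone
         return t[0];
      }

      long f ( void ) {
         long x;              // uninitialised, kept below f's rsp
         g();                 // g scribbles defined data over that area
         return x;            // bug: read of uninitialised x.  If the red
                              // zone were not re-marked undefined when g
                              // returns, g's stale writes would make x
                              // look defined and the error would be missed.
      }
*/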
/* Here's a simple cache to hold nia -> ECU mappings.  It could be
   improved so as to have a lower miss rate. */

static UWord stats__nia_cache_queries = 0;
static UWord stats__nia_cache_misses  = 0;

typedef
   struct { UWord nia0; UWord ecu0;   /* nia0 maps to ecu0 */
            UWord nia1; UWord ecu1; } /* nia1 maps to ecu1 */
   WCacheEnt;

#define N_NIA_TO_ECU_CACHE 511

static WCacheEnt nia_to_ecu_cache[N_NIA_TO_ECU_CACHE];
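
/* Each slot holds two entries in most-recently-used-first order.  A
   lookup sketch (describing convert_nia_to_ecu below): compute
   i = nia % N_NIA_TO_ECU_CACHE; a hit on nia0 returns ecu0 at once; a
   hit on nia1 first swaps the two entries, so the hot mapping migrates
   to the front; a miss demotes (nia0,ecu0) to (nia1,ecu1), evicting
   the older pair, and installs the new mapping at the front. */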
static void init_nia_to_ecu_cache ( void )
{
   UWord       i;
   Addr        zero_addr = 0;
   ExeContext* zero_ec;
   UInt        zero_ecu;
   /* Fill all the slots with an entry for address zero, and the
      relevant otags accordingly.  Hence the cache is initially filled
      with valid entries. */
   zero_ec = VG_(make_depth_1_ExeContext_from_Addr)(zero_addr);
   tl_assert(zero_ec);
   zero_ecu = VG_(get_ECU_from_ExeContext)(zero_ec);
   tl_assert(VG_(is_plausible_ECU)(zero_ecu));
   for (i = 0; i < N_NIA_TO_ECU_CACHE; i++) {
      nia_to_ecu_cache[i].nia0 = zero_addr;
      nia_to_ecu_cache[i].ecu0 = zero_ecu;
      nia_to_ecu_cache[i].nia1 = zero_addr;
      nia_to_ecu_cache[i].ecu1 = zero_ecu;
   }
}
static inline UInt convert_nia_to_ecu ( Addr nia )
{
   UWord i;
   UInt        ecu;
   ExeContext* ec;

   tl_assert( sizeof(nia_to_ecu_cache[0].nia1) == sizeof(nia) );

   stats__nia_cache_queries++;
   i = nia % N_NIA_TO_ECU_CACHE;
   tl_assert(i >= 0 && i < N_NIA_TO_ECU_CACHE);

   if (LIKELY( nia_to_ecu_cache[i].nia0 == nia ))
      return nia_to_ecu_cache[i].ecu0;

   if (LIKELY( nia_to_ecu_cache[i].nia1 == nia )) {
#     define SWAP(_w1,_w2) { UWord _t = _w1; _w1 = _w2; _w2 = _t; }
      SWAP( nia_to_ecu_cache[i].nia0, nia_to_ecu_cache[i].nia1 );
      SWAP( nia_to_ecu_cache[i].ecu0, nia_to_ecu_cache[i].ecu1 );
#     undef SWAP
      return nia_to_ecu_cache[i].ecu0;
   }

   stats__nia_cache_misses++;
   ec = VG_(make_depth_1_ExeContext_from_Addr)(nia);
   tl_assert(ec);
   ecu = VG_(get_ECU_from_ExeContext)(ec);
   tl_assert(VG_(is_plausible_ECU)(ecu));

   nia_to_ecu_cache[i].nia1 = nia_to_ecu_cache[i].nia0;
   nia_to_ecu_cache[i].ecu1 = nia_to_ecu_cache[i].ecu0;

   nia_to_ecu_cache[i].nia0 = nia;
   nia_to_ecu_cache[i].ecu0 = (UWord)ecu;
   return ecu;
}
/* This marks the stack as addressable but undefined, after a call or
   return for a target that has an ABI defined stack redzone.  It
   happens quite a lot and needs to be fast.  This is the version for
   origin tracking.  The non-origin-tracking version is below. */
void MC_(helperc_MAKE_STACK_UNINIT_w_o) ( Addr base, UWord len, Addr nia )
{
   PROF_EVENT(MCPE_MAKE_STACK_UNINIT_W_O);
   if (0)
      VG_(printf)("helperc_MAKE_STACK_UNINIT_w_o (%#lx,%lu,nia=%#lx)\n",
                  base, len, nia );

   UInt ecu = convert_nia_to_ecu ( nia );
   tl_assert(VG_(is_plausible_ECU)(ecu));

   UInt otag = ecu | MC_OKIND_STACK;

#  if 0
   /* Slow(ish) version, which is fairly easily seen to be correct.
   */
   if (LIKELY( VG_IS_8_ALIGNED(base) && len==128 )) {
      make_aligned_word64_undefined_w_otag(base +   0, otag);
      make_aligned_word64_undefined_w_otag(base +   8, otag);
      make_aligned_word64_undefined_w_otag(base +  16, otag);
      make_aligned_word64_undefined_w_otag(base +  24, otag);

      make_aligned_word64_undefined_w_otag(base +  32, otag);
      make_aligned_word64_undefined_w_otag(base +  40, otag);
      make_aligned_word64_undefined_w_otag(base +  48, otag);
      make_aligned_word64_undefined_w_otag(base +  56, otag);

      make_aligned_word64_undefined_w_otag(base +  64, otag);
      make_aligned_word64_undefined_w_otag(base +  72, otag);
      make_aligned_word64_undefined_w_otag(base +  80, otag);
      make_aligned_word64_undefined_w_otag(base +  88, otag);

      make_aligned_word64_undefined_w_otag(base +  96, otag);
      make_aligned_word64_undefined_w_otag(base + 104, otag);
      make_aligned_word64_undefined_w_otag(base + 112, otag);
      make_aligned_word64_undefined_w_otag(base + 120, otag);
   } else {
      MC_(make_mem_undefined_w_otag)(base, len, otag);
   }
#  endif
   /* Idea is: go fast when
         * 8-aligned and length is 128
         * the sm is available in the main primary map
         * the address range falls entirely within a single secondary map
      If all those conditions hold, just update the V+A bits by writing
      directly into the vabits array.  (If the sm was distinguished, this
      will make a copy and then write to it.)
   */
   if (LIKELY( len == 128 && VG_IS_8_ALIGNED(base) )) {
      /* Now we know the address range is suitably sized and aligned. */
      UWord a_lo = (UWord)(base);
      UWord a_hi = (UWord)(base + 128 - 1);
      tl_assert(a_lo < a_hi);             // paranoia: detect overflow
      if (LIKELY(a_hi <= MAX_PRIMARY_ADDRESS)) {
         /* Now we know the entire range is within the main primary map. */
         UWord pm_off_lo = get_primary_map_low_offset(a_lo);
         UWord pm_off_hi = get_primary_map_low_offset(a_hi);
         if (LIKELY(pm_off_lo == pm_off_hi)) {
            /* Now we know that the entire address range falls within a
               single secondary map, and that that secondary 'lives' in
               the main primary map. */
            SecMap* sm      = get_secmap_for_writing_low(a_lo);
            UWord   v_off16 = SM_OFF_16(a_lo);
            UShort* p       = &sm->vabits16[v_off16];
            p[ 0] = VA_BITS16_UNDEFINED;
            p[ 1] = VA_BITS16_UNDEFINED;
            p[ 2] = VA_BITS16_UNDEFINED;
            p[ 3] = VA_BITS16_UNDEFINED;
            p[ 4] = VA_BITS16_UNDEFINED;
            p[ 5] = VA_BITS16_UNDEFINED;
            p[ 6] = VA_BITS16_UNDEFINED;
            p[ 7] = VA_BITS16_UNDEFINED;
            p[ 8] = VA_BITS16_UNDEFINED;
            p[ 9] = VA_BITS16_UNDEFINED;
            p[10] = VA_BITS16_UNDEFINED;
            p[11] = VA_BITS16_UNDEFINED;
            p[12] = VA_BITS16_UNDEFINED;
            p[13] = VA_BITS16_UNDEFINED;
            p[14] = VA_BITS16_UNDEFINED;
            p[15] = VA_BITS16_UNDEFINED;
            set_aligned_word64_Origin_to_undef( base + 8 * 0,  otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 1,  otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 2,  otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 3,  otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 4,  otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 5,  otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 6,  otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 7,  otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 8,  otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 9,  otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 10, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 11, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 12, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 13, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 14, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 15, otag );
            return;
         }
      }
   }
   /* 288 bytes (36 ULongs) is the magic value for ELF ppc64. */
   if (LIKELY( len == 288 && VG_IS_8_ALIGNED(base) )) {
      /* Now we know the address range is suitably sized and aligned. */
      UWord a_lo = (UWord)(base);
      UWord a_hi = (UWord)(base + 288 - 1);
      tl_assert(a_lo < a_hi);             // paranoia: detect overflow
      if (a_hi <= MAX_PRIMARY_ADDRESS) {
         UWord pm_off_lo = get_primary_map_low_offset(a_lo);
         UWord pm_off_hi = get_primary_map_low_offset(a_hi);
         if (LIKELY(pm_off_lo == pm_off_hi)) {
            /* Now we know that the entire address range falls within a
               single secondary map, and that that secondary 'lives' in
               the main primary map. */
            SecMap* sm      = get_secmap_for_writing_low(a_lo);
            UWord   v_off16 = SM_OFF_16(a_lo);
            UShort* p       = &sm->vabits16[v_off16];
            p[ 0] = VA_BITS16_UNDEFINED;
            p[ 1] = VA_BITS16_UNDEFINED;
            p[ 2] = VA_BITS16_UNDEFINED;
            p[ 3] = VA_BITS16_UNDEFINED;
            p[ 4] = VA_BITS16_UNDEFINED;
            p[ 5] = VA_BITS16_UNDEFINED;
            p[ 6] = VA_BITS16_UNDEFINED;
            p[ 7] = VA_BITS16_UNDEFINED;
            p[ 8] = VA_BITS16_UNDEFINED;
            p[ 9] = VA_BITS16_UNDEFINED;
            p[10] = VA_BITS16_UNDEFINED;
            p[11] = VA_BITS16_UNDEFINED;
            p[12] = VA_BITS16_UNDEFINED;
            p[13] = VA_BITS16_UNDEFINED;
            p[14] = VA_BITS16_UNDEFINED;
            p[15] = VA_BITS16_UNDEFINED;
            p[16] = VA_BITS16_UNDEFINED;
            p[17] = VA_BITS16_UNDEFINED;
            p[18] = VA_BITS16_UNDEFINED;
            p[19] = VA_BITS16_UNDEFINED;
            p[20] = VA_BITS16_UNDEFINED;
            p[21] = VA_BITS16_UNDEFINED;
            p[22] = VA_BITS16_UNDEFINED;
            p[23] = VA_BITS16_UNDEFINED;
            p[24] = VA_BITS16_UNDEFINED;
            p[25] = VA_BITS16_UNDEFINED;
            p[26] = VA_BITS16_UNDEFINED;
            p[27] = VA_BITS16_UNDEFINED;
            p[28] = VA_BITS16_UNDEFINED;
            p[29] = VA_BITS16_UNDEFINED;
            p[30] = VA_BITS16_UNDEFINED;
            p[31] = VA_BITS16_UNDEFINED;
            p[32] = VA_BITS16_UNDEFINED;
            p[33] = VA_BITS16_UNDEFINED;
            p[34] = VA_BITS16_UNDEFINED;
            p[35] = VA_BITS16_UNDEFINED;
            set_aligned_word64_Origin_to_undef( base + 8 * 0,  otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 1,  otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 2,  otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 3,  otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 4,  otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 5,  otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 6,  otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 7,  otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 8,  otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 9,  otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 10, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 11, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 12, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 13, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 14, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 15, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 16, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 17, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 18, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 19, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 20, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 21, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 22, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 23, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 24, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 25, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 26, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 27, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 28, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 29, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 30, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 31, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 32, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 33, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 34, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 35, otag );
            return;
         }
      }
   }
   /* else fall into slow case */
   MC_(make_mem_undefined_w_otag)(base, len, otag);
}
/* This is a version of MC_(helperc_MAKE_STACK_UNINIT_w_o) that is
   specialised for the non-origin-tracking case. */
void MC_(helperc_MAKE_STACK_UNINIT_no_o) ( Addr base, UWord len )
{
   PROF_EVENT(MCPE_MAKE_STACK_UNINIT_NO_O);
   if (0)
      VG_(printf)("helperc_MAKE_STACK_UNINIT_no_o (%#lx,%lu)\n",
                  base, len );

#  if 0
   /* Slow(ish) version, which is fairly easily seen to be correct.
   */
   if (LIKELY( VG_IS_8_ALIGNED(base) && len==128 )) {
      make_aligned_word64_undefined(base +   0);
      make_aligned_word64_undefined(base +   8);
      make_aligned_word64_undefined(base +  16);
      make_aligned_word64_undefined(base +  24);

      make_aligned_word64_undefined(base +  32);
      make_aligned_word64_undefined(base +  40);
      make_aligned_word64_undefined(base +  48);
      make_aligned_word64_undefined(base +  56);

      make_aligned_word64_undefined(base +  64);
      make_aligned_word64_undefined(base +  72);
      make_aligned_word64_undefined(base +  80);
      make_aligned_word64_undefined(base +  88);

      make_aligned_word64_undefined(base +  96);
      make_aligned_word64_undefined(base + 104);
      make_aligned_word64_undefined(base + 112);
      make_aligned_word64_undefined(base + 120);
   } else {
      make_mem_undefined(base, len);
   }
#  endif
   /* Idea is: go fast when
         * 8-aligned and length is 128
         * the sm is available in the main primary map
         * the address range falls entirely within a single secondary map
      If all those conditions hold, just update the V+A bits by writing
      directly into the vabits array.  (If the sm was distinguished, this
      will make a copy and then write to it.)
   */
   if (LIKELY( len == 128 && VG_IS_8_ALIGNED(base) )) {
      /* Now we know the address range is suitably sized and aligned. */
      UWord a_lo = (UWord)(base);
      UWord a_hi = (UWord)(base + 128 - 1);
      tl_assert(a_lo < a_hi);             // paranoia: detect overflow
      if (LIKELY(a_hi <= MAX_PRIMARY_ADDRESS)) {
         /* Now we know the entire range is within the main primary map. */
         UWord pm_off_lo = get_primary_map_low_offset(a_lo);
         UWord pm_off_hi = get_primary_map_low_offset(a_hi);
         if (LIKELY(pm_off_lo == pm_off_hi)) {
            /* Now we know that the entire address range falls within a
               single secondary map, and that that secondary 'lives' in
               the main primary map. */
            SecMap* sm      = get_secmap_for_writing_low(a_lo);
            UWord   v_off16 = SM_OFF_16(a_lo);
            UShort* p       = &sm->vabits16[v_off16];
            p[ 0] = VA_BITS16_UNDEFINED;
            p[ 1] = VA_BITS16_UNDEFINED;
            p[ 2] = VA_BITS16_UNDEFINED;
            p[ 3] = VA_BITS16_UNDEFINED;
            p[ 4] = VA_BITS16_UNDEFINED;
            p[ 5] = VA_BITS16_UNDEFINED;
            p[ 6] = VA_BITS16_UNDEFINED;
            p[ 7] = VA_BITS16_UNDEFINED;
            p[ 8] = VA_BITS16_UNDEFINED;
            p[ 9] = VA_BITS16_UNDEFINED;
            p[10] = VA_BITS16_UNDEFINED;
            p[11] = VA_BITS16_UNDEFINED;
            p[12] = VA_BITS16_UNDEFINED;
            p[13] = VA_BITS16_UNDEFINED;
            p[14] = VA_BITS16_UNDEFINED;
            p[15] = VA_BITS16_UNDEFINED;
            return;
         }
      }
   }
   /* 288 bytes (36 ULongs) is the magic value for ELF ppc64. */
   if (LIKELY( len == 288 && VG_IS_8_ALIGNED(base) )) {
      /* Now we know the address range is suitably sized and aligned. */
      UWord a_lo = (UWord)(base);
      UWord a_hi = (UWord)(base + 288 - 1);
      tl_assert(a_lo < a_hi);             // paranoia: detect overflow
      if (a_hi <= MAX_PRIMARY_ADDRESS) {
         UWord pm_off_lo = get_primary_map_low_offset(a_lo);
         UWord pm_off_hi = get_primary_map_low_offset(a_hi);
         if (LIKELY(pm_off_lo == pm_off_hi)) {
            /* Now we know that the entire address range falls within a
               single secondary map, and that that secondary 'lives' in
               the main primary map. */
            SecMap* sm      = get_secmap_for_writing_low(a_lo);
            UWord   v_off16 = SM_OFF_16(a_lo);
            UShort* p       = &sm->vabits16[v_off16];
            p[ 0] = VA_BITS16_UNDEFINED;
            p[ 1] = VA_BITS16_UNDEFINED;
            p[ 2] = VA_BITS16_UNDEFINED;
            p[ 3] = VA_BITS16_UNDEFINED;
            p[ 4] = VA_BITS16_UNDEFINED;
            p[ 5] = VA_BITS16_UNDEFINED;
            p[ 6] = VA_BITS16_UNDEFINED;
            p[ 7] = VA_BITS16_UNDEFINED;
            p[ 8] = VA_BITS16_UNDEFINED;
            p[ 9] = VA_BITS16_UNDEFINED;
            p[10] = VA_BITS16_UNDEFINED;
            p[11] = VA_BITS16_UNDEFINED;
            p[12] = VA_BITS16_UNDEFINED;
            p[13] = VA_BITS16_UNDEFINED;
            p[14] = VA_BITS16_UNDEFINED;
            p[15] = VA_BITS16_UNDEFINED;
            p[16] = VA_BITS16_UNDEFINED;
            p[17] = VA_BITS16_UNDEFINED;
            p[18] = VA_BITS16_UNDEFINED;
            p[19] = VA_BITS16_UNDEFINED;
            p[20] = VA_BITS16_UNDEFINED;
            p[21] = VA_BITS16_UNDEFINED;
            p[22] = VA_BITS16_UNDEFINED;
            p[23] = VA_BITS16_UNDEFINED;
            p[24] = VA_BITS16_UNDEFINED;
            p[25] = VA_BITS16_UNDEFINED;
            p[26] = VA_BITS16_UNDEFINED;
            p[27] = VA_BITS16_UNDEFINED;
            p[28] = VA_BITS16_UNDEFINED;
            p[29] = VA_BITS16_UNDEFINED;
            p[30] = VA_BITS16_UNDEFINED;
            p[31] = VA_BITS16_UNDEFINED;
            p[32] = VA_BITS16_UNDEFINED;
            p[33] = VA_BITS16_UNDEFINED;
            p[34] = VA_BITS16_UNDEFINED;
            p[35] = VA_BITS16_UNDEFINED;
            return;
         }
      }
   }

   /* else fall into slow case */
   make_mem_undefined(base, len);
}
/* And this is an even more specialised case, for the case where there
   is no origin tracking, and the length is 128. */
void MC_(helperc_MAKE_STACK_UNINIT_128_no_o) ( Addr base )
{
   PROF_EVENT(MCPE_MAKE_STACK_UNINIT_128_NO_O);
   if (0)
      VG_(printf)("helperc_MAKE_STACK_UNINIT_128_no_o (%#lx)\n", base );

#  if 0
   /* Slow(ish) version, which is fairly easily seen to be correct.
   */
   if (LIKELY( VG_IS_8_ALIGNED(base) )) {
      make_aligned_word64_undefined(base +   0);
      make_aligned_word64_undefined(base +   8);
      make_aligned_word64_undefined(base +  16);
      make_aligned_word64_undefined(base +  24);

      make_aligned_word64_undefined(base +  32);
      make_aligned_word64_undefined(base +  40);
      make_aligned_word64_undefined(base +  48);
      make_aligned_word64_undefined(base +  56);

      make_aligned_word64_undefined(base +  64);
      make_aligned_word64_undefined(base +  72);
      make_aligned_word64_undefined(base +  80);
      make_aligned_word64_undefined(base +  88);

      make_aligned_word64_undefined(base +  96);
      make_aligned_word64_undefined(base + 104);
      make_aligned_word64_undefined(base + 112);
      make_aligned_word64_undefined(base + 120);
   } else {
      make_mem_undefined(base, 128);
   }
#  endif
   /* Idea is: go fast when
         * 16-aligned and length is 128
         * the sm is available in the main primary map
         * the address range falls entirely within a single secondary map
      If all those conditions hold, just update the V+A bits by writing
      directly into the vabits array.  (If the sm was distinguished, this
      will make a copy and then write to it.)

      Typically this applies to amd64 'ret' instructions, since RSP is
      16-aligned (0 % 16) after the instruction (per the amd64-ELF ABI).
   */
   if (LIKELY( VG_IS_16_ALIGNED(base) )) {
      /* Now we know the address range is suitably sized and aligned. */
      UWord a_lo = (UWord)(base);
      UWord a_hi = (UWord)(base + 128 - 1);
      /* FIXME: come up with a sane story on the wraparound case
         (which of course cannot happen, but still..) */
      /* tl_assert(a_lo < a_hi); */            // paranoia: detect overflow
      if (LIKELY(a_hi <= MAX_PRIMARY_ADDRESS)) {
         /* Now we know the entire range is within the main primary map. */
         UWord pm_off_lo = get_primary_map_low_offset(a_lo);
         UWord pm_off_hi = get_primary_map_low_offset(a_hi);
         if (LIKELY(pm_off_lo == pm_off_hi)) {
            /* Now we know that the entire address range falls within a
               single secondary map, and that that secondary 'lives' in
               the main primary map. */
            PROF_EVENT(MCPE_MAKE_STACK_UNINIT_128_NO_O_ALIGNED_16);
            SecMap* sm    = get_secmap_for_writing_low(a_lo);
            UWord   v_off = SM_OFF(a_lo);
            UInt*   w32   = ASSUME_ALIGNED(UInt*, &sm->vabits8[v_off]);
            w32[ 0] = VA_BITS32_UNDEFINED;
            w32[ 1] = VA_BITS32_UNDEFINED;
            w32[ 2] = VA_BITS32_UNDEFINED;
            w32[ 3] = VA_BITS32_UNDEFINED;
            w32[ 4] = VA_BITS32_UNDEFINED;
            w32[ 5] = VA_BITS32_UNDEFINED;
            w32[ 6] = VA_BITS32_UNDEFINED;
            w32[ 7] = VA_BITS32_UNDEFINED;
            return;
         }
      }
   }
   /* The same, but for when base is 8 % 16, which is the situation
      with RSP for amd64-ELF immediately after call instructions.
   */
   if (LIKELY( VG_IS_16_ALIGNED(base+8) )) { // restricts to 8 aligned
      /* Now we know the address range is suitably sized and aligned. */
      UWord a_lo = (UWord)(base);
      UWord a_hi = (UWord)(base + 128 - 1);
      /* FIXME: come up with a sane story on the wraparound case
         (which of course cannot happen, but still..) */
      /* tl_assert(a_lo < a_hi); */            // paranoia: detect overflow
      if (LIKELY(a_hi <= MAX_PRIMARY_ADDRESS)) {
         /* Now we know the entire range is within the main primary map. */
         UWord pm_off_lo = get_primary_map_low_offset(a_lo);
         UWord pm_off_hi = get_primary_map_low_offset(a_hi);
         if (LIKELY(pm_off_lo == pm_off_hi)) {
            PROF_EVENT(MCPE_MAKE_STACK_UNINIT_128_NO_O_ALIGNED_8);
            /* Now we know that the entire address range falls within a
               single secondary map, and that that secondary 'lives' in
               the main primary map. */
            SecMap* sm      = get_secmap_for_writing_low(a_lo);
            UWord   v_off16 = SM_OFF_16(a_lo);
            UShort* w16     = &sm->vabits16[v_off16];
            UInt*   w32     = ASSUME_ALIGNED(UInt*, &w16[1]);
            /* The following assertion is commented out for obvious
               performance reasons, but was verified as valid when
               running the entire testsuite and also Firefox. */
            /* tl_assert(VG_IS_4_ALIGNED(w32)); */
            w16[ 0] = VA_BITS16_UNDEFINED; // w16[0]
            w32[ 0] = VA_BITS32_UNDEFINED; // w16[1,2]
            w32[ 1] = VA_BITS32_UNDEFINED; // w16[3,4]
            w32[ 2] = VA_BITS32_UNDEFINED; // w16[5,6]
            w32[ 3] = VA_BITS32_UNDEFINED; // w16[7,8]
            w32[ 4] = VA_BITS32_UNDEFINED; // w16[9,10]
            w32[ 5] = VA_BITS32_UNDEFINED; // w16[11,12]
            w32[ 6] = VA_BITS32_UNDEFINED; // w16[13,14]
            w16[15] = VA_BITS16_UNDEFINED; // w16[15]
            return;
         }
      }
   }
   /* else fall into slow case */
   PROF_EVENT(MCPE_MAKE_STACK_UNINIT_128_NO_O_SLOWCASE);
   make_mem_undefined(base, 128);
}
/*------------------------------------------------------------*/
/*--- Checking memory                                      ---*/
/*------------------------------------------------------------*/

typedef
   enum {
      MC_Ok = 5,
      MC_AddrErr = 6,
      MC_ValueErr = 7
   }
   MC_ReadResult;

/* Check permissions for address range.  If inadequate permissions
   exist, *bad_addr is set to the offending address, so the caller can
   know what it is. */

/* Returns True if [a .. a+len) is not addressable.  Otherwise,
   returns False, and if bad_addr is non-NULL, sets *bad_addr to
   indicate the lowest failing address.  Functions below are
   similar. */
Bool MC_(check_mem_is_noaccess) ( Addr a, SizeT len, Addr* bad_addr )
{
   SizeT i;
   UWord vabits2;

   PROF_EVENT(MCPE_CHECK_MEM_IS_NOACCESS);
   for (i = 0; i < len; i++) {
      PROF_EVENT(MCPE_CHECK_MEM_IS_NOACCESS_LOOP);
      vabits2 = get_vabits2(a);
      if (VA_BITS2_NOACCESS != vabits2) {
         if (bad_addr != NULL) *bad_addr = a;
         return False;
      }
      a++;
   }
   return True;
}
static Bool is_mem_addressable ( Addr a, SizeT len,
                                 /*OUT*/Addr* bad_addr )
{
   SizeT i;
   UWord vabits2;

   PROF_EVENT(MCPE_IS_MEM_ADDRESSABLE);
   for (i = 0; i < len; i++) {
      PROF_EVENT(MCPE_IS_MEM_ADDRESSABLE_LOOP);
      vabits2 = get_vabits2(a);
      if (VA_BITS2_NOACCESS == vabits2) {
         if (bad_addr != NULL) *bad_addr = a;
         return False;
      }
      a++;
   }
   return True;
}
static MC_ReadResult is_mem_defined ( Addr a, SizeT len,
                                      /*OUT*/Addr* bad_addr,
                                      /*OUT*/UInt* otag )
{
   SizeT i;
   UWord vabits2;

   PROF_EVENT(MCPE_IS_MEM_DEFINED);
   DEBUG("is_mem_defined\n");

   if (otag)     *otag = 0;
   if (bad_addr) *bad_addr = 0;
   for (i = 0; i < len; i++) {
      PROF_EVENT(MCPE_IS_MEM_DEFINED_LOOP);
      vabits2 = get_vabits2(a);
      if (VA_BITS2_DEFINED != vabits2) {
         // Error!  Nb: Report addressability errors in preference to
         // definedness errors.  And don't report definedness errors unless
         // --undef-value-errors=yes.
         if (bad_addr) {
            *bad_addr = a;
         }
         if (VA_BITS2_NOACCESS == vabits2) {
            return MC_AddrErr;
         }
         if (MC_(clo_mc_level) >= 2) {
            if (otag && MC_(clo_mc_level) == 3) {
               *otag = MC_(helperc_b_load1)( a );
            }
            return MC_ValueErr;
         }
      }
      a++;
   }
   return MC_Ok;
}
/* Like is_mem_defined but doesn't give up at the first uninitialised
   byte -- the entire range is always checked.  This is important for
   detecting errors in the case where a checked range strays into
   invalid memory, but that fact is not detected by the ordinary
   is_mem_defined(), because of an undefined section that precedes the
   out of range section, possibly as a result of an alignment hole in
   the checked data.  This version always checks the entire range and
   can report both a definedness and an accessibility error, if
   necessary. */
static void is_mem_defined_comprehensive (
               Addr a, SizeT len,
               /*OUT*/Bool* errorV,    /* is there a definedness err? */
               /*OUT*/Addr* bad_addrV, /* if so where? */
               /*OUT*/UInt* otagV,     /* and what's its otag? */
               /*OUT*/Bool* errorA,    /* is there an addressability err? */
               /*OUT*/Addr* bad_addrA  /* if so where? */
            )
{
   SizeT i;
   UWord vabits2;
   Bool  already_saw_errV = False;

   PROF_EVENT(MCPE_IS_MEM_DEFINED_COMPREHENSIVE);
   DEBUG("is_mem_defined_comprehensive\n");

   tl_assert(!(*errorV || *errorA));

   for (i = 0; i < len; i++) {
      PROF_EVENT(MCPE_IS_MEM_DEFINED_COMPREHENSIVE_LOOP);
      vabits2 = get_vabits2(a);
      switch (vabits2) {
         case VA_BITS2_DEFINED:
            a++;
            break;
         case VA_BITS2_UNDEFINED:
         case VA_BITS2_PARTDEFINED:
            if (!already_saw_errV) {
               *errorV    = True;
               *bad_addrV = a;
               if (MC_(clo_mc_level) == 3) {
                  *otagV = MC_(helperc_b_load1)( a );
               } else {
                  *otagV = 0;
               }
               already_saw_errV = True;
            }
            a++; /* keep going */
            break;
         case VA_BITS2_NOACCESS:
            *errorA    = True;
            *bad_addrA = a;
            return; /* give up now. */
         default:
            tl_assert(0);
      }
   }
}
/* Check a zero-terminated ascii string.  Tricky -- don't want to
   examine the actual bytes, to find the end, until we're sure it is
   safe to do so. */
static Bool mc_is_defined_asciiz ( Addr a, Addr* bad_addr, UInt* otag )
{
   UWord vabits2;

   PROF_EVENT(MCPE_IS_DEFINED_ASCIIZ);
   DEBUG("mc_is_defined_asciiz\n");

   if (otag)     *otag = 0;
   if (bad_addr) *bad_addr = 0;
   while (True) {
      PROF_EVENT(MCPE_IS_DEFINED_ASCIIZ_LOOP);
      vabits2 = get_vabits2(a);
      if (VA_BITS2_DEFINED != vabits2) {
         // Error!  Nb: Report addressability errors in preference to
         // definedness errors.  And don't report definedness errors unless
         // --undef-value-errors=yes.
         if (bad_addr) {
            *bad_addr = a;
         }
         if (VA_BITS2_NOACCESS == vabits2) {
            return MC_AddrErr;
         }
         if (MC_(clo_mc_level) >= 2) {
            if (otag && MC_(clo_mc_level) == 3) {
               *otag = MC_(helperc_b_load1)( a );
            }
            return MC_ValueErr;
         }
      }
      /* Ok, a is safe to read. */
      if (* ((UChar*)a) == 0) {
         return MC_Ok;
      }
      a++;
   }
}
/*------------------------------------------------------------*/
/*--- Memory event handlers                                ---*/
/*------------------------------------------------------------*/

static
void check_mem_is_addressable ( CorePart part, ThreadId tid, const HChar* s,
                                Addr base, SizeT size )
{
   Addr bad_addr;
   Bool ok = is_mem_addressable ( base, size, &bad_addr );

   if (!ok) {
      switch (part) {
      case Vg_CoreSysCall:
         MC_(record_memparam_error) ( tid, bad_addr,
                                      /*isAddrErr*/True, s, 0/*otag*/ );
         break;

      case Vg_CoreSignal:
         MC_(record_core_mem_error)( tid, s );
         break;

      default:
         VG_(tool_panic)("check_mem_is_addressable: unexpected CorePart");
      }
   }
}
static
void check_mem_is_defined ( CorePart part, ThreadId tid, const HChar* s,
                            Addr base, SizeT size )
{
   UInt otag = 0;
   Addr bad_addr;
   MC_ReadResult res = is_mem_defined ( base, size, &bad_addr, &otag );

   if (MC_Ok != res) {
      Bool isAddrErr = ( MC_AddrErr == res ? True : False );

      switch (part) {
      case Vg_CoreSysCall:
         MC_(record_memparam_error) ( tid, bad_addr, isAddrErr, s,
                                      isAddrErr ? 0 : otag );
         break;

      case Vg_CoreSysCallArgInMem:
         MC_(record_regparam_error) ( tid, s, otag );
         break;

      /* If we're being asked to jump to a silly address, record an error
         message before potentially crashing the entire system. */
      case Vg_CoreTranslate:
         MC_(record_jump_error)( tid, bad_addr );
         break;

      default:
         VG_(tool_panic)("check_mem_is_defined: unexpected CorePart");
      }
   }
}
static
void check_mem_is_defined_asciiz ( CorePart part, ThreadId tid,
                                   const HChar* s, Addr str )
{
   MC_ReadResult res;
   Addr bad_addr = 0;   // shut GCC up
   UInt otag = 0;

   tl_assert(part == Vg_CoreSysCall);
   res = mc_is_defined_asciiz ( (Addr)str, &bad_addr, &otag );
   if (MC_Ok != res) {
      Bool isAddrErr = ( MC_AddrErr == res ? True : False );
      MC_(record_memparam_error) ( tid, bad_addr, isAddrErr, s,
                                   isAddrErr ? 0 : otag );
   }
}
/* Handling of mmap and mprotect is not as simple as it seems.

   The underlying semantics are that memory obtained from mmap is
   always initialised, but may be inaccessible.  And changes to the
   protection of memory do not change its contents and hence do not
   change its definedness state.  Problem is we can't model
   inaccessible-but-with-some-definedness state; once we mark memory
   as inaccessible we lose all info about definedness, and so can't
   restore that if it is later made accessible again.

   One obvious thing to do is this:

      mmap/mprotect NONE  -> noaccess
      mmap/mprotect other -> defined

   The problem case here is: taking accessible memory, writing
   uninitialised data to it, mprotecting it NONE and later mprotecting
   it back to some accessible state causes the undefinedness to be
   lost.

   A better proposal is:

     (1) mmap NONE       ->  make noaccess
     (2) mmap other      ->  make defined

     (3) mprotect NONE   ->  # no change
     (4) mprotect other  ->  change any "noaccess" to "defined"

   (2) is OK because memory newly obtained from mmap really is defined
       (zeroed out by the kernel -- doing anything else would
       constitute a massive security hole.)

   (1) is OK because the only way to make the memory usable is via
       (4), in which case we also wind up correctly marking it all as
       defined.

   (3) is the weak case.  We choose not to change memory state.
       (presumably the range is in some mixture of "defined" and
       "undefined", viz, accessible but with arbitrary V bits).  Doing
       nothing means we retain the V bits, so that if the memory is
       later mprotected "other", the V bits remain unchanged, so there
       can be no false negatives.  The bad effect is that if there's
       an access in the area, then MC cannot warn; but at least we'll
       get a SEGV to show, so it's better than nothing.

   Consider the sequence (3) followed by (4).  Any memory that was
   "defined" or "undefined" previously retains its state (as
   required).  Any memory that was "noaccess" before can only have
   been made that way by (1), and so it's OK to change it to
   "defined".

   See https://bugs.kde.org/show_bug.cgi?id=205541
   and https://bugs.kde.org/show_bug.cgi?id=210268
*/
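
/* A user-level sketch of the sequence the (3)-then-(4) rule is designed
   for (illustrative only; the buffer and sizes are made up):

      char* buf = mmap(NULL, 4096, PROT_READ|PROT_WRITE,
                       MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); // (2): defined
      char  junk[64];                    // never initialised
      memcpy(buf, junk, sizeof junk);    // buf[0..63] become undefined
      mprotect(buf, 4096, PROT_NONE);    // (3): V bits left untouched
      mprotect(buf, 4096, PROT_READ);    // (4): only noaccess->defined,
                                         // so buf[0..63] stay undefined
                                         // and later reads are reported
*/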
static
void mc_new_mem_mmap ( Addr a, SizeT len, Bool rr, Bool ww, Bool xx,
                       ULong di_handle )
{
   if (rr || ww || xx) {
      /* (2) mmap/mprotect other -> defined */
      MC_(make_mem_defined)(a, len);
   } else {
      /* (1) mmap/mprotect NONE -> noaccess */
      MC_(make_mem_noaccess)(a, len);
   }
}
static
void mc_new_mem_mprotect ( Addr a, SizeT len, Bool rr, Bool ww, Bool xx )
{
   if (rr || ww || xx) {
      /* (4) mprotect other -> change any "noaccess" to "defined" */
      make_mem_defined_if_noaccess(a, len);
   } else {
      /* (3) mprotect NONE -> # no change */
   }
}
static
void mc_new_mem_startup( Addr a, SizeT len,
                         Bool rr, Bool ww, Bool xx, ULong di_handle )
{
   // Because code is defined, initialised variables get put in the data
   // segment and are defined, and uninitialised variables get put in the
   // bss segment and are auto-zeroed (and so defined).
   //
   // It's possible that there will be padding between global variables.
   // This will also be auto-zeroed, and marked as defined by Memcheck.  If
   // a program uses it, Memcheck will not complain.  This is arguably a
   // false negative, but it's a grey area -- the behaviour is defined (the
   // padding is zeroed) but it's probably not what the user intended.  And
   // we can't avoid it.
   //
   // Note: we generally ignore RWX permissions, because we can't track them
   // without requiring more than one A bit which would slow things down a
   // lot.  But on Darwin the 0th page is mapped but !R and !W and !X.
   // So we mark any such pages as "unaddressable".
   DEBUG("mc_new_mem_startup(%#lx, %llu, rr=%u, ww=%u, xx=%u)\n",
         a, (ULong)len, rr, ww, xx);
   mc_new_mem_mmap(a, len, rr, ww, xx, di_handle);
}
static
void mc_post_mem_write(CorePart part, ThreadId tid, Addr a, SizeT len)
{
   MC_(make_mem_defined)(a, len);
}
/*------------------------------------------------------------*/
/*--- Register event handlers                              ---*/
/*------------------------------------------------------------*/

/* Try and get a nonzero origin for the guest state section of thread
   tid characterised by (offset,size).  Return 0 if nothing to show
   for it. */
static UInt mb_get_origin_for_guest_offset ( ThreadId tid,
                                             Int offset, SizeT size )
{
   Int   sh2off;
   UInt  area[3];
   UInt  otag;
   sh2off = MC_(get_otrack_shadow_offset)( offset, size );
   if (sh2off == -1)
      return 0;  /* This piece of guest state is not tracked */
   tl_assert(sh2off >= 0);
   tl_assert(0 == (sh2off % 4));
   area[0] = 0x31313131;
   area[2] = 0x27272727;
   VG_(get_shadow_regs_area)( tid, (UChar *)&area[1], 2/*shadowno*/,sh2off,4 );
   tl_assert(area[0] == 0x31313131);
   tl_assert(area[2] == 0x27272727);
   otag = area[1];
   return otag;
}
/* When some chunk of guest state is written, mark the corresponding
   shadow area as valid.  This is used to initialise arbitrarily large
   chunks of guest state, hence the _SIZE value, which has to be as
   big as the biggest guest state.
*/
static void mc_post_reg_write ( CorePart part, ThreadId tid,
                                PtrdiffT offset, SizeT size)
{
#  define MAX_REG_WRITE_SIZE 2264
   UChar area[MAX_REG_WRITE_SIZE];
   tl_assert(size <= MAX_REG_WRITE_SIZE);
   VG_(memset)(area, V_BITS8_DEFINED, size);
   VG_(set_shadow_regs_area)( tid, 1/*shadowNo*/,offset,size, area );
#  undef MAX_REG_WRITE_SIZE
}
static
void mc_post_reg_write_clientcall ( ThreadId tid,
                                    PtrdiffT offset, SizeT size, Addr f )
{
   mc_post_reg_write(/*dummy*/0, tid, offset, size);
}
/* Look at the definedness of the guest's shadow state for
   [offset, offset+len).  If any part of that is undefined, record
   a parameter error.
*/
static void mc_pre_reg_read ( CorePart part, ThreadId tid, const HChar* s,
                              PtrdiffT offset, SizeT size)
{
   Int   i;
   Bool  bad;
   UInt  otag;

   UChar area[16];
   tl_assert(size <= 16);

   VG_(get_shadow_regs_area)( tid, area, 1/*shadowNo*/,offset,size );

   bad = False;
   for (i = 0; i < size; i++) {
      if (area[i] != V_BITS8_DEFINED) {
         bad = True;
         break;
      }
   }

   if (!bad)
      return;

   /* We've found some undefinedness.  See if we can also find an
      origin for it. */
   otag = mb_get_origin_for_guest_offset( tid, offset, size );
   MC_(record_regparam_error) ( tid, s, otag );
}
/*------------------------------------------------------------*/
/*--- Register-memory event handlers                       ---*/
/*------------------------------------------------------------*/

static void mc_copy_mem_to_reg ( CorePart part, ThreadId tid, Addr a,
                                 PtrdiffT guest_state_offset, SizeT size )
{
   SizeT i;
   UChar vbits8;
   Int   offset;
   UInt  d32;

   /* Slow loop. */
   for (i = 0; i < size; i++) {
      get_vbits8( a+i, &vbits8 );
      VG_(set_shadow_regs_area)( tid, 1/*shadowNo*/, guest_state_offset+i,
                                 1, &vbits8 );
   }

   if (MC_(clo_mc_level) != 3)
      return;

   /* Track origins. */
   offset = MC_(get_otrack_shadow_offset)( guest_state_offset, size );
   if (offset == -1)
      return;

   switch (size) {
   case 1:
      d32 = MC_(helperc_b_load1)( a );
      break;
   case 2:
      d32 = MC_(helperc_b_load2)( a );
      break;
   case 4:
      d32 = MC_(helperc_b_load4)( a );
      break;
   case 8:
      d32 = MC_(helperc_b_load8)( a );
      break;
   case 16:
      d32 = MC_(helperc_b_load16)( a );
      break;
   case 32:
      d32 = MC_(helperc_b_load32)( a );
      break;
   default:
      tl_assert(0);
   }

   VG_(set_shadow_regs_area)( tid, 2/*shadowNo*/, offset, 4, (UChar*)&d32 );
}
static void mc_copy_reg_to_mem ( CorePart part, ThreadId tid,
                                 PtrdiffT guest_state_offset, Addr a,
                                 SizeT size )
{
   SizeT i;
   UChar vbits8;
   Int   offset;
   UInt  d32;

   /* Slow loop. */
   for (i = 0; i < size; i++) {
      VG_(get_shadow_regs_area)( tid, &vbits8, 1/*shadowNo*/,
                                 guest_state_offset+i, 1 );
      set_vbits8( a+i, vbits8 );
   }

   if (MC_(clo_mc_level) != 3)
      return;

   /* Track origins. */
   offset = MC_(get_otrack_shadow_offset)( guest_state_offset, size );
   if (offset == -1)
      return;

   VG_(get_shadow_regs_area)( tid, (UChar*)&d32, 2/*shadowNo*/, offset, 4 );
   switch (size) {
   case 1:
      MC_(helperc_b_store1)( a, d32 );
      break;
   case 2:
      MC_(helperc_b_store2)( a, d32 );
      break;
   case 4:
      MC_(helperc_b_store4)( a, d32 );
      break;
   case 8:
      MC_(helperc_b_store8)( a, d32 );
      break;
   case 16:
      MC_(helperc_b_store16)( a, d32 );
      break;
   case 32:
      MC_(helperc_b_store32)( a, d32 );
      break;
   default:
      tl_assert(0);
   }
}
/*------------------------------------------------------------*/
/*--- Some static assertions                               ---*/
/*------------------------------------------------------------*/

/* The handwritten assembly helpers below have baked-in assumptions
   about various constant values.  These assertions attempt to make
   that a bit safer by checking those values and flagging changes that
   would make the assembly invalid.  Not perfect but it's better than
   nothing. */

STATIC_ASSERT(SM_CHUNKS * 4 == 65536);

STATIC_ASSERT(VA_BITS8_DEFINED   == 0xAA);
STATIC_ASSERT(VA_BITS8_UNDEFINED == 0x55);

STATIC_ASSERT(V_BITS32_DEFINED   == 0x00000000);
STATIC_ASSERT(V_BITS32_UNDEFINED == 0xFFFFFFFF);

STATIC_ASSERT(VA_BITS4_DEFINED   == 0xA);
STATIC_ASSERT(VA_BITS4_UNDEFINED == 0x5);

STATIC_ASSERT(V_BITS16_DEFINED   == 0x0000);
STATIC_ASSERT(V_BITS16_UNDEFINED == 0xFFFF);

STATIC_ASSERT(VA_BITS2_DEFINED   == 2);
STATIC_ASSERT(VA_BITS2_UNDEFINED == 1);

STATIC_ASSERT(V_BITS8_DEFINED   == 0x00);
STATIC_ASSERT(V_BITS8_UNDEFINED == 0xFF);
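/* A worked note on the values asserted above (derivation only, not new
   behaviour): each byte of memory is tracked by a 2-bit vabits field,
   with lower addresses in the lower bits, so the all-defined and
   all-undefined patterns for the 4 bytes covered by one vabits8 are
      VA_BITS8_DEFINED   = 10 10 10 10 (binary) = 0xAA   (4 x 2)
      VA_BITS8_UNDEFINED = 01 01 01 01 (binary) = 0x55   (4 x 1)
   which is why the assembly helpers below can classify a whole word32
   by comparing its vabits8 byte against 0xAA or 0x55 in one go. */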
/*------------------------------------------------------------*/
/*--- Functions called directly from generated code:       ---*/
/*--- Load/store handlers.                                 ---*/
/*------------------------------------------------------------*/

/* Types:  LOADV32, LOADV16, LOADV8 are:
               UWord fn ( Addr a )
   so they return 32-bits on 32-bit machines and 64-bits on
   64-bit machines.  Addr has the same size as a host word.

   LOADV64 is always  ULong fn ( Addr a )

   Similarly for STOREV8, STOREV16, STOREV32, the supplied vbits
   are a UWord, and for STOREV64 they are a ULong.
*/

/* If any part of '_a' indicated by the mask is 1, either '_a' is not
   naturally '_sz/8'-aligned, or it exceeds the range covered by the
   primary map.  This is all very tricky (and important!), so let's
   work through the maths by hand (below), *and* assert for these
   values at startup. */
#define MASK(_szInBytes) \
   ( ~((0x10000UL-(_szInBytes)) | ((N_PRIMARY_MAP-1) << 16)) )

/* MASK only exists so as to define this macro. */
#define UNALIGNED_OR_HIGH(_a,_szInBits) \
   ((_a) & MASK((_szInBits>>3)))
/* On a 32-bit machine:

   N_PRIMARY_BITS          == 16, so
   N_PRIMARY_MAP           == 0x10000, so
   N_PRIMARY_MAP-1         == 0xFFFF, so
   (N_PRIMARY_MAP-1) << 16 == 0xFFFF0000, and so

   MASK(1) = ~ ( (0x10000 - 1) | 0xFFFF0000 )
           = ~ ( 0xFFFF | 0xFFFF0000 )
           = ~ 0xFFFF'FFFF
           = 0

   MASK(2) = ~ ( (0x10000 - 2) | 0xFFFF0000 )
           = ~ ( 0xFFFE | 0xFFFF0000 )
           = ~ 0xFFFF'FFFE
           = 1

   MASK(4) = ~ ( (0x10000 - 4) | 0xFFFF0000 )
           = ~ ( 0xFFFC | 0xFFFF0000 )
           = ~ 0xFFFF'FFFC
           = 3

   MASK(8) = ~ ( (0x10000 - 8) | 0xFFFF0000 )
           = ~ ( 0xFFF8 | 0xFFFF0000 )
           = ~ 0xFFFF'FFF8
           = 7

   Hence in the 32-bit case, "a & MASK(1/2/4/8)" is a nonzero value
   precisely when a is not 1/2/4/8-bytes aligned.  And obviously, for
   the 1-byte alignment case, it is always a zero value, since MASK(1)
   is zero.  All as expected.

   On a 64-bit machine, it's more complex, since we're testing
   simultaneously for misalignment and for the address being at or
   above the range covered by the primary map:

   N_PRIMARY_BITS          == 20, so
   N_PRIMARY_MAP           == 0x100000, so
   N_PRIMARY_MAP-1         == 0xFFFFF, so
   (N_PRIMARY_MAP-1) << 16 == 0xF'FFFF'0000, and so

   MASK(1) = ~ ( (0x10000 - 1) | 0xF'FFFF'0000 )
           = ~ ( 0xFFFF | 0xF'FFFF'0000 )
           = ~ 0xF'FFFF'FFFF
           = 0xFFFF'FFF0'0000'0000

   MASK(2) = ~ ( (0x10000 - 2) | 0xF'FFFF'0000 )
           = ~ ( 0xFFFE | 0xF'FFFF'0000 )
           = ~ 0xF'FFFF'FFFE
           = 0xFFFF'FFF0'0000'0001

   MASK(4) = ~ ( (0x10000 - 4) | 0xF'FFFF'0000 )
           = ~ ( 0xFFFC | 0xF'FFFF'0000 )
           = ~ 0xF'FFFF'FFFC
           = 0xFFFF'FFF0'0000'0003

   MASK(8) = ~ ( (0x10000 - 8) | 0xF'FFFF'0000 )
           = ~ ( 0xFFF8 | 0xF'FFFF'0000 )
           = ~ 0xF'FFFF'FFF8
           = 0xFFFF'FFF0'0000'0007
*/
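/* A minimal sketch (disabled; not from the original source), assuming a
   64-bit build with N_PRIMARY_BITS == 20 exactly as in the worked
   example above: the computed MASK values could be spot-checked like
   this.  These constants do not hold on 32-bit builds, and the function
   name is made up for illustration. */
#if 0
static void mask_values_sketch_64bit ( void )
{
   tl_assert( MASK(1) == 0xFFFFFFF000000000UL );
   tl_assert( MASK(2) == 0xFFFFFFF000000001UL );
   tl_assert( MASK(4) == 0xFFFFFFF000000003UL );
   tl_assert( MASK(8) == 0xFFFFFFF000000007UL );
}
#endif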
/*------------------------------------------------------------*/
/*--- LOADV256 and LOADV128                                ---*/
/*------------------------------------------------------------*/

static INLINE
void mc_LOADV_128_or_256 ( /*OUT*/ULong* res,
                           Addr a, SizeT nBits, Bool isBigEndian )
{
   PROF_EVENT(MCPE_LOADV_128_OR_256);

#ifndef PERF_FAST_LOADV
   mc_LOADV_128_or_256_slow( res, a, nBits, isBigEndian );
   return;
#else
   {
      UWord   sm_off16, vabits16, j;
      UWord   nBytes  = nBits / 8;
      UWord   nULongs = nBytes / 8;
      SecMap* sm;

      if (UNLIKELY( UNALIGNED_OR_HIGH(a,nBits) )) {
         PROF_EVENT(MCPE_LOADV_128_OR_256_SLOW1);
         mc_LOADV_128_or_256_slow( res, a, nBits, isBigEndian );
         return;
      }

      /* Handle common cases quickly: a (and a+8 and a+16 etc.) is
         suitably aligned, is mapped, and addressible. */
      for (j = 0; j < nULongs; j++) {
         sm       = get_secmap_for_reading_low(a + 8*j);
         sm_off16 = SM_OFF_16(a + 8*j);
         vabits16 = sm->vabits16[sm_off16];

         // Convert V bits from compact memory form to expanded
         // register form.
         if (LIKELY(vabits16 == VA_BITS16_DEFINED)) {
            res[j] = V_BITS64_DEFINED;
         } else if (LIKELY(vabits16 == VA_BITS16_UNDEFINED)) {
            res[j] = V_BITS64_UNDEFINED;
         } else {
            /* Slow case: some block of 8 bytes are not all-defined or
               all-undefined. */
            PROF_EVENT(MCPE_LOADV_128_OR_256_SLOW2);
            mc_LOADV_128_or_256_slow( res, a, nBits, isBigEndian );
            return;
         }
      }
      return;
   }
#endif
}

VG_REGPARM(2) void MC_(helperc_LOADV256be) ( /*OUT*/V256* res, Addr a )
{
   mc_LOADV_128_or_256(&res->w64[0], a, 256, True);
}
VG_REGPARM(2) void MC_(helperc_LOADV256le) ( /*OUT*/V256* res, Addr a )
{
   mc_LOADV_128_or_256(&res->w64[0], a, 256, False);
}

VG_REGPARM(2) void MC_(helperc_LOADV128be) ( /*OUT*/V128* res, Addr a )
{
   mc_LOADV_128_or_256(&res->w64[0], a, 128, True);
}
VG_REGPARM(2) void MC_(helperc_LOADV128le) ( /*OUT*/V128* res, Addr a )
{
   mc_LOADV_128_or_256(&res->w64[0], a, 128, False);
}
/*------------------------------------------------------------*/
/*--- LOADV64                                              ---*/
/*------------------------------------------------------------*/

static INLINE
ULong mc_LOADV64 ( Addr a, Bool isBigEndian )
{
   PROF_EVENT(MCPE_LOADV64);

#ifndef PERF_FAST_LOADV
   return mc_LOADVn_slow( a, 64, isBigEndian );
#else
   {
      UWord   sm_off16, vabits16;
      SecMap* sm;

      if (UNLIKELY( UNALIGNED_OR_HIGH(a,64) )) {
         PROF_EVENT(MCPE_LOADV64_SLOW1);
         return (ULong)mc_LOADVn_slow( a, 64, isBigEndian );
      }

      sm       = get_secmap_for_reading_low(a);
      sm_off16 = SM_OFF_16(a);
      vabits16 = sm->vabits16[sm_off16];

      // Handle common case quickly: a is suitably aligned, is mapped, and
      // addressible.
      // Convert V bits from compact memory form to expanded register form.
      if (LIKELY(vabits16 == VA_BITS16_DEFINED)) {
         return V_BITS64_DEFINED;
      } else if (LIKELY(vabits16 == VA_BITS16_UNDEFINED)) {
         return V_BITS64_UNDEFINED;
      } else {
         /* Slow case: the 8 bytes are not all-defined or all-undefined. */
         PROF_EVENT(MCPE_LOADV64_SLOW2);
         return mc_LOADVn_slow( a, 64, isBigEndian );
      }
   }
#endif
}

// Generic for all platforms
VG_REGPARM(1) ULong MC_(helperc_LOADV64be) ( Addr a )
{
   return mc_LOADV64(a, True);
}

// Non-generic assembly for arm32-linux
#if ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
    && defined(VGP_arm_linux)
/* See mc_main_asm.c */

#elif ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
      && (defined(VGP_x86_linux) || defined(VGP_x86_solaris) || defined(VGP_x86_freebsd))
/* See mc_main_asm.c */

#else
// Generic for all platforms except {arm32,x86}-linux, x86-solaris
// and x86-freebsd
VG_REGPARM(1) ULong MC_(helperc_LOADV64le) ( Addr a )
{
   return mc_LOADV64(a, False);
}
#endif
/*------------------------------------------------------------*/
/*--- STOREV64                                             ---*/
/*------------------------------------------------------------*/

static INLINE
void mc_STOREV64 ( Addr a, ULong vbits64, Bool isBigEndian )
{
   PROF_EVENT(MCPE_STOREV64);

#ifndef PERF_FAST_STOREV
   // XXX: this slow case seems to be marginally faster than the fast case!
   // Investigate further.
   mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
#else
   {
      UWord   sm_off16, vabits16;
      SecMap* sm;

      if (UNLIKELY( UNALIGNED_OR_HIGH(a,64) )) {
         PROF_EVENT(MCPE_STOREV64_SLOW1);
         mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
         return;
      }

      sm       = get_secmap_for_reading_low(a);
      sm_off16 = SM_OFF_16(a);
      vabits16 = sm->vabits16[sm_off16];

      // To understand the below cleverness, see the extensive comments
      // in MC_(helperc_STOREV8).
      if (LIKELY(V_BITS64_DEFINED == vbits64)) {
         if (LIKELY(vabits16 == (UShort)VA_BITS16_DEFINED)) {
            return;
         }
         if (!is_distinguished_sm(sm) && VA_BITS16_UNDEFINED == vabits16) {
            sm->vabits16[sm_off16] = VA_BITS16_DEFINED;
            return;
         }
         PROF_EVENT(MCPE_STOREV64_SLOW2);
         mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
         return;
      }
      if (V_BITS64_UNDEFINED == vbits64) {
         if (vabits16 == (UShort)VA_BITS16_UNDEFINED) {
            return;
         }
         if (!is_distinguished_sm(sm) && VA_BITS16_DEFINED == vabits16) {
            sm->vabits16[sm_off16] = VA_BITS16_UNDEFINED;
            return;
         }
         PROF_EVENT(MCPE_STOREV64_SLOW3);
         mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
         return;
      }

      PROF_EVENT(MCPE_STOREV64_SLOW4);
      mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
   }
#endif
}

VG_REGPARM(1) void MC_(helperc_STOREV64be) ( Addr a, ULong vbits64 )
{
   mc_STOREV64(a, vbits64, True);
}
VG_REGPARM(1) void MC_(helperc_STOREV64le) ( Addr a, ULong vbits64 )
{
   mc_STOREV64(a, vbits64, False);
}
/*------------------------------------------------------------*/
/*--- LOADV32                                              ---*/
/*------------------------------------------------------------*/

static INLINE
UWord mc_LOADV32 ( Addr a, Bool isBigEndian )
{
   PROF_EVENT(MCPE_LOADV32);

#ifndef PERF_FAST_LOADV
   return (UWord)mc_LOADVn_slow( a, 32, isBigEndian );
#else
   {
      UWord   sm_off, vabits8;
      SecMap* sm;

      if (UNLIKELY( UNALIGNED_OR_HIGH(a,32) )) {
         PROF_EVENT(MCPE_LOADV32_SLOW1);
         return (UWord)mc_LOADVn_slow( a, 32, isBigEndian );
      }

      sm      = get_secmap_for_reading_low(a);
      sm_off  = SM_OFF(a);
      vabits8 = sm->vabits8[sm_off];

      // Handle common case quickly: a is suitably aligned, is mapped, and the
      // entire word32 it lives in is addressible.
      // Convert V bits from compact memory form to expanded register form.
      // For 64-bit platforms, set the high 32 bits of retval to 1 (undefined).
      // Almost certainly not necessary, but be paranoid.
      if (LIKELY(vabits8 == VA_BITS8_DEFINED)) {
         return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_DEFINED);
      } else if (LIKELY(vabits8 == VA_BITS8_UNDEFINED)) {
         return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_UNDEFINED);
      } else {
         /* Slow case: the 4 bytes are not all-defined or all-undefined. */
         PROF_EVENT(MCPE_LOADV32_SLOW2);
         return (UWord)mc_LOADVn_slow( a, 32, isBigEndian );
      }
   }
#endif
}

// Generic for all platforms
VG_REGPARM(1) UWord MC_(helperc_LOADV32be) ( Addr a )
{
   return mc_LOADV32(a, True);
}

// Non-generic assembly for arm32-linux
#if ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
    && defined(VGP_arm_linux)
/* See mc_main_asm.c */

#elif ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
      && (defined(VGP_x86_linux) || defined(VGP_x86_solaris))
/* See mc_main_asm.c */

#else
// Generic for all platforms except {arm32,x86}-linux and x86-solaris
VG_REGPARM(1) UWord MC_(helperc_LOADV32le) ( Addr a )
{
   return mc_LOADV32(a, False);
}
#endif
/*------------------------------------------------------------*/
/*--- STOREV32                                             ---*/
/*------------------------------------------------------------*/

static INLINE
void mc_STOREV32 ( Addr a, UWord vbits32, Bool isBigEndian )
{
   PROF_EVENT(MCPE_STOREV32);

#ifndef PERF_FAST_STOREV
   mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
#else
   {
      UWord   sm_off, vabits8;
      SecMap* sm;

      if (UNLIKELY( UNALIGNED_OR_HIGH(a,32) )) {
         PROF_EVENT(MCPE_STOREV32_SLOW1);
         mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
         return;
      }

      sm      = get_secmap_for_reading_low(a);
      sm_off  = SM_OFF(a);
      vabits8 = sm->vabits8[sm_off];

      // To understand the below cleverness, see the extensive comments
      // in MC_(helperc_STOREV8).
      if (LIKELY(V_BITS32_DEFINED == vbits32)) {
         if (LIKELY(vabits8 == (UInt)VA_BITS8_DEFINED)) {
            return;
         }
         if (!is_distinguished_sm(sm) && VA_BITS8_UNDEFINED == vabits8) {
            sm->vabits8[sm_off] = (UInt)VA_BITS8_DEFINED;
            return;
         }
         PROF_EVENT(MCPE_STOREV32_SLOW2);
         mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
         return;
      }
      if (V_BITS32_UNDEFINED == vbits32) {
         if (vabits8 == (UInt)VA_BITS8_UNDEFINED) {
            return;
         }
         if (!is_distinguished_sm(sm) && VA_BITS8_DEFINED == vabits8) {
            sm->vabits8[sm_off] = (UInt)VA_BITS8_UNDEFINED;
            return;
         }
         PROF_EVENT(MCPE_STOREV32_SLOW3);
         mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
         return;
      }

      PROF_EVENT(MCPE_STOREV32_SLOW4);
      mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
   }
#endif
}

VG_REGPARM(2) void MC_(helperc_STOREV32be) ( Addr a, UWord vbits32 )
{
   mc_STOREV32(a, vbits32, True);
}
VG_REGPARM(2) void MC_(helperc_STOREV32le) ( Addr a, UWord vbits32 )
{
   mc_STOREV32(a, vbits32, False);
}
/*------------------------------------------------------------*/
/*--- LOADV16                                              ---*/
/*------------------------------------------------------------*/

static INLINE
UWord mc_LOADV16 ( Addr a, Bool isBigEndian )
{
   PROF_EVENT(MCPE_LOADV16);

#ifndef PERF_FAST_LOADV
   return (UWord)mc_LOADVn_slow( a, 16, isBigEndian );
#else
   {
      UWord   sm_off, vabits8;
      SecMap* sm;

      if (UNLIKELY( UNALIGNED_OR_HIGH(a,16) )) {
         PROF_EVENT(MCPE_LOADV16_SLOW1);
         return (UWord)mc_LOADVn_slow( a, 16, isBigEndian );
      }

      sm      = get_secmap_for_reading_low(a);
      sm_off  = SM_OFF(a);
      vabits8 = sm->vabits8[sm_off];
      // Handle common case quickly: a is suitably aligned, is mapped, and is
      // addressible.
      // Convert V bits from compact memory form to expanded register form
      if      (LIKELY(vabits8 == VA_BITS8_DEFINED  )) { return V_BITS16_DEFINED;   }
      else if (LIKELY(vabits8 == VA_BITS8_UNDEFINED)) { return V_BITS16_UNDEFINED; }
      else {
         // The 4 (yes, 4) bytes are not all-defined or all-undefined, check
         // the two sub-bytes.
         UChar vabits4 = extract_vabits4_from_vabits8(a, vabits8);
         if      (vabits4 == VA_BITS4_DEFINED  ) { return V_BITS16_DEFINED;   }
         else if (vabits4 == VA_BITS4_UNDEFINED) { return V_BITS16_UNDEFINED; }
         else {
            /* Slow case: the two bytes are not all-defined or all-undefined. */
            PROF_EVENT(MCPE_LOADV16_SLOW2);
            return (UWord)mc_LOADVn_slow( a, 16, isBigEndian );
         }
      }
   }
#endif
}

// Generic for all platforms
VG_REGPARM(1) UWord MC_(helperc_LOADV16be) ( Addr a )
{
   return mc_LOADV16(a, True);
}
// Non-generic assembly for arm32-linux
#if ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
    && defined(VGP_arm_linux)
__asm__( /* Derived from NCode template */
".text                                  \n"
".align 2                               \n"
".global vgMemCheck_helperc_LOADV16le   \n"
".type   vgMemCheck_helperc_LOADV16le, %function \n"
"vgMemCheck_helperc_LOADV16le:          \n" //
"   tst    r0, #1                       \n" //
"   bne    .LLV16LEc12                  \n" // if misaligned
"   lsr    r2, r0, #16                  \n" // r2 = pri-map-ix
"   movw   r3, #:lower16:primary_map    \n" //
"   uxth   r1, r0                       \n" // r1 = sec-map-offB
"   movt   r3, #:upper16:primary_map    \n" //
"   ldr    r2, [r3, r2, lsl #2]         \n" // r2 = sec-map
"   ldrb   r1, [r2, r1, lsr #2]         \n" // r1 = sec-map-VABITS8
"   cmp    r1, #0xAA                    \n" // r1 == VA_BITS8_DEFINED?
"   bne    .LLV16LEc0                   \n" // no, goto .LLV16LEc0
".LLV16LEh9:                            \n" //
"   mov    r0, #0xFFFFFFFF              \n" //
"   lsl    r0, r0, #16                  \n" // V_BITS16_DEFINED | top16safe
"   bx     lr                           \n" //
".LLV16LEc0:                            \n" //
"   cmp    r1, #0x55                    \n" // VA_BITS8_UNDEFINED
"   bne    .LLV16LEc4                   \n" //
".LLV16LEc2:                            \n" //
"   mov    r0, #0xFFFFFFFF              \n" // V_BITS16_UNDEFINED | top16safe
"   bx     lr                           \n" //
".LLV16LEc4:                            \n" //
// r1 holds sec-map-VABITS8.  r0 holds the address and is 2-aligned.
// Extract the relevant 4 bits and inspect.
"   and    r2, r0, #2       \n" // addr & 2
"   add    r2, r2, r2       \n" // 2 * (addr & 2)
"   lsr    r1, r1, r2       \n" // sec-map-VABITS8 >> (2 * (addr & 2))
"   and    r1, r1, #15      \n" // (sec-map-VABITS8 >> (2 * (addr & 2))) & 15
"   cmp    r1, #0xA                     \n" // VA_BITS4_DEFINED
"   beq    .LLV16LEh9                   \n" //
"   cmp    r1, #0x5                     \n" // VA_BITS4_UNDEFINED
"   beq    .LLV16LEc2                   \n" //
".LLV16LEc12:                           \n" //
"   push   {r4, lr}                     \n" //
"   mov    r2, #0                       \n" //
"   mov    r1, #16                      \n" //
"   bl     mc_LOADVn_slow               \n" //
"   pop    {r4, pc}                     \n" //
".size vgMemCheck_helperc_LOADV16le, .-vgMemCheck_helperc_LOADV16le \n"
".previous\n"
);
#elif ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
      && (defined(VGP_x86_linux) || defined(VGP_x86_solaris))
__asm__(
".text\n"
".align 16\n"
".global vgMemCheck_helperc_LOADV16le\n"
".type   vgMemCheck_helperc_LOADV16le, @function\n"
"vgMemCheck_helperc_LOADV16le:\n"
"   test   $0x1, %eax\n"
"   jne    .LLV16LE5\n"          /* jump if not aligned */
"   mov    %eax, %edx\n"
"   shr    $0x10, %edx\n"
"   mov    primary_map(,%edx,4), %ecx\n"
"   movzwl %ax, %edx\n"
"   shr    $2, %edx\n"
"   movzbl (%ecx,%edx,1), %edx\n"/* edx = VA bits for 32bit */
"   cmp    $0xaa, %edx\n"        /* compare to VA_BITS8_DEFINED */
"   jne    .LLV16LE2\n"          /* jump if not all 32bits defined */
".LLV16LE1:\n"
"   mov    $0xffff0000,%eax\n"   /* V_BITS16_DEFINED | top16safe */
"   ret\n"
".LLV16LE2:\n"
"   cmp    $0x55, %edx\n"        /* compare to VA_BITS8_UNDEFINED */
"   jne    .LLV16LE4\n"          /* jump if not all 32bits undefined */
".LLV16LE3:\n"
"   or     $0xffffffff,%eax\n"   /* V_BITS16_UNDEFINED | top16safe */
"   ret\n"
".LLV16LE4:\n"
"   mov    %eax, %ecx\n"
"   and    $0x2, %ecx\n"
"   add    %ecx, %ecx\n"
"   sar    %cl, %edx\n"
"   and    $0xf, %edx\n"
"   cmp    $0xa, %edx\n"
"   je     .LLV16LE1\n"          /* jump if all 16bits are defined */
"   cmp    $0x5, %edx\n"
"   je     .LLV16LE3\n"          /* jump if all 16bits are undefined */
".LLV16LE5:\n"
"   xor    %ecx, %ecx\n"         /* tail call mc_LOADVn_slow(a, 16, 0) */
"   mov    $16, %edx\n"
"   jmp    mc_LOADVn_slow\n"
".size vgMemCheck_helperc_LOADV16le, .-vgMemCheck_helperc_LOADV16le \n"
".previous\n"
);
#else
// Generic for all platforms except {arm32,x86}-linux and x86-solaris
VG_REGPARM(1) UWord MC_(helperc_LOADV16le) ( Addr a )
{
   return mc_LOADV16(a, False);
}
#endif
/*------------------------------------------------------------*/
/*--- STOREV16                                             ---*/
/*------------------------------------------------------------*/

/* True if the vabits4 in vabits8 indicate a and a+1 are accessible. */
static INLINE
Bool accessible_vabits4_in_vabits8 ( Addr a, UChar vabits8 )
{
   UInt shift;
   tl_assert(VG_IS_2_ALIGNED(a));      // Must be 2-aligned
   shift = (a & 2) << 1;               // shift by 0 or 4
   vabits8 >>= shift;                  // shift the four bits to the bottom
   // check 2 x vabits2 != VA_BITS2_NOACCESS
   return ((0x3 & vabits8) != VA_BITS2_NOACCESS)
      && ((0xc & vabits8) != VA_BITS2_NOACCESS << 2);
}
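/* Illustrative sketch (disabled; the vabits8 values are made up for the
   example): 0xA6 encodes, from low bits to high, the vabits2 fields
   2 (defined), 1 (undefined), 2, 2 for the four bytes of the containing
   word32; 0xA4 differs only in that byte 0 is no-access (0). */
#if 0
static void accessible_vabits4_example ( Addr w32 /* 4-aligned */ )
{
   tl_assert(  accessible_vabits4_in_vabits8(w32,     0xA6) ); /* bytes 0,1 */
   tl_assert(  accessible_vabits4_in_vabits8(w32 + 2, 0xA6) ); /* bytes 2,3 */
   tl_assert( !accessible_vabits4_in_vabits8(w32,     0xA4) ); /* byte 0 is
                                                                  no-access */
}
#endif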
static INLINE
void mc_STOREV16 ( Addr a, UWord vbits16, Bool isBigEndian )
{
   PROF_EVENT(MCPE_STOREV16);

#ifndef PERF_FAST_STOREV
   mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
#else
   {
      UWord   sm_off, vabits8;
      SecMap* sm;

      if (UNLIKELY( UNALIGNED_OR_HIGH(a,16) )) {
         PROF_EVENT(MCPE_STOREV16_SLOW1);
         mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
         return;
      }

      sm      = get_secmap_for_reading_low(a);
      sm_off  = SM_OFF(a);
      vabits8 = sm->vabits8[sm_off];

      // To understand the below cleverness, see the extensive comments
      // in MC_(helperc_STOREV8).
      if (LIKELY(V_BITS16_DEFINED == vbits16)) {
         if (LIKELY(vabits8 == VA_BITS8_DEFINED)) {
            return;
         }
         if (!is_distinguished_sm(sm)
             && accessible_vabits4_in_vabits8(a, vabits8)) {
            insert_vabits4_into_vabits8( a, VA_BITS4_DEFINED,
                                         &(sm->vabits8[sm_off]) );
            return;
         }
         PROF_EVENT(MCPE_STOREV16_SLOW2);
         mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
         return;
      }
      if (V_BITS16_UNDEFINED == vbits16) {
         if (vabits8 == VA_BITS8_UNDEFINED) {
            return;
         }
         if (!is_distinguished_sm(sm)
             && accessible_vabits4_in_vabits8(a, vabits8)) {
            insert_vabits4_into_vabits8( a, VA_BITS4_UNDEFINED,
                                         &(sm->vabits8[sm_off]) );
            return;
         }
         PROF_EVENT(MCPE_STOREV16_SLOW3);
         mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
         return;
      }

      PROF_EVENT(MCPE_STOREV16_SLOW4);
      mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
   }
#endif
}

VG_REGPARM(2) void MC_(helperc_STOREV16be) ( Addr a, UWord vbits16 )
{
   mc_STOREV16(a, vbits16, True);
}
VG_REGPARM(2) void MC_(helperc_STOREV16le) ( Addr a, UWord vbits16 )
{
   mc_STOREV16(a, vbits16, False);
}
/*------------------------------------------------------------*/
/*--- LOADV8                                               ---*/
/*------------------------------------------------------------*/

/* Note: endianness is irrelevant for size == 1 */

// Non-generic assembly for arm32-linux
#if ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
    && defined(VGP_arm_linux)
__asm__( /* Derived from NCode template */
".text                                  \n"
".align 2                               \n"
".global vgMemCheck_helperc_LOADV8      \n"
".type   vgMemCheck_helperc_LOADV8, %function \n"
"vgMemCheck_helperc_LOADV8:             \n" //
"   lsr    r2, r0, #16                  \n" // r2 = pri-map-ix
"   movw   r3, #:lower16:primary_map    \n" //
"   uxth   r1, r0                       \n" // r1 = sec-map-offB
"   movt   r3, #:upper16:primary_map    \n" //
"   ldr    r2, [r3, r2, lsl #2]         \n" // r2 = sec-map
"   ldrb   r1, [r2, r1, lsr #2]         \n" // r1 = sec-map-VABITS8
"   cmp    r1, #0xAA                    \n" // r1 == VA_BITS8_DEFINED?
"   bne    .LLV8c0                      \n" // no, goto .LLV8c0
".LLV8h9:                               \n" //
"   mov    r0, #0xFFFFFF00              \n" // V_BITS8_DEFINED | top24safe
"   bx     lr                           \n" //
".LLV8c0:                               \n" //
"   cmp    r1, #0x55                    \n" // VA_BITS8_UNDEFINED
"   bne    .LLV8c4                      \n" //
".LLV8c2:                               \n" //
"   mov    r0, #0xFFFFFFFF              \n" // V_BITS8_UNDEFINED | top24safe
"   bx     lr                           \n" //
".LLV8c4:                               \n" //
// r1 holds sec-map-VABITS8
// r0 holds the address.  Extract the relevant 2 bits and inspect.
"   and    r2, r0, #3       \n" // addr & 3
"   add    r2, r2, r2       \n" // 2 * (addr & 3)
"   lsr    r1, r1, r2       \n" // sec-map-VABITS8 >> (2 * (addr & 3))
"   and    r1, r1, #3       \n" // (sec-map-VABITS8 >> (2 * (addr & 3))) & 3
"   cmp    r1, #2                       \n" // VA_BITS2_DEFINED
"   beq    .LLV8h9                      \n" //
"   cmp    r1, #1                       \n" // VA_BITS2_UNDEFINED
"   beq    .LLV8c2                      \n" //
"   push   {r4, lr}                     \n" //
"   mov    r2, #0                       \n" //
"   mov    r1, #8                       \n" //
"   bl     mc_LOADVn_slow               \n" //
"   pop    {r4, pc}                     \n" //
".size vgMemCheck_helperc_LOADV8, .-vgMemCheck_helperc_LOADV8 \n"
".previous\n"
);
/* Non-generic assembly for x86-linux */
#elif ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
      && (defined(VGP_x86_linux) || defined(VGP_x86_solaris))
__asm__(
".text\n"
".align 16\n"
".global vgMemCheck_helperc_LOADV8\n"
".type   vgMemCheck_helperc_LOADV8, @function\n"
"vgMemCheck_helperc_LOADV8:\n"
"   mov    %eax, %edx\n"
"   shr    $0x10, %edx\n"
"   mov    primary_map(,%edx,4), %ecx\n"
"   movzwl %ax, %edx\n"
"   shr    $2, %edx\n"
"   movzbl (%ecx,%edx,1), %edx\n"/* edx = VA bits for 32bit */
"   cmp    $0xaa, %edx\n"        /* compare to VA_BITS8_DEFINED? */
"   jne    .LLV8LE2\n"           /* jump if not defined */
".LLV8LE1:\n"
"   mov    $0xffffff00, %eax\n"  /* V_BITS8_DEFINED | top24safe */
"   ret\n"
".LLV8LE2:\n"
"   cmp    $0x55, %edx\n"        /* compare to VA_BITS8_UNDEFINED */
"   jne    .LLV8LE4\n"           /* jump if not all 32bits are undefined */
".LLV8LE3:\n"
"   or     $0xffffffff, %eax\n"  /* V_BITS8_UNDEFINED | top24safe */
"   ret\n"
".LLV8LE4:\n"
"   mov    %eax, %ecx\n"
"   and    $0x3, %ecx\n"
"   add    %ecx, %ecx\n"
"   sar    %cl, %edx\n"
"   and    $0x3, %edx\n"
"   cmp    $0x2, %edx\n"
"   je     .LLV8LE1\n"           /* jump if all 8bits are defined */
"   cmp    $0x1, %edx\n"
"   je     .LLV8LE3\n"           /* jump if all 8bits are undefined */
"   xor    %ecx, %ecx\n"         /* tail call to mc_LOADVn_slow(a, 8, 0) */
"   mov    $0x8, %edx\n"
"   jmp    mc_LOADVn_slow\n"
".size vgMemCheck_helperc_LOADV8, .-vgMemCheck_helperc_LOADV8\n"
".previous\n"
);
#else
// Generic for all platforms except {arm32,x86}-linux and x86-solaris
VG_REGPARM(1)
UWord MC_(helperc_LOADV8) ( Addr a )
{
   PROF_EVENT(MCPE_LOADV8);

#ifndef PERF_FAST_LOADV
   return (UWord)mc_LOADVn_slow( a, 8, False/*irrelevant*/ );
#else
   {
      UWord   sm_off, vabits8;
      SecMap* sm;

      if (UNLIKELY( UNALIGNED_OR_HIGH(a,8) )) {
         PROF_EVENT(MCPE_LOADV8_SLOW1);
         return (UWord)mc_LOADVn_slow( a, 8, False/*irrelevant*/ );
      }

      sm      = get_secmap_for_reading_low(a);
      sm_off  = SM_OFF(a);
      vabits8 = sm->vabits8[sm_off];
      // Convert V bits from compact memory form to expanded register form
      // Handle common case quickly: a is mapped, and the entire
      // word32 it lives in is addressible.
      if      (LIKELY(vabits8 == VA_BITS8_DEFINED  )) { return V_BITS8_DEFINED;   }
      else if (LIKELY(vabits8 == VA_BITS8_UNDEFINED)) { return V_BITS8_UNDEFINED; }
      else {
         // The 4 (yes, 4) bytes are not all-defined or all-undefined, check
         // the single byte.
         UChar vabits2 = extract_vabits2_from_vabits8(a, vabits8);
         if      (vabits2 == VA_BITS2_DEFINED  ) { return V_BITS8_DEFINED;   }
         else if (vabits2 == VA_BITS2_UNDEFINED) { return V_BITS8_UNDEFINED; }
         else {
            /* Slow case: the byte is not all-defined or all-undefined. */
            PROF_EVENT(MCPE_LOADV8_SLOW2);
            return (UWord)mc_LOADVn_slow( a, 8, False/*irrelevant*/ );
         }
      }
   }
#endif
}
#endif
/*------------------------------------------------------------*/
/*--- STOREV8                                              ---*/
/*------------------------------------------------------------*/

VG_REGPARM(2)
void MC_(helperc_STOREV8) ( Addr a, UWord vbits8 )
{
   PROF_EVENT(MCPE_STOREV8);

#ifndef PERF_FAST_STOREV
   mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
#else
   {
      UWord   sm_off, vabits8;
      SecMap* sm;

      if (UNLIKELY( UNALIGNED_OR_HIGH(a,8) )) {
         PROF_EVENT(MCPE_STOREV8_SLOW1);
         mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
         return;
      }

      sm      = get_secmap_for_reading_low(a);
      sm_off  = SM_OFF(a);
      vabits8 = sm->vabits8[sm_off];

      // Clevernesses to speed up storing V bits.
      // The 64/32/16 bit cases also have similar clevernesses, but it
      // works a little differently to the code below.
      //
      // Cleverness 1:  sometimes we don't have to write the shadow memory at
      // all, if we can tell that what we want to write is the same as what is
      // already there. These cases are marked below as "defined on defined" and
      // "undefined on undefined".
      //
      // Cleverness 2:
      // We also avoid to call mc_STOREVn_slow if the V bits can directly
      // be written in the secondary map. V bits can be directly written
      // if 4 conditions are respected:
      //   * The address for which V bits are written is naturally aligned
      //        on 1 byte  for STOREV8 (this is always true)
      //        on 2 bytes for STOREV16
      //        on 4 bytes for STOREV32
      //        on 8 bytes for STOREV64.
      //   * V bits being written are either fully defined or fully undefined.
      //     (for partially defined V bits, V bits cannot be directly written,
      //      as the secondary vbits table must be maintained).
      //   * the secmap is not distinguished (distinguished maps cannot be
      //     modified).
      //   * the memory corresponding to the V bits being written is
      //     accessible (if one or more bytes are not accessible,
      //     we must call mc_STOREVn_slow in order to report accessibility
      //     errors).
      //     Note that for STOREV32 and STOREV64, it is too expensive
      //     to verify the accessibility of each byte for the benefit it
      //     brings. Instead, a quicker check is done by comparing to
      //     VA_BITS(8|16)_(UN)DEFINED. This guarantees accessibility,
      //     but misses some opportunity of direct modifications.
      //     Checking each byte accessibility was measured for
      //     STOREV32+perf tests and was slowing down all perf tests.
      // The cases corresponding to cleverness 2 are marked below as
      // "direct mod".
      if (LIKELY(V_BITS8_DEFINED == vbits8)) {
         if (LIKELY(vabits8 == VA_BITS8_DEFINED)) {
            return; // defined on defined
         }
         if (!is_distinguished_sm(sm)
             && VA_BITS2_NOACCESS != extract_vabits2_from_vabits8(a, vabits8)) {
            // direct mod
            insert_vabits2_into_vabits8( a, VA_BITS2_DEFINED,
                                         &(sm->vabits8[sm_off]) );
            return;
         }
         PROF_EVENT(MCPE_STOREV8_SLOW2);
         mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
         return;
      }
      if (V_BITS8_UNDEFINED == vbits8) {
         if (vabits8 == VA_BITS8_UNDEFINED) {
            return; // undefined on undefined
         }
         if (!is_distinguished_sm(sm)
             && (VA_BITS2_NOACCESS
                 != extract_vabits2_from_vabits8(a, vabits8))) {
            // direct mod
            insert_vabits2_into_vabits8( a, VA_BITS2_UNDEFINED,
                                         &(sm->vabits8[sm_off]) );
            return;
         }
         PROF_EVENT(MCPE_STOREV8_SLOW3);
         mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
         return;
      }

      // Partially defined word
      PROF_EVENT(MCPE_STOREV8_SLOW4);
      mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
   }
#endif
}
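/* Illustrative sketch (disabled; not from the original source): the
   four ways an 8-bit store is resolved by the fast path above.  The
   shadow states described in the comments are made-up preconditions,
   not something this sketch establishes. */
#if 0
static void storev8_fastpath_examples ( Addr a )
{
   /* defined on defined: shadow already all-defined, nothing written.  */
   MC_(helperc_STOREV8)( a, V_BITS8_DEFINED );   /* vabits8 == 0xAA      */
   /* undefined on undefined: likewise a no-op.                         */
   MC_(helperc_STOREV8)( a, V_BITS8_UNDEFINED ); /* vabits8 == 0x55      */
   /* direct mod: byte accessible but in a different state; its two
      vabits are rewritten in place via insert_vabits2_into_vabits8.    */
   MC_(helperc_STOREV8)( a, V_BITS8_DEFINED );   /* vabits2(a) == 1      */
   /* partially defined V bits always fall through to mc_STOREVn_slow,
      which maintains the secondary V bit table.                        */
   MC_(helperc_STOREV8)( a, 0x0F );              /* low 4 bits undefined */
}
#endif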
/*------------------------------------------------------------*/
/*--- Functions called directly from generated code:       ---*/
/*--- Value-check failure handlers.                        ---*/
/*------------------------------------------------------------*/

/* Call these ones when an origin is available ... */
VG_REGPARM(1)
void MC_(helperc_value_check0_fail_w_o) ( UWord origin ) {
   MC_(record_cond_error) ( VG_(get_running_tid)(), (UInt)origin );
}

VG_REGPARM(1)
void MC_(helperc_value_check1_fail_w_o) ( UWord origin ) {
   MC_(record_value_error) ( VG_(get_running_tid)(), 1, (UInt)origin );
}

VG_REGPARM(1)
void MC_(helperc_value_check4_fail_w_o) ( UWord origin ) {
   MC_(record_value_error) ( VG_(get_running_tid)(), 4, (UInt)origin );
}

VG_REGPARM(1)
void MC_(helperc_value_check8_fail_w_o) ( UWord origin ) {
   MC_(record_value_error) ( VG_(get_running_tid)(), 8, (UInt)origin );
}

VG_REGPARM(2)
void MC_(helperc_value_checkN_fail_w_o) ( HWord sz, UWord origin ) {
   MC_(record_value_error) ( VG_(get_running_tid)(), (Int)sz, (UInt)origin );
}

/* ... and these when an origin isn't available. */

VG_REGPARM(0)
void MC_(helperc_value_check0_fail_no_o) ( void ) {
   MC_(record_cond_error) ( VG_(get_running_tid)(), 0/*origin*/ );
}

VG_REGPARM(0)
void MC_(helperc_value_check1_fail_no_o) ( void ) {
   MC_(record_value_error) ( VG_(get_running_tid)(), 1, 0/*origin*/ );
}

VG_REGPARM(0)
void MC_(helperc_value_check4_fail_no_o) ( void ) {
   MC_(record_value_error) ( VG_(get_running_tid)(), 4, 0/*origin*/ );
}

VG_REGPARM(0)
void MC_(helperc_value_check8_fail_no_o) ( void ) {
   MC_(record_value_error) ( VG_(get_running_tid)(), 8, 0/*origin*/ );
}

VG_REGPARM(1)
void MC_(helperc_value_checkN_fail_no_o) ( HWord sz ) {
   MC_(record_value_error) ( VG_(get_running_tid)(), (Int)sz, 0/*origin*/ );
}
/*------------------------------------------------------------*/
/*--- Metadata get/set functions, for client requests.     ---*/
/*------------------------------------------------------------*/

// Nb: this expands the V+A bits out into register-form V bits, even though
// they're in memory.  This is for backward compatibility, and because it's
// probably what the user wants.

/* Copy Vbits from/to address 'a'. Returns: 1 == OK, 2 == alignment
   error [no longer used], 3 == addressing error. */
/* Nb: We used to issue various definedness/addressability errors from here,
   but we took them out because they ranged from not-very-helpful to
   downright annoying, and they complicated the error data structures. */
static Int mc_get_or_set_vbits_for_client (
   Addr a,
   Addr vbits,
   SizeT szB,
   Bool setting, /* True <=> set vbits,  False <=> get vbits */
   Bool is_client_request /* True <=> real user request
                             False <=> internal call from gdbserver */
)
{
   SizeT i;
   Bool  ok;
   UChar vbits8;

   /* Check that arrays are addressible before doing any getting/setting.
      vbits to be checked only for real user request. */
   for (i = 0; i < szB; i++) {
      if (VA_BITS2_NOACCESS == get_vabits2(a + i) ||
          (is_client_request && VA_BITS2_NOACCESS == get_vabits2(vbits + i))) {
         return 3;
      }
   }

   /* Do the copying */
   if (setting) {
      /* Setting */
      for (i = 0; i < szB; i++) {
         ok = set_vbits8(a + i, ((UChar*)vbits)[i]);
         tl_assert(ok);
      }
   } else {
      /* Getting */
      for (i = 0; i < szB; i++) {
         ok = get_vbits8(a + i, &vbits8);
         tl_assert(ok);
         ((UChar*)vbits)[i] = vbits8;
      }
      if (is_client_request)
         // The bytes in vbits[] have now been set, so mark them as such.
         MC_(make_mem_defined)(vbits, szB);
   }

   return 1;
}
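/* Illustrative client-side usage (disabled; not part of this file): the
   VALGRIND_GET_VBITS / VALGRIND_SET_VBITS client requests declared in
   memcheck.h end up in mc_get_or_set_vbits_for_client.  A sketch, with
   made-up variable names: */
#if 0
#include "memcheck.h"
void client_vbits_example ( void )
{
   int  x;                              /* deliberately uninitialised   */
   char vbits[sizeof(int)];
   char zeroes[sizeof(int)] = {0, 0, 0, 0};
   /* Copy x's V bits out; each 1 bit marks an undefined value bit.     */
   VALGRIND_GET_VBITS(&x, vbits, sizeof(int));
   /* Force x to be treated as fully defined (all V bits zero).         */
   VALGRIND_SET_VBITS(&x, zeroes, sizeof(int));
}
#endif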
/*------------------------------------------------------------*/
/*--- Detecting leaked (unreachable) malloc'd blocks.      ---*/
/*------------------------------------------------------------*/

/* For the memory leak detector, say whether an entire 64k chunk of
   address space is possibly in use, or not.  If in doubt return
   True. */
Bool MC_(is_within_valid_secondary) ( Addr a )
{
   SecMap* sm = maybe_get_secmap_for ( a );
   if (sm == NULL || sm == &sm_distinguished[SM_DIST_NOACCESS]) {
      /* Definitely not in use. */
      return False;
   } else {
      return True;
   }
}


/* For the memory leak detector, say whether or not a given word
   address is to be regarded as valid. */
Bool MC_(is_valid_aligned_word) ( Addr a )
{
   tl_assert(sizeof(UWord) == 4 || sizeof(UWord) == 8);
   tl_assert(VG_IS_WORD_ALIGNED(a));
   if (get_vabits8_for_aligned_word32 (a) != VA_BITS8_DEFINED)
      return False;
   if (sizeof(UWord) == 8) {
      if (get_vabits8_for_aligned_word32 (a + 4) != VA_BITS8_DEFINED)
         return False;
   }
   if (UNLIKELY(MC_(in_ignored_range)(a)))
      return False;
   else
      return True;
}
/*------------------------------------------------------------*/
/*--- Initialisation                                       ---*/
/*------------------------------------------------------------*/

static void init_shadow_memory ( void )
{
   Int     i;
   SecMap* sm;

   tl_assert(V_BIT_UNDEFINED   == 1);
   tl_assert(V_BIT_DEFINED     == 0);
   tl_assert(V_BITS8_UNDEFINED == 0xFF);
   tl_assert(V_BITS8_DEFINED   == 0);

   /* Build the 3 distinguished secondaries */
   sm = &sm_distinguished[SM_DIST_NOACCESS];
   for (i = 0; i < SM_CHUNKS; i++) sm->vabits8[i] = VA_BITS8_NOACCESS;

   sm = &sm_distinguished[SM_DIST_UNDEFINED];
   for (i = 0; i < SM_CHUNKS; i++) sm->vabits8[i] = VA_BITS8_UNDEFINED;

   sm = &sm_distinguished[SM_DIST_DEFINED];
   for (i = 0; i < SM_CHUNKS; i++) sm->vabits8[i] = VA_BITS8_DEFINED;

   /* Set up the primary map. */
   /* These entries gradually get overwritten as the used address
      space expands. */
   for (i = 0; i < N_PRIMARY_MAP; i++)
      primary_map[i] = &sm_distinguished[SM_DIST_NOACCESS];

   /* Auxiliary primary maps */
   init_auxmap_L1_L2();

   /* auxmap_size = auxmap_used = 0;
      no ... these are statically initialised */

   /* Secondary V bit table */
   secVBitTable = createSecVBitTable();
}
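/* Illustrative sketch (disabled; the helper name is made up): how the
   structures initialised above are consulted on the fast paths and in
   the assembly helpers further up.  For a low-range address, the top 16
   bits index primary_map, and the low 16 bits, divided by 4 (one
   vabits8 byte covers four bytes of memory), index the secondary's
   vabits8 array. */
#if 0
static UChar vabits8_for_low_addr_sketch ( Addr a )
{
   SecMap* sm = primary_map[a >> 16];        /* pri-map-ix               */
   return sm->vabits8[(a & 0xFFFF) >> 2];    /* sec-map offset: 2 vabits
                                                per byte of memory       */
}
#endif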
/*------------------------------------------------------------*/
/*--- Sanity check machinery (permanently engaged)         ---*/
/*------------------------------------------------------------*/

static Bool mc_cheap_sanity_check ( void )
{
   n_sanity_cheap++;
   PROF_EVENT(MCPE_CHEAP_SANITY_CHECK);
   /* Check for sane operating level */
   if (MC_(clo_mc_level) < 1 || MC_(clo_mc_level) > 3)
      return False;
   /* nothing else useful we can rapidly check */
   return True;
}

static Bool mc_expensive_sanity_check ( void )
{
   Int     i;
   Word    n_secmaps_found;
   SecMap* sm;
   const HChar* errmsg;
   Bool    bad = False;

   if (0) VG_(printf)("expensive sanity check\n");

   n_sanity_expensive++;
   PROF_EVENT(MCPE_EXPENSIVE_SANITY_CHECK);

   /* Check for sane operating level */
   if (MC_(clo_mc_level) < 1 || MC_(clo_mc_level) > 3)
      return False;

   /* Check that the 3 distinguished SMs are still as they should be. */

   /* Check noaccess DSM. */
   sm = &sm_distinguished[SM_DIST_NOACCESS];
   for (i = 0; i < SM_CHUNKS; i++)
      if (sm->vabits8[i] != VA_BITS8_NOACCESS)
         bad = True;

   /* Check undefined DSM. */
   sm = &sm_distinguished[SM_DIST_UNDEFINED];
   for (i = 0; i < SM_CHUNKS; i++)
      if (sm->vabits8[i] != VA_BITS8_UNDEFINED)
         bad = True;

   /* Check defined DSM. */
   sm = &sm_distinguished[SM_DIST_DEFINED];
   for (i = 0; i < SM_CHUNKS; i++)
      if (sm->vabits8[i] != VA_BITS8_DEFINED)
         bad = True;

   if (bad) {
      VG_(printf)("memcheck expensive sanity: "
                  "distinguished_secondaries have changed\n");
      return False;
   }

   /* If we're not checking for undefined value errors, the secondary V bit
    * table should be empty. */
   if (MC_(clo_mc_level) == 1) {
      if (0 != VG_(OSetGen_Size)(secVBitTable))
         return False;
   }

   /* check the auxiliary maps, very thoroughly */
   n_secmaps_found = 0;
   errmsg = check_auxmap_L1_L2_sanity( &n_secmaps_found );
   if (errmsg) {
      VG_(printf)("memcheck expensive sanity, auxmaps:\n\t%s", errmsg);
      return False;
   }

   /* n_secmaps_found is now the number referred to by the auxiliary
      primary map.  Now add on the ones referred to by the main
      primary map. */
   for (i = 0; i < N_PRIMARY_MAP; i++) {
      if (primary_map[i] == NULL) {
         bad = True;
      } else {
         if (!is_distinguished_sm(primary_map[i]))
            n_secmaps_found++;
      }
   }

   /* check that the number of secmaps issued matches the number that
      are reachable (iow, no secmap leaks) */
   if (n_secmaps_found != (n_issued_SMs - n_deissued_SMs))
      bad = True;

   if (bad) {
      VG_(printf)("memcheck expensive sanity: "
                  "apparent secmap leakage\n");
      return False;
   }

   if (bad) {
      VG_(printf)("memcheck expensive sanity: "
                  "auxmap covers wrong address space\n");
      return False;
   }

   /* there is only one pointer to each secmap (expensive) */

   return True;
}
/*------------------------------------------------------------*/
/*--- Command line args                                    ---*/
/*------------------------------------------------------------*/

/* 31 Aug 2015: Vectorised code is now so widespread that
   --partial-loads-ok needs to be enabled by default on all platforms.
   Not doing so causes lots of false errors. */
Bool          MC_(clo_partial_loads_ok)       = True;
Long          MC_(clo_freelist_vol)           = 20*1000*1000LL;
Long          MC_(clo_freelist_big_blocks)    = 1*1000*1000LL;
LeakCheckMode MC_(clo_leak_check)             = LC_Summary;
VgRes         MC_(clo_leak_resolution)        = Vg_HighRes;
UInt          MC_(clo_show_leak_kinds)        = R2S(Possible) | R2S(Unreached);
UInt          MC_(clo_error_for_leak_kinds)   = R2S(Possible) | R2S(Unreached);
UInt          MC_(clo_leak_check_heuristics)  = H2S(LchStdString)
                                              | H2S(LchLength64)
                                              | H2S(LchNewArray)
                                              | H2S(LchMultipleInheritance);
Bool          MC_(clo_xtree_leak)             = False;
const HChar*  MC_(clo_xtree_leak_file)        = "xtleak.kcg.%p";
Bool          MC_(clo_workaround_gcc296_bugs) = False;
Int           MC_(clo_malloc_fill)            = -1;
Int           MC_(clo_free_fill)              = -1;
KeepStacktraces MC_(clo_keep_stacktraces)     = KS_alloc_and_free;
Int           MC_(clo_mc_level)               = 2;
Bool          MC_(clo_show_mismatched_frees)  = True;
Bool          MC_(clo_show_realloc_size_zero) = True;

ExpensiveDefinednessChecks
              MC_(clo_expensive_definedness_checks) = EdcAUTO;

Bool          MC_(clo_ignore_range_below_sp)               = False;
UInt          MC_(clo_ignore_range_below_sp__first_offset) = 0;
UInt          MC_(clo_ignore_range_below_sp__last_offset)  = 0;

static const HChar * MC_(parse_leak_heuristics_tokens) =
   "-,stdstring,length64,newarray,multipleinheritance";
/* The first heuristic value (LchNone) has no keyword, as this is
   a fake heuristic used to collect the blocks found without any
   heuristic. */
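/* Example: "--leak-check-heuristics=stdstring,multipleinheritance"
   selects H2S(LchStdString) | H2S(LchMultipleInheritance).  The leading
   "-" token merely reserves the LchNone slot described above and is
   presumably not meant to be given by the user. */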
6084 static Bool
mc_process_cmd_line_options(const HChar
* arg
)
6086 const HChar
* tmp_str
;
6089 tl_assert( MC_(clo_mc_level
) >= 1 && MC_(clo_mc_level
) <= 3 );
6091 /* Set MC_(clo_mc_level):
6092 1 = A bit tracking only
6093 2 = A and V bit tracking, but no V bit origins
6094 3 = A and V bit tracking, and V bit origins
6096 Do this by inspecting --undef-value-errors= and
6097 --track-origins=. Reject the case --undef-value-errors=no
6098 --track-origins=yes as meaningless.
6100 if VG_BOOL_CLO(arg
, "--undef-value-errors", tmp_show
) {
6102 if (MC_(clo_mc_level
) == 1)
6103 MC_(clo_mc_level
) = 2;
6105 if (MC_(clo_mc_level
) == 3) {
6108 MC_(clo_mc_level
) = 1;
6112 else if VG_BOOL_CLO(arg
, "--track-origins", tmp_show
) {
6114 if (MC_(clo_mc_level
) == 1) {
6117 MC_(clo_mc_level
) = 3;
6120 if (MC_(clo_mc_level
) == 3)
6121 MC_(clo_mc_level
) = 2;
6124 else if VG_BOOL_CLO(arg
, "--partial-loads-ok", MC_(clo_partial_loads_ok
)) {}
6125 else if VG_USET_CLOM(cloPD
, arg
, "--errors-for-leak-kinds",
6126 MC_(parse_leak_kinds_tokens
),
6127 MC_(clo_error_for_leak_kinds
)) {}
6128 else if VG_USET_CLOM(cloPD
, arg
, "--show-leak-kinds",
6129 MC_(parse_leak_kinds_tokens
),
6130 MC_(clo_show_leak_kinds
)) {}
6131 else if VG_USET_CLOM(cloPD
, arg
, "--leak-check-heuristics",
6132 MC_(parse_leak_heuristics_tokens
),
6133 MC_(clo_leak_check_heuristics
)) {}
6134 else if (VG_BOOL_CLOM(cloPD
, arg
, "--show-reachable", tmp_show
)) {
6136 MC_(clo_show_leak_kinds
) = MC_(all_Reachedness
)();
6138 MC_(clo_show_leak_kinds
) &= ~R2S(Reachable
);
6141 else if VG_BOOL_CLOM(cloPD
, arg
, "--show-possibly-lost", tmp_show
) {
6143 MC_(clo_show_leak_kinds
) |= R2S(Possible
);
6145 MC_(clo_show_leak_kinds
) &= ~R2S(Possible
);
6148 else if VG_BOOL_CLO(arg
, "--workaround-gcc296-bugs",
6149 MC_(clo_workaround_gcc296_bugs
)) {}
6151 else if VG_BINT_CLOM(cloPD
, arg
, "--freelist-vol", MC_(clo_freelist_vol
),
6152 0, 10*1000*1000*1000LL) {}
6154 else if VG_BINT_CLOM(cloPD
, arg
, "--freelist-big-blocks",
6155 MC_(clo_freelist_big_blocks
),
6156 0, 10*1000*1000*1000LL) {}
6158 else if VG_XACT_CLOM(cloPD
, arg
, "--leak-check=no",
6159 MC_(clo_leak_check
), LC_Off
) {}
6160 else if VG_XACT_CLOM(cloPD
, arg
, "--leak-check=summary",
6161 MC_(clo_leak_check
), LC_Summary
) {}
6162 else if VG_XACT_CLOM(cloPD
, arg
, "--leak-check=yes",
6163 MC_(clo_leak_check
), LC_Full
) {}
6164 else if VG_XACT_CLOM(cloPD
, arg
, "--leak-check=full",
6165 MC_(clo_leak_check
), LC_Full
) {}
6167 else if VG_XACT_CLO(arg
, "--leak-resolution=low",
6168 MC_(clo_leak_resolution
), Vg_LowRes
) {}
6169 else if VG_XACT_CLO(arg
, "--leak-resolution=med",
6170 MC_(clo_leak_resolution
), Vg_MedRes
) {}
6171 else if VG_XACT_CLO(arg
, "--leak-resolution=high",
6172 MC_(clo_leak_resolution
), Vg_HighRes
) {}
6174 else if VG_STR_CLOM(cloPD
, arg
, "--ignore-ranges", tmp_str
) {
6175 Bool ok
= parse_ignore_ranges(tmp_str
);
6177 VG_(message
)(Vg_DebugMsg
,
6178 "ERROR: --ignore-ranges: "
6179 "invalid syntax, or end <= start in range\n");
6182 if (gIgnoredAddressRanges
) {
6184 for (i
= 0; i
< VG_(sizeRangeMap
)(gIgnoredAddressRanges
); i
++) {
6185 UWord val
= IAR_INVALID
;
6186 UWord key_min
= ~(UWord
)0;
6187 UWord key_max
= (UWord
)0;
6188 VG_(indexRangeMap
)( &key_min
, &key_max
, &val
,
6189 gIgnoredAddressRanges
, i
);
6190 tl_assert(key_min
<= key_max
);
6191 UWord limit
= 0x4000000; /* 64M - entirely arbitrary limit */
6192 if (key_max
- key_min
> limit
&& val
== IAR_CommandLine
) {
6193 VG_(message
)(Vg_DebugMsg
,
6194 "ERROR: --ignore-ranges: suspiciously large range:\n");
6195 VG_(message
)(Vg_DebugMsg
,
6196 " 0x%lx-0x%lx (size %lu)\n", key_min
, key_max
,
6197 key_max
- key_min
+ 1);
6204 else if VG_STR_CLOM(cloPD
, arg
, "--ignore-range-below-sp", tmp_str
) {
6205 /* This seems at first a bit weird, but: in order to imply
6206 a non-wrapped-around address range, the first offset needs to be
6207 larger than the second one. For example
6208 --ignore-range-below-sp=8192,8189
6209 would cause accesses to in the range [SP-8192, SP-8189] to be
6211 UInt offs1
= 0, offs2
= 0;
6212 Bool ok
= parse_UInt_pair(&tmp_str
, &offs1
, &offs2
);
6213 // Ensure we used all the text after the '=' sign.
6214 if (ok
&& *tmp_str
!= 0) ok
= False
;
6216 VG_(message
)(Vg_DebugMsg
,
6217 "ERROR: --ignore-range-below-sp: invalid syntax. "
6218 " Expected \"...=decimalnumber-decimalnumber\".\n");
6221 if (offs1
> 1000*1000 /*arbitrary*/ || offs2
> 1000*1000 /*ditto*/) {
6222 VG_(message
)(Vg_DebugMsg
,
6223 "ERROR: --ignore-range-below-sp: suspiciously large "
6224 "offset(s): %u and %u\n", offs1
, offs2
);
6227 if (offs1
<= offs2
) {
6228 VG_(message
)(Vg_DebugMsg
,
6229 "ERROR: --ignore-range-below-sp: invalid offsets "
6230 "(the first must be larger): %u and %u\n", offs1
, offs2
);
6233 tl_assert(offs1
> offs2
);
6234 if (offs1
- offs2
> 4096 /*arbitrary*/) {
6235 VG_(message
)(Vg_DebugMsg
,
6236 "ERROR: --ignore-range-below-sp: suspiciously large "
6237 "range: %u-%u (size %u)\n", offs1
, offs2
, offs1
- offs2
);
6240 MC_(clo_ignore_range_below_sp
) = True
;
6241 MC_(clo_ignore_range_below_sp__first_offset
) = offs1
;
6242 MC_(clo_ignore_range_below_sp__last_offset
) = offs2
;
6246 else if VG_BHEX_CLO(arg
, "--malloc-fill", MC_(clo_malloc_fill
), 0x00,0xFF) {}
6247 else if VG_BHEX_CLO(arg
, "--free-fill", MC_(clo_free_fill
), 0x00,0xFF) {}
6249 else if VG_XACT_CLO(arg
, "--keep-stacktraces=alloc",
6250 MC_(clo_keep_stacktraces
), KS_alloc
) {}
6251 else if VG_XACT_CLO(arg
, "--keep-stacktraces=free",
6252 MC_(clo_keep_stacktraces
), KS_free
) {}
6253 else if VG_XACT_CLO(arg
, "--keep-stacktraces=alloc-and-free",
6254 MC_(clo_keep_stacktraces
), KS_alloc_and_free
) {}
6255 else if VG_XACT_CLO(arg
, "--keep-stacktraces=alloc-then-free",
6256 MC_(clo_keep_stacktraces
), KS_alloc_then_free
) {}
6257 else if VG_XACT_CLO(arg
, "--keep-stacktraces=none",
6258 MC_(clo_keep_stacktraces
), KS_none
) {}
6260 else if VG_BOOL_CLOM(cloPD
, arg
, "--show-mismatched-frees",
6261 MC_(clo_show_mismatched_frees
)) {}
6262 else if VG_BOOL_CLOM(cloPD
, arg
, "--show-realloc-size-zero",
6263 MC_(clo_show_realloc_size_zero
)) {}
6265 else if VG_XACT_CLO(arg
, "--expensive-definedness-checks=no",
6266 MC_(clo_expensive_definedness_checks
), EdcNO
) {}
6267 else if VG_XACT_CLO(arg
, "--expensive-definedness-checks=auto",
6268 MC_(clo_expensive_definedness_checks
), EdcAUTO
) {}
6269 else if VG_XACT_CLO(arg
, "--expensive-definedness-checks=yes",
6270 MC_(clo_expensive_definedness_checks
), EdcYES
) {}
6272 else if VG_BOOL_CLO(arg
, "--xtree-leak",
6273 MC_(clo_xtree_leak
)) {}
6274 else if VG_STR_CLO (arg
, "--xtree-leak-file",
6275 MC_(clo_xtree_leak_file
)) {}
6278 return VG_(replacement_malloc_process_cmd_line_option
)(arg
);
6284 VG_(fmsg_bad_option
)(arg
,
6285 "--track-origins=yes has no effect when --undef-value-errors=no.\n");
static void mc_print_usage(void)
{
   VG_(printf)(
"    --leak-check=no|summary|full     search for memory leaks at exit?  [summary]\n"
"    --leak-resolution=low|med|high   differentiation of leak stack traces [high]\n"
"    --show-leak-kinds=kind1,kind2,.. which leak kinds to show?\n"
"                                            [definite,possible]\n"
"    --errors-for-leak-kinds=kind1,kind2,..  which leak kinds are errors?\n"
"                                            [definite,possible]\n"
"        where kind is one of:\n"
"          definite indirect possible reachable all none\n"
"    --leak-check-heuristics=heur1,heur2,... which heuristics to use for\n"
"        improving leak search false positive [all]\n"
"        where heur is one of:\n"
"          stdstring length64 newarray multipleinheritance all none\n"
"    --show-reachable=yes             same as --show-leak-kinds=all\n"
"    --show-reachable=no --show-possibly-lost=yes\n"
"                                     same as --show-leak-kinds=definite,possible\n"
"    --show-reachable=no --show-possibly-lost=no\n"
"                                     same as --show-leak-kinds=definite\n"
"    --xtree-leak=no|yes              output leak result in xtree format? [no]\n"
"    --xtree-leak-file=<file>         xtree leak report file [xtleak.kcg.%%p]\n"
"    --undef-value-errors=no|yes      check for undefined value errors [yes]\n"
"    --track-origins=no|yes           show origins of undefined values? [no]\n"
"    --partial-loads-ok=no|yes        too hard to explain here; see manual [yes]\n"
"    --expensive-definedness-checks=no|auto|yes\n"
"                                     Use extra-precise definedness tracking [auto]\n"
"    --freelist-vol=<number>          volume of freed blocks queue      [20000000]\n"
"    --freelist-big-blocks=<number>   releases first blocks with size>= [1000000]\n"
"    --workaround-gcc296-bugs=no|yes  self explanatory [no].  Deprecated.\n"
"                                     Use --ignore-range-below-sp instead.\n"
"    --ignore-ranges=0xPP-0xQQ[,0xRR-0xSS]   assume given addresses are OK\n"
"    --ignore-range-below-sp=<number>-<number>  do not report errors for\n"
"                                     accesses at the given offsets below SP\n"
"    --malloc-fill=<hexnumber>        fill malloc'd areas with given value\n"
"    --free-fill=<hexnumber>          fill free'd areas with given value\n"
"    --keep-stacktraces=alloc|free|alloc-and-free|alloc-then-free|none\n"
"        stack trace(s) to keep for malloc'd/free'd areas       [alloc-and-free]\n"
"    --show-mismatched-frees=no|yes   show frees that don't match the allocator? [yes]\n"
"    --show-realloc-size-zero=no|yes  show reallocs with a size of zero? [yes]\n"
   );
}
static void mc_print_debug_usage(void)
{
   VG_(printf)(
"    (none)\n"
   );
}
/*------------------------------------------------------------*/
/*--- Client blocks                                        ---*/
/*------------------------------------------------------------*/

/* Client block management:

   This is managed as an expanding array of client block descriptors.
   Indices of live descriptors are issued to the client, so it can ask
   to free them later.  Therefore we cannot slide live entries down
   over dead ones.  Instead we must use free/inuse flags and scan for
   an empty slot at allocation time.  This in turn means allocation is
   relatively expensive, so we hope this does not happen too often.

   An unused block has start == size == 0
*/

/* type CGenBlock is defined in mc_include.h */

/* This subsystem is self-initialising. */
static UWord      cgb_size = 0;
static UWord      cgb_used = 0;
static CGenBlock* cgbs     = NULL;

/* Stats for this subsystem. */
static ULong cgb_used_MAX = 0;   /* Max in use. */
static ULong cgb_allocs   = 0;   /* Number of allocs.   */
static ULong cgb_discards = 0;   /* Number of discards. */
static ULong cgb_search   = 0;   /* Number of searches. */
/* Get access to the client block array. */
void MC_(get_ClientBlock_array)( /*OUT*/CGenBlock** blocks,
                                 /*OUT*/UWord* nBlocks )
{
   *blocks  = cgbs;
   *nBlocks = cgb_used;
}


static
Int alloc_client_block ( void )
{
   UWord      i, sz_new;
   CGenBlock* cgbs_new;

   cgb_allocs++;

   for (i = 0; i < cgb_used; i++) {
      cgb_search++;
      if (cgbs[i].start == 0 && cgbs[i].size == 0)
         return i;
   }

   /* Not found.  Try to allocate one at the end. */
   if (cgb_used < cgb_size) {
      cgb_used++;
      return cgb_used-1;
   }

   /* Ok, we have to allocate a new one. */
   tl_assert(cgb_used == cgb_size);
   sz_new = (cgbs == NULL) ? 10 : (2 * cgb_size);

   cgbs_new = VG_(malloc)( "mc.acb.1", sz_new * sizeof(CGenBlock) );
   for (i = 0; i < cgb_used; i++)
      cgbs_new[i] = cgbs[i];

   if (cgbs != NULL)
      VG_(free)( cgbs );
   cgbs = cgbs_new;

   cgb_size = sz_new;
   cgb_used++;
   if (cgb_used > cgb_used_MAX)
      cgb_used_MAX = cgb_used;

   return cgb_used-1;
}
static void show_client_block_stats ( void )
{
   VG_(message)(Vg_DebugMsg,
      "general CBs: %llu allocs, %llu discards, %llu maxinuse, %llu search\n",
      cgb_allocs, cgb_discards, cgb_used_MAX, cgb_search );
}
static void print_monitor_help ( void )
{
   VG_(gdb_printf)
      (
"\n"
"memcheck monitor commands:\n"
"  xb <addr> [<len>]\n"
"        prints validity bits for <len> (or 1) bytes at <addr>\n"
"            bit values 0 = valid, 1 = invalid, __ = unaddressable byte\n"
"        Then prints the bytes values below the corresponding validity bits\n"
"        in a layout similar to the gdb command 'x /<len>xb <addr>'\n"
"        Example: xb 0x8049c78 10\n"
"  get_vbits <addr> [<len>]\n"
"        Similar to xb, but only prints the validity bytes by group of 4.\n"
"  make_memory [noaccess|undefined\n"
"                     |defined|Definedifaddressable] <addr> [<len>]\n"
"        mark <len> (or 1) bytes at <addr> with the given accessibility\n"
"  check_memory [addressable|defined] <addr> [<len>]\n"
"        check that <len> (or 1) bytes at <addr> have the given accessibility\n"
"            and outputs a description of <addr>\n"
"  leak_check [full*|summary|xtleak]\n"
"                [kinds kind1,kind2,...|reachable|possibleleak*|definiteleak]\n"
"                [heuristics heur1,heur2,...]\n"
"                [new|increased*|changed|any]\n"
"                [unlimited*|limited <max_loss_records_output>]\n"
"            * = defaults\n"
"         xtleak produces an xtree full leak result in xtleak.kcg.%%p.%%n\n"
"       where kind is one of:\n"
"         definite indirect possible reachable all none\n"
"       where heur is one of:\n"
"         stdstring length64 newarray multipleinheritance all none*\n"
"       Examples: leak_check\n"
"                 leak_check summary any\n"
"                 leak_check full kinds indirect,possible\n"
"                 leak_check full reachable any limited 100\n"
"  block_list <loss_record_nr>|<loss_record_nr_from>..<loss_record_nr_to>\n"
"                [unlimited*|limited <max_blocks>]\n"
"                [heuristics heur1,heur2,...]\n"
"        after a leak search, shows the list of blocks of <loss_record_nr>\n"
"        (or of the range <loss_record_nr_from>..<loss_record_nr_to>).\n"
"        With heuristics, only shows the blocks found via heur1,heur2,...\n"
"            * = defaults\n"
"  who_points_at <addr> [<len>]\n"
"        shows places pointing inside <len> (default 1) bytes at <addr>\n"
"        (with len 1, only shows \"start pointers\" pointing exactly to <addr>,\n"
"         with len > 1, will also show \"interior pointers\")\n"
"  xtmemory [<filename>]\n"
"        dump xtree memory profile in <filename> (default xtmemory.kcg.%%p.%%n)\n"
"\n");
}
/* Print szB bytes at address, with a format similar to the gdb command
   x /<szB>xb address.
   res[i] == 1 indicates the corresponding byte is addressable. */
static void gdb_xb (Addr address, SizeT szB, Int res[])
{
   UInt i;

   for (i = 0; i < szB; i++) {
      UInt bnr = i % 8;
      if (bnr == 0) {
         if (i != 0)
            VG_(printf) ("\n"); // Terminate previous line
         VG_(printf) ("%p:", (void*)(address+i));
      }
      if (res[i] == 1)
         VG_(printf) ("\t0x%02x", *(UChar*)(address+i));
      else
         VG_(printf) ("\t0x??");
   }
   VG_(printf) ("\n"); // Terminate previous line
}
/* Returns the address of the next non space character,
   or address of the string terminator. */
static HChar* next_non_space (HChar *s)
{
   while (*s && *s == ' ')
      s++;
   return s;
}
/* Parse an integer slice, i.e. a single integer or a range of integer.
   Syntax of an integer slice:
      <integer>[..<integer> ]
   (spaces are allowed before and/or after ..).
   Return True if range correctly parsed, False otherwise. */
static Bool VG_(parse_slice) (HChar* s, HChar** saveptr,
                              UInt *from, UInt *to)
{
   HChar* wl;
   HChar* endptr;

   endptr = NULL;
   wl = VG_(strtok_r) (s, " ", saveptr);

   /* slice must start with an integer. */
   if (wl == NULL) {
      VG_(gdb_printf) ("expecting integer or slice <from>..<to>\n");
      return False;
   }
   *from = VG_(strtoull10) (wl, &endptr);
   if (endptr == wl) {
      VG_(gdb_printf) ("invalid integer or slice <from>..<to>\n");
      return False;
   }

   if (*endptr == '\0' && *next_non_space(*saveptr) != '.') {
      /* wl token is an integer terminating the string
         or else next token does not start with .
         In both cases, the slice is a single integer. */
      *to = *from;
      return True;
   }

   if (*endptr == '\0') {
      // iii .. => get the next token
      wl = VG_(strtok_r) (NULL, " .", saveptr);
   } else {
      // It must be iii..
      if (*endptr != '.' && *(endptr+1) != '.') {
         VG_(gdb_printf) ("expecting slice <from>..<to>\n");
         return False;
      }
      if ( *(endptr+2) == ' ') {
         // It must be iii.. jjj  => get the next token
         wl = VG_(strtok_r) (NULL, " .", saveptr);
      } else {
         // It must be iii..jjj
         wl = endptr + 2;
      }
   }

   *to = VG_(strtoull10) (wl, &endptr);
   if (*endptr != '\0') {
      VG_(gdb_printf) ("missing/wrong 'to' of slice <from>..<to>\n");
      return False;
   }

   if (*from > *to) {
      VG_(gdb_printf) ("<from> cannot be bigger than <to> "
                       "in slice <from>..<to>\n");
      return False;
   }

   return True;
}
/* return True if request recognised, False otherwise */
static Bool handle_gdb_monitor_command (ThreadId tid, HChar *req)
{
   HChar* wcmd;
   HChar s[VG_(strlen)(req) + 1]; /* copy for strtok_r */
   HChar* ssaveptr;

   VG_(strcpy) (s, req);

   wcmd = VG_(strtok_r) (s, " ", &ssaveptr);
   /* NB: if possible, avoid introducing a new command below which
      starts with the same first letter(s) as an already existing
      command. This ensures a shorter abbreviation for the user. */
   switch (VG_(keyword_id)
           ("help get_vbits leak_check make_memory check_memory "
            "block_list who_points_at xb xtmemory",
            wcmd, kwd_report_duplicated_matches)) {
   case -2: /* multiple matches */
      return True;
   case -1: /* not found */
      return False;
   case  0: /* help */
      print_monitor_help();
      return True;
   case  1: { /* get_vbits */
      Addr address;
      SizeT szB = 1;
      if (VG_(strtok_get_address_and_size) (&address, &szB, &ssaveptr)) {
         UChar vbits;
         Int i;
         Int unaddressable = 0;
         for (i = 0; i < szB; i++) {
            Int res = mc_get_or_set_vbits_for_client
               (address+i, (Addr) &vbits, 1,
                False, /* get them */
                False  /* is client request */ );
            /* we are before the first character on next line, print a \n. */
            if ((i % 32) == 0 && i != 0)
               VG_(printf) ("\n");
            /* we are before the next block of 4 starts, print a space. */
            else if ((i % 4) == 0 && i != 0)
               VG_(printf) (" ");
            if (res == 1) {
               VG_(printf) ("%02x", vbits);
            } else {
               tl_assert(3 == res);
               unaddressable++;
               VG_(printf) ("__");
            }
         }
         VG_(printf) ("\n");
         if (unaddressable) {
            VG_(printf)
               ("Address %p len %lu has %d bytes unaddressable\n",
                (void *)address, szB, unaddressable);
         }
      }
      return True;
   }
   case  2: { /* leak_check */
      Int err = 0;
      LeakCheckParams lcp;
      HChar* xt_filename = NULL;
      HChar* kw;

      lcp.mode                    = LC_Full;
      lcp.show_leak_kinds         = R2S(Possible) | R2S(Unreached);
      lcp.errors_for_leak_kinds   = 0; // no errors for interactive leak search.
      lcp.heuristics              = MC_(clo_leak_check_heuristics);
      lcp.deltamode               = LCD_Increased;
      lcp.max_loss_records_output = 999999999;
      lcp.requested_by_monitor_command = True;
      lcp.xt_filename             = NULL;

      for (kw = VG_(strtok_r) (NULL, " ", &ssaveptr);
           kw != NULL;
           kw = VG_(strtok_r) (NULL, " ", &ssaveptr)) {
         switch (VG_(keyword_id)
                 ("full summary xtleak "
                  "kinds reachable possibleleak definiteleak "
                  "heuristics "
                  "new increased changed any "
                  "unlimited limited ",
                  kw, kwd_report_all)) {
         case -2: err++; break;
         case -1: err++; break;
         case  0: /* full */
            lcp.mode = LC_Full; break;
         case  1: /* summary */
            lcp.mode = LC_Summary; break;
         case  2: /* xtleak */
            lcp.mode = LC_Full;
            xt_filename
               = VG_(expand_file_name)("--xtleak-mc_main.c",
                                       "xtleak.kcg.%p.%n");
            lcp.xt_filename = xt_filename;
            break;
         case  3: { /* kinds */
            wcmd = VG_(strtok_r) (NULL, " ", &ssaveptr);
            if (wcmd == NULL
                || !VG_(parse_enum_set)(MC_(parse_leak_kinds_tokens),
                                        True/*allow_all*/,
                                        wcmd,
                                        &lcp.show_leak_kinds)) {
               VG_(gdb_printf) ("missing or malformed leak kinds set\n");
               err++;
            }
            break;
         }
         case  4: /* reachable */
            lcp.show_leak_kinds = MC_(all_Reachedness)();
            break;
         case  5: /* possibleleak */
            lcp.show_leak_kinds
               = R2S(Possible) | R2S(IndirectLeak) | R2S(Unreached);
            break;
         case  6: /* definiteleak */
            lcp.show_leak_kinds = R2S(Unreached);
            break;
         case  7: { /* heuristics */
            wcmd = VG_(strtok_r) (NULL, " ", &ssaveptr);
            if (wcmd == NULL
                || !VG_(parse_enum_set)(MC_(parse_leak_heuristics_tokens),
                                        True/*allow_all*/,
                                        wcmd,
                                        &lcp.heuristics)) {
               VG_(gdb_printf) ("missing or malformed heuristics set\n");
               err++;
            }
            break;
         }
         case  8: /* new */
            lcp.deltamode = LCD_New; break;
         case  9: /* increased */
            lcp.deltamode = LCD_Increased; break;
         case 10: /* changed */
            lcp.deltamode = LCD_Changed; break;
         case 11: /* any */
            lcp.deltamode = LCD_Any; break;
         case 12: /* unlimited */
            lcp.max_loss_records_output = 999999999; break;
         case 13: { /* limited */
            Int int_value;
            const HChar* endptr;

            wcmd = VG_(strtok_r) (NULL, " ", &ssaveptr);
            if (wcmd == NULL) {
               int_value = 0;
               endptr = "empty"; /* to report an error below */
            } else {
               HChar* the_end;
               int_value = VG_(strtoll10) (wcmd, &the_end);
               endptr = the_end;
            }
            if (*endptr != '\0')
               VG_(gdb_printf) ("missing or malformed integer value\n");
            else if (int_value > 0)
               lcp.max_loss_records_output = (UInt) int_value;
            else
               VG_(gdb_printf) ("max_loss_records_output must be >= 1,"
                                " got %d\n", int_value);
            break;
         }
         default:
            tl_assert (0);
         }
      }
      if (!err)
         MC_(detect_memory_leaks)(tid, &lcp);
      if (xt_filename != NULL)
         VG_(free)(xt_filename);
      return True;
   }
   case 3: { /* make_memory */
      Addr address;
      SizeT szB = 1;
      Int kwdid = VG_(keyword_id)
         ("noaccess undefined defined Definedifaddressable",
          VG_(strtok_r) (NULL, " ", &ssaveptr), kwd_report_all);
      if (!VG_(strtok_get_address_and_size) (&address, &szB, &ssaveptr))
         return True;
      switch (kwdid) {
      case -2: break;
      case -1: break;
      case  0: MC_(make_mem_noaccess) (address, szB); break;
      case  1: make_mem_undefined_w_tid_and_okind ( address, szB, tid,
                                                    MC_OKIND_USER ); break;
      case  2: MC_(make_mem_defined) ( address, szB ); break;
      case  3: make_mem_defined_if_addressable ( address, szB ); break;
      default: tl_assert(0);
      }
      return True;
   }
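   /* Illustrative sketch (editor's addition): the four keywords map onto
      the A/V bit primitives, e.g.
         (gdb) monitor make_memory noaccess 0x8049a20 16
         (gdb) monitor make_memory defined  0x8049a20 16
      mirroring the VALGRIND_MAKE_MEM_* client requests handled further
      down in mc_handle_client_request. */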
   case 4: { /* check_memory */
      Addr address;
      SizeT szB = 1;
      Addr bad_addr;
      UInt okind;
      const HChar* src;
      UInt otag;
      UInt ecu;
      ExeContext* origin_ec;
      MC_ReadResult res;

      Int kwdid = VG_(keyword_id)
         ("addressable defined",
          VG_(strtok_r) (NULL, " ", &ssaveptr), kwd_report_all);
      if (!VG_(strtok_get_address_and_size) (&address, &szB, &ssaveptr))
         return True;
      switch (kwdid) {
      case -2: break;
      case -1: break;
      case  0: /* addressable */
         if (is_mem_addressable ( address, szB, &bad_addr ))
            VG_(printf) ("Address %p len %lu addressable\n",
                         (void *)address, szB);
         else
            VG_(printf)
               ("Address %p len %lu not addressable:\nbad address %p\n",
                (void *)address, szB, (void *) bad_addr);
         // Describe this (probably live) address with current epoch
         MC_(pp_describe_addr) (VG_(current_DiEpoch)(), address);
         break;
      case  1: /* defined */
         res = is_mem_defined ( address, szB, &bad_addr, &otag );
         if (MC_AddrErr == res)
            VG_(printf)
               ("Address %p len %lu not addressable:\nbad address %p\n",
                (void *)address, szB, (void *) bad_addr);
         else if (MC_ValueErr == res) {
            okind = otag & 3;
            switch (okind) {
            case MC_OKIND_STACK:
               src = " was created by a stack allocation"; break;
            case MC_OKIND_HEAP:
               src = " was created by a heap allocation"; break;
            case MC_OKIND_USER:
               src = " was created by a client request"; break;
            case MC_OKIND_UNKNOWN:
               src = ""; break;
            default: tl_assert(0);
            }
            VG_(printf)
               ("Address %p len %lu not defined:\n"
                "Uninitialised value at %p%s\n",
                (void *)address, szB, (void *) bad_addr, src);
            ecu = otag & ~3;
            if (VG_(is_plausible_ECU)(ecu)) {
               origin_ec = VG_(get_ExeContext_from_ECU)( ecu );
               VG_(pp_ExeContext)( origin_ec );
            }
         }
         else
            VG_(printf) ("Address %p len %lu defined\n",
                         (void *)address, szB);
         // Describe this (probably live) address with current epoch
         MC_(pp_describe_addr) (VG_(current_DiEpoch)(), address);
         break;
      default: tl_assert(0);
      }
      return True;
   }
   case 5: { /* block_list */
      HChar* wl;
      HChar* the_end;
      UInt lr_nr_from = 0;
      UInt lr_nr_to = 0;

      if (VG_(parse_slice) (NULL, &ssaveptr, &lr_nr_from, &lr_nr_to)) {
         UInt limit_blocks = 999999999;
         Int int_value;
         UInt heuristics = 0;

         for (wl = VG_(strtok_r) (NULL, " ", &ssaveptr);
              wl != NULL;
              wl = VG_(strtok_r) (NULL, " ", &ssaveptr)) {
            switch (VG_(keyword_id) ("unlimited limited heuristics ",
                                     wl, kwd_report_all)) {
            case -2: return True;
            case -1: return True;
            case  0: /* unlimited */
               limit_blocks = 999999999; break;
            case  1: /* limited */
               wcmd = VG_(strtok_r) (NULL, " ", &ssaveptr);
               if (wcmd == NULL) {
                  VG_(gdb_printf) ("missing integer value\n");
                  return True;
               }
               int_value = VG_(strtoll10) (wcmd, &the_end);
               if (*the_end != '\0') {
                  VG_(gdb_printf) ("malformed integer value\n");
                  return True;
               }
               if (int_value <= 0) {
                  VG_(gdb_printf) ("max_blocks must be >= 1,"
                                   " got %d\n", int_value);
                  return True;
               }
               limit_blocks = (UInt) int_value;
               break;
            case  2: /* heuristics */
               wcmd = VG_(strtok_r) (NULL, " ", &ssaveptr);
               if (wcmd == NULL
                   || !VG_(parse_enum_set)(MC_(parse_leak_heuristics_tokens),
                                           True,/*allow_all*/
                                           wcmd,
                                           &heuristics)) {
                  VG_(gdb_printf) ("missing or malformed heuristics set\n");
                  return True;
               }
               break;
            default:
               tl_assert (0);
            }
         }
         /* subtract 1 from lr_nr_from/lr_nr_to as what is shown to the user
            is 1 more than the index in lr_array. */
         if (lr_nr_from == 0 || ! MC_(print_block_list) (lr_nr_from-1,
                                                         lr_nr_to-1,
                                                         limit_blocks,
                                                         heuristics))
            VG_(gdb_printf) ("invalid loss record nr\n");
      }
      return True;
   }
   case 6: { /* who_points_at */
      Addr address;
      SizeT szB = 1;

      if (!VG_(strtok_get_address_and_size) (&address, &szB, &ssaveptr))
         return True;
      if (address == (Addr) 0) {
         VG_(gdb_printf) ("Cannot search who points at 0x0\n");
         return True;
      }
      MC_(who_points_at) (address, szB);
      return True;
   }
   case 7: { /* xb */
      Addr address;
      SizeT szB = 1;
      if (VG_(strtok_get_address_and_size) (&address, &szB, &ssaveptr)) {
         UChar vbits[8];
         Int res[8];
         Int i;
         Int unaddressable = 0;
         for (i = 0; i < szB; i++) {
            Int bnr = i % 8;
            /* We are going to print the first vabits of a new line.
               Terminate the previous line if needed: prints a line with the
               address and the data. */
            if (bnr == 0) {
               if (i != 0) {
                  VG_(printf) ("\n");
                  gdb_xb (address + i - 8, 8, res);
               }
               VG_(printf) ("\t"); // To align VABITS with gdb_xb layout
            }
            res[bnr] = mc_get_or_set_vbits_for_client
               (address+i, (Addr) &vbits[bnr], 1,
                False, /* get them */
                False  /* is client request */ );
            if (res[bnr] == 1) {
               VG_(printf) ("\t %02x", vbits[bnr]);
            } else {
               tl_assert(3 == res[bnr]);
               unaddressable++;
               VG_(printf) ("\t __");
            }
         }
         VG_(printf) ("\n");
         if (szB % 8 == 0 && szB > 0)
            gdb_xb (address + szB - 8, 8, res);
         else
            gdb_xb (address + szB - szB % 8, szB % 8, res);
         if (unaddressable) {
            VG_(printf)
               ("Address %p len %lu has %d bytes unaddressable\n",
                (void *)address, szB, unaddressable);
         }
      }
      return True;
   }
   case 8: { /* xtmemory */
      HChar* filename;
      filename = VG_(strtok_r) (NULL, " ", &ssaveptr);
      MC_(xtmemory_report)(filename, False);
      return True;
   }

   default:
      tl_assert(0);
      return False;
   }
}
/*------------------------------------------------------------*/
/*--- Client requests                                      ---*/
/*------------------------------------------------------------*/
static Bool mc_handle_client_request ( ThreadId tid, UWord* arg, UWord* ret )
{
   Int   i;
   Addr  bad_addr;
   MC_Chunk* mc = NULL;

   if (!VG_IS_TOOL_USERREQ('M','C',arg[0])
       && VG_USERREQ__MALLOCLIKE_BLOCK != arg[0]
       && VG_USERREQ__RESIZEINPLACE_BLOCK != arg[0]
       && VG_USERREQ__FREELIKE_BLOCK   != arg[0]
       && VG_USERREQ__CREATE_MEMPOOL   != arg[0]
       && VG_USERREQ__DESTROY_MEMPOOL  != arg[0]
       && VG_USERREQ__MEMPOOL_ALLOC    != arg[0]
       && VG_USERREQ__MEMPOOL_FREE     != arg[0]
       && VG_USERREQ__MEMPOOL_TRIM     != arg[0]
       && VG_USERREQ__MOVE_MEMPOOL     != arg[0]
       && VG_USERREQ__MEMPOOL_CHANGE   != arg[0]
       && VG_USERREQ__MEMPOOL_EXISTS   != arg[0]
       && VG_USERREQ__GDB_MONITOR_COMMAND   != arg[0]
       && VG_USERREQ__ENABLE_ADDR_ERROR_REPORTING_IN_RANGE != arg[0]
       && VG_USERREQ__DISABLE_ADDR_ERROR_REPORTING_IN_RANGE != arg[0])
      return False;

   switch (arg[0]) {
      case VG_USERREQ__CHECK_MEM_IS_ADDRESSABLE: {
         Bool ok = is_mem_addressable ( arg[1], arg[2], &bad_addr );
         if (!ok)
            MC_(record_user_error) ( tid, bad_addr, /*isAddrErr*/True, 0 );
         *ret = ok ? (UWord)NULL : bad_addr;
         break;
      }

      case VG_USERREQ__CHECK_MEM_IS_DEFINED: {
         Bool errorV    = False;
         Addr bad_addrV = 0;
         UInt otagV     = 0;
         Bool errorA    = False;
         Addr bad_addrA = 0;
         is_mem_defined_comprehensive(
            arg[1], arg[2],
            &errorV, &bad_addrV, &otagV, &errorA, &bad_addrA
         );
         if (errorV) {
            MC_(record_user_error) ( tid, bad_addrV,
                                     /*isAddrErr*/False, otagV );
         }
         if (errorA) {
            MC_(record_user_error) ( tid, bad_addrA,
                                     /*isAddrErr*/True, 0 );
         }
         /* Return the lower of the two erring addresses, if any. */
         *ret = 0;
         if (errorV && !errorA) {
            *ret = bad_addrV;
         }
         if (!errorV && errorA) {
            *ret = bad_addrA;
         }
         if (errorV && errorA) {
            *ret = bad_addrV < bad_addrA ? bad_addrV : bad_addrA;
         }
         break;
      }
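      /* Illustrative sketch (editor's addition): on the client side these
         two requests are normally issued via the memcheck.h macros, e.g.

            char buf[8];
            VALGRIND_CHECK_MEM_IS_ADDRESSABLE(buf, sizeof buf);
            VALGRIND_CHECK_MEM_IS_DEFINED(buf, sizeof buf);

         A nonzero result is the first (lowest) erring address, as computed
         above. */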
      case VG_USERREQ__DO_LEAK_CHECK: {
         LeakCheckParams lcp;

         if (arg[1] == 0)
            lcp.mode = LC_Full;
         else if (arg[1] == 1)
            lcp.mode = LC_Summary;
         else {
            VG_(message)(Vg_UserMsg,
                         "Warning: unknown memcheck leak search mode\n");
            lcp.mode = LC_Full;
         }

         lcp.show_leak_kinds = MC_(clo_show_leak_kinds);
         lcp.errors_for_leak_kinds = MC_(clo_error_for_leak_kinds);
         lcp.heuristics = MC_(clo_leak_check_heuristics);

         if (arg[2] == 0)
            lcp.deltamode = LCD_Any;
         else if (arg[2] == 1)
            lcp.deltamode = LCD_Increased;
         else if (arg[2] == 2)
            lcp.deltamode = LCD_Changed;
         else if (arg[2] == 3)
            lcp.deltamode = LCD_New;
         else {
            VG_(message)
               (Vg_UserMsg,
                "Warning: unknown memcheck leak search deltamode\n");
            lcp.deltamode = LCD_Any;
         }
         lcp.max_loss_records_output = 999999999;
         lcp.requested_by_monitor_command = False;
         lcp.xt_filename = NULL;

         MC_(detect_memory_leaks)(tid, &lcp);
         *ret = 0; /* return value is meaningless */
         break;
      }
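      /* Illustrative sketch (editor's addition): arg[1]/arg[2] are set by
         the memcheck.h macros, e.g.

            VALGRIND_DO_LEAK_CHECK;          // full search, any deltamode
            VALGRIND_DO_ADDED_LEAK_CHECK;    // report increases only
            VALGRIND_DO_CHANGED_LEAK_CHECK;  // report changes only
            VALGRIND_DO_QUICK_LEAK_CHECK;    // summary mode
      */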
      case VG_USERREQ__MAKE_MEM_NOACCESS:
         MC_(make_mem_noaccess) ( arg[1], arg[2] );
         *ret = -1;
         break;

      case VG_USERREQ__MAKE_MEM_UNDEFINED:
         make_mem_undefined_w_tid_and_okind ( arg[1], arg[2], tid,
                                              MC_OKIND_USER );
         *ret = -1;
         break;

      case VG_USERREQ__MAKE_MEM_DEFINED:
         MC_(make_mem_defined) ( arg[1], arg[2] );
         *ret = -1;
         break;

      case VG_USERREQ__MAKE_MEM_DEFINED_IF_ADDRESSABLE:
         make_mem_defined_if_addressable ( arg[1], arg[2] );
         *ret = -1;
         break;

      case VG_USERREQ__CREATE_BLOCK: /* describe a block */
         if (arg[1] != 0 && arg[2] != 0) {
            i = alloc_client_block();
            /* VG_(printf)("allocated %d %p\n", i, cgbs); */
            cgbs[i].start = arg[1];
            cgbs[i].size  = arg[2];
            cgbs[i].desc  = VG_(strdup)("mc.mhcr.1", (HChar *)arg[3]);
            cgbs[i].where = VG_(record_ExeContext) ( tid, 0/*first_ip_delta*/ );
            *ret = i;
         } else
            *ret = -1;
         break;

      case VG_USERREQ__DISCARD: /* discard */
         if (cgbs == NULL
             || arg[2] >= cgb_used ||
             (cgbs[arg[2]].start == 0 && cgbs[arg[2]].size == 0)) {
            *ret = 1;
         } else {
            tl_assert(arg[2] >= 0 && arg[2] < cgb_used);
            cgbs[arg[2]].start = cgbs[arg[2]].size = 0;
            VG_(free)(cgbs[arg[2]].desc);
            *ret = 0;
         }
         break;
      case VG_USERREQ__GET_VBITS:
         *ret = mc_get_or_set_vbits_for_client
                   ( arg[1], arg[2], arg[3],
                     False /* get them */,
                     True /* is client request */ );
         break;

      case VG_USERREQ__SET_VBITS:
         *ret = mc_get_or_set_vbits_for_client
                   ( arg[1], arg[2], arg[3],
                     True /* set them */,
                     True /* is client request */ );
         break;

      case VG_USERREQ__COUNT_LEAKS: { /* count leaked bytes */
         UWord** argp = (UWord**)arg;
         // MC_(bytes_leaked) et al were set by the last leak check (or zero
         // if no prior leak checks performed).
         *argp[1] = MC_(bytes_leaked) + MC_(bytes_indirect);
         *argp[2] = MC_(bytes_dubious);
         *argp[3] = MC_(bytes_reachable);
         *argp[4] = MC_(bytes_suppressed);
         // there is no argp[5]
         //*argp[5] = MC_(bytes_indirect);
         // XXX need to make *argp[1-4] defined; currently done in the
         // VALGRIND_COUNT_LEAKS_MACRO by initialising them to zero.
         *ret = 0;
         return True;
      }

      case VG_USERREQ__COUNT_LEAK_BLOCKS: { /* count leaked blocks */
         UWord** argp = (UWord**)arg;
         // MC_(blocks_leaked) et al were set by the last leak check (or zero
         // if no prior leak checks performed).
         *argp[1] = MC_(blocks_leaked) + MC_(blocks_indirect);
         *argp[2] = MC_(blocks_dubious);
         *argp[3] = MC_(blocks_reachable);
         *argp[4] = MC_(blocks_suppressed);
         // there is no argp[5]
         //*argp[5] = MC_(blocks_indirect);
         // XXX need to make *argp[1-4] defined; currently done in the
         // VALGRIND_COUNT_LEAK_BLOCKS_MACRO by initialising them to zero.
         *ret = 0;
         return True;
      }
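      /* Illustrative sketch (editor's addition): client-side consumption of
         the two counting requests above, via the memcheck.h macros, which
         also pre-initialise the counters to zero so they are defined before
         the tool writes to them:

            unsigned long leaked = 0, dubious = 0, reachable = 0, suppressed = 0;
            VALGRIND_DO_LEAK_CHECK;
            VALGRIND_COUNT_LEAKS(leaked, dubious, reachable, suppressed);
      */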
      case VG_USERREQ__MALLOCLIKE_BLOCK: {
         Addr p         = (Addr)arg[1];
         SizeT sizeB    =       arg[2];
         UInt rzB       =       arg[3];
         Bool is_zeroed = (Bool)arg[4];

         MC_(new_block) ( tid, p, sizeB, /*ignored*/0U, 0U, is_zeroed,
                          MC_AllocCustom, MC_(malloc_list) );
         if (rzB > 0) {
            MC_(make_mem_noaccess) ( p - rzB, rzB);
            MC_(make_mem_noaccess) ( p + sizeB, rzB);
         }
         return True;
      }

      case VG_USERREQ__RESIZEINPLACE_BLOCK: {
         Addr p         = (Addr)arg[1];
         SizeT oldSizeB =       arg[2];
         SizeT newSizeB =       arg[3];
         UInt rzB       =       arg[4];

         MC_(handle_resizeInPlace) ( tid, p, oldSizeB, newSizeB, rzB );
         return True;
      }

      case VG_USERREQ__FREELIKE_BLOCK: {
         Addr p         = (Addr)arg[1];
         UInt rzB       =       arg[2];

         MC_(handle_free) ( tid, p, rzB, MC_AllocCustom );
         return True;
      }
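      /* Illustrative sketch (editor's addition): how a custom allocator
         typically drives the requests above (see memcheck.h for the full
         contract).  pool_carve is a hypothetical allocator function:

            void* p = pool_carve(pool, req_szB);
            VALGRIND_MALLOCLIKE_BLOCK(p, req_szB, 0, 0);  // rzB=0, not zeroed
            // ... use the block ...
            VALGRIND_FREELIKE_BLOCK(p, 0);
      */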
      case _VG_USERREQ__MEMCHECK_RECORD_OVERLAP_ERROR: {
         HChar* s  = (HChar*)arg[1];
         Addr  dst = (Addr) arg[2];
         Addr  src = (Addr) arg[3];
         SizeT len = (SizeT)arg[4];
         MC_(record_overlap_error)(tid, s, src, dst, len);
         return True;
      }
      case _VG_USERREQ__MEMCHECK_VERIFY_ALIGNMENT: {
         struct AlignedAllocInfo *aligned_alloc_info = (struct AlignedAllocInfo *)arg[1];
         tl_assert(aligned_alloc_info);

         switch (aligned_alloc_info->alloc_kind) {
         case AllocKindMemalign:
            // other platforms just ensure it is a power of 2
            // ignore Illumos only enforcing multiple of 4 (probably a bug)
            if (aligned_alloc_info->orig_alignment == 0U ||
                (aligned_alloc_info->orig_alignment & (aligned_alloc_info->orig_alignment - 1)) != 0) {
               MC_(record_bad_alignment) ( tid, aligned_alloc_info->orig_alignment, 0U, " (should be power of 2)" );
            }
            // size zero not allowed on all platforms (e.g. Illumos)
            if (aligned_alloc_info->size == 0) {
               MC_(record_bad_size) ( tid, aligned_alloc_info->size, "memalign()" );
            }
            break;
         case AllocKindPosixMemalign:
            // must be power of 2
            // alignment at least sizeof(size_t)
            // size of 0 implementation defined
            if (aligned_alloc_info->orig_alignment < sizeof(SizeT) ||
                (aligned_alloc_info->orig_alignment & (aligned_alloc_info->orig_alignment - 1)) != 0) {
               MC_(record_bad_alignment) ( tid, aligned_alloc_info->orig_alignment, 0U, " (should be non-zero, a power of 2 and a multiple of sizeof(void*))" );
            }
            if (aligned_alloc_info->size == 0) {
               MC_(record_bad_size) ( tid, aligned_alloc_info->size, "posix_memalign()" );
            }
            break;
         case AllocKindAlignedAlloc:
            // must be power of 2
            if ((aligned_alloc_info->orig_alignment & (aligned_alloc_info->orig_alignment - 1)) != 0) {
               MC_(record_bad_alignment) ( tid, aligned_alloc_info->orig_alignment, 0U, " (should be a power of 2)" );
            }
            // size should be integral multiple of alignment
            if (aligned_alloc_info->orig_alignment &&
                aligned_alloc_info->size % aligned_alloc_info->orig_alignment != 0U) {
               MC_(record_bad_alignment) ( tid, aligned_alloc_info->orig_alignment, aligned_alloc_info->size, " (size should be a multiple of alignment)" );
            }
            if (aligned_alloc_info->size == 0) {
               MC_(record_bad_size) ( tid, aligned_alloc_info->size, "aligned_alloc()" );
            }
            break;
         case AllocKindDeleteSized:
            mc = VG_(HT_lookup) ( MC_(malloc_list), (UWord)aligned_alloc_info->mem );
            if (mc && mc->szB != aligned_alloc_info->size) {
               MC_(record_size_mismatch_error) ( tid, mc, aligned_alloc_info->size, "new/delete" );
            }
            break;
         case AllocKindVecDeleteSized:
            mc = VG_(HT_lookup) ( MC_(malloc_list), (UWord)aligned_alloc_info->mem );
            if (mc && mc->szB != aligned_alloc_info->size) {
               MC_(record_size_mismatch_error) ( tid, mc, aligned_alloc_info->size, "new[]/delete[]" );
            }
            break;
         case AllocKindNewAligned:
            if (aligned_alloc_info->orig_alignment == 0 ||
                (aligned_alloc_info->orig_alignment & (aligned_alloc_info->orig_alignment - 1)) != 0) {
               MC_(record_bad_alignment) ( tid, aligned_alloc_info->orig_alignment, 0U, " (should be non-zero and a power of 2)" );
            }
            break;
         case AllocKindVecNewAligned:
            if (aligned_alloc_info->orig_alignment == 0 ||
                (aligned_alloc_info->orig_alignment & (aligned_alloc_info->orig_alignment - 1)) != 0) {
               MC_(record_bad_alignment) ( tid, aligned_alloc_info->orig_alignment, 0U, " (should be non-zero and a power of 2)" );
            }
            break;
         case AllocKindDeleteAligned:
            if (aligned_alloc_info->orig_alignment == 0 ||
                (aligned_alloc_info->orig_alignment & (aligned_alloc_info->orig_alignment - 1)) != 0) {
               MC_(record_bad_alignment) ( tid, aligned_alloc_info->orig_alignment, 0U, " (should be non-zero and a power of 2)" );
            }
            mc = VG_(HT_lookup) ( MC_(malloc_list), (UWord)aligned_alloc_info->mem );
            if (mc && aligned_alloc_info->orig_alignment != mc->alignB) {
               MC_(record_align_mismatch_error) ( tid, mc, aligned_alloc_info->orig_alignment, "new/delete");
            }
            break;
         case AllocKindVecDeleteAligned:
            if (aligned_alloc_info->orig_alignment == 0 ||
                (aligned_alloc_info->orig_alignment & (aligned_alloc_info->orig_alignment - 1)) != 0) {
               MC_(record_bad_alignment) ( tid, aligned_alloc_info->orig_alignment, 0U, " (should be non-zero and a power of 2)" );
            }
            mc = VG_(HT_lookup) ( MC_(malloc_list), (UWord)aligned_alloc_info->mem );
            if (mc && aligned_alloc_info->orig_alignment != mc->alignB) {
               MC_(record_align_mismatch_error) ( tid, mc, aligned_alloc_info->orig_alignment, "new[]/delete[]");
            }
            break;
         case AllocKindDeleteSizedAligned:
            mc = VG_(HT_lookup) ( MC_(malloc_list), (UWord)aligned_alloc_info->mem );
            if (mc && mc->szB != aligned_alloc_info->size) {
               MC_(record_size_mismatch_error) ( tid, mc, aligned_alloc_info->size, "new/delete");
            }
            if (mc && aligned_alloc_info->orig_alignment != mc->alignB) {
               MC_(record_align_mismatch_error) ( tid, mc, aligned_alloc_info->orig_alignment, "new/delete");
            }
            if (aligned_alloc_info->orig_alignment == 0 ||
                (aligned_alloc_info->orig_alignment & (aligned_alloc_info->orig_alignment - 1)) != 0) {
               MC_(record_bad_alignment) ( tid, aligned_alloc_info->orig_alignment, 0U, " (should be non-zero and a power of 2)" );
            }
            break;
         case AllocKindVecDeleteSizedAligned:
            mc = VG_(HT_lookup) ( MC_(malloc_list), (UWord)aligned_alloc_info->mem );
            if (mc && mc->szB != aligned_alloc_info->size) {
               MC_(record_size_mismatch_error) ( tid, mc, aligned_alloc_info->size, "new[]/delete[]" );
            }
            if (mc && aligned_alloc_info->orig_alignment != mc->alignB) {
               MC_(record_align_mismatch_error) ( tid, mc, aligned_alloc_info->orig_alignment, "new[]/delete[]");
            }
            if (aligned_alloc_info->orig_alignment == 0 ||
                (aligned_alloc_info->orig_alignment & (aligned_alloc_info->orig_alignment - 1)) != 0) {
               MC_(record_bad_alignment) ( tid, aligned_alloc_info->orig_alignment, 0U, " (should be non-zero and a power of 2)" );
            }
            break;
         default:
            tl_assert (0);
         }

         return True;
      }
      case VG_USERREQ__CREATE_MEMPOOL: {
         Addr pool      = (Addr)arg[1];
         UInt rzB       =       arg[2];
         Bool is_zeroed = (Bool)arg[3];
         UInt flags     =       arg[4];

         // The create_mempool function does not know these mempool flags,
         // pass as booleans.
         MC_(create_mempool) ( pool, rzB, is_zeroed,
                               (flags & VALGRIND_MEMPOOL_AUTO_FREE),
                               (flags & VALGRIND_MEMPOOL_METAPOOL) );
         return True;
      }

      case VG_USERREQ__DESTROY_MEMPOOL: {
         Addr pool      = (Addr)arg[1];

         MC_(destroy_mempool) ( pool );
         return True;
      }

      case VG_USERREQ__MEMPOOL_ALLOC: {
         Addr pool      = (Addr)arg[1];
         Addr addr      = (Addr)arg[2];
         UInt size      =       arg[3];

         MC_(mempool_alloc) ( tid, pool, addr, size );
         return True;
      }

      case VG_USERREQ__MEMPOOL_FREE: {
         Addr pool      = (Addr)arg[1];
         Addr addr      = (Addr)arg[2];

         MC_(mempool_free) ( pool, addr );
         return True;
      }

      case VG_USERREQ__MEMPOOL_TRIM: {
         Addr pool      = (Addr)arg[1];
         Addr addr      = (Addr)arg[2];
         UInt size      =       arg[3];

         MC_(mempool_trim) ( pool, addr, size );
         return True;
      }

      case VG_USERREQ__MOVE_MEMPOOL: {
         Addr poolA     = (Addr)arg[1];
         Addr poolB     = (Addr)arg[2];

         MC_(move_mempool) ( poolA, poolB );
         return True;
      }

      case VG_USERREQ__MEMPOOL_CHANGE: {
         Addr pool      = (Addr)arg[1];
         Addr addrA     = (Addr)arg[2];
         Addr addrB     = (Addr)arg[3];
         UInt size      =       arg[4];

         MC_(mempool_change) ( pool, addrA, addrB, size );
         return True;
      }

      case VG_USERREQ__MEMPOOL_EXISTS: {
         Addr pool      = (Addr)arg[1];

         *ret = (UWord) MC_(mempool_exists) ( pool );
         return True;
      }
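      /* Illustrative sketch (editor's addition): the mempool requests above
         correspond to the valgrind.h macros, used roughly as:

            VALGRIND_CREATE_MEMPOOL(pool, rzB, is_zeroed);
            VALGRIND_MEMPOOL_ALLOC(pool, addr, size);
            VALGRIND_MEMPOOL_FREE(pool, addr);
            VALGRIND_DESTROY_MEMPOOL(pool);

         where pool is any address uniquely identifying the pool (often the
         pool header itself). */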
      case VG_USERREQ__GDB_MONITOR_COMMAND: {
         Bool handled = handle_gdb_monitor_command (tid, (HChar*)arg[1]);
         if (handled)
            *ret = 1;
         else
            *ret = 0;
         return handled;
      }

      case VG_USERREQ__DISABLE_ADDR_ERROR_REPORTING_IN_RANGE:
      case VG_USERREQ__ENABLE_ADDR_ERROR_REPORTING_IN_RANGE: {
         Bool addRange
            = arg[0] == VG_USERREQ__DISABLE_ADDR_ERROR_REPORTING_IN_RANGE;
         Bool ok
            = modify_ignore_ranges(addRange, arg[1], arg[2]);
         tl_assert(ok);
         *ret = 1;
         break;
      }

      default:
         VG_(message)(
            Vg_UserMsg,
            "Warning: unknown memcheck client request code %llx\n",
            (ULong)arg[0]
         );
         return False;
   }
   return True;
}
/*------------------------------------------------------------*/
/*--- Crude profiling machinery.                           ---*/
/*------------------------------------------------------------*/

// We track a number of interesting events (using PROF_EVENT)
// if MC_PROFILE_MEMORY is defined.

#ifdef MC_PROFILE_MEMORY

ULong MC_(event_ctr)[MCPE_LAST];

/* Event counter names.  Use the name of the function that increases the
   event counter.  Drop any MC_() and mc_ prefixes. */
static const HChar* MC_(event_ctr_name)[MCPE_LAST] = {
   [MCPE_LOADVN_SLOW] = "LOADVn_slow",
   [MCPE_LOADVN_SLOW_LOOP] = "LOADVn_slow_loop",
   [MCPE_STOREVN_SLOW] = "STOREVn_slow",
   [MCPE_STOREVN_SLOW_LOOP] = "STOREVn_slow(loop)",
   [MCPE_MAKE_ALIGNED_WORD32_UNDEFINED] = "make_aligned_word32_undefined",
   [MCPE_MAKE_ALIGNED_WORD32_UNDEFINED_SLOW] =
        "make_aligned_word32_undefined_slow",
   [MCPE_MAKE_ALIGNED_WORD64_UNDEFINED] = "make_aligned_word64_undefined",
   [MCPE_MAKE_ALIGNED_WORD64_UNDEFINED_SLOW] =
        "make_aligned_word64_undefined_slow",
   [MCPE_MAKE_ALIGNED_WORD32_NOACCESS] = "make_aligned_word32_noaccess",
   [MCPE_MAKE_ALIGNED_WORD32_NOACCESS_SLOW] =
        "make_aligned_word32_noaccess_slow",
   [MCPE_MAKE_ALIGNED_WORD64_NOACCESS] = "make_aligned_word64_noaccess",
   [MCPE_MAKE_ALIGNED_WORD64_NOACCESS_SLOW] =
        "make_aligned_word64_noaccess_slow",
   [MCPE_MAKE_MEM_NOACCESS] = "make_mem_noaccess",
   [MCPE_MAKE_MEM_UNDEFINED] = "make_mem_undefined",
   [MCPE_MAKE_MEM_UNDEFINED_W_OTAG] = "make_mem_undefined_w_otag",
   [MCPE_MAKE_MEM_DEFINED] = "make_mem_defined",
   [MCPE_CHEAP_SANITY_CHECK] = "cheap_sanity_check",
   [MCPE_EXPENSIVE_SANITY_CHECK] = "expensive_sanity_check",
   [MCPE_COPY_ADDRESS_RANGE_STATE] = "copy_address_range_state",
   [MCPE_COPY_ADDRESS_RANGE_STATE_LOOP1] = "copy_address_range_state(loop1)",
   [MCPE_COPY_ADDRESS_RANGE_STATE_LOOP2] = "copy_address_range_state(loop2)",
   [MCPE_CHECK_MEM_IS_NOACCESS] = "check_mem_is_noaccess",
   [MCPE_CHECK_MEM_IS_NOACCESS_LOOP] = "check_mem_is_noaccess(loop)",
   [MCPE_IS_MEM_ADDRESSABLE] = "is_mem_addressable",
   [MCPE_IS_MEM_ADDRESSABLE_LOOP] = "is_mem_addressable(loop)",
   [MCPE_IS_MEM_DEFINED] = "is_mem_defined",
   [MCPE_IS_MEM_DEFINED_LOOP] = "is_mem_defined(loop)",
   [MCPE_IS_MEM_DEFINED_COMPREHENSIVE] = "is_mem_defined_comprehensive",
   [MCPE_IS_MEM_DEFINED_COMPREHENSIVE_LOOP] =
        "is_mem_defined_comprehensive(loop)",
   [MCPE_IS_DEFINED_ASCIIZ] = "is_defined_asciiz",
   [MCPE_IS_DEFINED_ASCIIZ_LOOP] = "is_defined_asciiz(loop)",
   [MCPE_FIND_CHUNK_FOR_OLD] = "find_chunk_for_OLD",
   [MCPE_FIND_CHUNK_FOR_OLD_LOOP] = "find_chunk_for_OLD(loop)",
   [MCPE_SET_ADDRESS_RANGE_PERMS] = "set_address_range_perms",
   [MCPE_SET_ADDRESS_RANGE_PERMS_SINGLE_SECMAP] =
        "set_address_range_perms(single-secmap)",
   [MCPE_SET_ADDRESS_RANGE_PERMS_STARTOF_SECMAP] =
        "set_address_range_perms(startof-secmap)",
   [MCPE_SET_ADDRESS_RANGE_PERMS_MULTIPLE_SECMAPS] =
        "set_address_range_perms(multiple-secmaps)",
   [MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM1] =
        "set_address_range_perms(dist-sm1)",
   [MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM2] =
        "set_address_range_perms(dist-sm2)",
   [MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM1_QUICK] =
        "set_address_range_perms(dist-sm1-quick)",
   [MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM2_QUICK] =
        "set_address_range_perms(dist-sm2-quick)",
   [MCPE_SET_ADDRESS_RANGE_PERMS_LOOP1A] = "set_address_range_perms(loop1a)",
   [MCPE_SET_ADDRESS_RANGE_PERMS_LOOP1B] = "set_address_range_perms(loop1b)",
   [MCPE_SET_ADDRESS_RANGE_PERMS_LOOP1C] = "set_address_range_perms(loop1c)",
   [MCPE_SET_ADDRESS_RANGE_PERMS_LOOP8A] = "set_address_range_perms(loop8a)",
   [MCPE_SET_ADDRESS_RANGE_PERMS_LOOP8B] = "set_address_range_perms(loop8b)",
   [MCPE_SET_ADDRESS_RANGE_PERMS_LOOP64K] = "set_address_range_perms(loop64K)",
   [MCPE_SET_ADDRESS_RANGE_PERMS_LOOP64K_FREE_DIST_SM] =
        "set_address_range_perms(loop64K-free-dist-sm)",
   [MCPE_LOADV_128_OR_256_SLOW_LOOP] = "LOADV_128_or_256_slow(loop)",
   [MCPE_LOADV_128_OR_256] = "LOADV_128_or_256",
   [MCPE_LOADV_128_OR_256_SLOW1] = "LOADV_128_or_256-slow1",
   [MCPE_LOADV_128_OR_256_SLOW2] = "LOADV_128_or_256-slow2",
   [MCPE_LOADV64] = "LOADV64",
   [MCPE_LOADV64_SLOW1] = "LOADV64-slow1",
   [MCPE_LOADV64_SLOW2] = "LOADV64-slow2",
   [MCPE_STOREV64] = "STOREV64",
   [MCPE_STOREV64_SLOW1] = "STOREV64-slow1",
   [MCPE_STOREV64_SLOW2] = "STOREV64-slow2",
   [MCPE_STOREV64_SLOW3] = "STOREV64-slow3",
   [MCPE_STOREV64_SLOW4] = "STOREV64-slow4",
   [MCPE_LOADV32] = "LOADV32",
   [MCPE_LOADV32_SLOW1] = "LOADV32-slow1",
   [MCPE_LOADV32_SLOW2] = "LOADV32-slow2",
   [MCPE_STOREV32] = "STOREV32",
   [MCPE_STOREV32_SLOW1] = "STOREV32-slow1",
   [MCPE_STOREV32_SLOW2] = "STOREV32-slow2",
   [MCPE_STOREV32_SLOW3] = "STOREV32-slow3",
   [MCPE_STOREV32_SLOW4] = "STOREV32-slow4",
   [MCPE_LOADV16] = "LOADV16",
   [MCPE_LOADV16_SLOW1] = "LOADV16-slow1",
   [MCPE_LOADV16_SLOW2] = "LOADV16-slow2",
   [MCPE_STOREV16] = "STOREV16",
   [MCPE_STOREV16_SLOW1] = "STOREV16-slow1",
   [MCPE_STOREV16_SLOW2] = "STOREV16-slow2",
   [MCPE_STOREV16_SLOW3] = "STOREV16-slow3",
   [MCPE_STOREV16_SLOW4] = "STOREV16-slow4",
   [MCPE_LOADV8] = "LOADV8",
   [MCPE_LOADV8_SLOW1] = "LOADV8-slow1",
   [MCPE_LOADV8_SLOW2] = "LOADV8-slow2",
   [MCPE_STOREV8] = "STOREV8",
   [MCPE_STOREV8_SLOW1] = "STOREV8-slow1",
   [MCPE_STOREV8_SLOW2] = "STOREV8-slow2",
   [MCPE_STOREV8_SLOW3] = "STOREV8-slow3",
   [MCPE_STOREV8_SLOW4] = "STOREV8-slow4",
   [MCPE_NEW_MEM_STACK_4] = "new_mem_stack_4",
   [MCPE_NEW_MEM_STACK_8] = "new_mem_stack_8",
   [MCPE_NEW_MEM_STACK_12] = "new_mem_stack_12",
   [MCPE_NEW_MEM_STACK_16] = "new_mem_stack_16",
   [MCPE_NEW_MEM_STACK_32] = "new_mem_stack_32",
   [MCPE_NEW_MEM_STACK_112] = "new_mem_stack_112",
   [MCPE_NEW_MEM_STACK_128] = "new_mem_stack_128",
   [MCPE_NEW_MEM_STACK_144] = "new_mem_stack_144",
   [MCPE_NEW_MEM_STACK_160] = "new_mem_stack_160",
   [MCPE_DIE_MEM_STACK_4] = "die_mem_stack_4",
   [MCPE_DIE_MEM_STACK_8] = "die_mem_stack_8",
   [MCPE_DIE_MEM_STACK_12] = "die_mem_stack_12",
   [MCPE_DIE_MEM_STACK_16] = "die_mem_stack_16",
   [MCPE_DIE_MEM_STACK_32] = "die_mem_stack_32",
   [MCPE_DIE_MEM_STACK_112] = "die_mem_stack_112",
   [MCPE_DIE_MEM_STACK_128] = "die_mem_stack_128",
   [MCPE_DIE_MEM_STACK_144] = "die_mem_stack_144",
   [MCPE_DIE_MEM_STACK_160] = "die_mem_stack_160",
   [MCPE_NEW_MEM_STACK] = "new_mem_stack",
   [MCPE_DIE_MEM_STACK] = "die_mem_stack",
   [MCPE_MAKE_STACK_UNINIT_W_O] = "MAKE_STACK_UNINIT_w_o",
   [MCPE_MAKE_STACK_UNINIT_NO_O] = "MAKE_STACK_UNINIT_no_o",
   [MCPE_MAKE_STACK_UNINIT_128_NO_O] = "MAKE_STACK_UNINIT_128_no_o",
   [MCPE_MAKE_STACK_UNINIT_128_NO_O_ALIGNED_16]
        = "MAKE_STACK_UNINIT_128_no_o_aligned_16",
   [MCPE_MAKE_STACK_UNINIT_128_NO_O_ALIGNED_8]
        = "MAKE_STACK_UNINIT_128_no_o_aligned_8",
   [MCPE_MAKE_STACK_UNINIT_128_NO_O_SLOWCASE]
        = "MAKE_STACK_UNINIT_128_no_o_slowcase",
};
static void init_prof_mem ( void )
{
   Int i, name_count = 0;

   for (i = 0; i < MCPE_LAST; i++) {
      MC_(event_ctr)[i] = 0;
      if (MC_(event_ctr_name)[i] != NULL)
         ++name_count;
   }

   /* Make sure every profiling event has a name */
   tl_assert(name_count == MCPE_LAST);
}
static void done_prof_mem ( void )
{
   Int i, n;
   Bool spaced = False;
   for (i = n = 0; i < MCPE_LAST; i++) {
      if (!spaced && (n % 10) == 0) {
         VG_(printf)("\n");
         spaced = True;
      }
      if (MC_(event_ctr)[i] > 0) {
         spaced = False;
         ++n;
         VG_(printf)( "prof mem event %3d: %11llu   %s\n",
                      i, MC_(event_ctr)[i],
                      MC_(event_ctr_name)[i]);
      }
   }
}

#else

static void init_prof_mem ( void ) { }
static void done_prof_mem ( void ) { }

#endif
/*------------------------------------------------------------*/
/*--- Origin tracking stuff                                ---*/
/*------------------------------------------------------------*/

/*--------------------------------------------*/
/*--- Origin tracking: load handlers       ---*/
/*--------------------------------------------*/

static INLINE UInt merge_origins ( UInt or1, UInt or2 ) {
   return or1 > or2 ? or1 : or2;
}
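/* Editor's note: when the two halves of a wider load are undefined with
   different otags, only one origin can be returned; taking the larger of
   the two is an arbitrary but cheap and deterministic choice. */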
UWord VG_REGPARM(1) MC_(helperc_b_load1)( Addr a ) {
   OCacheLine* line;
   UChar descr;
   UWord lineoff = oc_line_offset(a);
   UWord byteoff = a & 3; /* 0, 1, 2 or 3 */

   if (OC_ENABLE_ASSERTIONS) {
      tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
   }

   line = find_OCacheLine( a );

   descr = line->u.main.descr[lineoff];
   if (OC_ENABLE_ASSERTIONS) {
      tl_assert(descr < 0x10);
   }

   if (LIKELY(0 == (descr & (1 << byteoff)))) {
      return 0;
   } else {
      return line->u.main.w32[lineoff];
   }
}

UWord VG_REGPARM(1) MC_(helperc_b_load2)( Addr a ) {
   OCacheLine* line;
   UChar descr;
   UWord lineoff, byteoff;

   if (UNLIKELY(a & 1)) {
      /* Handle misaligned case, slowly. */
      UInt oLo = (UInt)MC_(helperc_b_load1)( a + 0 );
      UInt oHi = (UInt)MC_(helperc_b_load1)( a + 1 );
      return merge_origins(oLo, oHi);
   }

   lineoff = oc_line_offset(a);
   byteoff = a & 3; /* 0 or 2 */

   if (OC_ENABLE_ASSERTIONS) {
      tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
   }

   line = find_OCacheLine( a );

   descr = line->u.main.descr[lineoff];
   if (OC_ENABLE_ASSERTIONS) {
      tl_assert(descr < 0x10);
   }

   if (LIKELY(0 == (descr & (3 << byteoff)))) {
      return 0;
   } else {
      return line->u.main.w32[lineoff];
   }
}

UWord VG_REGPARM(1) MC_(helperc_b_load4)( Addr a ) {
   OCacheLine* line;
   UChar descr;
   UWord lineoff;

   if (UNLIKELY(a & 3)) {
      /* Handle misaligned case, slowly. */
      UInt oLo = (UInt)MC_(helperc_b_load2)( a + 0 );
      UInt oHi = (UInt)MC_(helperc_b_load2)( a + 2 );
      return merge_origins(oLo, oHi);
   }

   lineoff = oc_line_offset(a);
   if (OC_ENABLE_ASSERTIONS) {
      tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
   }

   line = find_OCacheLine( a );

   descr = line->u.main.descr[lineoff];
   if (OC_ENABLE_ASSERTIONS) {
      tl_assert(descr < 0x10);
   }

   if (LIKELY(0 == descr)) {
      return 0;
   } else {
      return line->u.main.w32[lineoff];
   }
}

UWord VG_REGPARM(1) MC_(helperc_b_load8)( Addr a ) {
   OCacheLine* line;
   UChar descrLo, descrHi, descr;
   UWord lineoff;

   if (UNLIKELY(a & 7)) {
      /* Handle misaligned case, slowly. */
      UInt oLo = (UInt)MC_(helperc_b_load4)( a + 0 );
      UInt oHi = (UInt)MC_(helperc_b_load4)( a + 4 );
      return merge_origins(oLo, oHi);
   }

   lineoff = oc_line_offset(a);
   if (OC_ENABLE_ASSERTIONS) {
      tl_assert(lineoff == (lineoff & 6)); /*0,2,4,6*//*since 8-aligned*/
   }

   line = find_OCacheLine( a );

   descrLo = line->u.main.descr[lineoff + 0];
   descrHi = line->u.main.descr[lineoff + 1];
   descr   = descrLo | descrHi;
   if (OC_ENABLE_ASSERTIONS) {
      tl_assert(descr < 0x10);
   }

   if (LIKELY(0 == descr)) {
      return 0; /* both 32-bit chunks are defined */
   } else {
      UInt oLo = descrLo == 0 ? 0 : line->u.main.w32[lineoff + 0];
      UInt oHi = descrHi == 0 ? 0 : line->u.main.w32[lineoff + 1];
      return merge_origins(oLo, oHi);
   }
}

UWord VG_REGPARM(1) MC_(helperc_b_load16)( Addr a ) {
   UInt oLo   = (UInt)MC_(helperc_b_load8)( a + 0 );
   UInt oHi   = (UInt)MC_(helperc_b_load8)( a + 8 );
   UInt oBoth = merge_origins(oLo, oHi);
   return (UWord)oBoth;
}

UWord VG_REGPARM(1) MC_(helperc_b_load32)( Addr a ) {
   UInt oQ0  = (UInt)MC_(helperc_b_load8)( a + 0 );
   UInt oQ1  = (UInt)MC_(helperc_b_load8)( a + 8 );
   UInt oQ2  = (UInt)MC_(helperc_b_load8)( a + 16 );
   UInt oQ3  = (UInt)MC_(helperc_b_load8)( a + 24 );
   UInt oAll = merge_origins(merge_origins(oQ0, oQ1),
                             merge_origins(oQ2, oQ3));
   return (UWord)oAll;
}
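/* Editor's note on the descr encoding used above: each descr[] entry is a
   4-bit mask covering one 32-bit word of the line, one bit per byte (bit i
   set means byte i is undefined and w32[] holds its origin).  E.g.
   descr == 0x6 means bytes 1 and 2 of that word are undefined with origin
   w32[lineoff].  Misaligned accesses are simply split into two smaller
   aligned loads whose results are combined with merge_origins. */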
/*--------------------------------------------*/
/*--- Origin tracking: store handlers      ---*/
/*--------------------------------------------*/

void VG_REGPARM(2) MC_(helperc_b_store1)( Addr a, UWord d32 ) {
   OCacheLine* line;
   UWord lineoff = oc_line_offset(a);
   UWord byteoff = a & 3; /* 0, 1, 2 or 3 */

   if (OC_ENABLE_ASSERTIONS) {
      tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
   }

   line = find_OCacheLine( a );

#if OC_PRECISION_STORE
   if (LIKELY(d32 == 0)) {
      // The byte is defined.  Just mark it as so in the descr and leave the w32
      // unchanged.  This may make the descr become zero, so the line no longer
      // contains useful info, but that's OK.  No loss of information.
      line->u.main.descr[lineoff] &= ~(1 << byteoff);
   } else if (d32 == line->u.main.w32[lineoff]) {
      // At least one of the four bytes in the w32 is undefined with the same
      // origin.  Just extend the mask.  No loss of information.
      line->u.main.descr[lineoff] |= (1 << byteoff);
   } else {
      // Here, we have a conflict: at least one byte in the group is undefined
      // but with some other origin.  We can't represent both origins, so we
      // forget about the previous origin and install this one instead.
      line->u.main.descr[lineoff] = (1 << byteoff);
      line->u.main.w32[lineoff]   = d32;
   }
#else
   if (d32 == 0) {
      line->u.main.descr[lineoff] &= ~(1 << byteoff);
   } else {
      line->u.main.descr[lineoff] |= (1 << byteoff);
      line->u.main.w32[lineoff]   = d32;
   }
#endif
}

void VG_REGPARM(2) MC_(helperc_b_store2)( Addr a, UWord d32 ) {
   OCacheLine* line;
   UWord lineoff, byteoff;

   if (UNLIKELY(a & 1)) {
      /* Handle misaligned case, slowly. */
      MC_(helperc_b_store1)( a + 0, d32 );
      MC_(helperc_b_store1)( a + 1, d32 );
      return;
   }

   lineoff = oc_line_offset(a);
   byteoff = a & 3; /* 0 or 2 */

   if (OC_ENABLE_ASSERTIONS) {
      tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
   }

   line = find_OCacheLine( a );

#if OC_PRECISION_STORE
   // Same logic as in the store1 case above.
   if (LIKELY(d32 == 0)) {
      line->u.main.descr[lineoff] &= ~(3 << byteoff);
   } else if (d32 == line->u.main.w32[lineoff]) {
      line->u.main.descr[lineoff] |= (3 << byteoff);
      line->u.main.w32[lineoff]   = d32;
   } else {
      line->u.main.descr[lineoff] = (3 << byteoff);
      line->u.main.w32[lineoff]   = d32;
   }
#else
   if (d32 == 0) {
      line->u.main.descr[lineoff] &= ~(3 << byteoff);
   } else {
      line->u.main.descr[lineoff] |= (3 << byteoff);
      line->u.main.w32[lineoff]   = d32;
   }
#endif
}

void VG_REGPARM(2) MC_(helperc_b_store4)( Addr a, UWord d32 ) {
   OCacheLine* line;
   UWord lineoff;

   if (UNLIKELY(a & 3)) {
      /* Handle misaligned case, slowly. */
      MC_(helperc_b_store2)( a + 0, d32 );
      MC_(helperc_b_store2)( a + 2, d32 );
      return;
   }

   lineoff = oc_line_offset(a);
   if (OC_ENABLE_ASSERTIONS) {
      tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
   }

   line = find_OCacheLine( a );

   if (d32 == 0) {
      line->u.main.descr[lineoff] = 0;
   } else {
      line->u.main.descr[lineoff] = 0xF;
      line->u.main.w32[lineoff]   = d32;
   }
}

void VG_REGPARM(2) MC_(helperc_b_store8)( Addr a, UWord d32 ) {
   STATIC_ASSERT(OC_W32S_PER_LINE == 8);
   OCacheLine* line;
   UWord lineoff;

   if (UNLIKELY(a & 7)) {
      /* Handle misaligned case, slowly. */
      MC_(helperc_b_store4)( a + 0, d32 );
      MC_(helperc_b_store4)( a + 4, d32 );
      return;
   }

   lineoff = oc_line_offset(a);
   if (OC_ENABLE_ASSERTIONS) {
      tl_assert(lineoff == (lineoff & 6)); /*0,2,4,6*//*since 8-aligned*/
   }

   line = find_OCacheLine( a );

   if (d32 == 0) {
      line->u.main.descr[lineoff + 0] = 0;
      line->u.main.descr[lineoff + 1] = 0;
   } else {
      line->u.main.descr[lineoff + 0] = 0xF;
      line->u.main.descr[lineoff + 1] = 0xF;
      line->u.main.w32[lineoff + 0]   = d32;
      line->u.main.w32[lineoff + 1]   = d32;
   }
}

void VG_REGPARM(2) MC_(helperc_b_store16)( Addr a, UWord d32 ) {
   STATIC_ASSERT(OC_W32S_PER_LINE == 8);
   OCacheLine* line;
   UWord lineoff;

   if (UNLIKELY(a & 15)) {
      /* Handle misaligned case, slowly. */
      MC_(helperc_b_store8)( a + 0, d32 );
      MC_(helperc_b_store8)( a + 8, d32 );
      return;
   }

   lineoff = oc_line_offset(a);
   if (OC_ENABLE_ASSERTIONS) {
      tl_assert(lineoff == (lineoff & 4)); /*0,4*//*since 16-aligned*/
   }

   line = find_OCacheLine( a );

   if (d32 == 0) {
      line->u.main.descr[lineoff + 0] = 0;
      line->u.main.descr[lineoff + 1] = 0;
      line->u.main.descr[lineoff + 2] = 0;
      line->u.main.descr[lineoff + 3] = 0;
   } else {
      line->u.main.descr[lineoff + 0] = 0xF;
      line->u.main.descr[lineoff + 1] = 0xF;
      line->u.main.descr[lineoff + 2] = 0xF;
      line->u.main.descr[lineoff + 3] = 0xF;
      line->u.main.w32[lineoff + 0]   = d32;
      line->u.main.w32[lineoff + 1]   = d32;
      line->u.main.w32[lineoff + 2]   = d32;
      line->u.main.w32[lineoff + 3]   = d32;
   }
}

void VG_REGPARM(2) MC_(helperc_b_store32)( Addr a, UWord d32 ) {
   STATIC_ASSERT(OC_W32S_PER_LINE == 8);
   OCacheLine* line;
   UWord lineoff;

   if (UNLIKELY(a & 31)) {
      /* Handle misaligned case, slowly. */
      MC_(helperc_b_store16)( a + 0,  d32 );
      MC_(helperc_b_store16)( a + 16, d32 );
      return;
   }

   lineoff = oc_line_offset(a);
   if (OC_ENABLE_ASSERTIONS) {
      tl_assert(lineoff == 0);
   }

   line = find_OCacheLine( a );

   if (d32 == 0) {
      line->u.main.descr[0] = 0;
      line->u.main.descr[1] = 0;
      line->u.main.descr[2] = 0;
      line->u.main.descr[3] = 0;
      line->u.main.descr[4] = 0;
      line->u.main.descr[5] = 0;
      line->u.main.descr[6] = 0;
      line->u.main.descr[7] = 0;
   } else {
      line->u.main.descr[0] = 0xF;
      line->u.main.descr[1] = 0xF;
      line->u.main.descr[2] = 0xF;
      line->u.main.descr[3] = 0xF;
      line->u.main.descr[4] = 0xF;
      line->u.main.descr[5] = 0xF;
      line->u.main.descr[6] = 0xF;
      line->u.main.descr[7] = 0xF;
      line->u.main.w32[0]   = d32;
      line->u.main.w32[1]   = d32;
      line->u.main.w32[2]   = d32;
      line->u.main.w32[3]   = d32;
      line->u.main.w32[4]   = d32;
      line->u.main.w32[5]   = d32;
      line->u.main.w32[6]   = d32;
      line->u.main.w32[7]   = d32;
   }
}
/*--------------------------------------------*/
/*--- Origin tracking: sarp handlers       ---*/
/*--------------------------------------------*/

// We may get asked to do very large SARPs (bug 446103), hence it is important
// to process 32-byte chunks at a time when possible.

__attribute__((noinline))
static void ocache_sarp_Set_Origins ( Addr a, UWord len, UInt otag ) {
   if ((a & 1) && len >= 1) {
      MC_(helperc_b_store1)( a, otag );
      a++;
      len--;
   }
   if ((a & 2) && len >= 2) {
      MC_(helperc_b_store2)( a, otag );
      a += 2;
      len -= 2;
   }
   if ((a & 4) && len >= 4) {
      MC_(helperc_b_store4)( a, otag );
      a += 4;
      len -= 4;
   }
   if ((a & 8) && len >= 8) {
      MC_(helperc_b_store8)( a, otag );
      a += 8;
      len -= 8;
   }
   if ((a & 16) && len >= 16) {
      MC_(helperc_b_store16)( a, otag );
      a += 16;
      len -= 16;
   }
   if (len >= 32) {
      tl_assert(0 == (a & 31));
      while (len >= 32) {
         MC_(helperc_b_store32)( a, otag );
         a += 32;
         len -= 32;
      }
   }
   if (len >= 16) {
      MC_(helperc_b_store16)( a, otag );
      a += 16;
      len -= 16;
   }
   if (len >= 8) {
      MC_(helperc_b_store8)( a, otag );
      a += 8;
      len -= 8;
   }
   if (len >= 4) {
      MC_(helperc_b_store4)( a, otag );
      a += 4;
      len -= 4;
   }
   if (len >= 2) {
      MC_(helperc_b_store2)( a, otag );
      a += 2;
      len -= 2;
   }
   if (len >= 1) {
      MC_(helperc_b_store1)( a, otag );
      a += 1;
      len -= 1;
   }
   tl_assert(len == 0);
}

__attribute__((noinline))
static void ocache_sarp_Clear_Origins ( Addr a, UWord len ) {
   if ((a & 1) && len >= 1) {
      MC_(helperc_b_store1)( a, 0 );
      a++;
      len--;
   }
   if ((a & 2) && len >= 2) {
      MC_(helperc_b_store2)( a, 0 );
      a += 2;
      len -= 2;
   }
   if ((a & 4) && len >= 4) {
      MC_(helperc_b_store4)( a, 0 );
      a += 4;
      len -= 4;
   }
   if ((a & 8) && len >= 8) {
      MC_(helperc_b_store8)( a, 0 );
      a += 8;
      len -= 8;
   }
   if ((a & 16) && len >= 16) {
      MC_(helperc_b_store16)( a, 0 );
      a += 16;
      len -= 16;
   }
   if (len >= 32) {
      tl_assert(0 == (a & 31));
      while (len >= 32) {
         MC_(helperc_b_store32)( a, 0 );
         a += 32;
         len -= 32;
      }
   }
   if (len >= 16) {
      MC_(helperc_b_store16)( a, 0 );
      a += 16;
      len -= 16;
   }
   if (len >= 8) {
      MC_(helperc_b_store8)( a, 0 );
      a += 8;
      len -= 8;
   }
   if (len >= 4) {
      MC_(helperc_b_store4)( a, 0 );
      a += 4;
      len -= 4;
   }
   if (len >= 2) {
      MC_(helperc_b_store2)( a, 0 );
      a += 2;
      len -= 2;
   }
   if (len >= 1) {
      MC_(helperc_b_store1)( a, 0 );
      a += 1;
      len -= 1;
   }
   tl_assert(len == 0);
}
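/* Editor's worked example (sketch) of the alignment ramp above: for
   a = 0x1003, len = 80 the Set_Origins calls are
      store1 @0x1003, store4 @0x1004, store8 @0x1008, store16 @0x1010,
      store32 @0x1020, store16 @0x1040, store2 @0x1050, store1 @0x1052
   i.e. align up to a 32-byte boundary, stream 32-byte chunks, then ramp
   back down; len reaches exactly 0, satisfying the final assertion. */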
/*------------------------------------------------------------*/
/*--- Setup and finalisation                               ---*/
/*------------------------------------------------------------*/

static void mc_post_clo_init ( void )
{
   /* If we've been asked to emit XML, mash around various other
      options so as to constrain the output somewhat. */
   if (VG_(clo_xml)) {
      /* Extract as much info as possible from the leak checker. */
      MC_(clo_leak_check) = LC_Full;
   }

   if (MC_(clo_freelist_big_blocks) >= MC_(clo_freelist_vol)
       && VG_(clo_verbosity) == 1 && !VG_(clo_xml)) {
      VG_(message)(Vg_UserMsg,
                   "Warning: --freelist-big-blocks value %lld has no effect\n"
                   "as it is >= to --freelist-vol value %lld\n",
                   MC_(clo_freelist_big_blocks),
                   MC_(clo_freelist_vol));
   }

   if (MC_(clo_workaround_gcc296_bugs)
       && VG_(clo_verbosity) == 1 && !VG_(clo_xml)) {
      VG_(umsg)(
         "Warning: --workaround-gcc296-bugs=yes is deprecated.\n"
         "Warning: Instead use: --ignore-range-below-sp=1024-1\n"
         "\n");
   }

   tl_assert( MC_(clo_mc_level) >= 1 && MC_(clo_mc_level) <= 3 );

   if (MC_(clo_mc_level) == 3) {
      /* We're doing origin tracking. */
#     ifdef PERF_FAST_STACK
      VG_(track_new_mem_stack_4_w_ECU)   ( mc_new_mem_stack_4_w_ECU   );
      VG_(track_new_mem_stack_8_w_ECU)   ( mc_new_mem_stack_8_w_ECU   );
      VG_(track_new_mem_stack_12_w_ECU)  ( mc_new_mem_stack_12_w_ECU  );
      VG_(track_new_mem_stack_16_w_ECU)  ( mc_new_mem_stack_16_w_ECU  );
      VG_(track_new_mem_stack_32_w_ECU)  ( mc_new_mem_stack_32_w_ECU  );
      VG_(track_new_mem_stack_112_w_ECU) ( mc_new_mem_stack_112_w_ECU );
      VG_(track_new_mem_stack_128_w_ECU) ( mc_new_mem_stack_128_w_ECU );
      VG_(track_new_mem_stack_144_w_ECU) ( mc_new_mem_stack_144_w_ECU );
      VG_(track_new_mem_stack_160_w_ECU) ( mc_new_mem_stack_160_w_ECU );
#     endif
      VG_(track_new_mem_stack_w_ECU)     ( mc_new_mem_stack_w_ECU     );
      VG_(track_new_mem_stack_signal)    ( mc_new_mem_w_tid_make_ECU  );
   } else {
      /* Not doing origin tracking */
#     ifdef PERF_FAST_STACK
      VG_(track_new_mem_stack_4)   ( mc_new_mem_stack_4   );
      VG_(track_new_mem_stack_8)   ( mc_new_mem_stack_8   );
      VG_(track_new_mem_stack_12)  ( mc_new_mem_stack_12  );
      VG_(track_new_mem_stack_16)  ( mc_new_mem_stack_16  );
      VG_(track_new_mem_stack_32)  ( mc_new_mem_stack_32  );
      VG_(track_new_mem_stack_112) ( mc_new_mem_stack_112 );
      VG_(track_new_mem_stack_128) ( mc_new_mem_stack_128 );
      VG_(track_new_mem_stack_144) ( mc_new_mem_stack_144 );
      VG_(track_new_mem_stack_160) ( mc_new_mem_stack_160 );
#     endif
      VG_(track_new_mem_stack)     ( mc_new_mem_stack     );
      VG_(track_new_mem_stack_signal) ( mc_new_mem_w_tid_no_ECU );
   }

   // We assume that brk()/sbrk() does not initialise new memory.  Is this
   // accurate?  John Reiser says:
   //
   //   0) sbrk() can *decrease* process address space.  No zero fill is done
   //   for a decrease, not even the fragment on the high end of the last page
   //   that is beyond the new highest address.  For maximum safety and
   //   portability, then the bytes in the last page that reside above [the
   //   new] sbrk(0) should be considered to be uninitialized, but in practice
   //   it is exceedingly likely that they will retain their previous
   //   contents.
   //
   //   1) If an increase is large enough to require new whole pages, then
   //   those new whole pages (like all new pages) are zero-filled by the
   //   operating system.  So if sbrk(0) already is page aligned, then
   //   sbrk(PAGE_SIZE) *does* zero-fill the new memory.
   //
   //   2) Any increase that lies within an existing allocated page is not
   //   changed.  So if (x = sbrk(0)) is not page aligned, then
   //   sbrk(PAGE_SIZE) yields ((PAGE_SIZE -1) & -x) bytes which keep their
   //   existing contents, and an additional PAGE_SIZE bytes which are zeroed.
   //   ((PAGE_SIZE -1) & x) of them are "covered" by the sbrk(), and the rest
   //   of them come along for the ride because the operating system deals
   //   only in whole pages.  Again, for maximum safety and portability, then
   //   anything that lives above [the new] sbrk(0) should be considered
   //   uninitialized, but in practice will retain previous contents [zero in
   //   this case.]
   //
   // In short:
   //
   //   A key property of sbrk/brk is that new whole pages that are supplied
   //   by the operating system *do* get initialized to zero.
   //
   // As for the portability of all this:
   //
   //   sbrk and brk are not POSIX.  However, any system that is a derivative
   //   of *nix has sbrk and brk because there is too much software (such as
   //   the Bourne shell) which relies on the traditional memory map (.text,
   //   .data+.bss, stack) and the existence of sbrk/brk.
   //
   // So we should arguably observe all this.  However:
   // - The current inaccuracy has caused maybe one complaint in seven years(?)
   // - Relying on the zeroed-ness of whole brk'd pages is pretty grotty... I
   //   doubt most programmers know the above information.
   // So I'm not terribly unhappy with marking it as undefined. --njn.
   //
   // [More: I think most of what John said only applies to sbrk().  It seems
   // that brk() always deals in whole pages.  And since this event deals
   // directly with brk(), not with sbrk(), perhaps it would be reasonable to
   // just mark all memory it allocates as defined.]
   //
#  if !defined(VGO_solaris)
   if (MC_(clo_mc_level) == 3)
      VG_(track_new_mem_brk) ( mc_new_mem_w_tid_make_ECU );
   else
      VG_(track_new_mem_brk) ( mc_new_mem_w_tid_no_ECU );
#  else
   // On Solaris, brk memory has to be marked as defined, otherwise we get
   // many false positives.
   VG_(track_new_mem_brk) ( make_mem_defined_w_tid );
#  endif

   /* This origin tracking cache is huge (~100M), so only initialise
      if we need to. */
   if (MC_(clo_mc_level) >= 3) {
      init_OCache();
      tl_assert(ocacheL1 != NULL);
      for (UInt i = 0; i < 4096; i++ ) {
         tl_assert(ocachesL2[i] != NULL);
      }
   } else {
      tl_assert(ocacheL1 == NULL);
      for (UInt i = 0; i < 4096; i++ ) {
         tl_assert(ocachesL2[i] == NULL);
      }
   }

   MC_(chunk_poolalloc) = VG_(newPA)
      (sizeof(MC_Chunk) + MC_(n_where_pointers)() * sizeof(ExeContext*),
       1000,
       VG_(malloc),
       "mc.cMC.1 (MC_Chunk pools)",
       VG_(free));

   /* Do not check definedness of guest state if --undef-value-errors=no */
   if (MC_(clo_mc_level) >= 2)
      VG_(track_pre_reg_read) ( mc_pre_reg_read );

   if (VG_(clo_xtree_memory) == Vg_XTMemory_Full) {
      if (MC_(clo_keep_stacktraces) == KS_none
          || MC_(clo_keep_stacktraces) == KS_free)
         VG_(fmsg_bad_option)("--keep-stacktraces",
                              "To use --xtree-memory=full, you must"
                              " keep at least the alloc stacktrace\n");
      // Activate full xtree memory profiling.
      VG_(XTMemory_Full_init)(VG_(XT_filter_1top_and_maybe_below_main));
   }
}
static void print_SM_info(const HChar* type, Int n_SMs)
{
   VG_(message)(Vg_DebugMsg,
      " memcheck: SMs: %s = %d (%luk, %luM)\n",
      type,
      n_SMs,
      n_SMs * sizeof(SecMap) / 1024UL,
      n_SMs * sizeof(SecMap) / (1024 * 1024UL) );
}
static void mc_print_stats (void)
{
   SizeT max_secVBit_szB, max_SMs_szB, max_shmem_szB;

   VG_(message)(Vg_DebugMsg, " memcheck: freelist: vol %lld length %lld\n",
                VG_(free_queue_volume), VG_(free_queue_length));
   VG_(message)(Vg_DebugMsg,
      " memcheck: sanity checks: %d cheap, %d expensive\n",
      n_sanity_cheap, n_sanity_expensive );
   VG_(message)(Vg_DebugMsg,
      " memcheck: auxmaps: %llu auxmap entries (%lluk, %lluM) in use\n",
      n_auxmap_L2_nodes,
      n_auxmap_L2_nodes * 64,
      n_auxmap_L2_nodes / 16 );
   VG_(message)(Vg_DebugMsg,
      " memcheck: auxmaps_L1: %llu searches, %llu cmps, ratio %llu:10\n",
      n_auxmap_L1_searches, n_auxmap_L1_cmps,
      (10ULL * n_auxmap_L1_cmps)
         / (n_auxmap_L1_searches ? n_auxmap_L1_searches : 1)
   );
   VG_(message)(Vg_DebugMsg,
      " memcheck: auxmaps_L2: %llu searches, %llu nodes\n",
      n_auxmap_L2_searches, n_auxmap_L2_nodes
   );

   print_SM_info("n_issued     ", n_issued_SMs);
   print_SM_info("n_deissued   ", n_deissued_SMs);
   print_SM_info("max_noaccess ", max_noaccess_SMs);
   print_SM_info("max_undefined", max_undefined_SMs);
   print_SM_info("max_defined  ", max_defined_SMs);
   print_SM_info("max_non_DSM  ", max_non_DSM_SMs);

   // Three DSMs, plus the non-DSM ones
   max_SMs_szB = (3 + max_non_DSM_SMs) * sizeof(SecMap);
   // The 3*sizeof(Word) bytes is the AVL node metadata size.
   // The VG_ROUNDUP is because the OSet pool allocator will/must align
   // the elements on pointer size.
   // Note that the pool allocator has some additional small overhead
   // which is not counted in the below.
   // Hardwiring this logic sucks, but I don't see how else to do it.
   max_secVBit_szB = max_secVBit_nodes *
      (3*sizeof(Word) + VG_ROUNDUP(sizeof(SecVBitNode), sizeof(void*)));
   max_shmem_szB = sizeof(primary_map) + max_SMs_szB + max_secVBit_szB;

   VG_(message)(Vg_DebugMsg,
      " memcheck: max sec V bit nodes: %d (%luk, %luM)\n",
      max_secVBit_nodes, max_secVBit_szB / 1024,
                         max_secVBit_szB / (1024 * 1024));
   VG_(message)(Vg_DebugMsg,
      " memcheck: set_sec_vbits8 calls: %llu (new: %llu, updates: %llu)\n",
      sec_vbits_new_nodes + sec_vbits_updates,
      sec_vbits_new_nodes, sec_vbits_updates );
   VG_(message)(Vg_DebugMsg,
      " memcheck: max shadow mem size: %luk, %luM\n",
      max_shmem_szB / 1024, max_shmem_szB / (1024 * 1024));

   if (MC_(clo_mc_level) >= 3) {
      VG_(message)(Vg_DebugMsg,
                   " ocacheL1: %'14lu refs   %'14lu misses (%'lu lossage)\n",
                   stats_ocacheL1_find,
                   stats_ocacheL1_misses,
                   stats_ocacheL1_lossage );
      VG_(message)(Vg_DebugMsg,
                   " ocacheL1: %'14lu at 0   %'14lu at 1\n",
                   stats_ocacheL1_find - stats_ocacheL1_misses
                      - stats_ocacheL1_found_at_1
                      - stats_ocacheL1_found_at_N,
                   stats_ocacheL1_found_at_1 );
      VG_(message)(Vg_DebugMsg,
                   " ocacheL1: %'14lu at 2+  %'14lu move-fwds\n",
                   stats_ocacheL1_found_at_N,
                   stats_ocacheL1_movefwds );
      VG_(message)(Vg_DebugMsg,
                   " ocacheL1: %'14lu sizeB  %'14d useful\n",
                   (SizeT)sizeof(OCache),
                   4 * OC_W32S_PER_LINE * OC_LINES_PER_SET * OC_N_SETS );
      VG_(message)(Vg_DebugMsg,
                   " ocacheL2: %'14lu finds  %'14lu misses\n",
                   stats__ocacheL2_finds,
                   stats__ocacheL2_misses );
      VG_(message)(Vg_DebugMsg,
                   " ocacheL2: %'14lu adds   %'14lu dels\n",
                   stats__ocacheL2_adds,
                   stats__ocacheL2_dels );
      VG_(message)(Vg_DebugMsg,
                   " ocacheL2: %'9lu max nodes %'9lu curr nodes\n",
                   stats__ocacheL2_n_nodes_max,
                   stats__ocacheL2_n_nodes );
      VG_(message)(Vg_DebugMsg,
                   " niacache: %'12lu refs   %'12lu misses\n",
                   stats__nia_cache_queries, stats__nia_cache_misses);
   } else {
      tl_assert(ocacheL1 == NULL);
      for (UInt i = 0; i < 4096; i++ ) {
         tl_assert(ocachesL2[i] == NULL);
      }
   }
}
static void mc_fini ( Int exitcode )
{
   MC_(xtmemory_report) (VG_(clo_xtree_memory_file), True);
   MC_(print_malloc_stats)();

   if (MC_(clo_leak_check) != LC_Off) {
      LeakCheckParams lcp;
      HChar* xt_filename = NULL;
      lcp.mode = MC_(clo_leak_check);
      lcp.show_leak_kinds = MC_(clo_show_leak_kinds);
      lcp.heuristics = MC_(clo_leak_check_heuristics);
      lcp.errors_for_leak_kinds = MC_(clo_error_for_leak_kinds);
      lcp.deltamode = LCD_Any;
      lcp.max_loss_records_output = 999999999;
      lcp.requested_by_monitor_command = False;
      if (MC_(clo_xtree_leak)) {
         xt_filename = VG_(expand_file_name)("--xtree-leak-file",
                                             MC_(clo_xtree_leak_file));
         lcp.xt_filename = xt_filename;
         lcp.mode = LC_Full;
         lcp.show_leak_kinds = MC_(all_Reachedness)();
      }
      else
         lcp.xt_filename = NULL;
      MC_(detect_memory_leaks)(1/*bogus ThreadId*/, &lcp);
      if (MC_(clo_xtree_leak))
         VG_(free)(xt_filename);
   } else {
      if (VG_(clo_verbosity) == 1 && !VG_(clo_xml)) {
         VG_(umsg)(
            "For a detailed leak analysis, rerun with: --leak-check=full\n"
            "\n"
         );
      }
   }

   if (MC_(any_value_errors) && !VG_(clo_xml) && VG_(clo_verbosity) >= 1
       && MC_(clo_mc_level) == 2) {
      VG_(message)(Vg_UserMsg,
                   "Use --track-origins=yes to see where "
                   "uninitialised values come from\n");
   }

   /* Print a warning if any client-request generated ignore-ranges
      still exist.  It would be reasonable to expect that a properly
      written program would remove any such ranges before exiting, and
      since they are a bit on the dangerous side, let's comment.  By
      contrast ranges which are specified on the command line normally
      pertain to hardware mapped into the address space, and so we
      can't expect the client to have got rid of them. */
   if (gIgnoredAddressRanges) {
      UInt i, nBad = 0;
      for (i = 0; i < VG_(sizeRangeMap)(gIgnoredAddressRanges); i++) {
         UWord val     = IAR_INVALID;
         UWord key_min = ~(UWord)0;
         UWord key_max = (UWord)0;
         VG_(indexRangeMap)( &key_min, &key_max, &val,
                             gIgnoredAddressRanges, i );
         if (val != IAR_ClientReq)
            continue;
         /* Print the offending range.  Also, if it is the first,
            print a banner before it. */
         nBad++;
         if (nBad == 1) {
            VG_(umsg)(
               "WARNING: exiting program has the following client-requested\n"
               "WARNING: address error disablement range(s) still in force,\n"
               "WARNING: "
                  "possibly as a result of some mistake in the use of the\n"
               "WARNING: "
                  "VALGRIND_{DISABLE,ENABLE}_ERROR_REPORTING_IN_RANGE macros.\n"
            );
         }
         VG_(umsg)("   [%u]  0x%016lx-0x%016lx  %s\n",
                   i, key_min, key_max, showIARKind(val));
      }
   }

   done_prof_mem();

   if (VG_(clo_stats))
      mc_print_stats();

   if (0) {
      VG_(message)(Vg_DebugMsg,
         "------ Valgrind's client block stats follow ---------------\n" );
      show_client_block_stats();
   }
}
/* mark the given addr/len unaddressable for watchpoint implementation.
   The PointKind will be handled at access time */
static Bool mc_mark_unaddressable_for_watchpoint (PointKind kind, Bool insert,
                                                  Addr addr, SizeT len)
{
   /* GDBTD this is somewhat fishy.  We might rather have to save the previous
      accessibility and definedness in gdbserver so as to allow restoring it
      properly.  Currently, we assume that the user only watches things
      which are properly addressable and defined */
   if (insert)
      MC_(make_mem_noaccess) (addr, len);
   else
      MC_(make_mem_defined)  (addr, len);
   return True;
}
static void mc_pre_clo_init(void)
{
   VG_(details_name)            ("Memcheck");
   VG_(details_version)         (NULL);
   VG_(details_description)     ("a memory error detector");
   VG_(details_copyright_author)(
      "Copyright (C) 2002-2022, and GNU GPL'd, by Julian Seward et al.");
   VG_(details_bug_reports_to)  (VG_BUGS_TO);
   VG_(details_avg_translation_sizeB) ( 640 );

   VG_(basic_tool_funcs)          (mc_post_clo_init,
                                   MC_(instrument),
                                   mc_fini);

   VG_(needs_final_IR_tidy_pass)  ( MC_(final_tidy) );

   VG_(needs_core_errors)         ();
   VG_(needs_tool_errors)         (MC_(eq_Error),
                                   MC_(before_pp_Error),
                                   MC_(pp_Error),
                                   True,/*show TIDs for errors*/
                                   MC_(update_Error_extra),
                                   MC_(is_recognised_suppression),
                                   MC_(read_extra_suppression_info),
                                   MC_(error_matches_suppression),
                                   MC_(get_error_name),
                                   MC_(get_extra_suppression_info),
                                   MC_(print_extra_suppression_use),
                                   MC_(update_extra_suppression_use));
   VG_(needs_libc_freeres)        ();
   VG_(needs_cxx_freeres)         ();
   VG_(needs_command_line_options)(mc_process_cmd_line_options,
                                   mc_print_usage,
                                   mc_print_debug_usage);
   VG_(needs_client_requests)     (mc_handle_client_request);
   VG_(needs_sanity_checks)       (mc_cheap_sanity_check,
                                   mc_expensive_sanity_check);
   VG_(needs_print_stats)         (mc_print_stats);
   VG_(needs_info_location)       (MC_(pp_describe_addr));
   VG_(needs_malloc_replacement)  (MC_(malloc),
                                   MC_(__builtin_new),
                                   MC_(__builtin_new_aligned),
                                   MC_(__builtin_vec_new),
                                   MC_(__builtin_vec_new_aligned),
                                   MC_(memalign),
                                   MC_(calloc),
                                   MC_(free),
                                   MC_(__builtin_delete),
                                   MC_(__builtin_delete_aligned),
                                   MC_(__builtin_vec_delete),
                                   MC_(__builtin_vec_delete_aligned),
                                   MC_(realloc),
                                   MC_(malloc_usable_size),
                                   MC_MALLOC_DEFAULT_REDZONE_SZB );
   MC_(Malloc_Redzone_SzB) = VG_(malloc_effective_client_redzone_size)();

   VG_(needs_xml_output)          ();

   VG_(track_new_mem_startup)     ( mc_new_mem_startup );

   // Handling of mmap and mprotect isn't simple (well, it is simple,
   // but the justification isn't.)  See comments above, just prior to
   // mc_new_mem_mmap.
   VG_(track_new_mem_mmap)        ( mc_new_mem_mmap );
   VG_(track_change_mem_mprotect) ( mc_new_mem_mprotect );

   VG_(track_copy_mem_remap)      ( MC_(copy_address_range_state) );

   VG_(track_die_mem_stack_signal)( MC_(make_mem_noaccess) );
   VG_(track_die_mem_brk)         ( MC_(make_mem_noaccess) );
   VG_(track_die_mem_munmap)      ( MC_(make_mem_noaccess) );

   /* Defer the specification of the new_mem_stack functions to the
      post_clo_init function, since we need to first parse the command
      line before deciding which set to use. */

#  ifdef PERF_FAST_STACK
   VG_(track_die_mem_stack_4)   ( mc_die_mem_stack_4   );
   VG_(track_die_mem_stack_8)   ( mc_die_mem_stack_8   );
   VG_(track_die_mem_stack_12)  ( mc_die_mem_stack_12  );
   VG_(track_die_mem_stack_16)  ( mc_die_mem_stack_16  );
   VG_(track_die_mem_stack_32)  ( mc_die_mem_stack_32  );
   VG_(track_die_mem_stack_112) ( mc_die_mem_stack_112 );
   VG_(track_die_mem_stack_128) ( mc_die_mem_stack_128 );
   VG_(track_die_mem_stack_144) ( mc_die_mem_stack_144 );
   VG_(track_die_mem_stack_160) ( mc_die_mem_stack_160 );
#  endif
   VG_(track_die_mem_stack)     ( mc_die_mem_stack     );

   VG_(track_ban_mem_stack)       ( MC_(make_mem_noaccess) );

   VG_(track_pre_mem_read)        ( check_mem_is_defined );
   VG_(track_pre_mem_read_asciiz) ( check_mem_is_defined_asciiz );
   VG_(track_pre_mem_write)       ( check_mem_is_addressable );
   VG_(track_post_mem_write)      ( mc_post_mem_write );

   VG_(track_post_reg_write)                  ( mc_post_reg_write );
   VG_(track_post_reg_write_clientcall_return)( mc_post_reg_write_clientcall );

   if (MC_(clo_mc_level) >= 2) {
      VG_(track_copy_mem_to_reg) ( mc_copy_mem_to_reg );
      VG_(track_copy_reg_to_mem) ( mc_copy_reg_to_mem );
   }

   VG_(needs_watchpoint)          ( mc_mark_unaddressable_for_watchpoint );

   init_shadow_memory();
   // MC_(chunk_poolalloc) must be allocated in post_clo_init
   tl_assert(MC_(chunk_poolalloc) == NULL);
   MC_(malloc_list)  = VG_(HT_construct)( "MC_(malloc_list)" );
   MC_(mempool_list) = VG_(HT_construct)( "MC_(mempool_list)" );
   init_prof_mem();

   tl_assert( mc_expensive_sanity_check() );

   // {LOADV,STOREV}[8421] will all fail horribly if this isn't true.
   tl_assert(sizeof(UWord) == sizeof(Addr));
   // Call me paranoid.  I don't care.
   tl_assert(sizeof(void*) == sizeof(Addr));

   // BYTES_PER_SEC_VBIT_NODE must be a power of two.
   tl_assert(-1 != VG_(log2)(BYTES_PER_SEC_VBIT_NODE));

   /* This is small.  Always initialise it. */
   init_nia_to_ecu_cache();

   /* We can't initialise ocacheL1/ocacheL2 yet, since we don't know
      if we need to, since the command line args haven't been
      processed yet.  Hence defer it to mc_post_clo_init. */
   tl_assert(ocacheL1 == NULL);
   for (UInt i = 0; i < 4096; i++ ) {
      tl_assert(ocachesL2[i] == NULL);
   }

   /* Check some important stuff.  See extensive comments above
      re UNALIGNED_OR_HIGH for background. */
#  if VG_WORDSIZE == 4
   tl_assert(sizeof(void*) == 4);
   tl_assert(sizeof(Addr)  == 4);
   tl_assert(sizeof(UWord) == 4);
   tl_assert(sizeof(Word)  == 4);
   tl_assert(MAX_PRIMARY_ADDRESS == 0xFFFFFFFFUL);
   tl_assert(MASK(1) == 0UL);
   tl_assert(MASK(2) == 1UL);
   tl_assert(MASK(4) == 3UL);
   tl_assert(MASK(8) == 7UL);
#  else
   tl_assert(VG_WORDSIZE == 8);
   tl_assert(sizeof(void*) == 8);
   tl_assert(sizeof(Addr)  == 8);
   tl_assert(sizeof(UWord) == 8);
   tl_assert(sizeof(Word)  == 8);
   tl_assert(MAX_PRIMARY_ADDRESS == 0x1FFFFFFFFFULL);
   tl_assert(MASK(1) == 0xFFFFFFE000000000ULL);
   tl_assert(MASK(2) == 0xFFFFFFE000000001ULL);
   tl_assert(MASK(4) == 0xFFFFFFE000000003ULL);
   tl_assert(MASK(8) == 0xFFFFFFE000000007ULL);
#  endif

   /* Check some assertions to do with the instrumentation machinery. */
   MC_(do_instrumentation_startup_checks)();
}

STATIC_ASSERT(sizeof(UWord) == sizeof(SizeT));

VG_DETERMINE_INTERFACE_VERSION(mc_pre_clo_init)
/*--------------------------------------------------------------------*/
/*--- end                                                mc_main.c ---*/
/*--------------------------------------------------------------------*/